Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore


    private MapReduceOper getMROper(){
        return new MapReduceOper(new OperatorKey(scope, nig.getNextNodeId(scope)));
    }
  
    private POStore getStore(){
        return new POStore(new OperatorKey(scope, nig.getNextNodeId(scope)));
    }
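A hedged, self-contained sketch of how such a helper is typically used: construct the POStore with a fresh OperatorKey, then point it at an output FileSpec. The class name, output path, and the choice of PigStorage below are illustrative, not taken from the snippets on this page.

    import org.apache.pig.FuncSpec;
    import org.apache.pig.builtin.PigStorage;
    import org.apache.pig.impl.io.FileSpec;
    import org.apache.pig.impl.plan.NodeIdGenerator;
    import org.apache.pig.impl.plan.OperatorKey;
    import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;

    public class POStoreSketch {
        // Build a POStore that writes to outputPath using PigStorage,
        // mirroring the constructor and setter calls seen on this page.
        public static POStore makeStore(String scope, String outputPath) {
            POStore store = new POStore(new OperatorKey(scope,
                    NodeIdGenerator.getGenerator().getNextNodeId(scope)));
            store.setSFile(new FileSpec(outputPath,
                    new FuncSpec(PigStorage.class.getName())));
            return store;
        }
    }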


        // execute using appropriate engine
        FileLocalizer.clearDeleteOnFail();
        List<ExecJob> execJobs = pigContext.getExecutionEngine().execute(pp, "job_pigexec_");
        for (ExecJob execJob: execJobs) {
            if (execJob.getStatus()==ExecJob.JOB_STATUS.FAILED) {
                POStore store = execJob.getPOStore();
                try {
                    store.getStoreFunc().cleanupOnFailure(store.getSFile().getFileName(),
                            new Job(ConfigurationUtil.toConfiguration(execJob.getConfiguration())));
                } catch (IOException e) {
                    throw new ExecException(e);
                }
            }
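The cleanupOnFailure call above ultimately removes the partial output of the failed job. As a rough illustration of what such a cleanup amounts to, here is a generic Hadoop sketch (not Pig's exact implementation; OutputCleanup and deleteOutput are made-up names):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class OutputCleanup {
        // Recursively delete the (possibly partial) output of a failed job.
        public static void deleteOutput(Configuration conf, String location)
                throws IOException {
            Path out = new Path(location);
            FileSystem fs = out.getFileSystem(conf);
            if (fs.exists(out)) {
                fs.delete(out, true); // true = recursive
            }
        }
    }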

        try {
            PhysicalOperator leaf = plan.getLeaves().get(0);
            FileSpec spec = null;
            if(!(leaf instanceof POStore)){
                String scope = leaf.getOperatorKey().getScope();
                POStore str = new POStore(new OperatorKey(scope,
                    NodeIdGenerator.getGenerator().getNextNodeId(scope)));
                spec = new FileSpec(FileLocalizer.getTemporaryPath(null,
                    pigContext).toString(),
                    new FuncSpec(BinStorage.class.getName()));
                str.setSFile(spec);
                plan.addAsLeaf(str);
            } else{
                spec = ((POStore)leaf).getSFile();
            }
            return spec;

   
    @Override
    public void visitLOStore(LOStore loStore) throws IOException {
        String scope = DEFAULT_SCOPE;
//        System.err.println("Entering Store");
        POStore store = new POStore(new OperatorKey(scope, nodeGen
                .getNextNodeId(scope)));
        store.setAlias(((LogicalRelationalOperator)loStore.getPlan().
                getPredecessors(loStore).get(0)).getAlias());
        store.setSFile(loStore.getOutputSpec());
        // TODO Implement this
        //store.setInputSpec(loStore.getInputSpec());
//        try {
            // create a new schema for ourselves so that when we serialize
            // it we are not serializing the object that contains the schema -
            // Java drags in the containing object if we serialize a schema
            // reference held inside it. The schema here will be serialized
            // in JobControlCompiler
            store.setSchema(translateSchema( loStore.getSchema() ));
//        } catch (FrontendException e1) {
//            int errorCode = 1060;
//            String message = "Cannot resolve Store output schema"; 
//            throw new VisitorException(message, errorCode, PigException.BUG, e1);   
//        }
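The schema-copying comment above describes a general Java serialization pitfall: serializing an object that holds a hidden reference to its container drags the whole container along. A minimal, generic illustration (plain Java, not Pig's actual Schema class):

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.ObjectOutputStream;
    import java.io.Serializable;

    public class InnerSerialization {
        static class Outer implements Serializable {
            byte[] big = new byte[1 << 20];           // large state we don't want to ship
            class Schema implements Serializable { }  // non-static: keeps a hidden Outer reference
            Schema schema = new Schema();
        }

        public static void main(String[] args) throws IOException {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            new ObjectOutputStream(bos).writeObject(new Outer().schema);
            // Serializing just the inner object still drags Outer.big along.
            System.out.println("bytes written: " + bos.size()); // > 1 MB
        }
    }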

        ld.setPc(pigContext);
        return ld;
    }
   
    private POStore getStore(){
        POStore st = new POStore(new OperatorKey(scope,nig.getNextNodeId(scope)));
        // Mark this store as a tmp store. The optimizer may remove it,
        // since the user did not request it.
        st.setIsTmpStore(true);
        return st;
    }

                curMROp = mro;
            }
            else if(mro.isMapDone() && !mro.isReduceDone()){
                FileSpec fSpec = getTempFileSpec();
               
                POStore st = getStore();
                st.setSFile(fSpec);
                mro.reducePlan.addAsLeaf(st);
                mro.setReduceDone(true);
                curMROp = startNew(fSpec, mro);
                curMROp.setMapDone(true);
            }

        for (MapReduceOper mmro : mergedPlans) {
            mmro.setReduceDone(true);
            FileSpec fileSpec = getTempFileSpec();
            POLoad ld = getLoad();
            ld.setLFile(fileSpec);
            POStore str = getStore();
            str.setSFile(fileSpec);
            mmro.reducePlan.addAsLeaf(str);
            mro.mapPlan.add(ld);
            if(leaf!=null)
                mro.mapPlan.connect(ld, leaf);
            MRPlan.connect(mmro, mro);
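This store/load pairing is the general mechanism for chaining two MR operators through a temp file. A condensed sketch of the pattern, assuming the surrounding MRCompiler context (the scope, nig, and pigContext fields plus the getStore/getLoad helpers shown in these snippets); handoff is a made-up method name:

    private void handoff(MapReduceOper upstream, MapReduceOper downstream) throws Exception {
        // Temp file written with BinStorage, as in the snippets above.
        FileSpec tmp = new FileSpec(
                FileLocalizer.getTemporaryPath(null, pigContext).toString(),
                new FuncSpec(BinStorage.class.getName()));

        POStore st = getStore();        // tmp store helper from above
        st.setSFile(tmp);
        upstream.reducePlan.addAsLeaf(st);
        upstream.setReduceDone(true);

        POLoad ld = getLoad();          // counterpart load helper
        ld.setLFile(tmp);
        downstream.mapPlan.add(ld);
        MRPlan.connect(upstream, downstream);
    }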

            int errCode = 2023;
            String msg = "Received a multi-input plan when expecting only a single-input one.";
            throw new PlanException(msg, errCode, PigException.BUG);
        }
        MapReduceOper mro = compiledInputs[0];
        POStore str = getStore();
        str.setSFile(fSpec);
        if (!mro.isMapDone()) {
            mro.mapPlan.addAsLeaf(str);
            mro.setMapDoneSingle(true);
        } else if (mro.isMapDone() && !mro.isReduceDone()) {
            mro.reducePlan.addAsLeaf(str);

            curMROp = phyToMROpMap.get(op.getInputs().get(op.getFragment()));
            for(int i=0;i<compiledInputs.length;i++){
                MapReduceOper mro = compiledInputs[i];
                if(curMROp.equals(mro))
                    continue;
                POStore str = getStore();
                str.setSFile(replFiles[i]);
                if (!mro.isMapDone()) {
                    mro.mapPlan.addAsLeaf(str);
                    mro.setMapDoneSingle(true);
                } else if (mro.isMapDone() && !mro.isReduceDone()) {
                    mro.reducePlan.addAsLeaf(str);
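The replFiles stores above are produced when a fragment-replicate join is requested. A hedged driver sketch showing the Pig Latin that triggers this path (paths and aliases are made up):

    import java.io.IOException;
    import org.apache.pig.PigServer;

    public class ReplicatedJoinDriver {
        public static void main(String[] args) throws IOException {
            PigServer pig = new PigServer("local");
            pig.registerQuery("BIG = LOAD 'input/big' AS (k:int, v:chararray);");
            pig.registerQuery("SMALL = LOAD 'input/small' AS (k:int, w:chararray);");
            // 'replicated' makes the compiler store each non-fragment input
            // to a repl file (the replFiles[i] stores above) for a map-side join.
            pig.registerQuery("J = JOIN BIG BY k, SMALL BY k USING 'replicated';");
            pig.store("J", "output/frjoin");
        }
    }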

            }
           
            else if(!rightMROpr.reduceDone){
                // The indexer must run in the map phase. If we are in reduce,
                // close this MROper and start a new one. No need to yank the
                // pipeline in this case, since the brand-new MR operator will
                // contain nothing.
                POStore rightStore = getStore();
                FileSpec rightStrFile = getTempFileSpec();
                rightStore.setSFile(rightStrFile);
                rightMROpr.setReduceDone(true);
                rightMROpr = startNew(rightStrFile, rightMROpr);
                rightPipelinePlan = null;
            }
           
            else{
                int errCode = 2022;
                String msg = "Both map and reduce phases have been done. This is unexpected while compiling.";
                throw new PlanException(msg, errCode, PigException.BUG);
            }
           
            joinOp.setupRightPipeline(rightPipelinePlan);
            rightMROpr.requestedParallelism = 1; // we need exactly one reducer for the indexing job.
           
            // At this point we must be operating on the map plan of the right
            // input, and it should contain nothing other than a POLoad.
            POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0);           
            LoadFunc rightLoadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(rightLoader.getLFile().getFuncSpec());
            joinOp.setSignature(rightLoader.getSignature());
            if(rightLoadFunc instanceof IndexableLoadFunc) {
                joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec());
                joinOp.setRightInputFileName(rightLoader.getLFile().getFileName());
               
                // we don't need the right MROper since
                // the right loader is an IndexableLoadFunc which can handle the index
                // itself
                MRPlan.remove(rightMROpr);
                if(rightMROpr == compiledInputs[0]) {
                    compiledInputs[0] = null;
                } else if(rightMROpr == compiledInputs[1]) {
                    compiledInputs[1] = null;
                }
                rightMROpr = null;
               
                // Validate that the join keys in merge join are only simple
                // column projections or '*', and not expressions - expressions
                // cannot be handled when the index is built by the storage
                // layer on the sorted data as the sorted data (and the
                // corresponding index) is written. So merge join is
                // restricted to not have expressions as join keys.
                int numInputs = mPlan.getPredecessors(joinOp).size(); // should be 2
                for(int i = 0; i < numInputs; i++) {
                    List<PhysicalPlan> keyPlans = joinOp.getInnerPlansOf(i);
                    for (PhysicalPlan keyPlan : keyPlans) {
                        for(PhysicalOperator op : keyPlan) {
                            if(!(op instanceof POProject)) {
                                int errCode = 1106;
                                String errMsg = "Merge join is possible only for simple column or '*' join keys when using " +
                                rightLoader.getLFile().getFuncSpec() + " as the loader";
                                throw new MRCompilerException(errMsg, errCode, PigException.INPUT);
                            }
                        }
                    }
                }
            } else {
                // Replace POLoad with  indexer.
                String[] indexerArgs = new String[3];
                FileSpec origRightLoaderFileSpec = rightLoader.getLFile();
                indexerArgs[0] = origRightLoaderFileSpec.getFuncSpec().toString();
                if (! (PigContext.instantiateFuncFromSpec(indexerArgs[0]) instanceof OrderedLoadFunc)){
                    int errCode = 1104;
                    String errMsg = "Right input of merge-join must implement " +
                    "OrderedLoadFunc interface. The specified loader "
                    + indexerArgs[0] + " doesn't implement it";
                    throw new MRCompilerException(errMsg,errCode);
                }
                List<PhysicalPlan> rightInpPlans = joinOp.getInnerPlansOf(1);
                indexerArgs[1] = ObjectSerializer.serialize((Serializable)rightInpPlans);
                indexerArgs[2] = ObjectSerializer.serialize(rightPipelinePlan);
                FileSpec lFile = new FileSpec(rightLoader.getLFile().getFileName(),new FuncSpec(MergeJoinIndexer.class.getName(), indexerArgs));
                rightLoader.setLFile(lFile);
   
                // The loader of this mro will return tuples of the form
                // (keyFirst1, keyFirst2, .. , position, splitIndex); see
                // MergeJoinIndexer. Now set up a POLocalRearrange with "all"
                // as the key and the tuple fetched by the loader as the value.
                // Sorting of the index could possibly be achieved with Hadoop's
                // sort between map and reduce instead of Pig doing the sort;
                // if so, it would simplify a lot of the code below.
               
                PhysicalPlan lrPP = new PhysicalPlan();
                ConstantExpression ce = new ConstantExpression(new OperatorKey(scope,nig.getNextNodeId(scope)));
                ce.setValue("all");
                ce.setResultType(DataType.CHARARRAY);
                lrPP.add(ce);
   
                List<PhysicalPlan> lrInnerPlans = new ArrayList<PhysicalPlan>();
                lrInnerPlans.add(lrPP);
   
                POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
                lr.setIndex(0);
                lr.setKeyType(DataType.CHARARRAY);
                lr.setPlans(lrInnerPlans);
                lr.setResultType(DataType.TUPLE);
                rightMROpr.mapPlan.addAsLeaf(lr);
   
                rightMROpr.setMapDone(true);
   
                // On the reduce side of this indexing job there will be a
                // global rearrange followed by a POSort; the output of the
                // POSort is the index file written to the DFS.
   
                // First add POPackage.
                POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
                pkg.setKeyType(DataType.CHARARRAY);
                pkg.setNumInps(1);
                pkg.setInner(new boolean[]{false});
                rightMROpr.reducePlan.add(pkg);
   
                // Next project tuples from the bag created by POPackage.
                POProject topPrj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
                topPrj.setColumn(1);
                topPrj.setResultType(DataType.TUPLE);
                topPrj.setOverloaded(true);
                rightMROpr.reducePlan.add(topPrj);
                rightMROpr.reducePlan.connect(pkg, topPrj);
   
                // Now create and add POSort. Sort plan is project *.
                List<PhysicalPlan> sortPlans = new ArrayList<PhysicalPlan>(1);
                PhysicalPlan innerSortPlan = new PhysicalPlan();
                POProject prj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
                prj.setStar(true);
                prj.setOverloaded(false);
                prj.setResultType(DataType.TUPLE);
                innerSortPlan.add(prj);
                sortPlans.add(innerSortPlan);
   
                // Currently we assume all columns are in asc order.
                // Add two because filename and offset are added by Indexer in addition to keys.
                List<Boolean>  mAscCols = new ArrayList<Boolean>(rightInpPlans.size()+2);
                for(int i=0; i< rightInpPlans.size()+2; i++)
                    mAscCols.add(true);
   
                POSort sortOp = new POSort(new OperatorKey(scope,nig.getNextNodeId(scope)),1, null, sortPlans, mAscCols, null);
                rightMROpr.reducePlan.add(sortOp);
                rightMROpr.reducePlan.connect(topPrj, sortOp);
   
                POStore st = getStore();
                FileSpec strFile = getTempFileSpec();
                st.setSFile(strFile);
                rightMROpr.reducePlan.addAsLeaf(st);
                rightMROpr.setReduceDone(true);
               
                // set up the DefaultIndexableLoader for the join operator
                String[] defaultIndexableLoaderArgs = new String[5];
                defaultIndexableLoaderArgs[0] = origRightLoaderFileSpec.getFuncSpec().toString();
                defaultIndexableLoaderArgs[1] = strFile.getFileName();
                defaultIndexableLoaderArgs[2] = strFile.getFuncSpec().toString();
                defaultIndexableLoaderArgs[3] = joinOp.getOperatorKey().scope;
                defaultIndexableLoaderArgs[4] = origRightLoaderFileSpec.getFileName();
                joinOp.setRightLoaderFuncSpec((new FuncSpec(DefaultIndexableLoader.class.getName(), defaultIndexableLoaderArgs)));
                joinOp.setRightInputFileName(origRightLoaderFileSpec.getFileName());
               
                joinOp.setIndexFile(strFile.getFileName());
                
            }
           
  
//            joinOp.setIndexFile(strFile);
           
            // We are done with the right side. Let's work on the left now.
            // The join will be materialized in leftMROper.
            if(!curMROp.mapDone) // Life is easy
                curMROp.mapPlan.addAsLeaf(joinOp);
           
            else if(!curMROp.reduceDone){  // This is a map-side join. Close this MROper and start afresh.
                POStore leftStore = getStore();
                FileSpec leftStrFile = getTempFileSpec();
                leftStore.setSFile(leftStrFile);
                curMROp.setReduceDone(true);
                curMROp = startNew(leftStrFile, curMROp);
                curMROp.mapPlan.addAsLeaf(joinOp);
            }
           
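The entire merge-join compilation above is driven from a single Pig Latin statement. A hedged driver sketch (file paths and aliases are made up; both inputs must be sorted on the join key for 'merge' to be legal):

    import java.io.IOException;
    import org.apache.pig.PigServer;

    public class MergeJoinDriver {
        public static void main(String[] args) throws IOException {
            PigServer pig = new PigServer("local");
            pig.registerQuery("A = LOAD 'input/left' AS (k:int, v:chararray);");
            pig.registerQuery("B = LOAD 'input/right' AS (k:int, w:chararray);");
            // USING 'merge' makes the compiler build the index job shown
            // above for the right input, then stream the left input past it.
            pig.registerQuery("J = JOIN A BY k, B BY k USING 'merge';");
            pig.store("J", "output/joined");
        }
    }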
