Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore

Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore

org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore
The store operator, which is used in two ways: 1) as a local operator, it can be used to store files; 2) in the MapReduce setting, it is used to create jobs from MapReduce operators, which keep the loads and stores in the map and reduce plans until the job is created.

    private MapReduceOper getMROper(){
        return new MapReduceOper(new OperatorKey(scope, nig.getNextNodeId(scope)));
    } 
   
    private POStore getStore(){
        return new POStore(new OperatorKey(scope, nig.getNextNodeId(scope)));
    }

View Full Code Here

        // execute using appropriate engine
        FileLocalizer.clearDeleteOnFail();
        List<ExecJob> execJobs = pigContext.getExecutionEngine().execute(pp, "job_pigexec_");
        for (ExecJob execJob: execJobs) {
            if (execJob.getStatus()==ExecJob.JOB_STATUS.FAILED) {
                POStore store = execJob.getPOStore();
                try {
                    store.getStoreFunc().cleanupOnFailure(store.getSFile().getFileName(),
                            new Job(ConfigurationUtil.toConfiguration(execJob.getConfiguration())));
                } catch (IOException e) {
                    throw new ExecException(e);
                }
            }

View Full Code Here

        try {
            PhysicalOperator leaf = plan.getLeaves().get(0);
            FileSpec spec = null;
            if(!(leaf instanceof POStore)){
                String scope = leaf.getOperatorKey().getScope();
                POStore str = new POStore(new OperatorKey(scope,
                    NodeIdGenerator.getGenerator().getNextNodeId(scope)));
                spec = new FileSpec(FileLocalizer.getTemporaryPath(null,
                    pigContext).toString(),
                    new FuncSpec(BinStorage.class.getName()));
                str.setSFile(spec);
                plan.addAsLeaf(str);
            } else{
                spec = ((POStore)leaf).getSFile();
            }
            return spec;

View Full Code Here

    
    @Override
    public void visitLOStore(LOStore loStore) throws IOException {
        String scope = DEFAULT_SCOPE;
//        System.err.println("Entering Store");
        POStore store = new POStore(new OperatorKey(scope, nodeGen
                .getNextNodeId(scope)));
        store.setAlias(((LogicalRelationalOperator)loStore.getPlan().
                getPredecessors(loStore).get(0)).getAlias());
        store.setSFile(loStore.getOutputSpec());
        // TODO Implement this
        //store.setInputSpec(loStore.getInputSpec());
//        try {
            // create a new schema for ourselves so that when
            // we serialize we are not serializing objects that
            // contain the schema - apparently Java tries to
            // serialize the object containing the schema if
            // we are trying to serialize the schema reference in
            // the containing object. The schema here will be serialized
            // in JobControlCompiler
            store.setSchema(translateSchema( loStore.getSchema() ));
//        } catch (FrontendException e1) {
//            int errorCode = 1060;
//            String message = "Cannot resolve Store output schema";  
//            throw new VisitorException(message, errorCode, PigException.BUG, e1);    
//        }

View Full Code Here

        ld.setPc(pigContext);
        return ld;
    }
    
    private POStore getStore(){
        POStore st = new POStore(new OperatorKey(scope,nig.getNextNodeId(scope)));
        // mark store as tmp store. These could be removed by the
        // optimizer, because it wasn't the user requesting it.
        st.setIsTmpStore(true);
        return st;
    }

View Full Code Here

                curMROp = mro;
            }
            else if(mro.isMapDone() && !mro.isReduceDone()){
                FileSpec fSpec = getTempFileSpec();
                
                POStore st = getStore();
                st.setSFile(fSpec);
                mro.reducePlan.addAsLeaf(st);
                mro.setReduceDone(true);
                curMROp = startNew(fSpec, mro);
                curMROp.setMapDone(true);
            }

View Full Code Here

        for (MapReduceOper mmro : mergedPlans) {
            mmro.setReduceDone(true);
            FileSpec fileSpec = getTempFileSpec();
            POLoad ld = getLoad();
            ld.setLFile(fileSpec);
            POStore str = getStore();
            str.setSFile(fileSpec);
            mmro.reducePlan.addAsLeaf(str);
            mro.mapPlan.add(ld);
            if(leaf!=null)
                mro.mapPlan.connect(ld, leaf);
            MRPlan.connect(mmro, mro);

View Full Code Here

            int errCode = 2023;
            String msg = "Received a multi input plan when expecting only a single input one.";
            throw new PlanException(msg, errCode, PigException.BUG);
        }
        MapReduceOper mro = compiledInputs[0];
        POStore str = getStore();
        str.setSFile(fSpec);
        if (!mro.isMapDone()) {
            mro.mapPlan.addAsLeaf(str);
            mro.setMapDoneSingle(true);
        } else if (mro.isMapDone() && !mro.isReduceDone()) {
            mro.reducePlan.addAsLeaf(str);

View Full Code Here

            curMROp = phyToMROpMap.get(op.getInputs().get(op.getFragment()));
            for(int i=0;i<compiledInputs.length;i++){
                MapReduceOper mro = compiledInputs[i];
                if(curMROp.equals(mro))
                    continue;
                POStore str = getStore();
                str.setSFile(replFiles[i]);
                if (!mro.isMapDone()) {
                    mro.mapPlan.addAsLeaf(str);
                    mro.setMapDoneSingle(true);
                } else if (mro.isMapDone() && !mro.isReduceDone()) {
                    mro.reducePlan.addAsLeaf(str);

View Full Code Here

            }
            
            else if(!rightMROpr.reduceDone){ 
                // Indexer must run in map. If we are in reduce, close it and start new MROper.
                // No need of yanking in this case. Since we are starting brand new MR Operator and it will contain nothing.
                POStore rightStore = getStore();
                FileSpec rightStrFile = getTempFileSpec();
                rightStore.setSFile(rightStrFile);
                rightMROpr.setReduceDone(true);
                rightMROpr = startNew(rightStrFile, rightMROpr);
                rightPipelinePlan = null; 
            }
            
            else{
                int errCode = 2022;
                String msg = "Both map and reduce phases have been done. This is unexpected while compiling.";
                throw new PlanException(msg, errCode, PigException.BUG);
            }
            
            joinOp.setupRightPipeline(rightPipelinePlan);
      rightMROpr.requestedParallelism = 1; // we need exactly one reducer for indexing job.        
            
            // At this point, we must be operating on map plan of right input and it would contain nothing else other then a POLoad.
            POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0);            
            LoadFunc rightLoadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(rightLoader.getLFile().getFuncSpec());
            joinOp.setSignature(rightLoader.getSignature());
            if(rightLoadFunc instanceof IndexableLoadFunc) {
                joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec());
                joinOp.setRightInputFileName(rightLoader.getLFile().getFileName());
                
                // we don't need the right MROper since
                // the right loader is an IndexableLoadFunc which can handle the index
                // itself
                MRPlan.remove(rightMROpr);
                if(rightMROpr == compiledInputs[0]) {
                    compiledInputs[0] = null;
                } else if(rightMROpr == compiledInputs[1]) {
                    compiledInputs[1] = null;
                } 
                rightMROpr = null;
                
                // validate that the join keys in merge join are only                                                                                                                                                                              
                // simple column projections or '*' and not expression - expressions                                                                                                                                                               
                // cannot be handled when the index is built by the storage layer on the sorted                                                                                                                                                    
                // data when the sorted data (and corresponding index) is written.                                                                                                                                                                 
                // So merge join will be restricted not have expressions as                                                                                                                                                                        
                // join keys      
                int numInputs = mPlan.getPredecessors(joinOp).size(); // should be 2
                for(int i = 0; i < numInputs; i++) {
                    List<PhysicalPlan> keyPlans = joinOp.getInnerPlansOf(i);
                    for (PhysicalPlan keyPlan : keyPlans) {
                        for(PhysicalOperator op : keyPlan) {
                            if(!(op instanceof POProject)) {
                                int errCode = 1106;
                                String errMsg = "Merge join is possible only for simple column or '*' join keys when using " +
                                rightLoader.getLFile().getFuncSpec() + " as the loader";
                                throw new MRCompilerException(errMsg, errCode, PigException.INPUT);
                            }
                        }
                    }
                }
            } else {
                // Replace POLoad with  indexer.
                String[] indexerArgs = new String[3];
                FileSpec origRightLoaderFileSpec = rightLoader.getLFile();
                indexerArgs[0] = origRightLoaderFileSpec.getFuncSpec().toString();
                if (! (PigContext.instantiateFuncFromSpec(indexerArgs[0]) instanceof OrderedLoadFunc)){
                    int errCode = 1104;
                    String errMsg = "Right input of merge-join must implement " +
                    "OrderedLoadFunc interface. The specified loader " 
                    + indexerArgs[0] + " doesn't implement it";
                    throw new MRCompilerException(errMsg,errCode);
                }
                List<PhysicalPlan> rightInpPlans = joinOp.getInnerPlansOf(1);
                indexerArgs[1] = ObjectSerializer.serialize((Serializable)rightInpPlans);
                indexerArgs[2] = ObjectSerializer.serialize(rightPipelinePlan);
                FileSpec lFile = new FileSpec(rightLoader.getLFile().getFileName(),new FuncSpec(MergeJoinIndexer.class.getName(), indexerArgs));
                rightLoader.setLFile(lFile);
    
                // Loader of mro will return a tuple of form - 
                // (keyFirst1, keyFirst2, .. , position, splitIndex) See MergeJoinIndexer
                // Now set up a POLocalRearrange which has "all" as the key and tuple fetched
                // by loader as the "value" of POLocalRearrange
                // Sorting of index can possibly be achieved by using Hadoop sorting 
                // between map and reduce instead of Pig doing sort. If that is so, 
                // it will simplify lot of the code below.
                
                PhysicalPlan lrPP = new PhysicalPlan();
                ConstantExpression ce = new ConstantExpression(new OperatorKey(scope,nig.getNextNodeId(scope)));
                ce.setValue("all");
                ce.setResultType(DataType.CHARARRAY);
                lrPP.add(ce);
    
                List<PhysicalPlan> lrInnerPlans = new ArrayList<PhysicalPlan>();
                lrInnerPlans.add(lrPP);
    
                POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
                lr.setIndex(0);
                lr.setKeyType(DataType.CHARARRAY);
                lr.setPlans(lrInnerPlans);
                lr.setResultType(DataType.TUPLE);
                rightMROpr.mapPlan.addAsLeaf(lr);
    
                rightMROpr.setMapDone(true);
    
                // On the reduce side of this indexing job, there will be a global rearrange followed by POSort.
                // Output of POSort will be index file dumped on the DFS.
    
                // First add POPackage.
                POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
                pkg.setKeyType(DataType.CHARARRAY);
                pkg.setNumInps(1); 
                pkg.setInner(new boolean[]{false});
                rightMROpr.reducePlan.add(pkg);
    
                // Next project tuples from the bag created by POPackage.
                POProject topPrj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
                topPrj.setColumn(1);
                topPrj.setResultType(DataType.TUPLE);
                topPrj.setOverloaded(true);
                rightMROpr.reducePlan.add(topPrj);
                rightMROpr.reducePlan.connect(pkg, topPrj);
    
                // Now create and add POSort. Sort plan is project *.
                List<PhysicalPlan> sortPlans = new ArrayList<PhysicalPlan>(1);
                PhysicalPlan innerSortPlan = new PhysicalPlan();
                POProject prj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
                prj.setStar(true);
                prj.setOverloaded(false);
                prj.setResultType(DataType.TUPLE);
                innerSortPlan.add(prj);
                sortPlans.add(innerSortPlan);
    
                // Currently we assume all columns are in asc order.
                // Add two because filename and offset are added by Indexer in addition to keys.
                List<Boolean>  mAscCols = new ArrayList<Boolean>(rightInpPlans.size()+2);
                for(int i=0; i< rightInpPlans.size()+2; i++)
                    mAscCols.add(true);
    
                POSort sortOp = new POSort(new OperatorKey(scope,nig.getNextNodeId(scope)),1, null, sortPlans, mAscCols, null);
                rightMROpr.reducePlan.add(sortOp);
                rightMROpr.reducePlan.connect(topPrj, sortOp);
    
                POStore st = getStore();
                FileSpec strFile = getTempFileSpec();
                st.setSFile(strFile);
                rightMROpr.reducePlan.addAsLeaf(st);
                rightMROpr.setReduceDone(true);
                
                // set up the DefaultIndexableLoader for the join operator
                String[] defaultIndexableLoaderArgs = new String[5];
                defaultIndexableLoaderArgs[0] = origRightLoaderFileSpec.getFuncSpec().toString();
                defaultIndexableLoaderArgs[1] = strFile.getFileName();
                defaultIndexableLoaderArgs[2] = strFile.getFuncSpec().toString();
                defaultIndexableLoaderArgs[3] = joinOp.getOperatorKey().scope;
                defaultIndexableLoaderArgs[4] = origRightLoaderFileSpec.getFileName();
                joinOp.setRightLoaderFuncSpec((new FuncSpec(DefaultIndexableLoader.class.getName(), defaultIndexableLoaderArgs)));
                joinOp.setRightInputFileName(origRightLoaderFileSpec.getFileName());  
                
                joinOp.setIndexFile(strFile.getFileName());
                 
            }
            
   
//            joinOp.setIndexFile(strFile);
            
            // We are done with right side. Lets work on left now.
            // Join will be materialized in leftMROper.
            if(!curMROp.mapDone) // Life is easy 
                curMROp.mapPlan.addAsLeaf(joinOp);
            
            else if(!curMROp.reduceDone){  // This is a map-side join. Close this MROper and start afresh.
                POStore leftStore = getStore();
                FileSpec leftStrFile = getTempFileSpec();
                leftStore.setSFile(leftStrFile);
                curMROp.setReduceDone(true);
                curMROp = startNew(leftStrFile, curMROp);
                curMROp.mapPlan.addAsLeaf(joinOp);
            }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore

com.netflix.lipstick.util.OutputSamplerTest

org.apache.pig.backend.executionengine.util.ExecTools

org.apache.pig.backend.hadoop.executionengine.fetch.FetchLauncher

org.apache.pig.backend.hadoop.executionengine.HExecutionEngine

org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler

org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.LimitAdjuster

org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher

org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler

org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler$LimitAdjuster

org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.