nwJob.setOutputFormatClass(PigOutputFormat.class);
if (mapStores.size() + reduceStores.size() == 1) { // single store case
log.info("Setting up single store job");
POStore st;
if (reduceStores.isEmpty()) {
st = mapStores.get(0);
if(!pigContext.inIllustrator)
mro.mapPlan.remove(st);
}
else {
st = reduceStores.get(0);
if(!pigContext.inIllustrator)
mro.reducePlan.remove(st);
}
// set the output filespec
String outputPathString = st.getSFile().getFileName();
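// Streaming stderr/log output goes under the store's output path when
// it is on HDFS (or has no scheme); for any other filesystem a
// temporary HDFS location is used for the logs instead.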
if (!outputPathString.contains("://") || outputPathString.startsWith("hdfs://")) {
conf.set("pig.streaming.log.dir",
new Path(outputPathString, LOG_DIR).toString());
} else {
String tmpLocationStr = FileLocalizer
.getTemporaryPath(pigContext).toString();
tmpLocation = new Path(tmpLocationStr);
conf.set("pig.streaming.log.dir",
new Path(tmpLocation, LOG_DIR).toString());
}
conf.set("pig.streaming.task.output.dir", outputPathString);
}
else if (mapStores.size() + reduceStores.size() > 0) { // multi store case
log.info("Setting up multi store job");
String tmpLocationStr = FileLocalizer
.getTemporaryPath(pigContext).toString();
tmpLocation = new Path(tmpLocationStr);
nwJob.setOutputFormatClass(PigOutputFormat.class);
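// In the multi-store case all stores write beneath the shared temporary
// location set above; each POStore is flagged as part of a multi-store
// job and given a distinct index so the outputs can be separated when
// the results are moved to their final locations.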
int idx = 0;
for (POStore sto: storeLocations) {
sto.setMultiStore(true);
sto.setIndex(idx++);
}
conf.set("pig.streaming.log.dir",
new Path(tmpLocation, LOG_DIR).toString());
conf.set("pig.streaming.task.output.dir", tmpLocation.toString());
}
// store map key type
// this is needed when the key is null to create
// an appropriate NullableXXXWritable object
conf.set("pig.map.keytype", ObjectSerializer.serialize(new byte[] { mro.mapKeyType }));
// set parent plan in all operators in map and reduce plans
// currently the parent plan is really used only when POStream is present in the plan
new PhyPlanSetter(mro.mapPlan).visit();
new PhyPlanSetter(mro.reducePlan).visit();
// this call modifies the ReplFiles names of POFRJoin operators
// within the MR plans, must be called before the plans are
// serialized
setupDistributedCacheForJoin(mro, pigContext, conf);
// Search to see if we have any UDFs that need to pack things into the
// distributed cache.
setupDistributedCacheForUdfs(mro, pigContext, conf);
POPackage pack = null;
if(mro.reducePlan.isEmpty()){
//MapOnly Job
nwJob.setMapperClass(PigMapOnly.Map.class);
nwJob.setNumReduceTasks(0);
if(!pigContext.inIllustrator)
conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
if(mro.isEndOfAllInputSetInMap()) {
// this is used in Map.close() to decide whether the
// pipeline needs to be rerun one more time in the close()
// The pipeline is rerun only if there was a stream or a POMergeJoin
conf.set(END_OF_INP_IN_MAP, "true");
}
}
else{
//Map Reduce Job
//Process the POPackage operator and remove it from the reduce plan
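// If the optimizer produced a combine plan, run it through Hadoop's
// combiner; the POPackage at its root is removed and serialized
// separately ("pig.combine.package") because PigCombiner.Combine
// applies it directly to the grouped input.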
if(!mro.combinePlan.isEmpty()){
POPackage combPack = (POPackage)mro.combinePlan.getRoots().get(0);
mro.combinePlan.remove(combPack);
nwJob.setCombinerClass(PigCombiner.Combine.class);
conf.set("pig.combinePlan", ObjectSerializer.serialize(mro.combinePlan));
conf.set("pig.combine.package", ObjectSerializer.serialize(combPack));
} else if (mro.needsDistinctCombiner()) {
nwJob.setCombinerClass(DistinctCombiner.Combine.class);
log.info("Setting identity combiner class.");
}
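// The POPackage at the root of the reduce plan is likewise removed and
// shipped on its own ("pig.reduce.package"): the reducer applies it to
// the grouped key/values before running the rest of the reduce plan.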
pack = (POPackage)mro.reducePlan.getRoots().get(0);
if(!pigContext.inIllustrator)
mro.reducePlan.remove(pack);
nwJob.setMapperClass(PigMapReduce.Map.class);
nwJob.setReducerClass(PigMapReduce.Reduce.class);
// first check the PARALLEL clause in the query, then the defaultParallel in PigContext, and finally fall back to estimation
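// e.g. "b = GROUP a BY f PARALLEL 10;" or "SET default_parallel 20"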
if (mro.requestedParallelism > 0)
nwJob.setNumReduceTasks(mro.requestedParallelism);
else if (pigContext.defaultParallel > 0)
conf.set("mapred.reduce.tasks", ""+pigContext.defaultParallel);
else
estimateNumberOfReducers(conf,lds);
if (mro.customPartitioner != null)
nwJob.setPartitionerClass(PigContext.resolveClassName(mro.customPartitioner));
if(!pigContext.inIllustrator)
conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
if(mro.isEndOfAllInputSetInMap()) {
// this is used in Map.close() to decide whether the
// pipeline needs to be rerun one more time in the close()
// The pipeline is rerun only if there was a stream or merge-join.
conf.set(END_OF_INP_IN_MAP, "true");
}
if(!pigContext.inIllustrator)
conf.set("pig.reducePlan", ObjectSerializer.serialize(mro.reducePlan));
if(mro.isEndOfAllInputSetInReduce()) {
// this is used in Reduce.close() to decide whether the
// pipeline needs to be rerun one more time in the close()
// The pipeline is rerun only if there was a stream
conf.set("pig.stream.in.reduce", "true");
}
if (!pigContext.inIllustrator)
conf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
conf.set("pig.reduce.key.type", Byte.toString(pack.getKeyType()));
if (mro.getUseSecondaryKey()) {
nwJob.setGroupingComparatorClass(PigSecondaryKeyGroupComparator.class);
nwJob.setPartitionerClass(SecondaryKeyPartitioner.class);
nwJob.setSortComparatorClass(PigSecondaryKeyComparator.class);
nwJob.setOutputKeyClass(NullableTuple.class);
conf.set("pig.secondarySortOrder",
ObjectSerializer.serialize(mro.getSecondarySortOrder()));
}
else
{
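// No secondary key: the output key class is the NullableXXXWritable
// matching the Pig key type, with a comparator selected to match.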
Class<? extends WritableComparable> keyClass = HDataType.getWritableComparableTypes(pack.getKeyType()).getClass();
nwJob.setOutputKeyClass(keyClass);
selectComparator(mro, pack.getKeyType(), nwJob);
}
nwJob.setOutputValueClass(NullableTuple.class);
}
if(mro.isGlobalSort() || mro.isLimitAfterSort()){
// Only set the quantiles file and sort partitioner if we're a
// global sort, not for limit after sort.
if (mro.isGlobalSort()) {
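// Ship the quantiles file produced by the sampling job through the
// distributed cache; WeightedRangePartitioner reads it to assign keys
// to balanced, ordered ranges.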
String symlink = addSingleFileToDistributedCache(
pigContext, conf, mro.getQuantFile(), "pigsample");
conf.set("pig.quantilesFile", symlink);
nwJob.setPartitionerClass(WeightedRangePartitioner.class);
}
if (mro.isUDFComparatorUsed) {
boolean usercomparator = false;
for (String compFuncSpec : mro.UDFs) {
Class comparator = PigContext.resolveClassName(compFuncSpec);
if(ComparisonFunc.class.isAssignableFrom(comparator)) {
nwJob.setMapperClass(PigMapReduce.MapWithComparator.class);
nwJob.setReducerClass(PigMapReduce.ReduceWithComparator.class);
conf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
conf.set("pig.usercomparator", "true");
nwJob.setOutputKeyClass(NullableTuple.class);
nwJob.setSortComparatorClass(comparator);
usercomparator = true;
break;
}
}
if (!usercomparator) {
String msg = "Internal error. Can't find the UDF comparator";
throw new IOException(msg);
}
} else {
conf.set("pig.sortOrder",
ObjectSerializer.serialize(mro.getSortOrder()));
}
}
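// Skewed join: the key-distribution file sampled from the larger input
// is shipped via the distributed cache, and SkewedPartitioner uses it
// to spread over-represented keys across multiple reducers (hence the
// partition index carried in NullablePartitionWritable).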
if (mro.isSkewedJoin()) {
String symlink = addSingleFileToDistributedCache(pigContext,
conf, mro.getSkewedJoinPartitionFile(), "pigdistkey");
conf.set("pig.keyDistFile", symlink);
nwJob.setPartitionerClass(SkewedPartitioner.class);
nwJob.setMapperClass(PigMapReduce.MapWithPartitionIndex.class);
nwJob.setMapOutputKeyClass(NullablePartitionWritable.class);
nwJob.setGroupingComparatorClass(PigGroupingPartitionWritableComparator.class);
}
if (!pigContext.inIllustrator)
{
// unset inputs for POStore; otherwise the map/reduce plan will be unnecessarily deserialized
for (POStore st: mapStores) { st.setInputs(null); st.setParentPlan(null);}
for (POStore st: reduceStores) { st.setInputs(null); st.setParentPlan(null);}
conf.set(PIG_MAP_STORES, ObjectSerializer.serialize(mapStores));
conf.set(PIG_REDUCE_STORES, ObjectSerializer.serialize(reduceStores));
}
// tmp file compression setups