Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage$PeekedBag


            // Search to see if we have any UDFs that need to pack things into the
            // distributed cache.
            setupDistributedCacheForUdfs(mro, pigContext, conf);

            POPackage pack = null;
            if(mro.reducePlan.isEmpty()){
                //MapOnly Job
                nwJob.setMapperClass(PigMapOnly.Map.class);
                nwJob.setNumReduceTasks(0);
                if(!pigContext.inIllustrator)
                    conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
                if(mro.isEndOfAllInputSetInMap()) {
                    // this is used in Map.close() to decide whether the
                    // pipeline needs to be rerun one more time in the close()
                    // The pipeline is rerun if there was either a stream or a POMergeJoin
                    conf.set(END_OF_INP_IN_MAP, "true");
                }
            }
            else{
                //Map Reduce Job
                //Process the POPackage operator and remove it from the reduce plan
                if(!mro.combinePlan.isEmpty()){
                    POPackage combPack = (POPackage)mro.combinePlan.getRoots().get(0);
                    mro.combinePlan.remove(combPack);
                    nwJob.setCombinerClass(PigCombiner.Combine.class);
                    conf.set("pig.combinePlan", ObjectSerializer.serialize(mro.combinePlan));
                    conf.set("pig.combine.package", ObjectSerializer.serialize(combPack));
                } else if (mro.needsDistinctCombiner()) {
View Full Code Here


           
            op = mrRoots.get(0);
            if (!(op instanceof POPackage)) {
                return;
            }
            POPackage pack = (POPackage)op;
           
            List<PhysicalOperator> sucs = mr.reducePlan.getSuccessors(pack);
            if (sucs == null || sucs.size()!=1) {
                return;
            }
           
            op = sucs.get(0);
            boolean lastInputFlattened = true;
            boolean allSimple = true;
            if (op instanceof POForEach)
            {
                POForEach forEach = (POForEach)op;
                List<PhysicalPlan> planList = forEach.getInputPlans();
                List<Boolean> flatten = forEach.getToBeFlattened();
                POProject projOfLastInput = null;
                int i = 0;
                // check all nested foreach plans
                // 1. If it is simple projection
                // 2. If last input is all flattened
                for (PhysicalPlan p:planList)
                {
                    PhysicalOperator opProj = p.getRoots().get(0);
                    if (!(opProj instanceof POProject))
                    {
                        allSimple = false;
                        break;
                    }
                    POProject proj = (POProject)opProj;
                    // the project should just be for one column
                    // from the input
                    if(proj.isProjectToEnd() || proj.getColumns().size() != 1) {
                        allSimple = false;
                        break;
                    }
                   
                    try {
                        // if input to project is the last input
                        if (proj.getColumn() == pack.getNumInps())
                        {
                            // if we had already seen another project
                            // which was also for the last input, then
                            // we might be trying to flatten twice on the
                            // last input in which case we can't optimize by
View Full Code Here

            throw new MRCompilerException(msg, errCode, PigException.BUG, e);
        }
       
        mro.mapPlan.addAsLeaf(lr);
       
        POPackage spkg = (POPackage)sortMROp.reducePlan.getRoots().get(0);

        POPackage pkg = null;
        try {
            pkg = spkg.clone();
        } catch (Exception e) {
            int errCode = 2148;
            String msg = "Error cloning POPackageLite for limit after sort";
View Full Code Here

        lr.setPlans(eps);
        lr.setResultType(DataType.TUPLE);
       
        mro.mapPlan.addAsLeaf(lr);
       
        POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
        pkg.setKeyType(DataType.TUPLE);
        pkg.setNumInps(1);
        boolean[] inner = {false};
        pkg.setInner(inner);
        mro.reducePlan.add(pkg);
       
        mro.reducePlan.addAsLeaf(getPlainForEachOP());
    }
View Full Code Here

           
            addToMap(lr);
           
            blocking(op);
           
            POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
            pkg.setKeyType(DataType.TUPLE);
            pkg.setDistinct(true);
            pkg.setNumInps(1);
            boolean[] inner = {false};
            pkg.setInner(inner);
            curMROp.reducePlan.add(pkg);
           
            List<PhysicalPlan> eps1 = new ArrayList<PhysicalPlan>();
            List<Boolean> flat1 = new ArrayList<Boolean>();
            PhysicalPlan ep1 = new PhysicalPlan();
View Full Code Here

      if(gr.getRequestedParallelism() > curMROp.requestedParallelism)
        curMROp.requestedParallelism = gr.getRequestedParallelism();
      compiledInputs = new MapReduceOper[] {curMROp};
     
      // create POPackage
      POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)), rp);
      pkg.setKeyType(type);
      pkg.setResultType(DataType.TUPLE);
      pkg.setNumInps(2);
      boolean [] inner = op.getInnerFlags();
      pkg.setInner(inner);           
      pkg.visit(this);      
      compiledInputs = new MapReduceOper[] {curMROp};
     
      // create POForEach
      List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
      List<Boolean> flat = new ArrayList<Boolean>();
View Full Code Here

        mro.mapPlan.add(lr);
        mro.mapPlan.connect(nfe1, lr);
       
        mro.setMapDone(true);
       
        POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
        pkg.setKeyType(DataType.CHARARRAY);
        pkg.setNumInps(1);
        boolean[] inner = {false};
        pkg.setInner(inner);
        mro.reducePlan.add(pkg);
       
        // Let's start building the plan which will have the sort
        // for the foreach
        PhysicalPlan fe2Plan = new PhysicalPlan();
View Full Code Here

        PhysicalOperator root = reduceRoots.get(0);
        if (!(root instanceof POPackage)) {
            messageCollector.collect("Expected reduce root to be a POPackage", MessageType.Warning, PigWarning.NON_PACKAGE_REDUCE_PLAN_ROOT);
            return;
        }
        POPackage pack = (POPackage)root;

        List<PhysicalOperator> packSuccessors =
            mr.reducePlan.getSuccessors(root);
        if (packSuccessors == null || packSuccessors.size() != 1) return;
        PhysicalOperator successor = packSuccessors.get(0);
View Full Code Here

            // set parent plan in all operators in map and reduce plans
            // currently the parent plan is really used only when POStream is present in the plan
            new PhyPlanSetter(mro.mapPlan).visit();
            new PhyPlanSetter(mro.reducePlan).visit();

            POPackage pack = null;
            if(mro.reducePlan.isEmpty()){
                //MapOnly Job
                jobConf.setMapperClass(PigMapOnly.Map.class);
                jobConf.setNumReduceTasks(0);
                jobConf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
                if(mro.isEndOfAllInputSetInMap()) {
                    // this is used in Map.close() to decide whether the
                    // pipeline needs to be rerun one more time in the close()
                    // The pipeline is rerun if there was either a stream or a POMergeJoin
                    jobConf.set(END_OF_INP_IN_MAP, "true");
                }
            }
            else{
                //Map Reduce Job
                //Process the POPackage operator and remove it from the reduce plan
                if(!mro.combinePlan.isEmpty()){
                    POPackage combPack = (POPackage)mro.combinePlan.getRoots().get(0);
                    mro.combinePlan.remove(combPack);
                    jobConf.setCombinerClass(PigCombiner.Combine.class);
                    jobConf.set("pig.combinePlan", ObjectSerializer.serialize(mro.combinePlan));
                    jobConf.set("pig.combine.package", ObjectSerializer.serialize(combPack));
                } else if (mro.needsDistinctCombiner()) {
View Full Code Here

           
            op = mrRoots.get(0);
            if (!(op instanceof POPackage)) {
                return;
            }
            POPackage pack = (POPackage)op;
           
            List<PhysicalOperator> sucs = mr.reducePlan.getSuccessors(pack);
            if (sucs.size()!=1) {
                return;
            }
           
            op = sucs.get(0);
            boolean lastInputFlattened = true;
            boolean allSimple = true;
            if (op instanceof POForEach)
            {
                POForEach forEach = (POForEach)op;
                List<PhysicalPlan> planList = forEach.getInputPlans();
                List<Boolean> flatten = forEach.getToBeFlattened();
                POProject projOfLastInput = null;
                int i = 0;
                // check all nested foreach plans
                // 1. If it is simple projection
                // 2. If last input is all flattened
                for (PhysicalPlan p:planList)
                {
                    PhysicalOperator opProj = p.getRoots().get(0);
                    if (!(opProj instanceof POProject))
                    {
                        allSimple = false;
                        break;
                    }
                    POProject proj = (POProject)opProj;
                    // the project should just be for one column
                    // from the input
                    if(proj.getColumns().size() != 1) {
                        allSimple = false;
                        break;
                    }
                   
                    try {
                        // if input to project is the last input
                        if (proj.getColumn() == pack.getNumInps())
                        {
                            // if we had already seen another project
                            // which was also for the last input, then
                            // we might be trying to flatten twice on the
                            // last input in which case we can't optimize by
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage$PeekedBag

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.