Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage$POPackageTupleBuffer


       
        POGlobalRearrange poGlobal = new POGlobalRearrange(new OperatorKey(
                scope, nodeGen.getNextNodeId(scope)), cross
                .getRequestedParallelisam());
        poGlobal.setAlias(cross.getAlias());
        POPackage poPackage = new POPackage(new OperatorKey(scope, nodeGen
                .getNextNodeId(scope)), cross.getRequestedParallelisam());
        poGlobal.setAlias(cross.getAlias());
        currentPlan.add(poGlobal);
        currentPlan.add(poPackage);
       
        int count = 0;
       
        try {
            currentPlan.connect(poGlobal, poPackage);
            List<Boolean> flattenLst = Arrays.asList(true, true);
           
            for (Operator op : inputs) {
                PhysicalPlan fep1 = new PhysicalPlan();
                ConstantExpression ce1 = new ConstantExpression(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),cross.getRequestedParallelisam());
                ce1.setValue(inputs.size());
                ce1.setResultType(DataType.INTEGER);
                fep1.add(ce1);
               
                ConstantExpression ce2 = new ConstantExpression(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),cross.getRequestedParallelisam());
                ce2.setValue(count);
                ce2.setResultType(DataType.INTEGER);
                fep1.add(ce2);
                /*Tuple ce1val = TupleFactory.getInstance().newTuple(2);
                ce1val.set(0,inputs.size());
                ce1val.set(1,count);
                ce1.setValue(ce1val);
                ce1.setResultType(DataType.TUPLE);*/
               
               

                POUserFunc gfc = new POUserFunc(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),cross.getRequestedParallelisam(), Arrays.asList((PhysicalOperator)ce1,(PhysicalOperator)ce2), new FuncSpec(GFCross.class.getName()));
                gfc.setAlias(cross.getAlias());
                gfc.setResultType(DataType.BAG);
                fep1.addAsLeaf(gfc);
                gfc.setInputs(Arrays.asList((PhysicalOperator)ce1,(PhysicalOperator)ce2));
                /*fep1.add(gfc);
                fep1.connect(ce1, gfc);
                fep1.connect(ce2, gfc);*/
               
                PhysicalPlan fep2 = new PhysicalPlan();
                POProject feproj = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam());
                feproj.setAlias(cross.getAlias());
                feproj.setResultType(DataType.TUPLE);
                feproj.setStar(true);
                feproj.setOverloaded(false);
                fep2.add(feproj);
                List<PhysicalPlan> fePlans = Arrays.asList(fep1, fep2);
               
                POForEach fe = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam(), fePlans, flattenLst );
                fe.setAlias(cross.getAlias());
                currentPlan.add(fe);
                currentPlan.connect(logToPhyMap.get(op), fe);
               
                POLocalRearrange physOp = new POLocalRearrange(new OperatorKey(
                        scope, nodeGen.getNextNodeId(scope)), cross
                        .getRequestedParallelisam());
                physOp.setAlias(cross.getAlias());
                List<PhysicalPlan> lrPlans = new ArrayList<PhysicalPlan>();
                for(int i=0;i<inputs.size();i++){
                    PhysicalPlan lrp1 = new PhysicalPlan();
                    POProject lrproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam(), i);
                    lrproj1.setAlias(cross.getAlias());
                    lrproj1.setOverloaded(false);
                    lrproj1.setResultType(DataType.INTEGER);
                    lrp1.add(lrproj1);
                    lrPlans.add(lrp1);
                }
               
                physOp.setCross(true);
                physOp.setIndex(count++);
                physOp.setKeyType(DataType.TUPLE);
                physOp.setPlans(lrPlans);
                physOp.setResultType(DataType.TUPLE);
               
                currentPlan.add(physOp);
                currentPlan.connect(fe, physOp);
                currentPlan.connect(physOp, poGlobal);
            }
        } catch (PlanException e1) {
            int errCode = 2015;
            String msg = "Invalid physical operators in the physical plan" ;
            throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e1);
        } catch (ExecException e) {
            int errCode = 2058;
            String msg = "Unable to set index on newly create POLocalRearrange.";
            throw new VisitorException(msg, errCode, PigException.BUG, e);
        }
       
        poPackage.setKeyType(DataType.TUPLE);
        poPackage.setResultType(DataType.TUPLE);
        poPackage.setNumInps(count);
        boolean inner[] = new boolean[count];
        for (int i=0;i<count;i++) {
            inner[i] = true;
        }
        poPackage.setInner(inner);
       
        List<PhysicalPlan> fePlans = new ArrayList<PhysicalPlan>();
        List<Boolean> flattenLst = new ArrayList<Boolean>();
        for(int i=1;i<=count;i++){
            PhysicalPlan fep1 = new PhysicalPlan();
View Full Code Here


        switch (cg.getGroupType()) {
        case COLLECTED:
            translateCollectedCogroup(cg);
            break;
        case REGULAR:
            POPackage poPackage = compileToLR_GR_PackTrio(cg, cg.getCustomPartitioner(), cg.getInner(), cg.getExpressionPlans());
            poPackage.setPackageType(PackageType.GROUP);           
            logToPhyMap.put(cg, poPackage);
            break;
        case MERGE:
            translateMergeCogroup(cg);
            break;
View Full Code Here

            }
           
            return;
        }
        else if (loj.getJoinType() == LOJoin.JOINTYPE.HASH){
            POPackage poPackage = compileToLR_GR_PackTrio(loj, loj.getCustomPartitioner(), innerFlags, loj.getExpressionPlans());
            POForEach fe = compileFE4Flattening(innerFlags,  scope, parallel, alias, inputs);
            currentPlan.add(fe);
            try {
                currentPlan.connect(poPackage, fe);
            } catch (PlanException e) {
                throw new LogicalToPhysicalTranslatorException(e.getDetailedMessage(),
                        e.getErrorCode(),e.getErrorSource(),e);
            }
            logToPhyMap.put(loj, fe);
            poPackage.setPackageType(POPackage.PackageType.JOIN);
        }
        translateSoftLinks(loj);
    }
View Full Code Here

        POGlobalRearrange poGlobal = new POGlobalRearrange(new OperatorKey(
                DEFAULT_SCOPE, nodeGen.getNextNodeId(DEFAULT_SCOPE)), relationalOp.getRequestedParallelisam());
        poGlobal.setAlias(relationalOp.getAlias());
        poGlobal.setCustomPartitioner(customPartitioner);
        POPackage poPackage = new POPackage(new OperatorKey(DEFAULT_SCOPE, nodeGen
                .getNextNodeId(DEFAULT_SCOPE)), relationalOp.getRequestedParallelisam());
        poPackage.setAlias(relationalOp.getAlias());
        currentPlan.add(poGlobal);
        currentPlan.add(poPackage);

        try {
            currentPlan.connect(poGlobal, poPackage);
        } catch (PlanException e1) {
            int errCode = 2015;
            String msg = "Invalid physical operators in the physical plan" ;
            throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e1);
        }

        int count = 0;
        Byte type = null;
        List<Operator> inputs = relationalOp.getPlan().getPredecessors(relationalOp);
        for (int i=0;i<inputs.size();i++) {
            Operator op = inputs.get(i);
            List<LogicalExpressionPlan> plans = (List<LogicalExpressionPlan>)innerPlans.get(i);
            POLocalRearrange physOp = new POLocalRearrange(new OperatorKey(
                    DEFAULT_SCOPE, nodeGen.getNextNodeId(DEFAULT_SCOPE)), relationalOp.getRequestedParallelisam());
            physOp.setAlias(relationalOp.getAlias());
            List<PhysicalPlan> exprPlans = translateExpressionPlans(relationalOp, plans);
            try {
                physOp.setPlans(exprPlans);
            } catch (PlanException pe) {
                int errCode = 2071;
                String msg = "Problem with setting up local rearrange's plans.";
                throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, pe);
            }
            try {
                physOp.setIndex(count++);
            } catch (ExecException e1) {
                int errCode = 2058;
                String msg = "Unable to set index on newly create POLocalRearrange.";
                throw new VisitorException(msg, errCode, PigException.BUG, e1);
            }
            if (plans.size() > 1) {
                type = DataType.TUPLE;
                physOp.setKeyType(type);
            } else {
                type = exprPlans.get(0).getLeaves().get(0).getResultType();
                physOp.setKeyType(type);
            }
            physOp.setResultType(DataType.TUPLE);

            currentPlan.add(physOp);

            try {
                currentPlan.connect(logToPhyMap.get(op), physOp);
                currentPlan.connect(physOp, poGlobal);
            } catch (PlanException e) {
                int errCode = 2015;
                String msg = "Invalid physical operators in the physical plan" ;
                throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
            }
        }
       
        poPackage.setKeyType(type);
        poPackage.setResultType(DataType.TUPLE);
        poPackage.setNumInps(count);
        poPackage.setInner(innerFlags);
        return poPackage;
    }
View Full Code Here

            // this call modifies the ReplFiles names of POFRJoin operators
            // within the MR plans, must be called before the plans are
            // serialized
            setupDistributedCacheForJoin(mro, pigContext, conf);

            POPackage pack = null;
            if(mro.reducePlan.isEmpty()){
                //MapOnly Job
                nwJob.setMapperClass(PigMapOnly.Map.class);
                nwJob.setNumReduceTasks(0);
                conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
                if(mro.isEndOfAllInputSetInMap()) {
                    // this is used in Map.close() to decide whether the
                    // pipeline needs to be rerun one more time in the close()
                    // The pipeline is rerun if there either was a stream or POMergeJoin
                    conf.set(END_OF_INP_IN_MAP, "true");
                }
            }
            else{
                //Map Reduce Job
                //Process the POPackage operator and remove it from the reduce plan
                if(!mro.combinePlan.isEmpty()){
                    POPackage combPack = (POPackage)mro.combinePlan.getRoots().get(0);
                    mro.combinePlan.remove(combPack);
                    nwJob.setCombinerClass(PigCombiner.Combine.class);
                    conf.set("pig.combinePlan", ObjectSerializer.serialize(mro.combinePlan));
                    conf.set("pig.combine.package", ObjectSerializer.serialize(combPack));
                } else if (mro.needsDistinctCombiner()) {
View Full Code Here

           
            op = mrRoots.get(0);
            if (!(op instanceof POPackage)) {
                return;
            }
            POPackage pack = (POPackage)op;
           
            List<PhysicalOperator> sucs = mr.reducePlan.getSuccessors(pack);
            if (sucs.size()!=1) {
                return;
            }
           
            op = sucs.get(0);
            boolean lastInputFlattened = true;
            boolean allSimple = true;
            if (op instanceof POForEach)
            {
                POForEach forEach = (POForEach)op;
                List<PhysicalPlan> planList = forEach.getInputPlans();
                List<Boolean> flatten = forEach.getToBeFlattened();
                POProject projOfLastInput = null;
                int i = 0;
                // check all nested foreach plans
                // 1. If it is simple projection
                // 2. If last input is all flattened
                for (PhysicalPlan p:planList)
                {
                    PhysicalOperator opProj = p.getRoots().get(0);
                    if (!(opProj instanceof POProject))
                    {
                        allSimple = false;
                        break;
                    }
                    POProject proj = (POProject)opProj;
                    // the project should just be for one column
                    // from the input
                    if(proj.getColumns().size() != 1) {
                        allSimple = false;
                        break;
                    }
                   
                    try {
                        // if input to project is the last input
                        if (proj.getColumn() == pack.getNumInps())
                        {
                            // if we had already seen another project
                            // which was also for the last input, then
                            // we might be trying to flatten twice on the
                            // last input in which case we can't optimize by
View Full Code Here

                {
                    int errorCode = 2214;
                    throw new VisitorException("Cannot find POLocalRearrange to set secondary plan", errorCode);
                }
            }
            POPackage pack = (POPackage) root;
            pack.setUseSecondaryKey(true);
        }
    }
View Full Code Here

        // be present only in the reduce plan. Search in these two
        // plans accordingly
        if(!mr.combinePlan.isEmpty()) {
            PackageDiscoverer pkgDiscoverer = new PackageDiscoverer(mr.combinePlan);
            pkgDiscoverer.visit();
            POPackage pkg = pkgDiscoverer.getPkg();
            if(pkg != null) {
                handlePackage(mr, pkg);
            }  
        }
       
        if(!mr.reducePlan.isEmpty()) {
            PackageDiscoverer pkgDiscoverer = new PackageDiscoverer(mr.reducePlan);
            pkgDiscoverer.visit();
            POPackage pkg = pkgDiscoverer.getPkg();
            if(pkg != null) {
                // if the POPackage is actually a POPostCombinerPackage, then we should
                // just look for the corresponding LocalRearrange(s) in the combine plan
                if(pkg instanceof POCombinerPackage) {
                    if(patchPackage(mr.combinePlan, pkg) != pkg.getNumInps()) {
                        int errCode = 2085;
                        String msg = "Unexpected problem during optimization." +
                        " Could not find LocalRearrange in combine plan.";
                        throw new OptimizerException(msg, errCode, PigException.BUG);
                    }
View Full Code Here

            NullableTuple it = new NullableTuple((Tuple) db2Iter.next());
            it.setIndex((byte)1);
            db.add(it);
        }
        //ITIterator iti = new TestPackage.ITIterator(db.iterator());
        POPackage pop = new POPackage(new OperatorKey("", r.nextLong()));
        pop.setNumInps(2);
        pop.setInner(inner);
        PigNullableWritable k = HDataType.getWritableComparableTypes(key, (byte)0);
        pop.attachInput(k, db.iterator());
       
        // we are not doing any optimization to remove
        // parts of the "value" which are present in the "key" in this
        // unit test - so set up the "keyInfo" accordingly in
        // the POPackage
        Map<Integer, Pair<Boolean, Map<Integer, Integer>>> keyInfo =
            new HashMap<Integer, Pair<Boolean, Map<Integer,Integer>>>();
        Pair<Boolean, Map<Integer, Integer>> p =
            new Pair<Boolean, Map<Integer, Integer>>(false, new HashMap<Integer, Integer>());
        keyInfo.put(0, p);
        keyInfo.put(1, p);
        pop.setKeyInfo(keyInfo);
        Tuple t = null;
        Result res = null;
        res = (Result) pop.getNext(t);
        if(res.returnStatus==POStatus.STATUS_NULL && inner[0]) return;
        assertEquals(POStatus.STATUS_OK, res.returnStatus);

        t = (Tuple) res.result;
        Object outKey = t.get(0);
View Full Code Here

      if(gr.getRequestedParallelism() > curMROp.requestedParallelism)
        curMROp.requestedParallelism = gr.getRequestedParallelism();
      compiledInputs = new MapReduceOper[] {curMROp};
     
      // create POPakcage
      POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)), rp);
      pkg.setKeyType(type);
      pkg.setResultType(DataType.TUPLE);
      pkg.setNumInps(2);
      boolean [] inner = op.getInnerFlags();
      pkg.setInner(inner);           
      pkg.visit(this);      
      compiledInputs = new MapReduceOper[] {curMROp};
     
      // create POForEach
      List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
      List<Boolean> flat = new ArrayList<Boolean>();
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage$POPackageTupleBuffer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.