Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage$POPackageTupleBuffer


            throw new OptimizerException(msg, errCode, PigException.BUG);
        }
        int i = 0;
        int curIndex = initialIndex;
        while (i < end) {
            POPackage pkg = pkgs.get(i);
            addShiftedKeyInfoIndex(curIndex, pkg);
            curIndex++;
            i++;
        }
        return curIndex; // could be used in a caller who recursively called this function
View Full Code Here


       
    }

    private void mergeOneCombinePlanWithIndex(PhysicalPlan from,
            PhysicalPlan to, int initial, int current, byte mapKeyType) throws VisitorException {
        POPackage cpk = (POPackage)from.getRoots().get(0);
        from.remove(cpk);
       
        PODemux demux = (PODemux)to.getLeaves().get(0);
       
        boolean isSameKeyType = demux.isSameMapKeyType();
       
        POMultiQueryPackage pkg = (POMultiQueryPackage)to.getRoots().get(0);
       
        // if current > initial + 1, it means we had
        // a split in the map of the MROper we are trying to
        // merge. In that case we would have changed the indices
        // of the POLocalRearranges in the split to be in the
        // range initial to current. To handle key, value pairs
        // coming out of those POLocalRearranges, we add
        // the Packages in the 'from' POMultiQueryPackage (in this case,
        // it has to be a POMultiQueryPackage since we had
        // a POSplit in the map) to the 'to' POMultiQueryPackage.
        // These Packages would have correct positions in the package
        // list and would be able to handle the outputs from the different
        // POLocalRearranges.
        int total = current - initial;
        int pkCount = 0;
        if (cpk instanceof POMultiQueryPackage) {
            List<POPackage> pkgs = ((POMultiQueryPackage)cpk).getPackages();
            for (POPackage p : pkgs) {
                pkg.addPackage(p);
                if (!isSameKeyType) {
                    p.setKeyType(DataType.TUPLE);
                }
                pkCount++;
            }
        } else {
            pkg.addPackage(cpk);
            pkCount = 1;
        }

        if (pkCount != total) {
            int errCode = 2146;
            String msg = "Internal Error. Inconsistency in key index found during optimization.";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }

        // all packages should have the same key type
        if (!isSameKeyType) {
            cpk.setKeyType(DataType.TUPLE);         
        }
       
        pkg.setKeyType(cpk.getKeyType());
       
        boolean[] keyPos = cpk.getKeyPositionsInTuple();
       
        // See comment above for why we flatten the Packages
        // in the from plan - for the same reason, we flatten
        // the inner plans of Demux operator now.
        int plCount = 0;
View Full Code Here

        PhysicalOperator root = reduceRoots.get(0);
        if (!(root instanceof POPackage)) {
          messageCollector.collect("Expected reduce root to be a POPackage", MessageType.Warning, PigWarning.NON_PACKAGE_REDUCE_PLAN_ROOT);
            return;
        }
        POPackage pack = (POPackage)root;

        List<PhysicalOperator> packSuccessors =
            mr.reducePlan.getSuccessors(root);
        if (packSuccessors == null || packSuccessors.size() != 1) return;
        PhysicalOperator successor = packSuccessors.get(0);
View Full Code Here

            // distributed cache.
            setupDistributedCacheForUdfs(mro, pigContext, conf);

            SchemaTupleFrontend.copyAllGeneratedToDistributedCache(pigContext, conf);

            POPackage pack = null;
            if(mro.reducePlan.isEmpty()){
                //MapOnly Job
                log.info("Map only job, skipping reducer estimation");
                nwJob.setMapperClass(PigMapOnly.Map.class);
                nwJob.setNumReduceTasks(0);
                if(!pigContext.inIllustrator)
                    conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
                if(mro.isEndOfAllInputSetInMap()) {
                    // this is used in Map.close() to decide whether the
                    // pipeline needs to be rerun one more time in the close()
                    // The pipeline is rerun if there either was a stream or POMergeJoin
                    conf.set(END_OF_INP_IN_MAP, "true");
                }
            }
            else{
                //Map Reduce Job

                // Estimate the number of reducers
                log.info("Reduce phase detected, estimating # of required reducers.");
                adjustNumReducers(plan, mro, nwJob);

                //Process the POPackage operator and remove it from the reduce plan
                if(!mro.combinePlan.isEmpty()){
                    POPackage combPack = (POPackage)mro.combinePlan.getRoots().get(0);
                    mro.combinePlan.remove(combPack);
                    nwJob.setCombinerClass(PigCombiner.Combine.class);
                    conf.set("pig.combinePlan", ObjectSerializer.serialize(mro.combinePlan));
                    conf.set("pig.combine.package", ObjectSerializer.serialize(combPack));
                } else if (mro.needsDistinctCombiner()) {
View Full Code Here

                {
                    int errorCode = 2214;
                    throw new VisitorException("Cannot find POLocalRearrange to set secondary plan", errorCode);
                }
            }
            POPackage pack = (POPackage) root;
            pack.setUseSecondaryKey(true);
        }
    }
View Full Code Here

        if (!(root instanceof POPackage)) {
            messageCollector.collect("Expected reduce root to be a POPackage", MessageType.Warning,
                    PigWarning.NON_PACKAGE_REDUCE_PLAN_ROOT);
            return;
        }
        POPackage pack = (POPackage)root;

        List<PhysicalOperator> packSuccessors = reducePlan.getSuccessors(root);
        if (packSuccessors == null || packSuccessors.size() != 1) {
            return;
        }
        PhysicalOperator successor = packSuccessors.get(0);

        if (successor instanceof POLimit) {
            // POLimit is acceptable, as long has it has a single foreach as
            // successor
            List<PhysicalOperator> limitSucs = reducePlan.getSuccessors(successor);
            if (limitSucs != null && limitSucs.size() == 1 &&
                    limitSucs.get(0) instanceof POForEach) {
                // the code below will now further examine the foreach
                successor = limitSucs.get(0);
            }
        }
        if (successor instanceof POForEach) {
            POForEach foreach = (POForEach)successor;
            List<PhysicalPlan> feInners = foreach.getInputPlans();

            // find algebraic operators and also check if the foreach statement
            // is suitable for combiner use
            List<Pair<PhysicalOperator, PhysicalPlan>> algebraicOps = findAlgebraicOps(feInners);
            if (algebraicOps == null || algebraicOps.size() == 0) {
                // the plan is not combinable or there is nothing to combine
                // we're done
                return;
            }
            if (combinePlan != null && combinePlan.getRoots().size() != 0) {
                messageCollector.collect("Wasn't expecting to find anything already " +
                        "in the combiner!", MessageType.Warning, PigWarning.NON_EMPTY_COMBINE_PLAN);
                return;
            }

            LOG.info("Choosing to move algebraic foreach to combiner");
            try {
                // replace PODistinct->Project[*] with distinct udf (which is Algebraic)
                for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
                    if (! (op2plan.first instanceof PODistinct)) {
                        continue;
                    }
                    DistinctPatcher distinctPatcher = new DistinctPatcher(op2plan.second);
                    distinctPatcher.visit();
                    if (distinctPatcher.getDistinct() == null) {
                        int errCode = 2073;
                        String msg = "Problem with replacing distinct operator with distinct built-in function.";
                        throw new PlanException(msg, errCode, PigException.BUG);
                    }
                    op2plan.first = distinctPatcher.getDistinct();
                }

                // create new map foreach
                POForEach mfe = createForEachWithGrpProj(foreach, rearrange.getKeyType());
                Map<PhysicalOperator, Integer> op2newpos = Maps.newHashMap();
                Integer pos = 1;
                // create plan for each algebraic udf and add as inner plan in map-foreach
                for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
                    PhysicalPlan udfPlan = createPlanWithPredecessors(op2plan.first, op2plan.second);
                    mfe.addInputPlan(udfPlan, false);
                    op2newpos.put(op2plan.first, pos++);
                }
                changeFunc(mfe, POUserFunc.INITIAL);

                // since we will only be creating SingleTupleBag as input to
                // the map foreach, we should flag the POProjects in the map
                // foreach inner plans to also use SingleTupleBag
                for (PhysicalPlan mpl : mfe.getInputPlans()) {
                    try {
                        new fixMapProjects(mpl).visit();
                    } catch (VisitorException e) {
                        int errCode = 2089;
                        String msg = "Unable to flag project operator to use single tuple bag.";
                        throw new PlanException(msg, errCode, PigException.BUG, e);
                    }
                }

                // create new combine foreach
                POForEach cfe = createForEachWithGrpProj(foreach, rearrange.getKeyType());
                // add algebraic functions with appropriate projection
                addAlgebraicFuncToCombineFE(cfe, op2newpos);
                changeFunc(cfe, POUserFunc.INTERMEDIATE);

                // fix projection and function time for algebraic functions in reduce foreach
                for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
                    setProjectInput(op2plan.first, op2plan.second, op2newpos.get(op2plan.first));
                    ((POUserFunc)op2plan.first).setAlgebraicFunction(POUserFunc.FINAL);
                }

                // we have modified the foreach inner plans - so set them again
                // for the foreach so that foreach can do any re-initialization
                // around them.
                // FIXME - this is a necessary evil right now because the leaves
                // are explicitly stored in the POForeach as a list rather than
                // computed each time at run time from the plans for
                // optimization. Do we want to have the Foreach compute the
                // leaves each time and have Java optimize it (will Java
                // optimize?)?
                mfe.setInputPlans(mfe.getInputPlans());
                cfe.setInputPlans(cfe.getInputPlans());
                foreach.setInputPlans(foreach.getInputPlans());

                // tell POCombinerPackage which fields need projected and which
                // placed in bags. First field is simple project rest need to go
                // into bags
                int numFields = algebraicOps.size() + 1; // algebraic funcs + group key
                boolean[] bags = new boolean[numFields];
                bags[0] = false;
                for (int i = 1; i < numFields; i++) {
                    bags[i] = true;
                }

                // Use the POCombiner package in the combine plan
                // as it needs to act differently than the regular
                // package operator.
                CombinerPackager pkgr = new CombinerPackager(pack.getPkgr(), bags);
                POPackage combinePack = pack.clone();
                combinePack.setPkgr(pkgr);
                combinePack.setParentPlan(null);

                combinePlan.add(combinePack);
                combinePlan.add(cfe);
                combinePlan.connect(combinePack, cfe);

View Full Code Here

        return curIndex;
    }

    private void mergeOneReducePlanWithIndex(PhysicalPlan from,
            PhysicalPlan to, int initial, int current, byte mapKeyType) throws VisitorException {
        POPackage pk = (POPackage)from.getRoots().get(0);
        from.remove(pk);
        Packager fromPkgr = pk.getPkgr();

        if (!(fromPkgr instanceof MultiQueryPackager)) {
            // XXX the index of the original keyInfo map is always 0,
            // we need to shift the index so that the lookups works
            // with the new indexed key
View Full Code Here

                edge.schedulingType, out, in);
    }

    private void addCombiner(PhysicalPlan combinePlan, TezOperator pkgTezOp,
            Configuration conf) throws IOException {
        POPackage combPack = (POPackage) combinePlan.getRoots().get(0);
        POLocalRearrange combRearrange = (POLocalRearrange) combinePlan
                .getLeaves().get(0);
        setIntermediateOutputKeyValue(combRearrange.getKeyType(), conf, pkgTezOp);

        LoRearrangeDiscoverer lrDiscoverer = new LoRearrangeDiscoverer(
View Full Code Here

    }

    private void mergeOneCombinePlanWithIndex(PhysicalPlan from,
            PhysicalPlan to, int initial, int current, byte mapKeyType) throws VisitorException {
        POPackage cpk = (POPackage)from.getRoots().get(0);
        from.remove(cpk);
        Packager cpkgr = cpk.getPkgr();

        PODemux demux = (PODemux)to.getLeaves().get(0);

        MultiQueryPackager toPkgr = (MultiQueryPackager) ((POPackage) to
                .getRoots().get(0)).getPkgr();

        boolean isSameKeyType = toPkgr.isSameMapKeyType();

        // if current > initial + 1, it means we had
        // a split in the map of the MROper we are trying to
        // merge. In that case we would have changed the indices
        // of the POLocalRearranges in the split to be in the
        // range initial to current. To handle key, value pairs
        // coming out of those POLocalRearranges, we add
        // the Packages in the 'from' POMultiQueryPackage (in this case,
        // it has to be a POMultiQueryPackage since we had
        // a POSplit in the map) to the 'to' POMultiQueryPackage.
        // These Packages would have correct positions in the package
        // list and would be able to handle the outputs from the different
        // POLocalRearranges.
        int total = current - initial;
        int pkCount = 0;
        if (cpkgr instanceof MultiQueryPackager) {
            List<Packager> pkgrs = ((MultiQueryPackager) cpkgr).getPackagers();
            for (Packager p : pkgrs) {
                toPkgr.addPackager(p);
                if (!isSameKeyType) {
                    p.setKeyType(DataType.TUPLE);
                }
                pkCount++;
            }
        } else {
            toPkgr.addPackager(cpkgr);
            pkCount = 1;
        }

        toPkgr.setSameMapKeyType(isSameKeyType);

        if (pkCount != total) {
            int errCode = 2146;
            String msg = "Internal Error. Inconsistency in key index found during optimization.";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }

        // all packages should have the same key type
        if (!isSameKeyType) {
            cpk.getPkgr().setKeyType(DataType.TUPLE);
        }

        toPkgr.setKeyType(cpk.getPkgr().getKeyType());

        // See comment above for why we flatten the Packages
        // in the from plan - for the same reason, we flatten
        // the inner plans of Demux operator now.
        int plCount = 0;
View Full Code Here

        // Configure the classes for incoming shuffles to this TezOp
        // TODO: Refactor out resetting input keys, PIG-3957
        List<PhysicalOperator> roots = tezOp.plan.getRoots();
        if (roots.size() == 1 && roots.get(0) instanceof POPackage) {
            POPackage pack = (POPackage) roots.get(0);

            List<PhysicalOperator> succsList = tezOp.plan.getSuccessors(pack);
            if (succsList != null) {
                succsList = new ArrayList<PhysicalOperator>(succsList);
            }
            byte keyType = pack.getPkgr().getKeyType();
            tezOp.plan.remove(pack);
            payloadConf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
            setIntermediateOutputKeyValue(keyType, payloadConf, tezOp);
            POShuffleTezLoad newPack = new POShuffleTezLoad(pack);
            if (tezOp.isSkewedJoin()) {
                newPack.setSkewedJoins(true);
            }
            tezOp.plan.add(newPack);

            // Set input keys for POShuffleTezLoad. This is used to identify
            // the inputs that are attached to the POShuffleTezLoad in the
            // backend.
            Map<Integer, String> localRearrangeMap = new TreeMap<Integer, String>();
            for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
                if (tezOp.getSampleOperator() != null && tezOp.getSampleOperator() == pred) {
                    // skip sample vertex input
                } else {
                    String inputKey = pred.getOperatorKey().toString();
                    if (pred.isVertexGroup()) {
                        pred = mPlan.getOperator(pred.getVertexGroupMembers().get(0));
                    }
                    LinkedList<POLocalRearrangeTez> lrs =
                            PlanHelper.getPhysicalOperators(pred.plan, POLocalRearrangeTez.class);
                    for (POLocalRearrangeTez lr : lrs) {
                        if (lr.isConnectedToPackage()
                                && lr.getOutputKey().equals(tezOp.getOperatorKey().toString())) {
                            localRearrangeMap.put((int) lr.getIndex(), inputKey);
                        }
                    }
                }
            }
            for (Map.Entry<Integer, String> entry : localRearrangeMap.entrySet()) {
                newPack.addInputKey(entry.getValue());
            }

            if (succsList != null) {
                for (PhysicalOperator succs : succsList) {
                    tezOp.plan.connect(newPack, succs);
                }
            }

            setIntermediateOutputKeyValue(pack.getPkgr().getKeyType(), payloadConf, tezOp);
        } else if (roots.size() == 1 && roots.get(0) instanceof POIdentityInOutTez) {
            POIdentityInOutTez identityInOut = (POIdentityInOutTez) roots.get(0);
            // TODO Need to fix multiple input key mapping
            TezOperator identityInOutPred = null;
            for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage$POPackageTupleBuffer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.