Package org.apache.pig.backend.hadoop.executionengine.physicalLayer

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator


        List<PhysicalOperator> mapLeaves = mr.mapPlan.getLeaves();
        if (mapLeaves == null || mapLeaves.size() != 1) {
            messageCollector.collect("Expected map to have single leaf!", MessageType.Warning, PigWarning.MULTI_LEAF_MAP);
            return;
        }
        PhysicalOperator mapLeaf = mapLeaves.get(0);
        if (!(mapLeaf instanceof POLocalRearrange)) {
            return;
        }
        POLocalRearrange rearrange = (POLocalRearrange)mapLeaf;

        List<PhysicalOperator> reduceRoots = mr.reducePlan.getRoots();
        if (reduceRoots.size() != 1) {
          messageCollector.collect("Expected reduce to have single leaf", MessageType.Warning, PigWarning.MULTI_LEAF_REDUCE);
            return;
        }

        // I expect that the first root should always be a POPackage.  If
        // not, I don't know what's going on, so I'm out of here.
        PhysicalOperator root = reduceRoots.get(0);
        if (!(root instanceof POPackage)) {
          messageCollector.collect("Expected reduce root to be a POPackage", MessageType.Warning, PigWarning.NON_PACKAGE_REDUCE_PLAN_ROOT);
            return;
        }
        POPackage pack = (POPackage)root;

        List<PhysicalOperator> packSuccessors =
            mr.reducePlan.getSuccessors(root);
        if (packSuccessors == null || packSuccessors.size() != 1) return;
        PhysicalOperator successor = packSuccessors.get(0);

        // Need to check if this is a distinct.
        if (successor instanceof POFilter) {
            /*
               Later
View Full Code Here


        if (apc.sawNonAlgebraic) return ExprType.NOT_ALGEBRAIC;
        if(apc.sawDistinctAgg) return ExprType.DISTINCT;
       
        // we did not see a Non algebraic or a distinct so far
        // proceed to check leaves
        PhysicalOperator leaf = leaves.get(0);
        if (leaf instanceof POProject) {
            POProject proj = (POProject)leaf;
            // Check that it's a simple project.  We can't currently handle
            // things like group.$0, because that requires resetting types on
            // the reduce side.
View Full Code Here

                        String msg = "Problem with replacing distinct operator with distinct built-in function.";
                        throw new PlanException(msg, errCode, PigException.BUG, e);
                    }
                   
                   
                    PhysicalOperator leaf = plans[j].getLeaves().get(0);
                    // make the Distinct POUserFunc the leaf in the map and combine plans.
                    if( j != plans.length - 1) {
                        while(!((leaf instanceof POUserFunc) &&
                                ((POUserFunc)leaf).getFuncSpec().getClassName().startsWith(DISTINCT_UDF_CLASSNAME))) {
                            plans[j].remove(leaf);
View Full Code Here

                if (leaves == null || leaves.size() != 1) {
                    int errCode = 2019;
                    String msg = "Expected to find plan with single leaf. Found " + leaves.size() + " leaves.";
                    throw new PlanException(msg, errCode, PigException.BUG);
                }
                PhysicalOperator leaf = leaves.get(0);
                // the combine plan could have an extra foreach inner plan
                // to project the key - so make sure we check the index
                // before looking in exprs
                if(i < exprs.size()  && exprs.get(i) == ExprType.DISTINCT) {
                    // if there is a distinctagg, we have to
                    // look for the Distinct POUserFunc and
                    // change its input to be a project of
                    // column "i"
                    PhysicalOperator op = getDistinctUserFunc(plans.get(i), leaf);
                    setProjectInput(op, plans.get(i), i);
                } else {
                    // Leaf should be either a projection or a UDF
                    if (leaf instanceof POProject) {
                        ((POProject)leaf).setColumn(i);
View Full Code Here

            int errCode = 2019;
            String msg = "Expected to find plan with single leaf. Found " + leaves.size() + " leaves.";
            throw new PlanException(msg, errCode, PigException.BUG);
        }

        PhysicalOperator leaf = leaves.get(0);
        if (!(leaf instanceof POUserFunc)) {
            int errCode = 2020;
            String msg = "Expected to find plan with UDF leaf. Found " + leaf.getClass().getSimpleName();
            throw new PlanException(msg, errCode, PigException.BUG);
        }
        POUserFunc func = (POUserFunc)leaf;
        try {
            func.setAlgebraicFunction(type);
View Full Code Here

    // return plan;
    //
    // }

    private void evaluateOperator(LogicalOperator op) {
        PhysicalOperator physOp = LogToPhyMap.get(op);
        Random r = new Random();
        // get the list of original inputs

        List<PhysicalOperator> inputs = physOp.getInputs();
        physOp.setInputs(null);
        physOp.setLineageTracer(lineage);
        PhysicalPlan phy = new PhysicalPlan();
        phy.add(physOp);

        // replace the original inputs by POReads
        for (LogicalOperator l : op.getPlan().getPredecessors(op)) {
            DataBag bag = derivedData.get(l);
            PORead por = new PORead(new OperatorKey("", r.nextLong()), bag);
            phy.add(por);
            try {
                phy.connect(por, physOp);
            } catch (PlanException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
                log.error("Error connecting " + por.name() + " to "
                        + physOp.name());
            }
        }

        DataBag output = BagFactory.getInstance().newDefaultBag();
        Tuple t = null;
        try {
            for (Result res = physOp.getNext(t); res.returnStatus != POStatus.STATUS_EOP; res = physOp
                    .getNext(t)) {
                output.add((Tuple) res.result);
            }
        } catch (ExecException e) {
            log.error("Error evaluating operator : " + physOp.name());
        }
        derivedData.put(op, output);

        try {
            Collection<IdentityHashSet<Tuple>> eq = EquivalenceClasses
                    .getEquivalenceClasses(op, derivedData);
            EqClasses.addAll(eq);
            OpToEqClasses.put(op, eq);
        } catch (ExecException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            log
                    .error("Error updating equivalence classes while evaluating operators. \n"
                            + e.getMessage());
        }

        // re-attach the original operators
        physOp.setInputs(inputs);
        physOp.setLineageTracer(null);
    }
View Full Code Here

        }

        LineageTracer oldLineage = this.lineage;
        this.lineage = new LineageTracer();

        PhysicalOperator physOp = LogToPhyMap.get(op);
        Random r = new Random();
        // get the list of original inputs
        // List<PhysicalOperator> inputs = physOp.getInputs();
        List<PhysicalOperator> inputs = new ArrayList<PhysicalOperator>();
        PhysicalPlan phy = new PhysicalPlan();
        phy.add(physOp);

        for (PhysicalOperator input : physOp.getInputs()) {
            inputs.add(input.getInputs().get(0));
            input.setInputs(null);
            phy.add(input);
            try {
                phy.connect(input, physOp);
            } catch (PlanException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
                log.error("Error connecting " + input.name() + " to "
                        + physOp.name());
            }
        }
        physOp.setLineageTracer(lineage);

        physOp.setLineageTracer(null);

        // replace the original inputs by POReads
        for (int i = 0; i < inputs.size(); i++) {
            DataBag bag = derivedData.get(op.getInputs().get(i));
            PORead por = new PORead(new OperatorKey("", r.nextLong()), bag);
            phy.add(por);
            try {
                phy.connect(por, physOp.getInputs().get(i));
            } catch (PlanException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
                log.error("Error connecting " + por.name() + " to "
                        + physOp.name());
            }
        }

        // replace the original inputs by POReads
        // for(LogicalOperator l : op.getPlan().getPredecessors(op)) {
        // DataBag bag = derivedData.get(l);
        // PORead por = new PORead(new OperatorKey("", r.nextLong()), bag);
        // phy.add(por);
        // try {
        // phy.connect(por, physOp);
        // } catch (PlanException e) {
        // // TODO Auto-generated catch block
        // e.printStackTrace();
        // log.error("Error connecting " + por.name() + " to " + physOp.name());
        // }
        // }

        DataBag output = BagFactory.getInstance().newDefaultBag();
        Tuple t = null;
        try {
            for (Result res = physOp.getNext(t); res.returnStatus != POStatus.STATUS_EOP; res = physOp
                    .getNext(t)) {
                output.add((Tuple) res.result);
            }
        } catch (ExecException e) {
            log.error("Error evaluating operator : " + physOp.name());
        }

        this.lineage = oldLineage;

        physOp.setInputs(inputs);
        physOp.setLineageTracer(null);

        return output;
    }
View Full Code Here

    @Override
    protected void visit(LOCogroup cg) throws VisitorException {
        // evaluateOperator(cg);
        // there is a slightly different code path for cogroup because of the
        // local rearranges
        PhysicalOperator physOp = LogToPhyMap.get(cg);
        Random r = new Random();
        // get the list of original inputs

        // List<PhysicalOperator> inputs = physOp.getInputs();
        List<PhysicalOperator> inputs = new ArrayList<PhysicalOperator>();
        PhysicalPlan phy = new PhysicalPlan();
        phy.add(physOp);

        // for(PhysicalOperator input : physOp.getInputs()) {
        for (PhysicalOperator input : physPlan.getPredecessors(physOp)) {
            inputs.add(input.getInputs().get(0));
            // input.setInputs(null);
            phy.add(input);
            try {
                phy.connect(input, physOp);
            } catch (PlanException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
                log.error("Error connecting " + input.name() + " to "
                        + physOp.name());
            }
        }

        physOp.setLineageTracer(lineage);

        // replace the original inputs by POReads
        for (int i = 0; i < inputs.size(); i++) {
            DataBag bag = derivedData.get(cg.getInputs().get(i));
            PORead por = new PORead(new OperatorKey("", r.nextLong()), bag);
            phy.add(por);
            try {
                phy.connect(por, physOp.getInputs().get(i));
            } catch (PlanException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
                log.error("Error connecting " + por.name() + " to "
                        + physOp.name());
            }
        }

        DataBag output = BagFactory.getInstance().newDefaultBag();
        Tuple t = null;
        try {
            for (Result res = physOp.getNext(t); res.returnStatus != POStatus.STATUS_EOP; res = physOp
                    .getNext(t)) {
                output.add((Tuple) res.result);
            }
        } catch (ExecException e) {
            log.error("Error evaluating operator : " + physOp.name());
        }
        derivedData.put(cg, output);

        try {
            Collection<IdentityHashSet<Tuple>> eq = EquivalenceClasses
                    .getEquivalenceClasses(cg, derivedData);
            EqClasses.addAll(eq);
            OpToEqClasses.put(cg, eq);
        } catch (ExecException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            log
                    .error("Error updating equivalence classes while evaluating operators. \n"
                            + e.getMessage());
        }

        // re-attach the original operators
        // for(int i = 0; i < inputs.size(); i++) {
        // try {
        // physPlan.connect(inputs.get(i), physOp.getInputs().get(i));
        //   
        // } catch (PlanException e) {
        // // TODO Auto-generated catch block
        // e.printStackTrace();
        // log.error("Error connecting " + inputs.get(i).name() + " to " +
        // physOp.getInputs().get(i).name());
        // }
        // }
        physOp.setLineageTracer(null);
    }
View Full Code Here

                }           
                processedSet.clear();
            }
           
            int idx = processedSet.nextClearBit(0);
            PhysicalOperator leaf = myPlans.get(idx).getLeaves().get(0);
           
            // a nested demux object is stored in multiple positions
            // of the inner plan list, corresponding to the indexes of
            // its inner plans; skip the object if it's already processed.
            if (curLeaf != null && leaf.getOperatorKey().equals(curLeaf.getOperatorKey())) {
                processedSet.set(idx++);
                if (idx < myPlans.size()) {
                    continue;
                } else {
                    res = eop;
                }
            } else {
                curLeaf = leaf;
                res = leaf.getNext(dummyTuple);
              
                if (res.returnStatus == POStatus.STATUS_EOP)  {
                    processedSet.set(idx++);       
                    if (idx < myPlans.size()) {
                        continue;
View Full Code Here

    }

    private Result processPlan() throws ExecException {
  
        int idx = processedSet.nextClearBit(0);
        PhysicalOperator leaf = myPlans.get(idx).getLeaves().get(0);
       
        Result res = runPipeline(leaf);
       
        if (res.returnStatus == POStatus.STATUS_EOP) {
            processedSet.set(idx++);       
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.