Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject


    private POForEach getPlainForEachOP()
    {
        List<PhysicalPlan> eps1 = new ArrayList<PhysicalPlan>();
        List<Boolean> flat1 = new ArrayList<Boolean>();
        PhysicalPlan ep1 = new PhysicalPlan();
        POProject prj1 = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
        prj1.setResultType(DataType.TUPLE);
        prj1.setStar(false);
        prj1.setColumn(1);
        prj1.setOverloaded(true);
        ep1.add(prj1);
        eps1.add(ep1);
        flat1.add(true);
        POForEach fe = new POForEach(new OperatorKey(scope, nig
                .getNextNodeId(scope)), -1, eps1, flat1);
View Full Code Here


    @Override
    public void visitDistinct(PODistinct op) throws VisitorException {
        try{
            MapReduceOper mro = compiledInputs[0];
            PhysicalPlan ep = new PhysicalPlan();
            POProject prjStar = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
            prjStar.setResultType(DataType.TUPLE);
            prjStar.setStar(true);
            ep.add(prjStar);
           
            List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
            eps.add(ep);
           
            POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
            lr.setIndex(0);
            lr.setKeyType(DataType.TUPLE);
            lr.setPlans(eps);
            lr.setResultType(DataType.TUPLE);
            lr.setDistinct(true);
            if(!mro.isMapDone()){
                mro.mapPlan.addAsLeaf(lr);
            }
            else if(mro.isMapDone() && ! mro.isReduceDone()){
                mro.reducePlan.addAsLeaf(lr);
            }
           
            blocking(op);
           
            POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
            pkg.setKeyType(DataType.TUPLE);
            pkg.setDistinct(true);
            pkg.setNumInps(1);
            boolean[] inner = {false};
            pkg.setInner(inner);
            curMROp.reducePlan.add(pkg);
           
            List<PhysicalPlan> eps1 = new ArrayList<PhysicalPlan>();
            List<Boolean> flat1 = new ArrayList<Boolean>();
            PhysicalPlan ep1 = new PhysicalPlan();
            POProject prj1 = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
            prj1.setResultType(DataType.TUPLE);
            prj1.setStar(false);
            prj1.setColumn(0);
            prj1.setOverloaded(false);
            ep1.add(prj1);
            eps1.add(ep1);
            flat1.add(true);
            POForEach nfe1 = new POForEach(new OperatorKey(scope, nig
                    .getNextNodeId(scope)), op.getRequestedParallelism(), eps1,
View Full Code Here

     
      PhysicalPlan ep;
      // Add corresponding POProjects
      for (int i=0; i < 2; i++ ) {
          ep = new PhysicalPlan();
          POProject prj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
          prj.setColumn(i+1);
          prj.setOverloaded(false);
          prj.setResultType(DataType.BAG);
          ep.add(prj);
          eps.add(ep);
          if (!inner[i]) {
              // Add an empty bag for outer join
              CompilerUtils.addEmptyBagOuterJoin(ep, op.getSchema(i));
View Full Code Here

        }

        if (fields == null) {
            // This is project *
            PhysicalPlan ep = new PhysicalPlan();
            POProject prj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
            prj.setStar(true);
            prj.setOverloaded(false);
            prj.setResultType(DataType.TUPLE);
            ep.add(prj);
            eps1.add(ep);
        } else {
            /*
            for (int i : fields) {
                PhysicalPlan ep = new PhysicalPlan();
                POProject prj = new POProject(new OperatorKey(scope,
                    nig.getNextNodeId(scope)));
                prj.setColumn(i);
                prj.setOverloaded(false);
                prj.setResultType(DataType.BYTEARRAY);
                ep.add(prj);
                eps1.add(ep);
            }
            */
            // Attach the sort plans to the local rearrange to get the
            // projection.
            eps1.addAll(sort.getSortPlans());

            // Visit the first sort plan to figure out our key type.  We only
            // have to visit the first because if we have more than one plan,
            // then the key type will be tuple.
            try {
                FindKeyTypeVisitor fktv =
                    new FindKeyTypeVisitor(sort.getSortPlans().get(0));
                fktv.visit();
                keyType = fktv.keyType;
            } catch (VisitorException ve) {
                int errCode = 2035;
                String msg = "Internal error. Could not compute key type of sort operator.";
                throw new PlanException(msg, errCode, PigException.BUG, ve);
            }
        }
       
        POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
        try {
            lr.setIndex(0);
        } catch (ExecException e) {
          int errCode = 2058;
          String msg = "Unable to set index on newly created POLocalRearrange.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
        lr.setKeyType((fields == null || fields.length>1) ? DataType.TUPLE :
            keyType);
        lr.setPlans(eps1);
        lr.setResultType(DataType.TUPLE);
        lr.setAlias(sort.getAlias());
        mro.mapPlan.addAsLeaf(lr);
       
        mro.setMapDone(true);
       
        if (limit!=-1) {
          POPackageLite pkg_c = new POPackageLite(new OperatorKey(scope,nig.getNextNodeId(scope)));
          pkg_c.setKeyType((fields.length>1) ? DataType.TUPLE : keyType);
            pkg_c.setNumInps(1);
            //pkg.setResultType(DataType.TUPLE);           
            mro.combinePlan.add(pkg_c);
         
            List<PhysicalPlan> eps_c1 = new ArrayList<PhysicalPlan>();
            List<Boolean> flat_c1 = new ArrayList<Boolean>();
            PhysicalPlan ep_c1 = new PhysicalPlan();
            POProject prj_c1 = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
            prj_c1.setColumn(1);
            prj_c1.setOverloaded(false);
            prj_c1.setResultType(DataType.BAG);
            ep_c1.add(prj_c1);
            eps_c1.add(ep_c1);
            flat_c1.add(true);
            POForEach fe_c1 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)),
                -1, eps_c1, flat_c1);
            fe_c1.setResultType(DataType.TUPLE);
            mro.combinePlan.addAsLeaf(fe_c1);
           
            POLimit pLimit = new POLimit(new OperatorKey(scope,nig.getNextNodeId(scope)));
          pLimit.setLimit(limit);
          mro.combinePlan.addAsLeaf(pLimit);
           
            List<PhysicalPlan> eps_c2 = new ArrayList<PhysicalPlan>();
            eps_c2.addAll(sort.getSortPlans());
       
          POLocalRearrange lr_c2 = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
          try {
                lr_c2.setIndex(0);
            } catch (ExecException e) {
              int errCode = 2058;
              String msg = "Unable to set index on newly created POLocalRearrange.";             
                throw new PlanException(msg, errCode, PigException.BUG, e);
            }
          lr_c2.setKeyType((fields.length>1) ? DataType.TUPLE : keyType);
          lr_c2.setPlans(eps_c2);
          lr_c2.setResultType(DataType.TUPLE);
          mro.combinePlan.addAsLeaf(lr_c2);
        }
       
        POPackageLite pkg = new POPackageLite(new OperatorKey(scope,nig.getNextNodeId(scope)));
        pkg.setKeyType((fields == null || fields.length>1) ? DataType.TUPLE :
            keyType);
        pkg.setNumInps(1);      
        mro.reducePlan.add(pkg);
       
        PhysicalPlan ep = new PhysicalPlan();
        POProject prj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
        prj.setColumn(1);
        prj.setOverloaded(false);
        prj.setResultType(DataType.BAG);
        ep.add(prj);
        List<PhysicalPlan> eps2 = new ArrayList<PhysicalPlan>();
        eps2.add(ep);
        List<Boolean> flattened = new ArrayList<Boolean>();
        flattened.add(true);
View Full Code Here

      // it first adds all the plans to get key columns,
      List<PhysicalPlan> transformPlans = new ArrayList<PhysicalPlan>();
      transformPlans.addAll(groups);
       
      // then it adds a column for memory size
      POProject prjStar = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
        prjStar.setResultType(DataType.TUPLE);
        prjStar.setStar(true);           
       
        List<PhysicalOperator> ufInps = new ArrayList<PhysicalOperator>();
        ufInps.add(prjStar);
       
      PhysicalPlan ep = new PhysicalPlan();
View Full Code Here

              throw new RuntimeException(e);
            }
            // Set up the projections of the key columns
            if (fields == null) {
                PhysicalPlan ep = new PhysicalPlan();
                POProject prj = new POProject(new OperatorKey(scope,
                    nig.getNextNodeId(scope)));
                prj.setStar(true);
                prj.setOverloaded(false);
                prj.setResultType(DataType.TUPLE);
                ep.add(prj);
                eps1.add(ep);
                flat1.add(true);
            } else {
                for (Pair<Integer,Byte> i : fields) {
                    PhysicalPlan ep = new PhysicalPlan();
                    POProject prj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
                    // Check for i being equal to -1. -1 is used by getSortCols for a non POProject
                    // operator. Since Order by does not allow expression operators, it should never be set to
                    // -1
                    if (i.first == -1) {
                      int errCode = 2174;
                      String msg = "Internal exception. Could not create a sampler job";
                        throw new MRCompilerException(msg, errCode, PigException.BUG);
                    }
                    prj.setColumn(i.first);
                    prj.setOverloaded(false);
                    prj.setResultType(i.second);
                    ep.add(prj);
                    eps1.add(ep);
                    flat1.add(true);
                }
            }
        }else{
          for(int i=0; i<transformPlans.size(); i++) {
            eps1.add(transformPlans.get(i));
            flat1.add(true);
          }
        }
       
        // This foreach will pick the sort key columns from the RandomSampleLoader output
        POForEach nfe1 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)),-1,eps1,flat1);
        mro.mapPlan.addAsLeaf(nfe1);
       
        // Now set up a POLocalRearrange which has "all" as the key and the output of the
        // foreach will be the "value" out of POLocalRearrange
        PhysicalPlan ep1 = new PhysicalPlan();
        ConstantExpression ce = new ConstantExpression(new OperatorKey(scope,nig.getNextNodeId(scope)));
        ce.setValue("all");
        ce.setResultType(DataType.CHARARRAY);
        ep1.add(ce);
       
        List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
        eps.add(ep1);
       
        POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
        try {
            lr.setIndex(0);
        } catch (ExecException e) {
          int errCode = 2058;
          String msg = "Unable to set index on newly created POLocalRearrange.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
        lr.setKeyType(DataType.CHARARRAY);
        lr.setPlans(eps);
        lr.setResultType(DataType.TUPLE);
        lr.setAlias(sort.getAlias());
        mro.mapPlan.add(lr);
        mro.mapPlan.connect(nfe1, lr);
       
        mro.setMapDone(true);
       
        POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
        pkg.setKeyType(DataType.CHARARRAY);
        pkg.setNumInps(1);
        boolean[] inner = {false};
        pkg.setInner(inner);
        mro.reducePlan.add(pkg);
       
        // Lets start building the plan which will have the sort
        // for the foreach
        PhysicalPlan fe2Plan = new PhysicalPlan();
        // Top level project which just projects the tuple which is coming
        // from the foreach after the package
        POProject topPrj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
        topPrj.setColumn(1);
        topPrj.setResultType(DataType.TUPLE);
        topPrj.setOverloaded(true);
        fe2Plan.add(topPrj);
       
        // the projections which will form sort plans
        List<PhysicalPlan> nesSortPlanLst = new ArrayList<PhysicalPlan>();            
        if (sortKeyPlans != null) {
          for(int i=0; i<sortKeyPlans.size(); i++) {         
            nesSortPlanLst.add(sortKeyPlans.get(i));         
          }
        }else{  
          Pair<Integer,Byte>[] fields = null;
            try{
              fields = getSortCols(sort.getSortPlans());
            }catch(Exception e) {
              throw new RuntimeException(e);
            }
            // Set up the projections of the key columns
            if (fields == null) {
                PhysicalPlan ep = new PhysicalPlan();
                POProject prj = new POProject(new OperatorKey(scope,
                    nig.getNextNodeId(scope)));
                prj.setStar(true);
                prj.setOverloaded(false);
                prj.setResultType(DataType.TUPLE);
                ep.add(prj);
                nesSortPlanLst.add(ep);
            } else {
                for (int i=0; i<fields.length; i++) {
                    PhysicalPlan ep = new PhysicalPlan();
                    POProject prj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
                    prj.setColumn(i);
                    prj.setOverloaded(false);
                    prj.setResultType(fields[i].second);
                    ep.add(prj);
                    nesSortPlanLst.add(ep);
                }
            }                      
        }
       
        sort.setSortPlans(nesSortPlanLst);
        sort.setResultType(DataType.BAG);
        fe2Plan.add(sort);
        fe2Plan.connect(topPrj, sort);
       
        // The plan which will have a constant representing the
        // degree of parallelism for the final order by map-reduce job
        // this will either come from a "order by parallel x" in the script
        // or will be the default number of reducers for the cluster if
        // "parallel x" is not used in the script
        PhysicalPlan rpep = new PhysicalPlan();
        ConstantExpression rpce = new ConstantExpression(new OperatorKey(scope,nig.getNextNodeId(scope)));
        rpce.setRequestedParallelism(rp);
        int val = rp;
        if(val<=0){
            HExecutionEngine eng = pigContext.getExecutionEngine();
            if(pigContext.getExecType() != ExecType.LOCAL){
                try {
                    if(val<=0)
                        val = pigContext.defaultParallel;
                    if (val<=0)
                        val = eng.getJobConf().getNumReduceTasks();
                    if (val<=0)
                        val = 1;
                } catch (Exception e) {
                    int errCode = 6015;
                    String msg = "Problem getting the default number of reduces from the Job Client.";
                    throw new MRCompilerException(msg, errCode, PigException.REMOTE_ENVIRONMENT, e);
                }
            } else {
              val = 1; // local mode, set it to 1
            }
        }
        int parallelismForSort = (rp <= 0 ? val : rp);
        rpce.setValue(parallelismForSort);
       
        rpce.setResultType(DataType.INTEGER);
        rpep.add(rpce);
       
        List<PhysicalPlan> genEps = new ArrayList<PhysicalPlan>();
        genEps.add(rpep);
        genEps.add(fe2Plan);
       
        List<Boolean> flattened2 = new ArrayList<Boolean>();
        flattened2.add(false);
        flattened2.add(false);
       
        POForEach nfe2 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)),-1, genEps, flattened2);
        mro.reducePlan.add(nfe2);
        mro.reducePlan.connect(pkg, nfe2);
       
        // Let's connect the output from the foreach containing
        // number of quantiles and the sorted bag of samples to
        // another foreach with the FindQuantiles udf. The input
        // to the FindQuantiles udf is a project(*) which takes the
        // foreach input and gives it to the udf
        PhysicalPlan ep4 = new PhysicalPlan();
        POProject prjStar4 = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
        prjStar4.setResultType(DataType.TUPLE);
        prjStar4.setStar(true);
        ep4.add(prjStar4);
       
        List<PhysicalOperator> ufInps = new ArrayList<PhysicalOperator>();
        ufInps.add(prjStar4);
     
View Full Code Here

        cluster.shutDown();
    }
   
    private void setUp1() throws PlanException, ExecException{
        lr = GenPhyOp.topLocalRearrangeOPWithPlanPlain(0,0,db.iterator().next());
        POProject proj = GenPhyOp.exprProject();
        proj.setColumn(0);
        proj.setResultType(DataType.TUPLE);
        proj.setOverloaded(true);
        Tuple t = new DefaultTuple();
        t.append(db);
        proj.attachInput(t);
        List<PhysicalOperator> inputs = new ArrayList<PhysicalOperator>();
        inputs.add(proj);
        lr.setInputs(inputs);
    }
View Full Code Here

        POLocalRearrange lrT = GenPhyOp.topLocalRearrangeOPWithPlanPlain(0, 1, db.iterator().next());
        List<PhysicalPlan> plansT = lrT.getPlans();
        plans.add(plansT.get(0));
        lr.setPlans(plans);
       
        POProject proj = GenPhyOp.exprProject();
        proj.setColumn(0);
        proj.setResultType(DataType.TUPLE);
        proj.setOverloaded(true);
        Tuple t = new DefaultTuple();
        t.append(db);
        proj.attachInput(t);
        List<PhysicalOperator> inputs = new ArrayList<PhysicalOperator>();
        inputs.add(proj);
        lr.setInputs(inputs);
    }
View Full Code Here

    }
   
    POCast op = new POCast(new OperatorKey("", r.nextLong()), -1);
    LoadFunc load = new TestLoader();
    op.setFuncSpec(new FuncSpec(load.getClass().getName()));
    POProject prj = new POProject(new OperatorKey("", r.nextLong()), -1, 0);
    PhysicalPlan plan = new PhysicalPlan();
    plan.add(prj);
    plan.add(op);
    plan.connect(prj, op);
   
    prj.setResultType(DataType.INTEGER);
    // Plan to test when result type is ByteArray and casting is requested
    // for example casting of values coming out of map lookup.
    POCast opWithInputTypeAsBA = new POCast(new OperatorKey("", r.nextLong()), -1);
    PhysicalPlan planToTestBACasts = constructPlan(opWithInputTypeAsBA);
   
View Full Code Here

    }
   
    POCast op = new POCast(new OperatorKey("", r.nextLong()), -1);
    LoadFunc load = new TestLoader();
    op.setFuncSpec(new FuncSpec(load.getClass().getName()));
    POProject prj = new POProject(new OperatorKey("", r.nextLong()), -1, 0);
    PhysicalPlan plan = new PhysicalPlan();
    plan.add(prj);
    plan.add(op);
    plan.connect(prj, op);
   
    prj.setResultType(DataType.LONG);
   
    // Plan to test when result type is ByteArray and casting is requested
    // for example casting of values coming out of map lookup.
    POCast opWithInputTypeAsBA = new POCast(new OperatorKey("", r.nextLong()), -1);
    PhysicalPlan planToTestBACasts = constructPlan(opWithInputTypeAsBA);
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.