Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PORank


     * or is rank by (dense or not)
     **/
    @Override
    public void visit(LORank loRank) throws FrontendException {
        String scope = DEFAULT_SCOPE;
        PORank poRank;
        POCounter poCounter;

        Random randomGenerator = new Random();
        Long operationID = Math.abs(randomGenerator.nextLong());

        try {
            // Physical operations for the RANK operator:
            // In the case of a RANK BY operation, five steps are necessary:
            //   1.- Group by the fields involved in the rank operation: POPackage
            //   2.- In the case of multiple fields, the key (group field) is flattened: POForEach
            //   3.- Sort by the fields available after flattening: POSort
            //   4.- Each group is sequentially counted on each mapper through a global counter: POCounter
            //   5.- Global counters are summed and passed to the rank operation: PORank
            if(!loRank.isRowNumber()) {

                boolean[] flags = {false};

                MultiMap<Integer, LogicalExpressionPlan> expressionPlans = new MultiMap<Integer, LogicalExpressionPlan>();
                for(int i = 0 ; i < loRank.getRankColPlans().size() ; i++)
                    expressionPlans.put(i,loRank.getRankColPlans());

                POPackage poPackage = compileToLR_GR_PackTrio(loRank, null, flags, expressionPlans);
                poPackage.getPkgr().setPackageType(PackageType.GROUP);
                translateSoftLinks(loRank);

                List<Boolean> flattenLst = Arrays.asList(true, false);

                PhysicalPlan fep1 = new PhysicalPlan();
                POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj1.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj1.setColumn(0);
                feproj1.setResultType(poPackage.getPkgr().getKeyType());
                feproj1.setStar(false);
                feproj1.setOverloaded(false);
                fep1.add(feproj1);


                PhysicalPlan fep2 = new PhysicalPlan();
                POProject feproj2 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj2.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj2.setColumn(1);
                feproj2.setResultType(DataType.BAG);
                feproj2.setStar(false);
                feproj2.setOverloaded(false);
                fep2.add(feproj2);
                List<PhysicalPlan> fePlans = Arrays.asList(fep1, fep2);

                POForEach poForEach = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1, fePlans, flattenLst);

                List<LogicalExpressionPlan> rankPlans = loRank.getRankColPlans();
                byte[] newTypes = new byte[rankPlans.size()];

                for(int i = 0; i < rankPlans.size(); i++) {
                    LogicalExpressionPlan loep = rankPlans.get(i);
                    Iterator<Operator> inpOpers = loep.getOperators();

                    while(inpOpers.hasNext()) {
                        Operator oper = inpOpers.next();
                        newTypes[i] = ((ProjectExpression) oper).getType();
                    }
                }

                List<PhysicalPlan> newPhysicalPlan = new ArrayList<PhysicalPlan>();
                List<Boolean> newOrderPlan = new ArrayList<Boolean>();

                for(int i = 0; i < loRank.getRankColPlans().size(); i++) {
                    PhysicalPlan fep3 = new PhysicalPlan();
                    POProject feproj3 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                    feproj3.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                    feproj3.setColumn(i);
                    feproj3.setResultType(newTypes[i]);
                    feproj3.setStar(false);
                    feproj3.setOverloaded(false);
                    fep3.add(feproj3);

                    newPhysicalPlan.add(fep3);
                    newOrderPlan.add(loRank.getAscendingCol().get(i));
                }

                POSort poSort;
                poSort = new POSort(new OperatorKey(scope, nodeGen
                        .getNextNodeId(scope)), -1, null,
                        newPhysicalPlan, newOrderPlan, null);
                poSort.addOriginalLocation(loRank.getAlias(), loRank.getLocation());


                poCounter = new POCounter(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                newPhysicalPlan, newOrderPlan);

                poCounter.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poCounter.setResultType(DataType.TUPLE);
                poCounter.setIsRowNumber(loRank.isRowNumber());
                poCounter.setIsDenseRank(loRank.isDenseRank());
                poCounter.setOperationID(String.valueOf(operationID));

                poRank = new PORank(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                newPhysicalPlan, newOrderPlan);

                poRank.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poRank.setResultType(DataType.TUPLE);
                poRank.setOperationID(String.valueOf(operationID));

                List<Boolean> flattenLst2 = Arrays.asList(false, true);

                PhysicalPlan fep12 = new PhysicalPlan();
                POProject feproj12 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj12.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj12.setColumn(0);
                feproj12.setResultType(DataType.LONG);
                feproj12.setStar(false);
                feproj12.setOverloaded(false);
                fep12.add(feproj12);


                PhysicalPlan fep22 = new PhysicalPlan();
                POProject feproj22 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj22.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj22.setColumn(loRank.getRankColPlans().size()+1);
                feproj22.setResultType(DataType.BAG);
                feproj22.setStar(false);
                feproj22.setOverloaded(false);
                fep22.add(feproj22);
                List<PhysicalPlan> fePlans2 = Arrays.asList(fep12, fep22);

                POForEach poForEach2 = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1, fePlans2, flattenLst2);

                currentPlan.add(poForEach);
                currentPlan.add(poSort);
                currentPlan.add(poCounter);
                currentPlan.add(poRank);
                currentPlan.add(poForEach2);

                try {
                    currentPlan.connect(poPackage, poForEach);
                    currentPlan.connect(poForEach, poSort);
                    currentPlan.connect(poSort, poCounter);
                    currentPlan.connect(poCounter, poRank);
                    currentPlan.connect(poRank, poForEach2);
                } catch (PlanException e) {
                    throw new LogicalToPhysicalTranslatorException(e.getMessage(),e.getErrorCode(),e.getErrorSource(),e);
                }

                logToPhyMap.put(loRank, poForEach2);

                // In the case of a RANK operation, two steps are used:
                //   1.- Each tuple is counted sequentially on each mapper, producing global counters
                //   2.- Global counters are gathered and summed; each tuple looks up its respective counter value
                //       in order to calculate the corresponding rank value.
            } else {

                List<LogicalExpressionPlan> logPlans = loRank.getRankColPlans();
                List<PhysicalPlan> rankPlans = new ArrayList<PhysicalPlan>(logPlans.size());

                // convert all the logical expression plans to physical expression plans
                currentPlans.push(currentPlan);
                for (LogicalExpressionPlan plan : logPlans) {
                    currentPlan = new PhysicalPlan();
                    PlanWalker childWalker = new ReverseDependencyOrderWalkerWOSeenChk(plan);
                    pushWalker(childWalker);
                    childWalker.walk(new ExpToPhyTranslationVisitor( currentWalker.getPlan(),
                            childWalker, loRank, currentPlan, logToPhyMap));
                    rankPlans.add(currentPlan);
                    popWalker();
                }
                currentPlan = currentPlans.pop();



                poCounter = new POCounter(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                rankPlans, loRank.getAscendingCol());

                poCounter.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poCounter.setResultType(DataType.TUPLE);
                poCounter.setIsRowNumber(loRank.isRowNumber());
                poCounter.setIsDenseRank(loRank.isDenseRank());
                poCounter.setOperationID(String.valueOf(operationID));

                poRank = new PORank(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                rankPlans, loRank.getAscendingCol());

                poRank.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poRank.setResultType(DataType.TUPLE);
                poRank.setOperationID(String.valueOf(operationID));

                currentPlan.add(poCounter);
                currentPlan.add(poRank);

                List<Operator> op = loRank.getPlan().getPredecessors(loRank);
View Full Code Here


     * or is rank by (dense or not)
     **/
    @Override
    public void visit(LORank loRank) throws FrontendException {
        String scope = DEFAULT_SCOPE;
        PORank poRank;
        POCounter poCounter;

        Random randomGenerator = new Random();
        Long operationID = Math.abs(randomGenerator.nextLong());

        try {
            // Physical operations for the RANK operator:
            // In the case of a RANK BY operation, five steps are necessary:
            //   1.- Group by the fields involved in the rank operation: POPackage
            //   2.- In the case of multiple fields, the key (group field) is flattened: POForEach
            //   3.- Sort by the fields available after flattening: POSort
            //   4.- Each group is sequentially counted on each mapper through a global counter: POCounter
            //   5.- Global counters are summed and passed to the rank operation: PORank
            if(!loRank.isRowNumber()) {

                boolean[] flags = {false};

                MultiMap<Integer, LogicalExpressionPlan> expressionPlans = new MultiMap<Integer, LogicalExpressionPlan>();
                for(int i = 0 ; i < loRank.getRankColPlans().size() ; i++)
                    expressionPlans.put(i,loRank.getRankColPlans());

                POPackage poPackage = compileToLR_GR_PackTrio(loRank, null, flags, expressionPlans);
                poPackage.getPkgr().setPackageType(PackageType.GROUP);
                translateSoftLinks(loRank);

                List<Boolean> flattenLst = Arrays.asList(true, false);

                PhysicalPlan fep1 = new PhysicalPlan();
                POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj1.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj1.setColumn(0);
                feproj1.setResultType(poPackage.getPkgr().getKeyType());
                feproj1.setStar(false);
                feproj1.setOverloaded(false);
                fep1.add(feproj1);


                PhysicalPlan fep2 = new PhysicalPlan();
                POProject feproj2 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj2.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj2.setColumn(1);
                feproj2.setResultType(DataType.BAG);
                feproj2.setStar(false);
                feproj2.setOverloaded(false);
                fep2.add(feproj2);
                List<PhysicalPlan> fePlans = Arrays.asList(fep1, fep2);

                POForEach poForEach = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1, fePlans, flattenLst);

                List<LogicalExpressionPlan> rankPlans = loRank.getRankColPlans();
                byte[] newTypes = new byte[rankPlans.size()];

                for(int i = 0; i < rankPlans.size(); i++) {
                    LogicalExpressionPlan loep = rankPlans.get(i);
                    Iterator<Operator> inpOpers = loep.getOperators();

                    while(inpOpers.hasNext()) {
                        Operator oper = inpOpers.next();
                        newTypes[i] = ((ProjectExpression) oper).getType();
                    }
                }

                List<PhysicalPlan> newPhysicalPlan = new ArrayList<PhysicalPlan>();
                List<Boolean> newOrderPlan = new ArrayList<Boolean>();

                for(int i = 0; i < loRank.getRankColPlans().size(); i++) {
                    PhysicalPlan fep3 = new PhysicalPlan();
                    POProject feproj3 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                    feproj3.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                    feproj3.setColumn(i);
                    feproj3.setResultType(newTypes[i]);
                    feproj3.setStar(false);
                    feproj3.setOverloaded(false);
                    fep3.add(feproj3);

                    newPhysicalPlan.add(fep3);
                    newOrderPlan.add(loRank.getAscendingCol().get(i));
                }

                POSort poSort;
                poSort = new POSort(new OperatorKey(scope, nodeGen
                        .getNextNodeId(scope)), -1, null,
                        newPhysicalPlan, newOrderPlan, null);
                poSort.addOriginalLocation(loRank.getAlias(), loRank.getLocation());


                poCounter = new POCounter(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                newPhysicalPlan, newOrderPlan);

                poCounter.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poCounter.setResultType(DataType.TUPLE);
                poCounter.setIsRowNumber(loRank.isRowNumber());
                poCounter.setIsDenseRank(loRank.isDenseRank());
                poCounter.setOperationID(String.valueOf(operationID));

                poRank = new PORank(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                newPhysicalPlan, newOrderPlan);

                poRank.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poRank.setResultType(DataType.TUPLE);
                poRank.setOperationID(String.valueOf(operationID));

                List<Boolean> flattenLst2 = Arrays.asList(false, true);

                PhysicalPlan fep12 = new PhysicalPlan();
                POProject feproj12 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj12.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj12.setColumn(0);
                feproj12.setResultType(DataType.LONG);
                feproj12.setStar(false);
                feproj12.setOverloaded(false);
                fep12.add(feproj12);


                PhysicalPlan fep22 = new PhysicalPlan();
                POProject feproj22 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj22.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj22.setColumn(loRank.getRankColPlans().size()+1);
                feproj22.setResultType(DataType.BAG);
                feproj22.setStar(false);
                feproj22.setOverloaded(false);
                fep22.add(feproj22);
                List<PhysicalPlan> fePlans2 = Arrays.asList(fep12, fep22);

                POForEach poForEach2 = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1, fePlans2, flattenLst2);

                currentPlan.add(poForEach);
                currentPlan.add(poSort);
                currentPlan.add(poCounter);
                currentPlan.add(poRank);
                currentPlan.add(poForEach2);

                try {
                    currentPlan.connect(poPackage, poForEach);
                    currentPlan.connect(poForEach, poSort);
                    currentPlan.connect(poSort, poCounter);
                    currentPlan.connect(poCounter, poRank);
                    currentPlan.connect(poRank, poForEach2);
                } catch (PlanException e) {
                    throw new LogicalToPhysicalTranslatorException(e.getMessage(),e.getErrorCode(),e.getErrorSource(),e);
                }

                logToPhyMap.put(loRank, poForEach2);

                // In the case of a RANK operation, two steps are used:
                //   1.- Each tuple is counted sequentially on each mapper, producing global counters
                //   2.- Global counters are gathered and summed; each tuple looks up its respective counter value
                //       in order to calculate the corresponding rank value.
            } else {

                List<LogicalExpressionPlan> logPlans = loRank.getRankColPlans();
                List<PhysicalPlan> rankPlans = new ArrayList<PhysicalPlan>(logPlans.size());

                // convert all the logical expression plans to physical expression plans
                currentPlans.push(currentPlan);
                for (LogicalExpressionPlan plan : logPlans) {
                    currentPlan = new PhysicalPlan();
                    PlanWalker childWalker = new ReverseDependencyOrderWalkerWOSeenChk(plan);
                    pushWalker(childWalker);
                    childWalker.walk(new ExpToPhyTranslationVisitor( currentWalker.getPlan(),
                            childWalker, loRank, currentPlan, logToPhyMap));
                    rankPlans.add(currentPlan);
                    popWalker();
                }
                currentPlan = currentPlans.pop();



                poCounter = new POCounter(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                rankPlans, loRank.getAscendingCol());

                poCounter.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poCounter.setResultType(DataType.TUPLE);
                poCounter.setIsRowNumber(loRank.isRowNumber());
                poCounter.setIsDenseRank(loRank.isDenseRank());
                poCounter.setOperationID(String.valueOf(operationID));

                poRank = new PORank(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                rankPlans, loRank.getAscendingCol());

                poRank.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poRank.setResultType(DataType.TUPLE);
                poRank.setOperationID(String.valueOf(operationID));

                currentPlan.add(poCounter);
                currentPlan.add(poRank);

                List<Operator> op = loRank.getPlan().getPredecessors(loRank);
View Full Code Here

     * or is rank by (dense or not)
     **/
    @Override
    public void visit(LORank loRank) throws FrontendException {
        String scope = DEFAULT_SCOPE;
        PORank poRank;
        POCounter poCounter;

        Random randomGenerator = new Random();
        Long operationID = Math.abs(randomGenerator.nextLong());

        try {
            // Physical operations for the RANK operator:
            // In the case of a RANK BY operation, five steps are necessary:
            //   1.- Group by the fields involved in the rank operation: POPackage
            //   2.- In the case of multiple fields, the key (group field) is flattened: POForEach
            //   3.- Sort by the fields available after flattening: POSort
            //   4.- Each group is sequentially counted on each mapper through a global counter: POCounter
            //   5.- Global counters are summed and passed to the rank operation: PORank
            if(!loRank.isRowNumber()) {

                boolean[] flags = {false};

                MultiMap<Integer, LogicalExpressionPlan> expressionPlans = new MultiMap<Integer, LogicalExpressionPlan>();
                for(int i = 0 ; i < loRank.getRankColPlans().size() ; i++)
                    expressionPlans.put(i,loRank.getRankColPlans());

                POPackage poPackage = compileToLR_GR_PackTrio(loRank, null, flags, expressionPlans);
                poPackage.setPackageType(PackageType.GROUP);
                translateSoftLinks(loRank);

                List<Boolean> flattenLst = Arrays.asList(true, false);

                PhysicalPlan fep1 = new PhysicalPlan();
                POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj1.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj1.setColumn(0);
                feproj1.setResultType(poPackage.getKeyType());
                feproj1.setStar(false);
                feproj1.setOverloaded(false);
                fep1.add(feproj1);


                PhysicalPlan fep2 = new PhysicalPlan();
                POProject feproj2 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj2.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj2.setColumn(1);
                feproj2.setResultType(DataType.BAG);
                feproj2.setStar(false);
                feproj2.setOverloaded(false);
                fep2.add(feproj2);
                List<PhysicalPlan> fePlans = Arrays.asList(fep1, fep2);

                POForEach poForEach = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1, fePlans, flattenLst);

                List<LogicalExpressionPlan> rankPlans = loRank.getRankColPlans();
                byte[] newTypes = new byte[rankPlans.size()];

                for(int i = 0; i < rankPlans.size(); i++) {
                    LogicalExpressionPlan loep = rankPlans.get(i);
                    Iterator<Operator> inpOpers = loep.getOperators();

                    while(inpOpers.hasNext()) {
                        Operator oper = inpOpers.next();
                        newTypes[i] = ((ProjectExpression) oper).getType();
                    }
                }

                List<PhysicalPlan> newPhysicalPlan = new ArrayList<PhysicalPlan>();
                List<Boolean> newOrderPlan = new ArrayList<Boolean>();

                for(int i = 0; i < loRank.getRankColPlans().size(); i++) {
                    PhysicalPlan fep3 = new PhysicalPlan();
                    POProject feproj3 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                    feproj3.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                    feproj3.setColumn(i);
                    feproj3.setResultType(newTypes[i]);
                    feproj3.setStar(false);
                    feproj3.setOverloaded(false);
                    fep3.add(feproj3);

                    newPhysicalPlan.add(fep3);
                    newOrderPlan.add(loRank.getAscendingCol().get(i));
                }

                POSort poSort;
                poSort = new POSort(new OperatorKey(scope, nodeGen
                        .getNextNodeId(scope)), -1, null,
                        newPhysicalPlan, newOrderPlan, null);
                poSort.addOriginalLocation(loRank.getAlias(), loRank.getLocation());


                poCounter = new POCounter(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                newPhysicalPlan, newOrderPlan);

                poCounter.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poCounter.setResultType(DataType.TUPLE);
                poCounter.setIsRowNumber(loRank.isRowNumber());
                poCounter.setIsDenseRank(loRank.isDenseRank());
                poCounter.setOperationID(String.valueOf(operationID));

                poRank = new PORank(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                newPhysicalPlan, newOrderPlan);

                poRank.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poRank.setResultType(DataType.TUPLE);
                poRank.setOperationID(String.valueOf(operationID));

                List<Boolean> flattenLst2 = Arrays.asList(false, true);

                PhysicalPlan fep12 = new PhysicalPlan();
                POProject feproj12 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj12.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj12.setColumn(0);
                feproj12.setResultType(DataType.LONG);
                feproj12.setStar(false);
                feproj12.setOverloaded(false);
                fep12.add(feproj12);


                PhysicalPlan fep22 = new PhysicalPlan();
                POProject feproj22 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj22.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj22.setColumn(loRank.getRankColPlans().size()+1);
                feproj22.setResultType(DataType.BAG);
                feproj22.setStar(false);
                feproj22.setOverloaded(false);
                fep22.add(feproj22);
                List<PhysicalPlan> fePlans2 = Arrays.asList(fep12, fep22);

                POForEach poForEach2 = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1, fePlans2, flattenLst2);

                currentPlan.add(poForEach);
                currentPlan.add(poSort);
                currentPlan.add(poCounter);
                currentPlan.add(poRank);
                currentPlan.add(poForEach2);

                try {
                    currentPlan.connect(poPackage, poForEach);
                    currentPlan.connect(poForEach, poSort);
                    currentPlan.connect(poSort, poCounter);
                    currentPlan.connect(poCounter, poRank);
                    currentPlan.connect(poRank, poForEach2);
                } catch (PlanException e) {
                    throw new LogicalToPhysicalTranslatorException(e.getMessage(),e.getErrorCode(),e.getErrorSource(),e);
                }

                logToPhyMap.put(loRank, poForEach2);

                // In the case of a RANK operation, two steps are used:
                //   1.- Each tuple is counted sequentially on each mapper, producing global counters
                //   2.- Global counters are gathered and summed; each tuple looks up its respective counter value
                //       in order to calculate the corresponding rank value.
            } else {

                List<LogicalExpressionPlan> logPlans = loRank.getRankColPlans();
                List<PhysicalPlan> rankPlans = new ArrayList<PhysicalPlan>(logPlans.size());

                // convert all the logical expression plans to physical expression plans
                currentPlans.push(currentPlan);
                for (LogicalExpressionPlan plan : logPlans) {
                    currentPlan = new PhysicalPlan();
                    PlanWalker childWalker = new ReverseDependencyOrderWalkerWOSeenChk(plan);
                    pushWalker(childWalker);
                    childWalker.walk(new ExpToPhyTranslationVisitor( currentWalker.getPlan(),
                            childWalker, loRank, currentPlan, logToPhyMap));
                    rankPlans.add(currentPlan);
                    popWalker();
                }
                currentPlan = currentPlans.pop();



                poCounter = new POCounter(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                rankPlans, loRank.getAscendingCol());

                poCounter.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poCounter.setResultType(DataType.TUPLE);
                poCounter.setIsRowNumber(loRank.isRowNumber());
                poCounter.setIsDenseRank(loRank.isDenseRank());
                poCounter.setOperationID(String.valueOf(operationID));

                poRank = new PORank(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                rankPlans, loRank.getAscendingCol());

                poRank.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poRank.setResultType(DataType.TUPLE);
                poRank.setOperationID(String.valueOf(operationID));

                currentPlan.add(poCounter);
                currentPlan.add(poRank);

                List<Operator> op = loRank.getPlan().getPredecessors(loRank);
View Full Code Here

     * or is rank by (dense or not)
     **/
    @Override
    public void visit(LORank loRank) throws FrontendException {
        String scope = DEFAULT_SCOPE;
        PORank poRank;
        POCounter poCounter;

        Random randomGenerator = new Random();
        Long operationID = Math.abs(randomGenerator.nextLong());

        try {
            // Physical operations for the RANK operator:
            // In the case of a RANK BY operation, five steps are necessary:
            //   1.- Group by the fields involved in the rank operation: POPackage
            //   2.- In the case of multiple fields, the key (group field) is flattened: POForEach
            //   3.- Sort by the fields available after flattening: POSort
            //   4.- Each group is sequentially counted on each mapper through a global counter: POCounter
            //   5.- Global counters are summed and passed to the rank operation: PORank
            if(!loRank.isRowNumber()) {

                boolean[] flags = {false};

                MultiMap<Integer, LogicalExpressionPlan> expressionPlans = new MultiMap<Integer, LogicalExpressionPlan>();
                for(int i = 0 ; i < loRank.getRankColPlans().size() ; i++)
                    expressionPlans.put(i,loRank.getRankColPlans());

                POPackage poPackage = compileToLR_GR_PackTrio(loRank, null, flags, expressionPlans);
                poPackage.setPackageType(PackageType.GROUP);
                translateSoftLinks(loRank);

                List<Boolean> flattenLst = Arrays.asList(true, false);

                PhysicalPlan fep1 = new PhysicalPlan();
                POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj1.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj1.setColumn(0);
                feproj1.setResultType(poPackage.getKeyType());
                feproj1.setStar(false);
                feproj1.setOverloaded(false);
                fep1.add(feproj1);


                PhysicalPlan fep2 = new PhysicalPlan();
                POProject feproj2 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj2.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj2.setColumn(1);
                feproj2.setResultType(DataType.BAG);
                feproj2.setStar(false);
                feproj2.setOverloaded(false);
                fep2.add(feproj2);
                List<PhysicalPlan> fePlans = Arrays.asList(fep1, fep2);

                POForEach poForEach = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1, fePlans, flattenLst);

                List<LogicalExpressionPlan> rankPlans = loRank.getRankColPlans();
                byte[] newTypes = new byte[rankPlans.size()];

                for(int i = 0; i < rankPlans.size(); i++) {
                    LogicalExpressionPlan loep = rankPlans.get(i);
                    Iterator<Operator> inpOpers = loep.getOperators();

                    while(inpOpers.hasNext()) {
                        Operator oper = inpOpers.next();
                        newTypes[i] = ((ProjectExpression) oper).getType();
                    }
                }

                List<PhysicalPlan> newPhysicalPlan = new ArrayList<PhysicalPlan>();
                List<Boolean> newOrderPlan = new ArrayList<Boolean>();

                for(int i = 0; i < loRank.getRankColPlans().size(); i++) {
                    PhysicalPlan fep3 = new PhysicalPlan();
                    POProject feproj3 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                    feproj3.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                    feproj3.setColumn(i);
                    feproj3.setResultType(newTypes[i]);
                    feproj3.setStar(false);
                    feproj3.setOverloaded(false);
                    fep3.add(feproj3);

                    newPhysicalPlan.add(fep3);
                    newOrderPlan.add(loRank.getAscendingCol().get(i));
                }

                POSort poSort;
                poSort = new POSort(new OperatorKey(scope, nodeGen
                        .getNextNodeId(scope)), -1, null,
                        newPhysicalPlan, newOrderPlan, null);
                poSort.addOriginalLocation(loRank.getAlias(), loRank.getLocation());


                poCounter = new POCounter(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                newPhysicalPlan, newOrderPlan);

                poCounter.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poCounter.setResultType(DataType.TUPLE);
                poCounter.setIsRowNumber(loRank.isRowNumber());
                poCounter.setIsDenseRank(loRank.isDenseRank());
                poCounter.setOperationID(String.valueOf(operationID));

                poRank = new PORank(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                newPhysicalPlan, newOrderPlan);

                poRank.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poRank.setResultType(DataType.TUPLE);
                poRank.setOperationID(String.valueOf(operationID));

                List<Boolean> flattenLst2 = Arrays.asList(false, true);

                PhysicalPlan fep12 = new PhysicalPlan();
                POProject feproj12 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj12.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj12.setColumn(0);
                feproj12.setResultType(DataType.LONG);
                feproj12.setStar(false);
                feproj12.setOverloaded(false);
                fep12.add(feproj12);


                PhysicalPlan fep22 = new PhysicalPlan();
                POProject feproj22 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1);
                feproj22.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                feproj22.setColumn(loRank.getRankColPlans().size()+1);
                feproj22.setResultType(DataType.BAG);
                feproj22.setStar(false);
                feproj22.setOverloaded(false);
                fep22.add(feproj22);
                List<PhysicalPlan> fePlans2 = Arrays.asList(fep12, fep22);

                POForEach poForEach2 = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), -1, fePlans2, flattenLst2);

                currentPlan.add(poForEach);
                currentPlan.add(poSort);
                currentPlan.add(poCounter);
                currentPlan.add(poRank);
                currentPlan.add(poForEach2);

                try {
                    currentPlan.connect(poPackage, poForEach);
                    currentPlan.connect(poForEach, poSort);
                    currentPlan.connect(poSort, poCounter);
                    currentPlan.connect(poCounter, poRank);
                    currentPlan.connect(poRank, poForEach2);
                } catch (PlanException e) {
                    throw new LogicalToPhysicalTranslatorException(e.getMessage(),e.getErrorCode(),e.getErrorSource(),e);
                }

                logToPhyMap.put(loRank, poForEach2);

                // In case of a RANK operation, two steps are used:
                //   1.- Each tuple is counted sequentially on each mapper, producing global counters
                //   2.- Global counters are gathered and summed; each tuple uses the respective counter value
                //       in order to calculate the corresponding rank value.
            } else {

                List<LogicalExpressionPlan> logPlans = loRank.getRankColPlans();
                List<PhysicalPlan> rankPlans = new ArrayList<PhysicalPlan>(logPlans.size());

                // convert all the logical expression plans to physical expression plans
                currentPlans.push(currentPlan);
                for (LogicalExpressionPlan plan : logPlans) {
                    currentPlan = new PhysicalPlan();
                    PlanWalker childWalker = new ReverseDependencyOrderWalkerWOSeenChk(plan);
                    pushWalker(childWalker);
                    childWalker.walk(new ExpToPhyTranslationVisitor( currentWalker.getPlan(),
                            childWalker, loRank, currentPlan, logToPhyMap));
                    rankPlans.add(currentPlan);
                    popWalker();
                }
                currentPlan = currentPlans.pop();



                poCounter = new POCounter(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                rankPlans, loRank.getAscendingCol());

                poCounter.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poCounter.setResultType(DataType.TUPLE);
                poCounter.setIsRowNumber(loRank.isRowNumber());
                poCounter.setIsDenseRank(loRank.isDenseRank());
                poCounter.setOperationID(String.valueOf(operationID));

                poRank = new PORank(
                        new OperatorKey(scope, nodeGen
                                .getNextNodeId(scope)), -1 , null,
                                rankPlans, loRank.getAscendingCol());

                poRank.addOriginalLocation(loRank.getAlias(), loRank.getLocation());
                poRank.setResultType(DataType.TUPLE);
                poRank.setOperationID(String.valueOf(operationID));

                currentPlan.add(poCounter);
                currentPlan.add(poRank);

                List<Operator> op = loRank.getPlan().getPredecessors(loRank);
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PORank

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.