Package org.apache.pig.impl.plan

Examples of org.apache.pig.impl.plan.PlanException
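
The fragments below all raise PlanException in the same way: pick a numeric error code, build a message, and throw with severity PigException.BUG, attaching the underlying exception when one is available. Below is a minimal sketch of that idiom only, using the constructor shapes visible in the fragments; the class and method names are hypothetical.

import org.apache.pig.PigException;
import org.apache.pig.impl.plan.PlanException;

public class PlanExceptionIdiomSketch {

    // Mirrors the recurring guard seen in the compiler fragments: both phases of
    // an MR operator are already closed, which is treated as an internal bug.
    static void checkPhases(boolean mapDone, boolean reduceDone) throws PlanException {
        if (mapDone && reduceDone) {
            int errCode = 2022;
            String msg = "Both map and reduce phases have been done. This is unexpected while compiling.";
            throw new PlanException(msg, errCode, PigException.BUG);
        }
    }

    // Mirrors the wrap-the-cause variant used when a lower-level exception
    // (e.g. an ExecException) is caught while building the plan.
    static void rethrowAsBug(Exception cause) throws PlanException {
        int errCode = 2058;
        String msg = "Unable to set index on newly created POLocalRearrange.";
        throw new PlanException(msg, errCode, PigException.BUG, cause);
    }
}

The same two shapes recur throughout the examples below, with different error codes and messages.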


            }
           
            else{
                int errCode = 2022;
                String msg = "Both map and reduce phases have been done. This is unexpected while compiling.";
                throw new PlanException(msg, errCode, PigException.BUG);
            }
           
            joinOp.setupRightPipeline(rightPipelinePlan);
                       
            // At this point, we must be operating on the map plan of the right input, and it should contain nothing other than a POLoad.
            POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0);
            joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec());

            // Replace POLoad with indexer.
            String[] indexerArgs = new String[3];
            indexerArgs[0] = rightLoader.getLFile().getFuncSpec().toString();
            if (! (PigContext.instantiateFuncFromSpec(indexerArgs[0]) instanceof SamplableLoader)){
                int errCode = 1104;
                String errMsg = "Right input of merge-join must implement SamplableLoader interface. The specified loader " + indexerArgs[0] + " doesn't implement it";
                throw new MRCompilerException(errMsg, errCode);
            }
            List<PhysicalPlan> rightInpPlans = joinOp.getInnerPlansOf(1);
            indexerArgs[1] = ObjectSerializer.serialize((Serializable)rightInpPlans);
            indexerArgs[2] = ObjectSerializer.serialize(rightPipelinePlan);
            FileSpec lFile = new FileSpec(rightLoader.getLFile().getFileName(),new FuncSpec(MergeJoinIndexer.class.getName(), indexerArgs));
            rightLoader.setLFile(lFile);

            // The loader of this MROper will return a tuple of the form (key1, key2, .., filename, offset).
            // Now set up a POLocalRearrange which has "all" as the key and the tuple fetched
            // by the loader as the "value" of the POLocalRearrange.
            // Sorting of the index could possibly be achieved by using Hadoop's sort between map and reduce
            // instead of Pig doing the sort. If so, it would simplify a lot of the code below.
           
            PhysicalPlan lrPP = new PhysicalPlan();
            ConstantExpression ce = new ConstantExpression(new OperatorKey(scope,nig.getNextNodeId(scope)));
            ce.setValue("all");
            ce.setResultType(DataType.CHARARRAY);
            lrPP.add(ce);

            List<PhysicalPlan> lrInnerPlans = new ArrayList<PhysicalPlan>();
            lrInnerPlans.add(lrPP);

            POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
            lr.setIndex(0);
            lr.setKeyType(DataType.CHARARRAY);
            lr.setPlans(lrInnerPlans);
            lr.setResultType(DataType.TUPLE);
            rightMROpr.mapPlan.addAsLeaf(lr);

            rightMROpr.setMapDone(true);

            // On the reduce side of this indexing job, there will be a global rearrange followed by a POSort.
            // The output of the POSort will be the index file, dumped onto the DFS.

            // First add POPackage.
            POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
            pkg.setKeyType(DataType.CHARARRAY);
            pkg.setNumInps(1);
            pkg.setInner(new boolean[]{false});
            rightMROpr.reducePlan.add(pkg);

            // Next project tuples from the bag created by POPackage.
            POProject topPrj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
            topPrj.setColumn(1);
            topPrj.setResultType(DataType.TUPLE);
            topPrj.setOverloaded(true);
            rightMROpr.reducePlan.add(topPrj);
            rightMROpr.reducePlan.connect(pkg, topPrj);

            // Now create and add POSort. Sort plan is project *.
            List<PhysicalPlan> sortPlans = new ArrayList<PhysicalPlan>(1);
            PhysicalPlan innerSortPlan = new PhysicalPlan();
            POProject prj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
            prj.setStar(true);
            prj.setOverloaded(false);
            prj.setResultType(DataType.TUPLE);
            innerSortPlan.add(prj);
            sortPlans.add(innerSortPlan);

            // Currently we assume all columns are in ascending order.
            // Add two because the indexer adds filename and offset in addition to the keys.
            List<Boolean>  mAscCols = new ArrayList<Boolean>(rightInpPlans.size()+2);
            for(int i=0; i< rightInpPlans.size()+2; i++)
                mAscCols.add(true);

            POSort sortOp = new POSort(new OperatorKey(scope,nig.getNextNodeId(scope)),1, null, sortPlans, mAscCols, null);
            rightMROpr.reducePlan.add(sortOp);
            rightMROpr.reducePlan.connect(topPrj, sortOp);

            POStore st = getStore();
            FileSpec strFile = getTempFileSpec();
            st.setSFile(strFile);
            rightMROpr.reducePlan.addAsLeaf(st);
            rightMROpr.setReduceDone(true);
  
            joinOp.setIndexFile(strFile);
           
            // We are done with the right side. Let's work on the left now.
            // The join will be materialized in leftMROper.
            if(!curMROp.mapDone) // Life is easy
                curMROp.mapPlan.addAsLeaf(joinOp);
           
            else if(!curMROp.reduceDone){  // This is a map-side join. Close this MROper and start afresh.
                POStore leftStore = getStore();
                FileSpec leftStrFile = getTempFileSpec();
                leftStore.setSFile(leftStrFile);
                curMROp.setReduceDone(true);
                curMROp = startNew(leftStrFile, curMROp);
                curMROp.mapPlan.addAsLeaf(joinOp);
            }
           
            else{
                int errCode = 2022;
                String msg = "Both map and reduce phases have been done. This is unexpected while compiling.";
                throw new PlanException(msg, errCode, PigException.BUG);
            }

            // We want to ensure the indexing job runs prior to the actual join job, so connect them in order.
            MRPlan.connect(rightMROpr, curMROp);
        }


        mro.reducePlan.addAsLeaf(str);
        mro.setReduceDone(true);
      } else {
        int errCode = 2022;
        String msg = "Both map and reduce phases have been done. This is unexpected while compiling.";
        throw new PlanException(msg, errCode, PigException.BUG);
      }
     
      FileSpec partitionFile = getTempFileSpec();
      int rp = op.getRequestedParallelism();
     
      Pair<MapReduceOper, Integer> sampleJobPair = getSkewedJoinSampleJob(op, mro, fSpec, partitionFile, rp);           
      rp = sampleJobPair.second;
     
      // Set the parallelism of the skewed join to the value calculated by the sampling job.
      // If "parallel" is specified in the join statement, "rp" is equal to that number;
      // if not specified, use the value that the sampling process calculated based on the default.
      op.setRequestedParallelism(rp);
           
      // Load the temp file for the first table as the input of the join.
      MapReduceOper[] joinInputs = new MapReduceOper[] {startNew(fSpec, sampleJobPair.first), compiledInputs[1]};           
      MapReduceOper[] rearrangeOutputs = new MapReduceOper[2];                      
     
      compiledInputs = new MapReduceOper[] {joinInputs[0]};
      // Run POLocalRearrange for the first join table.
      POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)), rp);           
      try {
        lr.setIndex(0);               
      } catch (ExecException e) {
        int errCode = 2058;
        String msg = "Unable to set index on newly created POLocalRearrange.";
        throw new PlanException(msg, errCode, PigException.BUG, e);
      }
     
      List<PhysicalOperator> l = plan.getPredecessors(op);
      MultiMap<PhysicalOperator, PhysicalPlan> joinPlans = op.getJoinPlans();
      List<PhysicalPlan> groups = (List<PhysicalPlan>)joinPlans.get(l.get(0));
      // Check the type of the group keys; if there is more than one field, the key is a TUPLE.
      byte type = DataType.TUPLE;
      if (groups.size() == 1) {
        type = groups.get(0).getLeaves().get(0).getResultType();               
      }              
     
      lr.setKeyType(type);           
      lr.setPlans(groups);
      lr.setResultType(DataType.TUPLE);
     
      lr.visit(this);
      if(lr.getRequestedParallelism() > curMROp.requestedParallelism)
        curMROp.requestedParallelism = lr.getRequestedParallelism();
      rearrangeOutputs[0] = curMROp;
     
      compiledInputs = new MapReduceOper[] {joinInputs[1]};      
      // If the map for the current input is already closed, start a new job.
      if (compiledInputs[0].isMapDone() && !compiledInputs[0].isReduceDone()) {
        FileSpec f = getTempFileSpec();
        POStore s = getStore();
        s.setSFile(f);
        compiledInputs[0].reducePlan.addAsLeaf(s);
        compiledInputs[0].setReduceDone(true);
        compiledInputs[0] = startNew(f, compiledInputs[0]);
      }              
     
      // Run POPartitionRearrange for the second join table.
      lr = new POPartitionRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)), rp);           
      try {
        lr.setIndex(1);
      } catch (ExecException e) {
        int errCode = 2058;
        String msg = "Unable to set index on newly created POLocalRearrange.";
        throw new PlanException(msg, errCode, PigException.BUG, e);
      }              
      ((POPartitionRearrange)lr).setPartitionFile(partitionFile.getFileName());
     
      groups = (List<PhysicalPlan>)joinPlans.get(l.get(1));
      lr.setPlans(groups);

            }
            return ret;
        }
        int errCode = 2026;
        String msg = "No expression plan found in POSort.";
        throw new PlanException(msg, errCode, PigException.BUG);
    }

                fktv.visit();
                keyType = fktv.keyType;
            } catch (VisitorException ve) {
                int errCode = 2035;
                String msg = "Internal error. Could not compute key type of sort operator.";
                throw new PlanException(msg, errCode, PigException.BUG, ve);
            }
        }
       
        POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
        try {
            lr.setIndex(0);
        } catch (ExecException e) {
            int errCode = 2058;
            String msg = "Unable to set index on newly created POLocalRearrange.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
        lr.setKeyType((fields == null || fields.length>1) ? DataType.TUPLE :
            keyType);
        lr.setPlans(eps1);
        lr.setResultType(DataType.TUPLE);
        mro.mapPlan.addAsLeaf(lr);
       
        mro.setMapDone(true);
       
        if (limit!=-1) {
            POPackageLite pkg_c = new POPackageLite(new OperatorKey(scope,nig.getNextNodeId(scope)));
            pkg_c.setKeyType((fields.length>1) ? DataType.TUPLE : keyType);
            pkg_c.setNumInps(1);
            //pkg.setResultType(DataType.TUPLE);
            mro.combinePlan.add(pkg_c);

            List<PhysicalPlan> eps_c1 = new ArrayList<PhysicalPlan>();
            List<Boolean> flat_c1 = new ArrayList<Boolean>();
            PhysicalPlan ep_c1 = new PhysicalPlan();
            POProject prj_c1 = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
            prj_c1.setColumn(1);
            prj_c1.setOverloaded(false);
            prj_c1.setResultType(DataType.BAG);
            ep_c1.add(prj_c1);
            eps_c1.add(ep_c1);
            flat_c1.add(true);
            POForEach fe_c1 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)),
                -1, eps_c1, flat_c1);
            fe_c1.setResultType(DataType.TUPLE);
            mro.combinePlan.addAsLeaf(fe_c1);

            POLimit pLimit = new POLimit(new OperatorKey(scope,nig.getNextNodeId(scope)));
            pLimit.setLimit(limit);
            mro.combinePlan.addAsLeaf(pLimit);

            List<PhysicalPlan> eps_c2 = new ArrayList<PhysicalPlan>();
            eps_c2.addAll(sort.getSortPlans());

            POLocalRearrange lr_c2 = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
            try {
                lr_c2.setIndex(0);
            } catch (ExecException e) {
                int errCode = 2058;
                String msg = "Unable to set index on newly created POLocalRearrange.";
                throw new PlanException(msg, errCode, PigException.BUG, e);
            }
            lr_c2.setKeyType((fields.length>1) ? DataType.TUPLE : keyType);
            lr_c2.setPlans(eps_c2);
            lr_c2.setResultType(DataType.TUPLE);
            mro.combinePlan.addAsLeaf(lr_c2);

        String inputFile = lFile.getFileName();

        return getSamplingJob(sort, prevJob, transformPlans, lFile, sampleFile, rp, null,
                  PartitionSkewedKeys.class.getName(), new String[]{per, mc, inputFile}, PoissonSampleLoader.class.getName());
      }catch(Exception e) {
        throw new PlanException(e);
      }
    }      

        try {
            lr.setIndex(0);
        } catch (ExecException e) {
            int errCode = 2058;
            String msg = "Unable to set index on newly created POLocalRearrange.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
        lr.setKeyType(DataType.CHARARRAY);
        lr.setPlans(eps);
        lr.setResultType(DataType.TUPLE);
        mro.mapPlan.add(lr);

            fePlan.connect(isEmpty, bincond);
            fePlan.connect(ce, bincond);
            fePlan.connect(relationProject, bincond);

        } catch (Exception e) {
            throw new PlanException("Error setting up outerjoin", e);
        }
       
       
    }

    // Return true if we saw physical operators other than project in the plan
    static private boolean collectColumnChain(PhysicalPlan plan,
            ColumnChainInfo columnChainInfo) throws PlanException {
        if (plan.getRoots().size() != 1) {
            int errorCode = 2207;
            throw new PlanException(
                    "POForEach inner plan has more than 1 root", errorCode);
        }

        PhysicalOperator currentNode = plan.getRoots().get(0);

        while (currentNode != null) {
            if (currentNode instanceof POProject) {
                POProject project = (POProject) currentNode;
                columnChainInfo.insertInReduce(project.isStar(), project
                        .getColumns(), project.getResultType());
            } else {
                return true;
            }
            List<PhysicalOperator> succs = plan.getSuccessors(currentNode);
            if (succs == null)
                break;
            if (succs.size() != 1) {
                int errorCode = 2208;
                throw new PlanException(
                        "Exception visiting foreach inner plan", errorCode);
            }
            currentNode = succs.get(0);
        }
        return false;

                            if (preds==null || !(preds.get(0) instanceof POProject))
                                mProjectedColsMap.put(project.getColumn(), keyIndex);
                        } catch (ExecException e) {
                            int errCode = 2070;
                            String msg = "Problem in accessing column from project operator.";
                            throw new PlanException(msg, errCode, PigException.BUG);
                        }
                    }
                    if(project.getResultType() == DataType.TUPLE)
                        isKeyTuple = true;
                }

                            if (preds==null || !(preds.get(0) instanceof POProject))
                                mSecondaryProjectedColsMap.put(project.getColumn(), keyIndex);
                        } catch (ExecException e) {
                            int errCode = 2070;
                            String msg = "Problem in accessing column from project operator.";
                            throw new PlanException(msg, errCode, PigException.BUG);
                        }
                    }
                    if(project.getResultType() == DataType.TUPLE)
                        isSecondaryKeyTuple = true;
                }
