Examples of org.apache.hadoop.hive.ql.parse.ParseContext

org.apache.hadoop.hive.ql.parse.ParseContext
Parse Context: The current parse context. This is passed to the optimizer which then transforms the operator tree using the parse context. All the optimizations are performed sequentially and then the new parse context populated. Note that since the parse context contains the operator tree, it can be easily retrieved by the next optimization step or finally for task generation after the plan has been completely optimized.

   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    TableScanOperator op = (TableScanOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();


    // create a dummy MapReduce task
    MapredWork currWork = GenMapRedUtils.getMapRedWork(parseCtx);
    Task<? extends Serializable> currTask = TaskFactory.get(currWork, parseCtx.getConf());
    Operator<? extends Serializable> currTopOp = op;
    ctx.setCurrTask(currTask);
    ctx.setCurrTopOp(currTopOp);


    for (String alias : parseCtx.getTopOps().keySet()) {
      Operator<? extends Serializable> currOp = parseCtx.getTopOps().get(alias);
      if (currOp == op) {
        String currAliasId = alias;
        ctx.setCurrAliasId(currAliasId);
        mapCurrCtx.put(op, new GenMapRedCtx(currTask, currTopOp, currAliasId));


        QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
        if (parseInfo.isAnalyzeCommand()) {


          //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          // The plan consists of a simple MapRedTask followed by a StatsTask.
          // The MR task is just a simple TableScanOperator


          StatsWork statsWork = new StatsWork(parseCtx.getQB().getParseInfo().getTableSpec());
          statsWork.setAggKey(op.getConf().getStatsAggPrefix());
          Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
          currTask.addDependentTask(statsTask);
          ctx.getRootTasks().add(currTask);
          currWork.setGatheringStats(true);
          // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list,
          // and pass it to setTaskPlan as the last parameter

View Full Code Here

      }


      if (op.equals(HiveOperation.CREATETABLE_AS_SELECT)
          || op.equals(HiveOperation.QUERY)) {
        SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
        ParseContext parseCtx = querySem.getParseContext();
        Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();


        for (Map.Entry<String, Operator<? extends Serializable>> topOpMap : querySem
            .getParseContext().getTopOps().entrySet()) {
          Operator<? extends Serializable> topOp = topOpMap.getValue();
          if (topOp instanceof TableScanOperator
              && tsoTopMap.containsKey(topOp)) {
            TableScanOperator tableScanOp = (TableScanOperator) topOp;
            Table tbl = tsoTopMap.get(tableScanOp);
            List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
            List<FieldSchema> columns = tbl.getCols();
            List<String> cols = new ArrayList<String>();
            if (neededColumnIds != null && neededColumnIds.size() > 0) {
              for (int i = 0; i < neededColumnIds.size(); i++) {
                cols.add(columns.get(neededColumnIds.get(i)).getName());
              }
            } else {
              for (int i = 0; i < columns.size(); i++) {
                cols.add(columns.get(i).getName());
              }
            }
            if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName())) {
              String alias_id = topOpMap.getKey();
              PrunedPartitionList partsList = PartitionPruner.prune(parseCtx
                  .getTopToTable().get(topOp), parseCtx.getOpToPartPruner()
                  .get(topOp), parseCtx.getConf(), alias_id, parseCtx
                  .getPrunedPartitions());
              Set<Partition> parts = new HashSet<Partition>();
              parts.addAll(partsList.getConfirmedPartns());
              parts.addAll(partsList.getUnknownPartns());
              for (Partition part : parts) {

View Full Code Here

          + rewriteQueryCtx.getAggregateFunction() + "`)"
          + " from " + rewriteQueryCtx.getIndexName()
          + " group by " + gbyKeys + " ";
        //create a new ParseContext for the query to retrieve its operator tree,
        //and the required GroupByOperator from it
        ParseContext newDAGContext = RewriteParseContextGenerator.generateOperatorTree(
            rewriteQueryCtx.getParseContext().getConf(),
            selReplacementCommand);


        //we get our new GroupByOperator here
        Map<GroupByOperator, Set<String>> newGbyOpMap = newDAGContext.getGroupOpToInputTables();
        GroupByOperator newGbyOperator = newGbyOpMap.keySet().iterator().next();
        GroupByDesc oldConf = operator.getConf();


        //we need this information to set the correct colList, outputColumnNames in SelectOperator
        ExprNodeColumnDesc aggrExprNode = null;


        //Construct the new AggregationDesc to get rid of the current
        //internal names and replace them with new internal names
        //as required by the operator tree
        GroupByDesc newConf = newGbyOperator.getConf();
        List<AggregationDesc> newAggrList = newConf.getAggregators();
        if(newAggrList != null && newAggrList.size() > 0){
          for (AggregationDesc aggregationDesc : newAggrList) {
            rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
            aggrExprNode = (ExprNodeColumnDesc)aggregationDesc.getParameters().get(0);
            rewriteQueryCtx.setAggrExprNode(aggrExprNode);
          }
        }


        //Now the GroupByOperator has the new AggregationList; sum(`_count_of_indexed_key`)
        //instead of count(indexed_key)
        OpParseContext gbyOPC = rewriteQueryCtx.getOpc().get(operator);
        RowResolver gbyRR = newDAGContext.getOpParseCtx().get(newGbyOperator).getRowResolver();
        gbyOPC.setRowResolver(gbyRR);
        rewriteQueryCtx.getOpc().put(operator, gbyOPC);


        oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
        operator.setConf(oldConf);

View Full Code Here

      // create alias to task mapping and alias to input file mapping for resolver
      HashMap<String, Task<? extends Serializable>> aliasToTask = new HashMap<String, Task<? extends Serializable>>();
      HashMap<String, ArrayList<String>> pathToAliases = currTask.getWork().getPathToAliases();


      // get parseCtx for this Join Operator
      ParseContext parseCtx = physicalContext.getParseContext();
      QBJoinTree joinTree = parseCtx.getJoinContext().get(joinOp);


      // start to generate multiple map join tasks
      JoinDesc joinDesc = joinOp.getConf();
      Byte[] order = joinDesc.getTagOrder();
      int numAliases = order.length;
      
      long aliasTotalKnownInputSize = 0;
      HashMap<String, Long> aliasToSize = new HashMap<String, Long>();
      try {
        // go over all the input paths, and calculate a known total size, known
        // size for each input alias.
        Utilities.getInputSummary(context, currWork, null).getLength();
        
        // set alias to size mapping, this can be used to determine if one table
        // is choosen as big table, what's the total size of left tables, which
        // are going to be small tables.
        for (Map.Entry<String, ArrayList<String>> entry : pathToAliases.entrySet()) {
          String path = entry.getKey();
          List<String> aliasList = entry.getValue();
          ContentSummary cs = context.getCS(path);
          if (cs != null) {
            long size = cs.getLength();
            for (String alias : aliasList) {
              aliasTotalKnownInputSize += size;
              Long es = aliasToSize.get(alias);
              if(es == null) {
                es = new Long(0);
              }
              es += size;
              aliasToSize.put(alias, es);
            }
          }
        }
        
        HashSet<Integer> bigTableCandidates = MapJoinProcessor.getBigTableCandidates(joinDesc.getConds());
        
        // no table could be the big table; there is no need to convert
        if (bigTableCandidates == null) {
          return null;
        }
        currWork.setOpParseCtxMap(parseCtx.getOpParseCtx());
        currWork.setJoinTree(joinTree);


        String xml = currWork.toXML();
        String bigTableAlias = null;


        long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(context.getConf(),
            HiveConf.ConfVars.HIVESMALLTABLESFILESIZE);
        for (int i = 0; i < numAliases; i++) {
          // this table cannot be big table
          if (!bigTableCandidates.contains(i)) {
            continue;
          }
          
          // create map join task and set big table as i
          // deep copy a new mapred work from xml
          InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
          MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
          // create a mapred task for this work
          MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
              .getParseContext().getConf());
          JoinOperator newJoinOp = getJoinOp(newTask);


          // optimize this newWork and assume big table position is i
          bigTableAlias = MapJoinProcessor.genMapJoinOpAndLocalWork(newWork, newJoinOp, i);


          Long aliasKnownSize = aliasToSize.get(bigTableAlias);
          if (aliasKnownSize != null && aliasKnownSize.longValue() > 0) {
            long smallTblTotalKnownSize = aliasTotalKnownInputSize
                - aliasKnownSize.longValue();
            if(smallTblTotalKnownSize > ThresholdOfSmallTblSizeSum) {
              //this table is not good to be a big table.
              continue;
            }
          }
          
          // add into conditional task
          listWorks.add(newWork);
          listTasks.add(newTask);
          newTask.setTaskTag(Task.CONVERTED_MAPJOIN);


          //set up backup task
          newTask.setBackupTask(currTask);
          newTask.setBackupChildrenTasks(currTask.getChildTasks());


          // put the mapping alias to task
          aliasToTask.put(bigTableAlias, newTask);
        }
      } catch (Exception e) {
        e.printStackTrace();
        throw new SemanticException("Generate Map Join Task Error: " + e.getMessage());
      }


      // insert current common join task to conditional task
      listWorks.add(currTask.getWork());
      listTasks.add(currTask);
      // clear JoinTree and OP Parse Context
      currWork.setOpParseCtxMap(null);
      currWork.setJoinTree(null);


      // create conditional task and insert conditional task into task tree
      ConditionalWork cndWork = new ConditionalWork(listWorks);
      ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, parseCtx.getConf());
      cndTsk.setListTasks(listTasks);


      // set resolver and resolver context
      cndTsk.setResolver(new ConditionalResolverCommonJoin());
      ConditionalResolverCommonJoinCtx resolverCtx = new ConditionalResolverCommonJoinCtx();

View Full Code Here

   *          context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    boolean chDir = false;
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    FileSinkOperator fsOp = (FileSinkOperator) nd;
    boolean isInsertTable = // is INSERT OVERWRITE TABLE
      fsOp.getConf().getTableInfo().getTableName() != null &&
      parseCtx.getQB().getParseInfo().isInsertToTable();
    HiveConf hconf = parseCtx.getConf();




    // Has the user enabled merging of files for map-only jobs or for all jobs
    if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
      List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();


      // In case of unions or map-joins, it is possible that the file has
      // already been seen.
      // So, no need to attempt to merge the files again.
      if ((ctx.getSeenFileSinkOps() == null)
          || (!ctx.getSeenFileSinkOps().contains(nd))) {


        // no need of merging if the move is to a local file system
        MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp);


        if (isInsertTable &&
            hconf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
          addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf());
        }


        if ((mvTask != null) && !mvTask.isLocal()) {
          // There are separate configuration parameters to control whether to
          // merge for a map-only job

View Full Code Here


    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(
        new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1,
        -1);
    OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsConf = fsOp.getConf();


    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
    Integer pos = Integer.valueOf(0);
    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(info[0], info[1], new ColumnInfo(pos.toString(), colInfo
          .getType(), info[0], colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }


    Operator<ExtractDesc> extract = OperatorFactory.getAndMakeChild(new ExtractDesc(
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
            Utilities.ReduceField.VALUE.toString(), "", false)),
            new RowSchema(out_rwsch.getColumnInfos()));


    TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
    fsConf.getTableInfo().getProperties().remove(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);


    FileSinkDesc newFSD = new FileSinkDesc(finalName, ts, parseCtx.getConf()
        .getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator newOutput = (FileSinkOperator) OperatorFactory.
      getAndMakeChild(newFSD, inputRS, extract);


    HiveConf conf = parseCtx.getConf();
    MapredWork cplan = createMergeTask(conf, tsMerge, fsConf);
    cplan.setReducer(extract);


    // NOTE: we should gather stats in MR1 (rather than the merge MR job)
    // since it is unknown if the merge MR will be triggered at execution time.

View Full Code Here

  private void createMergeJob(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
      throws SemanticException {


    // if the hadoop version support CombineFileInputFormat (version >= 0.20),
    // create a Map-only job for merge, otherwise create a MapReduce merge job.
    ParseContext parseCtx = ctx.getParseCtx();
    HiveConf conf = parseCtx.getConf();
    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPONLY) &&
        Utilities.supportCombineFileInputFormat()) {
      // create Map-only merge job
      createMap4Merge(fsOp, ctx, finalName);
      LOG.info("use CombineHiveInputformat for the merge job");

View Full Code Here

  private void createMap4Merge(FileSinkOperator fsInput, GenMRProcContext ctx, String finalName) throws SemanticException {


    //
    // 1. create the operator tree
    //
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsInputDesc = fsInput.getConf();


    // Create a TableScan operator
    RowSchema inputRS = fsInput.getSchema();
    Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);


    // Create a FileSink operator
    TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
    FileSinkDesc fsOutputDesc =  new FileSinkDesc(finalName, ts,
        parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(
        fsOutputDesc,  inputRS, tsMerge);


    // If the input FileSinkOperator is a dynamic partition enabled, the tsMerge input schema
    // needs to include the partition column, and the fsOutput should have
    // a DynamicPartitionCtx to indicate that it needs to dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // adding DP ColumnInfo to the RowSchema signature
      ArrayList<ColumnInfo> signature = inputRS.getSignature();
      String tblAlias = fsInputDesc.getTableInfo().getTableName();
      LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
      StringBuilder partCols = new StringBuilder();
      for (String dpCol: dpCtx.getDPColNames()) {
        ColumnInfo colInfo = new ColumnInfo(dpCol,
            TypeInfoFactory.stringTypeInfo, // all partition column type should be string
            tblAlias, true); // partition column is virtual column
        signature.add(colInfo);
        colMap.put(dpCol, dpCol); // input and output have the same column name
        partCols.append(dpCol).append('/');
      }
      partCols.setLength(partCols.length()-1); // remove the last '/'
      inputRS.setSignature(signature);


      // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
      DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
      dpCtx2.setInputToDPCols(colMap);
      fsOutputDesc.setDynPartCtx(dpCtx2);


      // update the FileSinkOperator to include partition columns
      fsInputDesc.getTableInfo().getProperties().setProperty(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS,
        partCols.toString()); // list of dynamic partition column names
    } else {
      // non-partitioned table
      fsInputDesc.getTableInfo().getProperties().remove(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);
    }


    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MapRedTask currTask = (MapRedTask) ctx.getCurrTask();
    MoveWork dummyMv = new MoveWork(null, null, null,
        new LoadFileDesc(fsInputDesc.getDirName(), finalName, true, null, null), false);
    MapredWork cplan;


    if(parseCtx.getConf().getBoolVar(HiveConf.ConfVars.
        HIVEMERGERCFILEBLOCKLEVEL) &&
        fsInputDesc.getTableInfo().getInputFileFormatClass().
        equals(RCFileInputFormat.class)) {


      // Check if InputFormatClass is valid
      String inputFormatClass = parseCtx.getConf().
          getVar(HiveConf.ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL);
      try {
        Class c = (Class <? extends InputFormat>) Class.forName(inputFormatClass);


        LOG.info("RCFile format- Using block level merge");

View Full Code Here

    if (chDir) {
      dest = fsOp.getConf().getDirName();


      // generate the temporary file
      // it must be on the same file system as the current destination
      ParseContext parseCtx = ctx.getParseCtx();
      Context baseCtx = parseCtx.getContext();
      String tmpDir = baseCtx.getExternalTmpFileURI((new Path(dest)).toUri());


      fsOp.getConf().setDirName(tmpDir);
    }

View Full Code Here

        return null;
      }


      LOG.info("Looking for table scans where optimization is applicable");
      // create a the context for walking operators
      ParseContext parseContext = physicalContext.getParseContext();
      WalkerCtx walkerCtx = new WalkerCtx();


      Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
      opRules.put(new RuleRegExp("R1", "TS%"), new TableScanProcessor());
      opRules.put(new RuleRegExp("R2", "GBY%.*FS%"), new FileSinkProcessor());


      // The dispatcher fires the processor corresponding to the closest
      // matching rule and passes the context along
      Dispatcher disp = new DefaultRuleDispatcher(null, opRules, walkerCtx);
      GraphWalker ogw = new PreOrderWalker(disp);


      // Create a list of topOp nodes
      ArrayList<Node> topNodes = new ArrayList<Node>();
      // Get the top Nodes for this map-reduce task
      for (Operator<? extends Serializable> 
           workOperator : topOperators) {
        if (parseContext.getTopOps().values().contains(workOperator)) {
          topNodes.add(workOperator);
        }
      }


      if (task.getReducer() != null) {

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.hive.ql.parse.ParseContext

org.apache.hadoop.hive.ql.Driver

org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin

org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization

org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils

org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1

org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3

org.apache.hadoop.hive.ql.optimizer.GenMRRedSink4

org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1

org.apache.hadoop.hive.ql.optimizer.GenMRUnion1

org.apache.hadoop.hive.ql.optimizer.index.RewriteParseContextGenerator

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.