private void createMap4Merge(FileSinkOperator fsInput, GenMRProcContext ctx, String finalName) throws SemanticException {
//
// 1. create the operator tree
//
ParseContext parseCtx = ctx.getParseCtx();
FileSinkDesc fsInputDesc = fsInput.getConf();
// Create a TableScan operator
RowSchema inputRS = fsInput.getSchema();
Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);
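// tsMerge scans the intermediate output written by fsInput and is the root of the merge job's operator tree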
// Create a FileSink operator
TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
FileSinkDesc fsOutputDesc = new FileSinkDesc(finalName, ts,
parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(
fsOutputDesc, inputRS, tsMerge);
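// fsOutput writes the merged files to finalName, the final destination directory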
// If the input FileSinkOperator has dynamic partitioning enabled, the tsMerge input schema
// needs to include the partition columns, and the fsOutput needs a
// DynamicPartitionCtx to indicate that its output should be dynamically partitioned.
DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
// adding DP ColumnInfo to the RowSchema signature
ArrayList<ColumnInfo> signature = inputRS.getSignature();
String tblAlias = fsInputDesc.getTableInfo().getTableName();
LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
StringBuilder partCols = new StringBuilder();
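// partCols accumulates a '/'-separated list of the dynamic partition column names for the table properties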
for (String dpCol : dpCtx.getDPColNames()) {
ColumnInfo colInfo = new ColumnInfo(dpCol,
    TypeInfoFactory.stringTypeInfo, // all dynamic partition columns are of type string
    tblAlias, true); // the partition column is treated as a virtual column
signature.add(colInfo);
colMap.put(dpCol, dpCol); // input and output have the same column name
partCols.append(dpCol).append('/');
}
partCols.setLength(partCols.length()-1); // remove the last '/'
inputRS.setSignature(signature);
// create another DynamicPartitionCtx, which has a different input-to-DP column mapping
DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
dpCtx2.setInputToDPCols(colMap);
fsOutputDesc.setDynPartCtx(dpCtx2);
// record the dynamic partition columns in the input table descriptor's properties
fsInputDesc.getTableInfo().getProperties().setProperty(
    org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS,
    partCols.toString()); // '/'-separated list of dynamic partition column names
} else {
// non-partitioned table
fsInputDesc.getTableInfo().getProperties().remove(
org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);
}
//
// 2. Construct a conditional task consisting of a move task and a map-reduce task
//
MapRedTask currTask = (MapRedTask) ctx.getCurrTask();
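// a MoveWork that moves fsInput's intermediate output directory to the final location (finalName)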
MoveWork dummyMv = new MoveWork(null, null, null,
new LoadFileDesc(fsInputDesc.getDirName(), finalName, true, null, null), false);
MapredWork cplan;
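// cplan is the plan for the merge job; use a block-level RCFile merge when the input is RCFile and block-level merge is enabled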
if (parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGERCFILEBLOCKLEVEL) &&
    fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) {
// Check if InputFormatClass is valid
String inputFormatClass =
    parseCtx.getConf().getVar(HiveConf.ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL);
try {
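// loading the class verifies that the configured merge input format exists on the classpath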
Class<? extends InputFormat> c =
    (Class<? extends InputFormat>) Class.forName(inputFormatClass);
LOG.info("RCFile format - using block level merge");