Package org.apache.hadoop.hive.ql.parse

Examples of org.apache.hadoop.hive.ql.parse.ParseContext
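Before the individual examples, it helps to see the pattern they all share: a ParseContext is obtained from a SemanticAnalyzer after semantic analysis (or carried by an optimizer/processor context), and the compiled plan is then read through accessors such as getTopOps(), getConf() and getContext(). The following is a minimal sketch of that pattern, illustrative only and not taken from the snippets below; the class and method names are made up, and the operator type parameter is OperatorDesc in recent Hive versions and Serializable in older ones.

import java.util.Map;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

public class ParseContextSketch {

  // 'sem' is assumed to be a SemanticAnalyzer that has already compiled a query.
  public static void inspect(SemanticAnalyzer sem) {
    ParseContext parseCtx = sem.getParseContext();

    // Configuration the query was compiled with.
    HiveConf conf = parseCtx.getConf();

    // Root operators of the plan (typically TableScanOperators), keyed by table alias.
    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry
        : parseCtx.getTopOps().entrySet()) {
      Operator<? extends OperatorDesc> topOp = entry.getValue();
      // ... inspect or rewrite the subtree rooted at topOp ...
    }
  }
}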



      if (op.equals(HiveOperation.CREATETABLE_AS_SELECT)
          || op.equals(HiveOperation.QUERY)) {
        SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
        ParseContext parseCtx = querySem.getParseContext();
        Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();

        for (Map.Entry<String, Operator<? extends Serializable>> topOpMap : querySem
            .getParseContext().getTopOps().entrySet()) {
          Operator<? extends Serializable> topOp = topOpMap.getValue();
          if (topOp instanceof TableScanOperator
              && tsoTopMap.containsKey(topOp)) {
            TableScanOperator tableScanOp = (TableScanOperator) topOp;
            Table tbl = tsoTopMap.get(tableScanOp);
            List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
            List<FieldSchema> columns = tbl.getCols();
            List<String> cols = new ArrayList<String>();
            if (neededColumnIds != null && neededColumnIds.size() > 0) {
              for (int i = 0; i < neededColumnIds.size(); i++) {
                cols.add(columns.get(neededColumnIds.get(i)).getName());
              }
            } else {
              for (int i = 0; i < columns.size(); i++) {
                cols.add(columns.get(i).getName());
              }
            }
            // Boolean.TRUE.equals(...) avoids an unboxing NullPointerException when the
            // map has no entry for this table.
            if (tbl.isPartitioned()
                && Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
              String alias_id = topOpMap.getKey();
              PrunedPartitionList partsList = PartitionPruner.prune(parseCtx
                  .getTopToTable().get(topOp), parseCtx.getOpToPartPruner()
                  .get(topOp), parseCtx.getConf(), alias_id, parseCtx
                  .getPrunedPartitions());
              Set<Partition> parts = new HashSet<Partition>();
              parts.addAll(partsList.getConfirmedPartns());
              parts.addAll(partsList.getUnknownPartns());
              for (Partition part : parts) {

    List<Node> opChildren = operator.getChildren();
    TableScanDesc operatorDesc = operator.getConf();
    ExprNodeDesc predicate = operatorDesc.getFilterExpr();

    IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
    ParseContext pctx = context.getParseContext();
    LOG.info("Processing predicate for index optimization");
    if (predicate == null) {
      LOG.info("null predicate pushed down");
      return null;
    }
    LOG.info(predicate.getExprString());

    // check if we have indexes on all partitions in this table scan
    Set<Partition> queryPartitions;
    try {
      queryPartitions = checkPartitionsCoveredByIndex(operator, pctx);
      if (queryPartitions == null) { // partitions not covered
        return null;
      }
    } catch (HiveException e) {
      LOG.error("Fatal Error: problem accessing metastore", e);
      throw new SemanticException(e);
    }

    // we can only process MapReduce tasks to check input size
    if (!context.getCurrentTask().isMapRedTask()) {
      return null;
    }
    MapRedTask currentTask = (MapRedTask) context.getCurrentTask();

    // get potential reentrant index queries from each index
    Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
    Collection<List<Index>> tableIndexes = indexes.values();
    for (List<Index> indexesOnTable : tableIndexes) {
      List<List<Index>> indexesByType = new ArrayList<List<Index>>();
      for (Index index : indexesOnTable) {
        boolean added = false;
        for (List<Index> indexType : indexesByType) {
          if (indexType.isEmpty()) {
            indexType.add(index);
            added = true;
          } else if (indexType.get(0).getIndexHandlerClass().equals(
                index.getIndexHandlerClass())) {
            indexType.add(index);
            added = true;
            break;
          }
        }
        if (!added) {
          List<Index> newType = new ArrayList<Index>();
          newType.add(index);
          indexesByType.add(newType);
        }
      }

      // choose index type with most indexes of the same type on the table
      // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
      List<Index> bestIndexes = indexesByType.get(0);
      for (List<Index> indexTypes : indexesByType) {
        if (bestIndexes.size() < indexTypes.size()) {
          bestIndexes = indexTypes;
        }
      }

      // rewrite index queries for the chosen index type
      HiveIndexQueryContext queryContext = new HiveIndexQueryContext();
      queryContext.setQueryPartitions(queryPartitions);
      rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, queryContext);
      List<Task<?>> indexTasks = queryContext.getQueryTasks();

      if (indexTasks != null && indexTasks.size() > 0) {
        queryContexts.put(bestIndexes.get(0), queryContext);
      }
    }
    // choose an index rewrite to use
    if (queryContexts.size() > 0) {
      // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
      Index chosenIndex = queryContexts.keySet().iterator().next();

      // modify the parse context to use indexing
      // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
      HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);

      // prepare the map reduce job to use indexing
      MapredWork work = currentTask.getWork();
      work.setInputformat(queryContext.getIndexInputFormat());
      work.setIndexIntermediateFile(queryContext.getIndexIntermediateFile());

      // modify inputs based on index query
      Set<ReadEntity> inputs = pctx.getSemanticInputs();
      inputs.addAll(queryContext.getAdditionalSemanticInputs());
      List<Task<?>> chosenRewrite = queryContext.getQueryTasks();

      // add dependencies so index query runs first
      insertIndexQuery(pctx, context, chosenRewrite);
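insertIndexQuery(pctx, context, chosenRewrite), called at the end of the example above, is not shown here. Conceptually it splices the chosen index-query tasks in front of the current MapRedTask so the index scan runs first. A heavily simplified, hypothetical sketch of that dependency wiring, reusing chosenRewrite and currentTask from the example (real plan surgery also has to detach the current task from the root-task list):

// Make every task of the chosen rewrite a prerequisite of the current task,
// so the index query executes before the main map-reduce job.
for (Task<?> indexTask : chosenRewrite) {
  indexTask.addDependentTask(currentTask);
}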

  public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
      throws SemanticException {

    Task<? extends Serializable> task = (Task<? extends Serializable>) nd;

    ParseContext pctx = physicalContext.getParseContext();

    // create the rules (regexes) so the walker can recognize our WHERE queries
    Map<Rule, NodeProcessor> operatorRules = createOperatorRules(pctx);

    // check for no indexes on any table
    if (operatorRules == null) {
      return null;
    }

    // create context so the walker can carry the current task with it.
    IndexWhereProcCtx indexWhereOptimizeCtx = new IndexWhereProcCtx(task, pctx);

    // create the dispatcher, which fires the processor according to the rule that
    // best matches
    Dispatcher dispatcher = new DefaultRuleDispatcher(getDefaultProcessor(),
                                                      operatorRules,
                                                      indexWhereOptimizeCtx);

    // walk the mapper operator (not task) tree
    GraphWalker ogw = new DefaultGraphWalker(dispatcher);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);

    return null;
  }
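createOperatorRules(pctx), referenced in the dispatch() method above, is not shown. A minimal sketch of what such a rule map can look like, assuming 'processor' is a hypothetical NodeProcessor that does the per-TableScan work ("TS" is the short operator name used for TableScanOperator in rule patterns):

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;

public class IndexWhereRulesSketch {

  // 'processor' is a hypothetical NodeProcessor supplied by the caller.
  public static Map<Rule, NodeProcessor> createOperatorRules(NodeProcessor processor) {
    Map<Rule, NodeProcessor> operatorRules = new LinkedHashMap<Rule, NodeProcessor>();
    // Fire the processor whenever the walker reaches a TableScanOperator ("TS").
    operatorRules.put(new RuleRegExp("RULE:TableScan", "TS%"), processor);
    return operatorRules;
  }
}

The DefaultRuleDispatcher above then matches these rules against the operator stack during the walk and invokes the corresponding processor.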

  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    boolean chDir = false;
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    FileSinkOperator fsOp = (FileSinkOperator) nd;
    boolean isInsertTable = // is INSERT OVERWRITE TABLE
      fsOp.getConf().getTableInfo().getTableName() != null &&
      parseCtx.getQB().getParseInfo().isInsertToTable();
    HiveConf hconf = parseCtx.getConf();


    // Has the user enabled merging of files for map-only jobs or for all jobs
    if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
      List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();

      // In case of unions or map-joins, it is possible that the file has
      // already been seen.
      // So, no need to attempt to merge the files again.
      if ((ctx.getSeenFileSinkOps() == null)
          || (!ctx.getSeenFileSinkOps().contains(nd))) {

        // no need to merge if the move is to a local file system
        MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp);

        if (isInsertTable &&
            hconf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
          addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf());
        }

        if ((mvTask != null) && !mvTask.isLocal()) {
          // There are separate configuration parameters to control whether to
          // merge for a map-only job
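The example above is cut off just before the merge flags are consulted. As a hedged sketch only, the kind of check that typically follows uses the standard HiveConf flags hive.merge.mapfiles and hive.merge.mapredfiles; hconf is the HiveConf already obtained from parseCtx above, and the exact control flow differs between Hive versions.

// Merge small files produced by a map-only job?
boolean mergeMapOnly = hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES);
// Merge small files produced by a full map-reduce job?
boolean mergeMapRed = hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES);

if (mergeMapOnly || mergeMapRed) {
  // ... attach a merge job (see createMergeJob further down) for this FileSinkOperator ...
}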

    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(
        new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1,
        -1);
    OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsConf = fsOp.getConf();

    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
    Integer pos = Integer.valueOf(0);
    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(info[0], info[1], new ColumnInfo(pos.toString(), colInfo
          .getType(), info[0], colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }

    Operator<ExtractDesc> extract = OperatorFactory.getAndMakeChild(new ExtractDesc(
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
            Utilities.ReduceField.VALUE.toString(), "", false)),
            new RowSchema(out_rwsch.getColumnInfos()));

    TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
    fsConf.getTableInfo().getProperties().remove(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);

    FileSinkDesc newFSD = new FileSinkDesc(finalName, ts, parseCtx.getConf()
        .getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator newOutput = (FileSinkOperator) OperatorFactory.
      getAndMakeChild(newFSD, inputRS, extract);

    HiveConf conf = parseCtx.getConf();
    MapredWork cplan = createMergeTask(conf, tsMerge, fsConf);
    cplan.setReducer(extract);

    // NOTE: we should gather stats in MR1 (rather than the merge MR job)
    // since it is unknown if the merge MR will be triggered at execution time.

  private void createMergeJob(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
      throws SemanticException {

    // if the hadoop version supports CombineFileInputFormat (version >= 0.20),
    // create a Map-only job for the merge, otherwise create a MapReduce merge job.
    ParseContext parseCtx = ctx.getParseCtx();
    HiveConf conf = parseCtx.getConf();
    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPONLY) &&
        Utilities.supportCombineFileInputFormat()) {
      // create Map-only merge job
      createMap4Merge(fsOp, ctx, finalName);
      LOG.info("use CombineHiveInputformat for the merge job");

  private void createMap4Merge(FileSinkOperator fsInput, GenMRProcContext ctx, String finalName) throws SemanticException {

    //
    // 1. create the operator tree
    //
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsInputDesc = fsInput.getConf();

    // Create a TableScan operator
    RowSchema inputRS = fsInput.getSchema();
    Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

    // Create a FileSink operator
    TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
    FileSinkDesc fsOutputDesc =  new FileSinkDesc(finalName, ts,
        parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(
        fsOutputDesc,  inputRS, tsMerge);

    // If the input FileSinkOperator is dynamic-partition enabled, the tsMerge input schema
    // needs to include the partition columns, and the fsOutput should have
    // a DynamicPartitionCtx to indicate that it needs to be dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // adding DP ColumnInfo to the RowSchema signature
      ArrayList<ColumnInfo> signature = inputRS.getSignature();
      String tblAlias = fsInputDesc.getTableInfo().getTableName();
      LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
      StringBuilder partCols = new StringBuilder();
      for (String dpCol: dpCtx.getDPColNames()) {
        ColumnInfo colInfo = new ColumnInfo(dpCol,
            TypeInfoFactory.stringTypeInfo, // all partition column types are treated as string
            tblAlias, true); // partition columns are virtual columns
        signature.add(colInfo);
        colMap.put(dpCol, dpCol); // input and output have the same column name
        partCols.append(dpCol).append('/');
      }
      partCols.setLength(partCols.length()-1); // remove the last '/'
      inputRS.setSignature(signature);

      // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
      DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
      dpCtx2.setInputToDPCols(colMap);
      fsOutputDesc.setDynPartCtx(dpCtx2);

      // update the FileSinkOperator to include partition columns
      fsInputDesc.getTableInfo().getProperties().setProperty(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS,
        partCols.toString()); // list of dynamic partition column names
    } else {
      // non-partitioned table
      fsInputDesc.getTableInfo().getProperties().remove(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);
    }

    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MapRedTask currTask = (MapRedTask) ctx.getCurrTask();
    MoveWork dummyMv = new MoveWork(null, null, null,
        new LoadFileDesc(fsInputDesc.getDirName(), finalName, true, null, null), false);
    MapredWork cplan;

    if(parseCtx.getConf().getBoolVar(HiveConf.ConfVars.
        HIVEMERGERCFILEBLOCKLEVEL) &&
        fsInputDesc.getTableInfo().getInputFileFormatClass().
        equals(RCFileInputFormat.class)) {

      // Check if InputFormatClass is valid
      String inputFormatClass = parseCtx.getConf().
          getVar(HiveConf.ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL);
      try {
        Class<? extends InputFormat> c =
            (Class<? extends InputFormat>) Class.forName(inputFormatClass);

        LOG.info("RCFile format- Using block level merge");

    if (chDir) {
      dest = fsOp.getConf().getDirName();

      // generate the temporary file
      // it must be on the same file system as the current destination
      ParseContext parseCtx = ctx.getParseCtx();
      Context baseCtx = parseCtx.getContext();
      String tmpDir = baseCtx.getExternalTmpFileURI((new Path(dest)).toUri());

      fsOp.getConf().setDirName(tmpDir);
    }

    // ... (earlier feasibility checks elided; they return null when the join
    // cannot be converted) ...

    // can safely convert the join to a map join
    ParseContext parseContext = context.parseContext;
    MapJoinOperator mapJoinOp = MapJoinProcessor.
      convertJoinOpMapJoinOp(context.conf, parseContext.getOpParseCtx(),
          joinOp, parseContext.getJoinContext().get(joinOp), bigTablePosition, true);

    Operator<? extends OperatorDesc> parentBigTableOp
      = mapJoinOp.getParentOperators().get(bigTablePosition);

    if (parentBigTableOp instanceof ReduceSinkOperator) {
