Package org.apache.hadoop.hive.ql.parse

Examples of org.apache.hadoop.hive.ql.parse.ParseContext
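ParseContext is the state object that SemanticAnalyzer produces during compilation: it carries the operator DAG, the top-level table-scan operators (topOps), the HiveConf, the compilation Context, and auxiliary contexts such as UnionProcContext. The snippets below show how optimizer and plan-generation processors pull that state out of the NodeProcessorCtx they are handed.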


  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
    ReduceSinkOperator op = (ReduceSinkOperator)nd;
    GenMRProcContext ctx = (GenMRProcContext)opProcCtx;

    ParseContext parseCtx = ctx.getParseCtx();

    // the map-join consisted of a bunch of map-only jobs, and it has been split after the map-join
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
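Processors like the one above are not invoked directly; Hive registers them against operator-name patterns and fires them during a walk over the operator DAG. A minimal wiring sketch using the walker classes from org.apache.hadoop.hive.ql.lib, assuming an already-built GenMRProcContext (procCtx) and ParseContext (parseCtx); the rule name "R1" and the "RS%" pattern are illustrative:

  Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
  // fire the processor whenever the walk reaches a ReduceSinkOperator ("RS%")
  rules.put(new RuleRegExp("R1", "RS%"), new GenMRRedSink1());

  Dispatcher disp = new DefaultRuleDispatcher(null, rules, procCtx);
  GraphWalker walker = new DefaultGraphWalker(disp);

  // the walk starts from the top operators recorded in the ParseContext
  ArrayList<Node> topNodes = new ArrayList<Node>(parseCtx.getTopOps().values());
  walker.startWalking(topNodes, null);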


  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    ReduceSinkOperator op = (ReduceSinkOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;

    ParseContext parseCtx = ctx.getParseCtx();
    UnionProcContext uCtx = parseCtx.getUCtx();

    // union was map only - no special processing needed
    if (uCtx.isMapOnlySubq()) {
      return (new GenMRRedSink1()).process(nd, stack, opProcCtx, nodeOutputs);
    }

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {

      // LineageCtx
      LineageCtx lCtx = (LineageCtx) procCtx;
      ParseContext pctx = lCtx.getParseCtx();

      // Table scan operator.
      TableScanOperator top = (TableScanOperator)nd;
      org.apache.hadoop.hive.ql.metadata.Table t = pctx.getTopToTable().get(top);
      Table tab = t.getTTable();

      // Generate the mappings
      RowSchema rs = top.getSchema();
      List<FieldSchema> cols = t.getAllCols();
      Map<String, FieldSchema> fieldSchemaMap = new HashMap<String, FieldSchema>();
      for (FieldSchema col : cols) {
        fieldSchemaMap.put(col.getName(), col);
      }

      Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(pctx.getConf()).iterator();
      while (vcs.hasNext()) {
        VirtualColumn vc = vcs.next();
        fieldSchemaMap.put(vc.getName(), new FieldSchema(vc.getName(),
            vc.getTypeInfo().getTypeName(), ""));
      }
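A hedged continuation sketch showing how the fieldSchemaMap built above is typically consumed: each ColumnInfo in the operator's RowSchema is resolved back to the table column it came from. Using ColumnInfo.getInternalName() as the lookup key is an assumption, not taken from this listing:

      for (ColumnInfo ci : rs.getSignature()) {
        FieldSchema fs = fieldSchemaMap.get(ci.getInternalName());
        if (fs != null) {
          // a base-table lineage dependency for this column would be recorded here
        }
      }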

  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    UnionOperator union = (UnionOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    UnionProcContext uCtx = parseCtx.getUCtx();

    // Map-only subqueries could be optimized in the future to not write to a
    // file
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
        .getMapCurrCtx();

    // The plan needs to be broken only if one of the sub-queries involves a
    // map-reduce job
    if (uCtx.isMapOnlySubq()) {
      // merge currTask from multiple topOps
      HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx
          .getOpTaskMap();
      if (opTaskMap != null && opTaskMap.size() > 0) {
        Task<? extends Serializable> tsk = opTaskMap.get(null);
        if (tsk != null) {
          ctx.setCurrTask(tsk);
        }
      }

      UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
      if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) {
        GenMapRedUtils.mergeMapJoinUnion(union, ctx, UnionProcFactory
            .getPositionParent(union, stack));
      } else {
        mapCurrCtx.put((Operator<? extends Serializable>) nd, new GenMapRedCtx(
            ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId()));
      }
      return null;
    }

    ctx.setCurrUnionOp(union);

    UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
    assert uPrsCtx != null;

    Task<? extends Serializable> currTask = ctx.getCurrTask();
    int pos = UnionProcFactory.getPositionParent(union, stack);

    // is the current task a root task
    if (uPrsCtx.getRootTask(pos) && (!ctx.getRootTasks().contains(currTask))) {
      ctx.getRootTasks().add(currTask);
    }

    GenMRUnionCtx uCtxTask = ctx.getUnionTask(union);
    Task<? extends Serializable> uTask = null;

    Operator<? extends Serializable> parent = union.getParentOperators().get(
        pos);
    MapredWork uPlan = null;

    // union is encountered for the first time
    if (uCtxTask == null) {
      uCtxTask = new GenMRUnionCtx();
      uPlan = GenMapRedUtils.getMapRedWork(parseCtx);
      uTask = TaskFactory.get(uPlan, parseCtx.getConf());
      uCtxTask.setUTask(uTask);
      ctx.setUnionTask(union, uCtxTask);
    } else {
      uTask = uCtxTask.getUTask();
    }

    // If there is a mapjoin at position 'pos'
    if (uPrsCtx.getMapJoinSubq(pos)) {
      AbstractMapJoinOperator<? extends MapJoinDesc> mjOp = ctx.getCurrMapJoinOp();
      assert mjOp != null;
      GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mjOp);
      assert mjCtx != null;
      MapredWork plan = (MapredWork) currTask.getWork();

      String taskTmpDir = mjCtx.getTaskTmpDir();
      TableDesc tt_desc = mjCtx.getTTDesc();
      assert plan.getPathToAliases().get(taskTmpDir) == null;
      plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
      plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
      plan.getPathToPartitionInfo().put(taskTmpDir,
          new PartitionDesc(tt_desc, null));
      plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp());
    }

    TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
        .getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));

    // generate the temporary file
    Context baseCtx = parseCtx.getContext();
    String taskTmpDir = baseCtx.getMRTmpFileURI();

    // Add the path to alias mapping
    uCtxTask.addTaskTmpDir(taskTmpDir);
    uCtxTask.addTTDesc(tt_desc);

    // The union task is empty. The files created for all the inputs are
    // assembled in the union context and later used to initialize the union
    // plan

    // Create a file sink operator for this file name
    Operator<? extends Serializable> fs_op = OperatorFactory.get(
        new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
        HiveConf.ConfVars.COMPRESSINTERMEDIATE)), parent.getSchema());

    assert parent.getChildOperators().size() == 1;
    parent.getChildOperators().set(0, fs_op);

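A hedged sketch of the continuation one would expect once the file sink is spliced in: the current task is chained in front of the union task so that the intermediate file exists before the union plan reads it. Both calls use APIs that appear elsewhere in this listing; the exact ordering is an assumption:

    currTask.addDependentTask(uTask);
    ctx.setCurrTask(uTask);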

  /**
   * Parses the given command and generates the operator tree for it,
   * returning the resulting ParseContext.
   *
   * @throws SemanticException
   */
  public static ParseContext generateOperatorTree(HiveConf conf,
      String command) throws SemanticException{
    Context ctx;
    ParseContext subPCtx = null;
    try {
      ctx = new Context(conf);
      ParseDriver pd = new ParseDriver();
      ASTNode tree = pd.parse(command, ctx);
      tree = ParseUtils.findRootNonNullToken(tree);
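A minimal usage sketch for the method above, assuming only a populated HiveConf; the enclosing class is not shown in the snippet, so the call is written unqualified, and the query string is illustrative:

    HiveConf conf = new HiveConf();
    ParseContext pCtx = generateOperatorTree(conf, "SELECT key, count(1) FROM src GROUP BY key");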

   */
  private static void doSemanticAnalysis(SemanticAnalyzer sem,
      ASTNode ast, Context ctx) throws SemanticException {
    QB qb = new QB(null, null, false);
    ASTNode child = ast;
    ParseContext subPCtx = sem.getParseContext();
    subPCtx.setContext(ctx);
    sem.init(subPCtx);

    LOG.info("Starting Sub-query Semantic Analysis");
    sem.doPhase1(child, qb, sem.initPhase1Ctx());
    LOG.info("Completed phase 1 of Sub-query Semantic Analysis");
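A hedged sketch of how the analysis typically proceeds once phase 1 finishes, assuming the standard SemanticAnalyzer sequence of metadata resolution followed by plan generation:

    sem.getMetaData(qb);
    LOG.info("Completed getting MetaData in Sub-query Semantic Analysis");
    sem.genPlan(qb);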

       
        if(ctx.contains(childReduceSink)) {
          return null;
        }
       
        ParseContext pGraphContext = ctx.getPctx();
        HashMap<String, String> childColumnMapping =
            getPartitionAndKeyColumnMapping(childReduceSink);
        ReduceSinkOperator parentRS =
            findSingleParentReduceSink(childReduceSink, pGraphContext);
        if (parentRS == null) {
          ctx.addRejectedReduceSinkOperator(childReduceSink);
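A hedged sketch of the merge decision that typically follows when a parent is found: the child ReduceSink can only be folded into its parent when their partition and key column mappings line up. mergeReduceSinks is a hypothetical helper standing in for the real bookkeeping:

        HashMap<String, String> parentColumnMapping =
            getPartitionAndKeyColumnMapping(parentRS);
        if (childColumnMapping.equals(parentColumnMapping)) {
          // hypothetical helper: collapse the child ReduceSink into the parent
          mergeReduceSinks(childReduceSink, parentRS, pGraphContext);
        }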

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      AbstractMapJoinOperator<MapJoinDesc> mapJoin = (AbstractMapJoinOperator<MapJoinDesc>) nd;
      GenMRProcContext opProcCtx = (GenMRProcContext) procCtx;

      ParseContext parseCtx = opProcCtx.getParseCtx();
      MapredWork cplan = GenMapRedUtils.getMapRedWork(parseCtx);
      Task<? extends Serializable> redTask = TaskFactory.get(cplan, parseCtx
          .getConf());
      Task<? extends Serializable> currTask = opProcCtx.getCurrTask();

      // find the branch on which this processor was invoked
      int pos = getPositionParent(mapJoin, stack);
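A hedged guess at the continuation: the freshly created reduce task is chained behind the current task, with pos identifying which parent branch of the map-join is being split. Only the dependency edge is sketched here; the branch bookkeeping is elided:

      currTask.addDependentTask(redTask);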

      AbstractMapJoinOperator<MapJoinDesc> mapJoin = (AbstractMapJoinOperator<MapJoinDesc>) sel.getParentOperators().get(
          0);
      assert sel.getParentOperators().size() == 1;

      GenMRProcContext ctx = (GenMRProcContext) procCtx;
      ParseContext parseCtx = ctx.getParseCtx();

      // is the mapjoin followed by a reducer
      List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOps = parseCtx
          .getListMapJoinOpsNoReducer();

      if (listMapJoinOps.contains(mapJoin)) {
        ctx.setCurrAliasId(null);
        ctx.setCurrTopOp(null);
        Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
            .getMapCurrCtx();
        mapCurrCtx.put((Operator<? extends Serializable>) nd, new GenMapRedCtx(
            ctx.getCurrTask(), null, null));
        return null;
      }

      ctx.setCurrMapJoinOp(mapJoin);

      Task<? extends Serializable> currTask = ctx.getCurrTask();
      GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin);
      if (mjCtx == null) {
        mjCtx = new GenMRMapJoinCtx();
        ctx.setMapJoinCtx(mapJoin, mjCtx);
      }

      MapredWork mjPlan = GenMapRedUtils.getMapRedWork(parseCtx);
      Task<? extends Serializable> mjTask = TaskFactory.get(mjPlan, parseCtx
          .getConf());

      TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
          .getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol"));

      // generate the temporary file
      Context baseCtx = parseCtx.getContext();
      String taskTmpDir = baseCtx.getMRTmpFileURI();

      // Add the path to alias mapping
      mjCtx.setTaskTmpDir(taskTmpDir);
      mjCtx.setTTDesc(tt_desc);
      mjCtx.setRootMapJoinOp(sel);

      sel.setParentOperators(null);

      // Create a file sink operator for this file name
      Operator<? extends Serializable> fs_op = OperatorFactory.get(
          new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
          HiveConf.ConfVars.COMPRESSINTERMEDIATE)), mapJoin.getSchema());

      assert mapJoin.getChildOperators().size() == 1;
      mapJoin.getChildOperators().set(0, fs_op);
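A hedged sketch of the handoff that usually follows the rewiring above: the new map-join task becomes the current task, and the top-operator bookkeeping is cleared because that task will read from the temporary directory rather than from a table scan. All three setters appear elsewhere in this listing:

      ctx.setCurrTask(mjTask);
      ctx.setCurrTopOp(null);
      ctx.setCurrAliasId(null);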

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      GenMRProcContext ctx = (GenMRProcContext) procCtx;

      ParseContext parseCtx = ctx.getParseCtx();
      UnionProcContext uCtx = parseCtx.getUCtx();

      // union was map only - no special processing needed
      if (uCtx.isMapOnlySubq()) {
        return (new TableScanMapJoin())
            .process(nd, stack, procCtx, nodeOutputs);
