Examples of ExtractOperator


Examples of org.apache.hadoop.hive.ql.exec.ExtractOperator

      RowResolver exRR = exPair.getSecond();
      ExtractDesc exConf = new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
          Utilities.ReduceField.VALUE.toString(), "", false));

      // Create Extract Operator
      ExtractOperator exOp = (ExtractOperator) putOpInsertMap(
          OperatorFactory.getAndMakeChild(exConf, new RowSchema(exRR.getColumnInfos()), rsOp),
          exRR, parseCtx);

      // link EX to FS
      fsOp.getParentOperators().clear();
      fsOp.getParentOperators().add(exOp);
      exOp.getChildOperators().add(fsOp);

      // Set if partition sorted or partition bucket sorted
      fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED);
      if (bucketColumns.size() > 0) {
        fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_BUCKET_SORTED);
      }

      // update partition column info in FS descriptor
      ArrayList<ExprNodeDesc> partitionColumns = getPositionsToExprNodes(partitionPositions, rsOp
          .getSchema().getSignature());
      fsOp.getConf().setPartitionCols(partitionColumns);

      LOG.info("Inserted " + rsOp.getOperatorId() + " and " + exOp.getOperatorId()
          + " as parent of " + fsOp.getOperatorId() + " and child of " + fsParent.getOperatorId());
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.ExtractOperator

      RowResolver exRR = exPair.getSecond();
      ExtractDesc exConf = new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
          Utilities.ReduceField.VALUE.toString(), "", false));

      // Create Extract Operator
      ExtractOperator exOp = (ExtractOperator) putOpInsertMap(
          OperatorFactory.getAndMakeChild(exConf, new RowSchema(exRR.getColumnInfos()), rsOp),
          exRR, parseCtx);

      // link EX to FS
      fsOp.getParentOperators().clear();
      fsOp.getParentOperators().add(exOp);
      exOp.getChildOperators().add(fsOp);

      // Set if partition sorted or partition bucket sorted
      fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED);
      if (bucketColumns.size() > 0) {
        fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_BUCKET_SORTED);
      }

      // update partition column info in FS descriptor
      ArrayList<ExprNodeDesc> partitionColumns = getPositionsToExprNodes(partitionPositions, rsOp
          .getSchema().getSignature());
      fsOp.getConf().setPartitionCols(partitionColumns);

      LOG.info("Inserted " + rsOp.getOperatorId() + " and " + exOp.getOperatorId()
          + " as parent of " + fsOp.getOperatorId() + " and child of " + fsParent.getOperatorId());
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.ExtractOperator

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {

      // If the reduce sink has not been introduced due to bucketing/sorting, ignore it
      FileSinkOperator fsOp = (FileSinkOperator) nd;
      ExtractOperator exOp = (ExtractOperator) fsOp.getParentOperators().get(0);
      ReduceSinkOperator rsOp = (ReduceSinkOperator) exOp.getParentOperators().get(0);

      List<ReduceSinkOperator> rsOps = pGraphContext
          .getReduceSinkOperatorsAddedByEnforceBucketingSorting();
      // nothing to do
      if ((rsOps != null) && (!rsOps.contains(rsOp))) {
View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.ExtractOperator

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {

      BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
      ExtractOperator exop = (ExtractOperator)nd;

      // As of writing this, there is no case where this could be false, this is just protection
      // from possible future changes
      if (exop.getParentOperators().size() != 1) {
        return null;
      }

      Operator<? extends OperatorDesc> parent = exop.getParentOperators().get(0);

      // The caller of this method should guarantee this
      assert(parent instanceof ReduceSinkOperator);

      ReduceSinkOperator rop = (ReduceSinkOperator)parent;

      // Go through the set of partition columns, and find their representatives in the values
      // These represent the bucketed columns
      List<BucketCol> bucketCols = new ArrayList<BucketCol>();
      for (int i = 0; i < rop.getConf().getPartitionCols().size(); i++) {
        boolean valueColFound = false;
        for (int j = 0; j < rop.getConf().getValueCols().size(); j++) {
          if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals(
              new ExprNodeDescEqualityWrapper(rop.getConf().getPartitionCols().get(i)))) {

            bucketCols.add(new BucketCol(
                rop.getSchema().getSignature().get(j).getInternalName(), j));
            valueColFound = true;
            break;
          }
        }

        // If the partition columns can't all be found in the values then the data is not bucketed
        if (!valueColFound) {
          bucketCols.clear();
          break;
        }
      }

      // Go through the set of key columns, and find their representatives in the values
      // These represent the sorted columns
      String sortOrder = rop.getConf().getOrder();
      List<SortCol> sortCols = new ArrayList<SortCol>();
      for (int i = 0; i < rop.getConf().getKeyCols().size(); i++) {
        boolean valueColFound = false;
        for (int j = 0; j < rop.getConf().getValueCols().size(); j++) {
          if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals(
              new ExprNodeDescEqualityWrapper(rop.getConf().getKeyCols().get(i)))) {

            sortCols.add(new SortCol(
                rop.getSchema().getSignature().get(j).getInternalName(), j, sortOrder.charAt(i)));
            valueColFound = true;
            break;
          }
        }

        // If the sorted columns can't all be found in the values then the data is only sorted on
        // the columns seen up until now
        if (!valueColFound) {
          break;
        }
      }

      List<ColumnInfo> colInfos = exop.getSchema().getSignature();

      if (!bucketCols.isEmpty()) {
        List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, colInfos);
        bctx.setBucketedCols(exop, newBucketCols);
      }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.ExtractOperator

      RowResolver exRR = exPair.getSecond();
      ExtractDesc exConf = new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
          Utilities.ReduceField.VALUE.toString(), "", false));

      // Create Extract Operator
      ExtractOperator exOp = (ExtractOperator) putOpInsertMap(
          OperatorFactory.getAndMakeChild(exConf, new RowSchema(exRR.getColumnInfos()), rsOp),
          exRR, parseCtx);

      // link EX to FS
      fsOp.getParentOperators().clear();
      fsOp.getParentOperators().add(exOp);
      exOp.getChildOperators().add(fsOp);

      // Set if partition sorted or partition bucket sorted
      fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED);
      if (bucketColumns.size() > 0) {
        fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_BUCKET_SORTED);
      }

      // update partition column info in FS descriptor
      ArrayList<ExprNodeDesc> partitionColumns = getPositionsToExprNodes(partitionPositions, rsOp
          .getSchema().getSignature());
      fsOp.getConf().setPartitionCols(partitionColumns);

      LOG.info("Inserted " + rsOp.getOperatorId() + " and " + exOp.getOperatorId()
          + " as parent of " + fsOp.getOperatorId() + " and child of " + fsParent.getOperatorId());
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.ExtractOperator

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {

      // If the reduce sink has not been introduced due to bucketing/sorting, ignore it
      FileSinkOperator fsOp = (FileSinkOperator) nd;
      ExtractOperator exOp = (ExtractOperator) fsOp.getParentOperators().get(0);
      ReduceSinkOperator rsOp = (ReduceSinkOperator) exOp.getParentOperators().get(0);

      List<ReduceSinkOperator> rsOps = pGraphContext
          .getReduceSinkOperatorsAddedByEnforceBucketingSorting();
      // nothing to do
      if ((rsOps != null) && (!rsOps.contains(rsOp))) {
View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.ExtractOperator

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {

      BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
      ExtractOperator exop = (ExtractOperator)nd;

      // As of writing this, there is no case where this could be false, this is just protection
      // from possible future changes
      if (exop.getParentOperators().size() != 1) {
        return null;
      }

      Operator<? extends OperatorDesc> parent = exop.getParentOperators().get(0);

      // The caller of this method should guarantee this
      if (parent instanceof ReduceSinkOperator) {
        extractTraits(bctx, (ReduceSinkOperator)parent, exop);
      }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.ExtractOperator

      RowResolver exRR = exPair.getSecond();
      ExtractDesc exConf = new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
          Utilities.ReduceField.VALUE.toString(), "", false));

      // Create Extract Operator
      ExtractOperator exOp = (ExtractOperator) putOpInsertMap(
          OperatorFactory.getAndMakeChild(exConf, new RowSchema(exRR.getColumnInfos()), rsOp),
          exRR, parseCtx);

      // link EX to FS
      fsOp.getParentOperators().clear();
      fsOp.getParentOperators().add(exOp);
      exOp.getChildOperators().add(fsOp);

      // Set if partition sorted or partition bucket sorted
      fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED);
      if (bucketColumns.size() > 0) {
        fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_BUCKET_SORTED);
      }

      // update partition column info in FS descriptor
      ArrayList<ExprNodeDesc> partitionColumns = getPositionsToExprNodes(partitionPositions, rsOp
          .getSchema().getSignature());
      fsOp.getConf().setPartitionCols(partitionColumns);

      LOG.info("Inserted " + rsOp.getOperatorId() + " and " + exOp.getOperatorId()
          + " as parent of " + fsOp.getOperatorId() + " and child of " + fsParent.getOperatorId());
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.ExtractOperator

      RowResolver exRR = exPair.getSecond();
      ExtractDesc exConf = new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
          Utilities.ReduceField.VALUE.toString(), "", false));

      // Create Extract Operator
      ExtractOperator exOp = (ExtractOperator) putOpInsertMap(
          OperatorFactory.getAndMakeChild(exConf, new RowSchema(exRR.getColumnInfos()), rsOp),
          exRR, parseCtx);

      // link EX to FS
      fsOp.getParentOperators().clear();
      fsOp.getParentOperators().add(exOp);
      exOp.getChildOperators().add(fsOp);

      // Set if partition sorted or partition bucket sorted
      fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED);
      if (bucketColumns.size() > 0) {
        fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_BUCKET_SORTED);
      }

      // update partition column info in FS descriptor
      ArrayList<ExprNodeDesc> partitionColumns = getPositionsToExprNodes(partitionPositions, rsOp
          .getSchema().getSignature());
      fsOp.getConf().setPartitionCols(partitionColumns);

      LOG.info("Inserted " + rsOp.getOperatorId() + " and " + exOp.getOperatorId()
          + " as parent of " + fsOp.getOperatorId() + " and child of " + fsParent.getOperatorId());
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.ExtractOperator

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {

      BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
      ExtractOperator exop = (ExtractOperator)nd;

      // As of writing this, there is no case where this could be false, this is just protection
      // from possible future changes
      if (exop.getParentOperators().size() != 1) {
        return null;
      }

      Operator<? extends OperatorDesc> parent = exop.getParentOperators().get(0);

      // The caller of this method should guarantee this
      assert(parent instanceof ReduceSinkOperator);

      ReduceSinkOperator rop = (ReduceSinkOperator)parent;

      // Go through the set of partition columns, and find their representatives in the values
      // These represent the bucketed columns
      List<BucketCol> bucketCols = new ArrayList<BucketCol>();
      for (int i = 0; i < rop.getConf().getPartitionCols().size(); i++) {
        boolean valueColFound = false;
        for (int j = 0; j < rop.getConf().getValueCols().size(); j++) {
          if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals(
              new ExprNodeDescEqualityWrapper(rop.getConf().getPartitionCols().get(i)))) {

            bucketCols.add(new BucketCol(
                rop.getSchema().getSignature().get(j).getInternalName(), j));
            valueColFound = true;
            break;
          }
        }

        // If the partition columns can't all be found in the values then the data is not bucketed
        if (!valueColFound) {
          bucketCols.clear();
          break;
        }
      }

      // Go through the set of key columns, and find their representatives in the values
      // These represent the sorted columns
      String sortOrder = rop.getConf().getOrder();
      List<SortCol> sortCols = new ArrayList<SortCol>();
      for (int i = 0; i < rop.getConf().getKeyCols().size(); i++) {
        boolean valueColFound = false;
        for (int j = 0; j < rop.getConf().getValueCols().size(); j++) {
          if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals(
              new ExprNodeDescEqualityWrapper(rop.getConf().getKeyCols().get(i)))) {

            sortCols.add(new SortCol(
                rop.getSchema().getSignature().get(j).getInternalName(), j, sortOrder.charAt(i)));
            valueColFound = true;
            break;
          }
        }

        // If the sorted columns can't all be found in the values then the data is only sorted on
        // the columns seen up until now
        if (!valueColFound) {
          break;
        }
      }

      List<ColumnInfo> colInfos = exop.getSchema().getSignature();

      if (!bucketCols.isEmpty()) {
        List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, colInfos);
        bctx.setBucketedCols(exop, newBucketCols);
      }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.