Examples of JoinOperator
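The snippets below are drawn from the Apache Hive query compiler and its physical optimizer. Together they show the life cycle of a JoinOperator: it is created from a JoinDesc during plan generation, looked up in the join context by the optimizers, and, when all but one of its inputs are small enough, rewritten into a MapJoinOperator (in both the common-join and skew-join cases).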


Examples of org.apache.hadoop.hive.ql.exec.JoinOperator

    private ConditionalTask processCurrentTask(MapRedTask currTask,
        ConditionalTask conditionalTask, Context context)
        throws SemanticException {

      // check whether the current task contains a common join operator;
      // getJoinOp returns it if present
      JoinOperator joinOp = getJoinOp(currTask);
      if (joinOp == null) {
        return null;
      }
      currTask.setTaskTag(Task.COMMON_JOIN);

      MapredWork currWork = currTask.getWork();
      // create conditional work list and task list
      List<Serializable> listWorks = new ArrayList<Serializable>();
      List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();

      // create alias to task mapping and alias to input file mapping for resolver
      HashMap<String, Task<? extends Serializable>> aliasToTask = new HashMap<String, Task<? extends Serializable>>();
      HashMap<String, ArrayList<String>> pathToAliases = currTask.getWork().getPathToAliases();

      // get parseCtx for this Join Operator
      ParseContext parseCtx = physicalContext.getParseContext();
      QBJoinTree joinTree = parseCtx.getJoinContext().get(joinOp);

      // start to generate multiple map join tasks
      JoinDesc joinDesc = joinOp.getConf();
      Byte[] order = joinDesc.getTagOrder();
      int numAliases = order.length;
     
      long aliasTotalKnownInputSize = 0;
      HashMap<String, Long> aliasToSize = new HashMap<String, Long>();
      try {
        // go over all the input paths and compute the known total size and a
        // known size for each input alias; getInputSummary also caches a
        // ContentSummary per path in the context (read back below via getCS)
        Utilities.getInputSummary(context, currWork, null).getLength();

        // build the alias-to-size mapping: if one table is chosen as the big
        // table, this determines the total size of the remaining tables,
        // which become the small tables
        for (Map.Entry<String, ArrayList<String>> entry : pathToAliases.entrySet()) {
          String path = entry.getKey();
          List<String> aliasList = entry.getValue();
          ContentSummary cs = context.getCS(path);
          if (cs != null) {
            long size = cs.getLength();
            for (String alias : aliasList) {
              aliasTotalKnownInputSize += size;
              // accumulate this path's size into the alias's running total
              Long es = aliasToSize.get(alias);
              aliasToSize.put(alias, es == null ? size : es + size);
            }
          }
        }
       
        HashSet<Integer> bigTableCandidates = MapJoinProcessor.getBigTableCandidates(joinDesc.getConds());
       
        // no table could be the big table; there is no need to convert
        if (bigTableCandidates == null) {
          return null;
        }
        currWork.setOpParseCtxMap(parseCtx.getOpParseCtx());
        currWork.setJoinTree(joinTree);

        String xml = currWork.toXML();
        String bigTableAlias = null;

        long thresholdOfSmallTblSizeSum = HiveConf.getLongVar(context.getConf(),
            HiveConf.ConfVars.HIVESMALLTABLESFILESIZE);
        for (int i = 0; i < numAliases; i++) {
          // this table cannot be big table
          if (!bigTableCandidates.contains(i)) {
            continue;
          }
         
          // create map join task and set big table as i
          // deep copy a new mapred work from xml
          InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
          MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
          // create a mapred task for this work
          MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
              .getParseContext().getConf());
          JoinOperator newJoinOp = getJoinOp(newTask);

          // optimize this newWork and assume big table position is i
          bigTableAlias = MapJoinProcessor.genMapJoinOpAndLocalWork(newWork, newJoinOp, i);

          Long aliasKnownSize = aliasToSize.get(bigTableAlias);
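A detail worth noting in the snippet above is how the per-candidate plans are produced: the MapredWork is serialized to XML once (currWork.toXML()) and then deserialized once per big-table candidate, yielding an independent deep copy that can be mutated freely. Hive's XML plan serialization of this era was built on the JDK's java.beans encoder; here is a minimal, self-contained sketch of the same round-trip deep-copy pattern (the Plan bean is hypothetical, standing in for MapredWork):

    import java.beans.XMLDecoder;
    import java.beans.XMLEncoder;
    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;

    public class DeepCopyDemo {

      // Hypothetical stand-in for MapredWork; XMLEncoder needs a public
      // no-arg constructor and getter/setter pairs.
      public static class Plan {
        private String bigTableAlias;
        public String getBigTableAlias() { return bigTableAlias; }
        public void setBigTableAlias(String alias) { bigTableAlias = alias; }
      }

      @SuppressWarnings("unchecked")
      static <T> T deepCopy(T bean) {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        try (XMLEncoder enc = new XMLEncoder(buf)) {
          enc.writeObject(bean);  // write the whole object graph as XML
        }
        try (XMLDecoder dec =
            new XMLDecoder(new ByteArrayInputStream(buf.toByteArray()))) {
          return (T) dec.readObject();  // rebuild an independent copy
        }
      }

      public static void main(String[] args) {
        Plan original = new Plan();
        original.setBigTableAlias("a");
        Plan copy = deepCopy(original);
        copy.setBigTableAlias("b");  // mutate the copy only
        System.out.println(original.getBigTableAlias());  // still prints "a"
      }
    }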

Examples of org.apache.hadoop.hive.ql.exec.JoinOperator

    }

    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames,
        join.getNoOuterJoin(), joinCondns, filterMap);
    desc.setReversedExprs(reversedExprs);
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(desc,
        new RowSchema(outputRS.getColumnInfos()), rightOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);
    return putOpInsertMap(joinOp, outputRS);
  }
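OperatorFactory.getAndMakeChild, used above to create the JoinOperator, builds an operator from its descriptor and wires it as the child of every parent operator passed in (here rightOps, the sources feeding the join), maintaining the parent and child lists on both sides. A generic sketch of that bidirectional wiring, with a hypothetical Node class standing in for Hive's Operator:

    import java.util.ArrayList;
    import java.util.List;

    class Node {
      final String name;
      final List<Node> parents = new ArrayList<>();
      final List<Node> children = new ArrayList<>();

      Node(String name) { this.name = name; }

      // Mirrors the getAndMakeChild contract: create the new node and
      // link it to every parent in both directions.
      static Node getAndMakeChild(String name, Node... parentNodes) {
        Node child = new Node(name);
        for (Node parent : parentNodes) {
          parent.children.add(child);
          child.parents.add(parent);
        }
        return child;
      }

      public static void main(String[] args) {
        Node left = new Node("TS_a");
        Node right = new Node("TS_b");
        Node join = Node.getAndMakeChild("JOIN", left, right);
        System.out.println(join.parents.size());  // 2: both scans feed the join
      }
    }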

Examples of org.apache.hadoop.hive.ql.exec.JoinOperator

   */
  public static class SkewJoinJoinProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      SkewJoinProcCtx context = (SkewJoinProcCtx) ctx;
      JoinOperator op = (JoinOperator) nd;
      ParseContext parseContext = context.getParseCtx();
      Task<? extends Serializable> currentTsk = context.getCurrentTask();
      GenMRSkewJoinProcessor.processSkewJoin(op, currentTsk, parseContext);
      return null;
    }
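SkewJoinJoinProcessor is a NodeProcessor, the callback interface behind Hive's plan-graph walkers: a walker traverses the operator tree and dispatches each matching node, here every JoinOperator, to the registered processor. A stripped-down sketch of that dispatch loop (the interfaces are simplified stand-ins, not Hive's exact signatures):

    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.List;

    interface PlanNode {
      List<PlanNode> getChildren();
    }

    interface Processor {
      // Invoked once per visited node; may rewrite the plan in place,
      // as SkewJoinJoinProcessor does via processSkewJoin.
      void process(PlanNode nd, Object procCtx);
    }

    class PlanWalker {
      // Depth-first walk, handing every node to the processor.
      static void walk(PlanNode root, Processor proc, Object procCtx) {
        Deque<PlanNode> pending = new ArrayDeque<>();
        pending.push(root);
        while (!pending.isEmpty()) {
          PlanNode nd = pending.pop();
          proc.process(nd, procCtx);
          for (PlanNode child : nd.getChildren()) {
            pending.push(child);
          }
        }
      }
    }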

Examples of org.apache.hadoop.hive.ql.exec.JoinOperator

    }

    // Type checking and implicit type conversion for join keys
    genJoinOperatorTypeCheck(joinSrcOp, srcOps);

    JoinOperator joinOp = (JoinOperator) genJoinOperatorChildren(joinTree,
        joinSrcOp, srcOps, omitOpts);
    joinContext.put(joinOp, joinTree);
    return joinOp;
  }

Examples of org.apache.hadoop.hive.ql.exec.JoinOperator

      newPlan.getPathToPartitionInfo().put(bigKeyDirPath, part);
      newPlan.getAliasToPartnInfo().put(alias, part);

      Operator<? extends Serializable> reducer = clonePlan.getReducer();
      assert reducer instanceof JoinOperator;
      JoinOperator cloneJoinOp = (JoinOperator) reducer;

      String dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
      MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc,
          newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc, joinDescriptor
          .getOutputColumnNames(), i, joinDescriptor.getConds(),
          joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix);
      mapJoinDescriptor.setTagOrder(tags);
      mapJoinDescriptor.setHandleSkewJoin(false);
      mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());

      MapredLocalWork localPlan = new MapredLocalWork(
          new LinkedHashMap<String, Operator<? extends Serializable>>(),
          new LinkedHashMap<String, FetchWork>());
      Map<Byte, String> smallTblDirs = smallKeysDirMap.get(src);

      for (int j = 0; j < numAliases; j++) {
        if (j == i) {
          continue;
        }
        Byte small_alias = tags[j];
        Operator<? extends Serializable> tblScan_op2 = parentOps[j];
        localPlan.getAliasToWork().put(small_alias.toString(), tblScan_op2);
        Path tblDir = new Path(smallTblDirs.get(small_alias));
        localPlan.getAliasToFetchWork().put(small_alias.toString(),
            new FetchWork(tblDir.toString(), tableDescList.get(small_alias)));
      }

      newPlan.setMapLocalWork(localPlan);

      // construct a map join and set it as the child of the table scan
      // operators (parentOps)
      MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory
          .getAndMakeChild(mapJoinDescriptor, (RowSchema) null, parentOps);
      // change the children of the original join operator to point to the map
      // join operator
      List<Operator<? extends Serializable>> childOps = cloneJoinOp
          .getChildOperators();
      for (Operator<? extends Serializable> childOp : childOps) {
        childOp.replaceParent(cloneJoinOp, mapJoinOp);
      }
      mapJoinOp.setChildOperators(childOps);
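The final rewiring step above, where every child of the original join swaps its parent pointer to the new map join and the map join adopts the child list, is a plain DAG splice. A self-contained sketch of the same operation (Op is a hypothetical stand-in for Hive's Operator):

    import java.util.ArrayList;
    import java.util.List;

    class Op {
      final List<Op> parents = new ArrayList<>();
      List<Op> children = new ArrayList<>();

      // Swap a single parent reference in place, as childOp.replaceParent does.
      void replaceParent(Op oldParent, Op newParent) {
        int idx = parents.indexOf(oldParent);
        if (idx >= 0) {
          parents.set(idx, newParent);
        }
      }

      // Splice newOp into the DAG below oldOp, mirroring the loop that
      // ends the snippet above.
      static void spliceBelow(Op oldOp, Op newOp) {
        for (Op child : oldOp.children) {
          child.replaceParent(oldOp, newOp);
        }
        newOp.children = oldOp.children;  // newOp adopts oldOp's children
      }
    }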

Examples of org.apache.hadoop.hive.ql.exec.JoinOperator

      Set<Map.Entry<JoinOperator, QBJoinTree>> joinCtx = pGraphContext.getJoinContext().entrySet();
      Iterator<Map.Entry<JoinOperator, QBJoinTree>> joinCtxIter = joinCtx.iterator();
      while (joinCtxIter.hasNext()) {
        Map.Entry<JoinOperator, QBJoinTree> joinEntry = joinCtxIter.next();
        JoinOperator joinOp = joinEntry.getKey();
        QBJoinTree qbJoin = joinEntry.getValue();
        int mapJoinPos = mapSideJoin(joinOp, qbJoin);
        if (mapJoinPos >= 0) {
          MapJoinOperator mapJoinOp = generateMapJoinOperator(pactx, joinOp, qbJoin, mapJoinPos);
          listMapJoinOps.add(mapJoinOp);

Examples of org.apache.hadoop.hive.ql.exec.JoinOperator

    }

    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames,
        join.getNoOuterJoin(), joinCondns, filterMap);
    desc.setReversedExprs(reversedExprs);
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(desc,
        new RowSchema(outputRS.getColumnInfos()), rightOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);

    if (join.getNullSafes() != null) {
      boolean[] nullsafes = new boolean[join.getNullSafes().size()];
      for (int i = 0; i < nullsafes.length; i++) {
        nullsafes[i] = join.getNullSafes().get(i);


Examples of org.apache.hadoop.hive.ql.exec.JoinOperator

  private MapRedTask convertTaskToMapJoinTask(MapredWork newWork, int bigTablePosition)
      throws UnsupportedEncodingException, SemanticException {
    // create a mapred task for this work
    MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
        .getParseContext().getConf());
    JoinOperator newJoinOp = getJoinOp(newTask);

    // optimize this newWork given the big table position
    MapJoinProcessor.genMapJoinOpAndLocalWork(physicalContext.getParseContext().getConf(),
        newWork, newJoinOp, bigTablePosition);
    return newTask;
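convertTaskToMapJoinTask is only invoked for positions that pass the size test built up in the first snippet: a candidate big table is acceptable when the combined known size of all the other tables fits under HiveConf.ConfVars.HIVESMALLTABLESFILESIZE. A sketch of that selection rule (simplified, not Hive's exact implementation; aliasToSize and the totals stand for the values computed in the first snippet):

    import java.util.HashMap;
    import java.util.Map;

    public class BigTablePicker {

      // Returns the alias whose exclusion leaves the smallest remainder,
      // provided that remainder fits under the small-table threshold;
      // returns null when no alias qualifies.
      static String pickBigTable(Map<String, Long> aliasToSize,
                                 long totalKnownSize, long smallTableThreshold) {
        String best = null;
        long bestRemainder = Long.MAX_VALUE;
        for (Map.Entry<String, Long> e : aliasToSize.entrySet()) {
          long remainder = totalKnownSize - e.getValue();  // size of all other tables
          if (remainder <= smallTableThreshold && remainder < bestRemainder) {
            best = e.getKey();
            bestRemainder = remainder;
          }
        }
        return best;
      }

      public static void main(String[] args) {
        Map<String, Long> sizes = new HashMap<>();
        sizes.put("a", 10_000L);
        sizes.put("b", 200L);
        sizes.put("c", 300L);
        // "a" qualifies: the other tables total 500 bytes, under the 1 KB threshold.
        System.out.println(pickBigTable(sizes, 10_500L, 1_024L));  // prints "a"
      }
    }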