Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.PartitionDesc
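Most of the snippets below follow the same pattern: a PartitionDesc pairs a TableDesc with partition-level metadata, and the input formats look it up through a path-keyed map. The following is a minimal, self-contained sketch of that pattern; it reuses the two-argument PartitionDesc(TableDesc, LinkedHashMap<String, String>) constructor and the Utilities.defaultTd single-column test descriptor that appear in the examples, while the path and alias values are placeholders.

import java.util.ArrayList;
import java.util.LinkedHashMap;

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public class PartitionDescSketch {
  public static void main(String[] args) {
    // Table-level metadata; Utilities.defaultTd is the default single-column ("col") descriptor.
    TableDesc tblDesc = Utilities.defaultTd;

    // A PartitionDesc couples the table descriptor with a partition spec (null = no partition columns).
    PartitionDesc partDesc = new PartitionDesc(tblDesc, null);

    // Input formats resolve partitions by path, so the plan keeps a path -> PartitionDesc map ...
    LinkedHashMap<String, PartitionDesc> pathToPartitionInfo =
        new LinkedHashMap<String, PartitionDesc>();
    pathToPartitionInfo.put("hdfs:///testDir", partDesc);

    // ... and a parallel path -> aliases map for the same location.
    ArrayList<String> aliases = new ArrayList<String>();
    aliases.add("a");
    LinkedHashMap<String, ArrayList<String>> pathToAliases =
        new LinkedHashMap<String, ArrayList<String>>();
    pathToAliases.put("hdfs:///testDir", aliases);

    System.out.println(pathToPartitionInfo.get("hdfs:///testDir").getTableDesc());
  }
}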


    if (this.mrwork == null) {
      init(job);
    }

    boolean nonNative = false;
    PartitionDesc part = pathToPartitionInfo.get(hsplit.getPath().toString());
    if ((part != null) && (part.getTableDesc() != null)) {
      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
      nonNative = part.getTableDesc().isNonNative();
    }

    pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
      .toString(), hsplit.getPath().toUri().getPath(), nonNative);


    JobConf newjob = new JobConf(job);
    ArrayList<InputSplit> result = new ArrayList<InputSplit>();

    // for each dir, get the InputFormat, and do getSplits.
    for (Path dir : dirs) {
      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
      // create a new InputFormat instance if this is the first time this class is seen
      Class inputFormatClass = part.getInputFileFormatClass();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob);

      // Make filter pushdown information available to getSplits.
      ArrayList<String> aliases =
        mrwork.getPathToAliases().get(dir.toUri().toString());
      if ((aliases != null) && (aliases.size() == 1)) {

    }
    JobConf newjob = new JobConf(job);

    // for each dir, get the InputFormat, and do validateInput.
    for (Path dir : dirs) {
      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
      // create a new InputFormat instance if this is the first time this class is seen
      InputFormat inputFormat = getInputFormatFromCache(part
          .getInputFileFormatClass(), job);

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      ShimLoader.getHadoopShims().inputFormatValidateInput(inputFormat, newjob);

  }

  protected static PartitionDesc getPartitionDescFromPath(
      Map<String, PartitionDesc> pathToPartitionInfo, Path dir)
      throws IOException {
    PartitionDesc partDesc = pathToPartitionInfo.get(dir.toString());
    if (partDesc == null) {
      partDesc = pathToPartitionInfo.get(dir.toUri().getPath());
    }
    if (partDesc == null) {
      throw new IOException("cannot find dir = " + dir.toString()
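The two lookups above exist because pathToPartitionInfo may be keyed either by the full URI string of a directory or by its scheme-less path, depending on how the plan was built. A minimal sketch of the fallback, using made-up placeholder paths:

    // Hypothetical map keyed by the scheme-less path only.
    LinkedHashMap<String, PartitionDesc> pathToPartitionInfo =
        new LinkedHashMap<String, PartitionDesc>();
    pathToPartitionInfo.put("/warehouse/t/ds=1", new PartitionDesc(Utilities.defaultTd, null));

    Path dir = new Path("hdfs://nn:8020/warehouse/t/ds=1");
    // The first lookup uses dir.toString() (the full URI) and misses ...
    PartitionDesc partDesc = pathToPartitionInfo.get(dir.toString());
    if (partDesc == null) {
      // ... so the helper falls back to dir.toUri().getPath() ("/warehouse/t/ds=1"), which hits.
      partDesc = pathToPartitionInfo.get(dir.toUri().getPath());
    }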

      throws Exception {
    assert mWork.getAliasToWork().keySet().size() == 1;

    String alias = mWork.getAliases().get(0);
    Operator<?> topOp = mWork.getAliasToWork().get(alias);
    PartitionDesc partDesc = mWork.getAliasToPartnInfo().get(alias);

    ArrayList<String> paths = mWork.getPaths();
    ArrayList<PartitionDesc> parts = mWork.getPartitionDescs();

    Path onePath = new Path(paths.get(0));
    String tmpPath = context.getCtx().getExternalTmpFileURI(onePath.toUri());

    Path partitionFile = new Path(tmpPath, ".partitions");
    ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);

    PartitionKeySampler sampler = new PartitionKeySampler();

    if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
      console.printInfo("Use sampling data created in previous MR");
      // merge sampling data from the previous MR job and make partition keys for the total-order sort
      for (String path : paths) {
        Path inputPath = new Path(path);
        FileSystem fs = inputPath.getFileSystem(job);
        for (FileStatus status : fs.globStatus(new Path(inputPath, ".sampling*"))) {
          sampler.addSampleFile(status.getPath(), job);
        }
      }
    } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
      console.printInfo("Creating sampling data..");
      assert topOp instanceof TableScanOperator;
      TableScanOperator ts = (TableScanOperator) topOp;

      FetchWork fetchWork;
      if (!partDesc.isPartitioned()) {
        assert paths.size() == 1;
        fetchWork = new FetchWork(paths.get(0), partDesc.getTableDesc());
      } else {
        fetchWork = new FetchWork(paths, parts, partDesc.getTableDesc());
      }
      fetchWork.setSource(ts);

      // random sampling
      FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);

      aliasList.add("a");
      LinkedHashMap<String, ArrayList<String>> pa = new LinkedHashMap<String, ArrayList<String>>();
      pa.put("/tmp/testfolder", aliasList);

      TableDesc tblDesc = Utilities.defaultTd;
      PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
      LinkedHashMap<String, PartitionDesc> pt = new LinkedHashMap<String, PartitionDesc>();
      pt.put("/tmp/testfolder", partDesc);

      LinkedHashMap<String, Operator<? extends OperatorDesc>> ao =
        new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
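Once built, maps like pa, pt and ao above are normally handed to the map-side plan. A hedged sketch of that wiring; the setter names setPathToAliases, setPathToPartitionInfo and setAliasToWork are assumptions about the MapWork API and are not shown in the excerpt:

      // Hypothetical wiring of the maps into a map-side work object.
      MapredWork mrwork = PlanUtils.getMapRedWork();      // also used in a later snippet's setUp()
      mrwork.getMapWork().setPathToAliases(pa);           // assumed setter
      mrwork.getMapWork().setPathToPartitionInfo(pt);     // assumed setter
      mrwork.getMapWork().setAliasToWork(ao);             // assumed setter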

      pathToAliases.put("hdfs:///testDir", aliases);

      // initialize pathToTableInfo
      // Default: treat the table as a single column "col"
      TableDesc td = Utilities.defaultTd;
      PartitionDesc pd = new PartitionDesc(td, null);
      LinkedHashMap<String, org.apache.hadoop.hive.ql.plan.PartitionDesc> pathToPartitionInfo =
        new LinkedHashMap<String, org.apache.hadoop.hive.ql.plan.PartitionDesc>();
      pathToPartitionInfo.put("hdfs:///testDir", pd);

      // initialize aliasToWork

  protected void setUp() {
    mr = PlanUtils.getMapRedWork();
  }

  public static void addMapWork(MapredWork mr, Table tbl, String alias, Operator<?> work) {
    mr.getMapWork().addMapWork(tbl.getDataLocation().toString(), alias, work, new PartitionDesc(
        Utilities.getTableDesc(tbl), null));
  }

    }
    JobConf newjob = new JobConf(job);
    ArrayList<InputSplit> result = new ArrayList<InputSplit>();

    // for each dir, get the InputFormat, and do getSplits.
    PartitionDesc part;
    for (Path dir : dirs) {
      part = HiveFileFormatUtils
          .getPartitionDescFromPathRecursively(pathToPartitionInfo, dir,
              IOPrepareCache.get().allocatePartitionDescMap(), true);
      // create a new InputFormat instance if this is the first time this class is seen
      Class inputFormatClass = part.getInputFileFormatClass();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob);

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length);
      for (InputSplit is : iss) {

      new HashMap<CombinePathInputFormat, CombineFilter>();
    Set<Path> poolSet = new HashSet<Path>();

    for (Path path : paths) {

      PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
          pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
      TableDesc tableDesc = part.getTableDesc();
      if ((tableDesc != null) && tableDesc.isNonNative()) {
        return super.getSplits(job, numSplits);
      }

      // Use HiveInputFormat if any of the paths is not splittable
      Class inputFormatClass = part.getInputFileFormatClass();
      String inputFormatClassName = inputFormatClass.getName();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);

      // Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not,
      // we use a configuration variable instead.
