Examples of org.apache.hadoop.hive.ql.log.PerfLogger

org.apache.hadoop.hive.ql.log.PerfLogger
PerfLogger. Can be used to measure and log the time spent by a piece of code.

   * obtained from he inputs and outputs populated by the compiler. The lock acuisition scheme is
   * pretty simple. If all the locks cannot be obtained, error out. Deadlock is avoided by making
   * sure that the locks are lexicographically sorted.
   **/
  private int acquireReadWriteLocks() {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);




    try {
      txnMgr.acquireLocks(plan, ctx, userName);
      return 0;
    } catch (LockException e) {
      errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
      SQLState = ErrorMsg.findSQLState(e.getMessage());
      downstreamError = e;
      console.printError(errorMessage, "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return 10;
    } finally {
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
    }
  }

View Full Code Here

   * @param hiveLocks
   *          list of hive locks to be released Release all the locks specified. If some of the
   *          locks have already been released, ignore them
   **/
  private void releaseLocks(List<HiveLock> hiveLocks) throws LockException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RELEASE_LOCKS);


    if (hiveLocks != null) {
      ctx.getHiveTxnManager().getLockManager().releaseLocks(hiveLocks);
    }
    ctx.setHiveLocks(null);


    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RELEASE_LOCKS);
  }

View Full Code Here

          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return new CommandProcessorResponse(12, errorMessage, SQLState);
    }


    // Reset the perf logger
    PerfLogger perfLogger = PerfLogger.getPerfLogger(true);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_RUN);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TIME_TO_SUBMIT);


    int ret;
    boolean requireLock = false;
    boolean ckLock = false;
    try {
      ckLock = checkConcurrency();
      createTxnManager();
    } catch (SemanticException e) {
      errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
      SQLState = ErrorMsg.findSQLState(e.getMessage());
      downstreamError = e;
      console.printError(errorMessage, "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      ret = 10;
      return new CommandProcessorResponse(ret, errorMessage, SQLState);
    }
    ret = recordValidTxns();
    if (ret != 0) return new CommandProcessorResponse(ret, errorMessage, SQLState);


    if (!alreadyCompiled) {
      ret = compileInternal(command);
      if (ret != 0) {
        return new CommandProcessorResponse(ret, errorMessage, SQLState);
      }
    }


    // the reason that we set the txn manager for the cxt here is because each
    // query has its own ctx object. The txn mgr is shared across the
    // same instance of Driver, which can run multiple queries.
    ctx.setHiveTxnManager(txnMgr);


    if (ckLock) {
      boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY);
      if(lockOnlyMapred) {
        Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>();
        taskQueue.addAll(plan.getRootTasks());
        while (taskQueue.peek() != null) {
          Task<? extends Serializable> tsk = taskQueue.remove();
          requireLock = requireLock || tsk.requireLock();
          if(requireLock) {
            break;
          }
          if (tsk instanceof ConditionalTask) {
            taskQueue.addAll(((ConditionalTask)tsk).getListTasks());
          }
          if(tsk.getChildTasks()!= null) {
            taskQueue.addAll(tsk.getChildTasks());
          }
          // does not add back up task here, because back up task should be the same
          // type of the original task.
        }
      } else {
        requireLock = true;
      }
    }


    if (requireLock) {
      ret = acquireReadWriteLocks();
      if (ret != 0) {
        try {
          releaseLocks(ctx.getHiveLocks());
        } catch (LockException e) {
          // Not much to do here
        }
        return new CommandProcessorResponse(ret, errorMessage, SQLState);
      }
    }


    ret = execute();
    if (ret != 0) {
      //if needRequireLock is false, the release here will do nothing because there is no lock
      try {
        releaseLocks(ctx.getHiveLocks());
      } catch (LockException e) {
        // Nothing to do here
      }
      return new CommandProcessorResponse(ret, errorMessage, SQLState);
    }


    //if needRequireLock is false, the release here will do nothing because there is no lock
    try {
      releaseLocks(ctx.getHiveLocks());
    } catch (LockException e) {
      errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
      SQLState = ErrorMsg.findSQLState(e.getMessage());
      downstreamError = e;
      console.printError(errorMessage + "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return new CommandProcessorResponse(12, errorMessage, SQLState);
    }


    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_RUN);
    perfLogger.close(LOG, plan);


    // Take all the driver run hooks and post-execute them.
    try {
      for (HiveDriverRunHook driverRunHook : driverRunHooks) {
          driverRunHook.postDriverRun(hookContext);

View Full Code Here

      throw e;
    }
  }


  public int execute() throws CommandNeedRetryException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE);


    boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME));
    int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);


    String queryId = plan.getQueryId();
    String queryStr = plan.getQueryStr();


    maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);


    try {
      LOG.info("Starting command: " + queryStr);


      plan.setStarted();


      if (SessionState.get() != null) {
        SessionState.get().getHiveHistory().startQuery(queryStr,
            conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
        SessionState.get().getHiveHistory().logPlanProgress(plan);
      }
      resStream = null;


      HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS());
      hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);


      for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) {
        if (peh instanceof ExecuteWithHookContext) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());


          ((ExecuteWithHookContext) peh).run(hookContext);


          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
        } else if (peh instanceof PreExecute) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());


          ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(),
              ShimLoader.getHadoopShims().getUGIForConf(conf));


          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
        }
      }




      int jobs = Utilities.getMRTasks(plan.getRootTasks()).size()
        + Utilities.getTezTasks(plan.getRootTasks()).size();
      if (jobs > 0) {
        console.printInfo("Total jobs = " + jobs);
      }
      if (SessionState.get() != null) {
        SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_NUM_TASKS,
            String.valueOf(jobs));
        SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap());
      }
      String jobname = Utilities.abbreviate(queryStr, maxlen - 6);


      // A runtime that launches runnable tasks as separate Threads through
      // TaskRunners
      // As soon as a task isRunnable, it is put in a queue
      // At any time, at most maxthreads tasks can be running
      // The main thread polls the TaskRunners to check if they have finished.


      DriverContext driverCxt = new DriverContext(ctx);
      driverCxt.prepare(plan);


      ctx.setHDFSCleanup(true);


      this.driverCxt = driverCxt; // for canceling the query (should be bound to session?)


      SessionState.get().setLastMapRedStatsList(new ArrayList<MapRedStats>());
      SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>());
      SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>());


      // Add root Tasks to runnable
      for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
        // This should never happen, if it does, it's a bug with the potential to produce
        // incorrect results.
        assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
        driverCxt.addToRunnable(tsk);
      }


      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TIME_TO_SUBMIT);
      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RUN_TASKS);
      // Loop while you either have tasks running, or tasks queued up
      while (!destroyed && driverCxt.isRunning()) {


        // Launch upto maxthreads tasks
        Task<? extends Serializable> task;
        while ((task = driverCxt.getRunnable(maxthreads)) != null) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TASK + task.getName() + "." + task.getId());
          TaskRunner runner = launchTask(task, queryId, noName, jobname, jobs, driverCxt);
          if (!runner.isRunning()) {
            break;
          }
        }


        // poll the Tasks to see which one completed
        TaskRunner tskRun = driverCxt.pollFinished();
        if (tskRun == null) {
          continue;
        }
        hookContext.addCompleteTask(tskRun);


        Task<? extends Serializable> tsk = tskRun.getTask();
        TaskResult result = tskRun.getTaskResult();


        int exitVal = result.getExitVal();
        if (exitVal != 0) {
          if (tsk.ifRetryCmdWhenFail()) {
            driverCxt.shutdown();
            // in case we decided to run everything in local mode, restore the
            // the jobtracker setting to its initial value
            ctx.restoreOriginalTracker();
            throw new CommandNeedRetryException();
          }
          Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
          if (backupTask != null) {
            setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
            console.printError(errorMessage);
            errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
            console.printError(errorMessage);


            // add backup task to runnable
            if (DriverContext.isLaunchable(backupTask)) {
              driverCxt.addToRunnable(backupTask);
            }
            continue;


          } else {
            hookContext.setHookType(HookContext.HookType.ON_FAILURE_HOOK);
            // Get all the failure execution hooks and execute them.
            for (Hook ofh : getHooks(HiveConf.ConfVars.ONFAILUREHOOKS)) {
              perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());


              ((ExecuteWithHookContext) ofh).run(hookContext);


              perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());
            }
            setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
            SQLState = "08S01";
            console.printError(errorMessage);
            driverCxt.shutdown();
            // in case we decided to run everything in local mode, restore the
            // the jobtracker setting to its initial value
            ctx.restoreOriginalTracker();
            return exitVal;
          }
        }


        driverCxt.finished(tskRun);


        if (SessionState.get() != null) {
          SessionState.get().getHiveHistory().setTaskProperty(queryId, tsk.getId(),
              Keys.TASK_RET_CODE, String.valueOf(exitVal));
          SessionState.get().getHiveHistory().endTask(queryId, tsk);
        }


        if (tsk.getChildTasks() != null) {
          for (Task<? extends Serializable> child : tsk.getChildTasks()) {
            if (DriverContext.isLaunchable(child)) {
              driverCxt.addToRunnable(child);
            }
          }
        }
      }
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RUN_TASKS);


      // in case we decided to run everything in local mode, restore the
      // the jobtracker setting to its initial value
      ctx.restoreOriginalTracker();


      if (driverCxt.isShutdown()) {
        SQLState = "HY008";
        errorMessage = "FAILED: Operation cancelled";
        console.printError(errorMessage);
        return 1000;
      }


      // remove incomplete outputs.
      // Some incomplete outputs may be added at the beginning, for eg: for dynamic partitions.
      // remove them
      HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
      for (WriteEntity output : plan.getOutputs()) {
        if (!output.isComplete()) {
          remOutputs.add(output);
        }
      }


      for (WriteEntity output : remOutputs) {
        plan.getOutputs().remove(output);
      }


      hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
      // Get all the post execution hooks and execute them.
      for (Hook peh : getHooks(HiveConf.ConfVars.POSTEXECHOOKS)) {
        if (peh instanceof ExecuteWithHookContext) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());


          ((ExecuteWithHookContext) peh).run(hookContext);


          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
        } else if (peh instanceof PostExecute) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());


          ((PostExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(),
              (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo()
                  : null), ShimLoader.getHadoopShims().getUGIForConf(conf));


          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
        }
      }




      if (SessionState.get() != null) {
        SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE,
            String.valueOf(0));
        SessionState.get().getHiveHistory().printRowCount(queryId);
      }
    } catch (CommandNeedRetryException e) {
      throw e;
    } catch (Exception e) {
      ctx.restoreOriginalTracker();
      if (SessionState.get() != null) {
        SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE,
            String.valueOf(12));
      }
      // TODO: do better with handling types of Exception here
      errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
      SQLState = "08S01";
      downstreamError = e;
      console.printError(errorMessage + "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return (12);
    } finally {
      if (SessionState.get() != null) {
        SessionState.get().getHiveHistory().endQuery(queryId);
      }
      if (noName) {
        conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
      }
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_EXECUTE);


      if (SessionState.get().getLastMapRedStatsList() != null
          && SessionState.get().getLastMapRedStatsList().size() > 0) {
        long totalCpu = 0;
        console.printInfo("MapReduce Jobs Launched: ");

View Full Code Here

  /**
   * Create Hive splits based on CombineFileSplit.
   */
  @Override
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
    init(job);
    Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
    Map<String, Operator<? extends OperatorDesc>> aliasToWork =
      mrwork.getAliasToWork();
    CombineFileInputFormatShim combine = ShimLoader.getHadoopShims()
        .getCombineFileInputFormat();
    
    // on tez we're avoiding duplicating path info since the info will go over
    // rpc
    if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
      try {
        List<Path> dirs = Utilities.getInputPathsTez(job, mrwork);
        Utilities.setInputPaths(job, dirs);
      } catch (Exception e) {
        throw new IOException("Could not create input paths", e);
      }
    }


    InputSplit[] splits = null;
    if (combine == null) {
      splits = super.getSplits(job, numSplits);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
      return splits;
    }


    if (combine.getInputPathsShim(job).length == 0) {
      throw new IOException("No input paths specified in job");
    }
    ArrayList<InputSplit> result = new ArrayList<InputSplit>();


    // combine splits only from same tables and same partitions. Do not combine splits from multiple
    // tables or multiple partitions.
    Path[] paths = combine.getInputPathsShim(job);


    List<Path> inpDirs = new ArrayList<Path>();
    List<Path> inpFiles = new ArrayList<Path>();
    Map<CombinePathInputFormat, CombineFilter> poolMap =
      new HashMap<CombinePathInputFormat, CombineFilter>();
    Set<Path> poolSet = new HashSet<Path>();


    for (Path path : paths) {
      PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
          pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
      TableDesc tableDesc = part.getTableDesc();
      if ((tableDesc != null) && tableDesc.isNonNative()) {
        return super.getSplits(job, numSplits);
      }


      // Use HiveInputFormat if any of the paths is not splittable
      Class inputFormatClass = part.getInputFileFormatClass();
      String inputFormatClassName = inputFormatClass.getName();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
      String deserializerClassName = null;
      try {
        deserializerClassName = part.getDeserializer(job).getClass().getName();
      } catch (Exception e) {
        // ignore
      }
      FileSystem inpFs = path.getFileSystem(job);
      if (inputFormatClass.isAssignableFrom(OrcInputFormat.class)) {
        if (inpFs.exists(new Path(path, OrcRecordUpdater.ACID_FORMAT))) {
          throw new IOException("CombineHiveInputFormat is incompatible " +
            " with ACID tables. Please set hive.input.format=" +
              "org.apache.hadoop.hive.ql.io.HiveInputFormat");
        }
      }


      // Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not,
      // we use a configuration variable for the same
      if (this.mrwork != null && !this.mrwork.getHadoopSupportsSplittable()) {
        // The following code should be removed, once
        // https://issues.apache.org/jira/browse/MAPREDUCE-1597 is fixed.
        // Hadoop does not handle non-splittable files correctly for CombineFileInputFormat,
        // so don't use CombineFileInputFormat for non-splittable files


        //ie, dont't combine if inputformat is a TextInputFormat and has compression turned on


        if (inputFormat instanceof TextInputFormat) {
          Queue<Path> dirs = new LinkedList<Path>();
          FileStatus fStats = inpFs.getFileStatus(path);


          // If path is a directory
          if (fStats.isDir()) {
            dirs.offer(path);
          } else if ((new CompressionCodecFactory(job)).getCodec(path) != null) {
            //if compresssion codec is set, use HiveInputFormat.getSplits (don't combine)
            splits = super.getSplits(job, numSplits);
            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
            return splits;
          }


          while (dirs.peek() != null) {
            Path tstPath = dirs.remove();
            FileStatus[] fStatus = inpFs.listStatus(tstPath);
            for (int idx = 0; idx < fStatus.length; idx++) {
              if (fStatus[idx].isDir()) {
                dirs.offer(fStatus[idx].getPath());
              } else if ((new CompressionCodecFactory(job)).getCodec(
                  fStatus[idx].getPath()) != null) {
                //if compresssion codec is set, use HiveInputFormat.getSplits (don't combine)
                splits = super.getSplits(job, numSplits);
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
                return splits;
              }
            }
          }
        }
      }
      //don't combine if inputformat is a SymlinkTextInputFormat
      if (inputFormat instanceof SymlinkTextInputFormat) {
        splits = super.getSplits(job, numSplits);
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
        return splits;
      }


      Path filterPath = path;


      // Does a pool exist for this path already
      CombineFilter f = null;
      List<Operator<? extends OperatorDesc>> opList = null;


      if (!mrwork.isMapperCannotSpanPartns()) {
        //if mapper can span partitions, make sure a splits does not contain multiple
        // opList + inputFormatClassName + deserializerClassName combination
        // This is done using the Map of CombinePathInputFormat to PathFilter


        opList = HiveFileFormatUtils.doGetWorksFromPath(
                   pathToAliases, aliasToWork, filterPath);
        CombinePathInputFormat combinePathInputFormat =
            new CombinePathInputFormat(opList, inputFormatClassName, deserializerClassName);
        f = poolMap.get(combinePathInputFormat);
        if (f == null) {
          f = new CombineFilter(filterPath);
          LOG.info("CombineHiveInputSplit creating pool for " + path +
                   "; using filter path " + filterPath);
          combine.createPool(job, f);
          poolMap.put(combinePathInputFormat, f);
        } else {
          LOG.info("CombineHiveInputSplit: pool is already created for " + path +
                   "; using filter path " + filterPath);
          f.addPath(filterPath);
        }
      } else {
        // In the case of tablesample, the input paths are pointing to files rather than directories.
        // We need to get the parent directory as the filtering path so that all files in the same
        // parent directory will be grouped into one pool but not files from different parent
        // directories. This guarantees that a split will combine all files in the same partition
        // but won't cross multiple partitions if the user has asked so.
        if (!path.getFileSystem(job).getFileStatus(path).isDir()) { // path is not directory
          filterPath = path.getParent();
          inpFiles.add(path);
          poolSet.add(filterPath);
        } else {
          inpDirs.add(path);
        }
      }
    }


    // Processing directories
    List<InputSplitShim> iss = new ArrayList<InputSplitShim>();
    if (!mrwork.isMapperCannotSpanPartns()) {
      //mapper can span partitions
      //combine into as few as one split, subject to the PathFilters set
      // using combine.createPool.
      iss = Arrays.asList(combine.getSplits(job, 1));
    } else {
      for (Path path : inpDirs) {
        processPaths(job, combine, iss, path);
      }


      if (inpFiles.size() > 0) {
        // Processing files
        for (Path filterPath : poolSet) {
          combine.createPool(job, new CombineFilter(filterPath));
        }
        processPaths(job, combine, iss, inpFiles.toArray(new Path[0]));
      }
    }


    if (mrwork.getNameToSplitSample() != null && !mrwork.getNameToSplitSample().isEmpty()) {
      iss = sampleSplits(iss);
    }


    for (InputSplitShim is : iss) {
      CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
      result.add(csplit);
    }


    LOG.info("number of splits " + result.size());
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
    return result.toArray(new CombineHiveInputSplit[result.size()]);
  }

View Full Code Here

      result.add(new HiveInputSplit(is, inputFormatClass.getName()));
    }
  }


  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
    init(job);


    Path[] dirs = FileInputFormat.getInputPaths(job);
    if (dirs.length == 0) {
      // on tez we're avoiding to duplicate the file info in FileInputFormat.
      if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
        try {
          List<Path> paths = Utilities.getInputPathsTez(job, mrwork);
          dirs = paths.toArray(new Path[paths.size()]);
        } catch (Exception e) {
          throw new IOException("Could not create input files", e);
        }
      } else {
        throw new IOException("No input paths specified in job");
      }
    }
    JobConf newjob = new JobConf(job);
    List<InputSplit> result = new ArrayList<InputSplit>();


    List<Path> currentDirs = new ArrayList<Path>();
    Class<? extends InputFormat> currentInputFormatClass = null;
    TableDesc currentTable = null;
    TableScanOperator currentTableScan = null;


    // for each dir, get the InputFormat, and do getSplits.
    for (Path dir : dirs) {
      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
      Class<? extends InputFormat> inputFormatClass = part.getInputFileFormatClass();
      TableDesc table = part.getTableDesc();
      TableScanOperator tableScan = null;


      List<String> aliases =
          mrwork.getPathToAliases().get(dir.toUri().toString());


      // Make filter pushdown information available to getSplits.
      if ((aliases != null) && (aliases.size() == 1)) {
        Operator op = mrwork.getAliasToWork().get(aliases.get(0));
        if ((op != null) && (op instanceof TableScanOperator)) {
          tableScan = (TableScanOperator) op;
          // push down projections.
          ColumnProjectionUtils.appendReadColumns(
              newjob, tableScan.getNeededColumnIDs(), tableScan.getNeededColumns());
          // push down filters
          pushFilters(newjob, tableScan);
        }
      }


      if (!currentDirs.isEmpty() &&
          inputFormatClass.equals(currentInputFormatClass) &&
          table.equals(currentTable) &&
          tableScan == currentTableScan) {
        currentDirs.add(dir);
        continue;
      }


      if (!currentDirs.isEmpty()) {
        LOG.info("Generating splits");
        addSplitsForGroup(currentDirs, currentTableScan, newjob,
            getInputFormatFromCache(currentInputFormatClass, job),
            currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length),
            currentTable, result);
      }


      currentDirs.clear();
      currentDirs.add(dir);
      currentTableScan = tableScan;
      currentTable = table;
      currentInputFormatClass = inputFormatClass;
    }


    if (dirs.length != 0) {
      LOG.info("Generating splits");
      addSplitsForGroup(currentDirs, currentTableScan, newjob,
          getInputFormatFromCache(currentInputFormatClass, job),
          currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length),
          currentTable, result);
    }


    LOG.info("number of splits " + result.size());
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
    return result.toArray(new HiveInputSplit[result.size()]);
  }

View Full Code Here

      boolean doEvalClientSide = hasUserFunctions(compactExpr);


      // Now filter.
      List<Partition> partitions = new ArrayList<Partition>();
      boolean hasUnknownPartitions = false;
      PerfLogger perfLogger = PerfLogger.getPerfLogger();
      if (!doEvalClientSide) {
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
        try {
          hasUnknownPartitions = Hive.get().getPartitionsByExpr(
              tab, compactExpr, conf, partitions);
        } catch (IMetaStoreClient.IncompatibleMetastoreException ime) {
          // TODO: backward compat for Hive <= 0.12. Can be removed later.
          LOG.warn("Metastore doesn't support getPartitionsByExpr", ime);
          doEvalClientSide = true;
        } finally {
          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
        }
      }
      if (doEvalClientSide) {
        // Either we have user functions, or metastore is old version - filter names locally.
        hasUnknownPartitions = pruneBySequentialScan(tab, partitions, compactExpr, conf);

View Full Code Here

      throw new HiveException(e);
    }
  }


  private static Set<Partition> getAllPartitions(Table tab) throws HiveException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    Set<Partition> result = Hive.get().getAllPartitionsOf(tab);
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    return result;
  }

View Full Code Here

   * @param conf Hive Configuration object, can not be NULL.
   * @return true iff the partition pruning expression contains non-partition columns.
   */
  static private boolean pruneBySequentialScan(Table tab, List<Partition> partitions,
      ExprNodeGenericFuncDesc prunerExpr, HiveConf conf) throws HiveException, MetaException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRUNE_LISTING);


    List<String> partNames = Hive.get().getPartitionNames(
        tab.getDbName(), tab.getTableName(), (short) -1);


    String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
    List<String> partCols = extractPartColNames(tab);


    boolean hasUnknownPartitions = prunePartitionNames(
        partCols, prunerExpr, defaultPartitionName, partNames);
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRUNE_LISTING);


    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    if (!partNames.isEmpty()) {
      partitions.addAll(Hive.get().getPartitionsByNames(tab, partNames));
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    return hasUnknownPartitions;
  }

View Full Code Here

        roots.getClass(), conf, true);
    return result;
  }


  private static void serializePlan(Object plan, OutputStream out, Configuration conf, boolean cloningPlan) {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SERIALIZE_PLAN);
    String serializationType = conf.get(HiveConf.ConfVars.PLAN_SERIALIZATION.varname, "kryo");
    LOG.info("Serializing " + plan.getClass().getSimpleName() + " via " + serializationType);
    if("javaXML".equalsIgnoreCase(serializationType)) {
      serializeObjectByJavaXML(plan, out);
    } else {
      if(cloningPlan) {
        serializeObjectByKryo(cloningQueryPlanKryo.get(), plan, out);
      } else {
        serializeObjectByKryo(runtimeSerializationKryo.get(), plan, out);
      }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SERIALIZE_PLAN);
  }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.hive.ql.log.PerfLogger

org.apache.hadoop.hive.conf.HiveConf

org.apache.hadoop.hive.ql.Driver

org.apache.hadoop.hive.ql.exec.Utilities

org.apache.hadoop.hive.ql.io.CombineHiveInputFormat

org.apache.hadoop.hive.ql.io.HiveInputFormat

org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner

org.apache.hadoop.hive.ql.session.SessionState

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.