Package org.apache.hadoop.hive.ql.log

Examples of org.apache.hadoop.hive.ql.log.PerfLogger

    return dirs;

  /**
   * Builds the input splits for all input paths of the job, timing the work under
   * {@code PerfLogger.GET_SPLITS}.
   *
   * <p>NOTE(review): this listing is an abridged excerpt -- closing braces, logging calls and
   * several statements are missing, so the comments describe only what is visible here.
   *
   * @param job the job configuration; it is copied into {@code newjob}, which receives the
   *        projection and filter pushdown settings
   * @param numSplits requested number of splits; each group of directories is handed
   *        {@code currentDirs.size() * (numSplits / dirs.length)} as its share
   * @return the collected splits as a {@code HiveInputSplit} array
   * @throws IOException declared by the signature; presumably raised by the path and
   *         input-format lookups -- confirm against the full source
   */
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
    Path[] dirs = getInputPaths(job);
    // Work on a copy so that pushdown settings do not modify the caller's JobConf.
    JobConf newjob = new JobConf(job);
    List<InputSplit> result = new ArrayList<InputSplit>();

    // State of the group of directories currently being accumulated.
    List<Path> currentDirs = new ArrayList<Path>();
    Class<? extends InputFormat> currentInputFormatClass = null;
    TableDesc currentTable = null;
    TableScanOperator currentTableScan = null;

    boolean pushDownProjection = false;
    // Buffers that accumulate the read-column ids and names, seeded from the values
    // already present in the job configuration.
    StringBuilder readColumnsBuffer = new StringBuilder(newjob.
      get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, ""));;
    StringBuilder readColumnNamesBuffer = new StringBuilder(newjob.
      get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, ""));
    // for each dir, get the InputFormat, and do getSplits.
    for (Path dir : dirs) {
      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
      Class<? extends InputFormat> inputFormatClass = part.getInputFileFormatClass();
      TableDesc table = part.getTableDesc();
      TableScanOperator tableScan = null;

      // NOTE(review): the right-hand side of this assignment is missing from the excerpt.
      List<String> aliases =

      // Make filter pushdown information available to getSplits.
      // Only done when the path maps to exactly one alias whose operator is a table scan.
      if ((aliases != null) && (aliases.size() == 1)) {
        Operator op = mrwork.getAliasToWork().get(aliases.get(0));
        if ((op != null) && (op instanceof TableScanOperator)) {
          tableScan = (TableScanOperator) op;
          //Reset buffers to store filter push down columns
          // push down projections.
          ColumnProjectionUtils.appendReadColumns(readColumnsBuffer, readColumnNamesBuffer,
            tableScan.getNeededColumnIDs(), tableScan.getNeededColumns());
          pushDownProjection = true;
          // push down filters
          pushFilters(newjob, tableScan);

      // This directory matches the current group: same input format, table and table scan
      // (the table scan is compared by reference). The branch body is missing from the excerpt.
      if (!currentDirs.isEmpty() &&
          inputFormatClass.equals(currentInputFormatClass) &&
          table.equals(currentTable) &&
          tableScan == currentTableScan) {

      // Emit splits for the group collected so far. numSplits / dirs.length is integer
      // division, so the per-group hint is 0 whenever numSplits < dirs.length.
      if (!currentDirs.isEmpty()) {"Generating splits");
        addSplitsForGroup(currentDirs, currentTableScan, newjob,
            getInputFormatFromCache(currentInputFormatClass, job),
            currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length),
            currentTable, result);

      // Remember the characteristics of this directory as the key of the next group.
      currentTableScan = tableScan;
      currentTable = table;
      currentInputFormatClass = inputFormatClass;
    // Publish the accumulated projection columns on the job copy, but only when at least one
    // table scan contributed them.
    if (pushDownProjection) {
      newjob.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
      newjob.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColumnsBuffer.toString());
      // NOTE(review): the tail of the next line is garbled in this excerpt (stray
      // concatenation fragments after the statement terminator).
      newjob.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColumnNamesBuffer.toString()); + "=" + readColumnsBuffer.toString()); + "=" + readColumnNamesBuffer.toString());

    // Flush the last group; skipped entirely when there are no input directories.
    if (dirs.length != 0) {"Generating splits");
      addSplitsForGroup(currentDirs, currentTableScan, newjob,
          getInputFormatFromCache(currentInputFormatClass, job),
          currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length),
          currentTable, result);

    Utilities.clearWorkMapForConf(job);"number of splits " + result.size());
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
    return result.toArray(new HiveInputSplit[result.size()]);
View Full Code Here

   * @return the summary of all the input paths.
   * @throws IOException
  // Sums total length, file count and directory count over the input paths of the given
  // work, timing the call under PerfLogger.INPUT_SUMMARY. Summaries already held by ctx are
  // counted directly; summaries computed here are added to ctx through ctx.addCS().
  // The whole computation runs while holding INPUT_SUMMARY_LOCK.
  // NOTE(review): this listing is an abridged excerpt -- closing braces, logging calls and
  // several statements are missing, so the comments describe only what is visible here.
  public static ContentSummary getInputSummary(final Context ctx, MapWork work, PathFilter filter)
      throws IOException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.INPUT_SUMMARY);

    // summary[0] = length, summary[1] = file count, summary[2] = directory count.
    long[] summary = {0, 0, 0};

    // Paths that still have to be processed by the loop further below.
    final List<String> pathNeedProcess = new ArrayList<String>();

    // Since multiple threads could call this method concurrently, locking
    // this method will avoid number of threads out of control.
    synchronized (INPUT_SUMMARY_LOCK) {
      // For each input path, calculate the total size.
      for (String path : work.getPathToAliases().keySet()) {
        Path p = new Path(path);

        // Paths rejected by the optional filter (branch body missing from the excerpt).
        if (filter != null && !filter.accept(p)) {

        // Use the summary held by the context when there is one.
        ContentSummary cs = ctx.getCS(path);
        if (cs == null) {
          if (path == null) {
        } else {
          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();

      // Process the case when name node call is needed
      final Map<String, ContentSummary> resultMap = new ConcurrentHashMap<String, ContentSummary>();
      ArrayList<Future<?>> results = new ArrayList<Future<?>>();
      final ThreadPoolExecutor executor;
      // A thread pool is created only when more than one path is pending and the configured
      // parallelism is above 1; the configuration default is 0, i.e. no pool.
      int maxThreads = ctx.getConf().getInt("mapred.dfsclient.parallelism.max", 0);
      if (pathNeedProcess.size() > 1 && maxThreads > 1) {
        int numExecutors = Math.min(pathNeedProcess.size(), maxThreads);"Using " + numExecutors + " threads for getContentSummary");
        executor = new ThreadPoolExecutor(numExecutors, numExecutors, 60, TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>());
      } else {
        executor = null;

      // Interrupt callback: closes the FileSystem of every pending path.
      HiveInterruptCallback interrup = HiveInterruptUtils.add(new HiveInterruptCallback() {
        public void interrupt() {
          for (String path : pathNeedProcess) {
            try {
              new Path(path).getFileSystem(ctx.getConf()).close();
            } catch (IOException ignore) {
          if (executor != null) {
      try {
        Configuration conf = ctx.getConf();
        JobConf jobConf = new JobConf(conf);
        for (String path : pathNeedProcess) {
          final Path p = new Path(path);
          final String pathStr = path;
          // All threads share the same Configuration and JobConf based on the
          // assumption that they are thread safe if only read operations are
          // executed. It is not stated in Hadoop's javadoc, but the source code
          // clearly shows that efforts were made for it and we believe it is
          // thread safe. Will revisit this piece of code if we find the assumption
          // is not correct.
          final Configuration myConf = conf;
          final JobConf myJobConf = jobConf;
          final Map<String, Operator<?>> aliasToWork = work.getAliasToWork();
          final Map<String, ArrayList<String>> pathToAlias = work.getPathToAliases();
          final PartitionDesc partDesc = work.getPathToPartitionInfo().get(
          // One unit of work per path; its result is stored in resultMap under the path string.
          Runnable r = new Runnable() {
            public void run() {
              try {
                Class<? extends InputFormat> inputFormatCls = partDesc
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                // Case 1: the input format can report its own content summary.
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
                  resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
                HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf,
                // Case 2: the storage handler can estimate the input size; the estimates of
                // all aliases reading this path are summed, and file/directory counts are
                // recorded as -1.
                if (handler instanceof InputEstimator) {
                  long total = 0;
                  TableDesc tableDesc = partDesc.getTableDesc();
                  InputEstimator estimator = (InputEstimator) handler;
                  for (String alias : HiveFileFormatUtils.doGetAliasesFromPath(pathToAlias, p)) {
                    // Per-alias copy of the job conf, populated with the column names/types
                    // of the scan and the table's job properties.
                    JobConf jobConf = new JobConf(myJobConf);
                    TableScanOperator scanOp = (TableScanOperator) aliasToWork.get(alias);
                    Utilities.setColumnNameList(jobConf, scanOp, true);
                    Utilities.setColumnTypeList(jobConf, scanOp, true);
                    Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
                    // NOTE(review): estimate() receives myJobConf, not the per-alias jobConf
                    // configured just above -- confirm this is intended.
                    total += estimator.estimate(myJobConf, scanOp, -1).getTotalLength();
                  resultMap.put(pathStr, new ContentSummary(total, -1, -1));
                // todo: should nullify summary for non-native tables,
                // not to be selected as a mapjoin target
                // Case 3: ask the file system for the summary of the path.
                FileSystem fs = p.getFileSystem(myConf);
                resultMap.put(pathStr, fs.getContentSummary(p));
              } catch (Exception e) {
                // We safely ignore this exception for summary data.
                // We don't update the cache to protect it from polluting other
                // usages. The worst case is that IOException will always be
                // retried for another getInputSummary(), which is fine as
                // IOException is not considered as a common case.
      "Cannot get size of " + pathStr + ". Safely ignored.");

          // Without an executor the other branch is taken (its body is missing from the
          // excerpt); otherwise the work is submitted to the pool.
          if (executor == null) {
          } else {
            Future<?> result = executor.submit(r);

        // Wait for the submitted work; an ExecutionException is rethrown as IOException.
        if (executor != null) {
          for (Future<?> result : results) {
            boolean executorDone = false;
            do {
              try {
                executorDone = true;
              } catch (InterruptedException e) {
      "Interrupted when waiting threads: ", e);
              } catch (ExecutionException e) {
                throw new IOException(e);
            } while (!executorDone);
        // Fold the newly computed summaries into the totals and record them in the context.
        for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
          ContentSummary cs = entry.getValue();

          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();

          ctx.addCS(entry.getKey(), cs);
"Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength()
              + " file count: "
              + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());

        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
        return new ContentSummary(summary[0], summary[1], summary[2]);
      } finally {
View Full Code Here

   * Create Hive splits based on CombineFileSplit.
  // Builds combined splits for the job through a CombineFileInputFormatShim, timing the call
  // under PerfLogger.GET_SPLITS. Several conditions make the method give up on combining and
  // delegate to super.getSplits(): no shim available, a non-native table, compressed text
  // input when Hadoop does not support splittable handling, or a SymlinkTextInputFormat.
  // Throws IOException when the job has no input paths.
  // NOTE(review): this listing is an abridged excerpt -- closing braces, logging calls and
  // several statements are missing, so the comments describe only what is visible here.
  private InputSplit[] getCombineSplits(JobConf job,
                                        int numSplits) throws IOException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
    Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
    Map<String, Operator<? extends OperatorDesc>> aliasToWork =
    CombineFileInputFormatShim combine = ShimLoader.getHadoopShims()

    InputSplit[] splits = null;
    // No combine shim available: fall back to the parent implementation.
    if (combine == null) {
      splits = super.getSplits(job, numSplits);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
      return splits;

    if (combine.getInputPathsShim(job).length == 0) {
      throw new IOException("No input paths specified in job");
    ArrayList<InputSplit> result = new ArrayList<InputSplit>();

    // combine splits only from same tables and same partitions. Do not combine splits from multiple
    // tables or multiple partitions.
    Path[] paths = combine.getInputPathsShim(job);

    List<Path> inpDirs = new ArrayList<Path>();
    List<Path> inpFiles = new ArrayList<Path>();
    // Pools already created, keyed by operator list + input format + deserializer.
    Map<CombinePathInputFormat, CombineFilter> poolMap =
      new HashMap<CombinePathInputFormat, CombineFilter>();
    Set<Path> poolSet = new HashSet<Path>();

    for (Path path : paths) {
      PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
          pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
      TableDesc tableDesc = part.getTableDesc();
      // NOTE(review): unlike the other early returns in this excerpt, this one does not call
      // PerfLogEnd for GET_SPLITS -- confirm whether that is intentional.
      if ((tableDesc != null) && tableDesc.isNonNative()) {
        return super.getSplits(job, numSplits);

      // Use HiveInputFormat if any of the paths is not splittable
      Class inputFormatClass = part.getInputFileFormatClass();
      String inputFormatClassName = inputFormatClass.getName();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
      // Stays null when the deserializer cannot be obtained.
      String deserializerClassName = null;
      try {
        deserializerClassName = part.getDeserializer(job).getClass().getName();
      } catch (Exception e) {
        // ignore
      FileSystem inpFs = path.getFileSystem(job);

      // Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not,
      // we use a configuration variable for the same
      if (this.mrwork != null && !this.mrwork.getHadoopSupportsSplittable()) {
        // The following code should be removed, once
        // is fixed.
        // NOTE(review): the issue reference is missing from the sentence above in this
        // excerpt; presumably MAPREDUCE-1597 -- confirm.
        // Hadoop does not handle non-splittable files correctly for CombineFileInputFormat,
        // so don't use CombineFileInputFormat for non-splittable files

        // i.e., don't combine if inputformat is a TextInputFormat and has compression turned on

        if (inputFormat instanceof TextInputFormat) {
          // Work list of directories still to be scanned for compressed files.
          Queue<Path> dirs = new LinkedList<Path>();
          FileStatus fStats = inpFs.getFileStatus(path);

          // If path is a directory
          if (fStats.isDir()) {
          } else if ((new CompressionCodecFactory(job)).getCodec(path) != null) {
            // if compression codec is set, use HiveInputFormat.getSplits (don't combine)
            splits = super.getSplits(job, numSplits);
            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
            return splits;

          // Scan the queued directories; any file with a compression codec disables combining.
          while (dirs.peek() != null) {
            Path tstPath = dirs.remove();
            FileStatus[] fStatus = inpFs.listStatus(tstPath);
            for (int idx = 0; idx < fStatus.length; idx++) {
              if (fStatus[idx].isDir()) {
              } else if ((new CompressionCodecFactory(job)).getCodec(
                  fStatus[idx].getPath()) != null) {
                // if compression codec is set, use HiveInputFormat.getSplits (don't combine)
                splits = super.getSplits(job, numSplits);
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
                return splits;
      //don't combine if inputformat is a SymlinkTextInputFormat
      if (inputFormat instanceof SymlinkTextInputFormat) {
        splits = super.getSplits(job, numSplits);
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
        return splits;

      Path filterPath = path;

      // Does a pool exist for this path already
      CombineFilter f = null;
      List<Operator<? extends OperatorDesc>> opList = null;

      if (!mrwork.isMapperCannotSpanPartns()) {
        // if mapper can span partitions, make sure a split does not contain multiple
        // opList + inputFormatClassName + deserializerClassName combination
        // This is done using the Map of CombinePathInputFormat to PathFilter

        opList = HiveFileFormatUtils.doGetWorksFromPath(
                   pathToAliases, aliasToWork, filterPath);
        CombinePathInputFormat combinePathInputFormat =
            new CombinePathInputFormat(opList, inputFormatClassName, deserializerClassName);
        f = poolMap.get(combinePathInputFormat);
        // First path with this combination: create a pool for it and remember the filter.
        if (f == null) {
          f = new CombineFilter(filterPath);
"CombineHiveInputSplit creating pool for " + path +
                   "; using filter path " + filterPath);
          combine.createPool(job, f);
          poolMap.put(combinePathInputFormat, f);
        } else {
"CombineHiveInputSplit: pool is already created for " + path +
                   "; using filter path " + filterPath);
      } else {
        // In the case of tablesample, the input paths are pointing to files rather than directories.
        // We need to get the parent directory as the filtering path so that all files in the same
        // parent directory will be grouped into one pool but not files from different parent
        // directories. This guarantees that a split will combine all files in the same partition
        // but won't cross multiple partitions if the user has asked so.
        if (!path.getFileSystem(job).getFileStatus(path).isDir()) { // path is not directory
          filterPath = path.getParent();
        } else {

    // Processing directories
    List<InputSplitShim> iss = new ArrayList<InputSplitShim>();
    if (!mrwork.isMapperCannotSpanPartns()) {
      //mapper can span partitions
      //combine into as few as one split, subject to the PathFilters set
      // using combine.createPool.
      iss = Arrays.asList(combine.getSplits(job, 1));
    } else {
      // Mapper cannot span partitions: process each input directory on its own.
      for (Path path : inpDirs) {
        processPaths(job, combine, iss, path);

      if (inpFiles.size() > 0) {
        // Processing files
        // One pool per filter path collected in poolSet, then all files in a single call.
        for (Path filterPath : poolSet) {
          combine.createPool(job, new CombineFilter(filterPath));
        processPaths(job, combine, iss, inpFiles.toArray(new Path[0]));

    // Apply split sampling when the work declares any split samples.
    if (mrwork.getNameToSplitSample() != null && !mrwork.getNameToSplitSample().isEmpty()) {
      iss = sampleSplits(iss);

    // Wrap every shim split into a CombineHiveInputSplit.
    for (InputSplitShim is : iss) {
      CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
    }"number of splits " + result.size());
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
    return result.toArray(new CombineHiveInputSplit[result.size()]);
View Full Code Here

   * @param command  The HiveQL query to compile
   * @param resetTaskIds Resets taskID counter if true.
   * @return 0 for ok
  // Compiles a HiveQL command: variable substitution, parsing, semantic analysis, query plan
  // construction, schema lookup and (when enabled) authorization. The phases are timed with
  // PerfLogger under COMPILE, PARSE, ANALYZE and DO_AUTHORIZATION.
  // Returns 0 on success, 403 when authorization fails, and otherwise the error code of the
  // ErrorMsg derived from the exception message.
  // NOTE(review): this listing is an abridged excerpt -- closing braces, logging calls and
  // several statements are missing, so the comments describe only what is visible here.
  public int compile(String command, boolean resetTaskIds) {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);

    //holder for parent command type/string when executing reentrant queries
    QueryState queryState = new QueryState();

    // Drop any plan left over from a previous compilation.
    if (plan != null) {
      plan = null;

    if (resetTaskIds) {

    // generate new query id
    String queryId = QueryPlan.makeQueryId();
    conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId);

    try {
      // Substitute variables in the command text before it is parsed.
      command = new VariableSubstitution().substitute(conf,command);
      ctx = new Context(conf);

      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE);
      ParseDriver pd = new ParseDriver();
      ASTNode tree = pd.parse(command, ctx);
      tree = ParseUtils.findRootNonNullToken(tree);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE);

      // Initialize the transaction manager.  This must be done before analyze is called.  Also
      // record the valid transactions for this query.  We have to do this at compile time
      // because we use the information in planning the query.  Also,
      // we want to record it at this point so that users see data valid at the point that they
      // submit the query.

      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
      BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
      List<HiveSemanticAnalyzerHook> saHooks =

      // Do semantic analysis and plan generation
      // With hooks: preAnalyze may replace the tree, and postAnalyze sees the root tasks.
      if (saHooks != null) {
        HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
        for (HiveSemanticAnalyzerHook hook : saHooks) {
          tree = hook.preAnalyze(hookCtx, tree);
        sem.analyze(tree, ctx);
        for (HiveSemanticAnalyzerHook hook : saHooks) {
          hook.postAnalyze(hookCtx, sem.getRootTasks());
      } else {
        sem.analyze(tree, ctx);
      // Record any ACID compliant FileSinkOperators we saw so we can add our transaction ID to
      // them later.
      acidSinks = sem.getAcidFileSinks();"Semantic Analysis Completed");

      // validate the plan
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);

      plan = new QueryPlan(command, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN), queryId,

      String queryStr = plan.getQueryStr();
      conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr);

      // NOTE(review): both configuration keys below are empty strings in this excerpt --
      // confirm the intended property names against the full source.
      conf.set("", "hive_" + queryId);
      conf.set("", queryStr);

      // initialize FetchTask right here
      if (plan.getFetchTask() != null) {
        plan.getFetchTask().initialize(conf, plan, null);

      // get the output schema
      schema = getSchema(sem, conf);

      //do the authorization check
      // Skipped when the analyzer asks to skip it or authorization is disabled in the conf.
      if (!sem.skipAuthorization() &&
          HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {

        try {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
          doAuthorization(sem, command);
        } catch (AuthorizationException authExp) {
          // Authorization failure: report it, set SQLState 42000 and return 403.
          console.printError("Authorization failed:" + authExp.getMessage()
              + ". Use SHOW GRANT to get more details.");
          errorMessage = authExp.getMessage();
          SQLState = "42000";
          return 403;
        } finally {
          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);

      // Optionally produce the EXPLAIN output for the query.
      if (conf.getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT)) {
        String explainOutput = getExplainOutput(sem, plan, tree.dump());
        if (explainOutput != null) {
"EXPLAIN output for queryid " + queryId + " : "
              + explainOutput);

      return 0;
    } catch (Exception e) {
      // Build "FAILED: <exception class> [Error <code>]: <message>"; the error-code part is
      // added only when the message maps to something other than GENERIC_ERROR.
      ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
      errorMessage = "FAILED: " + e.getClass().getSimpleName();
      if (error != ErrorMsg.GENERIC_ERROR) {
        errorMessage += " [Error "  + error.getErrorCode()  + "]:";

      // HIVE-4889
      // An IllegalArgumentException without a message reports the message of its cause.
      if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) {
        errorMessage += " " + e.getCause().getMessage();
      } else {
        errorMessage += " " + e.getMessage();

      SQLState = error.getSQLState();
      downstreamError = e;
      console.printError(errorMessage, "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return error.getErrorCode();
    } finally {
      // The COMPILE timer is closed on every exit path from the try block.
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE);
View Full Code Here

   * This method also records the list of valid transactions.  This must be done after any
   * transactions have been opened and locks acquired.
  // Opens a transaction when the plan has ACID file sinks and none is current, then acquires
  // the locks for the plan through the session's transaction manager. Timed under
  // PerfLogger.ACQUIRE_READ_WRITE_LOCKS.
  // Returns 0 on success and 10 when the user cannot be determined or a LockException occurs;
  // on failure errorMessage, SQLState and downstreamError are set.
  // NOTE(review): this listing is an abridged excerpt -- closing braces and several
  // statements are missing, so the comments describe only what is visible here.
  private int acquireLocksAndOpenTxn() {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);

    SessionState ss = SessionState.get();
    HiveTxnManager txnMgr = ss.getTxnMgr();

    try {
      // Don't use the userName member, as it may or may not have been set.  Get the value from
      // conf, which calls into getUGI to figure out who the process is running as.
      String userFromUGI;
      try {
        userFromUGI = conf.getUser();
      } catch (IOException e) {
        errorMessage = "FAILED: Error in determining user while acquiring locks: " + e.getMessage();
        SQLState = ErrorMsg.findSQLState(e.getMessage());
        downstreamError = e;
            "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return 10;
      if (acidSinks != null && acidSinks.size() > 0) {
        // We are writing to tables in an ACID compliant way, so we need to open a transaction
        // A new transaction is opened only when the session has no current one.
        long txnId = ss.getCurrentTxn();
        if (txnId == SessionState.NO_CURRENT_TXN) {
          txnId = txnMgr.openTxn(userFromUGI);
        // Set the transaction id in all of the acid file sinks
        // NOTE(review): this null check repeats the enclosing guard on acidSinks.
        if (acidSinks != null) {
          for (FileSinkDesc desc : acidSinks) {

        // TODO Once we move to cross query transactions we need to add the open transaction to
        // our list of valid transactions.  We don't have a way to do that right now.

      txnMgr.acquireLocks(plan, ctx, userFromUGI);

      return 0;
    } catch (LockException e) {
      errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
      SQLState = ErrorMsg.findSQLState(e.getMessage());
      downstreamError = e;
      console.printError(errorMessage, "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return 10;
    } finally {
      // The timer is closed on every exit path from the try block.
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
View Full Code Here

   *               if false rollback.  If there is no open transaction this parameter is ignored.
  // Ends the current transaction or releases the given locks, timed under
  // PerfLogger.RELEASE_LOCKS. When the session has a current transaction and auto-commit is
  // on, the commit flag selects between the two branches of the inner if/else; otherwise the
  // hiveLocks list is handled, provided it is not null.
  // NOTE(review): the bodies of all branches are missing from this excerpt; presumably they
  // commit, roll back and release locks respectively -- confirm against the full source.
  private void releaseLocksAndCommitOrRollback(List<HiveLock> hiveLocks, boolean commit)
      throws LockException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RELEASE_LOCKS);

    SessionState ss = SessionState.get();
    HiveTxnManager txnMgr = ss.getTxnMgr();
    // If we've opened a transaction we need to commit or rollback rather than explicitly
    // releasing the locks.
    if (ss.getCurrentTxn() != SessionState.NO_CURRENT_TXN && ss.isAutoCommit()) {
      try {
        if (commit) {
        } else {
      } finally {
    } else {
      if (hiveLocks != null) {

    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RELEASE_LOCKS);
View Full Code Here

          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return createProcessorResponse(12);

    // Reset the perf logger
    PerfLogger perfLogger = PerfLogger.getPerfLogger(true);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_RUN);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TIME_TO_SUBMIT);

    boolean requireLock = false;
    boolean ckLock = false;
    SessionState ss = SessionState.get();
    try {
      ckLock = checkConcurrency();
    } catch (SemanticException e) {
      errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
      SQLState = ErrorMsg.findSQLState(e.getMessage());
      downstreamError = e;
      console.printError(errorMessage, "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return createProcessorResponse(10);

    int ret;
    if (!alreadyCompiled) {
      ret = compileInternal(command);
      if (ret != 0) {
        return createProcessorResponse(ret);

    // the reason that we set the txn manager for the cxt here is because each
    // query has its own ctx object. The txn mgr is shared across the
    // same instance of Driver, which can run multiple queries.

    if (ckLock) {
      boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY);
      if(lockOnlyMapred) {
        Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>();
        while (taskQueue.peek() != null) {
          Task<? extends Serializable> tsk = taskQueue.remove();
          requireLock = requireLock || tsk.requireLock();
          if(requireLock) {
          if (tsk instanceof ConditionalTask) {
          if(tsk.getChildTasks()!= null) {
          // does not add back up task here, because back up task should be the same
          // type of the original task.
      } else {
        requireLock = true;

    if (requireLock) {
      ret = acquireLocksAndOpenTxn();
      if (ret != 0) {
        try {
          releaseLocksAndCommitOrRollback(ctx.getHiveLocks(), false);
        } catch (LockException e) {
          // Not much to do here
        return createProcessorResponse(ret);

    ret = execute();
    if (ret != 0) {
      //if needRequireLock is false, the release here will do nothing because there is no lock
      try {
        releaseLocksAndCommitOrRollback(ctx.getHiveLocks(), false);
      } catch (LockException e) {
        // Nothing to do here
      return createProcessorResponse(ret);

    //if needRequireLock is false, the release here will do nothing because there is no lock
    try {
      releaseLocksAndCommitOrRollback(ctx.getHiveLocks(), true);
    } catch (LockException e) {
      errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
      SQLState = ErrorMsg.findSQLState(e.getMessage());
      downstreamError = e;
      console.printError(errorMessage + "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return createProcessorResponse(12);

    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_RUN);
    perfLogger.close(LOG, plan);

    // Take all the driver run hooks and post-execute them.
    try {
      for (HiveDriverRunHook driverRunHook : driverRunHooks) {
View Full Code Here

      throw e;

  public int execute() throws CommandNeedRetryException {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE);

    boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME));
    int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);

    String queryId = plan.getQueryId();
    String queryStr = plan.getQueryStr();

    maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);

    try {"Starting command: " + queryStr);


      if (SessionState.get() != null) {
      resStream = null;

      SessionState ss = SessionState.get();
      HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS(), ss.getUserName(), ss.getUserIpAddress());

      for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) {
        if (peh instanceof ExecuteWithHookContext) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());

          ((ExecuteWithHookContext) peh).run(hookContext);

          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
        } else if (peh instanceof PreExecute) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());

          ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(),

          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());

      int jobs = Utilities.getMRTasks(plan.getRootTasks()).size()
        + Utilities.getTezTasks(plan.getRootTasks()).size();
      if (jobs > 0) {
        console.printInfo("Query ID = " + plan.getQueryId());
        console.printInfo("Total jobs = " + jobs);
      if (SessionState.get() != null) {
        SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_NUM_TASKS,
      String jobname = Utilities.abbreviate(queryStr, maxlen - 6);

      // A runtime that launches runnable tasks as separate Threads through
      // TaskRunners
      // As soon as a task isRunnable, it is put in a queue
      // At any time, at most maxthreads tasks can be running
      // The main thread polls the TaskRunners to check if they have finished.

      DriverContext driverCxt = new DriverContext(ctx);


      this.driverCxt = driverCxt; // for canceling the query (should be bound to session?)

      SessionState.get().setMapRedStats(new LinkedHashMap<String, MapRedStats>());
      SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>());
      SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>());

      // Add root Tasks to runnable
      for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
        // This should never happen, if it does, it's a bug with the potential to produce
        // incorrect results.
        assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();

      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TIME_TO_SUBMIT);
      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RUN_TASKS);
      // Loop while you either have tasks running, or tasks queued up
      while (!destroyed && driverCxt.isRunning()) {

        // Launch up to maxthreads tasks
        Task<? extends Serializable> task;
        while ((task = driverCxt.getRunnable(maxthreads)) != null) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TASK + task.getName() + "." + task.getId());
          TaskRunner runner = launchTask(task, queryId, noName, jobname, jobs, driverCxt);
          if (!runner.isRunning()) {

        // poll the Tasks to see which one completed
        TaskRunner tskRun = driverCxt.pollFinished();
        if (tskRun == null) {

        Task<? extends Serializable> tsk = tskRun.getTask();
        TaskResult result = tskRun.getTaskResult();

        int exitVal = result.getExitVal();
        if (exitVal != 0) {
          if (tsk.ifRetryCmdWhenFail()) {
            // in case we decided to run everything in local mode, restore
            // the jobtracker setting to its initial value
            throw new CommandNeedRetryException();
          Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
          if (backupTask != null) {
            setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
            errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();

            // add backup task to runnable
            if (DriverContext.isLaunchable(backupTask)) {

          } else {
            // Get all the failure execution hooks and execute them.
            for (Hook ofh : getHooks(HiveConf.ConfVars.ONFAILUREHOOKS)) {
              perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());

              ((ExecuteWithHookContext) ofh).run(hookContext);

              perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());
            setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
            SQLState = "08S01";
            // in case we decided to run everything in local mode, restore
            // the jobtracker setting to its initial value
            return exitVal;


        if (SessionState.get() != null) {
          SessionState.get().getHiveHistory().setTaskProperty(queryId, tsk.getId(),
              Keys.TASK_RET_CODE, String.valueOf(exitVal));
          SessionState.get().getHiveHistory().endTask(queryId, tsk);

        if (tsk.getChildTasks() != null) {
          for (Task<? extends Serializable> child : tsk.getChildTasks()) {
            if (DriverContext.isLaunchable(child)) {
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RUN_TASKS);

      // in case we decided to run everything in local mode, restore
      // the jobtracker setting to its initial value

      if (driverCxt.isShutdown()) {
        SQLState = "HY008";
        errorMessage = "FAILED: Operation cancelled";
        return 1000;

      // remove incomplete outputs.
      // Some incomplete outputs may be added at the beginning, e.g. for dynamic partitions.
      // remove them
      HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
      for (WriteEntity output : plan.getOutputs()) {
        if (!output.isComplete()) {

      for (WriteEntity output : remOutputs) {

      // Get all the post execution hooks and execute them.
      for (Hook peh : getHooks(HiveConf.ConfVars.POSTEXECHOOKS)) {
        if (peh instanceof ExecuteWithHookContext) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());

          ((ExecuteWithHookContext) peh).run(hookContext);

          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
        } else if (peh instanceof PostExecute) {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());

          ((PostExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(),
              (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo()
                  : null), ShimLoader.getHadoopShims().getUGIForConf(conf));

          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());

      if (SessionState.get() != null) {
        SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE,
    } catch (CommandNeedRetryException e) {
      throw e;
    } catch (Exception e) {
      if (SessionState.get() != null) {
        SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE,
      // TODO: do better with handling types of Exception here
      errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
      SQLState = "08S01";
      downstreamError = e;
      console.printError(errorMessage + "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return (12);
    } finally {
      if (SessionState.get() != null) {
      if (noName) {
        conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_EXECUTE);

      Map<String, MapRedStats> stats = SessionState.get().getMapRedStats();
      if (stats != null && !stats.isEmpty()) {
        long totalCpu = 0;
        console.printInfo("MapReduce Jobs Launched: ");
View Full Code Here

      try {
        perfLogger = (PerfLogger) ReflectionUtils.newInstance(conf.getClassByName(
            conf.getVar(ConfVars.HIVE_PERF_LOGGER)), conf);
      } catch (ClassNotFoundException e) {
        LOG.error("Performance Logger Class not found:" + e.getMessage());
        perfLogger = new PerfLogger();
    return perfLogger;
View Full Code Here

   * @param command  The HiveQL query to compile
   * @param resetTaskIds Resets taskID counter if true.
   * @return 0 for ok
  public int compile(String command, boolean resetTaskIds) {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);

    //holder for parent command type/string when executing reentrant queries
    QueryState queryState = new QueryState();

    if (plan != null) {
      plan = null;

    if (resetTaskIds) {

    try {
      command = new VariableSubstitution().substitute(conf,command);
      ctx = new Context(conf);

      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE);
      ParseDriver pd = new ParseDriver();
      ASTNode tree = pd.parse(command, ctx);
      tree = ParseUtils.findRootNonNullToken(tree);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE);

      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
      BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
      List<HiveSemanticAnalyzerHook> saHooks =

      // Do semantic analysis and plan generation
      if (saHooks != null) {
        HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
        for (HiveSemanticAnalyzerHook hook : saHooks) {
          tree = hook.preAnalyze(hookCtx, tree);
        sem.analyze(tree, ctx);
        for (HiveSemanticAnalyzerHook hook : saHooks) {
          hook.postAnalyze(hookCtx, sem.getRootTasks());
      } else {
        sem.analyze(tree, ctx);
      }"Semantic Analysis Completed");

      // validate the plan
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);

      plan = new QueryPlan(command, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN));

      String queryId = plan.getQueryId();
      String queryStr = plan.getQueryStr();

      conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId);
      conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr);

      conf.set("", "hive_" + queryId);
      conf.set("", queryStr);

      // initialize FetchTask right here
      if (plan.getFetchTask() != null) {
        plan.getFetchTask().initialize(conf, plan, null);

      // get the output schema
      schema = getSchema(sem, conf);

      //do the authorization check
      if (HiveConf.getBoolVar(conf,
          HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
        try {
          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
        } catch (AuthorizationException authExp) {
          console.printError("Authorization failed:" + authExp.getMessage()
              + ". Use SHOW GRANT to get more details.");
          errorMessage = authExp.getMessage();
          SQLState = "42000";
          return 403;
        } finally {
          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);

      //restore state after we're done executing a specific query

      return 0;
    } catch (Exception e) {
      ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
      errorMessage = "FAILED: " + e.getClass().getSimpleName();
      if (error != ErrorMsg.GENERIC_ERROR) {
        errorMessage += " [Error "  + error.getErrorCode()  + "]:";

      // HIVE-4889
      if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) {
        errorMessage += " " + e.getCause().getMessage();
      } else {
        errorMessage += " " + e.getMessage();

      SQLState = error.getSQLState();
      downstreamError = e;
      console.printError(errorMessage, "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return error.getErrorCode();
    } finally {
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE);
View Full Code Here


Related Classes of org.apache.hadoop.hive.ql.log.PerfLogger

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact