Package org.apache.hadoop.hive.ql.exec.tez

Source Code of org.apache.hadoop.hive.ql.exec.tez.TezJobMonitor

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.exec.tez;

import static org.apache.tez.dag.api.client.DAGStatus.State.RUNNING;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Heartbeater;
import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.DAGStatus;
import org.apache.tez.dag.api.client.Progress;
import org.apache.tez.dag.api.client.StatusGetOpts;

/**
* TezJobMonitor keeps track of a tez job while it's being executed. It will
* print status to the console and retrieve final status of the job after
* completion.
*/
public class TezJobMonitor {

  private static final Log LOG = LogFactory.getLog(TezJobMonitor.class.getName());
  private static final String CLASS_NAME = TezJobMonitor.class.getName();

  private transient LogHelper console;
  private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
  private final int checkInterval = 200;
  private final int maxRetryInterval = 2500;
  private final int printInterval = 3000;
  private long lastPrintTime;
  private Set<String> completed;
  private static final List<DAGClient> shutdownList;

  static {
    shutdownList = Collections.synchronizedList(new LinkedList<DAGClient>());
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        for (DAGClient c: shutdownList) {
          try {
            System.err.println("Trying to shutdown DAG");
            c.tryKillDAG();
          } catch (Exception e) {
            // ignore
          }
        }
        try {
          for (TezSessionState s: TezSessionState.getOpenSessions()) {
            System.err.println("Shutting down tez session.");
            s.close(false);
          }
        } catch (Exception e) {
          // ignore
        }
      }
    });
  }

  public TezJobMonitor() {
    console = new LogHelper(LOG);
  }

  /**
   * monitorExecution handles status printing, failures during execution and final
   * status retrieval.
   *
   * @param dagClient client that was used to kick off the job
   * @param txnMgr transaction manager for this operation
   * @param conf configuration file for this operation
   * @return int 0 - success, 1 - killed, 2 - failed
   */
  public int monitorExecution(final DAGClient dagClient, HiveTxnManager txnMgr,
                              HiveConf conf) throws InterruptedException {
    DAGStatus status = null;
    completed = new HashSet<String>();

    boolean running = false;
    boolean done = false;
    int failedCounter = 0;
    int rc = 0;
    DAGStatus.State lastState = null;
    String lastReport = null;
    Set<StatusGetOpts> opts = new HashSet<StatusGetOpts>();
    Heartbeater heartbeater = new Heartbeater(txnMgr, conf);

    shutdownList.add(dagClient);

    console.printInfo("\n");
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);

    while(true) {

      try {
        status = dagClient.getDAGStatus(opts);
        Map<String, Progress> progressMap = status.getVertexProgress();
        DAGStatus.State state = status.getState();
        heartbeater.heartbeat();

        if (state != lastState || state == RUNNING) {
          lastState = state;

          switch(state) {
          case SUBMITTED:
            console.printInfo("Status: Submitted");
            break;
          case INITING:
            console.printInfo("Status: Initializing");
            break;
          case RUNNING:
            if (!running) {
              perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
              console.printInfo("Status: Running (application id: "
                +dagClient.getApplicationId()+")\n");
              for (String s: progressMap.keySet()) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s);
              }
              running = true;
            }

            lastReport = printStatus(progressMap, lastReport, console);
            break;
          case SUCCEEDED:
            lastReport = printStatus(progressMap, lastReport, console);
            console.printInfo("Status: Finished successfully");
            running = false;
            done = true;
            break;
          case KILLED:
            console.printInfo("Status: Killed");
            running = false;
            done = true;
            rc = 1;
            break;
          case FAILED:
          case ERROR:
            console.printError("Status: Failed");
            running = false;
            done = true;
            rc = 2;
            break;
          }
        }
        if (!done) {
          Thread.sleep(checkInterval);
        }
      } catch (Exception e) {
        console.printInfo("Exception: "+e.getMessage());
        if (++failedCounter % maxRetryInterval/checkInterval == 0
            || e instanceof InterruptedException) {
          try {
            console.printInfo("Killing DAG...");
            dagClient.tryKillDAG();
          } catch(IOException io) {
            // best effort
          } catch(TezException te) {
            // best effort
          }
          e.printStackTrace();
          console.printError("Execution has failed.");
          rc = 1;
          done = true;
        } else {
          console.printInfo("Retrying...");
        }
      } finally {
        if (done) {
          if (rc != 0 && status != null) {
            for (String diag: status.getDiagnostics()) {
              console.printError(diag);
            }
          }
          shutdownList.remove(dagClient);
          break;
        }
      }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
    return rc;
  }

  private String printStatus(Map<String, Progress> progressMap, String lastReport, LogHelper console) {
    StringBuffer reportBuffer = new StringBuffer();

    SortedSet<String> keys = new TreeSet<String>(progressMap.keySet());
    for (String s: keys) {
      Progress progress = progressMap.get(s);
      int complete = progress.getSucceededTaskCount();
      int total = progress.getTotalTaskCount();
      if (total <= 0) {
        reportBuffer.append(String.format("%s: -/-\t", s, complete, total));
      } else {
        if (complete == total && !completed.contains(s)) {
          completed.add(s);
          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s);
        }
        reportBuffer.append(String.format("%s: %d/%d\t", s, complete, total));
      }
    }

    String report = reportBuffer.toString();
    if (!report.equals(lastReport) || System.currentTimeMillis() >= lastPrintTime + printInterval) {
      console.printInfo(report);
      lastPrintTime = System.currentTimeMillis();
    }

    return report;
  }
}
TOP

Related Classes of org.apache.hadoop.hive.ql.exec.tez.TezJobMonitor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.