Package org.apache.hadoop.chukwa.inputtools.mdl

Source Code of org.apache.hadoop.chukwa.inputtools.mdl.TorqueInfoProcessor

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.chukwa.inputtools.mdl;


import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.lang.Exception;
import java.lang.InterruptedException;
import java.lang.Process;
import java.lang.ProcessBuilder;
import java.lang.StringBuffer;
import java.lang.System;
import java.lang.Thread;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.Timer;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.chukwa.inputtools.mdl.DataConfig;
import org.apache.hadoop.chukwa.inputtools.mdl.ErStreamHandler;
import org.apache.hadoop.chukwa.inputtools.mdl.TorqueTimerTask;
import org.apache.hadoop.chukwa.util.DatabaseWriter;

public class TorqueInfoProcessor {

  private static Log log = LogFactory.getLog(TorqueInfoProcessor.class);

  private int intervalValue = 60;
  private String torqueServer = null;
  private String torqueBinDir = null;
  private String domain = null;

  private TreeMap<String, TreeMap<String, String>> currentHodJobs;

  public TorqueInfoProcessor(DataConfig mdlConfig, int interval) {
    this.intervalValue = interval;

    torqueServer = System.getProperty("TORQUE_SERVER");
    torqueBinDir = System.getProperty("TORQUE_HOME") + File.separator + "bin";
    domain = System.getProperty("DOMAIN");
    currentHodJobs = new TreeMap<String, TreeMap<String, String>>();
  }

  public void setup(boolean recover) throws Exception {
  }

  private void getHodJobInfo() throws IOException {
    StringBuffer sb = new StringBuffer();
    sb.append(torqueBinDir).append("/qstat -a");

    String[] getQueueInfoCommand = new String[3];
    getQueueInfoCommand[0] = "ssh";
    getQueueInfoCommand[1] = torqueServer;
    getQueueInfoCommand[2] = sb.toString();

    String command = getQueueInfoCommand[0] + " " + getQueueInfoCommand[1]
        + " " + getQueueInfoCommand[2];
    ProcessBuilder pb = new ProcessBuilder(getQueueInfoCommand);

    Process p = pb.start();

    Timer timeout = new Timer();
    TorqueTimerTask torqueTimerTask = new TorqueTimerTask(p, command);
    timeout.schedule(torqueTimerTask, TorqueTimerTask.timeoutInterval * 1000);

    BufferedReader result = new BufferedReader(new InputStreamReader(p
        .getInputStream()));
    ErStreamHandler errorHandler = new ErStreamHandler(p.getErrorStream(),
        command, true);
    errorHandler.start();

    String line = null;
    boolean start = false;
    TreeSet<String> jobsInTorque = new TreeSet<String>();
    while ((line = result.readLine()) != null) {
      if (line.startsWith("---")) {
        start = true;
        continue;
      }

      if (start) {
        String[] items = line.split("\\s+");
        if (items.length >= 10) {
          String hodIdLong = items[0];
          String hodId = hodIdLong.split("[.]")[0];
          String userId = items[1];
          String numOfMachine = items[5];
          String status = items[9];
          jobsInTorque.add(hodId);
          if (!currentHodJobs.containsKey(hodId)) {
            TreeMap<String, String> aJobData = new TreeMap<String, String>();

            aJobData.put("userId", userId);
            aJobData.put("numOfMachine", numOfMachine);
            aJobData.put("traceCheckCount", "0");
            aJobData.put("process", "0");
            aJobData.put("status", status);
            currentHodJobs.put(hodId, aJobData);
          } else {
            TreeMap<String, String> aJobData = currentHodJobs.get(hodId);
            aJobData.put("status", status);
            currentHodJobs.put(hodId, aJobData);
          }// if..else
        }
      }
    }// while

    try {
      errorHandler.join();
    } catch (InterruptedException ie) {
      log.error(ie.getMessage());
    }
    timeout.cancel();

    Set<String> currentHodJobIds = currentHodJobs.keySet();
    Iterator<String> currentHodJobIdsIt = currentHodJobIds.iterator();
    TreeSet<String> finishedHodIds = new TreeSet<String>();
    while (currentHodJobIdsIt.hasNext()) {
      String hodId = currentHodJobIdsIt.next();
      if (!jobsInTorque.contains(hodId)) {
        TreeMap<String, String> aJobData = currentHodJobs.get(hodId);
        String process = aJobData.get("process");
        if (process.equals("0") || process.equals("1")) {
          aJobData.put("status", "C");
        } else {
          finishedHodIds.add(hodId);
        }
      }
    }// while

    Iterator<String> finishedHodIdsIt = finishedHodIds.iterator();
    while (finishedHodIdsIt.hasNext()) {
      String hodId = finishedHodIdsIt.next();
      currentHodJobs.remove(hodId);
    }

  }

  private boolean loadQstatData(String hodId) throws IOException, SQLException {
    TreeMap<String, String> aJobData = currentHodJobs.get(hodId);
    String userId = aJobData.get("userId");

    StringBuffer sb = new StringBuffer();
    sb.append(torqueBinDir).append("/qstat -f -1 ").append(hodId);
    String[] qstatCommand = new String[3];
    qstatCommand[0] = "ssh";
    qstatCommand[1] = torqueServer;
    qstatCommand[2] = sb.toString();

    String command = qstatCommand[0] + " " + qstatCommand[1] + " "
        + qstatCommand[2];
    ProcessBuilder pb = new ProcessBuilder(qstatCommand);
    Process p = pb.start();

    Timer timeout = new Timer();
    TorqueTimerTask torqueTimerTask = new TorqueTimerTask(p, command);
    timeout.schedule(torqueTimerTask, TorqueTimerTask.timeoutInterval * 1000);

    BufferedReader result = new BufferedReader(new InputStreamReader(p
        .getInputStream()));
    ErStreamHandler errorHandler = new ErStreamHandler(p.getErrorStream(),
        command, false);
    errorHandler.start();
    String line = null;
    String hosts = null;
    long startTimeValue = -1;
    long endTimeValue = Calendar.getInstance().getTimeInMillis();
    long executeTimeValue = Calendar.getInstance().getTimeInMillis();
    boolean qstatfinished;

    while ((line = result.readLine()) != null) {
      if (line.indexOf("ctime") >= 0) {
        String startTime = line.split("=")[1].trim();
        // Tue Sep 9 23:44:29 2008
        SimpleDateFormat sdf = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyy");
        Date startTimeDate;
        try {
          startTimeDate = sdf.parse(startTime);
          startTimeValue = startTimeDate.getTime();
        } catch (ParseException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        }

      }
      if (line.indexOf("mtime") >= 0) {
        String endTime = line.split("=")[1].trim();
        SimpleDateFormat sdf = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyy");
        Date endTimeDate;
        try {
          endTimeDate = sdf.parse(endTime);
          endTimeValue = endTimeDate.getTime();
        } catch (ParseException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        }

      }
      if (line.indexOf("etime") >= 0) {
        String executeTime = line.split("=")[1].trim();
        SimpleDateFormat sdf = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyy");
        Date executeTimeDate;
        try {
          executeTimeDate = sdf.parse(executeTime);
          executeTimeValue = executeTimeDate.getTime();
        } catch (ParseException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        }

      }
      if (line.indexOf("exec_host") >= 0) {
        hosts = line.split("=")[1].trim();
      }
    }

    if (hosts != null && startTimeValue >= 0) {
      String[] items2 = hosts.split("[+]");
      int num = 0;
      for (int i = 0; i < items2.length; i++) {
        String machinetmp = items2[i];
        if (machinetmp.length() > 3) {
          String machine = items2[i].substring(0, items2[i].length() - 2);
          StringBuffer data = new StringBuffer();
          data.append("HodId=").append(hodId);
          data.append(", Machine=").append(machine);
          if (domain != null) {
            data.append(".").append(domain);
          }
          log.info(data);
          num++;
        }
      }
      Timestamp startTimedb = new Timestamp(startTimeValue);
      Timestamp endTimedb = new Timestamp(endTimeValue);
      StringBuffer data = new StringBuffer();
      long timeQueued = executeTimeValue - startTimeValue;
      data.append("HodID=").append(hodId);
      data.append(", UserId=").append(userId);
      data.append(", StartTime=").append(startTimedb);
      data.append(", TimeQueued=").append(timeQueued);
      data.append(", NumOfMachines=").append(num);
      data.append(", EndTime=").append(endTimedb);
      log.info(data);
      qstatfinished = true;

    } else {

      qstatfinished = false;
    }

    try {
      errorHandler.join();
    } catch (InterruptedException ie) {
      log.error(ie.getMessage());
    }
    result.close();
    timeout.cancel();

    return qstatfinished;
  }

  private boolean loadTraceJobData(String hodId) throws IOException,
      SQLException {
    TreeMap<String, String> aJobData = currentHodJobs.get(hodId);
    String userId = aJobData.get("userId");
    String process = aJobData.get("process");

    StringBuffer sb = new StringBuffer();
    sb.append(torqueBinDir).append("/tracejob -n 10 -l -m -s ").append(hodId);
    String[] traceJobCommand = new String[3];
    traceJobCommand[0] = "ssh";
    traceJobCommand[1] = torqueServer;
    traceJobCommand[2] = sb.toString();

    String command = traceJobCommand[0] + " " + traceJobCommand[1] + " "
        + traceJobCommand[2];
    ProcessBuilder pb = new ProcessBuilder(traceJobCommand);

    Process p = pb.start();

    Timer timeout = new Timer();
    TorqueTimerTask torqueTimerTask = new TorqueTimerTask(p, command);
    timeout.schedule(torqueTimerTask, TorqueTimerTask.timeoutInterval * 1000);

    BufferedReader result = new BufferedReader(new InputStreamReader(p
        .getInputStream()));
    ErStreamHandler errorHandler = new ErStreamHandler(p.getErrorStream(),
        command, false);
    errorHandler.start();
    String line = null;
    String exit_status = null;
    String hosts = null;
    long timeQueued = -1;
    long startTimeValue = -1;
    long endTimeValue = -1;
    boolean findResult = false;

    while ((line = result.readLine()) != null && !findResult) {
      if (line.indexOf("end") >= 0 && line.indexOf("Exit_status") >= 0
          && line.indexOf("qtime") >= 0) {
        TreeMap<String, String> jobData = new TreeMap<String, String>();
        String[] items = line.split("\\s+");
        for (int i = 0; i < items.length; i++) {
          String[] items2 = items[i].split("=");
          if (items2.length >= 2) {
            jobData.put(items2[0], items2[1]);
          }

        }
        String startTime = jobData.get("ctime");
        startTimeValue = Long.valueOf(startTime);
        startTimeValue = startTimeValue - startTimeValue % (60);
        Timestamp startTimedb = new Timestamp(startTimeValue * 1000);

        String queueTime = jobData.get("qtime");
        long queueTimeValue = Long.valueOf(queueTime);

        String sTime = jobData.get("start");
        long sTimeValue = Long.valueOf(sTime);

        timeQueued = sTimeValue - queueTimeValue;

        String endTime = jobData.get("end");
        endTimeValue = Long.valueOf(endTime);
        endTimeValue = endTimeValue - endTimeValue % (60);
        Timestamp endTimedb = new Timestamp(endTimeValue * 1000);

        exit_status = jobData.get("Exit_status");
        hosts = jobData.get("exec_host");
        String[] items2 = hosts.split("[+]");
        int num = 0;
        for (int i = 0; i < items2.length; i++) {
          String machinetemp = items2[i];
          if (machinetemp.length() >= 3) {
            String machine = items2[i].substring(0, items2[i].length() - 2);
            StringBuffer data = new StringBuffer();
            data.append("HodId=").append(hodId);
            data.append(", Machine=").append(machine);
            if (domain != null) {
              data.append(".").append(domain);
            }
            log.info(data.toString());
            num++;
          }
        }

        StringBuffer data = new StringBuffer();
        data.append("HodID=").append(hodId);
        data.append(", UserId=").append(userId);
        data.append(", Status=").append(exit_status);
        data.append(", TimeQueued=").append(timeQueued);
        data.append(", StartTime=").append(startTimedb);
        data.append(", EndTime=").append(endTimedb);
        data.append(", NumOfMachines=").append(num);
        log.info(data.toString());
        findResult = true;
        log.debug(" hod info for job " + hodId + " has been loaded ");
      }// if

    }// while

    try {
      errorHandler.join();
    } catch (InterruptedException ie) {
      log.error(ie.getMessage());
    }

    timeout.cancel();
    boolean tracedone = false;
    if (!findResult) {

      String traceCheckCount = aJobData.get("traceCheckCount");
      int traceCheckCountValue = Integer.valueOf(traceCheckCount);
      traceCheckCountValue = traceCheckCountValue + 1;
      aJobData.put("traceCheckCount", String.valueOf(traceCheckCountValue));

      log.debug("did not find tracejob info for job " + hodId + ", after "
          + traceCheckCountValue + " times checking");
      if (traceCheckCountValue >= 2) {
        tracedone = true;
      }
    }
    boolean finished = findResult | tracedone;
    return finished;
  }

  private void process_data() throws SQLException {

    long currentTime = System.currentTimeMillis();
    currentTime = currentTime - currentTime % (60 * 1000);
    Timestamp timestamp = new Timestamp(currentTime);

    Set<String> hodIds = currentHodJobs.keySet();

    Iterator<String> hodIdsIt = hodIds.iterator();
    while (hodIdsIt.hasNext()) {
      String hodId = hodIdsIt.next();
      TreeMap<String, String> aJobData = currentHodJobs.get(hodId);
      String status = aJobData.get("status");
      String process = aJobData.get("process");
      if (process.equals("0") && (status.equals("R") || status.equals("E"))) {
        try {
          boolean result = loadQstatData(hodId);
          if (result) {
            aJobData.put("process", "1");
            currentHodJobs.put(hodId, aJobData);
          }
        } catch (IOException ioe) {
          log.error("load qsat data Error:" + ioe.getMessage());

        }
      }
      if (!process.equals("2") && status.equals("C")) {
        try {
          boolean result = loadTraceJobData(hodId);

          if (result) {
            aJobData.put("process", "2");
            currentHodJobs.put(hodId, aJobData);
          }
        } catch (IOException ioe) {
          log.error("loadTraceJobData Error:" + ioe.getMessage());
        }
      }// if

    } // while

  }

  private void handle_jobData() throws SQLException {
    try {
      getHodJobInfo();
    } catch (IOException ex) {
      log.error("getQueueInfo Error:" + ex.getMessage());
      return;
    }
    try {
      process_data();
    } catch (SQLException ex) {
      log.error("process_data Error:" + ex.getMessage());
      throw ex;
    }
  }

  public void run_forever() throws SQLException {
    while (true) {
      handle_jobData();
      try {
        log.debug("sleeping ...");
        Thread.sleep(this.intervalValue * 1000);
      } catch (InterruptedException e) {
        log.error(e.getMessage());
      }
    }
  }

  public void shutdown() {
  }
}
TOP

Related Classes of org.apache.hadoop.chukwa.inputtools.mdl.TorqueInfoProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.