Package eu.stratosphere.nephele.jobgraph

Source Code of eu.stratosphere.nephele.jobgraph.JobGraph

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.nephele.jobgraph;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.Vector;

import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.FSDataInputStream;
import eu.stratosphere.core.fs.FileStatus;
import eu.stratosphere.core.fs.FileSystem;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.core.io.IOReadableWritable;
import eu.stratosphere.core.io.StringRecord;
import eu.stratosphere.nephele.execution.librarycache.LibraryCacheManager;
import eu.stratosphere.util.ClassUtils;

/**
* A job graph represents an entire job in Nephele. A job graph must consist of at least one job vertex
* and must be acyclic.
*
*/
public class JobGraph implements IOReadableWritable {

  /**
   * List of input vertices included in this job graph.
   */
  private Map<JobVertexID, AbstractJobInputVertex> inputVertices = new HashMap<JobVertexID, AbstractJobInputVertex>();

  /**
   * List of output vertices included in this job graph.
   */
  private Map<JobVertexID, AbstractJobOutputVertex> outputVertices = new HashMap<JobVertexID, AbstractJobOutputVertex>();

  /**
   * List of task vertices included in this job graph.
   */
  private Map<JobVertexID, JobTaskVertex> taskVertices = new HashMap<JobVertexID, JobTaskVertex>();

  /**
   * ID of this job.
   */
  private JobID jobID;

  /**
   * Name of this job.
   */
  private String jobName;

  /**
   * The job configuration attached to this job.
   */
  private Configuration jobConfiguration = new Configuration();

  /**
   * The configuration which should be applied to the task managers involved in processing this job.
   */
  private final Configuration taskManagerConfiguration = new Configuration();

  /**
   * List of JAR files required to run this job.
   */
  private final ArrayList<Path> userJars = new ArrayList<Path>();

  /**
   * Size of the buffer to be allocated for transferring attached files.
   */
  private static final int BUFFERSIZE = 8192;

  /**
   * Constructs a new job graph with a random job ID.
   */
  public JobGraph() {
    this.jobID = new JobID();
  }

  /**
   * Constructs a new job graph with the given name and a random job ID.
   *
   * @param jobName
   *        the name for this job graph
   */
  public JobGraph(final String jobName) {
    this();
    this.jobName = jobName;
  }

  /**
   * Returns the name assigned to the job graph.
   *
   * @return the name assigned to the job graph
   */
  public String getName() {
    return this.jobName;
  }

  /**
   * Returns the configuration object for this job if it is set.
   *
   * @return the configuration object for this job, or <code>null</code> if it is not set
   */
  public Configuration getJobConfiguration() {

    return this.jobConfiguration;
  }

  /**
   * Returns the configuration object distributed among the task managers
   * before they start processing this job.
   *
   * @return the configuration object for the task managers, or <code>null</code> if it is not set
   */
  public Configuration getTaskmanagerConfiguration() {

    return this.taskManagerConfiguration;
  }

  /**
   * Adds a new input vertex to the job graph if it is not already included.
   *
   * @param inputVertex
   *        the new input vertex to be added
   */
  public void addVertex(final AbstractJobInputVertex inputVertex) {

    if (!inputVertices.containsKey(inputVertex.getID())) {
      inputVertices.put(inputVertex.getID(), inputVertex);
    }
  }

  /**
   * Adds a new task vertex to the job graph if it is not already included.
   *
   * @param taskVertex
   *        the new task vertex to be added
   */
  public void addVertex(final JobTaskVertex taskVertex) {

    if (!taskVertices.containsKey(taskVertex.getID())) {
      taskVertices.put(taskVertex.getID(), taskVertex);
    }
  }

  /**
   * Adds a new output vertex to the job graph if it is not already included.
   *
   * @param outputVertex
   *        the new output vertex to be added
   */
  public void addVertex(final AbstractJobOutputVertex outputVertex) {

    if (!outputVertices.containsKey(outputVertex.getID())) {
      outputVertices.put(outputVertex.getID(), outputVertex);
    }
  }

  /**
   * Returns the number of input vertices registered with the job graph.
   *
   * @return the number of input vertices registered with the job graph
   */
  public int getNumberOfInputVertices() {
    return this.inputVertices.size();
  }

  /**
   * Returns the number of output vertices registered with the job graph.
   *
   * @return the number of output vertices registered with the job graph
   */
  public int getNumberOfOutputVertices() {
    return this.outputVertices.size();
  }

  /**
   * Returns the number of task vertices registered with the job graph.
   *
   * @return the number of task vertices registered with the job graph
   */
  public int getNumberOfTaskVertices() {
    return this.taskVertices.size();
  }

  /**
   * Returns an iterator to iterate all input vertices registered with the job graph.
   *
   * @return an iterator to iterate all input vertices registered with the job graph
   */
  public Iterator<AbstractJobInputVertex> getInputVertices() {

    final Collection<AbstractJobInputVertex> coll = this.inputVertices.values();

    return coll.iterator();
  }

  /**
   * Returns an iterator to iterate all output vertices registered with the job graph.
   *
   * @return an iterator to iterate all output vertices registered with the job graph
   */
  public Iterator<AbstractJobOutputVertex> getOutputVertices() {

    final Collection<AbstractJobOutputVertex> coll = this.outputVertices.values();

    return coll.iterator();
  }

  /**
   * Returns an iterator to iterate all task vertices registered with the job graph.
   *
   * @return an iterator to iterate all task vertices registered with the job graph
   */
  public Iterator<JobTaskVertex> getTaskVertices() {

    final Collection<JobTaskVertex> coll = this.taskVertices.values();

    return coll.iterator();
  }

  /**
   * Returns the number of all job vertices registered with this job graph.
   *
   * @return the number of all job vertices registered with this job graph
   */
  public int getNumberOfVertices() {

    return this.inputVertices.size() + this.outputVertices.size() + this.taskVertices.size();
  }

  /**
   * Returns an array of all job vertices than can be reached when traversing the job graph from the input vertices.
   *
   * @return an array of all job vertices than can be reached when traversing the job graph from the input vertices
   */
  public AbstractJobVertex[] getAllReachableJobVertices() {

    final Vector<AbstractJobVertex> collector = new Vector<AbstractJobVertex>();
    collectVertices(null, collector);
    return collector.toArray(new AbstractJobVertex[0]);
  }

  /**
   * Returns an array of all job vertices that are registered with the job graph. The order in which the vertices
   * appear in the list is not defined.
   *
   * @return an array of all job vertices that are registered with the job graph
   */
  public AbstractJobVertex[] getAllJobVertices() {

    int i = 0;
    final AbstractJobVertex[] vertices = new AbstractJobVertex[inputVertices.size() + outputVertices.size()
      + taskVertices.size()];

    final Iterator<AbstractJobInputVertex> iv = getInputVertices();
    while (iv.hasNext()) {
      vertices[i++] = iv.next();
    }

    final Iterator<AbstractJobOutputVertex> ov = getOutputVertices();
    while (ov.hasNext()) {
      vertices[i++] = ov.next();
    }

    final Iterator<JobTaskVertex> tv = getTaskVertices();
    while (tv.hasNext()) {
      vertices[i++] = tv.next();
    }

    return vertices;
  }

  /**
   * Auxiliary method to collect all vertices which are reachable from the input vertices.
   *
   * @param jv
   *        the currently considered job vertex
   * @param collector
   *        a temporary list to store the vertices that have already been visisted
   */
  private void collectVertices(final AbstractJobVertex jv, final List<AbstractJobVertex> collector) {

    if (jv == null) {
      final Iterator<AbstractJobInputVertex> iter = getInputVertices();
      while (iter.hasNext()) {
        collectVertices(iter.next(), collector);
      }
    } else {

      if (!collector.contains(jv)) {
        collector.add(jv);
      } else {
        return;
      }

      for (int i = 0; i < jv.getNumberOfForwardConnections(); i++) {
        collectVertices(jv.getForwardConnection(i).getConnectedVertex(), collector);
      }
    }
  }

  /**
   * Returns the ID of the job.
   *
   * @return the ID of the job
   */
  public JobID getJobID() {
    return this.jobID;
  }

  /**
   * Searches for a vertex with a matching ID and returns it.
   *
   * @param id
   *        the ID of the vertex to search for
   * @return the vertex with the matching ID or <code>null</code> if no vertex with such ID could be found
   */
  public AbstractJobVertex findVertexByID(final JobVertexID id) {

    if (this.inputVertices.containsKey(id)) {
      return this.inputVertices.get(id);
    }

    if (this.outputVertices.containsKey(id)) {
      return this.outputVertices.get(id);
    }

    if (this.taskVertices.containsKey(id)) {
      return this.taskVertices.get(id);
    }

    return null;
  }

  /**
   * Checks if the job vertex with the given ID is registered with the job graph.
   *
   * @param id
   *        the ID of the vertex to search for
   * @return <code>true</code> if a vertex with the given ID is registered with the job graph, <code>false</code>
   *         otherwise.
   */
  private boolean includedInJobGraph(final JobVertexID id) {

    if (this.inputVertices.containsKey(id)) {
      return true;
    }

    if (this.outputVertices.containsKey(id)) {
      return true;
    }

    if (this.taskVertices.containsKey(id)) {
      return true;
    }

    return false;
  }

  /**
   * Checks if the job graph is weakly connected.
   *
   * @return <code>true</code> if the job graph is weakly connected, otherwise <code>false</code>
   */
  public boolean isWeaklyConnected() {

    final AbstractJobVertex[] reachable = getAllReachableJobVertices();
    final AbstractJobVertex[] all = getAllJobVertices();

    // Check if number if reachable vertices matches number of registered vertices
    if (reachable.length != all.length) {
      return false;
    }

    final HashMap<JobVertexID, AbstractJobVertex> tmp = new HashMap<JobVertexID, AbstractJobVertex>();
    for (int i = 0; i < reachable.length; i++) {
      tmp.put(reachable[i].getID(), reachable[i]);
    }

    // Check if all is subset of reachable
    for (int i = 0; i < all.length; i++) {
      if (!tmp.containsKey(all[i].getID())) {
        return false;
      }
    }

    // Check if reachable is a subset of all
    for (int i = 0; i < reachable.length; i++) {
      if (!includedInJobGraph(reachable[i].getID())) {
        return false;
      }
    }

    return true;
  }

  /**
   * Checks if the job graph is acyclic.
   *
   * @return <code>true</code> if the job graph is acyclic, <code>false</code> otherwise
   */
  public boolean isAcyclic() {

    // Tarjan's algorithm to detect strongly connected componenent of a graph
    final AbstractJobVertex[] reachable = getAllReachableJobVertices();
    final HashMap<AbstractJobVertex, Integer> indexMap = new HashMap<AbstractJobVertex, Integer>();
    final HashMap<AbstractJobVertex, Integer> lowLinkMap = new HashMap<AbstractJobVertex, Integer>();
    final Stack<AbstractJobVertex> stack = new Stack<AbstractJobVertex>();
    final Integer index = Integer.valueOf(0);

    for (int i = 0; i < reachable.length; i++) {
      if (!indexMap.containsKey(reachable[i])) {
        if (!tarjan(reachable[i], index, indexMap, lowLinkMap, stack)) {
          return false;
        }
      }
    }

    return true;
  }

  /**
   * Auxiliary method implementing Tarjan's algorithm for strongly-connected components to determine whether the job
   * graph is acyclic.
   */
  private boolean tarjan(final AbstractJobVertex jv, Integer index,
      final HashMap<AbstractJobVertex, Integer> indexMap, final HashMap<AbstractJobVertex, Integer> lowLinkMap,
      final Stack<AbstractJobVertex> stack) {

    indexMap.put(jv, Integer.valueOf(index));
    lowLinkMap.put(jv, Integer.valueOf(index));
    index = Integer.valueOf(index.intValue() + 1);
    stack.push(jv);

    for (int i = 0; i < jv.getNumberOfForwardConnections(); i++) {

      final AbstractJobVertex jv2 = jv.getForwardConnection(i).getConnectedVertex();
      if (!indexMap.containsKey(jv2) || stack.contains(jv2)) {
        if (!indexMap.containsKey(jv2)) {
          if (!tarjan(jv2, index, indexMap, lowLinkMap, stack)) {
            return false;
          }
        }
        if (lowLinkMap.get(jv) > lowLinkMap.get(jv2)) {
          lowLinkMap.put(jv, Integer.valueOf(lowLinkMap.get(jv2)));
        }
      }
    }

    if (lowLinkMap.get(jv).equals(indexMap.get(jv))) {

      int count = 0;
      while (stack.size() > 0) {
        final AbstractJobVertex jv2 = stack.pop();
        if (jv == jv2) {
          break;
        }

        count++;
      }

      if (count > 0) {
        return false;
      }
    }

    return true;
  }

  /**
   * Checks for all registered job vertices if their in-/out-degree is correct.
   *
   * @return <code>null</code> if the in-/out-degree of all vertices is correct or the first job vertex whose
   *         in-/out-degree is incorrect.
   */
  public AbstractJobVertex areVertexDegreesCorrect() {

    // Check input vertices
    final Iterator<AbstractJobInputVertex> iter = getInputVertices();
    while (iter.hasNext()) {

      final AbstractJobVertex jv = iter.next();

      if (jv.getNumberOfForwardConnections() < 1 || jv.getNumberOfBackwardConnections() > 0) {
        return jv;
      }
    }

    // Check task vertices
    final Iterator<JobTaskVertex> iter2 = getTaskVertices();
    while (iter2.hasNext()) {

      final AbstractJobVertex jv = iter2.next();

      if (jv.getNumberOfForwardConnections() < 1 || jv.getNumberOfBackwardConnections() < 1) {
        return jv;
      }
    }

    // Check output vertices
    final Iterator<AbstractJobOutputVertex> iter3 = getOutputVertices();
    while (iter3.hasNext()) {

      final AbstractJobVertex jv = iter3.next();

      if (jv.getNumberOfForwardConnections() > 0 || jv.getNumberOfBackwardConnections() < 1) {
        return jv;
      }
    }

    return null;
  }


  @Override
  public void read(final DataInput in) throws IOException {

    // Read job id
    this.jobID.read(in);

    // Read the job name
    this.jobName = StringRecord.readString(in);

    // Read required jar files
    readRequiredJarFiles(in);

    // First read total number of vertices;
    final int numVertices = in.readInt();

    // First, recreate each vertex and add it to reconstructionMap
    for (int i = 0; i < numVertices; i++) {
      final String className = StringRecord.readString(in);
      final JobVertexID id = new JobVertexID();
      id.read(in);
      final String vertexName = StringRecord.readString(in);

      Class<? extends IOReadableWritable> c;
      try {
        c = ClassUtils.getRecordByName(className);
      } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe.toString());
      }

      // Find constructor
      Constructor<? extends IOReadableWritable> cst;
      try {
        cst = c.getConstructor(String.class, JobVertexID.class, JobGraph.class);
      } catch (SecurityException e1) {
        throw new IOException(e1.toString());
      } catch (NoSuchMethodException e1) {
        throw new IOException(e1.toString());
      }

      try {
        cst.newInstance(vertexName, id, this);
      } catch (IllegalArgumentException e) {
        throw new IOException(e.toString());
      } catch (InstantiationException e) {
        throw new IOException(e.toString());
      } catch (IllegalAccessException e) {
        throw new IOException(e.toString());
      } catch (InvocationTargetException e) {
        throw new IOException(e.toString());
      }
    }

    final JobVertexID tmpID = new JobVertexID();
    for (int i = 0; i < numVertices; i++) {

      AbstractJobVertex jv;

      tmpID.read(in);
      if (inputVertices.containsKey(tmpID)) {
        jv = inputVertices.get(tmpID);
      } else {
        if (outputVertices.containsKey(tmpID)) {
          jv = outputVertices.get(tmpID);
        } else {
          if (taskVertices.containsKey(tmpID)) {
            jv = taskVertices.get(tmpID);
          } else {
            throw new IOException("Cannot find vertex with ID " + tmpID + " in any vertex map.");
          }
        }
      }

      // Read the vertex data
      jv.read(in);
    }

    // Find the class loader for the job
    final ClassLoader cl = LibraryCacheManager.getClassLoader(this.jobID);
    if (cl == null) {
      throw new IOException("Cannot find class loader for job graph " + this.jobID);
    }

    // Re-instantiate the job configuration object and read the configuration
    this.jobConfiguration = new Configuration(cl);
    this.jobConfiguration.read(in);

    // Read the task manager configuration
    this.taskManagerConfiguration.read(in);
  }


  @Override
  public void write(final DataOutput out) throws IOException {

    // Write job ID
    this.jobID.write(out);

    // Write out job name
    StringRecord.writeString(out, this.jobName);

    final AbstractJobVertex[] allVertices = this.getAllJobVertices();

    // Write out all required jar files
    writeRequiredJarFiles(out, allVertices);

    // Write total number of vertices
    out.writeInt(allVertices.length);

    // First write out class name and id for every vertex
    for (int i = 0; i < allVertices.length; i++) {

      final String className = allVertices[i].getClass().getName();
      StringRecord.writeString(out, className);
      allVertices[i].getID().write(out);
      StringRecord.writeString(out, allVertices[i].getName());
    }

    // Now write out vertices themselves
    for (int i = 0; i < allVertices.length; i++) {
      allVertices[i].getID().write(out);
      allVertices[i].write(out);
    }

    // Write out configuration objects
    this.jobConfiguration.write(out);
    this.taskManagerConfiguration.write(out);
  }

  /**
   * Writes the JAR files of all vertices in array <code>jobVertices</code> to the specified output stream.
   *
   * @param out
   *        the output stream to write the JAR files to
   * @param jobVertices
   *        array of job vertices whose required JAR file are to be written to the output stream
   * @throws IOException
   *         thrown if an error occurs while writing to the stream
   */
  private void writeRequiredJarFiles(final DataOutput out, final AbstractJobVertex[] jobVertices) throws IOException {

    // Now check if all the collected jar files really exist
    final FileSystem fs = FileSystem.getLocalFileSystem();

    for (int i = 0; i < this.userJars.size(); i++) {
      if (!fs.exists(this.userJars.get(i))) {
        throw new IOException("Cannot find jar file " + this.userJars.get(i));
      }
    }

    // How many jar files follow?
    out.writeInt(this.userJars.size());

    for (int i = 0; i < this.userJars.size(); i++) {

      final Path jar = this.userJars.get(i);

      // Write out the actual path
      jar.write(out);

      // Write out the length of the file
      final FileStatus file = fs.getFileStatus(jar);
      out.writeLong(file.getLen());

      // Now write the jar file
      final FSDataInputStream inStream = fs.open(this.userJars.get(i));
      final byte[] buf = new byte[BUFFERSIZE];
      int read = inStream.read(buf, 0, buf.length);
      while (read > 0) {
        out.write(buf, 0, read);
        read = inStream.read(buf, 0, buf.length);
      }
    }
  }

  /**
   * Reads required JAR files from an input stream and adds them to the
   * library cache manager.
   *
   * @param in
   *        the data stream to read the JAR files from
   * @throws IOException
   *         thrown if an error occurs while reading the stream
   */
  private void readRequiredJarFiles(final DataInput in) throws IOException {

    // Do jar files follow;
    final int numJars = in.readInt();

    if (numJars > 0) {

      for (int i = 0; i < numJars; i++) {

        final Path p = new Path();
        p.read(in);
        this.userJars.add(p);

        // Read the size of the jar file
        final long sizeOfJar = in.readLong();

        // Add the jar to the library manager
        LibraryCacheManager.addLibrary(this.jobID, p, sizeOfJar, in);
      }

    }

    // Register this job with the library cache manager
    LibraryCacheManager.register(this.jobID, this.userJars.toArray(new Path[0]));
  }

  /**
   * Adds the path of a JAR file required to run the job on a task manager.
   *
   * @param jar
   *        path of the JAR file required to run the job on a task manager
   */
  public void addJar(final Path jar) {

    if (jar == null) {
      return;
    }

    if (!userJars.contains(jar)) {
      userJars.add(jar);
    }
  }

  /**
   * Returns a (possibly empty) array of paths to JAR files which are required to run the job on a task manager.
   *
   * @return a (possibly empty) array of paths to JAR files which are required to run the job on a task manager
   */
  public Path[] getJars() {

    return userJars.toArray(new Path[userJars.size()]);
  }

  /**
   * Checks if any vertex of this job graph has an outgoing edge which is set to <code>null</code>. If this is the
   * case the respective vertex is returned.
   *
   * @return the vertex which has an outgoing edge set to <code>null</code> or <code>null</code> if no such vertex
   *         exists
   */
  public AbstractJobVertex findVertexWithNullEdges() {

    final AbstractJobVertex[] allVertices = getAllJobVertices();

    for (int i = 0; i < allVertices.length; i++) {

      for (int j = 0; j < allVertices[i].getNumberOfForwardConnections(); j++) {
        if (allVertices[i].getForwardConnection(j) == null) {
          return allVertices[i];
        }
      }

      for (int j = 0; j < allVertices[i].getNumberOfBackwardConnections(); j++) {
        if (allVertices[i].getBackwardConnection(j) == null) {
          return allVertices[i];
        }
      }
    }

    return null;
  }

  /**
   * Checks if the instance dependency chain created with the <code>setVertexToShareInstancesWith</code> method is
   * acyclic.
   *
   * @return <code>true</code> if the dependency chain is acyclic, <code>false</code> otherwise
   */
  public boolean isInstanceDependencyChainAcyclic() {

    final AbstractJobVertex[] allVertices = this.getAllJobVertices();
    final Set<AbstractJobVertex> alreadyVisited = new HashSet<AbstractJobVertex>();

    for (AbstractJobVertex vertex : allVertices) {

      if (alreadyVisited.contains(vertex)) {
        continue;
      }

      AbstractJobVertex vertexToShareInstancesWith = vertex.getVertexToShareInstancesWith();
      if (vertexToShareInstancesWith != null) {

        final Set<AbstractJobVertex> cycleMap = new HashSet<AbstractJobVertex>();

        while (vertexToShareInstancesWith != null) {

          if (cycleMap.contains(vertexToShareInstancesWith)) {
            return false;
          } else {
            alreadyVisited.add(vertexToShareInstancesWith);
            cycleMap.add(vertexToShareInstancesWith);
            vertexToShareInstancesWith = vertexToShareInstancesWith.getVertexToShareInstancesWith();
          }
        }
      }
    }

    return true;
  }
}
TOP

Related Classes of eu.stratosphere.nephele.jobgraph.JobGraph

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.