/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.nephele.executiongraph;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.configuration.IllegalConfigurationException;
import eu.stratosphere.core.io.InputSplit;
import eu.stratosphere.nephele.execution.ExecutionListener;
import eu.stratosphere.nephele.execution.ExecutionState;
import eu.stratosphere.nephele.instance.AllocatedResource;
import eu.stratosphere.nephele.instance.DummyInstance;
import eu.stratosphere.nephele.instance.InstanceManager;
import eu.stratosphere.nephele.instance.InstanceType;
import eu.stratosphere.nephele.jobgraph.DistributionPattern;
import eu.stratosphere.runtime.io.gates.GateID;
import eu.stratosphere.runtime.io.channels.ChannelID;
import eu.stratosphere.runtime.io.channels.ChannelType;
import eu.stratosphere.nephele.jobgraph.AbstractJobInputVertex;
import eu.stratosphere.nephele.jobgraph.AbstractJobVertex;
import eu.stratosphere.nephele.jobgraph.JobEdge;
import eu.stratosphere.nephele.jobgraph.JobFileOutputVertex;
import eu.stratosphere.nephele.jobgraph.JobGraph;
import eu.stratosphere.nephele.jobgraph.JobID;
import eu.stratosphere.nephele.taskmanager.ExecutorThreadFactory;
import eu.stratosphere.nephele.template.AbstractInputTask;
import eu.stratosphere.nephele.template.AbstractInvokable;
import eu.stratosphere.util.StringUtils;
/**
* In Nephele the execution graph is the main data structure for scheduling, executing and
* observing a job. An execution graph is created from a job graph. In contrast to a job graph,
* it can contain communication edges of specific types, subgroups of vertices and information on
* when and where (i.e. on which instance) to run particular tasks.
* <p>
* This class is thread-safe.
*
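* <p>
* A minimal construction sketch (assuming a valid {@link JobGraph} and a running {@link InstanceManager}
* are available; the variable names and the placeholders are illustrative only):
*
* <pre>
* JobGraph jobGraph = ...; // the user's job graph
* InstanceManager instanceManager = ...; // provided by the job manager
*
* ExecutionGraph executionGraph = new ExecutionGraph(jobGraph, instanceManager);
* int numberOfStages = executionGraph.getNumberOfStages();
* </pre>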
*/
public class ExecutionGraph implements ExecutionListener {
/**
* The log object used for debugging.
*/
private static final Log LOG = LogFactory.getLog(ExecutionGraph.class);
/**
* The ID of the job this graph has been built for.
*/
private final JobID jobID;
/**
* The name of the original job graph.
*/
private final String jobName;
/**
* Mapping of vertex IDs to vertices.
*/
private final ConcurrentMap<ExecutionVertexID, ExecutionVertex> vertexMap = new ConcurrentHashMap<ExecutionVertexID, ExecutionVertex>(
1024);
/**
* Mapping of channel IDs to edges.
*/
private final ConcurrentMap<ChannelID, ExecutionEdge> edgeMap = new ConcurrentHashMap<ChannelID, ExecutionEdge>(
1024 * 1024);
/**
* List of stages in the graph.
*/
private final CopyOnWriteArrayList<ExecutionStage> stages = new CopyOnWriteArrayList<ExecutionStage>();
/**
* The executor service to asynchronously perform update operations to this graph.
*/
private final ExecutorService executorService = Executors.newSingleThreadExecutor(ExecutorThreadFactory.INSTANCE);
/**
* Index to the current execution stage.
*/
private volatile int indexToCurrentExecutionStage = 0;
/**
* The job configuration that was originally attached to the JobGraph.
*/
private final Configuration jobConfiguration;
/**
* The current status of the job which is represented by this execution graph.
*/
private final AtomicReference<InternalJobStatus> jobStatus = new AtomicReference<InternalJobStatus>(
InternalJobStatus.CREATED);
/**
* The error description of the first task which causes this job to fail.
*/
private volatile String errorDescription = null;
/**
* List of listeners which are notified in case the status of this job has changed.
*/
private final CopyOnWriteArrayList<JobStatusListener> jobStatusListeners = new CopyOnWriteArrayList<JobStatusListener>();
/**
* List of listeners which are notified in case the execution stage of a job has changed.
*/
private final CopyOnWriteArrayList<ExecutionStageListener> executionStageListeners = new CopyOnWriteArrayList<ExecutionStageListener>();
/**
* Private constructor which initializes the basic fields of the execution graph.
*
* @param jobID
* the ID of the job this execution graph is constructed for
* @param jobName
* the name of the original job graph
* @param jobConfiguration
* the configuration originally attached to the job graph
*/
private ExecutionGraph(final JobID jobID, final String jobName, final Configuration jobConfiguration) {
if (jobID == null) {
throw new IllegalArgumentException("Argument jobID must not be null");
}
this.jobID = jobID;
this.jobName = jobName;
this.jobConfiguration = jobConfiguration;
}
/**
* Creates a new execution graph from a job graph.
*
* @param job
* the user's job graph
* @param instanceManager
* the instance manager
* @throws GraphConversionException
* thrown if the job graph is not valid and no execution graph can be constructed from it
*/
public ExecutionGraph(final JobGraph job, final InstanceManager instanceManager)
throws GraphConversionException {
this(job.getJobID(), job.getName(), job.getJobConfiguration());
// Start constructing the new execution graph from given job graph
try {
constructExecutionGraph(job, instanceManager);
} catch (GraphConversionException e) {
throw e; // forward graph conversion exceptions
} catch (Exception e) {
throw new GraphConversionException(StringUtils.stringifyException(e));
}
}
/**
* Applies the user defined settings to the execution graph.
*
* @param temporaryGroupVertexMap
* mapping between job vertices and the corresponding group vertices.
* @throws GraphConversionException
* thrown if an error occurs while applying the user settings.
*/
private void applyUserDefinedSettings(final HashMap<AbstractJobVertex, ExecutionGroupVertex> temporaryGroupVertexMap)
throws GraphConversionException {
// Cycles in the dependency chain for instance sharing have already been checked in the
// <code>submitJob</code> method of the job manager
// If there is no cycle, apply the settings to the corresponding group vertices
final Iterator<Map.Entry<AbstractJobVertex, ExecutionGroupVertex>> it = temporaryGroupVertexMap.entrySet()
.iterator();
while (it.hasNext()) {
final Map.Entry<AbstractJobVertex, ExecutionGroupVertex> entry = it.next();
final AbstractJobVertex jobVertex = entry.getKey();
if (jobVertex.getVertexToShareInstancesWith() != null) {
final AbstractJobVertex vertexToShareInstancesWith = jobVertex.getVertexToShareInstancesWith();
final ExecutionGroupVertex groupVertex = entry.getValue();
final ExecutionGroupVertex groupVertexToShareInstancesWith = temporaryGroupVertexMap
.get(vertexToShareInstancesWith);
groupVertex.shareInstancesWith(groupVertexToShareInstancesWith);
}
}
// Second, we create the execution vertices each group vertex is supposed to manage
Iterator<ExecutionGroupVertex> it2 = new ExecutionGroupVertexIterator(this, true, -1);
while (it2.hasNext()) {
final ExecutionGroupVertex groupVertex = it2.next();
if (groupVertex.isNumberOfMembersUserDefined()) {
groupVertex.createInitialExecutionVertices(groupVertex.getUserDefinedNumberOfMembers());
groupVertex.repairSubtasksPerInstance();
}
}
// Finally, apply the channel settings
it2 = new ExecutionGroupVertexIterator(this, true, -1);
while (it2.hasNext()) {
final ExecutionGroupVertex groupVertex = it2.next();
for (int i = 0; i < groupVertex.getNumberOfForwardLinks(); i++) {
final ExecutionGroupEdge edge = groupVertex.getForwardEdge(i);
if (edge.isChannelTypeUserDefined()) {
edge.changeChannelType(edge.getChannelType());
}
// Create edges between execution vertices
createExecutionEdgesForGroupEdge(edge);
}
}
// Repair the instance assignment after having changed the channel types
repairInstanceAssignment();
// Repair the instance sharing among different group vertices
repairInstanceSharing();
// Finally, repair the stages
repairStages();
}
/**
* Sets up an execution graph from a job graph.
*
* @param jobGraph
* the job graph to create the execution graph from
* @param instanceManager
* the instance manager
* @throws GraphConversionException
* thrown if the job graph is not valid and no execution graph can be constructed from it
*/
private void constructExecutionGraph(final JobGraph jobGraph, final InstanceManager instanceManager)
throws GraphConversionException {
// Temporary data structures used during the conversion
final HashMap<AbstractJobVertex, ExecutionVertex> temporaryVertexMap = new HashMap<AbstractJobVertex, ExecutionVertex>();
final HashMap<AbstractJobVertex, ExecutionGroupVertex> temporaryGroupVertexMap = new HashMap<AbstractJobVertex, ExecutionGroupVertex>();
// Initially, create only one execution stage that contains all group vertices
final ExecutionStage initialExecutionStage = new ExecutionStage(this, 0);
this.stages.add(initialExecutionStage);
// Convert job vertices to execution vertices and initialize them
final AbstractJobVertex[] all = jobGraph.getAllJobVertices();
for (int i = 0; i < all.length; i++) {
final ExecutionVertex createdVertex = createVertex(all[i], instanceManager, initialExecutionStage,
jobGraph.getJobConfiguration());
temporaryVertexMap.put(all[i], createdVertex);
temporaryGroupVertexMap.put(all[i], createdVertex.getGroupVertex());
}
// Create initial edges between the vertices
createInitialGroupEdges(temporaryVertexMap);
// Now that an initial graph is built, apply the user settings
applyUserDefinedSettings(temporaryGroupVertexMap);
// Calculate the connection IDs
calculateConnectionIDs();
// Finally, construct the execution pipelines
reconstructExecutionPipelines();
}
private void createExecutionEdgesForGroupEdge(final ExecutionGroupEdge groupEdge) {
final ExecutionGroupVertex source = groupEdge.getSourceVertex();
final int indexOfOutputGate = groupEdge.getIndexOfOutputGate();
final ExecutionGroupVertex target = groupEdge.getTargetVertex();
final int indexOfInputGate = groupEdge.getIndexOfInputGate();
final Map<GateID, List<ExecutionEdge>> inputChannelMap = new HashMap<GateID, List<ExecutionEdge>>();
// Wire the respective output gate of each source vertex
final int currentNumberOfSourceNodes = source.getCurrentNumberOfGroupMembers();
for (int i = 0; i < currentNumberOfSourceNodes; ++i) {
final ExecutionVertex sourceVertex = source.getGroupMember(i);
final ExecutionGate outputGate = sourceVertex.getOutputGate(indexOfOutputGate);
if (outputGate == null) {
throw new IllegalStateException("wire: " + sourceVertex.getName()
+ " has no output gate with index " + indexOfOutputGate);
}
if (outputGate.getNumberOfEdges() > 0) {
throw new IllegalStateException("wire: wire called on source " + sourceVertex.getName() + " (" + i
+ "), but number of output channels is " + outputGate.getNumberOfEdges() + "!");
}
final int currentNumberOfTargetNodes = target.getCurrentNumberOfGroupMembers();
final List<ExecutionEdge> outputChannels = new ArrayList<ExecutionEdge>();
for (int j = 0; j < currentNumberOfTargetNodes; ++j) {
final ExecutionVertex targetVertex = target.getGroupMember(j);
final ExecutionGate inputGate = targetVertex.getInputGate(indexOfInputGate);
if (inputGate == null) {
throw new IllegalStateException("wire: " + targetVertex.getName()
+ " has no input gate with index " + indexOfInputGate);
}
if (inputGate.getNumberOfEdges() > 0 && i == 0) {
throw new IllegalStateException("wire: wire called on target " + targetVertex.getName() + " ("
+ j + "), but number of input channels is " + inputGate.getNumberOfEdges() + "!");
}
// Check if a wire is supposed to be created
if (DistributionPatternProvider.createWire(groupEdge.getDistributionPattern(),
i, j, currentNumberOfSourceNodes, currentNumberOfTargetNodes)) {
final ChannelID outputChannelID = new ChannelID();
final ChannelID inputChannelID = new ChannelID();
final ExecutionEdge edge = new ExecutionEdge(outputGate, inputGate, groupEdge, outputChannelID,
inputChannelID, outputGate.getNumberOfEdges(), inputGate.getNumberOfEdges());
this.edgeMap.put(outputChannelID, edge);
this.edgeMap.put(inputChannelID, edge);
outputChannels.add(edge);
List<ExecutionEdge> inputChannels = inputChannelMap.get(inputGate.getGateID());
if (inputChannels == null) {
inputChannels = new ArrayList<ExecutionEdge>();
inputChannelMap.put(inputGate.getGateID(), inputChannels);
}
inputChannels.add(edge);
}
}
outputGate.replaceAllEdges(outputChannels);
}
// Finally, set the channels for the input gates
final int currentNumberOfTargetNodes = target.getCurrentNumberOfGroupMembers();
for (int i = 0; i < currentNumberOfTargetNodes; ++i) {
final ExecutionVertex targetVertex = target.getGroupMember(i);
final ExecutionGate inputGate = targetVertex.getInputGate(indexOfInputGate);
final List<ExecutionEdge> inputChannels = inputChannelMap.get(inputGate.getGateID());
if (inputChannels == null) {
LOG.error("Cannot find input channels for gate ID " + inputGate.getGateID());
continue;
}
inputGate.replaceAllEdges(inputChannels);
}
}
/**
* Creates the initial edges between the group vertices
*
* @param vertexMap
* the temporary vertex map
* @throws GraphConversionException
* if the initial wiring cannot be created
*/
private void createInitialGroupEdges(final HashMap<AbstractJobVertex, ExecutionVertex> vertexMap)
throws GraphConversionException {
Iterator<Map.Entry<AbstractJobVertex, ExecutionVertex>> it = vertexMap.entrySet().iterator();
while (it.hasNext()) {
final Map.Entry<AbstractJobVertex, ExecutionVertex> entry = it.next();
final AbstractJobVertex sjv = entry.getKey();
final ExecutionVertex sev = entry.getValue();
final ExecutionGroupVertex sgv = sev.getGroupVertex();
// First compare number of output gates
if (sjv.getNumberOfForwardConnections() != sgv.getEnvironment().getNumberOfOutputGates()) {
throw new GraphConversionException("Job and execution vertex " + sjv.getName()
+ " have different number of outputs");
}
if (sjv.getNumberOfBackwardConnections() != sgv.getEnvironment().getNumberOfInputGates()) {
throw new GraphConversionException("Job and execution vertex " + sjv.getName()
+ " have different number of inputs");
}
// Then, create the group edges and the corresponding execution gates
for (int i = 0; i < sjv.getNumberOfForwardConnections(); ++i) {
final JobEdge edge = sjv.getForwardConnection(i);
final AbstractJobVertex tjv = edge.getConnectedVertex();
final ExecutionVertex tev = vertexMap.get(tjv);
final ExecutionGroupVertex tgv = tev.getGroupVertex();
// Use NETWORK as default channel type if nothing else is defined by the user
ChannelType channelType = edge.getChannelType();
boolean userDefinedChannelType = true;
if (channelType == null) {
userDefinedChannelType = false;
channelType = ChannelType.NETWORK;
}
final DistributionPattern distributionPattern = edge.getDistributionPattern();
// Connect the corresponding group vertices and copy the user settings from the job edge
final ExecutionGroupEdge groupEdge = sgv.wireTo(tgv, edge.getIndexOfInputGate(), i, channelType,
userDefinedChannelType, distributionPattern);
final ExecutionGate outputGate = new ExecutionGate(new GateID(), sev, groupEdge, false);
sev.insertOutputGate(i, outputGate);
final ExecutionGate inputGate = new ExecutionGate(new GateID(), tev, groupEdge, true);
tev.insertInputGate(edge.getIndexOfInputGate(), inputGate);
}
}
}
/**
* Creates an execution vertex from a job vertex.
*
* @param jobVertex
* the job vertex to create the execution vertex from
* @param instanceManager
* the instanceManager
* @param initialExecutionStage
* the initial execution stage all group vertices are added to
* @param jobConfiguration
* the configuration object originally attached to the {@link JobGraph}
* @return the new execution vertex
* @throws GraphConversionException
* thrown if the job vertex is of an unknown subclass
*/
private ExecutionVertex createVertex(final AbstractJobVertex jobVertex, final InstanceManager instanceManager,
final ExecutionStage initialExecutionStage, final Configuration jobConfiguration)
throws GraphConversionException {
// If the user has requested an instance type, check if the type is known to the current instance manager
InstanceType instanceType = null;
boolean userDefinedInstanceType = false;
if (jobVertex.getInstanceType() != null) {
userDefinedInstanceType = true;
instanceType = instanceManager.getInstanceTypeByName(jobVertex.getInstanceType());
if (instanceType == null) {
throw new GraphConversionException("Requested instance type " + jobVertex.getInstanceType()
+ " is not known to the instance manager");
}
}
if (instanceType == null) {
instanceType = instanceManager.getDefaultInstanceType();
}
// Create an initial execution vertex for the job vertex
final Class<? extends AbstractInvokable> invokableClass = jobVertex.getInvokableClass();
if (invokableClass == null) {
throw new GraphConversionException("JobVertex " + jobVertex.getID() + " (" + jobVertex.getName()
+ ") does not specify a task");
}
// Calculate the cryptographic signature of this vertex
final ExecutionSignature signature = ExecutionSignature.createSignature(jobVertex.getInvokableClass(),
jobVertex.getJobGraph().getJobID());
// Create a group vertex for the job vertex
ExecutionGroupVertex groupVertex = null;
try {
groupVertex = new ExecutionGroupVertex(jobVertex.getName(), jobVertex.getID(), this,
jobVertex.getNumberOfSubtasks(), instanceType, userDefinedInstanceType,
jobVertex.getNumberOfSubtasksPerInstance(), jobVertex.getVertexToShareInstancesWith() != null,
jobVertex.getNumberOfExecutionRetries(), jobVertex.getConfiguration(), signature,
invokableClass);
} catch (Throwable t) {
throw new GraphConversionException(StringUtils.stringifyException(t));
}
// Run the configuration check the user has provided for the vertex
try {
jobVertex.checkConfiguration(groupVertex.getEnvironment().getInvokable());
} catch (IllegalConfigurationException e) {
throw new GraphConversionException(StringUtils.stringifyException(e));
}
// Check if the user's specifications for the number of subtasks are valid
final int minimumNumberOfSubtasks = jobVertex.getMinimumNumberOfSubtasks(groupVertex.getEnvironment()
.getInvokable());
final int maximumNumberOfSubtasks = jobVertex.getMaximumNumberOfSubtasks(groupVertex.getEnvironment()
.getInvokable());
if (jobVertex.getNumberOfSubtasks() != -1) {
if (jobVertex.getNumberOfSubtasks() < 1) {
throw new GraphConversionException("Cannot split task " + jobVertex.getName() + " into "
+ jobVertex.getNumberOfSubtasks() + " subtasks");
}
if (jobVertex.getNumberOfSubtasks() < minimumNumberOfSubtasks) {
throw new GraphConversionException("Number of subtasks must be at least " + minimumNumberOfSubtasks);
}
if (maximumNumberOfSubtasks != -1) {
if (jobVertex.getNumberOfSubtasks() > maximumNumberOfSubtasks) {
throw new GraphConversionException("Number of subtasks for vertex " + jobVertex.getName()
+ " can be at most " + maximumNumberOfSubtasks);
}
}
}
// Check number of subtasks per instance
if (jobVertex.getNumberOfSubtasksPerInstance() != -1 && jobVertex.getNumberOfSubtasksPerInstance() < 1) {
throw new GraphConversionException("Cannot set number of subtasks per instance to "
+ jobVertex.getNumberOfSubtasksPerInstance() + " for vertex " + jobVertex.getName());
}
// Assign min/max to the group vertex (settings are actually applied in applyUserDefinedSettings)
groupVertex.setMinMemberSize(minimumNumberOfSubtasks);
groupVertex.setMaxMemberSize(maximumNumberOfSubtasks);
// Register input and output vertices separately
if (jobVertex instanceof AbstractJobInputVertex) {
final InputSplit[] inputSplits;
// let the task code compute the input splits
if (groupVertex.getEnvironment().getInvokable() instanceof AbstractInputTask) {
try {
inputSplits = ((AbstractInputTask<?>) groupVertex.getEnvironment().getInvokable())
.computeInputSplits(jobVertex.getNumberOfSubtasks());
} catch (Exception e) {
throw new GraphConversionException("Cannot compute input splits for " + groupVertex.getName(), e);
}
} else {
throw new GraphConversionException("JobInputVertex contained a task class which was not an input task.");
}
if (inputSplits == null) {
LOG.info("Job input vertex " + jobVertex.getName() + " generated 0 input splits");
} else {
LOG.info("Job input vertex " + jobVertex.getName() + " generated " + inputSplits.length
+ " input splits");
}
// assign input splits
groupVertex.setInputSplits(inputSplits);
}
// TODO: This is a quick workaround, problem can be solved in a more generic way
if (jobVertex instanceof JobFileOutputVertex) {
final JobFileOutputVertex jbov = (JobFileOutputVertex) jobVertex;
jobVertex.getConfiguration().setString("outputPath", jbov.getFilePath().toString());
}
// Add group vertex to initial execution stage
initialExecutionStage.addStageMember(groupVertex);
final ExecutionVertex ev = new ExecutionVertex(this, groupVertex, jobVertex.getNumberOfForwardConnections(),
jobVertex.getNumberOfBackwardConnections());
// Assign initial instance to vertex (may be overwritten later on when user settings are applied)
ev.setAllocatedResource(new AllocatedResource(DummyInstance.createDummyInstance(instanceType), instanceType,
null));
return ev;
}
/**
* Returns the number of input vertices registered with this execution graph.
*
* @return the number of input vertices registered with this execution graph
*/
public int getNumberOfInputVertices() {
return this.stages.get(0).getNumberOfInputExecutionVertices();
}
/**
* Returns the number of input vertices for the given stage.
*
* @param stage
* the index of the execution stage
* @return the number of input vertices for the given stage
*/
public int getNumberOfInputVertices(int stage) {
if (stage >= this.stages.size()) {
return 0;
}
return this.stages.get(stage).getNumberOfInputExecutionVertices();
}
/**
* Returns the number of output vertices registered with this execution graph.
*
* @return the number of output vertices registered with this execution graph
*/
public int getNumberOfOutputVertices() {
return this.stages.get(0).getNumberOfOutputExecutionVertices();
}
/**
* Returns the number of output vertices for the given stage.
*
* @param stage
* the index of the execution stage
* @return the number of output vertices for the given stage
*/
public int getNumberOfOutputVertices(final int stage) {
if (stage >= this.stages.size()) {
return 0;
}
return this.stages.get(stage).getNumberOfOutputExecutionVertices();
}
/**
* Returns the input vertex with the specified index.
*
* @param index
* the index of the input vertex to return
* @return the input vertex with the specified index or <code>null</code> if no input vertex with such an index
* exists
*/
public ExecutionVertex getInputVertex(final int index) {
return this.stages.get(0).getInputExecutionVertex(index);
}
/**
* Returns the output vertex with the specified index.
*
* @param index
* the index of the output vertex to return
* @return the output vertex with the specified index or <code>null</code> if no output vertex with such an index
* exists
*/
public ExecutionVertex getOutputVertex(final int index) {
return this.stages.get(0).getOutputExecutionVertex(index);
}
/**
* Returns the input vertex with the specified index for the given stage.
*
* @param stage
* the index of the stage
* @param index
* the index of the input vertex to return
* @return the input vertex with the specified index or <code>null</code> if no input vertex with such an index
* exists in that stage
*/
public ExecutionVertex getInputVertex(final int stage, final int index) {
try {
final ExecutionStage s = this.stages.get(stage);
if (s == null) {
return null;
}
return s.getInputExecutionVertex(index);
} catch (ArrayIndexOutOfBoundsException e) {
return null;
}
}
/**
* Returns the output vertex with the specified index for the given stage.
*
* @param stage
* the index of the stage
* @param index
* the index of the output vertex to return
* @return the output vertex with the specified index or <code>null</code> if no output vertex with such an index
* exists in that stage
*/
public ExecutionVertex getOutputVertex(final int stage, final int index) {
try {
final ExecutionStage s = this.stages.get(stage);
if (s == null) {
return null;
}
return s.getOutputExecutionVertex(index);
} catch (ArrayIndexOutOfBoundsException e) {
return null;
}
}
/**
* Returns the execution stage with number <code>num</code>.
*
* @param num
* the number of the execution stage to be returned
* @return the execution stage with number <code>num</code> or <code>null</code> if no such execution stage exists
*/
public ExecutionStage getStage(final int num) {
try {
return this.stages.get(num);
} catch (ArrayIndexOutOfBoundsException e) {
return null;
}
}
/**
* Returns the number of execution stages in the execution graph.
*
* @return the number of execution stages in the execution graph
*/
public int getNumberOfStages() {
return this.stages.size();
}
/**
* Identifies an execution vertex by the specified channel ID and returns it.
*
* @param id
* the channel ID to identify the vertex with
* @return the execution vertex which has a channel with ID <code>id</code> or <code>null</code> if no such vertex
* exists in the execution graph
*/
public ExecutionVertex getVertexByChannelID(final ChannelID id) {
final ExecutionEdge edge = this.edgeMap.get(id);
if (edge == null) {
return null;
}
if (id.equals(edge.getOutputChannelID())) {
return edge.getOutputGate().getVertex();
}
return edge.getInputGate().getVertex();
}
/**
* Finds an {@link ExecutionEdge} by its ID and returns it.
*
* @param id
* the channel ID to identify the edge
* @return the edge whose ID matches <code>id</code> or <code>null</code> if no such edge is known
*/
public ExecutionEdge getEdgeByID(final ChannelID id) {
return this.edgeMap.get(id);
}
/**
* Registers an execution vertex with the execution graph.
*
* @param vertex
* the execution vertex to register
*/
void registerExecutionVertex(final ExecutionVertex vertex) {
if (this.vertexMap.put(vertex.getID(), vertex) != null) {
throw new IllegalStateException("There is already an execution vertex with ID " + vertex.getID()
+ " registered");
}
}
/**
* Returns the execution vertex with the given vertex ID.
*
* @param id
* the vertex ID to retrieve the execution vertex
* @return the execution vertex matching the provided vertex ID or <code>null</code> if no such vertex could be
* found
*/
public ExecutionVertex getVertexByID(final ExecutionVertexID id) {
return this.vertexMap.get(id);
}
/**
* Checks if the current execution stage has been successfully completed, i.e.
* all vertices in this stage have successfully finished their execution.
*
* @return <code>true</code> if stage is completed, <code>false</code> otherwise
*/
private boolean isCurrentStageCompleted() {
if (this.indexToCurrentExecutionStage >= this.stages.size()) {
return true;
}
final ExecutionGraphIterator it = new ExecutionGraphIterator(this, this.indexToCurrentExecutionStage, true,
true);
while (it.hasNext()) {
final ExecutionVertex vertex = it.next();
if (vertex.getExecutionState() != ExecutionState.FINISHED) {
return false;
}
}
return true;
}
/**
* Checks if the execution of the execution graph is finished.
*
* @return <code>true</code> if the execution of the graph is finished, <code>false</code> otherwise
*/
public boolean isExecutionFinished() {
return (getJobStatus() == InternalJobStatus.FINISHED);
}
/**
* Returns the ID of the job this execution graph was originally constructed from.
*
* @return the ID of the job this execution graph was originally constructed from
*/
public JobID getJobID() {
return this.jobID;
}
/**
* Returns the index of the current execution stage.
*
* @return the index of the current execution stage
*/
public int getIndexOfCurrentExecutionStage() {
return this.indexToCurrentExecutionStage;
}
/**
* Returns the stage which is currently executed.
*
* @return the currently executed stage or <code>null</code> if the job execution is already completed
*/
public ExecutionStage getCurrentExecutionStage() {
try {
return this.stages.get(this.indexToCurrentExecutionStage);
} catch (ArrayIndexOutOfBoundsException e) {
return null;
}
}
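/**
* Recomputes the assignment of group vertices to execution stages and rebuilds the internal list of stages.
* The graph is first traversed along its forward edges to propagate stage numbers to successor vertices,
* then traversed backwards to verify that connected vertices have been assigned to consistent stages, and
* finally the execution stage objects are recreated from the computed stage numbers.
*/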
public void repairStages() {
final Map<ExecutionGroupVertex, Integer> stageNumbers = new HashMap<ExecutionGroupVertex, Integer>();
ExecutionGroupVertexIterator it = new ExecutionGroupVertexIterator(this, true, -1);
while (it.hasNext()) {
final ExecutionGroupVertex groupVertex = it.next();
int precedingNumber = 0;
if (stageNumbers.containsKey(groupVertex)) {
precedingNumber = stageNumbers.get(groupVertex).intValue();
} else {
stageNumbers.put(groupVertex, Integer.valueOf(precedingNumber));
}
for (int i = 0; i < groupVertex.getNumberOfForwardLinks(); i++) {
final ExecutionGroupEdge edge = groupVertex.getForwardEdge(i);
if (!stageNumbers.containsKey(edge.getTargetVertex())) {
// Target vertex has not yet been discovered
// Same stage as preceding vertex
stageNumbers.put(edge.getTargetVertex(), Integer.valueOf(precedingNumber));
} else {
final int stageNumber = stageNumbers.get(edge.getTargetVertex()).intValue();
if (stageNumber != precedingNumber) {
stageNumbers.put(edge.getTargetVertex(), Integer.valueOf(Math.max(precedingNumber, stageNumber)));
}
}
}
}
// Traverse the graph backwards (starting from the output vertices) to make sure vertices are allocated in a
// stage as high as possible
it = new ExecutionGroupVertexIterator(this, false, -1);
while (it.hasNext()) {
final ExecutionGroupVertex groupVertex = it.next();
final int succeedingNumber = stageNumbers.get(groupVertex);
for (int i = 0; i < groupVertex.getNumberOfBackwardLinks(); i++) {
final ExecutionGroupEdge edge = groupVertex.getBackwardEdge(i);
final int stageNumber = stageNumbers.get(edge.getSourceVertex());
if (stageNumber != succeedingNumber) {
throw new IllegalStateException(edge.getSourceVertex() + " and " + edge.getTargetVertex()
+ " are assigned to different stages");
}
}
}
// Finally, assign the new stage numbers
this.stages.clear();
final Iterator<Map.Entry<ExecutionGroupVertex, Integer>> it2 = stageNumbers.entrySet().iterator();
while (it2.hasNext()) {
final Map.Entry<ExecutionGroupVertex, Integer> entry = it2.next();
final ExecutionGroupVertex groupVertex = entry.getKey();
final int stageNumber = entry.getValue().intValue();
// Prevent out of bounds exceptions
while (this.stages.size() <= stageNumber) {
this.stages.add(null);
}
ExecutionStage executionStage = this.stages.get(stageNumber);
// If the stage does not exist yet, create it
if (executionStage == null) {
executionStage = new ExecutionStage(this, stageNumber);
this.stages.set(stageNumber, executionStage);
}
executionStage.addStageMember(groupVertex);
groupVertex.setExecutionStage(executionStage);
}
}
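/**
* Repairs the instance sharing among the group vertices of this graph. For every group vertex which does not
* share instances with another vertex itself (i.e. which is the head of an instance sharing chain), the set
* of available resources is reset and the sharing is repaired starting from that vertex.
*/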
public void repairInstanceSharing() {
final Set<AllocatedResource> availableResources = new LinkedHashSet<AllocatedResource>();
final Iterator<ExecutionGroupVertex> it = new ExecutionGroupVertexIterator(this, true, -1);
while (it.hasNext()) {
final ExecutionGroupVertex groupVertex = it.next();
if (groupVertex.getVertexToShareInstancesWith() == null) {
availableResources.clear();
groupVertex.repairInstanceSharing(availableResources);
}
}
}
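/**
* Repairs the instance assignment of the execution vertices. For every edge whose channel type is
* {@link ChannelType#IN_MEMORY} the vertex on the consuming side is moved to the allocated resource of the
* producing side (and vice versa in a second, backward pass), since in-memory channels require both
* endpoints to run on the same instance.
*/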
public void repairInstanceAssignment() {
Iterator<ExecutionVertex> it = new ExecutionGraphIterator(this, true);
while (it.hasNext()) {
final ExecutionVertex sourceVertex = it.next();
for (int i = 0; i < sourceVertex.getNumberOfOutputGates(); ++i) {
final ExecutionGate outputGate = sourceVertex.getOutputGate(i);
final ChannelType channelType = outputGate.getChannelType();
if (channelType == ChannelType.IN_MEMORY) {
final int numberOfOutputChannels = outputGate.getNumberOfEdges();
for (int j = 0; j < numberOfOutputChannels; ++j) {
final ExecutionEdge outputChannel = outputGate.getEdge(j);
outputChannel.getInputGate().getVertex()
.setAllocatedResource(sourceVertex.getAllocatedResource());
}
}
}
}
it = new ExecutionGraphIterator(this, false);
while (it.hasNext()) {
final ExecutionVertex targetVertex = it.next();
for (int i = 0; i < targetVertex.getNumberOfInputGates(); ++i) {
final ExecutionGate inputGate = targetVertex.getInputGate(i);
final ChannelType channelType = inputGate.getChannelType();
if (channelType == ChannelType.IN_MEMORY) {
final int numberOfInputChannels = inputGate.getNumberOfEdges();
for (int j = 0; j < numberOfInputChannels; ++j) {
final ExecutionEdge inputChannel = inputGate.getEdge(j);
inputChannel.getOutputGate().getVertex()
.setAllocatedResource(targetVertex.getAllocatedResource());
}
}
}
}
}
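/**
* Returns the type of the channel connecting the given source vertex to the given target vertex.
*
* @param sourceVertex
* the source vertex of the connection
* @param targetVertex
* the target vertex of the connection
* @return the channel type of the connection between the two vertices or <code>null</code> if the vertices
* are not directly connected
*/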
public ChannelType getChannelType(final ExecutionVertex sourceVertex, final ExecutionVertex targetVertex) {
final ExecutionGroupVertex sourceGroupVertex = sourceVertex.getGroupVertex();
final ExecutionGroupVertex targetGroupVertex = targetVertex.getGroupVertex();
final List<ExecutionGroupEdge> edges = sourceGroupVertex.getForwardEdges(targetGroupVertex);
if (edges.size() == 0) {
return null;
}
// On a task level, the two vertices are connected
final ExecutionGroupEdge edge = edges.get(0);
// Now lets see if these two concrete subtasks are connected
final ExecutionGate outputGate = sourceVertex.getOutputGate(edge.getIndexOfOutputGate());
for (int i = 0; i < outputGate.getNumberOfEdges(); ++i) {
final ExecutionEdge outputChannel = outputGate.getEdge(i);
if (targetVertex == outputChannel.getInputGate().getVertex()) {
return edge.getChannelType();
}
}
return null;
}
/**
* Returns the job configuration that was originally attached to the job graph.
*
* @return the job configuration that was originally attached to the job graph
*/
public Configuration getJobConfiguration() {
return this.jobConfiguration;
}
/**
* Checks whether the job represented by the execution graph has the status <code>FINISHED</code>.
*
* @return <code>true</code> if the job has the status <code>FINISHED</code>, <code>false</code> otherwise
*/
private boolean jobHasFinishedStatus() {
final Iterator<ExecutionVertex> it = new ExecutionGraphIterator(this, true);
while (it.hasNext()) {
if (it.next().getExecutionState() != ExecutionState.FINISHED) {
return false;
}
}
return true;
}
/**
* Checks whether the job represented by the execution graph has the status <code>SCHEDULED</code>.
*
* @return <code>true</code> if the job has the status <code>SCHEDULED</code>, <code>false</code> otherwise
*/
private boolean jobHasScheduledStatus() {
final Iterator<ExecutionVertex> it = new ExecutionGraphIterator(this, true);
while (it.hasNext()) {
final ExecutionState s = it.next().getExecutionState();
if (s != ExecutionState.CREATED && s != ExecutionState.SCHEDULED && s != ExecutionState.READY) {
return false;
}
}
return true;
}
/**
* Checks whether the job represented by the execution graph has the status <code>CANCELED</code> or
* <code>FAILED</code>.
*
* @return <code>true</code> if the job has the status <code>CANCELED</code> or <code>FAILED</code>,
* <code>false</code> otherwise
*/
private boolean jobHasFailedOrCanceledStatus() {
final Iterator<ExecutionVertex> it = new ExecutionGraphIterator(this, true);
while (it.hasNext()) {
final ExecutionState state = it.next().getExecutionState();
if (state != ExecutionState.CANCELED && state != ExecutionState.FAILED && state != ExecutionState.FINISHED) {
return false;
}
}
return true;
}
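/**
* Determines the new job status for the given execution graph based on its current status and the latest
* execution state change of one of its vertices.
*
* @param eg
* the execution graph whose job status shall be determined
* @param latestStateChange
* the latest execution state change of one of the graph's vertices
* @return the new job status, or the current job status if the state change does not trigger a transition
*/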
private static InternalJobStatus determineNewJobStatus(final ExecutionGraph eg,
final ExecutionState latestStateChange) {
final InternalJobStatus currentJobStatus = eg.getJobStatus();
switch (currentJobStatus) {
case CREATED:
if (eg.jobHasScheduledStatus()) {
return InternalJobStatus.SCHEDULED;
} else if (latestStateChange == ExecutionState.CANCELED) {
if (eg.jobHasFailedOrCanceledStatus()) {
return InternalJobStatus.CANCELED;
}
}
break;
case SCHEDULED:
if (latestStateChange == ExecutionState.RUNNING) {
return InternalJobStatus.RUNNING;
} else if (latestStateChange == ExecutionState.CANCELED) {
if (eg.jobHasFailedOrCanceledStatus()) {
return InternalJobStatus.CANCELED;
}
}
break;
case RUNNING:
if (latestStateChange == ExecutionState.CANCELED) {
return InternalJobStatus.CANCELING;
}
if (latestStateChange == ExecutionState.FAILED) {
final Iterator<ExecutionVertex> it = new ExecutionGraphIterator(eg, true);
while (it.hasNext()) {
final ExecutionVertex vertex = it.next();
if (vertex.getExecutionState() == ExecutionState.FAILED) {
return InternalJobStatus.FAILING;
}
}
}
if (eg.jobHasFinishedStatus()) {
return InternalJobStatus.FINISHED;
}
break;
case FAILING:
if (eg.jobHasFailedOrCanceledStatus()) {
return InternalJobStatus.FAILED;
}
break;
case FAILED:
LOG.error("Received update of execute state in job status FAILED");
break;
case CANCELING:
if (eg.jobHasFailedOrCanceledStatus()) {
return InternalJobStatus.CANCELED;
}
break;
case CANCELED:
LOG.error("Received update of execute state in job status CANCELED: " + eg.getJobID());
break;
case FINISHED:
LOG.error("Received update of execute state in job status FINISHED: " + eg.getJobID() + " "
+ StringUtils.stringifyException(new Throwable()));
break;
}
return currentJobStatus;
}
/**
* Returns the current status of the job
* represented by this execution graph.
*
* @return the current status of the job
*/
public InternalJobStatus getJobStatus() {
return this.jobStatus.get();
}
@Override
public void executionStateChanged(final JobID jobID, final ExecutionVertexID vertexID,
final ExecutionState newExecutionState, String optionalMessage) {
// Do not use the parameter newExecutionState here as it may already be outdated
final ExecutionVertex vertex = getVertexByID(vertexID);
if (vertex == null) {
LOG.error("Cannot find execution vertex with the ID " + vertexID);
return;
}
final ExecutionState actualExecutionState = vertex.getExecutionState();
final InternalJobStatus newJobStatus = determineNewJobStatus(this, actualExecutionState);
if (actualExecutionState == ExecutionState.FINISHED) {
// It is worth checking if the current stage has been completed
if (this.isCurrentStageCompleted()) {
// Increase current execution stage
++this.indexToCurrentExecutionStage;
if (this.indexToCurrentExecutionStage < this.stages.size()) {
final Iterator<ExecutionStageListener> it = this.executionStageListeners.iterator();
final ExecutionStage nextExecutionStage = getCurrentExecutionStage();
while (it.hasNext()) {
it.next().nextExecutionStageEntered(jobID, nextExecutionStage);
}
}
}
}
updateJobStatus(newJobStatus, optionalMessage);
}
/**
* Updates the job status to the given status and notifies the registered {@link JobStatusListener} objects.
*
* @param newJobStatus
* the new job status
* @param optionalMessage
* an optional message providing details on the reasons for the state change
*/
public void updateJobStatus(final InternalJobStatus newJobStatus, String optionalMessage) {
// Check if the new job status equals the old one
if (this.jobStatus.getAndSet(newJobStatus) == newJobStatus) {
return;
}
// The task caused the entire job to fail, save the error description
if (newJobStatus == InternalJobStatus.FAILING) {
this.errorDescription = optionalMessage;
}
// If this is the final failure state change, reuse the saved error description
if (newJobStatus == InternalJobStatus.FAILED) {
optionalMessage = this.errorDescription;
}
final Iterator<JobStatusListener> it = this.jobStatusListeners.iterator();
while (it.hasNext()) {
it.next().jobStatusHasChanged(this, newJobStatus, optionalMessage);
}
}
/**
* Registers a new {@link JobStatusListener} object with this execution graph.
* After being registered the object will receive notifications about changes
* of the job status. It is not possible to register the same listener object
* twice.
*
* @param jobStatusListener
* the listener object to register
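* <p>
* A minimal usage sketch (the anonymous listener shown here is illustrative only and assumes that
* {@link JobStatusListener} is an interface whose single callback is invoked by
* {@link #updateJobStatus(InternalJobStatus, String)}):
*
* <pre>
* executionGraph.registerJobStatusListener(new JobStatusListener() {
*
*     public void jobStatusHasChanged(final ExecutionGraph executionGraph,
*             final InternalJobStatus newJobStatus, final String optionalMessage) {
*         // React to the status change, e.g. log it
*         System.out.println("Job " + executionGraph.getJobID() + " switched to " + newJobStatus);
*     }
* });
* </pre>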
*/
public void registerJobStatusListener(final JobStatusListener jobStatusListener) {
if (jobStatusListener == null) {
throw new IllegalArgumentException("Argument jobStatusListener must not be null");
}
this.jobStatusListeners.addIfAbsent(jobStatusListener);
}
/**
* Unregisters the given {@link JobStatusListener} object. After having called this
* method, the object will no longer receive notifications about changes of the job
* status.
*
* @param jobStatusListener
* the listener object to unregister
*/
public void unregisterJobStatusListener(final JobStatusListener jobStatusListener) {
if (jobStatusListener == null) {
throw new IllegalArgumentException("Argument jobStatusListener must not be null");
}
this.jobStatusListeners.remove(jobStatusListener);
}
/**
* Registers a new {@link ExecutionStageListener} object with this execution graph. After being registered the
* object will receive a notification whenever the job has entered its next execution stage. Note that a
* notification is not sent when the job has entered its initial execution stage.
*
* @param executionStageListener
* the listener object to register
*/
public void registerExecutionStageListener(final ExecutionStageListener executionStageListener) {
if (executionStageListener == null) {
throw new IllegalArgumentException("Argument executionStageListener must not be null");
}
this.executionStageListeners.addIfAbsent(executionStageListener);
}
/**
* Unregisters the given {@link ExecutionStageListener} object. After having called this method, the object will no
* longer receive notifications about the execution stage progress.
*
* @param executionStageListener
* the listener object to unregister
*/
public void unregisterExecutionStageListener(final ExecutionStageListener executionStageListener) {
if (executionStageListener == null) {
throw new IllegalArgumentException("Argument executionStageListener must not be null");
}
this.executionStageListeners.remove(executionStageListener);
}
/**
* Returns the name of the original job graph.
*
* @return the name of the original job graph, possibly <code>null</code>
*/
public String getJobName() {
return this.jobName;
}
@Override
public void userThreadStarted(final JobID jobID, final ExecutionVertexID vertexID, final Thread userThread) {
// TODO Auto-generated method stub
}
@Override
public void userThreadFinished(final JobID jobID, final ExecutionVertexID vertexID, final Thread userThread) {
// TODO Auto-generated method stub
}
/**
* Reconstructs the execution pipelines for the entire execution graph.
*/
private void reconstructExecutionPipelines() {
final Iterator<ExecutionStage> it = this.stages.iterator();
while (it.hasNext()) {
it.next().reconstructExecutionPipelines();
}
}
/**
* Calculates the connection IDs of the graph to avoid deadlocks in the data flow at runtime.
*/
private void calculateConnectionIDs() {
final Set<ExecutionGroupVertex> alreadyVisited = new HashSet<ExecutionGroupVertex>();
final ExecutionStage lastStage = getStage(getNumberOfStages() - 1);
for (int i = 0; i < lastStage.getNumberOfStageMembers(); ++i) {
final ExecutionGroupVertex groupVertex = lastStage.getStageMember(i);
int currentConnectionID = 0;
if (groupVertex.isOutputVertex()) {
currentConnectionID = groupVertex.calculateConnectionID(currentConnectionID, alreadyVisited);
}
}
}
/**
* Returns an iterator over all execution stages contained in this graph.
*
* @return an iterator over all execution stages contained in this graph
*/
public Iterator<ExecutionStage> iterator() {
return this.stages.iterator();
}
@Override
public int getPriority() {
return 1;
}
/**
* Performs an asynchronous update operation to this execution graph.
*
* @param command
* the update command to be asynchronously executed on this graph
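* <p>
* Example (the runnable below is an illustrative placeholder for an actual update operation):
*
* <pre>
* executionGraph.executeCommand(new Runnable() {
*
*     public void run() {
*         // e.g. repair the instance assignment asynchronously
*         executionGraph.repairInstanceAssignment();
*     }
* });
* </pre>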
*/
public void executeCommand(final Runnable command) {
this.executorService.execute(command);
}
}