return new Hadoop2TezFlowStepJob( clientState, this, initializedStepConfig, dag );
}
private DAG createDAG( FlowProcess<TezConfiguration> flowProcess, TezConfiguration initializedConfig )
{
FlowNodeGraph nodeGraph = getFlowNodeGraph();
Map<FlowNode, Vertex> vertexMap = new HashMap<>();
DAG dag = DAG.create( getStepDisplayName( initializedConfig.getInt( "cascading.display.id.truncate", Util.ID_LENGTH ) ) );
dag.addTaskLocalFiles( localResources );
Iterator<FlowNode> iterator = nodeGraph.getOrderedTopologicalIterator(); // ordering of nodes for consistent remote debugging
while( iterator.hasNext() )
{
FlowNode flowNode = iterator.next();
Vertex vertex = createVertex( flowProcess, initializedConfig, flowNode );
dag.addVertex( vertex );
vertexMap.put( flowNode, vertex );
}
LinkedList<ProcessGraph.ProcessEdge> processedEdges = new LinkedList<>();
for( ProcessGraph.ProcessEdge processEdge : nodeGraph.edgeSet() )
{
if( processedEdges.contains( processEdge ) )
continue;
FlowNode edgeTargetFlowNode = nodeGraph.getEdgeTarget( processEdge );
FlowElement flowElement = processEdge.getFlowElement();
List<FlowNode> sourceNodes = nodeGraph.getElementSourceProcesses( flowElement );
EdgeProperty edgeProperty = createEdgeProperty( initializedConfig, processEdge );
Vertex targetVertex = vertexMap.get( edgeTargetFlowNode );
if( sourceNodes.size() == 1 || flowElement instanceof CoGroup || flowElement instanceof Boundary ) // todo: create group vertices around incoming ordinal
{
FlowNode edgeSourceFlowNode = nodeGraph.getEdgeSource( processEdge );
Vertex sourceVertex = vertexMap.get( edgeSourceFlowNode );
LOG.debug( "adding edge between: {} and {}", sourceVertex, targetVertex );
dag.addEdge( Edge.create( sourceVertex, targetVertex, edgeProperty ) );
}
else if( flowElement instanceof GroupBy || flowElement instanceof Merge ) // merge - source nodes > 1
{
List<String> sourceVerticesIDs = new ArrayList<>();
List<Vertex> sourceVertices = new ArrayList<>();
for( FlowNode edgeSourceFlowNode : sourceNodes )
{
sourceVerticesIDs.add( edgeSourceFlowNode.getID() );
sourceVertices.add( vertexMap.get( edgeSourceFlowNode ) );
processedEdges.add( nodeGraph.getEdge( edgeSourceFlowNode, edgeTargetFlowNode ) );
}
VertexGroup vertexGroup = dag.createVertexGroup( edgeTargetFlowNode.getID(), sourceVertices.toArray( new Vertex[ sourceVertices.size() ] ) );
String inputClassName = flowElement instanceof Group ? OrderedGroupedMergedKVInput.class.getName() : ConcatenatedMergedKeyValueInput.class.getName();