public void testCascade() throws IOException
{
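// stage the local Apache log test file onto the platform's filesystem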
getPlatform().copyFromLocal( inputFileApache );

// set up two standard Cascading flows that generate the input for the first MapReduceFlow
Tap source1 = new Hfs( new TextLine( new Fields( "offset", "line" ) ), remove( inputFileApache, false ) );
String sinkPath4 = getOutputPath( "flow4" );
Tap sink1 = new Hfs( new TextLine( new Fields( "offset", "line" ) ), remove( sinkPath4, true ), SinkMode.REPLACE );
Flow firstFlow = getPlatform().getFlowConnector( getProperties() ).connect( source1, sink1, new Pipe( "first-flow" ) );
String sinkPath5 = getOutputPath( "flow5" );
Tap sink2 = new Hfs( new TextLine( new Fields( "offset", "line" ) ), remove( sinkPath5, true ), SinkMode.REPLACE );
Flow secondFlow = getPlatform().getFlowConnector( getProperties() ).connect( sink1, sink2, new Pipe( "second-flow" ) );
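
// create a base JobConf from the planner properties, then derive three chained
// identity MapReduce jobs, each reading the previous step's output path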
JobConf defaultConf = HadoopPlanner.createJobConf( getProperties() );
JobConf firstConf = new JobConf( defaultConf );
firstConf.setJobName( "first-mr" );
firstConf.setOutputKeyClass( LongWritable.class );
firstConf.setOutputValueClass( Text.class );
firstConf.setMapperClass( IdentityMapper.class );
firstConf.setReducerClass( IdentityReducer.class );
firstConf.setInputFormat( TextInputFormat.class );
firstConf.setOutputFormat( TextOutputFormat.class );
FileInputFormat.setInputPaths( firstConf, new Path( remove( sinkPath5, true ) ) );
String sinkPath1 = getOutputPath( "flow1" );
FileOutputFormat.setOutputPath( firstConf, new Path( remove( sinkPath1, true ) ) );
Flow firstMR = new MapReduceFlow( firstConf, true ); // deleteSinkOnInit: remove the output path before the job runs
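
// the second identity job reads the first job's output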
JobConf secondConf = new JobConf( defaultConf );
secondConf.setJobName( "second-mr" );
secondConf.setOutputKeyClass( LongWritable.class );
secondConf.setOutputValueClass( Text.class );
secondConf.setMapperClass( IdentityMapper.class );
secondConf.setReducerClass( IdentityReducer.class );
secondConf.setInputFormat( TextInputFormat.class );
secondConf.setOutputFormat( TextOutputFormat.class );
FileInputFormat.setInputPaths( secondConf, new Path( remove( sinkPath1, true ) ) );
String sinkPath2 = getOutputPath( "flow2" );
FileOutputFormat.setOutputPath( secondConf, new Path( remove( sinkPath2, true ) ) );
Flow secondMR = new MapReduceFlow( secondConf, true );
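
// the third identity job reads the second job's output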
JobConf thirdConf = new JobConf( defaultConf );
thirdConf.setJobName( "third-mr" );
thirdConf.setOutputKeyClass( LongWritable.class );
thirdConf.setOutputValueClass( Text.class );
thirdConf.setMapperClass( IdentityMapper.class );
thirdConf.setReducerClass( IdentityReducer.class );
thirdConf.setInputFormat( TextInputFormat.class );
thirdConf.setOutputFormat( TextOutputFormat.class );
FileInputFormat.setInputPaths( thirdConf, new Path( remove( sinkPath2, true ) ) );
String sinkPath3 = getOutputPath( "flow3" );
FileOutputFormat.setOutputPath( thirdConf, new Path( remove( sinkPath3, true ) ) );
Flow thirdMR = new MapReduceFlow( thirdConf, true );
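
// assemble all five flows into a single Cascade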
CascadeConnector cascadeConnector = new CascadeConnector();
// pass the flows out of order; the CascadeConnector topologically sorts them by their source/sink dependencies
Cascade cascade = cascadeConnector.connect( firstFlow, secondFlow, thirdMR, firstMR, secondMR );
cascade.complete();
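
// every step is an identity copy, so the final sink should hold the same 10 lines as the input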
validateLength( new Hfs( new TextLine(), sinkPath3 ).openForRead( new HadoopFlowProcess( defaultConf ) ), 10 );
}