// Source tap: text file exposing a single "line" field; sink tap replaces any existing output.
Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileApache200 );
Tap sink = getPlatform().getTextFile( getOutputPath( "deeppipline" ), SinkMode.REPLACE );
Pipe pipe = new Pipe( "pipeline" );
// Declares a "count" field from the length of the array returned by line.split(...).
// NOTE(review): presumably a whitespace-token count per line -- confirm how the expression
// compiler interprets the escaped pattern.
Function function = new ExpressionFunction( new Fields( "count" ), "line.split( \"\\\\s\").length", String.class );
pipe = new Each( pipe, new Fields( "line" ), function, Fields.ALL );
// Number of repetitions used by the loops below.
int depth = 50;
// Each iteration appends seven Each stages to the assembly, all using Identity but with
// different argument selectors, declared fields and output selectors. The last stage of
// every iteration selects "line" and "count" as its output.
// NOTE(review): this looks intended to build a deliberately deep chain while keeping the
// same two fields available for the next iteration -- confirm against Identity semantics.
for( int i = 0; i < depth; i++ )
{
// Positional declared field (0) with Fields.ALL output, once for "line" and once for "count".
pipe = new Each( pipe, new Fields( "line" ), new Identity( new Fields( 0 ) ), Fields.ALL );
pipe = new Each( pipe, new Fields( "count" ), new Identity( new Fields( 0 ) ), Fields.ALL );
// Default Identity with Fields.REPLACE output, once for "line" and once for "count".
pipe = new Each( pipe, new Fields( "line" ), new Identity(), Fields.REPLACE );
pipe = new Each( pipe, new Fields( "count" ), new Identity(), Fields.REPLACE );
// Both fields as arguments, no explicit output selector.
pipe = new Each( pipe, new Fields( "line", "count" ), new Identity() );
// Declares "line2"/"count2" from the two arguments and selects "line" and "count2" as output.
pipe = new Each( pipe, new Fields( "line", "count" ), new Identity( new Fields( "line2", "count2" ) ), new Fields( "line", "count2" ) );
// Declares "count" from "count2" and selects "line" and "count" as output.
pipe = new Each( pipe, new Fields( "count2" ), new Identity( new Fields( "count" ) ), new Fields( "line", "count" ) );
}
// Modulus embedded into the "hash" expression below.
int modulo = 1000000;
// Declares a "hash" field computed as line.hashCode() modulo the value above, keeping all fields.
pipe = new Each( pipe, new Fields( "line" ), new ExpressionFunction( new Fields( "hash" ), "line.hashCode() % " + modulo, String.class ), Fields.ALL ); // want some collisions
// Group on the computed "hash" field.
pipe = new GroupBy( pipe, new Fields( "hash" ) );
// Chain `depth` Every stages after the GroupBy, each applying Sum to "count" and declaring
// its result under a distinct field name: "sum1" through "sum" + depth.
for( int i = 0; i < depth; i++ )
pipe = new Every( pipe, new Fields( "count" ), new Sum( new Fields( "sum" + ( i + 1 ) ) ) );