// a predefined pipe assembly that emits tuples with fields named "url" and "page"
Pipe importPipe = new ImportCrawlDataAssembly( "import pipe" );
// create the tap instances; Lfs reads from the local file system, Hfs addresses HDFS
Tap localPagesSource = new Lfs( new TextLine(), inputPath );
Tap importedPages = new Hfs( new SequenceFile( new Fields( "url", "page" ) ), pagesPath );
// connect the pipe assembly to the tap instances
Flow importPagesFlow = flowConnector.connect( "import pages", localPagesSource, importedPages, importPipe );
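// note: connect() plans the flow but executes nothing; work starts only when
// complete() is called on the Flow, or on a Cascade that contains it (see the sketch at the end)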
// a predefined pipe assembly that splits the stream into two branches named "url pipe" and "word pipe"
// these pipes could be retrieved via the getTails() method and added to new pipe instances
SubAssembly wordCountPipe = new WordCountSplitAssembly( "wordcount pipe", "url pipe", "word pipe" );
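// for instance, the branch tails could be retrieved and extended before planning
// (a sketch; the Identity stand-in here is purely illustrative):
//   Pipe urlTail = wordCountPipe.getTails()[0];
//   Pipe extended = new Each( urlTail, new Identity() );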
// create taps on Hadoop sequence files to store the results of the counts
Tap sinkUrl = new Hfs( new SequenceFile( new Fields( "url", "word", "count" ) ), urlsPath );
Tap sinkWord = new Hfs( new SequenceFile( new Fields( "word", "count" ) ), wordsPath );
// convenience method to bind each named pipe to its sink tap
Map<String, Tap> sinks = Cascades.tapsMap( new String[]{"url pipe", "word pipe"}, Tap.taps( sinkUrl, sinkWord ) );
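// the call above is shorthand for building the map by hand, roughly:
//   Map<String, Tap> sinks = new HashMap<String, Tap>();
//   sinks.put( "url pipe", sinkUrl );
//   sinks.put( "word pipe", sinkWord );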
// wordCountPipe will be recognized as an assembly and handled appropriately
Flow countFlow = flowConnector.connect( "word count", importedPages, sinks, wordCountPipe );
// create an assembly to export the Hadoop sequence files back to local text files;
// the Identity function simply passes each tuple through unchanged
Pipe exportPipe = new Each( "export pipe", new Identity() );
Tap localSinkUrl = new Lfs( new TextLine(), localUrlsPath );
Tap localSinkWord = new Lfs( new TextLine(), localWordsPath );
// connect up both sinks using the same exportPipe assembly
Flow exportFromUrl = flowConnector.connect( "export url", sinkUrl, localSinkUrl, exportPipe );
Flow exportFromWord = flowConnector.connect( "export word", sinkWord, localSinkWord, exportPipe );
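// one way to run all four flows in dependency order is a Cascade
// (a sketch using Cascading's CascadeConnector; flow variables as above):
Cascade cascade = new CascadeConnector().connect(
  importPagesFlow, countFlow, exportFromUrl, exportFromWord );
// complete() blocks until every flow in the cascade has finished
cascade.complete();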