generate a bipartite map of (uid, token), while filtering out stop-words
*/
// Create a STREAM ASSERTION to validate the input data.
// The branch starts as a pipe named "tweet"; tweetPipe is then re-bound to an
// Each that wraps it with the assertion, so anything built on tweetPipe after
// this point sits downstream of the check.
Pipe tweetPipe = new Pipe( "tweet" ); // name of this branch
// The pattern accepts between 6 and 150 characters.
// NOTE(review): presumably the pattern is applied to the whole incoming tuple
// rather than to one named field (no argument selector is given) -- confirm
// against the AssertMatches documentation.
AssertMatches assertMatches = new AssertMatches( ".{6,150}" );
// NOTE(review): the assertion is registered at STRICT level; whether it is
// actually enforced presumably depends on the assertion level configured on
// the flow -- confirm.
tweetPipe = new Each( tweetPipe, AssertionLevel.STRICT, assertMatches );
// Create an OPERATION to split the text into a token stream.
// The splitter declares a single output field, "token", and uses one space
// character as its split pattern.
// NOTE(review): a lone " " does not collapse runs of spaces and does not split
// on tabs or newlines, so empty or unsplit tokens look possible -- confirm
// whether a whitespace class such as "\\s+" was intended.
RegexSplitGenerator splitter = new RegexSplitGenerator( new Fields( "token" ), " " );
// The (uid, token) field pair to keep.
// NOTE(review): presumably passed as the output selector of the Each that
// applies the splitter; that usage is outside this view -- confirm.
Fields outputSelector = new Fields( "uid", "token" );