/**
* First, grab the tweets stream. We're going to use it in two different places
* and then we're going to join the results.
*
*/
// Open the Trident stream named "tweets" that is fed by the spout.
Stream tweetSource = topology.newStream("tweets", spout);
// Run ParseTweet over the raw "str" field; it declares the output fields
// "text", "content" and "user", which the downstream branches rely on.
Stream contents = tweetSource.each(
        new Fields("str"),
        new ParseTweet(),
        new Fields("text", "content", "user"));
/**
* Now, let's select and project only hashtags for each tweet.
* This stream is basically a list of pairs (hashtag, tweetId).
*
*/
// Step 1: filter on "content" with OnlyHashtags
// (presumably drops every content entry that is not a hashtag).
Stream hashtagContents = contents.each(new Fields("content"), new OnlyHashtags());
// Step 2: derive the "tweetId" field from "content".
Stream hashtagsWithTweetId = hashtagContents.each(
        new Fields("content"),
        new TweetIdExtractor(),
        new Fields("tweetId"));
// Step 3: derive the "hashtag" field from "content".
Stream hashtagsWithName = hashtagsWithTweetId.each(
        new Fields("content"),
        new GetContentName(),
        new Fields("hashtag"));
// Step 4: keep only the two fields needed downstream, in this order.
Stream hashtags = hashtagsWithName.project(new Fields("hashtag", "tweetId"));
//.each(new Fields("content", "tweetId"), new DebugFilter());
/**
* And let's do the same for URLs, obtaining a stream of pairs
* like (url, tweetId).
*
*/
// Step 1: filter on "content" with OnlyUrls
// (presumably drops every content entry that is not a URL).
Stream urlContents = contents.each(new Fields("content"), new OnlyUrls());
// Step 2: derive the "tweetId" field from "content".
Stream urlsWithTweetId = urlContents.each(
        new Fields("content"),
        new TweetIdExtractor(),
        new Fields("tweetId"));
// Step 3: derive the "url" field from "content".
Stream urlsWithName = urlsWithTweetId.each(
        new Fields("content"),
        new GetContentName(),
        new Fields("url"));
// Step 4: keep only the two fields needed downstream, in this order.
Stream urls = urlsWithName.project(new Fields("url", "tweetId"));
//.each(new Fields("content", "tweetId"), new DebugFilter());