/** Formatter for the "date" field carried by every tweet tuple; uses the pattern {@code yyyyMMdd}. */
private static final DateTimeFormatter DAY_FORMAT = DateTimeFormat.forPattern("yyyyMMdd");
/**
 * Assembles the Trident topology for the hashtag-counting example.
 * <p>
 * Three pieces are registered on the topology, in this order:
 * <ol>
 *   <li>the real-time stream "spout1", whose per-hashtag aggregation is persisted in a
 *       {@code MemoryMapState};</li>
 *   <li>a static {@code SploutState} (the batch view) pointing at http://localhost:4412;</li>
 *   <li>the DRPC stream "hashtags", which queries the real-time view first, then the batch
 *       view, and merges both answers with {@code LambdaMerge}.</li>
 * </ol>
 *
 * @param drpc the DRPC instance the "hashtags" stream is attached to
 * @return the topology produced by {@code TridentTopology#build()}
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    TridentTopology trident = new TridentTopology();

    // Real-time layer: per-date hashtag counts fed by the sample spout.
    TridentState realtimeView = countHashtagsByDate(trident, newSampleTweetSpout());

    // Batch layer: the Splout connector is created with its first argument set to false so that,
    // as the original note puts it, it doesn't fail fast and we can work without the batch layer.
    // This TridentState can be used to query Splout.
    TridentState batchView = trident.newStaticState(
            new SploutState.Factory(false, "http://localhost:4412"));

    // Serving layer: DRPC function combining both views.
    exposeHashtagQuery(trident, drpc, realtimeView, batchView);

    return trident.build();
}

/**
 * Creates the dummy cyclic spout used by this example. It emits two fixed tweets, each paired
 * with the current day formatted by {@link #DAY_FORMAT} at the moment this method runs.
 */
private static FixedBatchSpout newSampleTweetSpout() {
    String today = DAY_FORMAT.print(System.currentTimeMillis());
    FixedBatchSpout tweets = new FixedBatchSpout(
            new Fields("tweet", "date"),
            3,
            new Values("#california is cool", today),
            new Values("I like #california", today));
    tweets.setCycle(true);
    return tweets;
}

/**
 * Registers the real-time stream "spout1" on the given topology and returns the state that
 * holds its aggregation result (output field "datecount").
 */
private static TridentState countHashtagsByDate(TridentTopology trident, FixedBatchSpout tweets) {
    // Backing store for the real-time counts per date for each hashtag.
    StateFactory countsStore = new MemoryMapState.Factory();

    return trident
            .newStream("spout1", tweets)
            // "date" is passed along at every step so it is still available to the aggregator.
            .each(new Fields("tweet", "date"), new Split(), new Fields("word"))
            .each(new Fields("word", "date"), new HashTagFilter(), new Fields("hashtag"))
            .groupBy(new Fields("hashtag"))
            .persistentAggregate(countsStore, new Fields("hashtag", "date"), new CountByDate(),
                    new Fields("datecount"));
}

/**
 * Registers the DRPC stream "hashtags". The request's "args" field is run through {@code Split}
 * to obtain "hashtag" values; each one is looked up in the real-time view ("resultrt") and then
 * in the batch view ("resultbatch"), and {@code LambdaMerge} turns both into "result".
 */
private static void exposeHashtagQuery(TridentTopology trident, LocalDRPC drpc,
        TridentState realtimeView, TridentState batchView) {
    trident
            .newDRPCStream("hashtags", drpc)
            .each(new Fields("args"), new Split(), new Fields("hashtag"))
            .groupBy(new Fields("hashtag"))
            .stateQuery(realtimeView, new Fields("hashtag"), new MapGet(), new Fields("resultrt"))
            .stateQuery(batchView, new Fields("hashtag", "resultrt"), new HashTagsSploutQuery(),
                    new Fields("resultbatch"))
            .each(new Fields("hashtag", "resultrt", "resultbatch"), new LambdaMerge(),
                    new Fields("result"))
            // Only the merged result is handed back to the DRPC caller.
            .project(new Fields("result"));
}