if (!parseParameters(args)) {
return;
}
// set up the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment
.createLocalEnvironment(PARALLELISM);
env.setBufferTimeout(1000);
// get input data
DataStream<String> streamSource = getTextDataStream(env);
DataStream<Tuple2<String, Integer>> dataStream = streamSource
// select English tweets and split them into words
.flatMap(new SelectEnglishAndTokenizeFlatMap())
.partitionBy(0)
// returning (word, 1)
.map(new MapFunction<String, Tuple2<String, Integer>>() {
private static final long serialVersionUID = 1L;
/**
 * Pairs the incoming value with an initial count of one.
 *
 * @param value the element received from the upstream operator
 * @return a tuple holding {@code value} in field 0 and {@code 1} in field 1
 * @throws Exception declared to match the overridden method's signature
 */
@Override
public Tuple2<String, Integer> map(String value)
        throws Exception {
    // Every element starts with a count of one; the counts are summed further down the pipeline.
    final Integer initialCount = Integer.valueOf(1);
    final Tuple2<String, Integer> wordWithCount =
            new Tuple2<String, Integer>(value, initialCount);
    return wordWithCount;
}
})
// group by word and sum the occurrences
.groupBy(0)
.sum(1)
// select the word with the maximum number of occurrences
.flatMap(new SelectMaxOccurence());
// emit result
dataStream.print();
// execute program
env.execute();
}