/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.tw;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Module;
import de.jetwick.config.Configuration;
import de.jetwick.config.DefaultModule;
import de.jetwick.es.ElasticTagSearch;
import de.jetwick.es.ElasticTweetSearch;
import de.jetwick.es.ElasticUserSearch;
import de.jetwick.rmi.RMIServer;
import de.jetwick.util.GenericUrlResolver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Command line entry point of the tweet collector: obtains all components from
 * the Guice injector, connects every tweet producer to the
 * {@link TweetConsumer} via its own queue, starts the worker threads and then
 * waits for the search producer thread to end before interrupting the rest.
 *
 * @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
 */
public class TweetCollector {

    // Throughput notes:
    // twClient.getTrend() ... 20 tweets per min
    // RT ... 100 tweets per sec (as of 9.5.2010)
    private static final Logger logger = LoggerFactory.getLogger(TweetCollector.class);

    /**
     * Handler installed on the worker threads; it only logs the failure.
     * Nothing is rethrown because the JVM ignores exceptions thrown from
     * {@code uncaughtException}.
     */
    private static final Thread.UncaughtExceptionHandler excHandler = new Thread.UncaughtExceptionHandler() {

        @Override
        public void uncaughtException(Thread t, Throwable e) {
            logger.error("Thread '" + t.getName() + "' was aborted!", e);
        }
    };

    /**
     * Wires and starts all producers and the consumer, then blocks until the
     * search producer thread terminates. Afterwards (also when the wait is
     * interrupted) all remaining workers are interrupted.
     *
     * @param args command line arguments (not used)
     * @throws InterruptedException if the main thread is interrupted while
     * waiting for the search producer thread
     */
    public static void main(String[] args) throws InterruptedException {
        Runnable runOnExit = new Runnable() {

            @Override
            public void run() {
                logger.info("Finished via Shutdown hook!");
            }
        };
        Runtime.getRuntime().addShutdownHook(new Thread(runOnExit));

        Module module = new DefaultModule();
        Injector injector = Guice.createInjector(module);
        TwitterSearch tws = injector.getInstance(TwitterSearch.class);
        ElasticTweetSearch tweetSearch = injector.getInstance(ElasticTweetSearch.class);

        // wait until the tweet index is available
        tweetSearch.waitUntilAvailable(10000);

        ElasticUserSearch userSearch = injector.getInstance(ElasticUserSearch.class);
        ElasticTagSearch tagSearch = injector.getInstance(ElasticTagSearch.class);
        Configuration cfg = injector.getInstance(Configuration.class);

        // 1. Every producer has a separate queue (with a different capacity) to feed TweetConsumer:
        //      TProd1 -- queue1 --\
        //      TProd2 -- queue2 ---> TweetConsumer
        //      ...
        // 2. TweetConsumer polls N elements from every queue and feeds the results
        //    into the resolver - see GenericUrlResolver.
        // 3. Via the commit listener of ElasticTweetSearch the URLs of tweets will be
        //    resolved. For every URL an article is created and fed into the article index.
        TweetConsumer twConsumer = injector.getInstance(TweetConsumer.class);
        twConsumer.setUncaughtExceptionHandler(excHandler);

        GenericUrlResolver resolver = injector.getInstance(GenericUrlResolver.class);
        resolver.start();

        int queueCapacity = cfg.getUrlResolverInputQueueSize();

        // Read the flag once so that starting and interrupting the stream
        // producer are always decided consistently.
        boolean streamEnabled = cfg.isStreamEnabled();

        // feeding consumer via twitter search (or offline fake)
        TweetProducer twProducer = injector.getInstance(TweetProducer.class);
        twProducer.setTwitterSearch(tws);
        twProducer.setUserSearch(userSearch);
        twProducer.setTagSearch(tagSearch);
        twProducer.setQueue(twConsumer.register("producer-search", queueCapacity, 100));

        // feeding consumer via twitter keyword stream (gets keywords from tagindex)
        TweetProducerViaStream producerViaStream = injector.getInstance(TweetProducerViaStream.class);
        producerViaStream.setQueue(twConsumer.register("producer-stream", queueCapacity, 120));
        producerViaStream.setTwitterSearch(tws);
        producerViaStream.setTagSearch(tagSearch);
        producerViaStream.setUncaughtExceptionHandler(excHandler);
        producerViaStream.setTweetsPerSecLimit(cfg.getTweetsPerSecLimit());

        // feeding consumer from tweets of friends (of registered users)
        TweetProducerViaUsers producerFromFriends = injector.getInstance(TweetProducerViaUsers.class);
        producerFromFriends.setQueue(twConsumer.register("producer-friends", queueCapacity, 100));
        producerFromFriends.setTwitterSearch(tws);
        producerFromFriends.setUserSearch(userSearch);
        producerFromFriends.setUncaughtExceptionHandler(excHandler);

        // feeding consumer from UI
        RMIServer rmiServer = injector.getInstance(RMIServer.class);
        rmiServer.setQueue(twConsumer.register("producer-rmi", queueCapacity, 20));
        Thread rmiServerThread = rmiServer.createThread();

        // apply the configured removal age (in days) and batch size to the tweet index
        tweetSearch.setRemoveOlderThanDays(cfg.getTweetSearchRemoveDays());
        tweetSearch.setBatchSize(cfg.getTweetSearchBatch());

        Thread twProducerThread = new Thread(twProducer, "tweet-producer");
        twProducerThread.setUncaughtExceptionHandler(excHandler);

        twProducerThread.start();
        rmiServerThread.start();
        twConsumer.start();
        producerFromFriends.start();
        if (streamEnabled) {
            producerViaStream.start();
        }

        try {
            // block until the search producer thread has finished
            twProducerThread.join();
        } finally {
            // Stop the remaining workers even if join() was interrupted;
            // otherwise they would be left running without being signalled.
            if (streamEnabled) {
                producerViaStream.interrupt();
            }
            producerFromFriends.interrupt();
            twConsumer.interrupt();
            rmiServerThread.interrupt();
        }
    }
}