Package de.jetwick.tw

Source Code of de.jetwick.tw.TweetCollector

/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*         http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.tw;

import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Module;
import de.jetwick.config.Configuration;
import de.jetwick.config.DefaultModule;
import de.jetwick.es.ElasticTagSearch;
import de.jetwick.es.ElasticTweetSearch;
import de.jetwick.es.ElasticUserSearch;
import de.jetwick.rmi.RMIServer;
import de.jetwick.util.GenericUrlResolver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
*/
public class TweetCollector {

    // twClient.getTrend() ...  20 tweets per min
    // RT                  ... 100 tweets per sec (as of 9.5.2010)
//    public static List<String> DEFAULT_ST = Arrays.asList("RT",
//            "java", "algorithm", "solr",
//            "lucene", "netbeans", "db4o", "java", "javascript", "javafx", "dzone",
//            "oracle", "open source", "google", "obama",
//            "wicket", "wikileaks", "world cup", "news");
    private static Logger logger = LoggerFactory.getLogger(TweetCollector.class);
    private static Thread.UncaughtExceptionHandler excHandler = new Thread.UncaughtExceptionHandler() {

        @Override
        public void uncaughtException(Thread t, Throwable e) {
            logger.error("Thread '" + t.getName() + "' was aborted!", e);
            throw new UnsupportedOperationException("Not supported yet.");
        }
    };

    public static void main(String[] args) throws InterruptedException {
        Runnable runOnExit = new Runnable() {

            @Override
            public void run() {
                logger.info("Finished via Shutdown hook!");
            }
        };

        Runtime.getRuntime().addShutdownHook(new Thread(runOnExit));

        Module module = new DefaultModule();
        Injector injector = Guice.createInjector(module);
        TwitterSearch tws = injector.getInstance(TwitterSearch.class);
        ElasticTweetSearch tweetSearch = injector.getInstance(ElasticTweetSearch.class);

        // WAIT UNTIL AVAILABLE
        tweetSearch.waitUntilAvailable(10000);
       
        ElasticUserSearch userSearch = injector.getInstance(ElasticUserSearch.class);
        ElasticTagSearch tagSearch = injector.getInstance(ElasticTagSearch.class);
        Configuration cfg = injector.getInstance(Configuration.class);

        // 1. every producer has a separate queue (with a different capacity) to feed TweetConsumer:
        //      TProd1 -- queue1 --\
        //      TProd2 -- queue2 ---> TweetConsumer
        //      ...
       
        // 2. TweetConsumer polls N elements from every queue and feeds the results
        //    into the resolver - see GenericUrlResolver.
       
        // 4. Via ElasticTweetSearch:s commit listener the URL:s of tweets will be       
        //    resolved -
        //    For every URL an article is created and feeded into the article index

        TweetConsumer twConsumer = injector.getInstance(TweetConsumer.class);
        twConsumer.setUncaughtExceptionHandler(excHandler);

        GenericUrlResolver resolver = injector.getInstance(GenericUrlResolver.class);       
        resolver.start();
        int queueCapacity = cfg.getUrlResolverInputQueueSize();
        // feeding consumer via twitter search (or offline fake)
        TweetProducer twProducer = injector.getInstance(TweetProducer.class);               
        twProducer.setTwitterSearch(tws);
        twProducer.setUserSearch(userSearch);
        twProducer.setTagSearch(tagSearch);
        twProducer.setQueue(twConsumer.register("producer-search", queueCapacity, 100));

        // feeding consumer via twitter keyword stream (gets keywords from tagindex)
        TweetProducerViaStream producerViaStream = injector.getInstance(TweetProducerViaStream.class);
        producerViaStream.setQueue(twConsumer.register("producer-stream", queueCapacity, 120));
        producerViaStream.setTwitterSearch(tws);
        producerViaStream.setTagSearch(tagSearch);
        producerViaStream.setUncaughtExceptionHandler(excHandler);
        producerViaStream.setTweetsPerSecLimit(cfg.getTweetsPerSecLimit());

        // feeding consumer from tweets of friends (of registered users)
        TweetProducerViaUsers producerFromFriends = injector.getInstance(TweetProducerViaUsers.class);
        producerFromFriends.setQueue(twConsumer.register("producer-friends", queueCapacity, 100));
        producerFromFriends.setTwitterSearch(tws);
        producerFromFriends.setUserSearch(userSearch);
        producerFromFriends.setUncaughtExceptionHandler(excHandler);

        // feeding consumer from UI       
        RMIServer rmiServer = injector.getInstance(RMIServer.class);
        rmiServer.setQueue(twConsumer.register("producer-rmi", queueCapacity, 20));
        Thread rmiServerThread = rmiServer.createThread();
 
        // configure tweet index to call UrlResolver after feeding of a tweet       
        tweetSearch.setRemoveOlderThanDays(cfg.getTweetSearchRemoveDays());
        tweetSearch.setBatchSize(cfg.getTweetSearchBatch());               

        Thread twProducerThread = new Thread(twProducer, "tweet-producer");
        twProducerThread.setUncaughtExceptionHandler(excHandler);
        twProducerThread.start();

        rmiServerThread.start();
        twConsumer.start();
        producerFromFriends.start();
        if (cfg.isStreamEnabled())
            producerViaStream.start();

        // ## JOIN
        twProducerThread.join();

        if (cfg.isStreamEnabled())
            producerViaStream.interrupt();

        producerFromFriends.interrupt();
        twConsumer.interrupt();
        rmiServerThread.interrupt();       
    }
}
TOP

Related Classes of de.jetwick.tw.TweetCollector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.