Package de.jetwick.util

Source Code of de.jetwick.util.Util

/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*         http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.util;

import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.google.inject.Provider;
import de.jetwick.config.Configuration;
import de.jetwick.config.DefaultModule;
import de.jetwick.data.DbObject;
import de.jetwick.es.ElasticTweetSearch;
import de.jetwick.es.ElasticUserSearch;
import de.jetwick.rmi.RMIClient;
import de.jetwick.es.JetwickQuery;
import de.jetwick.data.JUser;
import de.jetwick.es.AbstractElasticSearch;
import de.jetwick.es.CreateObjectsInterface;
import de.jetwick.es.TweetQuery;
import de.jetwick.tw.Credits;
import de.jetwick.tw.MyTweetGrabber;
import de.jetwick.tw.TwitterSearch;
import de.jetwick.tw.queue.QueueThread;
import java.util.Arrays;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

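/**
* Command line utility for maintenance tasks on the jetwick user and tweet
* search indices (delete, fill, copy, optimize, ...). See the main method for
* the supported commands.
*
* @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
*/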
public class Util {

    // SLF4J logger shared by the static main method and all instance methods.
    private static Logger logger = LoggerFactory.getLogger(Util.class);
    // Populated by Guice when the constructor calls injectMembers(this).
    @Inject
    private ElasticUserSearch userSearch;
    // Populated by Guice when the constructor calls injectMembers(this).
    @Inject
    private ElasticTweetSearch tweetSearch;
    // Running total of users saved by fillFrom(); logged by its shutdown hook.
    private int userCounter;
    // Instantiated directly (not injected); showFollowers() reads the Twitter credentials from it.
    private Configuration config = new Configuration();

    public static void main(String[] args) {
        Map<String, String> map = Helper.parseArguments(args);

        Util util = new Util();
        String argStr = "";
        if (!Helper.isEmpty(map.get("deleteAll"))) {
            util.deleteAll();
            return;
        }

        argStr = map.get("fillFrom");
        if (!Helper.isEmpty(argStr)) {
            String fromUrl = argStr;
            util.fillFrom(fromUrl);
            return;
        }

        argStr = map.get("clearUserTokens");
        if (!Helper.isEmpty(argStr)) {
            String newUserIndexName = argStr;
            util.clearUserTokens(newUserIndexName);
            return;
        }

        if (!Helper.isEmpty(map.get("copyStaticTweets"))) {
            util.copyStaticTweets();
            return;
        }

        argStr = map.get("showFollowers");
        if (!Helper.isEmpty(argStr)) {
            String user = argStr;
            util.showFollowers(user);
            return;
        }

        if (!Helper.isEmpty(map.get("optimize"))) {
            util.optimize();
            return;
        }

        int hitsPerPage = 10000;
        try {
            hitsPerPage = Integer.parseInt(map.get("hitsPerPage"));
        } catch (Exception ex) {
        }

        // copyUserIndex=newtwindex
        argStr = map.get("copyUserIndex");
        if (!Helper.isEmpty(argStr)) {
            String newIndex = argStr;
            util.copyUserIndex(newIndex, hitsPerPage);
            return;
        }

        // copyUserIndex=newtwindex
        argStr = map.get("copyTweetIndex");
        if (!Helper.isEmpty(argStr)) {
            String newIndex = argStr;
            util.copyTweetIndex(newIndex, hitsPerPage);
            return;
        }

        argStr = map.get("removeIndexAndAddAlias");
        if (!Helper.isEmpty(argStr)) {
            logger.warn("use the simple curl script");
            return;
        }

        argStr = map.get("clearFriendsUpdate");
        if (!Helper.isEmpty(argStr)) {
            String user = argStr;
            util.clearFriendsUpdate(user);
            return;
        }
    }

    public Util() {
        Module module = new DefaultModule();
        Guice.createInjector(module).injectMembers(this);
    }

    public void deleteAll() {
        // why don't we need to set? query.setQueryType("simple")
        userSearch.deleteAll();
        userSearch.refresh();
        logger.info("Successfully finished deleteAll");
    }

    private void copyStaticTweets() {
        Module module = new DefaultModule();
        Injector injector = Guice.createInjector(module);
        Provider<RMIClient> rmiProvider = injector.getProvider(RMIClient.class);
        Configuration cfg = injector.getInstance(Configuration.class);
        TwitterSearch twSearch = injector.getInstance(TwitterSearch.class);
        twSearch.initTwitter4JInstance(cfg.getTwitterSearchCredits().getToken(),
                cfg.getTwitterSearchCredits().getTokenSecret(), true);
        ElasticTweetSearch fromUserSearch = new ElasticTweetSearch(injector.getInstance(Configuration.class));
        JetwickQuery query = new TweetQuery().addFilterQuery(ElasticTweetSearch.UPDATE_DT, "[* TO *]");
        // TODO set facetlimit to 2000
        query.addFacetField("user").setSize(0);
        SearchResponse rsp = fromUserSearch.query(query);

        TermsFacet tf = (TermsFacet) rsp.getFacets().facet("user");
        logger.info("found: " + tf.entries().size() + " users with the specified criteria");
        int SLEEP = 30;
        int counter = 0;
        for (TermsFacet.Entry tmpUser : tf.entries()) {
            if (tmpUser.getCount() < 20)
                break;

            while (twSearch.getRateLimit() <= 3) {
                try {
                    logger.info("sleeping " + SLEEP + " seconds to avoid ratelimit violation");
                    Thread.sleep(1000 * SLEEP);
                } catch (InterruptedException ex) {
                    throw new IllegalStateException(ex);
                }
            }

            logger.info(counter++ + "> feed pipe from " + tmpUser.getTerm() + " with " + tmpUser.getCount() + " tweets");

            MaxBoundSet boundSet = new MaxBoundSet<String>(0, 0);
            // try updating can fail so try max 3 times
            for (int trial = 0; trial < 3; trial++) {
                MyTweetGrabber grabber = new MyTweetGrabber().setMyBoundSet(boundSet).
                        init(null, null, tmpUser.getTerm()).setTweetsCount((int) tmpUser.getCount()).
                        setRmiClient(rmiProvider).setTwitterSearch(twSearch);
                QueueThread pkg = grabber.queueTweetPackage();
                Thread t = new Thread(pkg);
                t.start();
                try {
                    t.join();
                    if (pkg.getException() == null)
                        break;

                    logger.warn(trial + "> Try again feeding of user " + tmpUser.getTerm() + " for tweet package " + pkg);
                } catch (InterruptedException ex) {
                    logger.warn("interrupted", ex);
                    break;
                }
            }
        }

        // TODO send via RMI
    }

    public void fillFrom(final String fromUrl) {
        ElasticTweetSearch fromTweetSearch = new ElasticTweetSearch(fromUrl);
        JetwickQuery query = new TweetQuery();
        long maxPage = 1;
        int hitsPerPage = 300;
        Set<JUser> users = new LinkedHashSet<JUser>();
        Runnable optimizeOnExit = new Runnable() {

            @Override
            public void run() {
                userSearch.refresh();
                logger.info(userCounter + " users pushed to default tweet search from " + fromUrl);
            }
        };
        Runtime.getRuntime().addShutdownHook(new Thread(optimizeOnExit));

        for (int page = 0; page < maxPage; page++) {
            query.attachPagability(page, hitsPerPage);
            users.clear();

            SearchResponse rsp;
            try {
                rsp = fromTweetSearch.query(users, query);
            } catch (Exception ex) {
                logger.warn("Error while searching!", ex);
                continue;
            }
            if (maxPage == 1) {
                maxPage = rsp.getHits().getTotalHits() / hitsPerPage + 1;
                logger.info("Paging though query:" + query.toString());
                logger.info("Set numFound to " + rsp.getHits().getTotalHits());
            }

            for (JUser user : users) {
                userSearch.save(user, false);
            }
            userCounter += users.size();
            logger.info("Page " + page + " out of " + maxPage + " hitsPerPage:" + hitsPerPage);

            if (page * hitsPerPage % 100000 == 0) {
                logger.info("Commit ...");
                userSearch.refresh();
            }
        }
    }

    public void showFollowers(String user) {
//        ElasticUserSearch uSearch = createUserSearch();
//        Set<SolrUser> jetwickUsers = new LinkedHashSet<SolrUser>();
//        uSearch.search(jetwickUsers, new SolrQuery().setRows(10000));
        final Set<String> set = new TreeSet<String>();
//        for (SolrUser u : jetwickUsers) {
//            set.add(u.getScreenName());
//        }
        Credits credits = config.getTwitterSearchCredits();
        TwitterSearch tw4j = new TwitterSearch().setConsumer(credits.getConsumerKey(), credits.getConsumerSecret());
        tw4j.initTwitter4JInstance(credits.getToken(), credits.getTokenSecret(), true);
        tw4j.getFollowers(user, new AnyExecutor<JUser>() {

            @Override
            public JUser execute(JUser o) {
//                if (set.contains(o.getScreenName()))
                set.add(o.getScreenName());
                return null;
            }
        });
        for (String u : set) {
            System.out.println(u);
        }
    }

    public void optimize() {
        tweetSearch.optimize();
    }

    public <T extends DbObject> void copyIndex(String newIndex, AbstractElasticSearch<T> search, int hitsPerPage) {
        try {
            logger.info("Old index has totalhits:" + search.countAll());
            if (!search.indexExists(newIndex)) {
                logger.info("New Index '" + newIndex + "' does not exist! create it before copy!");
                return;
            }

            logger.info("Now copy from " + search.getIndexName() + " to " + newIndex);
            search.mergeIndices(Arrays.asList(search.getIndexName()), newIndex, hitsPerPage, true, search, null);

            search.setIndexName(newIndex);
            logger.info("New index has totalhits:" + search.countAll() + " Now optimize ...");
            search.optimize();
        } catch (Exception ex) {
            logger.error("Exception while copyIndex", ex);
        }
    }

    public void clearUserTokens(String newIndex) {
        try {
            logger.info("Old index has totalhits:" + userSearch.countAll());
            if (!userSearch.indexExists(newIndex)) {
                logger.info("New Index '" + newIndex + "' does not exist! create it before copy!");
                return;
            }

            logger.info("Now copy from " + userSearch.getIndexName() + " to " + newIndex + " and clear user tokens");
            userSearch.mergeIndices(Arrays.asList(userSearch.getIndexName()), newIndex, 10000, true,
                    new CreateObjectsInterface<JUser>() {

                        @Override
                        public List<JUser> collectObjects(SearchResponse rsp) {
                            List<JUser> users = userSearch.collectObjects(rsp);
                            for (JUser u : users) {
                                u.setTwitterToken(null);
                                u.setTwitterTokenSecret(null);
                            }
                            return users;
                        }
                    }, null);

            userSearch.setIndexName(newIndex);
            logger.info("New index has totalhits:" + userSearch.countAll() + " Now optimize ...");
            userSearch.optimize();
        } catch (Exception ex) {
            logger.error("Exception while copyIndex", ex);
        }
    }

    public void clearFriendsUpdate(String userStr) {
        JUser user = userSearch.findByScreenName(userStr);
        user.setLastFriendsUpdate(new Date(0));
        userSearch.save(user, true);
        logger.info("stored user:" + user + " collector should update friends immediately");
    }

    private void copyUserIndex(String newIndex, int hitsPerPage) {
        copyIndex(newIndex, userSearch, hitsPerPage);
    }

    private void copyTweetIndex(String newIndex, int hitsPerPage) {
        copyIndex(newIndex, tweetSearch, hitsPerPage);
    }
}
TOP

Related Classes of de.jetwick.util.Util

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.