Package de.jetwick.tw.cmd

Examples of de.jetwick.tw.cmd.TermCreateCommand


        list = twitterSearch.getTweets(new JUser("berniecezee2573"), 100);
        for (JTweet tw : list) {
            for (UrlEntry entry : new FakeUrlExtractor().setText(tw.getText()).run().getUrlEntries()) {
                tw.addUrlEntry(entry);
            }
            JTweet tw2 = new TermCreateCommand().execute(tw);
            System.out.println(tw2.getQuality() + " " + tw2.getQualDebug() + " " + tw2.getText());
        }
    }
View Full Code Here


        super(facets);
        this.tweet = tweet;
        if (this.tweet == null)
            throw new IllegalArgumentException("Tweet cannot be null");

        new TermCreateCommand().calcTermsWithoutNoise(tweet);
        getFilterQueries().clear();
        addFilterQuery(ElasticTweetSearch.IS_RT, false);
    }
View Full Code Here

        try {
            if (tweets.isEmpty())
                return;

            tweets = new SerialCommandExecutor(tweets).add(
                    new TermCreateCommand().setSw1(sw1).setSw2(sw2).setSw3(sw3).setSw4(sw4)).execute();

            List<JTweet> list = new ArrayList<JTweet>(tweets);
            Collection<Integer> failedArticleIndices = bulkUpdate(list, getIndexName());
            for (Integer integ : failedArticleIndices) {
                JTweet tw = list.get(integ);
View Full Code Here

        return sb.toString();
    }

    public Collection<JTweet> findDuplicates(Map<Long, JTweet> tweets) {
        final Set<JTweet> updatedTweets = new LinkedHashSet<JTweet>();
        TermCreateCommand termCommand = new TermCreateCommand();
        double JACC_BORDER = 0.7;
        for (JTweet currentTweet : tweets.values()) {
            if (currentTweet.isRetweet())
                continue;

            JetwickQuery reqBuilder = new SimilarTweetQuery(currentTweet, false).addLatestDateFilter(24);
            if (currentTweet.getTextTerms().size() < 3)
                continue;

            int dups = 0;
            try {
                // find dups in index
                for (JTweet simTweet : collectObjects(query(reqBuilder))) {
                    if (simTweet.getTwitterId().equals(currentTweet.getTwitterId()))
                        continue;

                    termCommand.calcTermsWithoutNoise(simTweet);
                    if (TermCreateCommand.calcJaccardIndex(currentTweet.getTextTerms(), simTweet.getTextTerms())
                            >= JACC_BORDER) {
                        currentTweet.addDuplicate(simTweet.getTwitterId());
                        dups++;
                    }
                }
            } catch (Exception ex) {
                logger.error("Error while findDuplicate query execution", ex);
            }

            // find dups in tweets map
            for (JTweet simTweet : tweets.values()) {
                if (simTweet.getTwitterId().equals(currentTweet.getTwitterId()) || simTweet.isRetweet())
                    continue;

                if (currentTweet.getCreatedAt().getTime() < simTweet.getCreatedAt().getTime())
                    continue;

                termCommand.calcTermsWithoutNoise(simTweet);
                if (TermCreateCommand.calcJaccardIndex(currentTweet.getTextTerms(), simTweet.getTextTerms())
                        >= JACC_BORDER) {
                    currentTweet.addDuplicate(simTweet.getTwitterId());
                    dups++;
                }
View Full Code Here

TOP

Related Classes of de.jetwick.tw.cmd.TermCreateCommand

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.