/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.tw;
import de.jetwick.data.JTag;
import de.jetwick.es.ElasticUserSearch;
import de.jetwick.data.JTweet;
import de.jetwick.data.JUser;
import de.jetwick.es.ElasticTagSearch;
import de.jetwick.es.JetwickQuery;
import de.jetwick.util.AnyExecutor;
import de.jetwick.util.Helper;
import de.jetwick.util.MyDate;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import twitter4j.TwitterException;
/**
 * Fills the tweets queue via the twitter search (according to the original
 * author this does not cost API calls).
 *
 * The thread repeatedly takes the next tag from an internal priority queue,
 * searches twitter for the tag's term and puts every returned tweet into the
 * result queue. When the priority queue runs empty it is re-filled from the
 * tag index and from the query terms/topics found in the user index.
 *
 * @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
 */
public class TweetProducerViaSearch extends MyThread implements TweetProducer {

    private final Logger logger = LoggerFactory.getLogger(getClass());
    /** Queue which receives all tweets found by the searches. */
    protected BlockingQueue<JTweet> resultTweets = new LinkedBlockingQueue<JTweet>();
    /** Tags still to be processed in the current round; ordered by JTag's natural ordering. */
    private PriorityQueue<JTag> tags = new PriorityQueue<JTag>();
    protected TwitterSearch twSearch;
    protected ElasticTagSearch tagSearch;
    protected ElasticUserSearch userSearch;
    /** Time in milliseconds of the last cleanup of old tags; negative if none happened yet. */
    private long lastDelete = -1;
    /** Age in hours passed to the tag cleanup; also the minimum interval between two cleanups. */
    private int hours = 3;

    public TweetProducerViaSearch() {
        super("tweet-producer-search");
    }

    /**
     * Replaces the queue into which the found tweets are put.
     */
    @Override
    public void setQueue(BlockingQueue<JTweet> packages) {
        this.resultTweets = packages;
    }

    /**
     * @return the queue into which the found tweets are put
     */
    public BlockingQueue<JTweet> getQueue() {
        return resultTweets;
    }

    /**
     * Main loop: processes one tag per iteration until the thread is
     * interrupted, a wait is aborted or no tags are available at all.
     */
    @Override
    public void run() {
        long findNewTagsTime = -1;
        Collection<JTweet> tmpColl = new ArrayList<JTweet>(500);
        while (!isInterrupted()) {
            if (tags.isEmpty()) {
                initTags();
                if (tags.isEmpty()) {
                    logger.warn("No tags found in db! Either add some via script ./utils/es-import-tags.sh "
                            + "or track a keyword with rss button when logged in");
                    break;
                }

                // The previous round finished in under 2 seconds, i.e. (nearly) all tags
                // were skipped => pause instead of spinning over the tag index.
                if (findNewTagsTime > 0 && System.currentTimeMillis() - findNewTagsTime < 2000) {
                    // wait 2 to 60 seconds. depends on the demand
                    int sec = Math.max(2, (int) tags.peek().getWaitingSeconds() + 1);
                    logger.info("all tags are pausing. wait " + sec + " seconds ");
                    myWait(sec);
                }
                findNewTagsTime = System.currentTimeMillis();
            }

            JTag tag = tags.poll();
            // check for null BEFORE touching the tag (poll returns null on an empty queue)
            if (tag == null) {
                continue;
            }

            // must be read before nextQuery() is called, as in the original statement order
            long lastMillis = tag.getLastMillis();
            if (!tag.nextQuery()) {
                continue;
            }

            String term = tag.getTerm();
            if (term == null) {
                // TODO use user search later on
                logger.warn("TODO skipping tags with empty terms for now:" + tag);
                continue;
            }
            if (term.isEmpty() || JetwickQuery.containsForbiddenChars(term)) {
                continue;
            }

            float waitInSeconds = 1f;
            try {
                int pages = tag.getPages();
                tmpColl.clear();
                long newMaxCreateTime = twSearch.search(term + " " + TwitterSearch.LINK_FILTER, tmpColl, pages * 100, 0);

                // calc tweets per sec with 'floating mean'
                double lastTweetsPerSec = tag.getTweetsPerSec();
                int newTweets = guessNewTweets(tmpColl, tag.getMaxCreateTime());
                // use at least 1ms to avoid a division by zero (=> Infinity or NaN as rate)
                double elapsedSeconds = Math.max(1, System.currentTimeMillis() - lastMillis) / 1000.0;
                lastTweetsPerSec = lastTweetsPerSec + newTweets / elapsedSeconds;
                tag.setTweetsPerSec(lastTweetsPerSec / 2);
                tag.setMaxCreateTime(newMaxCreateTime);
                logger.info("searched: " + tag + "\t=> tweets:" + tmpColl.size() + "\t newTweets:" + newTweets);

                boolean interrupted = false;
                for (JTweet tw : tmpColl) {
                    try {
                        resultTweets.put(tw.setFeedSource("search:" + term));
                    } catch (InterruptedException ex) {
                        logger.error("Cannot put article into queue:" + tw + " " + ex.getMessage());
                        interrupted = true;
                        break;
                    }
                }

                updateTag(tag, tmpColl.size());
                if (interrupted) {
                    // put() cleared the interrupt status: restore it so that the shutdown
                    // request is not lost and stop producing
                    Thread.currentThread().interrupt();
                    break;
                }
            } catch (TwitterException ex) {
                waitInSeconds = 3f;
                logger.warn("Couldn't finish search for tag '" + term + "': " + Helper.getMsg(ex));
                if (ex.exceededRateLimitation()) {
                    // a non-positive value means that twitter provided no retry information
                    float retryAfter = ex.getRetryAfter();
                    if (retryAfter > 0) {
                        waitInSeconds = retryAfter;
                    }
                }
            }

            if (!myWait(waitInSeconds)) {
                break;
            }
        }
        logger.info(getName() + " finished");
    }

    @Override
    public void setTwitterSearch(TwitterSearch tws) {
        this.twSearch = tws;
    }

    /**
     * Lets the tag adapt its query frequency to the number of hits of the
     * last search and queues the tag in the tag index.
     *
     * @param tag the tag which was just searched
     * @param hits the number of tweets returned by the last search
     */
    public void updateTag(JTag tag, int hits) {
        tag.optimizeQueryFrequency(hits);
        tagSearch.queueObject(tag);
    }

    /**
     * Re-fills the internal tag queue. The tags come from the tag index (the
     * first 1000 of findSorted) plus one tag for every query term or topic
     * found in the user index which is not yet known. Tags older than
     * {@link #hours} hours are deleted at most once per that interval.
     *
     * Failures while querying one of the indices are logged and do not abort
     * the method; the tags collected so far are still used.
     *
     * @return the internal tag queue (not a copy)
     */
    Collection<JTag> initTags() {
        Map<String, JTag> tmpTags = new LinkedHashMap<String, JTag>();
        try {
            for (JTag tag : tagSearch.findSorted(0, 1000)) {
                tmpTags.put(tag.getTerm(), tag);
            }

            long start = System.currentTimeMillis();
            if (lastDelete < 0 || start > lastDelete + hours * MyDate.ONE_HOUR) {
                logger.info("Delete tags older than " + hours + " hours");
                tagSearch.deleteOlderThan(hours);
                lastDelete = start;
                tagSearch.refresh();
            }
        } catch (Exception ex) {
            logger.info("Couldn't query tag index", ex);
        }

        try {
            final Collection<String> userQueryTerms = userSearch.getQueryTerms();
            // TODO execute in separate thread but separate tags by 'OR'
            userSearch.executeForAll(new AnyExecutor<JUser>() {

                @Override
                public JUser execute(JUser u) {
                    userQueryTerms.addAll(u.getTopics());
                    return u;
                }
            }, 1000);

            int counter = 0;
            for (String termAsStr : userQueryTerms) {
                termAsStr = JTag.toLowerCaseOnlyOnTerms(termAsStr).trim();
                if (Helper.isEmpty(termAsStr) || JetwickQuery.containsForbiddenChars(termAsStr)) {
                    continue;
                }

                for (String tmpTerm : termAsStr.split(" OR ")) {
                    // validate the sub term itself, not (again) the whole query term
                    tmpTerm = tmpTerm.trim();
                    if (Helper.isEmpty(tmpTerm) || JetwickQuery.containsForbiddenChars(tmpTerm)) {
                        continue;
                    }

                    JTag tag = tmpTags.get(tmpTerm);
                    if (tag == null) {
                        tag = tagSearch.findByTerm(tmpTerm);
                        if (tag == null) {
                            tag = new JTag(tmpTerm);
                        }
                        tmpTags.put(tmpTerm, tag);
                        counter++;
                    }
                }
            }
            logger.info("Will add query terms " + counter + " of " + userQueryTerms);
        } catch (Exception ex) {
            logger.error("Couldn't query user index to feed tweet index with user queries:" + Helper.getMsg(ex));
        }

        tags.clear();
        tags.addAll(tmpTags.values());
        logger.info("Using " + tags.size() + " tags. first tag is: " + tags.peek());
        return tags;
    }

    @Override
    public void setUserSearch(ElasticUserSearch userSearch) {
        this.userSearch = userSearch;
    }

    @Override
    public void setTagSearch(ElasticTagSearch tagSearch) {
        this.tagSearch = tagSearch;
    }

    /**
     * Estimates how many of the specified tweets are new, i.e. created later
     * than one second before maxTime.
     *
     * @param tweets the tweets returned by the last search
     * @param maxTime creation time in milliseconds to compare against; run()
     * passes the tag's max create time known before the search
     * @return the number of tweets counted as new, or 200 if more than 98
     * tweets were counted
     */
    public int guessNewTweets(Collection<JTweet> tweets, long maxTime) {
        int counter = 0;
        for (JTweet tw : tweets) {
            if (tw.getCreatedAt().getTime() > maxTime - 1000) {
                counter++;
            }
        }

        // the problem arises when we have a lot of tags which are waiting too long
        // NOTE(review): presumably more than 98 new tweets means the result page was
        // full and tweets were missed, hence the higher guess of 200 -- confirm
        if (counter > 98) {
            return 200;
        }
        return counter;
    }
}