/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package de.jetwick.tw;
import com.google.inject.Inject;
import de.jetwick.data.JTweet;
import de.jetwick.data.UrlEntry;
import de.jetwick.snacktory.JResult;
import de.jetwick.util.GenericUrlResolver;
import de.jetwick.util.StopWatch;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import org.elasticsearch.common.cache.CacheBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Consumes tweets from the registered input queues, deduplicates them, extracts
 * their URL entries and feeds them into the resolver (which stores them via the
 * dbHelper and Solr).
 *
 * @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
 */
public class TweetConsumer extends Thread {

    private final Logger logger = LoggerFactory.getLogger(getClass());
    // All registered input queues; every registered queue is drained in executeOneBatch.
    private final List<QueueInfo<JTweet>> inputQueues = new ArrayList<QueueInfo<JTweet>>();
    @Inject
    protected GenericUrlResolver resolver;
    // Deduplication cache of recently seen tweet ids (id -> marker object).
    // Created lazily in initTweetCache; presumably an expiring cache — see
    // GenericUrlResolver.createGenericCache (TODO confirm eviction semantics).
    private Map<Long, Object> tweetCache;
    // Shared marker value for tweetCache entries (only key presence matters).
    private static final Object OBJECT = new Object();
    private UrlExtractor urlExtractor;

    public TweetConsumer() {
        super("tweet-consumer");
    }

    public GenericUrlResolver getResolver() {
        return resolver;
    }

    /**
     * Polls all input queues in a loop, feeding batches into the resolver.
     * Sleeps briefly when little work is available and logs statistics every
     * 1000 rounds. Terminates when interrupted while sleeping.
     */
    @Override
    public void run() {
        initTweetCache();
        // Lightweight extractor: creates a simple result from the URL string
        // without fetching the remote page.
        urlExtractor = new UrlExtractor() {
            @Override
            public JResult getInfo(String originalUrl, int timeout) throws Exception {
                return UrlEntry.createSimpleResult(originalUrl);
            }
        };
        int counter = 0;
        StopWatch sw = new StopWatch();
        while (true) {
            counter++;
            sw.start();
            int feeded = executeOneBatch();
            sw.stop();
            if (feeded < 10) {
                // Not much to do: back off instead of busy-polling.
                try {
                    Thread.sleep(400);
                } catch (InterruptedException ex) {
                    // Restore the interrupt status so callers/monitors can observe it.
                    Thread.currentThread().interrupt();
                    logger.error(getName() + " interrupted while sleeping: " + ex.getLocalizedMessage());
                    break;
                }
            }
            // Print stats every 1000 polling rounds, then reset the timer.
            if (counter % 1000 == 0) {
                logger.info("time of polling:\t" + sw.getSeconds());
                sw = new StopWatch();
                logger.info("tweetCache size:\t" + tweetCache.size());
                logger.info("tweetTodo size:\t" + resolver.getInputQueue().size());
                for (QueueInfo<JTweet> qi : inputQueues) {
                    logger.info(qi.toString());
                }
            }
        }
        logger.warn(getName() + " finished");
    }

    public void setResolver(GenericUrlResolver resolver) {
        this.resolver = resolver;
    }

    /**
     * Registers a new named input queue.
     *
     * @param queueName the identifier of the input queue
     * @param capacity the number of elements which should fit into the input
     * queue. This should be at least twice times bigger than batchSize.
     * @param batchSize the number of elements to feed at once into main output
     * queue.
     * @return the newly registered queue
     * @throws IllegalStateException if a queue with that name already exists or
     * if the combined batch sizes would overload the main queue
     */
    public BlockingQueue<JTweet> register(String queueName, int capacity, int batchSize) {
        for (QueueInfo<JTweet> qi : inputQueues) {
            if (qi.getName().equals(queueName))
                throw new IllegalStateException("cannot register queue. Queue " + queueName + " already exists");
        }
        // Validate the combined batch size BEFORE mutating inputQueues so a
        // failed registration leaves no half-registered queue behind.
        int sum = batchSize;
        for (QueueInfo<JTweet> qi : inputQueues) {
            sum += qi.getBatchSize();
        }
        int mainCapacity = resolver.getInputQueue().remainingCapacity() + resolver.getInputQueue().size();
        if (sum * 2 > mainCapacity)
            throw new IllegalStateException("cannot register queue " + queueName + " because it"
                    + " would increase capacity of all input queues too much (" + sum + ") and "
                    + " can block main queue too often, where the capacity is only:" + mainCapacity);

        QueueInfo<JTweet> qInfo = new QueueInfo<JTweet>(queueName, new LinkedBlockingQueue<JTweet>(capacity));
        qInfo.setBatchSize(batchSize);
        inputQueues.add(qInfo);
        return qInfo.getQueue();
    }

    /**
     * Drains up to batchSize tweets from every registered input queue and
     * feeds them to the resolver. Non-persistent tweets already present in
     * the dedup cache are skipped without counting against the batch.
     *
     * @return the number of tweets actually fed into the resolver
     */
    public int executeOneBatch() {
        int feeded = 0;
        for (QueueInfo<JTweet> qi : inputQueues) {
            int batchSize = qi.getBatchSize();
            Queue<JTweet> queue = qi.getQueue();
            int newTweets = 0;
            for (; newTweets < batchSize; newTweets++) {
                JTweet tw = queue.poll();
                if (tw == null)
                    break;

                // Skip duplicates (already in cache) unless the tweet is persistent;
                // decrement so the duplicate does not count against the batch quota.
                if (!tw.isPersistent() && tweetCache != null && tweetCache.put(tw.getTwitterId(), OBJECT) != null) {
                    newTweets--;
                    continue;
                }

                // Attach the URL entries extracted from the tweet text.
                if (urlExtractor != null) {
                    for (UrlEntry ue : ((UrlExtractor) urlExtractor.setTweet(tw).run()).getUrlEntries()) {
                        tw.addUrlEntry(ue);
                    }
                }
                feeded++;
                resolver.queueObject(tw);
            }
        }
        return feeded;
    }

    /** Lazily creates the dedup cache (50000 entries, 6h expiry per createGenericCache args). */
    public void initTweetCache() {
        if (tweetCache == null)
            tweetCache = GenericUrlResolver.createGenericCache(50000, 6 * 60);
    }

    /**
     * Bookkeeping for one registered input queue: its name, the queue itself,
     * the batch size and simple output statistics.
     *
     * <p>Note: the type parameter was renamed from {@code JTweet} to {@code T}
     * — the old name shadowed the imported {@link JTweet} class.
     *
     * @param <T> the element type of the wrapped queue
     */
    public static class QueueInfo<T> {

        private final String name;
        private long lastMeasureTime = System.currentTimeMillis();
        private final BlockingQueue<T> queue;
        private int batchSize = 200;
        private int outputCount;
        private float outputFrequency;

        public QueueInfo(String name, BlockingQueue<T> queue) {
            this.name = name;
            this.queue = queue;
        }

        public BlockingQueue<T> getQueue() {
            return queue;
        }

        public String getName() {
            return name;
        }

        public int getBatchSize() {
            return batchSize;
        }

        public void setBatchSize(int batchSize) {
            this.batchSize = batchSize;
        }

        public void setOutputFrequency(float outputFrequency) {
            this.outputFrequency = outputFrequency;
        }

        public float getOutputFrequency() {
            return outputFrequency;
        }

        public void setLastMeasureTime(long lastMeasureTime) {
            this.lastMeasureTime = lastMeasureTime;
        }

        public long getLastMeasureTime() {
            return lastMeasureTime;
        }

        public int getOutputCount() {
            return outputCount;
        }

        public void setOutputCount(int outputCount) {
            this.outputCount = outputCount;
        }

        @Override
        public String toString() {
            return getName() + "\t size:" + getQueue().size() + "\t count:" + outputCount + "\t oFreq.:" + getOutputFrequency();
        }
    }
}