{
log.info("Cleanup_twitter_stream");
}
}; // ConnectionLifeCycleListener
// Stream handle and its listener; both are assigned further down, once the
// queue they depend on exists.
final TwitterStream twitterStream;
final StatusListener statusListener;
// Capacity of the hand-off queue declared on the next line.
final int QUEUE_SIZE = 2000;
/** This queue is used to move twitter events from the twitter4j thread to the druid ingest thread. */
final BlockingQueue<Status> queue = new ArrayBlockingQueue<Status>(QUEUE_SIZE);
// Dimension names passed to every MapBasedInputRow built by the firehose below.
final LinkedList<String> dimensions = new LinkedList<String>();
// Wall-clock reference point for the maxRunMinutes check in the firehose.
final long startMsec = System.currentTimeMillis();
dimensions.add("htags");
dimensions.add("lang");
dimensions.add("utc_offset");
//
// set up Twitter Spritzer
//
twitterStream = new TwitterStreamFactory().getInstance();
// connectionLifeCycleListener is defined earlier in this method.
twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener);
statusListener = new StatusListener() { // This is what really gets called to deliver stuff from twitter4j
  /**
   * Hands one status over to the ingest side through {@code queue}.
   *
   * Waits up to 15 seconds for free space in the queue; if none becomes
   * available the status is dropped and a warning is logged.
   *
   * @param status the status delivered by twitter4j
   * @throws RuntimeException if the calling thread is, or becomes, interrupted
   */
  @Override
  public void onStatus(Status status)
  {
    // time to stop?
    if (Thread.currentThread().isInterrupted()) {
      throw new RuntimeException("Interrupted, time to stop");
    }
    try {
      final boolean accepted = queue.offer(status, 15L, TimeUnit.SECONDS);
      if (!accepted) {
        log.warn("queue too slow!");
      }
    }
    catch (InterruptedException e) {
      // Catching InterruptedException clears the interrupt flag; restore it so
      // code further up the stack can still observe the interruption.
      Thread.currentThread().interrupt();
      throw new RuntimeException("InterruptedException", e);
    }
  }

  /** Deletion notices are intentionally ignored. */
  @Override
  public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice)
  {
  }

  /**
   * Logs the track limitation notice.
   *
   * @param numberOfLimitedStatuses count reported by the stream
   */
  @Override
  public void onTrackLimitationNotice(int numberOfLimitedStatuses)
  {
    // This notice will be sent each time a limited stream becomes unlimited.
    // If this number is high and or rapidly increasing, it is an indication that your predicate is too broad,
    // and you should consider a predicate with higher selectivity.
    log.warn("Got track limitation notice:" + numberOfLimitedStatuses);
  }

  /** Scrub-geo events are intentionally ignored. */
  @Override
  public void onScrubGeo(long userId, long upToStatusId)
  {
  }

  /**
   * Reports a stream exception through the logger (with its stack trace)
   * instead of printing it to stderr, consistent with the other callbacks.
   *
   * @param ex the exception reported by the stream
   */
  @Override
  public void onException(Exception ex)
  {
    log.error(ex, "Exception reported by twitter stream");
  }

  /**
   * Reports a stall warning through the logger instead of stdout.
   *
   * @param warning the stall warning reported by the stream
   */
  @Override
  public void onStallWarning(StallWarning warning)
  {
    // Passed as a format argument so a '%' in the warning text cannot be
    // misread as a format specifier.
    log.warn("Got stall warning:%s", warning);
  }
};
// Register the listener before starting the stream so that statuses are
// delivered to it.
twitterStream.addListener(statusListener);
twitterStream.sample(); // creates a generic StatusStream
// NOTE(review): presumably sample() starts delivery on a twitter4j-managed
// thread and returns immediately — confirm against the twitter4j docs.
log.info("returned from sample()");
return new Firehose() {
  /** No-op commit action, shared by every {@link #commit()} call. */
  private final Runnable doNothingRunnable = new Runnable() {
    @Override
    public void run()
    {
    }
  };

  /** Number of rows requested so far; incremented before the row is taken off the queue. */
  private long rowCount = 0L;

  /**
   * Set when maxEventCount is negative. In that mode hasMore() never returns
   * false; nextRow() sleeps once a limit is reached instead.
   */
  private final boolean waitIfmax = (maxEventCount < 0L);

  /** @return true once maxRunMinutes is positive and at least that many minutes have passed since startMsec */
  private boolean maxTimeReached()
  {
    if (maxRunMinutes <= 0) {
      return false;
    }
    return (System.currentTimeMillis() - startMsec) / 60000L >= maxRunMinutes;
  }

  /** @return true once maxEventCount is non-negative and rowCount has reached it */
  private boolean maxCountReached()
  {
    return maxEventCount >= 0 && rowCount >= maxEventCount;
  }

  /**
   * @return true while no limit has been reached; after a limit is reached,
   *         the value of waitIfmax
   */
  @Override
  public boolean hasMore()
  {
    if (maxCountReached() || maxTimeReached()) {
      return waitIfmax;
    }
    return true;
  }

  /**
   * Blocks until a status is available on the queue and converts it to a row.
   *
   * Each call builds its own event map, so a returned row is never altered by
   * later calls and never carries values left over from an earlier status.
   *
   * @return a row timestamped with the status creation time
   * @throws RuntimeException if the calling thread is, or becomes, interrupted
   */
  @Override
  public InputRow nextRow()
  {
    // Interrupted to stop?
    if (Thread.currentThread().isInterrupted()) {
      throw new RuntimeException("Interrupted, time to stop");
    }

    // all done? If not in wait mode, this event is allowed through and the
    // next hasMore() call will be false.
    if ((maxCountReached() || maxTimeReached()) && waitIfmax) {
      // sleep a long time instead of terminating
      try {
        log.info("reached limit, sleeping a long time...");
        sleep(2000000000L);
      }
      catch (InterruptedException e) {
        // Restore the interrupt flag that was cleared when the exception was thrown.
        Thread.currentThread().interrupt();
        throw new RuntimeException("InterruptedException", e);
      }
    }

    if (++rowCount % 1000 == 0) {
      log.info("nextRow() has returned %,d InputRows", rowCount);
    }

    final Status status;
    try {
      status = queue.take();
    }
    catch (InterruptedException e) {
      // Restore the interrupt flag that was cleared when the exception was thrown.
      Thread.currentThread().interrupt();
      throw new RuntimeException("InterruptedException", e);
    }

    // Fresh map per row: the row keeps a reference to it.
    final Map<String, Object> event = new HashMap<String, Object>();

    final HashtagEntity[] hts = status.getHashtagEntities();
    if (hts != null && hts.length > 0) {
      // NOTE(review): only the first hashtag is emitted although the dimension
      // name is plural — confirm this is intentional.
      event.put("htags", Arrays.asList(hts[0].getText()));
    }

    long retweetCount = status.getRetweetCount();
    event.put("retweet_count", retweetCount);

    final User user = status.getUser();
    if (user != null) {
      event.put("follower_count", user.getFollowersCount());
      event.put("friends_count", user.getFriendsCount());
      event.put("lang", user.getLang());
      event.put("utc_offset", user.getUtcOffset()); // resolution in seconds, -1 if not available?
      event.put("statuses_count", user.getStatusesCount());
    }

    return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, event);
  }

  /** @return the shared no-op runnable; nothing is persisted for this source */
  @Override
  public Runnable commit()
  {
    // ephemera in, ephemera out.
    return doNothingRunnable; // reuse the same object each time
  }

  /** Shuts down the underlying twitter stream. */
  @Override
  public void close() throws IOException
  {
    log.info("CLOSE twitterstream");
    twitterStream.shutdown(); // invokes twitterStream.cleanUp()
  }
};
}