// Package org.apache.flink.streaming.connectors.twitter
//
// Source code of org.apache.flink.streaming.connectors.twitter.TwitterSource

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.streaming.connectors.twitter;

import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Properties;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.function.source.RichSourceFunction;
import org.apache.flink.streaming.api.function.source.SourceFunction;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.hbc.ClientBuilder;
import com.twitter.hbc.core.Constants;
import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint;
import com.twitter.hbc.core.processor.StringDelimitedProcessor;
import com.twitter.hbc.httpclient.BasicClient;
import com.twitter.hbc.httpclient.auth.Authentication;
import com.twitter.hbc.httpclient.auth.OAuth1;

/**
* Implementation of {@link SourceFunction} specialized to emit tweets from
* Twitter. It can connect to Twitter Streaming API, collect tweets and
*/
public class TwitterSource extends RichSourceFunction<String> {

  private static final Logger LOG = LoggerFactory.getLogger(TwitterSource.class);

  private static final long serialVersionUID = 1L;
  private String authPath;
  private transient BlockingQueue<String> queue;
  private int queueSize = 10000;
  private transient BasicClient client;
  private int waitSec = 5;

  private boolean streaming;
  private int numberOfTweets;

  /**
   * Create {@link TwitterSource} for streaming
   *
   * @param authPath
   *            Location of the properties file containing the required
   *            authentication information.
   */
  public TwitterSource(String authPath) {
    this.authPath = authPath;
    streaming = true;
  }

  /**
   * Create {@link TwitterSource} to collect finite number of tweets
   *
   * @param authPath
   *            Location of the properties file containing the required
   *            authentication information.
   * @param numberOfTweets
   *
   */
  public TwitterSource(String authPath, int numberOfTweets) {
    this.authPath = authPath;
    streaming = false;
    this.numberOfTweets = numberOfTweets;
  }

  @Override
  public void open(Configuration parameters) throws Exception {
    initializeConnection();
  }

  @Override
  public void invoke(Collector<String> collector) throws Exception {

    if (streaming) {
      collectMessages(collector);
    } else {
      collectFiniteMessages(collector);
    }
  }

  @Override
  public void close() throws Exception {
    closeConnection();
  }

  /**
   * Initialize Hosebird Client to be able to consume Twitter's Streaming API
   */
  private void initializeConnection() {

    if (LOG.isInfoEnabled()) {
      LOG.info("Initializing Twitter Streaming API connection");
    }

    queue = new LinkedBlockingQueue<String>(queueSize);

    StatusesSampleEndpoint endpoint = new StatusesSampleEndpoint();
    endpoint.stallWarnings(false);

    Authentication auth = authenticate();

    initializeClient(endpoint, auth);

    if (LOG.isInfoEnabled()) {
      LOG.info("Twitter Streaming API connection established successfully");
    }
  }

  private OAuth1 authenticate() {

    Properties authenticationProperties = loadAuthenticationProperties();

    return new OAuth1(authenticationProperties.getProperty("consumerKey"),
        authenticationProperties.getProperty("consumerSecret"),
        authenticationProperties.getProperty("token"),
        authenticationProperties.getProperty("secret"));
  }

  /**
   * Reads the given properties file for the authentication data.
   *
   * @return the authentication data.
   */
  private Properties loadAuthenticationProperties() {
    Properties properties = new Properties();
    try {
      InputStream input = new FileInputStream(authPath);
      properties.load(input);
      input.close();
    } catch (Exception e) {
      throw new RuntimeException("Cannot open .properties file: " + authPath, e);
    }
    return properties;
  }

  private void initializeClient(StatusesSampleEndpoint endpoint, Authentication auth) {

    client = new ClientBuilder().name("twitterSourceClient").hosts(Constants.STREAM_HOST)
        .endpoint(endpoint).authentication(auth)
        .processor(new StringDelimitedProcessor(queue)).build();

    client.connect();
  }

  /**
   * Put tweets into collector
   *
   * @param collector
   *            Collector in which the tweets are collected.
   */
  protected void collectFiniteMessages(Collector<String> collector) {

    if (LOG.isInfoEnabled()) {
      LOG.info("Collecting tweets");
    }

    for (int i = 0; i < numberOfTweets; i++) {
      collectOneMessage(collector);
    }

    if (LOG.isInfoEnabled()) {
      LOG.info("Collecting tweets finished");
    }
  }

  /**
   * Put tweets into collector
   *
   * @param collector
   *            Collector in which the tweets are collected.
   */
  protected void collectMessages(Collector<String> collector) {

    if (LOG.isInfoEnabled()) {
      LOG.info("Tweet-stream begins");
    }

    while (true) {
      collectOneMessage(collector);
    }
  }

  /**
   * Put one tweet into the collector.
   *
   * @param collector
   *            Collector in which the tweets are collected.
   */
  protected void collectOneMessage(Collector<String> collector) {
    if (client.isDone()) {
      if (LOG.isErrorEnabled()) {
        LOG.error("Client connection closed unexpectedly: {}", client.getExitEvent()
            .getMessage());
      }
    }

    try {
      String msg = queue.poll(waitSec, TimeUnit.SECONDS);
      if (msg != null) {
        collector.collect(msg);
      } else {
        if (LOG.isInfoEnabled()) {
          LOG.info("Did not receive a message in {} seconds", waitSec);
        }
      }
    } catch (InterruptedException e) {
      throw new RuntimeException("'Waiting for tweet' thread is interrupted", e);
    }

  }

  private void closeConnection() {

    if (LOG.isInfoEnabled()) {
      LOG.info("Initiating connection close");
    }

    client.stop();

    if (LOG.isInfoEnabled()) {
      LOG.info("Connection closed successfully");
    }
  }

  /**
   * Get the size of the queue in which the tweets are contained temporarily.
   *
   * @return the size of the queue in which the tweets are contained temporarily
   */
  public int getQueueSize() {
    return queueSize;
  }

  /**
   * Set the size of the queue in which the tweets are contained temporarily.
   *
   * @param queueSize
   *            The desired value.
   */
  public void setQueueSize(int queueSize) {
    this.queueSize = queueSize;
  }

  /**
   * This function tells how long TwitterSource waits for the tweets.
   *
   * @return Number of second.
   */
  public int getWaitSec() {
    return waitSec;
  }

  /**
   * This function sets how long TwitterSource should wait for the tweets.
   *
   * @param waitSec
   *            The desired value.
   */
  public void setWaitSec(int waitSec) {
    this.waitSec = waitSec;
  }
}
// TOP
//
// Related Classes of org.apache.flink.streaming.connectors.twitter.TwitterSource
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.