Package DataAcquisition

Source Code of DataAcquisition.TwitterSource

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/

package DataAcquisition;

import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import seekfeel.dataholders.DataUnit;
import seekfeel.dataholders.TweetUnit;
import seekfeel.utilities.Language;
import twitter4j.GeoLocation;
import twitter4j.Query;
import twitter4j.QueryResult;
import twitter4j.Tweet;
import twitter4j.Twitter;
import twitter4j.TwitterException;
import twitter4j.TwitterFactory;
import twitter4j.conf.ConfigurationBuilder;
import java.io.FileWriter;
import java.io.BufferedWriter;
import java.util.*;

import seekfeel.dataholders.Review;
/**
*
* @author Muhammad Zahran
*/
// enhance twitter search http://search.twitter.com/operators
// for further tunning the query search check
// http://www.jarvana.com/jarvana/view/org/twitter4j/twitter4j-core/2.1.1/twitter4j-core-2.1.1-javadoc.jar!/twitter4j/Query.html
public class TwitterSource extends DataSource {
 
  private ConfigurationBuilder cb;
  private TwitterFactory tf;
  private Twitter twitter;
  private String searchQuery;
  private int tweetCount; // no. of result tweets (MAX=100) but still looking
              // on how to increase it
  private int pageCount; // sets the page number (starting at 1) to return, up
              // to a max of roughly 1500 results
  private String language; // choose the language name according to the
                // "ISO 639-1", check
                // "http://en.wikipedia.org/wiki/ISO_639-1"
                // english-->en, arabic-->ar
  private QueryResult result;
  private List<Tweet> tweets;

  Set UniqueTweets = new HashSet();

  PrintWriter print = null;

  public TwitterSource() {
    ConfigurationBuilder cb = new ConfigurationBuilder();
    cb.setDebugEnabled(true)
        .setOAuthConsumerKey("fZdACmjtSdlIT7P7tv9g")
        .setOAuthConsumerSecret(
            "Pqk35GOwTc09UVTjR3tW76ibBp6D7f3JgFcZSIdwXSA")
        .setOAuthAccessToken(
            "83195003-LIdhlT1IE8Xspe3fq7p72iqjU47keD7yVhkwiKx9J")
        .setOAuthAccessTokenSecret(
            "KGPTSYfkhihmk87nDE6MTSLBbTzqNsZtHR5Hr9lhTcc");
    tf = new TwitterFactory(cb.build());
    twitter = tf.getInstance();
    tweetCount = 100;
    pageCount = 10;
  }

  public ArrayList<DataUnit> doWork(Language queryLang) throws IOException {
    ArrayList<DataUnit> allTweets = new ArrayList<DataUnit>();
    language = queryLang == Language.Arabic ? "ar":"en";
    Query query = new Query(searchQuery);// + " since:2011-02-15");
    query.setRpp(tweetCount);
    query.setPage(pageCount);
    query.setLang(language);
    try {
      result = twitter.search(query);
    } catch (Exception e) {
      System.out.println(e.getMessage());
    }
    if (result == null) {
      System.out.print(" null found ");
      return allTweets;
    }
    tweets = result.getTweets();
    TweetUnit tweetTemp;
    for (int i = 0; i < tweets.size(); i++) {   
      tweetTemp = new TweetUnit(tweets.get(i));
      allTweets.add(tweetTemp);
    }
    return allTweets;
  }

  public String CleanReview(String rev) {
    int len = rev.length();
    for (int i = 0; i < len; i++) {

      if (Character.getNumericValue(rev.charAt(i)) != -1) {
        rev = rev.replace(rev.charAt(i), '#');
      }
    }
    rev = rev.replace("#", "");
    rev = rev.replace(":", "");
    rev = rev.replace("/", "");
    rev = rev.replace("@", "");
    return rev;
  }

  public boolean isValidArabic(int x) {

    if (x > 1791 || x < 1536) {
      return false;
    }
    return true;
  }

  private boolean IsUnique(String tweet) {

    if (!tweetsContainsTweet(tweet)) {
      UniqueTweets.add(tweet);
      return true;

    }
    return false;
  }

  private boolean tweetsContainsTweet(String tweet) {
    for (Object tw : UniqueTweets) {
      String stw = (String) tw;
      if (stw.length() > tweet.length()) {
        if (stw.contains(tweet)) {
          return true;
        }
      } else {
        if (tweet.contains(stw)) {
          return true;
        }

      }
    }
    return false;

  }

  private String processTweet(String twt) {
    String[] tokens = twt.split(" ");
    String result = "";
    for (int i = 0; i < tokens.length; i++) {
      char[] chars = tokens[i].toCharArray();
      for (int j = 0; j < chars.length; j++) {
        if (((int) chars[j] >> 7) == 0) // if true then this char is
                        // ASCII, i.e. if true the char
                        // is not Arabic
        {
          break;
        } else {
          result += tokens[i] + " ";
          break;
        }
      }
    }

    return result;
  }

  /**
   * @return the searchQuery
   */
  public String getSearchQuery() {
    return searchQuery;
  }

  /**
   * @param searchQuery
   *            the searchQuery to set
   */
  public void setSearchQuery(String searchQuery) {
    this.searchQuery = searchQuery;
  }

  /**
   * @return the tweetCount
   */
  public int getTweetCount() {
    return tweetCount;
  }

  /**
   * @param tweetCount
   *            the tweetCount to set
   */
  public void setTweetCount(int tweetCount) {
    this.tweetCount = tweetCount;
  }

  /**
   * @return the pageCount
   */
  public int getPageCount() {
    return pageCount;
  }

  /**
   * @param pageCount
   *            the pageCount to set
   */
  public void setPageCount(int pageCount) {
    this.pageCount = pageCount;
  }

  /**
   * @return the language
   */
  public String getLanguage() {
    return language;
  }

  /**
   * @param language
   *            the language to set
   */
  public void setLanguage(String language) {
    this.language = language;
  }

  /**
   * @return the tweets
   */
  public List<Tweet> getTweets() {
    return tweets;
  }

  /**
   * @param tweets
   *            the tweets to set
   */
  public void setTweets(List<Tweet> tweets) {
    this.tweets = tweets;
  }

  @Override
  public ArrayList<DataUnit> harvest() {
    try {
      return doWork(Language.English);
    } catch (IOException e) {
      e.printStackTrace();
    }
    return null;
  }

}
TOP

Related Classes of DataAcquisition.TwitterSource

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.