/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package DataAcquisition;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import seekfeel.dataholders.DataUnit;
import seekfeel.dataholders.TweetUnit;
import seekfeel.utilities.Language;
import twitter4j.GeoLocation;
import twitter4j.Query;
import twitter4j.QueryResult;
import twitter4j.Tweet;
import twitter4j.Twitter;
import twitter4j.TwitterException;
import twitter4j.TwitterFactory;
import twitter4j.conf.ConfigurationBuilder;
import java.io.FileWriter;
import java.io.BufferedWriter;
import java.util.*;
import seekfeel.dataholders.Review;
/**
 * Data source that runs a keyword search through twitter4j and wraps every
 * returned tweet in a {@link TweetUnit}.
 *
 * <p>Twitter search operators: http://search.twitter.com/operators
 *
 * <p>For further tuning of the search query, see the twitter4j {@code Query}
 * Javadoc:
 * http://www.jarvana.com/jarvana/view/org/twitter4j/twitter4j-core/2.1.1/twitter4j-core-2.1.1-javadoc.jar!/twitter4j/Query.html
 *
 * @author Muhammad Zahran
 */
public class TwitterSource extends DataSource {

    /** First code point accepted by {@link #isValidArabic(int)} (U+0600, decimal 1536). */
    private static final int ARABIC_RANGE_START = 0x0600;

    /** Last code point accepted by {@link #isValidArabic(int)} (U+06FF, decimal 1791). */
    private static final int ARABIC_RANGE_END = 0x06FF;

    private ConfigurationBuilder cb;
    private TwitterFactory tf;
    private Twitter twitter;
    private String searchQuery;

    /** Number of tweets requested per page (passed to {@code Query.setRpp}); MAX=100. */
    private int tweetCount;

    /**
     * Page number to fetch, starting at 1 (passed to {@code Query.setPage});
     * up to a max of roughly 1500 results overall. Despite the name this is a
     * page index, not a number of pages.
     */
    private int pageCount;

    /**
     * ISO 639-1 language code used for the search (english --> en,
     * arabic --> ar), see http://en.wikipedia.org/wiki/ISO_639-1.
     * Overwritten by every call to {@link #doWork(Language)}.
     */
    private String language;

    /** Result of the most recent search; {@code null} when that search failed. */
    private QueryResult result;

    /** Tweets of the most recent successful search. */
    private List<Tweet> tweets;

    /** Tweet texts already accepted by {@link #IsUnique(String)}. */
    Set<String> UniqueTweets = new HashSet<String>();

    PrintWriter print = null;

    /**
     * Builds the twitter4j client and sets the defaults: 100 tweets per page,
     * page number 10.
     */
    public TwitterSource() {
        // NOTE(review): OAuth credentials are hard-coded in source. They should
        // be moved to external configuration and rotated; left unchanged here
        // so that existing behaviour is preserved.
        // Assign the field (the previous code declared a shadowing local, which
        // left the field null forever).
        cb = new ConfigurationBuilder();
        cb.setDebugEnabled(true)
                .setOAuthConsumerKey("fZdACmjtSdlIT7P7tv9g")
                .setOAuthConsumerSecret(
                        "Pqk35GOwTc09UVTjR3tW76ibBp6D7f3JgFcZSIdwXSA")
                .setOAuthAccessToken(
                        "83195003-LIdhlT1IE8Xspe3fq7p72iqjU47keD7yVhkwiKx9J")
                .setOAuthAccessTokenSecret(
                        "KGPTSYfkhihmk87nDE6MTSLBbTzqNsZtHR5Hr9lhTcc");
        tf = new TwitterFactory(cb.build());
        twitter = tf.getInstance();
        tweetCount = 100;
        pageCount = 10;
    }

    /**
     * Runs the configured search query and converts the hits to data units.
     *
     * <p>Any exception raised by the search is printed to standard output and
     * results in an empty list; it is not propagated.
     *
     * @param queryLang {@code Language.Arabic} searches with "ar"; any other
     *                  value searches with "en"
     * @return one {@link TweetUnit} per returned tweet; empty when the search
     *         failed
     * @throws IOException declared for callers; not thrown by this
     *                     implementation
     */
    public ArrayList<DataUnit> doWork(Language queryLang) throws IOException {
        ArrayList<DataUnit> allTweets = new ArrayList<DataUnit>();
        language = queryLang == Language.Arabic ? "ar" : "en";

        Query query = new Query(searchQuery);
        query.setRpp(tweetCount);
        query.setPage(pageCount);
        query.setLang(language);

        // Forget the previous call's result first: otherwise a failed search
        // would fall through the null check below and silently hand back the
        // tweets of an earlier, unrelated query.
        result = null;
        try {
            result = twitter.search(query);
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
        if (result == null) {
            System.out.print(" null found ");
            return allTweets;
        }

        tweets = result.getTweets();
        for (Tweet tweet : tweets) {
            allTweets.add(new TweetUnit(tweet));
        }
        return allTweets;
    }

    /**
     * Strips unwanted characters from a review text.
     *
     * <p>Removed are {@code '#'}, {@code ':'}, {@code '/'}, {@code '@'} and
     * every character for which {@link Character#getNumericValue(char)} is not
     * {@code -1}. Note that this covers not only digits but also the Latin
     * letters A-Z / a-z (their numeric values are 10 through 35), so Latin text
     * is removed as well.
     *
     * @param rev the text to clean; must not be {@code null}
     * @return the text without the characters listed above
     */
    public String CleanReview(String rev) {
        StringBuilder cleaned = new StringBuilder(rev.length());
        for (int i = 0; i < rev.length(); i++) {
            char c = rev.charAt(i);
            boolean hasNumericValue = Character.getNumericValue(c) != -1;
            boolean isStrippedSymbol = c == '#' || c == ':' || c == '/' || c == '@';
            if (!hasNumericValue && !isStrippedSymbol) {
                cleaned.append(c);
            }
        }
        return cleaned.toString();
    }

    /**
     * Tells whether a code point lies in the range U+0600..U+06FF (inclusive).
     *
     * @param x the code point to test
     * @return {@code true} when {@code 1536 <= x <= 1791}
     */
    public boolean isValidArabic(int x) {
        return x >= ARABIC_RANGE_START && x <= ARABIC_RANGE_END;
    }

    /**
     * Records the tweet as seen unless it overlaps with an already seen one.
     *
     * @param tweet the tweet text
     * @return {@code true} when the tweet was new and has been recorded
     */
    private boolean IsUnique(String tweet) {
        if (tweetsContainsTweet(tweet)) {
            return false;
        }
        UniqueTweets.add(tweet);
        return true;
    }

    /**
     * Checks whether the tweet contains, or is contained in, a recorded tweet.
     *
     * @param tweet the tweet text
     * @return {@code true} when such a recorded tweet exists
     */
    private boolean tweetsContainsTweet(String tweet) {
        for (String seen : UniqueTweets) {
            // A string can only contain another one that is not longer than
            // itself, so testing both directions needs no length comparison.
            if (seen.contains(tweet) || tweet.contains(seen)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Keeps only the space-separated tokens whose first character is outside
     * the 7-bit ASCII range (i.e. presumably the Arabic words).
     *
     * @param twt the tweet text
     * @return the kept tokens, each followed by a single space
     */
    private String processTweet(String twt) {
        StringBuilder kept = new StringBuilder();
        for (String token : twt.split(" ")) {
            // Only the first character decides; empty tokens (from consecutive
            // spaces) are dropped.
            if (!token.isEmpty() && (token.charAt(0) >> 7) != 0) {
                kept.append(token).append(' ');
            }
        }
        return kept.toString();
    }

    /**
     * @return the searchQuery
     */
    public String getSearchQuery() {
        return searchQuery;
    }

    /**
     * @param searchQuery
     *            the searchQuery to set
     */
    public void setSearchQuery(String searchQuery) {
        this.searchQuery = searchQuery;
    }

    /**
     * @return the tweetCount
     */
    public int getTweetCount() {
        return tweetCount;
    }

    /**
     * @param tweetCount
     *            the tweetCount to set
     */
    public void setTweetCount(int tweetCount) {
        this.tweetCount = tweetCount;
    }

    /**
     * @return the pageCount
     */
    public int getPageCount() {
        return pageCount;
    }

    /**
     * @param pageCount
     *            the pageCount to set
     */
    public void setPageCount(int pageCount) {
        this.pageCount = pageCount;
    }

    /**
     * @return the language
     */
    public String getLanguage() {
        return language;
    }

    /**
     * @param language
     *            the language to set; note that {@link #doWork(Language)}
     *            overwrites this value on every call
     */
    public void setLanguage(String language) {
        this.language = language;
    }

    /**
     * @return the tweets
     */
    public List<Tweet> getTweets() {
        return tweets;
    }

    /**
     * @param tweets
     *            the tweets to set
     */
    public void setTweets(List<Tweet> tweets) {
        this.tweets = tweets;
    }

    /**
     * Runs the search in English.
     *
     * @return the harvested tweets; an empty list (never {@code null}) when
     *         harvesting failed
     */
    @Override
    public ArrayList<DataUnit> harvest() {
        try {
            return doWork(Language.English);
        } catch (IOException e) {
            e.printStackTrace();
            // Hand back an empty list so callers can iterate without a null check.
            return new ArrayList<DataUnit>();
        }
    }
}