/**
 * Builds a status JSON object from the HTML of a tweet permalink page.
 *
 * <p>The returned object carries, in this order: {@code text}, {@code id_str}, {@code id},
 * {@code created_at}, {@code retweeted}, {@code retweet_count}, {@code favorited}, a nested
 * {@code user} object ({@code id_str}, {@code id}, {@code screen_name}, {@code name}) and,
 * only when an {@code a.tweet-geo-text} link is present, a nested {@code location} object.
 *
 * <p>{@code retweeted} is {@code true} whenever an {@code li.js-stat-retweets} element exists
 * in the page, and {@code favorited} is {@code true} whenever an {@code li.js-stat-favorites}
 * element exists.
 *
 * <p>The {@code location} object always contains {@code location_text}. {@code latitude} and
 * {@code longitude} are added only if one of the link's query parameters {@code ll},
 * {@code sll}, {@code cbll} or {@code q} (tried in that order) holds a parseable
 * {@code "lat,long"} pair; values that are not coordinates are skipped instead of aborting
 * the extraction.
 *
 * @param html the HTML source of the tweet page
 * @return the extracted status as a JSON object
 * @throws java.net.MalformedURLException if the geo link's {@code href} cannot be turned into
 *     a URL
 * @throws java.io.UnsupportedEncodingException declared for
 *     {@code HTMLStatusExtractor.splitQuery}, which parses the geo link's query string
 * @throws NullPointerException if the page lacks the {@code div.permalink-tweet},
 *     {@code p.tweet-text} or {@code span.js-short-timestamp} element, or the retweet stat
 *     has no {@code strong} child
 * @throws NumberFormatException if the tweet id, user id, timestamp or retweet count is not
 *     a valid number
 */
public JsonObject extractTweet(String html)
        throws java.net.MalformedURLException, java.io.UnsupportedEncodingException {
    JsonObject status = new JsonObject();
    Document doc = Jsoup.parse(html);
    Element tweetDiv = doc.select("div.permalink-tweet").first();

    String tweetText = tweetDiv.select("p.tweet-text").first().text();
    status.addProperty("text", tweetText);

    String tweetId = tweetDiv.attr("data-tweet-id");
    status.addProperty("id_str", tweetId);
    status.addProperty("id", Long.parseLong(tweetId));

    // data-time is scaled by 1000 to obtain the millisecond value Date expects
    // (presumably the attribute is in epoch seconds -- confirm against the page markup).
    String timestamp = doc.select("span.js-short-timestamp").first().attr("data-time");
    Date createdAt = new Date(Long.parseLong(timestamp) * 1000);
    status.addProperty("created_at", date_fmt.format(createdAt));

    Elements retweetStats = doc.select("li.js-stat-retweets");
    if (!retweetStats.isEmpty()) {
        status.addProperty("retweeted", true);
        String count = retweetStats.select("strong").first().text();
        // Drop thousands separators ("1,234") so grouped counts parse instead of throwing.
        // NOTE(review): assumes comma grouping; other formats still raise
        // NumberFormatException as before.
        status.addProperty("retweet_count", Long.parseLong(count.replace(",", "")));
    } else {
        status.addProperty("retweeted", false);
        status.addProperty("retweet_count", 0);
    }

    Elements favoriteStats = doc.select("li.js-stat-favorites");
    status.addProperty("favorited", !favoriteStats.isEmpty());

    // User subfield
    JsonObject user = new JsonObject();
    String userId = tweetDiv.attr("data-user-id");
    user.addProperty("id_str", userId);
    user.addProperty("id", Long.parseLong(userId));
    user.addProperty("screen_name", tweetDiv.attr("data-screen-name"));
    user.addProperty("name", tweetDiv.attr("data-name"));
    status.add("user", user);

    // Geo information
    Elements geoLinks = doc.select("a.tweet-geo-text");
    if (!geoLinks.isEmpty()) {
        JsonObject location = new JsonObject();
        Element geoLink = geoLinks.first();
        // Adding http to avoid malformed URL exception
        URL url = new URL("http:" + geoLink.attr("href"));
        Map<String, String> queryParams = HTMLStatusExtractor.splitQuery(url);

        // Candidate query parameters, in priority order.
        // http://asnsblues.blogspot.ch/2011/11/google-maps-query-string-parameters.html
        String[] coordinateKeys = {"ll", "sll", "cbll", "q"};
        for (String key : coordinateKeys) {
            String latAndLong = queryParams.get(key);
            if (latAndLong == null) {
                continue;
            }
            String[] coordinates = latAndLong.split(",");
            if (coordinates.length < 2) {
                // No "lat,long" pair here (e.g. a free-text q value); try the next key.
                continue;
            }
            try {
                double latitude = Double.parseDouble(coordinates[0]);
                double longitude = Double.parseDouble(coordinates[1]);
                location.addProperty("latitude", latitude);
                location.addProperty("longitude", longitude);
                break;
            } catch (NumberFormatException ignored) {
                // Value is not numeric (e.g. q holds a place name); try the next key.
            }
        }

        location.addProperty("location_text", geoLink.text());
        status.add("location", location);
    }
    return status;
}