public JsonObject extractTweet(String html)
throws java.net.MalformedURLException, java.io.UnsupportedEncodingException {
JsonObject status = new JsonObject();
Document doc = Jsoup.parse(html);
Element tweet_div = doc.select("div.permalink-tweet").first();
String tweet_text = tweet_div.select("p.tweet-text").first().text();
status.addProperty("text", tweet_text);
String tweet_id = tweet_div.attr("data-tweet-id");
status.addProperty("id_str", tweet_id);
status.addProperty("id", Long.parseLong(tweet_id));
String timestamp = doc.select("span.js-short-timestamp").first().attr("data-time");
Date created_at = new Date();
created_at.setTime(Long.parseLong(timestamp) * 1000);
status.addProperty("created_at", date_fmt.format(created_at));
Elements js_stats_retweets = doc.select("li.js-stat-retweets");
if (!js_stats_retweets.isEmpty()) {
status.addProperty("retweeted", true);
String count = js_stats_retweets.select("strong").first().text();
status.addProperty("retweet_count", Long.parseLong(count));
} else {
status.addProperty("retweeted", false);
status.addProperty("retweet_count", 0);
}
Elements js_stats_favs = doc.select("li.js-stat-favorites");
status.addProperty("favorited", !js_stats_favs.isEmpty());
// User subfield
JsonObject user = new JsonObject();
String user_id = tweet_div.attr("data-user-id");
user.addProperty("id_str", user_id);
user.addProperty("id", Long.parseLong(user_id));
String screen_name = tweet_div.attr("data-screen-name");
user.addProperty("screen_name", screen_name);
String user_name = tweet_div.attr("data-name");
user.addProperty("name", user_name);
status.add("user", user);
// Geo information
Elements tweet_loc = doc.select("a.tweet-geo-text");
if (!tweet_loc.isEmpty()) {
JsonObject location = new JsonObject();
Element loc = tweet_loc.first();
// Adding http to avoid malformed URL exception
URL url = new URL("http:" + loc.attr("href"));