//For every page of the subreddit we want to scrape
String lastItemId = "";
for(int i = 0; i < INITIAL_PAGE_COUNT; i++){
//Retrieve the page
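//'count' is the number of items already seen and 'after' is the fullname of the last item seen; together they page through the listing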
HttpGet getRequest = new HttpGet(URL + "&count=" + count + "&after=" + lastItemId);
ResponseHandler<String> responseHandler = new BasicResponseHandler();
String responseBody = httpClient.execute(getRequest, responseHandler);
//Parse it as JSON
JSONParser parser = new JSONParser();
JSONObject wrappingObject = (JSONObject) parser.parse(responseBody);
JSONObject wrappingObjectData = (JSONObject) wrappingObject.get("data");
JSONArray children = (JSONArray) wrappingObjectData.get("children");
if(children.isEmpty())
break;
//Iterate in reverse (the listing is newest-first) so posts are queued oldest-first and the printed order is consistent
for(int c=children.size()-1; c>=0; c--){
JSONObject childData = (JSONObject) ((JSONObject) children.get(c)).get("data");
QUEUE.add(new Post((String) childData.get("title"), SUBREDDIT));
}
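//The listing's "after" field is the fullname of the last post on this page; it is passed back to request the next page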
lastItemId = (String) wrappingObjectData.get("after");
//If this is the first page, store the newest post's timestamp so we don't process repeated posts later
if(i == 0){
JSONObject newestPostData = (JSONObject) ((JSONObject) children.get(0)).get("data");
latestTimestamp = ((Double) newestPostData.get("created")).longValue();
}
//Rate limit
if(i != INITIAL_PAGE_COUNT - 1)
Utils.sleep(1000);
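//Advance the running item count; this assumes the request URL asks for 100 posts per page (limit=100)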
count += 100;
}
initialPull = false;
}else{
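//After the initial pull, poll only the first page of the listing for new posts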
//Rate limit for the API (pages are cached for 30 seconds)
Utils.sleep(10000);
//Get the page
HttpGet getRequest = new HttpGet(URL);
ResponseHandler<String> responseHandler = new BasicResponseHandler();
String responseBody = httpClient.execute(getRequest, responseHandler);
//Parse it
JSONParser parser = new JSONParser();