Package distributedRedditAnalyser.reddit

Examples of distributedRedditAnalyser.reddit.Post


  }

  @Override
  public void execute(Tuple input) {
    //Get the post
    Post newPost = (Post)input.getValue(0);
    //Turn it into an instance
    Instance inst = new DenseInstance(2);
    inst.setDataset(INST_HEADERS);
    inst.setValue(0, newPost.getTitle());
    inst.setValue(1, newPost.getSubReddit());
    //emit these to a new bolt that collects instances
    _collector.emit(new Values(inst));
    _collector.ack(input);
  }
View Full Code Here


  @Override
  public void nextTuple() {
    //Sleep to reduce congestion
    Utils.sleep(50);
    //Try and get the next post
    Post nextPost = getNextPost();
    //If we have gotten a post emit it
    if(nextPost != null)
      collector.emit(new Values(nextPost));
  }
View Full Code Here

              break;
           
            //reverse order so printed order is consistent
            for(int c=children.size()-1; c>=0; c--){
              JSONObject childData = (JSONObject) ((JSONObject) children.get(c)).get("data");
              QUEUE.add(new Post((String) childData.get("title"), SUBREDDIT));
            }
           
            lastItemId = (String) wrappingObjectData.get("after");
           
            //If this is the first page, then it's the point we want to store to ensure that we don't get repeated posts
            if(i == 0){
              latestTimestamp = ((Double) ((JSONObject)((JSONObject) children.get(0)).get("data")).get("created")).longValue();
            }
           
            //Rate limit
            if(i != INITIAL_PAGE_COUNT - 1)
              Utils.sleep(1000);
            count += 100;
          }
          initialPull = false;
        }else{
          //Rate limit for the API (pages are cached for 30 seconds)
          Utils.sleep(10000);
          //Get the page
          HttpGet getRequest = new HttpGet(URL);
          ResponseHandler<String> responseHandler = new BasicResponseHandler();
               
          String responseBody = httpClient.execute(getRequest, responseHandler);
         
          //Parse it
          JSONParser parser= new JSONParser();
         
          JSONObject wrappingObject = (JSONObject) parser.parse(responseBody);
         
          JSONObject wrappingObjectData = (JSONObject) wrappingObject.get("data");
         
          JSONArray children = (JSONArray) wrappingObjectData.get("children");
         
          if(children.size() > 0){
            //reverse order so it is an actual stream
            for(int c=children.size()-1; c>=0; c--){
              JSONObject childData = (JSONObject) ((JSONObject) children.get(c)).get("data");
              if(latestTimestamp < ((Double) childData.get("created")).longValue())
                QUEUE.add(new Post((String) childData.get("title"), SUBREDDIT));
            }
            latestTimestamp = ((Double) ((JSONObject)((JSONObject) children.get(0)).get("data")).get("created")).longValue();
          }
        }
      } catch (ClientProtocolException e) {
View Full Code Here

TOP

Related Classes of distributedRedditAnalyser.reddit.Post

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.