Package uk.ac.cam.ha293.tweetlabel.util

Source Code of uk.ac.cam.ha293.tweetlabel.util.AssociatedPress

package uk.ac.cam.ha293.tweetlabel.util;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;

import uk.ac.cam.ha293.tweetlabel.twitter.SimpleProfile;
import uk.ac.cam.ha293.tweetlabel.twitter.SimpleTweet;
import uk.ac.cam.ha293.tweetlabel.types.Document;

public class AssociatedPress {
 
  private String datasetPath;
  private HashSet<String> documents;

  public AssociatedPress() {
    datasetPath = "dataset/ap/ap.txt";
    documents = new HashSet<String>();
    try {
      FileInputStream fileIn = new FileInputStream(datasetPath);
      BufferedReader buffer = new BufferedReader(new InputStreamReader(fileIn));
      while(true) {
        String nextLine = buffer.readLine();
        if(nextLine == null || nextLine.equals(" ")) break;
        if(nextLine.charAt(0) == '<' || nextLine.charAt(1) == '<') continue;
        documents.add(nextLine);
      }
    } catch (IOException e){
      System.err.println("An error occured");
    }
  }
 
  public SimpleProfile asSimpleProfile() {
    SimpleProfile profile = new SimpleProfile(0);
    for(String document : documents) {
      profile.addTweet(new SimpleTweet(0,0,document));
    }
    return profile;
  }
}
TOP

Related Classes of uk.ac.cam.ha293.tweetlabel.util.AssociatedPress

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.