// Open infile for line-by-line reading, decoding its bytes with the "utf8" charset.
// NOTE(review): no close() on this reader is visible in this excerpt — confirm it is closed later.
BufferedReader in = new BufferedReader(new InputStreamReader(
new FileInputStream(infile ), "utf8"));
// Disabled code: a writer for outfile using encoding enc2.
// BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
// outfile), enc2));
// Stop-word filter built from stopwordfile; queried once per token in the loop below.
StopWords sw = new StopWords(stopwordfile);
// Dictionary used to turn each kept token string into an int index via lookupIndex().
LabelAlphabet dict = new LabelAlphabet();
// One list of token indices per non-blank input line, appended in the order the lines are read.
ArrayList<TIntArrayList> documentsList= new ArrayList<TIntArrayList>();
// Holds the line currently being processed; readLine() yields null at end of input.
String line = null;
// Read the input one line at a time until readLine() returns null (end of input).
// Each non-blank line yields exactly one entry in documentsList.
while ((line = in.readLine()) != null) {
line = line.trim();
// Skip empty or whitespace-only lines: they produce no entry in documentsList.
if(line.length()==0)
continue;
// Tokens are the whitespace-separated fields of the trimmed line.
String[] toks = line.split("\\s+");
// Collects the dictionary indices of this line's tokens that are not stop words.
TIntArrayList wordlist = new TIntArrayList();
for(int j=0;j<toks.length;j++){
String tok = toks[j];
// Stop words are dropped before the dictionary lookup, so they are never passed to dict.
if(sw.isStopWord(tok))
continue;
// NOTE(review): lookupIndex presumably assigns a new index to unseen tokens — confirm against LabelAlphabet.
int idx = dict.lookupIndex(tok);
wordlist.add(idx);
}
// The list is appended even if every token was a stop word, so entries may be empty.
documentsList.add(wordlist);