bfr = new BufferedReader(new InputStreamReader(in,charset));
} catch (FileNotFoundException e) {
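// TODO: handle the missing file properly. The exception is only printed here,
// and execution then continues into the read loop below.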
e.printStackTrace();
}
FNLPDoc docs = new FNLPDoc();
docs.name = file.getName();
String line = null;
carrier.clear();
while ((line = bfr.readLine()) != null) {
line = line.trim();
if (line.matches("^$")){
if(carrier.size()>0){
FNLPSent sent = new FNLPSent();
sent.parse(carrier,1,HASID); //TODO: 需要根据不同语料修改
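// Normalize every word of the sentence with ct.normalize.
// NOTE(review): the end-of-input flush after the loop does not run this step - confirm whether that is intended.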
for(int i=0;i<sent.words.length;i++){
sent.words[i] = ct.normalize(sent.words[i]);
}
correct(sent);
docs.add(sent);
carrier.clear();
}
}else
carrier.add(line);
}
if(!carrier.isEmpty()){
FNLPSent sent = new FNLPSent();
sent.parse(carrier,1,HASID); //TODO: 需要根据不同语料修改
correct(sent);
docs.add(sent);
carrier.clear();
}
corpus.add(docs);
}
}