// Strip leading/trailing whitespace so that a whitespace-only line becomes the empty string.
line = line.trim();
// An empty line ends the group of lines currently buffered in carrier.
if (line.matches("^$")){
// Skip empty lines when nothing has been buffered yet.
if(carrier.size()>0){
FNLPSent sent = new FNLPSent();
sent.parse(carrier,1,HASID); //TODO: needs to be changed for different corpora
// Normalization: pass every word of the sentence through ct.normalize
for(int i=0;i<sent.words.length;i++){
sent.words[i] = ct.normalize(sent.words[i]);
}
correct(sent);
docs.add(sent);
// Empty the buffer so the next group of lines starts fresh.
carrier.clear();
}
}else
// Non-empty line: buffer it until the next empty line is seen.
carrier.add(line);
}
// Flush whatever is still buffered in carrier (e.g. the input did not end with an empty line).
if(!carrier.isEmpty()){
    FNLPSent sent = new FNLPSent();
    sent.parse(carrier,1,HASID); //TODO: needs to be changed for different corpora
    // Normalize the words here as well, so this trailing sentence is treated exactly
    // like the sentences that were terminated by an empty line.
    for(int i=0;i<sent.words.length;i++){
        sent.words[i] = ct.normalize(sent.words[i]);
    }
    correct(sent);
    docs.add(sent);
    carrier.clear();
}
// Hand the collected sentences over to the corpus.
corpus.add(docs);