//line = line.replaceAll(" +"," ");
//System.err.println("pichuan: processing line = "+line);
String[] toks = line.split(" ");
for (String word : toks) {
CoreLabel wi = new CoreLabel();
Matcher lowerMatcher = allLower.matcher(word);
if (lowerMatcher.matches()) {
wi.set(CoreAnnotations.AnswerAnnotation.class, "LOWER");
wi.set(CoreAnnotations.GoldAnswerAnnotation.class, "LOWER");
} else {
Matcher upperMatcher = allUpper.matcher(word);
if (!THREE_CLASSES && upperMatcher.matches()) {
wi.set(CoreAnnotations.AnswerAnnotation.class, "UPPER");
wi.set(CoreAnnotations.GoldAnswerAnnotation.class, "UPPER");
} else {
Matcher startUpperMatcher = startUpper.matcher(word);
boolean isINIT_UPPER; // = false;
if (word.length() > 1) {
String w2 = word.substring(1);
String lcw2 = w2.toLowerCase();
isINIT_UPPER = w2.equals(lcw2);
} else {
isINIT_UPPER = false;
}
if (startUpperMatcher.matches() && isINIT_UPPER) {
wi.set(CoreAnnotations.AnswerAnnotation.class, "INIT_UPPER");
wi.set(CoreAnnotations.GoldAnswerAnnotation.class, "INIT_UPPER");
} else {
wi.set(CoreAnnotations.AnswerAnnotation.class, "O");
wi.set(CoreAnnotations.GoldAnswerAnnotation.class, "O");
}
}
}
wi.setWord(word.toLowerCase());
wi.set(CoreAnnotations.PositionAnnotation.class, pos + "");
doc.add(wi);
pos++;
}
return doc;
}