lexOptions.unknownPrefixSize = 1;
lexOptions.unknownSuffixSize = 1;
}
// Newly created word and tag indices, passed to tlpp.lex() together with the options.
Index<String> wordIndex = new HashIndex<String>();
Index<String> tagIndex = new HashIndex<String>();
// Lexicon whose unknown-word model supplies the signatures tallied below.
Lexicon lex = tlpp.lex(op, wordIndex, tagIndex);
// Tree-index threshold: 50% of the treebank size, truncated to an int.
// Signatures are only tallied for trees whose index exceeds this value.
int computeAfter = (int) (0.50 * tb.size());
// Running count of every word form seen so far, across all trees.
Counter<String> vocab = new ClassicCounter<String>();
// Count of each unknown-word signature produced for first-time words.
Counter<String> unkCounter = new ClassicCounter<String>();
// Walk the treebank once, counting every word form in vocab. For trees whose
// index is greater than computeAfter, a word that has been seen for the first
// time (count still below 2 after incrementing) is treated as unknown and its
// signature is tallied in unkCounter.
int treeId = 0;
for (Tree t : tb) {
  List<Label> leaves = t.yield();
  // The tree-level part of the condition is constant for the whole sentence.
  final boolean tallySignatures = treeId > computeAfter;
  int posId = 0;
  for (Label word : leaves) {
    final String form = word.value();
    vocab.incrementCount(form);
    if (tallySignatures && vocab.getCount(form) < 2.0) {
      unkCounter.incrementCount(lex.getUnknownWordModel().getSignature(form, posId));
    }
    // Advance the position for every token, not only for tallied ones, so the
    // location handed to getSignature() is the word's real index in the
    // sentence (previously it only advanced when a signature was computed).
    posId++;
  }
  treeId++;
}
List<String> biggestKeys = new ArrayList<String>(unkCounter.keySet());