System.err.printf("Usage: java %s language filename features%n", TreebankFactoredLexiconStats.class.getName());
System.exit(-1);
}
Language language = Language.valueOf(args[0]);
TreebankLangParserParams tlpp = Languages.getLanguageParams(language);
if (language.equals(Language.Arabic)) {
String[] options = {"-arabicFactored"};
tlpp.setOptionFlag(options, 0);
} else {
String[] options = {"-frenchFactored"};
tlpp.setOptionFlag(options, 0);
}
Treebank tb = tlpp.diskTreebank();
tb.loadPath(args[1]);
MorphoFeatureSpecification morphoSpec = language.equals(Language.Arabic) ?
new ArabicMorphoFeatureSpecification() : new FrenchMorphoFeatureSpecification();
String[] features = args[2].trim().split(",");
for (String feature : features) {
morphoSpec.activate(MorphoFeatureType.valueOf(feature));
}
// Counters
Counter<String> wordTagCounter = new ClassicCounter<String>(30000);
Counter<String> morphTagCounter = new ClassicCounter<String>(500);
// Counter<String> signatureTagCounter = new ClassicCounter<String>();
Counter<String> morphCounter = new ClassicCounter<String>(500);
Counter<String> wordCounter = new ClassicCounter<String>(30000);
Counter<String> tagCounter = new ClassicCounter<String>(300);
Counter<String> lemmaCounter = new ClassicCounter<String>(25000);
Counter<String> lemmaTagCounter = new ClassicCounter<String>(25000);
Counter<String> richTagCounter = new ClassicCounter<String>(1000);
Counter<String> reducedTagCounter = new ClassicCounter<String>(500);
Counter<String> reducedTagLemmaCounter = new ClassicCounter<String>(500);
Map<String,Set<String>> wordLemmaMap = Generics.newHashMap();
TwoDimensionalIntCounter<String,String> lemmaReducedTagCounter = new TwoDimensionalIntCounter<String,String>(30000);
TwoDimensionalIntCounter<String,String> reducedTagTagCounter = new TwoDimensionalIntCounter<String,String>(500);
TwoDimensionalIntCounter<String,String> tagReducedTagCounter = new TwoDimensionalIntCounter<String,String>(300);
int numTrees = 0;
for (Tree tree : tb) {
for (Tree subTree : tree) {
if (!subTree.isLeaf()) {
tlpp.transformTree(subTree, tree);
}
}
List<Label> pretermList = tree.preTerminalYield();
List<Label> yield = tree.yield();
assert yield.size() == pretermList.size();