if (args[i].equals("-c")) { configFile = args[++i]; continue; }
if (args[i].equals("-beta")) { beta = Double.parseDouble(args[++i]); continue; }
System.out.println("Unrecognized option: " + args[i]);
}
ResultSink rs = new ResultSink(ResultSink.ResultSinkType.SUPERTAG);
try {
in = new SRILMFactoredBundleCorpusIterator(
(inputCorp.equals("<stdin>")) ?
new BufferedReader(new InputStreamReader(System.in)) :
new BufferedReader(new FileReader(new File(inputCorp))));
} catch (FileNotFoundException ex) {
System.err.print("Input corpus " + inputCorp + " not found. Exiting...");
Logger.getLogger(WordAndPOSDictionaryLabellingStrategy.class.getName()).log(Level.SEVERE, null, ex);
System.exit(-1);
}
try {
out = (output.equals("<stdout>")) ? new BufferedWriter(new OutputStreamWriter(System.out)) : new BufferedWriter(new FileWriter(new File(output)));
} catch (IOException ex) {
System.err.print("Output file " + output + " not found. Exiting...");
Logger.getLogger(WordAndPOSDictionaryLabellingStrategy.class.getName()).log(Level.SEVERE, null, ex);
System.exit(-1);
}
WordAndPOSDictionaryLabellingStrategy stgger = WordAndPOSDictionaryLabellingStrategy.supertaggerFactory(configFile);
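// For each sentence, print out the following (fields on a word line are tab-separated,
// and every tag is immediately followed by its probability):
// <s>
// w1 <numPOSTags> <posTag1> <prob1> ... <posTagK> <probK> <numSupertags> <supertag1> <prob1> ... <supertagL> <probL>
// ...
// wN <numPOSTags> <posTag1> <prob1> ... <posTagM> <probM> <numSupertags> <supertag1> <prob1> ... <supertagU> <probU>
// </s>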
for (List<Word> inLine : in) {
List<List<Pair<Double,String>>> taggedSent = stgger.multitag(inLine, beta);
if(test) { rs.addSent(taggedSent, inLine); }
// beginning of sentence...
out.write("<s>" + System.getProperty("line.separator"));
List<TaggedWord> posTagging = stgger.getCurrentTagging();
int cursor = -1;
while(++cursor < taggedSent.size()) {
Word wdIn = inLine.get(cursor);
// word form...
out.write(wdIn.getForm());
TaggedWord posT = posTagging.get(cursor);
// print out the number of POS tags, followed by a tab-separated tag/probability pair for each POS tag.
out.write("\t" + posT.getPOSTagging().size());
for(Pair<Double,String> pt : posT.getPOSTagging()) {
out.write("\t" + pt.b + "\t" + pt.a);
}
// now print out the number of supertags, followed by a tab-separated supertag/probability pair for each supertag.
out.write("\t" + taggedSent.get(cursor).size());
for(Pair<Double,String> stg : taggedSent.get(cursor)) {
out.write("\t" + stg.b + "\t" + stg.a);
}
out.write(System.getProperty("line.separator"));
}
out.write("</s>" + System.getProperty("line.separator"));
}
out.flush();
if(test) { System.err.println(rs.report()); }
} catch (IOException ex) {
Logger.getLogger(WordAndPOSDictionaryLabellingStrategy.class.getName()).log(Level.SEVERE, null, ex);
} finally {
try {
out.close();