/**
 * Supplies the output value to pair with a single input term.
 *
 * <p>{@code run} calls this once per line of the words file while building, handing
 * the result straight to {@code builder.add(input, output)}. When verification is
 * enabled it calls this again for every term and compares the result (via
 * {@code equals}) with what {@code Util.get} returns from the finished FST, so an
 * implementation needs to yield an equal value when asked again for the same
 * {@code input}/{@code ord} pair.
 *
 * <p>NOTE(review): the get-by-output verification pass in {@code run} casts the
 * returned value to {@code Long}; implementations with a different {@code T}
 * presumably must not be run through that pass -- confirm against the callers.
 *
 * @param input the term as converted by {@code toIntsRef}; {@code run} reuses one
 *              {@code IntsRef} instance for every call, so it is overwritten on the
 *              next line read
 * @param ord   zero-based count of lines read before this term in the current pass
 * @return the output to associate with {@code input}
 * @throws IOException declared so implementations may propagate I/O failures
 */
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
public void run(int limit, boolean verify, boolean verifyByOutput) throws IOException {
BufferedReader is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536);
try {
final IntsRef intsRef = new IntsRef(10);
long tStart = System.currentTimeMillis();
int ord = 0;
while(true) {
String w = is.readLine();
if (w == null) {
break;
}
toIntsRef(w, inputMode, intsRef);
builder.add(intsRef,
getOutput(intsRef, ord));
ord++;
if (ord % 500000 == 0) {
System.out.println(
String.format(Locale.ROOT,
"%6.2fs: %9d...", ((System.currentTimeMillis() - tStart) / 1000.0), ord));
}
if (ord >= limit) {
break;
}
}
long tMid = System.currentTimeMillis();
System.out.println(((tMid-tStart) / 1000.0) + " sec to add all terms");
assert builder.getTermCount() == ord;
FST<T> fst = builder.finish();
long tEnd = System.currentTimeMillis();
System.out.println(((tEnd-tMid) / 1000.0) + " sec to finish/pack");
if (fst == null) {
System.out.println("FST was fully pruned!");
System.exit(0);
}
if (dirOut == null) {
return;
}
System.out.println(ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs; " + fst.getArcWithOutputCount() + " arcs w/ output; tot size " + fst.sizeInBytes());
if (fst.getNodeCount() < 100) {
Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
Util.toDot(fst, w, false, false);
w.close();
System.out.println("Wrote FST to out.dot");
}
Directory dir = FSDirectory.open(new File(dirOut));
IndexOutput out = dir.createOutput("fst.bin", IOContext.DEFAULT);
fst.save(out);
out.close();
System.out.println("Saved FST to fst.bin.");
if (!verify) {
return;
}
/*
IndexInput in = dir.openInput("fst.bin", IOContext.DEFAULT);
fst = new FST<T>(in, outputs);
in.close();
*/
System.out.println("\nNow verify...");
while(true) {
for(int iter=0;iter<2;iter++) {
is.close();
is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536);
ord = 0;
tStart = System.currentTimeMillis();
while(true) {
String w = is.readLine();
if (w == null) {
break;
}
toIntsRef(w, inputMode, intsRef);
if (iter == 0) {
T expected = getOutput(intsRef, ord);
T actual = Util.get(fst, intsRef);
if (actual == null) {
throw new RuntimeException("unexpected null output on input=" + w);
}
if (!actual.equals(expected)) {
throw new RuntimeException("wrong output (got " + outputs.outputToString(actual) + " but expected " + outputs.outputToString(expected) + ") on input=" + w);
}
} else {
// Get by output
final Long output = (Long) getOutput(intsRef, ord);
@SuppressWarnings("unchecked") final IntsRef actual = Util.getByOutput((FST<Long>) fst, output.longValue());
if (actual == null) {
throw new RuntimeException("unexpected null input from output=" + output);
}
if (!actual.equals(intsRef)) {
throw new RuntimeException("wrong input (got " + actual + " but expected " + intsRef + " from output=" + output);
}
}
ord++;