public void evaluate (String description, Extraction extraction, PrintStream out)
{
int numDocs = extraction.getNumDocuments ();
assert numDocs == extraction.getNumRecords ();
LabelAlphabet dict = extraction.getLabelAlphabet();
int numLabels = dict.size();
int[] numCorr = new int [numLabels];
int[] numPred = new int [numLabels];
int[] numTrue = new int [numLabels];
for (int docnum = 0; docnum < numDocs; docnum++) {
Record extracted = extraction.getRecord (docnum);
Record target = extraction.getTargetRecord (docnum);
// Count predicted values per label, and how many of them match a target value (used later for precision and recall)
Iterator it = extracted.fieldsIterator ();
while (it.hasNext ()) {
Field predField = (Field) it.next ();
Label name = predField.getName ();
Field trueField = target.getField (name);
int idx = name.getIndex ();
for (int j = 0; j < predField.numValues(); j++) {
numPred [idx]++;
if (trueField != null && trueField.isValue (predField.value (j), comparator)) {
numCorr [idx]++;
} else {
// The predicted value matched no target value: report it if errorOutputStream is set. (This reporting should be moved to the per-field level rather than the per-filler level.)
if (errorOutputStream != null) {
//xxx TODO: Display name of supporting document
errorOutputStream.println ("Error in extraction!");
errorOutputStream.println ("Predicted "+predField);
errorOutputStream.println ("True "+trueField);
errorOutputStream.println ();
}
}
}
}
// Count target (true) values per label
it = target.fieldsIterator ();
while (it.hasNext ()) {
Field trueField = (Field) it.next ();
Label name = trueField.getName ();
numTrue [name.getIndex ()] += trueField.numValues ();
}
}
out.println (description+" SEGMENT counts");
out.println ("Name\tCorrect\tPred\tTarget");
for (int i = 0; i < numLabels; i++) {
Label name = dict.lookupLabel (i);
out.println (name+"\t"+numCorr[i]+"\t"+numPred[i]+"\t"+numTrue[i]);
}
out.println ();
DecimalFormat f = new DecimalFormat ("0.####");
double totalF1 = 0;
int totalFields = 0;
out.println (description+" per-field F1");
out.println ("Name\tP\tR\tF1");
for (int i = 0; i < numLabels; i++) {
double P = (numPred[i] == 0) ? 0 : ((double)numCorr[i]) / numPred [i];
double R = (numTrue[i] == 0) ? 1 : ((double)numCorr[i]) / numTrue [i];
double F1 = (P + R == 0) ? 0 : (2 * P * R) / (P + R);
if ((numPred[i] > 0) || (numTrue[i] > 0)) {
totalF1 += F1;
totalFields++;
}
Label name = dict.lookupLabel (i);
out.println (name+"\t"+f.format(P)+"\t"+f.format(R)+"\t"+f.format(F1));
}
int totalCorr = MatrixOps.sum (numCorr);
int totalPred = MatrixOps.sum (numPred);