// Reset the segment-level counters before scanning the data.
// NOTE(review): totalTokens and numCorrectTokens are incremented below but are not
// reset in the visible lines -- confirm they are initialized before this point.
numTrueSegments = numPredictedSegments = numCorrectSegments = 0;
// The in-alphabet / OOV counters are only updated by the commented-out lexicon code
// below, so within the visible code they remain 0 after being reset here.
numCorrectSegmentsInAlphabet = numCorrectSegmentsOOV = 0;
numIncorrectSegmentsInAlphabet = numIncorrectSegmentsOOV = 0;
// For every instance: run the model on the input and compare the predicted tag
// sequence against the true tag sequence, position by position.
for (int i = 0; i < data.size(); i++) {
Instance instance = data.get(i);
Sequence input = (Sequence) instance.getData();
//String tokens = null;
//if (instance.getSource() != null)
//tokens = (String) instance.getSource().toString();
Sequence trueOutput = (Sequence) instance.getTarget();
// These length checks are plain Java asserts, so they only run when assertions are enabled.
assert (input.size() == trueOutput.size());
Sequence predOutput = model.transduce (input);
assert (predOutput.size() == trueOutput.size());
boolean trueStart, predStart;
for (int j = 0; j < trueOutput.size(); j++) {
totalTokens++;
// Flags: does the true / predicted sequence carry the segment-start tag at position j?
trueStart = predStart = false;
if (segmentStartTag.equals(trueOutput.get(j))) {
numTrueSegments++;
trueStart = true;
}
if (segmentStartTag.equals(predOutput.get(j))) {
predStart = true;
numPredictedSegments++;
}
if (trueStart && predStart) {
// Both sequences start a segment at j. Scan forward with a separate index m
// (j itself is not advanced) to the first position where either sequence has
// the segment-end tag; the segment counts as correct only if both have it there.
int m;
//StringBuffer sb = new StringBuffer();
//sb.append (tokens.charAt(j));
for (m = j+1; m < trueOutput.size(); m++) {
trueStart = predStart = false; // Here, these actually mean "end", not "start"
if (segmentEndTag.equals(trueOutput.get(m)))
trueStart = true;
if (segmentEndTag.equals(predOutput.get(m)))
predStart = true;
if (trueStart || predStart) {
if (trueStart && predStart) {
// It is a correct segment
numCorrectSegments++;
//if (HashFile.allLexicons.contains(sb.toString()))
//numCorrectSegmentsInAlphabet++;
//else
//numCorrectSegmentsOOV++;
} else {
// It is an incorrect segment; let's find out if it was in the lexicon
// (The lexicon lookup is commented out, so nothing is counted in this branch.)
//for (int mm = m; mm < trueOutput.size(); mm++) {
//if (segmentEndTag.equals(predOutput.get(mm)))
//break;
//sb.append (tokens.charAt(mm));
//}
//if (HashFile.allLexicons.contains(sb.toString()))
//numIncorrectSegmentsInAlphabet++;
//else
//numIncorrectSegmentsOOV++;
}
// Stop at the first end tag seen in either sequence.
break;
}
//sb.append (tokens.charAt(m));
}
// for the case of the end of the sequence
// m == size means the scan above finished without hitting the break, i.e. no end
// tag was found in either sequence after j.
if(m==trueOutput.size()) {
// NOTE(review): trueStart == predStart always holds here. Either the scan loop
// never ran (j was the last position, both flags are still true from the start
// check) or its last iteration reset both to false and found no end tag. So the
// segment is always counted as correct and the else branch below is unreachable.
if (trueStart==predStart) {
numCorrectSegments++;
//if (HashFile.allLexicons.contains(sb.toString()))
//numCorrectSegmentsInAlphabet++;
//else
//numCorrectSegmentsOOV++;
} else {
//if (HashFile.allLexicons.contains(sb.toString()))
//numIncorrectSegmentsInAlphabet++;
//else
//numIncorrectSegmentsOOV++;
}
}
} else if (predStart) {
// Here is an incorrect predicted start, find out if the word is in the lexicon
// (Only commented-out code remains, so this branch currently does nothing.)
//StringBuffer sb = new StringBuffer();
//sb.append (tokens.charAt(j));
//for (int mm = j+1; mm < trueOutput.size(); mm++) {
//if (segmentEndTag.equals(predOutput.get(mm)))
//break;
//sb.append (tokens.charAt(mm));
//}
//if (HashFile.allLexicons.contains(sb.toString()))
//numIncorrectSegmentsInAlphabet++;
//else
//numIncorrectSegmentsOOV++;
}
// Token-level accuracy: count positions where the predicted tag equals the true tag.
if (trueOutput.get(j).equals(predOutput.get(j)))
numCorrectTokens++;
}
}
// Log token accuracy. The numerator is cast to double, so with totalTokens == 0 this
// is a floating-point division (non-finite result) rather than an ArithmeticException.
logger.info (description +" accuracy="+((double)numCorrectTokens)/totalTokens);
// Precision = correct segments / predicted segments; defined as 1 when nothing was predicted.
double precision = numPredictedSegments == 0 ? 1 : ((double)numCorrectSegments) / numPredictedSegments;