// Per-token step that groups tokens into phrases based on chunkTags[i]:
// a tag starting with 'B' opens a new phrase at token i
boolean start = chunkTags[i].charAt(0) == 'B';
// an open phrase (tag != null) is closed when token i either opens a new
// phrase or has a tag starting with 'O'; any other tag keeps it open
boolean end = tag != null && (start || chunkTags[i].charAt(0) == 'O');
if(end){ //add the current phrase
//add at AnalysedText level, because offsets are absolute
//NOTE we are already at the next token when we detect the end,
//     so the phrase covers the tokens [i-chunkTokenCount .. i-1]
Chunk chunk = at.addChunk(
tokenList.get(i-chunkTokenCount).getStart(),
tokenList.get(i-1).getEnd());
//the annotation value is the mean of the chunkProb values summed up for
//the tokens of this phrase (presumably per-token probabilities - confirm)
chunk.addAnnotation(PHRASE_ANNOTATION,
new Value<PhraseTag>(tag,
chunkProps/(double)chunkTokenCount));
//reset the state
tag = null;
chunkTokenCount = 0;
chunkProps = 0;
}
//NOTE: this check needs to stay after the end handling above, because a 'B'
//      tag both closes the previous phrase and opens the next one
if(start){ //create the new tag
tag = getPhraseTag(tagSet,adhocTags,
chunkTags[i].substring(2), language); //skip 'B-'
}
//tokens are only counted while a phrase is open; this includes the token
//that just opened it
if(tag != null){ //count this token for the current chunk
chunkProps = chunkProps + chunkProb[i];
chunkTokenCount++;
}
}
//a phrase that is still open at this point was not closed by a following
//'B' or 'O' tag, so it needs to be added here
//NOTE(review): assumes i is one past the last processed token, so that the
//     phrase covers the tokens [i-chunkTokenCount .. i-1] - confirm against
//     the declaration of i
if(tag != null){
Chunk chunk = at.addChunk(
tokenList.get(i-chunkTokenCount).getStart(),
tokenList.get(i-1).getEnd());
//same value as for phrases closed earlier: the mean of the summed up
//chunkProb values over the tokens of the phrase
chunk.addAnnotation(PHRASE_ANNOTATION,
new Value<PhraseTag>(tag,
chunkProps/(double)chunkTokenCount));
}
// (4) clean up