protected List<LP2Rule> createStartRulesForExample(final TextRulerExample example) {
TextRulerTarget target = example.getTarget();
List<LP2Rule> result = new ArrayList<LP2Rule>();
CAS docCas = example.getDocumentCAS();
TextRulerAnnotation exampleAnnotation = example.getAnnotation();
TypeSystem ts = docCas.getTypeSystem();
Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);
boolean isLeftBoundary = (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY || target.type == MLTargetType.SINGLE_LEFT_CORRECTION);
int thePosition = isLeftBoundary ? exampleAnnotation.getBegin() : exampleAnnotation.getEnd();
List<AnnotationFS> leftContext = TextRulerToolkit.getAnnotationsBeforePosition(docCas,
thePosition, windowSize,
TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
List<AnnotationFS> rightContext;
if (target.type == MLTargetType.SINGLE_LEFT_CORRECTION
|| target.type == MLTargetType.SINGLE_RIGHT_CORRECTION) {
rightContext = TextRulerToolkit.getAnnotationsAfterPosition(docCas, thePosition,
windowSize + 1, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet),
tokensRootType);
rightContext.remove(0);
} else {
rightContext = TextRulerToolkit.getAnnotationsAfterPosition(docCas, thePosition, windowSize,
TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
}
int totalCount = leftContext.size() + rightContext.size();
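// LEFT AND RIGHT CONTEXT (PRE AND POST FILLER PATTERNS): the loop below
// walks the left context first (nearest token first), then the right context.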
// result.add(createStartRuleForConstraint(example, 0, true, null));
for (int index = 0; index < totalCount; index++) {
boolean isPre = index < leftContext.size();
int prePostIndex = isPre ? index : index - leftContext.size();
AnnotationFS tokenAFS = isPre ? leftContext.get(leftContext.size() - 1 - prePostIndex)
: rightContext.get(prePostIndex);
TextRulerAnnotation tokenAnnotation = new TextRulerAnnotation(tokenAFS, example.getDocument());
LP2RuleItem wordItem = new LP2RuleItem();
// one rule with only the word constraint:
wordItem.setWordConstraint(tokenAnnotation);
result.add(createStartRuleForConstraint(example.getTarget(), prePostIndex + 1, isPre,
wordItem));
if (wordItem.getWordConstraint().isRegExpConstraint()) {
LP2RuleItem basicItem = new LP2RuleItem();
// basicItem.setOtherConstraint("basicTM", new
// MLLP2OtherConstraint(tokenAnnotation, tokenAnnotation));
basicItem.addOtherConstraint(new MLLP2OtherConstraint(tokenAnnotation, tokenAnnotation));
result.add(createStartRuleForConstraint(example.getTarget(), prePostIndex + 1, isPre,
basicItem));
}
// // POS-Tags created by our test hmm tagger.
// Type posTagsRootType = ts.getType("org.apache.uima.ml.ML.postag");
// if (posTagsRootType != null)
// {
// List<AnnotationFS> posTagAnnotations =
// TextRulerToolkit.getAnnotationsWithinBounds(example.getDocumentCAS(),
// tokenAnnotation.getBegin(), tokenAnnotation.getEnd(), null,
// posTagsRootType);
// if (posTagAnnotations.size()>0)
// {
// if (TextRulerToolkit.DEBUG && posTagAnnotations.size()>1)
// {
// TextRulerToolkit.logIfDebug("HOW CAN ONE TOKEN HAVE MORE THAN ONE POS TAG ?? "+tokenAnnotation.getBegin()+":"+tokenAnnotation.getEnd()+"="+tokenAnnotation.getCoveredText());
// for (AnnotationFS afs : posTagAnnotations)
// {
// System.out.print(afs.getType().getShortName()+":"+afs.getCoveredText()+" "+afs.getBegin()+":"+afs.getEnd()+"\n");
// }
// TextRulerToolkit.logIfDebug("");
// }
//
// TextRulerAnnotation posTagAnnotation = new
// TextRulerAnnotation(posTagAnnotations.get(0),
// example.getDocument());
// LP2RuleItem basicItem = new LP2RuleItem();
// basicItem.setOtherConstraint("postag", new
// MLLP2OtherConstraint(posTagAnnotation, posTagAnnotation));
// result.add(createStartRuleForConstraint(example.getTarget(),
// prePostIndex+1, isPre, basicItem));
// }
// }
// New dynamic system: grab everything we get from the annotation
// index that lies over this token.
// Annotations WITHIN the token (i.e. with smaller bounds than the
// token itself) are ignored for now. We could add them using the
// CONTAINS constraint, but our MLLP2OtherConstraint is not yet
// capable of this.
List<AnnotationFS> featureAnnotations = TextRulerToolkit.getNonTMAnnoationsOverToken(docCas,
tokenAFS, filterSetWithSlotNames);
if (TextRulerToolkit.DEBUG && featureAnnotations.size() > 1) {
TextRulerToolkit.log("FOUND MORE THAN ONE EXTRA TOKEN FEATURE ANNOTATION !");
for (AnnotationFS featA : featureAnnotations)
TextRulerToolkit.log(featA.toString());
TextRulerToolkit.log("--------------------------------");
}
for (AnnotationFS featA : featureAnnotations) {
TextRulerAnnotation featureAnnot = new TextRulerAnnotation(featA, example.getDocument());
LP2RuleItem basicItem = new LP2RuleItem();
basicItem.addOtherConstraint(new MLLP2OtherConstraint(tokenAnnotation, featureAnnot));
result.add(createStartRuleForConstraint(example.getTarget(), prePostIndex + 1, isPre,
basicItem));
}