if (checkForEnabledFeature("cueWordOrderingsLeft")) {
    // Collect the cue words on this line that belong to the CWS cue-word set
    // and begin before the concept's first token.
    List<CueWordAnnotation> leftCueWords = new ArrayList<CueWordAnnotation>();
    for (Annotation lineAnnotation : allLineAnnotations) {
        if (!(lineAnnotation instanceof CueWordAnnotation)) {
            continue;
        }
        CueWordAnnotation candidate = (CueWordAnnotation) lineAnnotation;
        if (!inCWSCueWordSet(candidate.getCueWordType())) {
            continue;
        }
        if (candidate.getBegin().getTokenOffset() < conceptBeginTokenOffset) {
            leftCueWords.add(candidate);
        }
    }
    // Emit a single feature naming the cue-word types in the annotations'
    // natural (Comparable) order: CWS_left_<type>_<type>...
    if (!leftCueWords.isEmpty()) {
        Collections.sort(leftCueWords);
        StringBuilder leftOrderingFeature = new StringBuilder("CWS_left");
        for (CueWordAnnotation leftCueWord : leftCueWords) {
            leftOrderingFeature.append("_").append(leftCueWord.getCueWordType());
        }
        trainingInstance.addFeature(leftOrderingFeature.toString());
    }
}
if (checkForEnabledFeature("cueWordOrderingsRight")) {
List<CueWordAnnotation> annots = new ArrayList<CueWordAnnotation>();
for (Annotation a : allLineAnnotations) {
if (a instanceof CueWordAnnotation) {
CueWordAnnotation an = (CueWordAnnotation)a;
if ((inCWSCueWordSet(an.getCueWordType())) && (an.getBegin().getTokenOffset() > conceptEndTokenOffset)) {
annots.add(an);
}
}
}
Collections.sort(annots);
if (annots.size() > 0) {
StringBuilder str = new StringBuilder("CWS_right");
for (CueWordAnnotation a : annots) {
str.append("_");
str.append(a.getCueWordType());
}
trainingInstance.addFeature(str.toString());
}
}
//logger.info(String.format("lineNumber: %d%n", lineNumber));
String tokensOnCurrentLine[] = textLookup[lineNumber-1];
for (int currentTokenOffset=0; currentTokenOffset < tokensOnCurrentLine.length; currentTokenOffset++)
{
String currentToken = tokensOnCurrentLine[currentTokenOffset];
List<Annotation> annotationsAtCurrentPosition = indexer.findAnnotationsForPosition(lineNumber, currentTokenOffset);
int scopeCount = 0;
if (annotationsAtCurrentPosition != null)
for (Annotation a : annotationsAtCurrentPosition)
{
if (checkForEnabledFeature("concepts") && (a instanceof ConceptAnnotation)) {
    // Positional features for a concept annotation found at the current token,
    // relative to the concept span [conceptBeginTokenOffset, conceptEndTokenOffset].
    ConceptAnnotation neighborConcept = (ConceptAnnotation) a;
    String conceptFeatureBase = "concept_" + neighborConcept.getConceptType().toString();
    int neighborConceptBegin = neighborConcept.getBegin().getTokenOffset();
    int neighborConceptEnd = neighborConcept.getEnd().getTokenOffset();
    if (neighborConceptBegin >= conceptBeginTokenOffset) {
        // NOTE(review): every concept that does not start before the target
        // concept lands here, including one starting inside the target span
        // (presumably the target concept itself) -- confirm this is intended.
        if ((neighborConceptBegin - conceptEndTokenOffset) <= 3) {
            trainingInstance.addFeature(conceptFeatureBase + "_right_3");
        }
        trainingInstance.addFeature(conceptFeatureBase + "_right");
    } else {
        trainingInstance.addFeature(conceptFeatureBase + "_left");
        // "_left_3": the gap between this concept's end and the target's
        // begin is fewer than four tokens.
        if ((conceptBeginTokenOffset - neighborConceptEnd) <= 3) {
            trainingInstance.addFeature(conceptFeatureBase + "_left_3");
        }
    }
}
if (a instanceof ScopeAnnotation)
{
ScopeAnnotation scope = (ScopeAnnotation)a;
scopeCount++;
if (checkForEnabledFeature("scope"))
{
trainingInstance.addFeature("scope");
}
if (checkForEnabledFeature("inScope"))
{
trainingInstance.addFeature("in_scope_" + currentToken);
}
if (checkForEnabledFeature("inScopeId"))
{
trainingInstance.addFeature("in_scope_id_" + scope.getScopeId() + "_" + currentToken);
}
}
if (a instanceof CueAnnotation)
{
    CueAnnotation cueAtToken = (CueAnnotation) a;

    // Positional features keyed by the cue sub-type.
    if (checkForEnabledFeature("cue")) {
        String cueFeatureBase = "cue_" + cueAtToken.getCueSubType().toString();
        int cueStartOffset = cueAtToken.getBegin().getTokenOffset();
        boolean cueStartsBeforeConcept = cueStartOffset < conceptBeginTokenOffset;
        if (!cueStartsBeforeConcept) {
            int cueStopOffset = cueAtToken.getEnd().getTokenOffset();
            trainingInstance.addFeature(cueFeatureBase + "_right");
            // NOTE(review): proximity is measured from the cue's END here and
            // from its BEGIN on the left side, i.e. the far edge in both
            // cases -- confirm this is the intended distance.
            if ((cueStopOffset - conceptEndTokenOffset) <= 3) {
                trainingInstance.addFeature(cueFeatureBase + "_right_3");
            }
        } else {
            trainingInstance.addFeature(cueFeatureBase + "_left");
            if ((conceptBeginTokenOffset - cueStartOffset) <= 3) {
                trainingInstance.addFeature(cueFeatureBase + "_left_3");
            }
        }
    }

    // Lexicalized feature carrying the current token.
    if (checkForEnabledFeature("inCue")) {
        String inCueFeature = "in_cue_" + currentToken;
        trainingInstance.addFeature(inCueFeature);
    }

    // Lexicalized feature carrying the referenced scope id and the current token.
    if (checkForEnabledFeature("inCueForScopeId")) {
        String inCueForScopeFeature = "in_cue_for_scope_id_" + cueAtToken.getScopeIdReference() + "_" + currentToken;
        trainingInstance.addFeature(inCueForScopeFeature);
    }
}
if (a instanceof CueWordAnnotation)
{
    // Emits value and positional features for a cue-word annotation found at
    // the current token. Each feature family is keyed by one of three
    // attributes of the cue word: its text, its type, or its class.
    CueWordAnnotation cueWord = (CueWordAnnotation)a;

    // The type may be null (the cueWordTypeValue feature guards against it),
    // so it must not be dereferenced unconditionally; type-based features
    // are simply skipped when it is missing.
    String cueWordType = (cueWord.getCueWordType() != null) ? cueWord.getCueWordType().toString() : null;
    String escapedCueWordText = escapeFeatureName(cueWord.getCueWordText());

    String cueWordClass = cueWord.getCueWordClass();
    boolean cueWordClassIsNotEmpty = (cueWordClass != null) && (!cueWordClass.isEmpty());
    // Escape the class whenever it is present: both cueWordClassPositional and
    // cueWordClassValue use it and the two can be enabled independently.
    // Tying the escaping to only one of them made the other emit "..._null...".
    String escapedCueWordClass = cueWordClassIsNotEmpty ? escapeFeatureName(cueWordClass) : null;

    boolean emitTextPositional = checkForEnabledFeature("cueWordTextPositional");
    boolean emitTypePositional = checkForEnabledFeature("cueWordTypePositional") && (cueWordType != null);
    boolean emitClassPositional = checkForEnabledFeature("cueWordClassPositional") && cueWordClassIsNotEmpty;

    if (emitTextPositional || emitTypePositional || emitClassPositional)
    {
        // The position relative to the concept span is the same for all three
        // positional families, so it is determined once.
        int cueWordBegin = cueWord.getBegin().getTokenOffset();
        int cueWordEnd = cueWord.getEnd().getTokenOffset();
        String cueWordPositionSuffix;
        boolean cueWordWithinThreeTokens;
        if (cueWordBegin < conceptBeginTokenOffset) {
            cueWordPositionSuffix = "_left";
            cueWordWithinThreeTokens = (conceptBeginTokenOffset - cueWordBegin) < 4;
        } else if (cueWordBegin > conceptEndTokenOffset) {
            cueWordPositionSuffix = "_right";
            cueWordWithinThreeTokens = (cueWordEnd - conceptEndTokenOffset) < 4;
        } else {
            // Begins inside the concept span; there is no "_3" variant for this case.
            cueWordPositionSuffix = "_within";
            cueWordWithinThreeTokens = false;
        }

        if (emitTextPositional) {
            String textPositionalFeature = "cueWordTextPositional_" + escapedCueWordText + cueWordPositionSuffix;
            trainingInstance.addFeature(textPositionalFeature);
            if (cueWordWithinThreeTokens) {
                trainingInstance.addFeature(textPositionalFeature + "_3");
            }
        }
        if (emitTypePositional) {
            String typePositionalFeature = "cueWordTypePositional_" + cueWordType + cueWordPositionSuffix;
            trainingInstance.addFeature(typePositionalFeature);
            if (cueWordWithinThreeTokens) {
                trainingInstance.addFeature(typePositionalFeature + "_3");
            }
        }
        if (emitClassPositional) {
            String classPositionalFeature = "cueWordClassPositional_" + escapedCueWordClass + cueWordPositionSuffix;
            trainingInstance.addFeature(classPositionalFeature);
            if (cueWordWithinThreeTokens) {
                trainingInstance.addFeature(classPositionalFeature + "_3");
            }
        }
    }

    // Position-independent value features.
    if (checkForEnabledFeature("cueWordTextValue"))
    {
        trainingInstance.addFeature("cueWordTextValue_" + escapedCueWordText);
    }
    if (checkForEnabledFeature("cueWordTypeValue") && cueWordType != null)
    {
        trainingInstance.addFeature("cueWordTypeValue_" + cueWordType);
    }
    if (checkForEnabledFeature("cueWordClassValue") && cueWordClassIsNotEmpty)
    {
        trainingInstance.addFeature("cueWordClassValue_" + escapedCueWordClass);
    }
}