// Build the TrainingInstance (feature set) for the concept held by the current map entry.
ApiConcept problem = problemEntrySet.getValue();
// TODO fix this code to either replace/enhance the use of the converter
// or use some other method
// (the earlier approach called converter.convertReverse() on problem.getBegin() / problem.getEnd())
List<LineAndTokenPosition> beginAndEndOfConcept = calculateBeginAndEndOfConcept(problem);
// Element 0 is used as the concept's begin position, element 1 as its end position.
LineAndTokenPosition problemBegin = beginAndEndOfConcept.get(0);
LineAndTokenPosition problemEnd = beginAndEndOfConcept.get(1);
int lineNumber = problemBegin.getLine();
// Negative token offsets are clamped to the first token of the line.
int conceptBeginTokenOffset = Math.max(problemBegin.getTokenOffset(), 0);
int conceptEndTokenOffset = Math.max(problemEnd.getTokenOffset(), 0);
// lineNumber is treated as 1-based when indexing the token table.
String[] currentLine = arrayOfArrayOfTokens[lineNumber - 1];
int lineLength = currentLine.length;
if (assertionDecoderConfiguration.getEnabledFeatureIdSet() == null)
{
    logger.severe("enabled feature id set is null!!! using all features");
} else
{
    logger.fine("enabled feature id set is set; using selected feature set");
}
TrainingInstance trainingInstance = new TrainingInstance();
if (checkForEnabledFeature("conceptUnigrams"))
{
    // One feature per token of the concept; the end offset is inclusive and
    // is cut off at the end of the line if it runs past it.
    for (int k = conceptBeginTokenOffset; k <= conceptEndTokenOffset && k < lineLength; k++)
    {
        trainingInstance.addFeature("concept_unigram_" + StringHandling.escapeStringForFeatureName(currentLine[k]));
    }
}
if (checkForEnabledFeature("wordLeftFeature"))
{
    // The helper receives the raw (unclamped) begin/end token offsets.
    List<String> wordLeftFeatureList = FeatureUtility.constructWordLeftFeatureList(problemBegin.getTokenOffset(), problemEnd.getTokenOffset(), currentLine);
    for (String currentFeature : wordLeftFeatureList)
    {
        trainingInstance.addFeature(currentFeature);
    }
}
if (checkForEnabledFeature("wordRightFeature"))
{
    // The helper receives the raw (unclamped) begin/end token offsets.
    List<String> wordRightFeatureList = FeatureUtility.constructWordRightFeatureList(problemBegin.getTokenOffset(), problemEnd.getTokenOffset(), currentLine);
    for (String currentFeature : wordRightFeatureList)
    {
        trainingInstance.addFeature(currentFeature);
    }
}
// NOTE(review): the instance is registered here only if it already has at least one
// feature. The features added below are still applied to this same object, but an
// instance that only gains features below is not registered by this statement --
// confirm that this ordering is intended.
Set<String> featureSet = trainingInstance.getFeatureSet();
if (featureSet != null && featureSet.size() > 0)
{
    trainingInstanceMap.put(index, trainingInstance);
}
String conceptText = problem.getText();
if (checkForEnabledFeature("conceptTextFeature"))
{
    String conceptTextFeature = MedFactsRunner.constructConceptPhraseFeature(conceptText);
    trainingInstance.addFeature(conceptTextFeature);
}
if (checkForEnabledFeature("conceptPseudoHeadFeature"))
{
    // The last token of the concept serves as its pseudo head. Use the clamped end
    // offset and check it against the line length, mirroring the bounds handling of
    // the unigram loop, so an out-of-range offset no longer throws
    // ArrayIndexOutOfBoundsException.
    if (conceptEndTokenOffset < lineLength)
    {
        String conceptHead = currentLine[conceptEndTokenOffset];
        trainingInstance.addFeature(MedFactsRunner.constructConceptHeadFeature(conceptHead));
    } else
    {
        logger.warning(String.format("skipping concept pseudo head feature; token offset %d is outside line %d (# tokens on line: %d)", conceptEndTokenOffset, lineNumber, lineLength));
    }
}