{
if (!(identifiedAnnotation instanceof EntityMention || identifiedAnnotation instanceof EventMention))
{
continue;
}
IdentifiedAnnotation entityOrEventMention = identifiedAnnotation;
if (entityOrEventMention.getPolarity() == -1)
{
logger.debug(String.format(" - identified annotation: [%d-%d] polarity %d (%s)",
entityOrEventMention.getBegin(),
entityOrEventMention.getEnd(),
entityOrEventMention.getPolarity(),
entityOrEventMention.getClass().getName()));
}
Instance<String> instance = new Instance<String>();
// // extract all features that require only the entity mention annotation
// instance.addAll(tokenFeatureExtractor.extract(jCas, entityMention));
// extract all features that require the token and sentence annotations
//Sentence sentence = sentenceList.iterator().next();
/*
if (sentence != null)
{
for (ContextExtractor<IdentifiedAnnotation> extractor : this.contextFeatureExtractors) {
instance.addAll(extractor.extractWithin(identifiedAnnotationView, entityMention, sentence));
}
} else
{
// TODO extract context features for annotations that don't fall within a sentence
logger.log(Level.WARN, "FIXME/TODO: generate context features for entities that don't fall within a sentence");
}
*/
/*
for (ContextExtractor<BaseToken> extractor : this.tokenContextFeatureExtractors) {
instance.addAll(extractor.extract(identifiedAnnotationView, entityMention));
}
*/
// only run the token extractors directly when not doing domain adaptation (ffDomainAdaptor is null)
if (ffDomainAdaptor==null) {
for (CleartkExtractor extractor : this.tokenCleartkExtractors) {
//instance.addAll(extractor.extractWithin(identifiedAnnotationView, entityMention, sentence));
instance.addAll(extractor.extract(identifiedAnnotationView, entityOrEventMention));
}
}
// List<Feature> cuePhraseFeatures = null;
// cuePhraseInWindowExtractor.extract(jCas, entityOrEventMention);
//cuePhraseInWindowExtractor.extractWithin(jCas, entityMention, firstCoveringSentence);
// List<Sentence> sents = new ArrayList<Sentence>(coveringSents.get(entityOrEventMention));
List<Sentence> sents = new ArrayList<Sentence>(JCasUtil.selectCovering(jCas, Sentence.class, entityOrEventMention.getBegin(), entityOrEventMention.getEnd()));
if(sents.size() > 0){
Sentence sentence = sents.get(0);
List<AssertionCuePhraseAnnotation> cues = JCasUtil.selectCovered(AssertionCuePhraseAnnotation.class, sentence);
int closest = Integer.MAX_VALUE;
AssertionCuePhraseAnnotation closestCue = null;
for(AssertionCuePhraseAnnotation cue : cues){
List<BaseToken> tokens = JCasUtil.selectBetween(BaseToken.class, cue, entityOrEventMention);
if(tokens.size() < closest){
closestCue = cue;
closest = tokens.size();
}
// instance.addAll(cuePhraseInWindowExtractor.extractBetween(jCas, cue, entityOrEventMention));
}
if(closestCue != null && closest < 21){
instance.add(new Feature("ClosestCue_Word", closestCue.getCoveredText()));
// instance.add(new Feature("ClosestCue_Phrase", closestCue.getCuePhrase()));
instance.add(new Feature("ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily()));
instance.add(new Feature("ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory()));
// add hack-ey domain adaptation to these hacked-in features
if (!fileToDomain.isEmpty() && ffDomainAdaptor!=null) {
instance.addAll(ffDomainAdaptor.apply(new Feature("ClosestCue_Word", closestCue.getCoveredText())));
instance.addAll(ffDomainAdaptor.apply(new Feature("ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily())));
instance.addAll(ffDomainAdaptor.apply(new Feature("ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory())));
}
}
}
// if (cuePhraseFeatures != null && !cuePhraseFeatures.isEmpty())
// {
// instance.addAll(cuePhraseFeatures);
// }
// 7/9/13 SRH trying to make it work just for anatomical site
int eemTypeId = entityOrEventMention.getTypeID();
if (eemTypeId == CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
// 7/9/13 srh modified per tmiller so it's binary but not numeric feature
//instance.add(new Feature("ENTITY_TYPE_" + entityOrEventMention.getTypeID()));
instance.add(new Feature("ENTITY_TYPE_ANAT_SITE"));
// add hack-ey domain adaptation to these hacked-in features