/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.smokingstatus.ae;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.TOP;
import org.apache.ctakes.smokingstatus.type.NonSmokerNamedEntityAnnotation;
import org.apache.ctakes.smokingstatus.type.SmokerNamedEntityAnnotation;
import org.apache.ctakes.core.resource.FileResource;
import org.apache.ctakes.smokingstatus.Const;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.smokingstatus.type.libsvm.NominalAttributeValue;
/**
* Resolves the data produced by the KU classifier, negation detection, and PCS
* classifier into a single smoking status value for the given sentence. The old
* NominalAttributeValue objects are removed and replaced with a single
* NominalAttributeValue object that represents the final classification.
*
* @author Mayo Clinic
*
*/
public class ResolutionAnnotator
{
Set<String> conWords; //contradiction words for negation -- if this word appears in sentence do not negate
// LOG4J logger based on class name
public Logger iv_logger = Logger.getLogger(getClass().getName());
public void initialize(UimaContext aContext)
throws AnnotatorConfigurationException, AnnotatorInitializationException
{
conWords = new HashSet<String>();
try
{
//String conWordsFileName = (String) aContext.getConfigParameterValue("ConWordsFile");
//conWords = readLinesFromFile(FileLocator.locateFile(conWordsFileName.replaceAll(apiMacroHome, ".")).getAbsolutePath());
FileResource fResrc = (FileResource) aContext.getResourceObject("negationContradictionWordsKey");
File conWordsFile = fResrc.getFile();
conWords = readLinesFromFile(conWordsFile.getAbsolutePath());
}
catch (Exception ace)
{
throw new AnnotatorConfigurationException(ace);
}
}
public void process(JCas jcas)
throws AnnotatorProcessException
{
// iterate over the NominalAttributeValue objects in the CAS
// figure out the KU and PCS classification values
String kuClassification = null;
String pcsClassification = null;
Iterator<?> navItr = jcas.getJFSIndexRepository().getAnnotationIndex(
NominalAttributeValue.type).iterator();
String navName = null;
List<NominalAttributeValue> removalList = new ArrayList<NominalAttributeValue>();
while (navItr.hasNext())
{
NominalAttributeValue nav = (NominalAttributeValue) navItr.next();
String nVal = nav.getNominalValue();
if (nVal.equals(Const.CLASS_KNOWN)
|| nVal.equals(Const.CLASS_UNKNOWN))
{
kuClassification = nVal;
navName = nav.getAttributeName();
} else if (nVal.equals(Const.CLASS_CURR_SMOKER)
|| nVal.equals(Const.CLASS_PAST_SMOKER)
|| nVal.equals(Const.CLASS_SMOKER))
{
pcsClassification = nVal;
navName = nav.getAttributeName();
} else
{
throw new AnnotatorProcessException(new Exception(
"Nominal value not part of " + Const.class + ": "
+ nVal));
}
removalList.add(nav);
}
// remove old NominalAttributeValue objects from CAS
Iterator<NominalAttributeValue> removalItr = removalList.iterator();
while (removalItr.hasNext())
{
TOP top = (TOP) removalItr.next();
top.removeFromIndexes();
}
/**
*
* This is to deal with cases like "nonsmoker" and "non-smoker"
* There are two dictionaries: smoker.dictionary and nonsmoker.dictionary
* and two NameEntities: SmokerNamedEntityAnnotation and NonSmokerNamedEntityAnnotation
* Each includes smoker or nonsmoker keywords respectively
* Configuration file and dictionary are set up in Resources in DitionaryLookupAnnotator.xml
*/
//Smoker or Nonsmoker NamedEntityAnnotation are created only if the sentence include
//smoker or nonsmoker keywords
int negCnt = getSmokerNegatedCount(jcas);
int nonsmokerCnt = getNonSmokerNegatedCount(jcas);
int negConCnt = getNegConCount(jcas);
String finalClassification = null;
/**
* 12/04/08
* Originally each roundtrip would have processed just one sentence
* Now, we process the complete doc
*
* 1/22/09 REVERTING TO ORIGINAL CODE as classifier need to just one sentence in the cas
*/
if (kuClassification.equals(Const.CLASS_UNKNOWN))
{
finalClassification = kuClassification;
} else
{
if ( (negCnt>0 && negConCnt==0) || nonsmokerCnt>0 )
{
finalClassification = Const.CLASS_NON_SMOKER;
} else
{
finalClassification = pcsClassification;
}
}
//---check sentence-level classification
if (iv_logger.isInfoEnabled())
if(finalClassification!=Const.CLASS_UNKNOWN) {
Iterator senIter = jcas.getJFSIndexRepository().getAnnotationIndex(Sentence.type).iterator();
while(senIter.hasNext()) {
Sentence sen = (Sentence) senIter.next();
iv_logger.info("|"+sen.getCoveredText() + "|" + finalClassification + "|" + negCnt);
}
}
//---
// add final classification as a new NominalAttributeValue object
NominalAttributeValue finalNav = new NominalAttributeValue(jcas);
finalNav.setAttributeName(navName);
finalNav.setNominalValue(finalClassification);
finalNav.addToIndexes();
}
private Set<String> readLinesFromFile(String fileName) throws IOException
{
Set<String> returnValues = new HashSet<String>();
File file = new File(fileName);
BufferedReader fileReader = new BufferedReader(new FileReader(file));
String line;
while((line = fileReader.readLine()) != null)
{
line = line.toLowerCase();
returnValues.add(line);
}
return returnValues;
}
private int getSmokerNegatedCount(JCas jcas)
{
int negCnt = 0;
Iterator<?> neItr= jcas.getJFSIndexRepository().getAnnotationIndex(
SmokerNamedEntityAnnotation.type).iterator();
while (neItr.hasNext())
{
SmokerNamedEntityAnnotation neAnn = (SmokerNamedEntityAnnotation) neItr.next();
int certainty = neAnn.getPolarity();
//TODO: need to re-define this in TypeSystemConst.java and re-release core
// if (certainty == TypeSystemConst.NE_CERTAINTY_NEGATED)
if (certainty == -1)
negCnt++;
iv_logger.info("***SmokerNameEntity***" + neAnn.getCoveredText() + " " + negCnt);
}
return negCnt;
}
private int getNonSmokerNegatedCount(JCas jcas)
{
int nonSmokerCnt = 0;
Iterator<?> neItr= jcas.getJFSIndexRepository().getAnnotationIndex(
NonSmokerNamedEntityAnnotation.type).iterator();
while (neItr.hasNext())
{
NonSmokerNamedEntityAnnotation neAnn = (NonSmokerNamedEntityAnnotation) neItr.next();
nonSmokerCnt++;
iv_logger.info("***NonSmokerNameEntity***" + neAnn.getCoveredText() + " " + nonSmokerCnt + " " + neAnn.getPolarity());
}
return nonSmokerCnt;
}
/**
* This is to count contradiction words -- if appears do not negate
* eg) Tobacco: no quit in 1980 -- "quit" is contradiction words. So do not negate
*/
private int getNegConCount(JCas jcas) {
int conCnt = 0;
Iterator<?> wordTokenItr = jcas.getJFSIndexRepository().getAnnotationIndex(
WordToken.type).iterator();
while (wordTokenItr.hasNext())
{
WordToken token = (WordToken) wordTokenItr.next();
String tok = token.getCoveredText();
if(tok == null) continue;
tok = tok.toLowerCase().replaceAll("[\\W]", " ").trim();
String[] toks = tok.split("\\s");
for(int i=0; i<toks.length; i++)
if(conWords.contains(toks[i]))
conCnt++;
}
return conCnt;
}
private String apiMacroHome = "\\$main_root";
}