Package org.apache.ctakes.smokingstatus.ae

Source Code of org.apache.ctakes.smokingstatus.ae.ResolutionAnnotator

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.smokingstatus.ae;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import org.apache.ctakes.core.resource.FileResource;
import org.apache.ctakes.smokingstatus.Const;
import org.apache.ctakes.smokingstatus.type.NonSmokerNamedEntityAnnotation;
import org.apache.ctakes.smokingstatus.type.SmokerNamedEntityAnnotation;
import org.apache.ctakes.smokingstatus.type.libsvm.NominalAttributeValue;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.TOP;

/**
* Resolves the data produced by the KU classifier, negation detection, and PCS
* classifier into a single smoking status value for the given sentence. The old
* NominalAttributeValue objects are removed and replaced with a single
* NominalAttributeValue object that represents the final classification.
*
* @author Mayo Clinic
*
*/
public class ResolutionAnnotator
{
    Set<String> conWords; //contradiction words for negation -- if this word appears in sentence do not negate
  // LOG4J logger based on class name
  public Logger iv_logger = Logger.getLogger(getClass().getName());

  public void initialize(UimaContext aContext)
  throws AnnotatorConfigurationException, AnnotatorInitializationException
  {
    conWords = new HashSet<String>();
   
    try
    {
      //String conWordsFileName = (String) aContext.getConfigParameterValue("ConWordsFile");
      //conWords = readLinesFromFile(FileLocator.locateFile(conWordsFileName.replaceAll(apiMacroHome, ".")).getAbsolutePath());
     
      FileResource fResrc = (FileResource) aContext.getResourceObject("negationContradictionWordsKey");
      File conWordsFile = fResrc.getFile();
      conWords = readLinesFromFile(conWordsFile.getAbsolutePath());
     
    }
    catch (Exception ace)
    {
      throw new AnnotatorConfigurationException(ace);
    }
  }
 
    public void process(JCas jcas)
      throws AnnotatorProcessException
    {       
        // iterate over the NominalAttributeValue objects in the CAS
        // figure out the KU and PCS classification values
        String kuClassification = null;
        String pcsClassification = null;
        Iterator<?> navItr = jcas.getJFSIndexRepository().getAnnotationIndex(
                NominalAttributeValue.type).iterator();
    String navName = null;
       
        List<NominalAttributeValue> removalList = new ArrayList<NominalAttributeValue>();
        while (navItr.hasNext())
        {
            NominalAttributeValue nav = (NominalAttributeValue) navItr.next();

            String nVal = nav.getNominalValue();

            if (nVal.equals(Const.CLASS_KNOWN)
                    || nVal.equals(Const.CLASS_UNKNOWN))
            {
                kuClassification = nVal;
                navName = nav.getAttributeName();
            } else if (nVal.equals(Const.CLASS_CURR_SMOKER)
                    || nVal.equals(Const.CLASS_PAST_SMOKER)
                    || nVal.equals(Const.CLASS_SMOKER))
            {
                pcsClassification = nVal;
                navName = nav.getAttributeName();
            } else
            {
                throw new AnnotatorProcessException(new Exception(
                        "Nominal value not part of " + Const.class + ": "
                                + nVal));
            }
            removalList.add(nav);
        }

        // remove old NominalAttributeValue objects from CAS
        Iterator<NominalAttributeValue> removalItr = removalList.iterator();
        while (removalItr.hasNext())
        {
            TOP top = (TOP) removalItr.next();
            top.removeFromIndexes();
        }
     
        /**
         *
         * This is to deal with cases like "nonsmoker" and "non-smoker"
         * There are two dictionaries: smoker.dictionary and nonsmoker.dictionary
         * and two NameEntities: SmokerNamedEntityAnnotation and NonSmokerNamedEntityAnnotation
         * Each includes smoker or nonsmoker keywords respectively
         * Configuration file and dictionary are set up in Resources in DitionaryLookupAnnotator.xml
         */
      //Smoker or Nonsmoker NamedEntityAnnotation are created only if the sentence include
      //smoker or nonsmoker keywords
      int negCnt = getSmokerNegatedCount(jcas);     
      int nonsmokerCnt = getNonSmokerNegatedCount(jcas);
      int negConCnt = getNegConCount(jcas);
      String finalClassification = null;
                                 
      /**
        * 12/04/08
        * Originally each roundtrip would have processed just one sentence
        * Now, we process the complete doc
        *
        * 1/22/09 REVERTING TO ORIGINAL CODE as classifier need to just one sentence in the cas
        */

        if (kuClassification.equals(Const.CLASS_UNKNOWN))
        {
            finalClassification = kuClassification;
        } else
        {
          if ( (negCnt>0 && negConCnt==0) || nonsmokerCnt>0 )
            {
                finalClassification = Const.CLASS_NON_SMOKER;
            } else
            {
                finalClassification = pcsClassification;
            }
        }

        //---check sentence-level classification
    if (iv_logger.isInfoEnabled())
     if(finalClassification!=Const.CLASS_UNKNOWN) {
          Iterator senIter = jcas.getJFSIndexRepository().getAnnotationIndex(Sentence.type).iterator();   
          while(senIter.hasNext()) {
            Sentence sen = (Sentence) senIter.next();
            iv_logger.info("|"+sen.getCoveredText() + "|" + finalClassification + "|" + negCnt);
          }
        }
        //---
     
        // add final classification as a new NominalAttributeValue object
        NominalAttributeValue finalNav = new NominalAttributeValue(jcas);
        finalNav.setAttributeName(navName);
        finalNav.setNominalValue(finalClassification);
        finalNav.addToIndexes();
    }
   
  private Set<String> readLinesFromFile(String fileName) throws IOException
  {
    Set<String> returnValues = new HashSet<String>();
    File file = new File(fileName);
      BufferedReader fileReader = new BufferedReader(new FileReader(file));
   
    String line;
    while((line = fileReader.readLine()) != null)
    {
        line = line.toLowerCase();
          returnValues.add(line);
    }
    return returnValues;
  }
 
  private int getSmokerNegatedCount(JCas jcas)
  {
    int negCnt = 0;     
    Iterator<?> neItr= jcas.getJFSIndexRepository().getAnnotationIndex(
        SmokerNamedEntityAnnotation.type).iterator();

    while (neItr.hasNext())
    {
      SmokerNamedEntityAnnotation neAnn = (SmokerNamedEntityAnnotation) neItr.next();
      int certainty = neAnn.getPolarity();
      //TODO: need to re-define this in TypeSystemConst.java and re-release core
//      if (certainty == TypeSystemConst.NE_CERTAINTY_NEGATED)
      if (certainty == -1)
        negCnt++;
      iv_logger.info("***SmokerNameEntity***" + neAnn.getCoveredText() + " " + negCnt);
    }

    return negCnt;
  }
   
  private int getNonSmokerNegatedCount(JCas jcas)
  {
    int nonSmokerCnt = 0;
    Iterator<?> neItr= jcas.getJFSIndexRepository().getAnnotationIndex(
        NonSmokerNamedEntityAnnotation.type).iterator();

    while (neItr.hasNext())
    {
      NonSmokerNamedEntityAnnotation neAnn = (NonSmokerNamedEntityAnnotation) neItr.next();
      nonSmokerCnt++;
      iv_logger.info("***NonSmokerNameEntity***" + neAnn.getCoveredText() + " " + nonSmokerCnt + " " + neAnn.getPolarity());
    }

    return nonSmokerCnt;
  }
   
  /**
   * This is to count contradiction words -- if appears do not negate
   * eg) Tobacco: no quit in 1980 -- "quit" is contradiction words. So do not negate
   */
    private int getNegConCount(JCas jcas) {
      int conCnt = 0;
      Iterator<?> wordTokenItr = jcas.getJFSIndexRepository().getAnnotationIndex(
          WordToken.type).iterator();

      while (wordTokenItr.hasNext())
      {
        WordToken token = (WordToken) wordTokenItr.next();
        String tok = token.getCoveredText();

        if(tok == null) continue;
        tok = tok.toLowerCase().replaceAll("[\\W]", " ").trim();
        String[] toks = tok.split("\\s");
        for(int i=0; i<toks.length; i++)
          if(conWords.contains(toks[i]))
            conCnt++;       
      }

      return conCnt;
    }
    private String apiMacroHome = "\\$main_root";
}
TOP

Related Classes of org.apache.ctakes.smokingstatus.ae.ResolutionAnnotator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.