Package org.apache.ctakes.temporal.eval

Source Code of org.apache.ctakes.temporal.eval.EvaluationOfAnnotationSpans_ImplBase

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.temporal.eval;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.FileHandler;
import java.util.logging.Formatter;
import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.logging.Logger;

import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.eval.AnnotationStatistics;
import org.cleartk.util.ViewURIUtil;
import org.uimafit.factory.AggregateBuilder;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.pipeline.JCasIterable;
import org.uimafit.pipeline.SimplePipeline;
import org.uimafit.util.JCasUtil;

import com.google.common.base.Function;
import com.google.common.collect.Ordering;

public abstract class EvaluationOfAnnotationSpans_ImplBase extends
Evaluation_ImplBase<AnnotationStatistics<String>> {

  private final Logger logger = Logger.getLogger(this.getClass().getName());
  public void setLogging(Level level, File outputFile) throws IOException {
    if (!outputFile.getParentFile().exists()) {
      outputFile.getParentFile().mkdirs();
    }
    this.logger.setLevel(level);
    FileHandler handler = new FileHandler(outputFile.getPath());
    handler.setFormatter(new Formatter() {
      @Override
      public String format(LogRecord record) {
        return record.getMessage() + '\n';
      }
    });
    this.logger.addHandler(handler);
  }

  private Class<? extends Annotation> annotationClass;

  public EvaluationOfAnnotationSpans_ImplBase(
      File baseDirectory,
      File rawTextDirectory,
      File xmlDirectory,
      XMLFormat xmlFormat,
      File xmiDirectory,
      File treebankDirectory,
      Class<? extends Annotation> annotationClass) {
    super(baseDirectory, rawTextDirectory, xmlDirectory, xmlFormat, xmiDirectory, treebankDirectory);
    this.annotationClass = annotationClass;
  }

  public EvaluationOfAnnotationSpans_ImplBase(
      File baseDirectory,
      File rawTextDirectory,
      File xmlDirectory,
      XMLFormat xmlFormat,
      File xmiDirectory,
      Class<? extends Annotation> annotationClass) {
    this(baseDirectory,rawTextDirectory, xmlDirectory, xmlFormat, xmiDirectory, null, annotationClass);
  }

  protected abstract AnalysisEngineDescription getDataWriterDescription(File directory)
      throws ResourceInitializationException;

  protected abstract void trainAndPackage(File directory) throws Exception;

  @Override
  protected void train(CollectionReader collectionReader, File directory) throws Exception {
    AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
    aggregateBuilder.add(CopyFromGold.getDescription(this.annotationClass));
    aggregateBuilder.add(this.getDataWriterDescription(directory), "TimexView", CAS.NAME_DEFAULT_SOFA);
    SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
    this.trainAndPackage(directory);
  }

  protected abstract AnalysisEngineDescription getAnnotatorDescription(File directory)
      throws ResourceInitializationException;

  protected abstract Collection<? extends Annotation> getGoldAnnotations(JCas jCas, Segment segment);

  protected abstract Collection<? extends Annotation> getSystemAnnotations(JCas jCas, Segment segment);

  @Override
  protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
      throws Exception {
    AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
    aggregateBuilder.add(this.getAnnotatorDescription(directory), "TimexView", CAS.NAME_DEFAULT_SOFA);
    if(this.i2b2Output != null){
      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(WriteI2B2XML.class, WriteI2B2XML.PARAM_OUTPUT_DIR, this.i2b2Output), "TimexView", CAS.NAME_DEFAULT_SOFA);
    }
    AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
    Ordering<Annotation> bySpans = Ordering.<Integer> natural().lexicographical().onResultOf(
        new Function<Annotation, List<Integer>>() {
          @Override
          public List<Integer> apply(Annotation annotation) {
            return Arrays.asList(annotation.getBegin(), annotation.getEnd());
          }
        });
    for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) {
      JCas goldView = jCas.getView(GOLD_VIEW_NAME);
      JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
      for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
        if (!THYMEData.SEGMENTS_TO_SKIP.contains(segment.getId())) {
          Collection<? extends Annotation> goldAnnotations = this.getGoldAnnotations(goldView, segment);
          Collection<? extends Annotation> systemAnnotations = this.getSystemAnnotations(systemView, segment);
          stats.add(goldAnnotations, systemAnnotations);

          Set<Annotation> goldSet = new TreeSet<Annotation>(bySpans);
          for (Annotation goldAnnotation : goldAnnotations) {
            // TODO: fix data so that this is not necessary
            if (goldAnnotation.getBegin() == Integer.MAX_VALUE || goldAnnotation.getEnd() == Integer.MIN_VALUE) {
              this.logger.warning("Invalid annotation");
              continue;
            }
            goldSet.add(goldAnnotation);
          }
          //goldSet.addAll(goldAnnotations);
          Set<Annotation> systemSet = new TreeSet<Annotation>(bySpans);
          systemSet.addAll(systemAnnotations);

          Set<Annotation> goldOnly = new TreeSet<Annotation>(bySpans);
          goldOnly.addAll(goldSet);
          goldOnly.removeAll(systemSet);

          Set<Annotation> systemOnly = new TreeSet<Annotation>(bySpans);
          systemOnly.addAll(systemSet);
          systemOnly.removeAll(goldSet);

          String text = jCas.getDocumentText().replaceAll("[\r\n]", " ");
          if (!goldOnly.isEmpty() || !systemOnly.isEmpty()) {
            this.logger.fine("Errors in : " + ViewURIUtil.getURI(jCas).toString());
            Set<Annotation> errors = new TreeSet<Annotation>(bySpans);
            errors.addAll(goldOnly);
            errors.addAll(systemOnly);
            for (Annotation annotation : errors) {
              int begin = annotation.getBegin();
              int end = annotation.getEnd();
              int windowBegin = Math.max(0, begin - 50);
              int windowEnd = Math.min(text.length(), end + 50);
              String label = goldOnly.contains(annotation) ? "DROPPED:" : "ADDED:  ";
              this.logger.fine(String.format(
                  "%s  ...%s[!%s!:%d-%d]%s...",
                  label,
                  text.substring(windowBegin, begin),
                  text.substring(begin, end),
                  begin,
                  end,
                  text.substring(end, windowEnd)));
            }
            //add correct predictions:
            for (Annotation annotation: goldSet){
              if (!errors.contains(annotation)){
                int begin = annotation.getBegin();
                int end = annotation.getEnd();
                int windowBegin = Math.max(0, begin - 50);
                int windowEnd = Math.min(text.length(), end + 50);
                String label = "CORRECT:";
                this.logger.fine(String.format(
                    "%s  ...%s[!%s!:%d-%d]%s...",
                    label,
                    text.substring(windowBegin, begin),
                    text.substring(begin, end),
                    begin,
                    end,
                    text.substring(end, windowEnd)));
              }
            }
          }
          Set<Annotation> partialGold = new HashSet<Annotation>();
          Set<Annotation> partialSystem = new HashSet<Annotation>();

          // get overlapping spans
          if(this.printOverlapping){
            // iterate over all remaining gold annotations
            for(Annotation gold : goldOnly){
              Annotation bestSystem = null;
              int bestOverlap = 0;
              for(Annotation system : systemOnly){
                if(system.getBegin() >= gold.getBegin() && system.getEnd() <= gold.getEnd()){
                  // system completely contained by gold
                  int overlap = system.getEnd() - system.getBegin();
                  if(overlap > bestOverlap){
                    bestOverlap = overlap;
                    bestSystem = system;
                  }
                }else if(gold.getBegin() >= system.getBegin() && gold.getEnd() <= system.getEnd()){
                  // gold completely contained by gold
                  int overlap = gold.getEnd() - gold.getBegin();
                  if(overlap > bestOverlap){
                    bestOverlap = overlap;
                    bestSystem = system;
                  }
                }
              }
              if(bestSystem != null){
                this.logger.info(String.format("Allowed overlapping annotation: Gold(%s) => System(%s)\n", gold.getCoveredText(), bestSystem.getCoveredText()));
                partialGold.add(gold);
                partialSystem.add(bestSystem);
              }
            }
            if(partialGold.size() > 0){
              goldOnly.removeAll(partialGold);
              systemOnly.removeAll(partialSystem);
              assert partialGold.size() == partialSystem.size();
              this.logger.info(String.format("Found %d overlapping spans and removed from gold/system errors\n", partialGold.size()));
            }
          }
        }
      }
    }
    return stats;
  }
}
TOP

Related Classes of org.apache.ctakes.temporal.eval.EvaluationOfAnnotationSpans_ImplBase

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.