Package org.apache.ctakes.relationextractor.eval

Source Code of org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation$RemoveSmallerEventMentions

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.relationextractor.eval;

import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.annotation.Nullable;

import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
import org.apache.ctakes.relationextractor.ae.LocationOfRelationExtractorAnnotator;
import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.relation.DegreeOfTextRelation;
import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.Modifier;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Feature;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.util.CasCopier;
import org.apache.uima.util.Level;
import org.apache.uima.util.XMLInputSource;
import org.cleartk.classifier.jar.DefaultDataWriterFactory;
import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
import org.cleartk.classifier.jar.GenericJarClassifierFactory;
import org.cleartk.classifier.jar.JarClassifierBuilder;
import org.cleartk.classifier.liblinear.LIBLINEARStringOutcomeDataWriter;
import org.cleartk.eval.AnnotationStatistics;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.factory.AggregateBuilder;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.pipeline.JCasIterable;
import org.uimafit.pipeline.SimplePipeline;
import org.uimafit.util.JCasUtil;

import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.lexicalscope.jewel.cli.CliFactory;
import com.lexicalscope.jewel.cli.Option;

public class RelationExtractorEvaluation extends SHARPXMI.Evaluation_ImplBase {

  public static interface Options extends SHARPXMI.EvaluationOptions {

    @Option(
        longName = "relations",
        description = "determines which relations to evaluate on (separately)",
        defaultValue = { "degree_of", "location_of" })
    public List<String> getRelations();

    @Option(
        longName = "test-on-ctakes",
        description = "evaluate test performance on ctakes entities, instead of gold standard "
            + "entities")
    public boolean getTestOnCTakes();

    @Option(
        longName = "allow-smaller-system-arguments",
        description = "for evaluation, allow system relation arguments to match gold relation "
            + "arguments that enclose them")
    public boolean getAllowSmallerSystemArguments();

    @Option(
        longName = "ignore-impossible-gold-relations",
        description = "for evaluation, ignore gold relations that would be impossible to find "
            + "because there are no corresponding system mentions")
    public boolean getIgnoreImpossibleGoldRelations();

    @Option(
        longName = "--print-errors",
        description = "print relations that were incorrectly predicted")
    public boolean getPrintErrors();

  }

  public static final Map<String, Class<? extends BinaryTextRelation>> RELATION_CLASSES =
      Maps.newHashMap();
  public static final Map<Class<? extends BinaryTextRelation>, Class<? extends RelationExtractorAnnotator>> ANNOTATOR_CLASSES =
      Maps.newHashMap();
  public static final Map<Class<? extends BinaryTextRelation>, ParameterSettings> BEST_PARAMETERS =
      Maps.newHashMap();

  static {
    RELATION_CLASSES.put("degree_of", DegreeOfTextRelation.class);
    ANNOTATOR_CLASSES.put(DegreeOfTextRelation.class, DegreeOfRelationExtractorAnnotator.class);
    BEST_PARAMETERS.put(DegreeOfTextRelation.class, new ParameterSettings(
        LIBLINEARStringOutcomeDataWriter.class,
        new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
            1.0f },
        new String[] { "-s", "1", "-c", "10.0" }));

    RELATION_CLASSES.put("location_of", LocationOfTextRelation.class);
    ANNOTATOR_CLASSES.put(LocationOfTextRelation.class, LocationOfRelationExtractorAnnotator.class);
    BEST_PARAMETERS.put(LocationOfTextRelation.class, new ParameterSettings(
        LIBLINEARStringOutcomeDataWriter.class,
        new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
            1.0f },
        new String[] { "-s", "1", "-c", "0.05" }));
  }

  public static void main(String[] args) throws Exception {
    // parse the options, validate them, and generate XMI if necessary
    final Options options = CliFactory.parseArguments(Options.class, args);
    SHARPXMI.validate(options);
    SHARPXMI.generateXMI(options);

    // determine the grid of parameters to search through
    // for the full set of LIBLINEAR parameters, see:
    // https://github.com/bwaldvogel/liblinear-java/blob/master/src/main/java/de/bwaldvogel/liblinear/Train.java
    List<ParameterSettings> gridOfSettings = Lists.newArrayList();
    for (float probabilityOfKeepingANegativeExample : new float[] { 0.5f, 1.0f }) {
      for (int solver : new int[] { 0 /* logistic regression */, 1 /* SVM */}) {
        for (double svmCost : new double[] { 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100 }) {
          gridOfSettings.add(new ParameterSettings(
              LIBLINEARStringOutcomeDataWriter.class,
              new Object[] {
                  RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
                  probabilityOfKeepingANegativeExample },
              new String[] { "-s", String.valueOf(solver), "-c", String.valueOf(svmCost) }));
        }
      }
    }

    // run an evaluation for each selected relation
    for (final String relationCategory : options.getRelations()) {

      // get the best parameters for the relation
      final Class<? extends BinaryTextRelation> relationClass =
          RELATION_CLASSES.get(relationCategory);
      ParameterSettings bestSettings = BEST_PARAMETERS.get(relationClass);

      // run the evaluation
      SHARPXMI.evaluate(
          options,
          bestSettings,
          gridOfSettings,
          new Function<ParameterSettings, RelationExtractorEvaluation>() {
            @Override
            public RelationExtractorEvaluation apply(@Nullable ParameterSettings params) {
              return new RelationExtractorEvaluation(
                  new File("target/models/" + relationCategory),
                  relationClass,
                  ANNOTATOR_CLASSES.get(relationClass),
                  params,
                  options.getTestOnCTakes(),
                  options.getAllowSmallerSystemArguments(),
                  options.getIgnoreImpossibleGoldRelations(),
                  options.getPrintErrors());
            }
          });
    }
  }

  private Class<? extends BinaryTextRelation> relationClass;

  private Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass;

  private ParameterSettings parameterSettings;

  private boolean testOnCTakes;

  private boolean allowSmallerSystemArguments;

  private boolean ignoreImpossibleGoldRelations;

  private boolean printErrors;

  /**
   * An evaluation of a relation extractor.
   *
   * @param baseDirectory
   *          The directory where models, etc. should be written
   * @param relationClass
   *          The class of the relation to be predicted
   * @param classifierAnnotatorClass
   *          The CleartkAnnotator class that learns a relation extractor model
   * @param parameterSettings
   *          The parameters defining how to train a classifier
   * @param testOnCTakes
   *          During testing, use annotations from cTAKES, not from the gold
   *          standard
   * @param allowSmallerSystemArguments
   *          During testing, allow system annotations to match gold annotations
   *          that enclose them
   * @param ignoreImpossibleGoldRelations
   *          During testing, ignore gold relations that would be impossible to
   *          find because there are no corresponding system mentions
   */
  public RelationExtractorEvaluation(
      File baseDirectory,
      Class<? extends BinaryTextRelation> relationClass,
      Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass,
      ParameterSettings parameterSettings,
      boolean testOnCTakes,
      boolean allowSmallerSystemArguments,
      boolean ignoreImpossibleGoldRelations,
      boolean printErrors) {
    super(baseDirectory);
    this.relationClass = relationClass;
    this.classifierAnnotatorClass = classifierAnnotatorClass;
    this.parameterSettings = parameterSettings;
    this.testOnCTakes = testOnCTakes;
    this.allowSmallerSystemArguments = allowSmallerSystemArguments;
    this.ignoreImpossibleGoldRelations = ignoreImpossibleGoldRelations;
    this.printErrors = printErrors;
  }

  public RelationExtractorEvaluation(
      File baseDirectory,
      Class<? extends BinaryTextRelation> relationClass,
      Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass,
      ParameterSettings parameterSettings) {
    this(
        baseDirectory,
        relationClass,
        classifierAnnotatorClass,
        parameterSettings,
        false,
        false,
        false,
        false);
  }

  @Override
  public void train(CollectionReader collectionReader, File directory) throws Exception {
    System.err.printf(
        "%s: %s: %s:\n",
        this.getClass().getSimpleName(),
        this.relationClass.getSimpleName(),
        directory.getName());
    System.err.println(this.parameterSettings);

    AggregateBuilder builder = new AggregateBuilder();
    // remove cTAKES entity mentions and modifiers in the system view and copy
    // in the gold relations
    builder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveCTakesMentionsAndCopyGoldRelations.class));
    // add the relation extractor, configured for training mode
    AnalysisEngineDescription classifierAnnotator =
        AnalysisEngineFactory.createPrimitiveDescription(
            this.classifierAnnotatorClass,
            this.parameterSettings.configurationParameters);
    ConfigurationParameterFactory.addConfigurationParameters(
        classifierAnnotator,
        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
        this.parameterSettings.dataWriterClass,
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        directory.getPath());
    builder.add(classifierAnnotator);

    // run the data-writing pipeline
    SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());

    // train the classifier and package it into a .jar file
    JarClassifierBuilder.trainAndPackage(directory, this.parameterSettings.trainingArguments);
  }

  /**
   * Runs the trained relation extractor over the documents supplied by the
   * reader and compares the relations it adds to the system view against the
   * relations of the same class in the gold view.
   * <p>
   * Depending on the flags given at construction time, the mentions the
   * extractor works from are either the cTAKES mentions (after adding
   * modifiers and removing smaller event mentions) or copies of the gold
   * mentions; gold relations without candidate system arguments may be
   * skipped; system arguments enclosed by a gold argument may be replaced by
   * that gold argument before scoring; and mismatches may be printed to
   * standard output. The accumulated statistics are also printed to standard
   * error.
   *
   * @param collectionReader
   *          The reader supplying the test documents
   * @param directory
   *          The directory whose model jar is loaded by the relation extractor
   * @return statistics comparing gold and system relations, keyed by the
   *         argument spans and scored on the "category" feature
   * @throws Exception
   *           if the pipeline cannot be built or run, or the gold view is
   *           unavailable
   */
  @Override
  protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
      throws Exception {
    AggregateBuilder builder = new AggregateBuilder();
    if (this.testOnCTakes) {
      // add the modifier extractor
      File file = new File("desc/analysis_engine/ModifierExtractorAnnotator.xml");
      XMLInputSource source = new XMLInputSource(file);
      builder.add(UIMAFramework.getXMLParser().parseAnalysisEngineDescription(source));
      // remove extraneous entity mentions
      builder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveSmallerEventMentions.class));
    } else {
      // replace cTAKES entity mentions and modifiers in the system view with
      // the gold annotations
      builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceCTakesMentionsWithGoldMentions.class));
    }
    // add the relation extractor, configured for classification mode
    AnalysisEngineDescription classifierAnnotator =
        AnalysisEngineFactory.createPrimitiveDescription(
            this.classifierAnnotatorClass,
            this.parameterSettings.configurationParameters);
    ConfigurationParameterFactory.addConfigurationParameters(
        classifierAnnotator,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        JarClassifierBuilder.getModelJarFile(directory));
    builder.add(classifierAnnotator);

    // statistics will be based on the "category" feature of the
    // BinaryTextRelations
    AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
    Function<BinaryTextRelation, HashableArguments> getSpan =
        new Function<BinaryTextRelation, HashableArguments>() {
          @Override
          public HashableArguments apply(BinaryTextRelation relation) {
            return new HashableArguments(relation);
          }
        };
    Function<BinaryTextRelation, String> getOutcome =
        AnnotationStatistics.annotationToFeatureValue("category");

    // calculate statistics, iterating over the results of the classifier
    AnalysisEngine engine = builder.createAggregate();
    for (JCas jCas : new JCasIterable(collectionReader, engine)) {

      // get the gold view
      JCas goldView;
      try {
        goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
      } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
      }

      // get the gold and system annotations
      Collection<? extends BinaryTextRelation> goldBinaryTextRelations =
          JCasUtil.select(goldView, this.relationClass);
      Collection<? extends BinaryTextRelation> systemBinaryTextRelations =
          JCasUtil.select(jCas, this.relationClass);

      if (this.ignoreImpossibleGoldRelations) {
        // collect only relations where both arguments have some possible system
        // arguments
        List<BinaryTextRelation> relations = Lists.newArrayList();
        for (BinaryTextRelation relation : goldBinaryTextRelations) {
          boolean hasSystemArgs = true;
          for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
            IdentifiedAnnotation goldArg = (IdentifiedAnnotation) relArg.getArgument();
            Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
            boolean noSystemArg = JCasUtil.selectCovered(jCas, goldClass, goldArg).isEmpty();
            hasSystemArgs = hasSystemArgs && !noSystemArg;
          }
          if (hasSystemArgs) {
            relations.add(relation);
          } else {
            IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
            IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
            String messageFormat =
                "removing relation between %s and %s which is impossible to "
                    + "find with system mentions";
            String message = String.format(messageFormat, format(arg1), format(arg2));
            UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
          }
        }
        goldBinaryTextRelations = relations;
      }

      if (this.allowSmallerSystemArguments) {

        // collect all the arguments of the manually annotated relations
        Set<IdentifiedAnnotation> goldArgs = Sets.newHashSet();
        for (BinaryTextRelation relation : goldBinaryTextRelations) {
          for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
            goldArgs.add((IdentifiedAnnotation) relArg.getArgument());
          }
        }

        // collect all the arguments of system-predicted relations that don't
        // match some gold argument
        Set<IdentifiedAnnotation> unmatchedSystemArgs = Sets.newHashSet();
        for (BinaryTextRelation relation : systemBinaryTextRelations) {
          for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
            IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
            Class<? extends IdentifiedAnnotation> systemClass = systemArg.getClass();
            boolean matchesSomeGold = false;
            for (IdentifiedAnnotation goldArg : JCasUtil.selectCovered(
                goldView,
                systemClass,
                systemArg)) {
              if (goldArg.getBegin() == systemArg.getBegin()
                  && goldArg.getEnd() == systemArg.getEnd()) {
                matchesSomeGold = true;
                break;
              }
            }
            if (!matchesSomeGold) {
              unmatchedSystemArgs.add(systemArg);
            }
          }
        }

        // map each unmatched system argument to the gold argument that encloses
        // it
        Map<IdentifiedAnnotation, IdentifiedAnnotation> systemToGold = Maps.newHashMap();
        for (IdentifiedAnnotation goldArg : goldArgs) {
          Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
          for (IdentifiedAnnotation systemArg : JCasUtil.selectCovered(jCas, goldClass, goldArg)) {
            if (unmatchedSystemArgs.contains(systemArg)) {

              // if there's no mapping yet for this system arg, map it to the
              // enclosing gold arg
              IdentifiedAnnotation oldGoldArg = systemToGold.get(systemArg);
              if (oldGoldArg == null) {
                systemToGold.put(systemArg, goldArg);
              }

              // if there's already a mapping for this system arg, only re-map
              // it to match the type
              else {
                IdentifiedAnnotation current, other;
                if (systemArg.getTypeID() == goldArg.getTypeID()) {
                  systemToGold.put(systemArg, goldArg);
                  current = goldArg;
                  other = oldGoldArg;
                } else {
                  current = oldGoldArg;
                  other = goldArg;
                }

                // issue a warning since this re-mapping procedure is imperfect
                String message =
                    "system argument %s mapped to gold argument %s, but could also be mapped to %s";
                message = String.format(message, format(systemArg), format(current), format(other));
                UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
              }
            }
          }
        }

        // replace system arguments with gold arguments where necessary/possible
        for (BinaryTextRelation relation : systemBinaryTextRelations) {
          for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
            IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
            IdentifiedAnnotation matchingGoldArg = systemToGold.get(systemArg);
            if (matchingGoldArg != null) {
              String messageFormat = "replacing system argument %s with gold argument %s";
              String message =
                  String.format(messageFormat, format(systemArg), format(matchingGoldArg));
              UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
              relArg.setArgument(matchingGoldArg);
            }
          }
        }
      }

      // update the statistics based on the argument spans of the relation
      stats.add(goldBinaryTextRelations, systemBinaryTextRelations, getSpan, getOutcome);

      // print errors if requested
      if (this.printErrors) {
        // index both sets of relations by their argument spans so that gold and
        // system relations over the same spans can be paired up
        Map<HashableArguments, BinaryTextRelation> goldMap = Maps.newHashMap();
        for (BinaryTextRelation relation : goldBinaryTextRelations) {
          goldMap.put(new HashableArguments(relation), relation);
        }
        Map<HashableArguments, BinaryTextRelation> systemMap = Maps.newHashMap();
        for (BinaryTextRelation relation : systemBinaryTextRelations) {
          systemMap.put(new HashableArguments(relation), relation);
        }
        Set<HashableArguments> all = Sets.union(goldMap.keySet(), systemMap.keySet());
        List<HashableArguments> sorted = Lists.newArrayList(all);
        Collections.sort(sorted);
        for (HashableArguments key : sorted) {
          BinaryTextRelation goldRelation = goldMap.get(key);
          BinaryTextRelation systemRelation = systemMap.get(key);
          if (goldRelation == null) {
            System.out.println("System added: " + formatRelation(systemRelation));
          } else if (systemRelation == null) {
            System.out.println("System dropped: " + formatRelation(goldRelation));
          } else if (!systemRelation.getCategory().equals(goldRelation.getCategory())) {
            String label = systemRelation.getCategory();
            System.out.printf("System labeled %s for %s\n", label, formatRelation(systemRelation));
          }
        }
      }
    }

    System.err.print(stats);
    System.err.println();
    return stats;
  }

  private static String formatRelation(BinaryTextRelation relation) {
    IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
    IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
    String text = arg1.getCAS().getDocumentText();
    int begin = Math.min(arg1.getBegin(), arg2.getBegin());
    int end = Math.max(arg1.getBegin(), arg2.getBegin());
    begin = Math.max(0, begin - 50);
    end = Math.min(text.length(), end + 50);
    return String.format(
        "%s(%s(type=%d), %s(type=%d)) in ...%s...",
        relation.getCategory(),
        arg1.getCoveredText(),
        arg1.getTypeID(),
        arg2.getCoveredText(),
        arg2.getTypeID(),
        text.substring(begin, end).replaceAll("[\r\n]", " "));
  }

  /**
   * Annotator that removes cTAKES mentions in the system view and copies
   * relations from the gold view to the system view
   */
  public static class RemoveCTakesMentionsAndCopyGoldRelations extends JCasAnnotator_ImplBase {

    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
      JCas goldView, systemView;
      try {
        goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
      } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
      }

      // remove cTAKES Mentions and Modifiers from system view
      List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
      cTakesMentions.addAll(JCasUtil.select(systemView, EventMention.class));
      cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
      cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
      for (IdentifiedAnnotation cTakesMention : cTakesMentions) {
        cTakesMention.removeFromIndexes();
      }

      // copy gold Mentions and Modifiers to the system view
      List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
      goldMentions.addAll(JCasUtil.select(goldView, EventMention.class));
      goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
      goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
      CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
      Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
      for (IdentifiedAnnotation goldMention : goldMentions) {
        Annotation copy = (Annotation) copier.copyFs(goldMention);
        copy.setFeatureValue(sofaFeature, systemView.getSofa());
        copy.addToIndexes();
      }

      // copy gold relations to the system view
      for (BinaryTextRelation goldRelation : JCasUtil.select(goldView, BinaryTextRelation.class)) {
        BinaryTextRelation relation = (BinaryTextRelation) copier.copyFs(goldRelation);
        relation.addToIndexes(systemView);
        for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
          relArg.addToIndexes(systemView);
          // relArg.getArgument() should have been added to indexes with
          // mentions above
        }
      }
    }
  }

  /**
   * Annotator that removes cTAKES Mentions and Modifiers from the system view,
   * and copies over the manually annotated Mentions and Modifiers from the gold
   * view.
   */
  public static class ReplaceCTakesMentionsWithGoldMentions extends JCasAnnotator_ImplBase {

    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
      JCas goldView, systemView;
      try {
        goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
      } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
      }

      // remove cTAKES Mentions and Modifiers from system view
      List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
      cTakesMentions.addAll(JCasUtil.select(systemView, EventMention.class));
      cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
      cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
      for (IdentifiedAnnotation cTakesMention : cTakesMentions) {
        cTakesMention.removeFromIndexes();
      }

      // copy gold Mentions and Modifiers to the system view
      List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
      goldMentions.addAll(JCasUtil.select(goldView, EventMention.class));
      goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
      goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
      CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
      for (IdentifiedAnnotation goldMention : goldMentions) {
        Annotation copy = (Annotation) copier.copyFs(goldMention);
        Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
        copy.setFeatureValue(sofaFeature, systemView.getSofa());
        copy.addToIndexes();
      }
    }
  }

  static String format(IdentifiedAnnotation a) {
    return a == null ? null : String.format("\"%s\"(type=%d)", a.getCoveredText(), a.getTypeID());
  }

  public static class RemoveSmallerEventMentions extends JCasAnnotator_ImplBase {

    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
      Collection<EventMention> mentions = JCasUtil.select(jCas, EventMention.class);
      for (EventMention mention : Lists.newArrayList(mentions)) {
        int begin = mention.getBegin();
        int end = mention.getEnd();
        int typeID = mention.getTypeID();
        List<EventMention> subMentions = JCasUtil.selectCovered(jCas, EventMention.class, mention);
        for (EventMention subMention : subMentions) {
          if (subMention.getBegin() > begin || subMention.getEnd() < end) {
            if (subMention.getTypeID() == typeID) {
              String message =
                  String.format("removed %s inside %s", format(subMention), format(mention));
              this.getContext().getLogger().log(Level.WARNING, message);
              subMention.removeFromIndexes();
            }
          }
        }
      }
    }
  }

  /**
   * This class is useful for mapping the spans of relation arguments to the
   * relation's category.
   */
  public static class HashableArguments implements Comparable<HashableArguments> {

    protected int arg1begin;

    protected int arg1end;

    protected int arg2begin;

    protected int arg2end;

    public HashableArguments(int arg1begin, int arg1end, int arg2begin, int arg2end) {
      this.arg1begin = arg1begin;
      this.arg1end = arg1end;
      this.arg2begin = arg2begin;
      this.arg2end = arg2end;
    }

    public HashableArguments(Annotation arg1, Annotation arg2) {
      this(arg1.getBegin(), arg1.getEnd(), arg2.getBegin(), arg2.getEnd());
    }

    public HashableArguments(BinaryTextRelation relation) {
      this(relation.getArg1().getArgument(), relation.getArg2().getArgument());
    }

    @Override
    public boolean equals(Object otherObject) {
      boolean result = false;
      if (otherObject instanceof HashableArguments) {
        HashableArguments other = (HashableArguments) otherObject;
        result =
            (this.getClass() == other.getClass()
                && this.arg1begin == other.arg1begin
                && this.arg1end == other.arg1end
                && this.arg2begin == other.arg2begin && this.arg2end == other.arg2end);
      }
      return result;
    }

    @Override
    public int hashCode() {
      return Objects.hashCode(this.arg1begin, this.arg1end, this.arg2begin, this.arg2end);
    }

    @Override
    public String toString() {
      return String.format(
          "%s(%s,%s,%s,%s)",
          this.getClass().getSimpleName(),
          this.arg1begin,
          this.arg1end,
          this.arg2begin,
          this.arg2end);
    }

    @Override
    public int compareTo(HashableArguments that) {
      int thisBegin = Math.min(this.arg1begin, this.arg2begin);
      int thatBegin = Math.min(that.arg1begin, that.arg2begin);
      if (thisBegin < thatBegin) {
        return -1;
      } else if (thisBegin > thatBegin) {
        return +1;
      } else if (this.equals(that)) {
        return 0;
      } else {
        return +1; // arbitrary choice for overlapping
      }
    }

  }
}
TOP

Related Classes of org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation$RemoveSmallerEventMentions

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.