Package org.apache.ctakes.assertion.eval

Source Code of org.apache.ctakes.assertion.eval.AssertionEvaluation

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.assertion.eval;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.ctakes.assertion.attributes.features.selection.FeatureSelection;
import org.apache.ctakes.assertion.medfacts.cleartk.AlternateCuePhraseAnnotator;
import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine.FEATURE_CONFIG;
import org.apache.ctakes.assertion.medfacts.cleartk.AssertionComponents;
import org.apache.ctakes.assertion.medfacts.cleartk.ConditionalCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.HistoryCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.PolarityFedaCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.SubjectCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
import org.apache.ctakes.assertion.pipelines.GoldEntityAndAttributeReaderPipelineForSeedCorpus;
import org.apache.ctakes.core.ae.DocumentIdPrinterAnalysisEngine;
import org.apache.ctakes.core.util.CtakesFileNamer;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.syntax.NumToken;
import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.Modifier;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.CasCopier;
import org.apache.uima.util.FileUtils;
import org.cleartk.classifier.DataWriter;
import org.cleartk.classifier.Instance;
import org.cleartk.classifier.feature.transform.InstanceDataWriter;
import org.cleartk.classifier.feature.transform.InstanceStream;
import org.cleartk.classifier.jar.DefaultDataWriterFactory;
import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
import org.cleartk.classifier.jar.GenericJarClassifierFactory;
import org.cleartk.classifier.jar.JarClassifierBuilder;
import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
import org.cleartk.eval.Evaluation_ImplBase;
//import org.cleartk.ml.libsvm.tk.TKLIBSVMStringOutcomeDataWriter;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.spi.BooleanOptionHandler;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.component.NoOpAnnotator;
import org.uimafit.component.xwriter.XWriter;
import org.uimafit.factory.AggregateBuilder;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.CollectionReaderFactory;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.factory.TypeSystemDescriptionFactory;
import org.uimafit.pipeline.JCasIterable;
import org.uimafit.pipeline.SimplePipeline;
import org.uimafit.util.JCasUtil;

import com.google.common.base.Objects;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
//import org.chboston.cnlp.ctakes.relationextractor.ae.RelationExtractorAnnotator;
//import org.chboston.cnlp.ctakes.relationextractor.eval.RelationExtractorEvaluation;
//import org.chboston.cnlp.ctakes.relationextractor.ae.ModifierExtractorAnnotator;

public class AssertionEvaluation extends Evaluation_ImplBase<File, Map<String, AnnotationStatisticsCompact>> {
 
private static Logger logger = Logger.getLogger(AssertionEvaluation.class);

  private static final String YTEX_NEGATION_DESCRIPTOR = "ytex.uima.NegexAnnotator";

  enum Corpus {SHARP_SEED, SHARP_STRATIFIED, MIPACQ, I2B2, NEGEX}

  public static class Options {
    @Option(
        name = "--train-dir",
        usage = "specify the directory containing the XMI training files (for example, /NLP/Corpus/Relations/mipacq/xmi/train)",
        required = false)
    public String trainDirectory;
   
    @Option(
        name = "--test-dir",
        usage = "specify the directory containing the XMI testing files (for example, /NLP/Corpus/Relations/mipacq/xmi/test)",
        required = false)
    public File testDirectory;
   
    @Option(
            name = "--dev-dir",
            usage = "if running --preprocess, store the XMI development files here",
            required = false)
        public File devDirectory;

    @Option(
        name = "--models-dir",
        usage = "specify the directory where the models will be placed",
        required = false)
    public File modelsDirectory;
   
    @Option(
            name = "--evaluation-output-dir",
            usage = "specify the directory where the evaluation output xmi files will go",
            required = false)
    public File evaluationOutputDirectory;
       
    @Option(
            name = "--ignore-polarity",
            usage = "specify whether polarity processing should be ignored (true or false). default: false",
            required = false)
    public boolean ignorePolarity = false; // note that this is reversed from the "ignore" statement
       
    @Option(
            name = "--ignore-conditional",
            usage = "specify whether conditional processing should be ignored (true or false). default: false",
            required = false)
    public boolean ignoreConditional = false;
       
    @Option(
            name = "--ignore-uncertainty",
            usage = "specify whether uncertainty processing should be ignored (true or false). default: false",
            required = false)
    public boolean ignoreUncertainty = false;
       
    @Option(
            name = "--ignore-subject",
            usage = "specify whether subject processing should be ignored (true or false). default: false",
            required = false,
            handler=BooleanOptionHandler.class)
    public boolean ignoreSubject = false;
       
    @Option(
            name = "--ignore-generic",
            usage = "specify whether generic processing should be ignored (true or false). default: false",
            required = false)
    public boolean ignoreGeneric = false;
       
    // srh adding 2/20/13
    @Option(
            name = "--ignore-history",
            usage = "specify whether 'history of' processing should be run (true or false). default: false",
            required = false)
    public boolean ignoreHistory = false;
       
    @Option(
            name = "--cross-validation",
            usage = "ignore the test set and run n-fold cross-validation. default: n=2",
            required = false)
    public Integer crossValidationFolds;
   
    @Option(
            name = "--train-only",
            usage = "do not test a model, build one from xmi output and store in --models-dir",
            required = false)
    public boolean trainOnly = false;

    @Option(
            name = "--test-only",
            usage = "do not train a model, use the one specified in --models-dir",
            required = false)
    public boolean testOnly = false;

    @Option(
            name = "--preprocess-only",
            usage = "run preprocessing pipeline on a SHARP-style corpus, specify root directory",
            required = false)
    public File preprocessDir;

    @Option(
            name = "--no-cleartk",
            usage = "run the version of the assertion module released with cTAKES 2.5",
            required = false)
    public boolean noCleartk = false;
   
    @Option(
        name = "--print-errors",
        usage = "Flag to have test method print out error context for misclassified examples",
        required = false)
    public boolean printErrors = false;

    @Option(
        name = "--print-instances",
        usage = "Flag to have test method print out lots of info for statistical significance testing",
        required = false)
    public File printInstances;

    @Option(
        name = "--eval-only",
        usage = "Evaluate a CASes (supply the directory as an argument) with both system and gold in them.",
        required = false)
    public boolean evalOnly;

    @Option(
        name = "--ytex-negation",
        usage = "Use the negation detection from ytex, which is based on a more recent NegEx than the original cTAKES used." +
        " Note that using this requires adding the directory for YTEX_NEGATION_DESCRIPTOR to the classpath as well" +
        " as the annotator class itself, since ytex is under a different license than Apache cTAKES.",
        required = false)
    public boolean useYtexNegation;

    @Option(
        name = "--feature-selection",
        usage = "Takes an argument: the Chi^2 feature selection threshold",
        required = false)
    public Float featureSelectionThreshold = null;

    @Option(
        name = "--kernel-params",
        usage = "Set of parameters to pass to kernel (libsvm)",
        required = false)
    public String kernelParams = null;
   
    @Option(
        name = "--use-tmp",
        usage = "Whether to put trained models into a temp directory (e.g., for a grid search)",
        required = false)
    public boolean useTmp = false;
   
    @Option(
        name = "--corpus",
        usage = "What corpus to read for pre-processing",
        required = false)
    public Corpus corpus = Corpus.SHARP_SEED;
   
    @Option(
        name = "--feats",
        usage = "What feature configuration to use",
        required = false)
    public FEATURE_CONFIG featConfig = FEATURE_CONFIG.ALL_SYN;

    @Option(
        name = "--feda",
        usage = "Domain adaptation -- for each semicolon-separated directory in train-dir, creates a domain-specific feature space",
        required = false)
    public boolean feda = false;
  }
 
  protected ArrayList<String> annotationTypes;

  private Class<? extends AssertionCleartkAnalysisEngine> classifierAnnotatorClass;

//  private Class<? extends DataWriterFactory<String>> dataWriterFactoryClass;
 
  private Class<? extends DataWriter<String>> dataWriterClass;
 
  private File evaluationOutputDirectory;

//  private static Map<String,String> trainFileToCorpusDir = new HashMap<String,String>();

  static public String evaluationLogFilePath;
  static private File evaluationLogFile;
  static private BufferedWriter evaluationLogFileOut;
  static {
    evaluationLogFilePath = "eval_"+new Date().toString().replaceAll(" ","_") + ".txt";
  }

  static public boolean useEvaluationLogFile = false;
  private boolean ignoreAnatomicalSites = false;

  protected static Options options = new Options();
 
  public static void main(String[] args) throws Exception {
   
    System.out.println("Started assertion module at " + new Date());
   
    //Options options = new Options();
    resetOptions();
    CmdLineParser parser = new CmdLineParser(options);
    parser.parseArgument(args);
//    options.parseOptions(args);

    if (useEvaluationLogFile && evaluationLogFileOut == null) {
      evaluationLogFile = new File(evaluationLogFilePath);
      evaluationLogFileOut = new BufferedWriter(new FileWriter(evaluationLogFile), 32768);
    }
   
//    System.err.println("forcing skipping of subject processing!!!");
//    options.runSubject = false;
//    System.err.println("forcing skipping of generic processing!!!");
//    options.runGeneric = false;
//    System.err.println("forcing skipping of polarity processing!!!");
//    options.runPolarity = false;
//    System.err.println("forcing skipping of uncertainty processing!!!");
//    options.runUncertainty = false;
//    System.err.println("forcing skipping of conditional processing!!!");
//    options.runConditional = false;
    printOptionsForDebugging(options);
    List<File> trainFiles = new ArrayList<File>();
    if (null != options.trainDirectory) {
      String[] dirs = options.trainDirectory.split("[;:]");
      for (String dir : dirs) {
        File trainDir = new File(dir);
        if (trainDir.listFiles()!=null) {
          for (File f : trainDir.listFiles()) {
            trainFiles.add(f);
//            trainFileToCorpusDir.put(f.getPath(),domainId);
          }
        }
      }
    }
    //File modelsDir = new File("models/modifier");
    File modelsDir = options.modelsDirectory;
    if(options.useTmp){
      File tempModelDir = new File(options.modelsDirectory, "temp");
      tempModelDir.mkdirs();
      File curModelDir = File.createTempFile("assertion", null, tempModelDir);
      curModelDir.delete();
      curModelDir.mkdir();
      modelsDir = curModelDir;
    }
   
    File evaluationOutputDirectory = options.evaluationOutputDirectory;

    // determine the type of classifier to be trained
//    Class<? extends DataWriterFactory<String>> dataWriterFactoryClass = DefaultMaxentDataWriterFactory.class;
//    Class<? extends DataWriterFactory<String>> dataWriterFactoryClass = DefaultMultiClassLIBSVMDataWriterFactory.class;
   
    // TODO Class<? extends DataWriterFactory<String>> dataWriterFactoryClass = DefaultDataWriterFactory.class;
    //
    // A DataWriterFactory that creates a data writer from the class given by
    // PARAM_DATA_WRITER_CLASS_NAME and the directory given by
    // DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY.
    //
    // DefaultMaxentDataWriterFactory is deprecated and says to use DefaultDattaWriterFactory
    // with MaxentDataWriter.
   
    Class<? extends AssertionCleartkAnalysisEngine> annotatorClass = AssertionCleartkAnalysisEngine.class;

    //String [] annotationTypes = { "polarity", "conditional", "uncertainty", "subject", "generic" };
    ArrayList<String> annotationTypes = new ArrayList<String>();
    if (!options.ignorePolarity) { annotationTypes.add("polarity"); }
    if (!options.ignoreConditional) { annotationTypes.add("conditional"); }
    if (!options.ignoreUncertainty) { annotationTypes.add("uncertainty"); }
    if (!options.ignoreSubject) { annotationTypes.add("subject"); }
    if (!options.ignoreGeneric) { annotationTypes.add("generic"); }
    if (!options.ignoreHistory) { annotationTypes.add("historyOf"); }
   
    String[] kernelParams = null;
    if(options.kernelParams != null){
      kernelParams = options.kernelParams.split("\\s+");
    }else{
      kernelParams = new String[]{"-t", "0", "-c", "1"};
    }
    Class<? extends DataWriter<String>> dw = null;
    if(options.featConfig == FEATURE_CONFIG.STK || options.featConfig == FEATURE_CONFIG.PTK){
//        dw = TKLIBSVMStringOutcomeDataWriter.class;
      throw new UnsupportedOperationException("This requires cleartk-2.0 which");
    }
    dw = LIBSVMStringOutcomeDataWriter.class;
   
    AssertionEvaluation evaluation = new AssertionEvaluation(
        modelsDir,
        evaluationOutputDirectory,
        annotationTypes,
        annotatorClass,
        dw,
        kernelParams
//        "-t",
//        "0",
//       TKLIBSVMStringOutcomeDataWriter.class,
//        "-c",
//        "1"
//        "-t",
//        "5",
//        "-C",
//        "+",
//        "-L",
//        "0.4",
//        "-N",
//        "3",
//        "-S",
//        "0"
      
//        "-w0",
//        "100.0",
//        "-w1",
//        "1.0"
//        "100",
//        "2"
        );
   
    // if preprocessing, don't do anything else
    if(options.preprocessDir!=null ) {
      preprocess(options.preprocessDir);
    }
   
    // run cross-validation
    else if(options.crossValidationFolds != null) {
      // run n-fold cross-validation
      List<Map<String, AnnotationStatisticsCompact>> foldStats = evaluation.crossValidation(trainFiles, options.crossValidationFolds);
      //AnnotationStatisticsCompact overallStats = AnnotationStatisticsCompact.addAll(foldStats);
      Map<String, AnnotationStatisticsCompact> overallStats = new TreeMap<String, AnnotationStatisticsCompact>();
     
      for (String currentAnnotationType : annotationTypes)
      {
        AnnotationStatisticsCompact currentAnnotationStatisticsCompact = new AnnotationStatisticsCompact();
        overallStats.put(currentAnnotationType, currentAnnotationStatisticsCompact);
      }
      for (Map<String, AnnotationStatisticsCompact> singleFoldMap : foldStats)
      {
        for (String currentAnnotationType : annotationTypes)
        {
          AnnotationStatisticsCompact currentFoldStatistics = singleFoldMap.get(currentAnnotationType);
          overallStats.get(currentAnnotationType).addAll(currentFoldStatistics);
        }
      }
     
      AssertionEvaluation.printScore(overallStats,  "CROSS FOLD OVERALL");
     
    }
   
    // run train and test
    else {
      // train on the entire training set and evaluate on the test set
      List<File> testFiles;
      if (options.evalOnly) {
        testFiles = Arrays.asList(options.evaluationOutputDirectory.listFiles());
        logger.debug("evalOnly using files in directory " + evaluationOutputDirectory.getName() + " aka " + evaluationOutputDirectory.getCanonicalPath());
      } else if (options.trainOnly){
        testFiles = new ArrayList<File>();
      } else {
        testFiles = Arrays.asList(options.testDirectory.listFiles());
      }
     
      if (!options.testOnly && !options.evalOnly) {
        CollectionReader trainCollectionReader = evaluation.getCollectionReader(trainFiles);
        evaluation.train(trainCollectionReader, modelsDir);
      }
     
      // run testing
      if (!options.trainOnly) {
        if (testFiles==null || testFiles.size()==0) {
          throw new RuntimeException("testFiles = " + testFiles + " testFiles.size() = " + (testFiles==null ? "null": testFiles.size())) ;
        }
        logger.debug("testFiles.size() = " + testFiles.size());
        CollectionReader testCollectionReader = evaluation.getCollectionReader(testFiles);
        Map<String, AnnotationStatisticsCompact> stats = evaluation.test(testCollectionReader, modelsDir);

        AssertionEvaluation.printScore(stats,  modelsDir!=null? modelsDir.getAbsolutePath() : "no_model");
      }
    }
    if(options.useTmp && modelsDir != null){
      FileUtils.deleteRecursive(modelsDir);
    }
    System.out.println("Finished assertion module at " + new Date());
   
  }
 
  private static void resetOptions() {
    options.ignoreConditional = false;
    options.ignoreGeneric = false;
    options.ignoreHistory = false;
    options.ignorePolarity = false;
    options.ignoreSubject = false;
    options.ignoreUncertainty = false;
   
    options.trainOnly = false;
    options.testOnly = false;
    options.noCleartk = false;
    options.printErrors = false;
    options.printInstances = null;
    options.evalOnly = false;
   
    options.evaluationOutputDirectory = null;
    options.trainDirectory = null;
    options.testDirectory = null;
    options.devDirectory = null;
    options.modelsDirectory = null;
    options.preprocessDir = null;
   
    options.crossValidationFolds = null;
  }

private static void printOptionsForDebugging(Options options)
  {
  String message;
  message = String.format(
    "Printing options: %n" +
    "training dir: %s%n" +
      "test dir: %s%n" +
      "model dir: %s%n" +
      "preprocess dir: %s%n" +
      "evaluation output dir: %s%n" +
      "cross-validation: %d%n" +
      "ignore polarity: %b%n" +
      "ignore conditional: %b%n" +
      "ignore uncertainty: %b%n" +
      "ignore subject: %b%n" +
      "ignore generic: %b%n" +
      "ignore history: %b%n" +
      "train only: %b%n" +
      "test only: %b%n" +
      "eval only: %b%n" +
      //"crossValidationFolds: %s%n" +
      "noCleartk: %b%n" +
      "%n%n",
      options.trainDirectory, // just a String so no need to check for null because not using getAbsolutePath()
         (options.testDirectory != null) ? options.testDirectory.getAbsolutePath() : "",
      (options.modelsDirectory!=null) ? options.modelsDirectory.getAbsolutePath() : "",
       (options.preprocessDir!=null) ? options.preprocessDir.getAbsolutePath() : "",
      (options.evaluationOutputDirectory!=null) ? options.evaluationOutputDirectory.getAbsolutePath() : "",
      options.crossValidationFolds,
      options.ignorePolarity,
      options.ignoreConditional,
      options.ignoreUncertainty,
      options.ignoreSubject,
      options.ignoreGeneric,
      options.ignoreHistory,
    options.trainOnly,
    options.testOnly,
    options.evalOnly,
    //(options.crossValidationFolds != null) ? options.crossValidationFolds.intValue()+"" : "",
    options.noCleartk
      );
  logger.info(message);
  }

public static void printScore(Map<String, AnnotationStatisticsCompact> map, String directory)
  {
      for (Map.Entry<String, AnnotationStatisticsCompact> currentEntry : map.entrySet())
    {
        String annotationType = currentEntry.getKey();
        AnnotationStatisticsCompact stats = currentEntry.getValue();
       
        System.out.format("directory: \"%s\"; assertion type: %s%n%s%n%s%n%n",
          directory,
          annotationType.toUpperCase(),
          options.testDirectory,
          stats.toString());
       
        try {
          if (useEvaluationLogFile) {
            evaluationLogFileOut.write(
                String.format("%s\t%f\t%s\t%s\t%s",
                    annotationType,
                    options.featureSelectionThreshold,
                    options.modelsDirectory.getName(),
                    options.testDirectory.toString(),
                    stats.toTsv())
                );
            evaluationLogFileOut.flush();
          }
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }


    }

  }

  private String[] trainingArguments;

  public AssertionEvaluation(
      File modelDirectory,
      File evaluationOutputDirectory,
      ArrayList<String> annotationTypes,
      Class<? extends AssertionCleartkAnalysisEngine> classifierAnnotatorClass,
      Class<? extends DataWriter<String>> dataWriterClass,
      String... trainingArguments
      ) {
    super(modelDirectory);
   
    this.annotationTypes = annotationTypes;

    this.classifierAnnotatorClass = classifierAnnotatorClass;
    this.dataWriterClass = dataWriterClass;

    this.trainingArguments = trainingArguments;
    this.evaluationOutputDirectory = evaluationOutputDirectory;
  }

  @Override
  public CollectionReader getCollectionReader(List<File> items)
      throws ResourceInitializationException {
    String[] paths = new String[items.size()];
    for (int i = 0; i < paths.length; ++i) {
      paths[i] = items.get(i).getPath();
    }
    return CollectionReaderFactory.createCollectionReader(
        XMIReader.class,
        TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(),
        XMIReader.PARAM_FILES,
        paths);
  }

  public static void preprocess(File rawDir ) throws ResourceInitializationException, UIMAException, IOException {
//    File devDirectory = new File(options.trainDirectory.getParentFile() + File.separator + "dev");
    File preprocessedDir = null;
    if (options.trainDirectory.split("[;]").length>1) {
      throw new IOException("Assertion preprocess wants to write to one train directory, but you've supplied multiple: " + options.trainDirectory);
    }
    preprocessedDir = new File(options.trainDirectory);
   
    if(options.corpus == Corpus.I2B2){
      GoldEntityAndAttributeReaderPipelineForSeedCorpus.readI2B2Challenge2010(rawDir, preprocessedDir);
    }else if(options.corpus == Corpus.MIPACQ){
      GoldEntityAndAttributeReaderPipelineForSeedCorpus.readMiPACQ(rawDir, preprocessedDir, options.testDirectory, options.devDirectory);
    }else if(options.corpus == Corpus.NEGEX){
      GoldEntityAndAttributeReaderPipelineForSeedCorpus.readNegexTestSet(rawDir, preprocessedDir);
    }else if(options.corpus == Corpus.SHARP_STRATIFIED){
      GoldEntityAndAttributeReaderPipelineForSeedCorpus.readSharpStratifiedUmls(
          rawDir, preprocessedDir, options.testDirectory, options.devDirectory);
    } else if(options.corpus == Corpus.SHARP_SEED){
      GoldEntityAndAttributeReaderPipelineForSeedCorpus.readSharpSeedUmls(
          rawDir, preprocessedDir, options.testDirectory, options.devDirectory);
    } else{
      throw new ResourceInitializationException("No corpus type specified!", new Object[]{rawDir});
    }
  }
 
  @Override
  public void train(CollectionReader collectionReader, File directory) throws Exception {
    AggregateBuilder builder = new AggregateBuilder();
   
    //builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceCTakesEntityMentionsAndModifiersWithGold.class));

//    AnalysisEngineDescription assertionDescription = AssertionCleartkAnalysisEngine.getDescription(
//        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
//        //MultiClassLIBSVMDataWriterFactory.class.getName(),
//        MaxentStringOutcomeDataWriter.class.getName(),
//        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
//        directory.getPath());
//    builder.add(assertionDescription);
   
    AnalysisEngineDescription documentIdPrinterAnnotator = AnalysisEngineFactory.createPrimitiveDescription(DocumentIdPrinterAnalysisEngine.class);
    builder.add(documentIdPrinterAnnotator);
   
    AnalysisEngineDescription goldCopierIdentifiedAnnotsAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceIdentifiedAnnotationsSystemToGoldCopier.class);
    builder.add(goldCopierIdentifiedAnnotsAnnotator);
   
    AnalysisEngineDescription goldCopierSupportingAnnotsAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceSupportingAnnotationsSystemToGoldCopier.class);
    builder.add(goldCopierSupportingAnnotsAnnotator);
   
    AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
    builder.add(assertionAttributeClearerAnnotator);
   
//    String generalSectionRegexFileUri =
//        "org/mitre/medfacts/zoner/section_regex.xml";
//    AnalysisEngineDescription zonerAnnotator =
//        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
//            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
//            generalSectionRegexFileUri
//            );
//    builder.add(zonerAnnotator);
//
//    String mayoSectionRegexFileUri =
//        "org/mitre/medfacts/uima/mayo_sections.xml";
//    AnalysisEngineDescription mayoZonerAnnotator =
//        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
//            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
//            mayoSectionRegexFileUri
//            );
//    builder.add(mayoZonerAnnotator);
 
//    URL assertionCuePhraseLookupAnnotatorDescriptorUrl1 = this.getClass().getClassLoader().getResource("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator.xml");
//    logger.info(String.format("assertionCuePhraseLookupAnnotatorDescriptorUrl1 (slashes): %s", assertionCuePhraseLookupAnnotatorDescriptorUrl1));
//    URL assertionCuePhraseLookupAnnotatorDescriptorUrl2 = this.getClass().getClassLoader().getResource("org.apache.ctakes.dictionary.lookup.AssertionCuePhraseDictionaryLookupAnnotator.xml");
//    logger.info(String.format("assertionCuePhraseLookupAnnotatorDescriptorUrl2 (periods): %s", assertionCuePhraseLookupAnnotatorDescriptorUrl2));
//
//   
//    AnalysisEngineDescription cuePhraseLookupAnnotator =
//        AnalysisEngineFactory.createAnalysisEngineDescription("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator");
//    builder.add(cuePhraseLookupAnnotator);

    // Set up Feature Selection parameters
    Float featureSelectionThreshold = options.featureSelectionThreshold;
    Class<? extends DataWriter> dataWriterClassFirstPass = getDataWriterClass();
    if (options.featureSelectionThreshold==null) {
      featureSelectionThreshold = 0f;
    }
   
    // Add each assertion Analysis Engine to the pipeline!
    builder.add(AnalysisEngineFactory.createPrimitiveDescription(AlternateCuePhraseAnnotator.class, new Object[]{}));
   
    if (!options.ignorePolarity)
    {
      AnalysisEngineDescription polarityAnnotator;
      if (options.useYtexNegation) {
         polarityAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(YTEX_NEGATION_DESCRIPTOR);
      } else {
        if (options.feda) {
          polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityFedaCleartkAnalysisEngine.class);

            ConfigurationParameterFactory.addConfigurationParameters(
                polarityAnnotator,
                AssertionCleartkAnalysisEngine.FILE_TO_DOMAIN_MAP,
                options.trainDirectory
                );
        } else {
          // default: cleartk-based polarity, no domain adaptation
          polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //,  this.additionalParamemters);
          }
        ConfigurationParameterFactory.addConfigurationParameters(
            polarityAnnotator,
            AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
            AssertionEvaluation.GOLD_VIEW_NAME,
//            CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
//            this.dataWriterFactoryClass.getName(),
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            dataWriterClassFirstPass,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            new File(directory, "polarity").getPath(),
            AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
            PolarityCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "polarity")),
            AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
            featureSelectionThreshold,
            AssertionCleartkAnalysisEngine.PARAM_FEATURE_CONFIG,
            options.featConfig
            );
      }
    builder.add(polarityAnnotator);
    }

    if (!options.ignoreConditional)
    {
      AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ConditionalCleartkAnalysisEngine.class); //,  this.additionalParamemters);
      ConfigurationParameterFactory.addConfigurationParameters(
          conditionalAnnotator,
          AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
          AssertionEvaluation.GOLD_VIEW_NAME,
//          CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
//          this.dataWriterFactoryClass.getName(),
          DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      dataWriterClassFirstPass,
          DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
          new File(directory, "conditional").getPath(),
      AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
      ConditionalCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "conditional")),
      AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
      featureSelectionThreshold
          );
      builder.add(conditionalAnnotator);
    }

    if (!options.ignoreUncertainty)
    {
      AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(UncertaintyCleartkAnalysisEngine.class); //,  this.additionalParamemters);
      ConfigurationParameterFactory.addConfigurationParameters(
          uncertaintyAnnotator,
          AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
          AssertionEvaluation.GOLD_VIEW_NAME,
//          CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
//          this.dataWriterFactoryClass.getName(),
          DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      dataWriterClassFirstPass,
          DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
          new File(directory, "uncertainty").getPath(),
      AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
      UncertaintyCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "uncertainty")),
      AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
      featureSelectionThreshold,
      AssertionCleartkAnalysisEngine.PARAM_FEATURE_CONFIG,
      options.featConfig
          );
      builder.add(uncertaintyAnnotator);
    }

    if (!options.ignoreSubject)
    {
      AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createPrimitiveDescription(SubjectCleartkAnalysisEngine.class); //,  this.additionalParamemters);
      ConfigurationParameterFactory.addConfigurationParameters(
          subjectAnnotator,
          AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
          AssertionEvaluation.GOLD_VIEW_NAME,
//          CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
//          this.dataWriterFactoryClass.getName(),
          DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      dataWriterClassFirstPass,
          DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
          new File(directory, "subject").getPath(),
      AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
      SubjectCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "subject")),
      AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
      featureSelectionThreshold
          );
      builder.add(subjectAnnotator);
    }

    if (!options.ignoreGeneric)
    {
    AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createPrimitiveDescription(GenericCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        genericAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
//        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
//        this.dataWriterFactoryClass.getName(),
        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      dataWriterClassFirstPass,
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        new File(directory, "generic").getPath(),
      AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
      GenericCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "generic")),
      AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
      featureSelectionThreshold
        );
    builder.add(genericAnnotator);
    }
   
    // 2/20/13 srh adding
    if (!options.ignoreHistory) {
      AnalysisEngineDescription historyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(HistoryCleartkAnalysisEngine.class);
      ConfigurationParameterFactory.addConfigurationParameters(
          historyAnnotator,
          AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
          AssertionEvaluation.GOLD_VIEW_NAME,
//          CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
//          this.dataWriterFactoryClass.getName(),
          DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      dataWriterClassFirstPass,
          DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
          new File(directory, "historyOf").getPath(),
        AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
        HistoryCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "historyOf")),
        AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
        featureSelectionThreshold
          );
      builder.add(historyAnnotator);
    }

/*
    AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
        this.classifierAnnotatorClass,
        this.additionalParameters);
    ConfigurationParameterFactory.addConfigurationParameters(
        classifierAnnotator,
        RelationExtractorAnnotator.PARAM_GOLD_VIEW_NAME,
        RelationExtractorEvaluation.GOLD_VIEW_NAME,
        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
        this.dataWriterFactoryClass.getName(),
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        directory.getPath());
    builder.add(classifierAnnotator);
*/   
   
    SimplePipeline.runPipeline(collectionReader,  builder.createAggregateDescription());
   
    //HideOutput hider = new HideOutput();
    for (String currentAssertionAttribute : annotationTypes)
    {
      File currentDirectory = new File(directory, currentAssertionAttribute);
      trainAndPackage(currentAssertionAttribute, currentDirectory, trainingArguments);
    }
    //hider.restoreOutput();
  }

  @Override
  protected Map<String, AnnotationStatisticsCompact> test(CollectionReader collectionReader, File directory)
      throws Exception {
//    AnalysisEngine classifierAnnotator = AnalysisEngineFactory.createPrimitive(AssertionCleartkAnalysisEngine.getDescription(
//        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
//        new File(directory, "model.jar").getPath()));

    AggregateBuilder builder = new AggregateBuilder();
   
    // directory is such as /cTAKES/workspaces/Apache-cTAKES-trunk/ctakes/ctakes-assertion/sharp_data/model/eval.model
   
    AnalysisEngineDescription goldCopierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceIdentifiedAnnotationsSystemToGoldCopier.class);
    builder.add(goldCopierAnnotator);
   
    AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
    builder.add(assertionAttributeClearerAnnotator);
   
    if ( options.noCleartk ) {
      addExternalAttributeAnnotatorsToAggregate(builder);
    } else {
      addCleartkAttributeAnnotatorsToAggregate(directory, builder);
    }

    if (options.evalOnly && evaluationOutputDirectory != null) {
      // short circuit any other stuff in the pipeline
      builder = new AggregateBuilder();
     
      // uimafit find available type systems on classpath
      TypeSystemDescription typeSystemDescription = TypeSystemDescriptionFactory.createTypeSystemDescription();
     
        AnalysisEngineDescription noOp =
        AnalysisEngineFactory.createPrimitiveDescription(
              NoOpAnnotator.class,
              typeSystemDescription);
      builder.add(noOp);
     
        AnalysisEngineDescription mergeGold =
        AnalysisEngineFactory.createPrimitiveDescription(org.apache.ctakes.assertion.eval.MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.class, typeSystemDescription);
      builder.add(mergeGold);
     
    } else if (evaluationOutputDirectory!=null)  {
        AnalysisEngineDescription xwriter =
        AnalysisEngineFactory.createPrimitiveDescription(
              XWriter.class,
              AssertionComponents.CTAKES_CTS_TYPE_SYSTEM_DESCRIPTION,
              XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
              evaluationOutputDirectory,
              XWriter.PARAM_XML_SCHEME_NAME,
              XWriter.XMI,
              XWriter.PARAM_FILE_NAMER_CLASS_NAME,
              CtakesFileNamer.class.getName());
        builder.add(xwriter);
    }
   
    //SimplePipeline.runPipeline(collectionReader,  builder.createAggregateDescription());
    //AnalysisEngineDescription aggregateDescription = builder.createAggregateDescription();
   
    AnalysisEngine aggregate = builder.createAggregate();
   
    AnnotationStatisticsCompact polarityStats = new AnnotationStatisticsCompact();
    AnnotationStatisticsCompact conditionalStats = new AnnotationStatisticsCompact();
    AnnotationStatisticsCompact uncertaintyStats = new AnnotationStatisticsCompact();
    AnnotationStatisticsCompact subjectStats = new AnnotationStatisticsCompact();
    AnnotationStatisticsCompact genericStats = new AnnotationStatisticsCompact();
    AnnotationStatisticsCompact historyStats = new AnnotationStatisticsCompact()// srh 3/6/13
   
    Map<String, AnnotationStatisticsCompact> map = new TreeMap<String, AnnotationStatisticsCompact>();
    if (!options.ignorePolarity)
    {
      map.put("polarity",  polarityStats);
    }

    if (!options.ignoreConditional)
    {
      map.put("conditional",  conditionalStats);
    }

    if (!options.ignoreUncertainty)
    {
      map.put("uncertainty",  uncertaintyStats);
    }

    if (!options.ignoreSubject)
    {
      map.put("subject", subjectStats);
    }

    if (!options.ignoreGeneric)
    {
      map.put("generic", genericStats);
    }
   
    // srh 3/6/13
    if (!options.ignoreHistory)
    {
      map.put("historyOf", historyStats);
    }

    // run on existing output that has both system (or instance gathering) and gold
    for (JCas jCas : new JCasIterable(collectionReader, aggregate)) {
     
//      printViewNames("Views found by JCasIterable:", jCas);
     
      JCas goldView;
      try {
        goldView = jCas.getView(GOLD_VIEW_NAME);
      } catch (CASException e) {
      logger.info("jCas.getViewName() = " + jCas.getViewName());
        throw new AnalysisEngineProcessException(e);
      }

      String documentId = DocumentIDAnnotationUtil.getDocumentID(jCas);
      System.out.format("document id: %s%n", documentId);
     
      Collection<IdentifiedAnnotation> goldEntitiesAndEvents = new ArrayList<IdentifiedAnnotation>();
      if ( !ignoreAnatomicalSites ) {
        Collection<EntityMention> goldEntities = JCasUtil.select(goldView, EntityMention.class);
        goldEntitiesAndEvents.addAll(goldEntities);
      }
      Collection<EventMention> goldEvents = JCasUtil.select(goldView, EventMention.class);
      goldEntitiesAndEvents.addAll(goldEvents);
      // System.out.format("gold entities: %d%ngold events: %d%n%n", goldEntities.size(), goldEvents.size());
     
      if (goldEntitiesAndEvents.size()==0) {
        // gold annotations might have been read in as just IdentifiedAnnotation annotations
        // since no EventMentio or EntityMention annotations were found, ok to just try IdentifiedAnnotation
        // without concern for using some twice.
          Collection<IdentifiedAnnotation> identifiedAnnotations = JCasUtil.select(goldView, IdentifiedAnnotation.class);
          goldEntitiesAndEvents.addAll(identifiedAnnotations);
       
       
      }
     
      Collection<IdentifiedAnnotation> systemEntitiesAndEvents = new ArrayList<IdentifiedAnnotation>();
      if ( !ignoreAnatomicalSites ) {
        Collection<EntityMention> systemEntities = JCasUtil.select(jCas, EntityMention.class);
        systemEntitiesAndEvents.addAll(systemEntities);
      }
      Collection<EventMention> systemEvents = JCasUtil.select(jCas, EventMention.class);
      systemEntitiesAndEvents.addAll(systemEvents);
//      System.out.format("system entities: %d%nsystem events: %d%n%n", systemEntities.size(), systemEvents.size());
     
      if (!options.ignorePolarity)
      {
        polarityStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
        AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToSpan(),
        AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToFeatureValue("polarity"));
        if(options.printErrors){
          printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "polarity", CONST.NE_POLARITY_NEGATION_PRESENT, Integer.class);
        }
        if(options.printInstances!=null){
          printInstances(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "polarity", CONST.NE_POLARITY_NEGATION_PRESENT, Integer.class, options.printInstances);
        }
      }

      if (!options.ignoreConditional)
      {
        conditionalStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
        AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToSpan(),
        AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToFeatureValue("conditional"));
        if(options.printErrors){
          printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "conditional", CONST.NE_CONDITIONAL_TRUE, Boolean.class);
        }
      }

      if (!options.ignoreUncertainty)
      {
        uncertaintyStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
        AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToSpan(),
        AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToFeatureValue("uncertainty"));
        if(options.printErrors){
          printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "uncertainty", CONST.NE_UNCERTAINTY_PRESENT, Integer.class);
        }
      }

      if (!options.ignoreSubject)
      {
        subjectStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
        AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToSpan(),
        AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToFeatureValue("subject"));
        if(options.printErrors){
          printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "subject", null, CONST.ATTR_SUBJECT_PATIENT.getClass());
        }
      }

      if (!options.ignoreGeneric)
      {
        genericStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
        AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToSpan(),
        AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToFeatureValue("generic"));
        if(options.printErrors){
          printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "generic", CONST.NE_GENERIC_TRUE, Boolean.class);
        }
      }
     
      // srh 3/6/13
      if (!options.ignoreHistory)
      {
        historyStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
            AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToSpan(),
            AnnotationStatisticsCompact.<IdentifiedAnnotation>annotationToFeatureValue("historyOf"));
        if(options.printErrors){
          printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "historyOf", CONST.NE_HISTORY_OF_PRESENT, Integer.class);
        }
      }
     
    }
    return map;
  }

  protected void trainAndPackage(String currentAssertionAttribute, File directory, String[] arguments) throws Exception {
    if (options.featureSelectionThreshold!=null) {
//      InstanceDataWriter.INSTANCES_OUTPUT_FILENAME = "training-data.liblinear";
      // Extracting features and writing instances
      Iterable<Instance<String>> instances = InstanceStream.loadFromDirectory(directory);

      // Collect MinMax stats for feature normalization
      FeatureSelection<String> featureSelection;
      if (currentAssertionAttribute.equals("polarity")) {
        // TODO: parameterize the thresholds
        featureSelection = PolarityCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold);
        featureSelection.train(instances);
        featureSelection.save(PolarityCleartkAnalysisEngine.createFeatureSelectionURI(directory));
      }
      else if (currentAssertionAttribute.equals("uncertainty")) {
        // TODO: parameterize the thresholds
        featureSelection = UncertaintyCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold);
        featureSelection.train(instances);
        featureSelection.save(UncertaintyCleartkAnalysisEngine.createFeatureSelectionURI(directory));
      }
      else if (currentAssertionAttribute.equals("conditional")) {
        // TODO: parameterize the thresholds
        featureSelection = ConditionalCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold);
        featureSelection.train(instances);
        featureSelection.save(ConditionalCleartkAnalysisEngine.createFeatureSelectionURI(directory));
      }
      else if (currentAssertionAttribute.equals("subject")) {
        // TODO: parameterize the thresholds
        featureSelection = SubjectCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold);
        featureSelection.train(instances);
        featureSelection.save(SubjectCleartkAnalysisEngine.createFeatureSelectionURI(directory));
      }
      else if (currentAssertionAttribute.equals("generic")) {
        // TODO: parameterize the thresholds
        featureSelection = GenericCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold);
        featureSelection.train(instances);
        featureSelection.save(GenericCleartkAnalysisEngine.createFeatureSelectionURI(directory));
      }
      else if (currentAssertionAttribute.equals("historyOf")) {
        // TODO: parameterize the thresholds
        featureSelection = HistoryCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold);
        featureSelection.train(instances);
        featureSelection.save(HistoryCleartkAnalysisEngine.createFeatureSelectionURI(directory));
      }
      else {
        featureSelection = null;
      }


        // now write in the libsvm format
//        LIBLINEARStringOutcomeDataWriter dataWriter = new LIBLINEARStringOutcomeDataWriter(directory);
      Constructor c = this.dataWriterClass.getConstructor(File.class);
        DataWriter dataWriter = (DataWriter) c.newInstance(directory);
       
        // try filtering
        for (Instance<String> instance : instances) {
          dataWriter.write(featureSelection.transform(instance));
        }
        dataWriter.finish();
    }

    // train models based on instances
    JarClassifierBuilder.trainAndPackage(directory, arguments);
  }
 
  protected Class<? extends DataWriter> getDataWriterClass()
      throws ResourceInitializationException {
    return (options.featureSelectionThreshold!=null)
        ? InstanceDataWriter.class
        : this.dataWriterClass;
  }
 
  private static boolean DEBUG = false;
  private static void printViewNames(String message, JCas jcas) {
 
    Iterator<JCas> viewIter;
  try {
    viewIter = jcas.getViewIterator();
  } catch (CASException e) {
    e.printStackTrace();
    return;
  }
    while (viewIter.hasNext()) {
      JCas view = viewIter.next();
      String viewName = view.getViewName();
      logger.debug(message + " View name " + viewName);
      int numIndexedAnnotations = view.getAnnotationIndex().size();
      logger.debug(message + "  has " + numIndexedAnnotations + " indexed annotations");
      if (viewName.toLowerCase().contains("gold")) {
            if (DEBUG) printAnnotations(IdentifiedAnnotation.type, view);
      } else {
        if (DEBUG) printAnnotations(EventMention.type, view);
        if (DEBUG) printAnnotations(EntityMention.type, view);
      }
    }

  }

private static void printAnnotations(int uimaAnnotationType, JCas view) {
 
  AnnotationIndex<Annotation> index = view.getAnnotationIndex(uimaAnnotationType);
  Iterator<Annotation> iter = index.iterator();
  output("Printing annotations for view " + view.getViewName());
  while (iter.hasNext()) {
    Annotation a = iter.next();
    printAnnotation(a);
  }
 
  //// Temp debug code
  //if (view.getViewName().equals("GoldView")) {
  //  AnnotationIndex<Annotation> indexOfAll = view.getAnnotationIndex();
  //  Iterator<Annotation> iterOverAll = indexOfAll.iterator();
  //  output("Printing ALL annotations for view " + view.getViewName());
  //  while (iterOverAll.hasNext()) {
  //    Annotation a = iterOverAll.next();
  //    printAnnotation(a);
  //  }
  // 
  //}
 
}

private static void printAnnotation(Annotation a) {
 
  String s = String.format(" (%d, %d) ", a.getBegin(), a.getEnd());
  if (a instanceof IdentifiedAnnotation) {
    s = s + ((IdentifiedAnnotation) a).getTypeID() + "=typeID, ";
  }
  s = s + "|" + a.getCoveredText() + "|";
  s = s + a.getClass().getCanonicalName();
  output(s);
 
}


private static void output(Object o) {
  if (o==null) {
    System.out.println(o);
  } else {
    System.out.println(o.toString());
  }
}
private static void printErrors(JCas jCas,
      Collection<IdentifiedAnnotation> goldEntitiesAndEvents,
      Collection<IdentifiedAnnotation> systemEntitiesAndEvents, String classifierType, Object trueCategory, Class<? extends Object> categoryClass) throws ResourceProcessException {

  String documentId = DocumentIDAnnotationUtil.getDocumentID(jCas);
 
    Map<HashableAnnotation, IdentifiedAnnotation> goldMap = Maps.newHashMap();
    for (IdentifiedAnnotation mention : goldEntitiesAndEvents) {
      goldMap.put(new HashableAnnotation(mention), mention);
    }
    Map<HashableAnnotation, IdentifiedAnnotation> systemMap = Maps.newHashMap();
    for (IdentifiedAnnotation relation : systemEntitiesAndEvents) {
      systemMap.put(new HashableAnnotation(relation), relation);
    }
    Set<HashableAnnotation> all = Sets.union(goldMap.keySet(), systemMap.keySet());
    List<HashableAnnotation> sorted = Lists.newArrayList(all);
    Collections.sort(sorted);
    for (HashableAnnotation key : sorted) {
      IdentifiedAnnotation goldAnnotation = goldMap.get(key);
      IdentifiedAnnotation systemAnnotation = systemMap.get(key);
      Object goldLabel=null;
      Object systemLabel=null;
      if (goldAnnotation == null) {
        logger.debug(key + " not found in gold annotations ");
      } else {
        Feature feature = goldAnnotation.getType().getFeatureByBaseName(classifierType);
        goldLabel = getFeatureValue(feature, categoryClass, goldAnnotation);
        //  Integer goldLabel = goldAnnotation.getIntValue(feature);
      }
     
      if (systemAnnotation == null) {
        logger.info(key + " not found in system annotations ");
      } else {
        Feature feature = systemAnnotation.getType().getFeatureByBaseName(classifierType);
        systemLabel = getFeatureValue(feature, categoryClass, systemAnnotation);
        //  Integer systemLabel = systemAnnotation.getIntValue(feature);
      }
     
      String typeId;
      if (systemAnnotation!=null) {
        typeId = systemAnnotation.getTypeID()+"";
      } else  {
        typeId = "X";
      }
     
      if (goldLabel==null) {
        // skip counting the attribute value since we have no gold label to compare to
        logger.debug("Skipping annotation with label " + systemLabel + " because gold label is null");
      } else  {
        if(!goldLabel.equals(systemLabel)){
          if(trueCategory == null){
            // used for multi-class case.  Incorrect_system_label(Correct_label):
            System.out.println(classifierType+ " "+ systemLabel + "(" + goldLabel + "): " + formatError(jCas, systemAnnotation));
          }else if(systemLabel.equals(trueCategory)){
            System.out.println(classifierType+" FP: " + typeId  + " " + formatError(jCas, systemAnnotation) + "| gold:|" + formatError(jCas, goldAnnotation) + " " + documentId);
          }else{
            System.out.println(classifierType+" FN: " + typeId + " " + formatError(jCas, goldAnnotation)+ "| system:|" + formatError(jCas, systemAnnotation) + " " + documentId);
          }
        }else{
          if(trueCategory == null){
            // multi-class case -- probably don't want to print anything?
            System.out.println(classifierType+ " "+ systemLabel + "(" + goldLabel + "): " + formatError(jCas, systemAnnotation));
          }else if(systemLabel.equals(trueCategory)){
            System.out.println(classifierType+" TP: " + typeId + " " + formatError(jCas, systemAnnotation) + "| gold:|" + formatError(jCas, goldAnnotation) + " " + documentId);
          }else{
            System.out.println(classifierType+" TN: " + typeId + " " + formatError(jCas, systemAnnotation) + "| gold:|" + formatError(jCas, goldAnnotation) + " " + documentId);
          }
        }
      }
    }
  }
 
private static void printInstances(JCas jCas,
      Collection<IdentifiedAnnotation> goldEntitiesAndEvents,
      Collection<IdentifiedAnnotation> systemEntitiesAndEvents, String classifierType,
      Object trueCategory, Class<? extends Object> categoryClass,
      File outputfile)
          throws ResourceProcessException, IOException {

  String documentId = DocumentIDAnnotationUtil.getDocumentID(jCas);
    BufferedWriter fileOutWriter = new BufferedWriter(new FileWriter(outputfile,true), 32768);

    Map<HashableAnnotation, IdentifiedAnnotation> goldMap = Maps.newHashMap();
    for (IdentifiedAnnotation mention : goldEntitiesAndEvents) {
      goldMap.put(new HashableAnnotation(mention), mention);
    }
    Map<HashableAnnotation, IdentifiedAnnotation> systemMap = Maps.newHashMap();
    for (IdentifiedAnnotation relation : systemEntitiesAndEvents) {
      systemMap.put(new HashableAnnotation(relation), relation);
    }
    Set<HashableAnnotation> all = Sets.union(goldMap.keySet(), systemMap.keySet());
    List<HashableAnnotation> sorted = Lists.newArrayList(all);
    Collections.sort(sorted);
    for (HashableAnnotation key : sorted) {
      IdentifiedAnnotation goldAnnotation = goldMap.get(key);
      IdentifiedAnnotation systemAnnotation = systemMap.get(key);
      Object goldLabel=null;
      Object systemLabel=null;
      if (goldAnnotation == null) {
        logger.debug(key + " not found in gold annotations ");
      } else {
        Feature feature = goldAnnotation.getType().getFeatureByBaseName(classifierType);
        goldLabel = getFeatureValue(feature, categoryClass, goldAnnotation);
        //  Integer goldLabel = goldAnnotation.getIntValue(feature);
      }
     
      if (systemAnnotation == null) {
        logger.info(key + " not found in system annotations ");
      } else {
        Feature feature = systemAnnotation.getType().getFeatureByBaseName(classifierType);
        systemLabel = getFeatureValue(feature, categoryClass, systemAnnotation);
        //  Integer systemLabel = systemAnnotation.getIntValue(feature);
      }
     
      String typeId = "X";
      String typeName = "IdentifiedAnnotation";
      int polarity, uncertainty, historyOf;
      boolean conditional, generic;
      String subject = "";
      String cui, coveredText = "";
      String instanceData = "";
      if (systemAnnotation!=null && systemAnnotation.getEnd()>=0) {
        typeId      = systemAnnotation.getTypeID()+"";
        typeName    = systemAnnotation.getClass().getSimpleName();
        polarity    = systemAnnotation.getPolarity();
        uncertainty = systemAnnotation.getUncertainty();
        conditional = systemAnnotation.getConditional();
        generic     = systemAnnotation.getGeneric();
        subject     = systemAnnotation.getSubject();
        historyOf   = systemAnnotation.getHistoryOf();
        coveredText = systemAnnotation.getCoveredText().replaceAll("\\n", " ").replaceAll(",",";");
        instanceData = documentId+","+polarity+","+uncertainty+","+conditional+","+generic+","+subject+","+historyOf+","+
            typeId+","+typeName+","+coveredText;
      }
     
      if (goldLabel==null) {
        // skip counting the attribute value since we have no gold label to compare to
        logger.debug("Skipping annotation with label " + systemLabel + " because gold label is null");
      } else if (instanceData.equals("")) {
        continue;
      }
      else  {
        if(!goldLabel.equals(systemLabel)){
          if(trueCategory == null){
            // used for multi-class case.  Incorrect_system_label(Correct_label):
            fileOutWriter.write(classifierType+",F,"+systemLabel+","+goldLabel+","+instanceData+"\n");
          }else if(systemLabel.equals(trueCategory)){
            fileOutWriter.write(classifierType+",FP,"+systemLabel+","+goldLabel+","+instanceData+"\n");
          }else{
            fileOutWriter.write(classifierType+",FN,"+systemLabel+","+goldLabel+","+instanceData+"\n");
          }
        }else{
          if(trueCategory == null){
            // multi-class case -- probably don't want to print anything?
            fileOutWriter.write(classifierType+ ",T,"+systemLabel+","+goldLabel+","+instanceData+"\n");
          }else if(systemLabel.equals(trueCategory)){
            fileOutWriter.write(classifierType+",TP,"+systemLabel+","+goldLabel+","+instanceData+"\n");
          }else{
            fileOutWriter.write(classifierType+",TN,"+systemLabel+","+goldLabel+","+instanceData+"\n");
          }
        }
        fileOutWriter.flush();
      }
    }
    fileOutWriter.close();
  }
  private static Object getFeatureValue(Feature feature,
      Class<? extends Object> class1, Annotation annotation) throws ResourceProcessException {
    if(class1 == Integer.class){
      return annotation.getIntValue(feature);
    }else if(class1 == String.class){
      return annotation.getStringValue(feature);
    }else if(class1 == Boolean.class){
      return annotation.getBooleanValue(feature);
    }else{
      throw new ResourceProcessException("Received a class type that I'm not familiar with: ", new Object[]{class1});
    }
  }

  private static String formatError(JCas jcas, IdentifiedAnnotation mention){
    List<Sentence> context = JCasUtil.selectCovering(jcas, Sentence.class, mention.getBegin(), mention.getEnd());
    StringBuffer buff = new StringBuffer();
    if(context.size() > 0){
      Sentence sent = context.get(0);
      buff.append(sent.getCoveredText());
      long offset = mention.getBegin() - sent.getBegin();
      if (offset>=Integer.MAX_VALUE || offset<=Integer.MIN_VALUE) { offset=0; } // for spanless annots
      buff.insert((int)offset, "***");
      offset += (mention.getEnd()-mention.getBegin() + 3);
      buff.insert((int)offset, "***");
    }
    return buff.toString();
  }

public static class HashableAnnotation implements Comparable<HashableAnnotation> {

    protected int begin;

    protected int end;

    public HashableAnnotation(int begin, int end) {
      this.begin = begin;
      this.end = end;
    }

    public HashableAnnotation(Annotation arg1) {
      this(arg1.getBegin(), arg1.getEnd());
    }

    @Override
    public boolean equals(Object otherObject) {
      boolean result = false;
      if (otherObject instanceof HashableAnnotation) {
        HashableAnnotation other = (HashableAnnotation) otherObject;
        result = (this.getClass() == other.getClass() && this.begin == other.begin
            && this.end == other.end);
      }
     
      return result;
    }

    @Override
    public int hashCode() {
      return Objects.hashCode(this.begin, this.end);
    }

    @Override
    public String toString() {
      return String.format(
          "%s(%s,%s)",
          this.getClass().getSimpleName(),
          this.begin,
          this.end);
    }

    @Override
    public int compareTo(HashableAnnotation that) {
      int thisBegin = this.begin;
      int thatBegin = that.begin;
      if (thisBegin < thatBegin) {
        return -1;
      } else if (thisBegin > thatBegin) {
        return +1;
      } else if (this.equals(that)) {
        return 0;
      } else {
        return +1; // arbitrary choice for overlapping
      }
    }
  }

private void addExternalAttributeAnnotatorsToAggregate(AggregateBuilder builder)
    throws UIMAException, IOException {
  // RUN ALL THE OLD (non-ClearTK) CLASSIFIERS
  AnalysisEngineDescription oldAssertionAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription("desc/assertionAnalysisEngine");
  ConfigurationParameterFactory.addConfigurationParameters(
      oldAssertionAnnotator,
      AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
      AssertionEvaluation.GOLD_VIEW_NAME
  );
  builder.add(oldAssertionAnnotator);

  AnalysisEngineDescription oldConversionAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription("desc/conceptConverterAnalysisEngine");
  ConfigurationParameterFactory.addConfigurationParameters(
      oldConversionAnnotator,
      AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
      AssertionEvaluation.GOLD_VIEW_NAME
  );
  builder.add(oldConversionAnnotator);

  AnalysisEngineDescription oldSubjectAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription("desc/SubjectAttributeAnalysisEngine");
  ConfigurationParameterFactory.addConfigurationParameters(
      oldSubjectAnnotator,
      AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
      AssertionEvaluation.GOLD_VIEW_NAME
  );
  builder.add(oldSubjectAnnotator);

  AnalysisEngineDescription oldGenericAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription("desc/GenericAttributeAnalysisEngine");
  ConfigurationParameterFactory.addConfigurationParameters(
      oldGenericAnnotator,
      AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
      AssertionEvaluation.GOLD_VIEW_NAME
  );
  builder.add(oldGenericAnnotator);
}

private void addCleartkAttributeAnnotatorsToAggregate(File directory,
    AggregateBuilder builder) throws UIMAException, IOException,
    ResourceInitializationException {
//  AnalysisEngineDescription cuePhraseLookupAnnotator =
//    AnalysisEngineFactory.createAnalysisEngineDescription("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator");
//  builder.add(cuePhraseLookupAnnotator);
    builder.add(AnalysisEngineFactory.createPrimitiveDescription(AlternateCuePhraseAnnotator.class, new Object[]{}));

//  String generalSectionRegexFileUri =
//    "org/mitre/medfacts/zoner/section_regex.xml";
//  AnalysisEngineDescription zonerAnnotator =
//    AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
//        ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
//        generalSectionRegexFileUri
//    );
////  builder.add(zonerAnnotator);
//
//  String mayoSectionRegexFileUri =
//    "org/mitre/medfacts/uima/mayo_sections.xml";
//  AnalysisEngineDescription mayoZonerAnnotator =
//    AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
//        ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
//        mayoSectionRegexFileUri
//    );
//  builder.add(mayoZonerAnnotator);

  // Add the ClearTk or the ytex negation (polarity) classifier
  if (!options.ignorePolarity)
  {
    AnalysisEngineDescription polarityAnnotator;
      if (options.useYtexNegation) {
        polarityAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(YTEX_NEGATION_DESCRIPTOR);
        builder.add(polarityAnnotator);
      } else {
        if (options.feda) {
          polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityFedaCleartkAnalysisEngine.class);

            ConfigurationParameterFactory.addConfigurationParameters(
                polarityAnnotator,
                AssertionCleartkAnalysisEngine.FILE_TO_DOMAIN_MAP,
                options.testDirectory
                );
        } else {
          // default: cleartk-based polarity, no domain adaptation
          polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //,  this.additionalParamemters);
          }
        ConfigurationParameterFactory.addConfigurationParameters(
            polarityAnnotator,
            AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
            AssertionEvaluation.GOLD_VIEW_NAME,
            GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
            new File(new File(directory, "polarity"), "model.jar").getPath(),
            PolarityCleartkAnalysisEngine.PARAM_FEATURE_CONFIG,
            options.featConfig
            );
        builder.add(polarityAnnotator);
      }
  }

  // Add the rest of the ClearTk classifiers
  if (!options.ignoreConditional)
  {
    AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ConditionalCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        conditionalAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(new File(directory, "conditional"), "model.jar").getPath()
    );
    builder.add(conditionalAnnotator);
  }

  if (!options.ignoreUncertainty)
  {
    AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(UncertaintyCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        uncertaintyAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(new File(directory, "uncertainty"), "model.jar").getPath()
    );
    builder.add(uncertaintyAnnotator);
  }

  if (!options.ignoreSubject)
  {
    AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createPrimitiveDescription(SubjectCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        subjectAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(new File(directory, "subject"), "model.jar").getPath()
    );
    builder.add(subjectAnnotator);
  }

  if (!options.ignoreGeneric)
  {
    AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createPrimitiveDescription(GenericCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        genericAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(new File(directory, "generic"), "model.jar").getPath()
    );
    builder.add(genericAnnotator);
  }
 
  if(!options.ignoreHistory){
    AnalysisEngineDescription historyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(HistoryCleartkAnalysisEngine.class);
    ConfigurationParameterFactory.addConfigurationParameters(
        historyAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(new File(directory, "historyOf"), "model.jar").getPath()
        );
    builder.add(historyAnnotator);
  }
}

  public static final String GOLD_VIEW_NAME = "GoldView";

  /**
   * Class that copies the manual {@link Modifier} annotations to the default CAS.
   */
  public static class OnlyGoldAssertions extends JCasAnnotator_ImplBase {

    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
      JCas goldView;
      try {
        goldView = jCas.getView(GOLD_VIEW_NAME);
      } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
      }

      // remove any automatically generated Modifiers
      for (EntityMention entityMention : JCasUtil.select(jCas, EntityMention.class)) {
        entityMention.removeFromIndexes();
      }

      // copy over the manually annotated Modifiers
      for (EntityMention entityMention : JCasUtil.select(goldView, EntityMention.class)) {
        EntityMention newEntityMention = new EntityMention(jCas, entityMention.getBegin(), entityMention.getEnd());
        newEntityMention.setTypeID(entityMention.getTypeID());
        newEntityMention.setId(entityMention.getId());
        newEntityMention.setDiscoveryTechnique(entityMention.getDiscoveryTechnique());
        newEntityMention.setConfidence(entityMention.getConfidence());
        newEntityMention.addToIndexes();
      }
    }
  }
 

  /**
   * Annotator that removes cTAKES EntityMentions and Modifiers from the system
   * view, and copies over the manually annotated EntityMentions and Modifiers
   * from the gold view.
   *
   */
  public static class ReplaceCTakesEntityMentionsAndModifiersWithGold extends
      JCasAnnotator_ImplBase
  {

    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException
    {
      JCas goldView, systemView;
      try
      {
        goldView = jCas.getView(GOLD_VIEW_NAME);
        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
      } catch (CASException e)
      {
        throw new AnalysisEngineProcessException(e);
      }

      // remove cTAKES EntityMentions and Modifiers from system view
      List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
      cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
      cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
      for (IdentifiedAnnotation cTakesMention : cTakesMentions)
      {
        cTakesMention.removeFromIndexes();
      }

      // copy gold EntityMentions and Modifiers to the system view
      List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
      goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
      goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
      CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
      for (IdentifiedAnnotation goldMention : goldMentions)
      {
        Annotation copy = (Annotation) copier.copyFs(goldMention);
        Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
        copy.setFeatureValue(sofaFeature, systemView.getSofa());
        copy.addToIndexes();
      }
    }
  }
 
//  public static class ReplaceGoldEntityMentionsAndModifiersWithCTakes extends
//      JCasAnnotator_ImplBase
//  {
//
//    @Override
//    public void process(JCas jCas) throws AnalysisEngineProcessException
//    {
//      JCas goldView, systemView;
//      try
//      {
//        goldView = jCas.getView(GOLD_VIEW_NAME);
//        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
//      } catch (CASException e)
//      {
//        throw new AnalysisEngineProcessException(e);
//      }
//
//      // remove manual EntityMentions and Modifiers from gold view
//      List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
//      goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
//      goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
//      for (IdentifiedAnnotation goldMention : goldMentions)
//      {
//        goldMention.removeFromIndexes();
//      }
//
//      // copy cTAKES EntityMentions and Modifiers to gold view
//      List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
//      cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
//      cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
//      CasCopier copier = new CasCopier(systemView.getCas(), goldView.getCas());
//      for (IdentifiedAnnotation cTakesMention : cTakesMentions)
//      {
//        Annotation copy = (Annotation) copier.copyFs(cTakesMention);
//        Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
//        copy.setFeatureValue(sofaFeature, goldView.getSofa());
//        copy.addToIndexes();
//      }
//
//      // replace gold EntityMentions and Modifiers in relations with cTAKES ones
//      List<BinaryTextRelation> relations = new ArrayList<BinaryTextRelation>();
//      relations.addAll(JCasUtil.select(goldView, BinaryTextRelation.class));
//      for (BinaryTextRelation relation : relations)
//      {
//
//        // attempt to replace the gold RelationArguments with system ones
//        int replacedArgumentCount = 0;
//        for (RelationArgument relArg : Arrays.asList(relation.getArg1(),
//            relation.getArg2()))
//        {
//          Annotation goldArg = relArg.getArgument();
//          Class<? extends Annotation> argClass = goldArg.getClass();
//
//          // find all annotations covered by the gold argument and of the same
//          // class (these should
//          // be the ones copied over from the cTAKES output earlier)
//          List<? extends Annotation> systemArgs = JCasUtil.selectCovered(
//              goldView, argClass, goldArg);
//
//          // no ctakes annotation found
//          if (systemArgs.size() == 0)
//          {
//            String word = "no";
//            String className = argClass.getSimpleName();
//            String argText = goldArg.getCoveredText();
//            String message = String.format("%s %s for \"%s\"", word, className,
//                argText);
//            this.getContext().getLogger().log(Level.FINE, message);
//            continue;
//          }
//
//          // if there's exactly one annotation, replace the gold one with that
//          if (systemArgs.size() == 1)
//          {
//            relArg.setArgument(systemArgs.get(0));
//            replacedArgumentCount += 1;
//          }
//
//          else
//          {
//            // multiple ctakes arguments found; look for one that matches
//            // exactly
//            // e.g. gold: "right breast", ctakes: "right breast", "breast"
//            for (Annotation systemArg : systemArgs)
//            {
//              String goldArgText = goldArg.getCoveredText();
//              String systemArgText = systemArg.getCoveredText();
//              if (systemArgText.equals(goldArgText))
//              {
//                relArg.setArgument(systemArg);
//                replacedArgumentCount += 1;
//              }
//            }
//
//            if (replacedArgumentCount < 1)
//            {
//              // issue a warning message
//              String word = "multiple";
//              String className = argClass.getSimpleName();
//              String argText = goldArg.getCoveredText();
//              String message = String.format("%s %s for \"%s\"", word,
//                  className, argText);
//              this.getContext().getLogger().log(Level.FINE, message);
//
//              System.out.println("gold argument: " + goldArg.getCoveredText());
//              System.out.println("gold type: "
//                  + ((IdentifiedAnnotation) goldArg).getTypeID());
//              for (Annotation systemArg : systemArgs)
//              {
//                System.out.println("ctakes argument: "
//                    + systemArg.getCoveredText());
//                System.out.println("ctakes type: "
//                    + ((IdentifiedAnnotation) systemArg).getTypeID());
//              }
//              System.out.println();
//            }
//          }
//        }
//
//        // if replacements were not found for both arguments, remove the
//        // relation
//        if (replacedArgumentCount < 2)
//        {
//          relation.removeFromIndexes();
//        }
//      }
//    }
//  }
 
  /**
   * Class that copies the manual {@link Modifier} annotations to the default CAS.
   */
  public static class ReferenceIdentifiedAnnotationsSystemToGoldCopier extends JCasAnnotator_ImplBase {

    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
      JCas goldView;
      try {
        goldView = jCas.createView(GOLD_VIEW_NAME);
        goldView.setSofaDataString(jCas.getSofaDataString(), jCas.getSofaMimeType());
        //goldView.setDocumentText(jCas.getDocumentText());
        //goldView = jCas.getView(GOLD_VIEW_NAME);
      } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
      }

//      // remove any automatically generated Modifiers
//      for (Modifier modifier : JCasUtil.select(jCas, Modifier.class)) {
//        modifier.removeFromIndexes();
//      }
     
      for (EntityMention oldSystemEntityMention : JCasUtil.select(jCas, EntityMention.class))
      {
        EntityMention newGoldEntityMention = new EntityMention(goldView, oldSystemEntityMention.getBegin(), oldSystemEntityMention.getEnd());
       
        // copying assertion fields
        newGoldEntityMention.setDiscoveryTechnique(oldSystemEntityMention.getDiscoveryTechnique());
        newGoldEntityMention.setUncertainty(oldSystemEntityMention.getUncertainty());
        newGoldEntityMention.setConditional(oldSystemEntityMention.getConditional());
        newGoldEntityMention.setGeneric(oldSystemEntityMention.getGeneric());
        newGoldEntityMention.setPolarity(oldSystemEntityMention.getPolarity());
        newGoldEntityMention.setSubject(oldSystemEntityMention.getSubject());
        newGoldEntityMention.setHistoryOf(oldSystemEntityMention.getHistoryOf());

        // copying non-assertion fields
        newGoldEntityMention.setConfidence(oldSystemEntityMention.getConfidence());
        newGoldEntityMention.setTypeID(oldSystemEntityMention.getTypeID());
       
        newGoldEntityMention.addToIndexes();
      }

      for (EventMention oldSystemEventMention : JCasUtil.select(jCas, EventMention.class))
      {
        EventMention newGoldEventMention = new EventMention(goldView, oldSystemEventMention.getBegin(), oldSystemEventMention.getEnd());
       
        // copying assertion fields
        newGoldEventMention.setDiscoveryTechnique(oldSystemEventMention.getDiscoveryTechnique());
        newGoldEventMention.setUncertainty(oldSystemEventMention.getUncertainty());
        newGoldEventMention.setConditional(oldSystemEventMention.getConditional());
        newGoldEventMention.setGeneric(oldSystemEventMention.getGeneric());
        newGoldEventMention.setPolarity(oldSystemEventMention.getPolarity());
        newGoldEventMention.setSubject(oldSystemEventMention.getSubject());
        newGoldEventMention.setHistoryOf(oldSystemEventMention.getHistoryOf());

        // copying non-assertion fields
        newGoldEventMention.setConfidence(oldSystemEventMention.getConfidence());
        newGoldEventMention.setTypeID(oldSystemEventMention.getTypeID());

        newGoldEventMention.addToIndexes();
      }
     
      // TODO do we need to copy supporting feature structures (particularly ontology concept array)??

    } // end of method ReferenceIdentifiedAnnotationsSystemToGoldCopier.process()
  } // end of class ReferenceIdentifiedAnnotationsSystemToGoldCopier

  /**
   * Class that copies the manual {@link Modifier} annotations to the default CAS.
   */
  public static class ReferenceSupportingAnnotationsSystemToGoldCopier extends JCasAnnotator_ImplBase {

    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
      JCas goldView;
      try {
        goldView = jCas.getView(GOLD_VIEW_NAME);
      } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
      }

//      // remove any automatically generated Modifiers
//      for (Modifier modifier : JCasUtil.select(jCas, Modifier.class)) {
//        modifier.removeFromIndexes();
//      }
     
      for (Sentence oldSystemSentence : JCasUtil.select(jCas, Sentence.class))
      {
        Sentence newGoldSentence = new Sentence(goldView, oldSystemSentence.getBegin(), oldSystemSentence.getEnd());
       
        newGoldSentence.addToIndexes();
      }

      for (BaseToken oldSystemToken : JCasUtil.select(jCas, BaseToken.class))
      {
        BaseToken newGoldToken = null; //new BaseToken(goldView, oldSystemEventMention.getBegin(), oldSystemEventMention.getEnd());

        // TODO the following commented out block is an alternative to having the hard coded if..then..else-if..else block for constructing new BaseToken objects
//        Constructor<? extends BaseToken> constructor = null;
//        try
//        {
//          constructor = oldSystemToken.getClass().getConstructor(JCas.class, int.class, int.class);
//        } catch(NoSuchMethodException| SecurityException e)
//        {
//          logger.error("problem getting constructor for copying BaseToken instance (inside AssertionEvalBasedOnModifier.ReferenceSupportingAnnotationsSystemToGoldcopier.process())");
//          continue;
//        }
//        try
//        {
//          newGoldToken = constructor.newInstance(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
//        } catch (InstantiationException | IllegalAccessException
//            | IllegalArgumentException | InvocationTargetException e)
//        {
//          logger.error("problem invoking constructor to copy BaseToken instance (inside AssertionEvalBasedOnModifier.ReferenceSupportingAnnotationsSystemToGoldcopier.process())");
//          continue;
//        }
       
        String oldSystemTokenClass = oldSystemToken.getClass().getName();
        if (oldSystemTokenClass.equals(WordToken.class.getName()))
        {
          newGoldToken = new WordToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(ContractionToken.class.getName()))
        {
          newGoldToken = new ContractionToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(NewlineToken.class.getName()))
        {
          newGoldToken = new NewlineToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(NumToken.class.getName()))
        {
          newGoldToken = new NumToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(PunctuationToken.class.getName()))
        {
          newGoldToken = new PunctuationToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(SymbolToken.class.getName()))
        {
          newGoldToken = new SymbolToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(BaseToken.class.getName()))
        {
          newGoldToken = new BaseToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else
        {
          newGoldToken = new BaseToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        }
       
        newGoldToken.setPartOfSpeech(oldSystemToken.getPartOfSpeech());
        newGoldToken.setTokenNumber(oldSystemToken.getTokenNumber());
       
        newGoldToken.addToIndexes();
      }

    } // end of method ReferenceSupportingAnnotationsSystemToGoldCopier.process()
  } // end of class ReferenceSupportingAnnotationsSystemToGoldCopier

  /**
   * Class that copies the manual {@link Modifier} annotations to the default CAS.
   */
  public static class ReferenceAnnotationsSystemAssertionClearer extends JCasAnnotator_ImplBase
  {

    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException
    {
      for (EntityMention entityMention : JCasUtil.select(jCas,  EntityMention.class))
      {
        entityMention.setPolarity(1);
      }
      for (EventMention eventMention : JCasUtil.select(jCas,  EventMention.class))
      {
        eventMention.setPolarity(1);
      }
    } // end method ReferenceAnnotationsSystemAssertionClearer.process()
  } // end class ReferenceAnnotationsSystemAssertionClearer




 
} // end of class AssertionEvalBasedOnModifier
TOP

Related Classes of org.apache.ctakes.assertion.eval.AssertionEvaluation

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.