Package org.uimafit.factory

Examples of org.uimafit.factory.AggregateBuilder


          // use the uimafit method of finding available type system
          // descriptor via META-INF/org.uimafit/types.txt
          // (found in ctakes-type-system/src/main/resources)
        TypeSystemDescriptionFactory.createTypeSystemDescription();
     
      AggregateBuilder aggregate = new AggregateBuilder();
     
      CollectionReaderDescription collectionReader = CollectionReaderFactory.createDescription(
          FilesInDirectoryCollectionReader.class,
          typeSystemDescription,
          "InputDirectory",
          textDirectory.toString()
          );
     
      // read the UMLS_CEM data from Knowtator
      AnalysisEngineDescription goldAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
          SHARPKnowtatorXMLReader.class,
          typeSystemDescription,
          "TextDirectory", // 3/13/13 halgrim changed from "TextURI" trying to work with new SHARPKnowtatorXMLReader.java
          //"/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/sandbox/batch02_mayo/knowtator/"
          textDirectory.toString() + "/"
      );
      aggregate.add(goldAnnotator);

      // write just the XMI version of what's in Knowtator UMLS_CEM
      AnalysisEngineDescription xWriter = AnalysisEngineFactory.createPrimitiveDescription(
          XWriter.class,
          typeSystemDescription,
          XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
          xmiDirectory.toString(),
          XWriter.PARAM_FILE_NAMER_CLASS_NAME,
          CtakesFileNamer.class.getName()
      );
      aggregate.add(xWriter);

      // fill in other values that are necessary for preprocessing
      AnalysisEngineDescription preprocessAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(
          "desc/analysis_engine/AttributeDiscoveryPreprocessor"
          );
      aggregate.add(preprocessAnnotator);
     
      if (trainDirectory!=null && testDirectory!=null && devDirectory!=null) {
        File subcorpusDirectory;
        switch (SharpCorpusSplit.splitSeed(currentBatchDirectory)) {
        case TRAIN:
          subcorpusDirectory = trainDirectory;
          break;
        case TEST:
          subcorpusDirectory = testDirectory;
          break;
        case DEV:
          subcorpusDirectory = devDirectory;
          break;
        case CROSSVAL:
          subcorpusDirectory = trainDirectory;
          break;
        default:
          subcorpusDirectory = trainDirectory;
          break;
        }
        AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createPrimitiveDescription(
            XWriter.class,
            typeSystemDescription,
            XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
            subcorpusDirectory,
            XWriter.PARAM_FILE_NAMER_CLASS_NAME,
            CtakesFileNamer.class.getName()
        );
        aggregate.add(xWriter2);
//        SimplePipeline.runPipeline(collectionReader, goldAnnotator, xWriter, xWriter2);
      }

      SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
    }

    logger.info("Finished!");
  }
View Full Code Here


            // descriptor via META-INF/org.uimafit/types.txt
            // (found in ctakes-type-system/src/main/resources)
          TypeSystemDescriptionFactory.createTypeSystemDescription();
       
        File textDirectory = new File(batchDir, "text");
        AggregateBuilder aggregate = new AggregateBuilder();
       
        CollectionReaderDescription collectionReader = CollectionReaderFactory.createDescription(
            FilesInDirectoryCollectionReader.class,
            typeSystemDescription,
            "InputDirectory",
            textDirectory.toString()
            );
       
        // read the UMLS_CEM data from Knowtator
        AnalysisEngineDescription goldAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
            SHARPKnowtatorXMLReader.class,
            typeSystemDescription,
            "TextDirectory", // 3/13/13 halgrim changed from "TextURI" trying to work with new SHARPKnowtatorXMLReader.java
            //"/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/sandbox/batch02_mayo/knowtator/"
            textDirectory.toString() + "/"
        );
        aggregate.add(goldAnnotator);

        // fill in other values that are necessary for preprocessing
        AnalysisEngineDescription preprocessAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(
            "desc/analysis_engine/AttributeDiscoveryPreprocessor"
            );
        aggregate.add(preprocessAnnotator);

        File subcorpusDir = null;
//        Subcorpus subcorpus = SharpCorpusSplit.splitStratified(Integer.parseInt(batchDir.getName()));
        Subcorpus subcorpus = splitFunction.apply(batchDir);
        switch(subcorpus){
        case TRAIN:
          subcorpusDir = trainDirectory;
          break;
        case DEV:
          subcorpusDir = devDirectory;
          break;
        case TEST:
          subcorpusDir = testDirectory;
          break;
        default:
          subcorpusDir = trainDirectory;
        }
       
         AnalysisEngineDescription xWriter = AnalysisEngineFactory.createPrimitiveDescription(
              XWriter.class,
              typeSystemDescription,
              XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
              subcorpusDir,
              XWriter.PARAM_FILE_NAMER_CLASS_NAME,
              CtakesFileNamer.class.getName()
          );
         aggregate.add(xWriter);
         SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());

      }
    }
  }
View Full Code Here

      // use the uimafit method of finding available type system
      // descriptor via META-INF/org.uimafit/types.txt
      // (found in ctakes-type-system/src/main/resources)
      TypeSystemDescriptionFactory.createTypeSystemDescription();

    AggregateBuilder aggregate = new AggregateBuilder();

    CollectionReaderDescription collectionReader = CollectionReaderFactory.createDescription(
        I2B2Challenge2010CollectionReader.class,
        typeSystemDescription,
        "inputDir",
        parentDirectory
    );

    // fill in other values that are necessary for preprocessing
    AnalysisEngineDescription preprocessAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(
        "desc/analysis_engine/AttributeDiscoveryPreprocessor"
    );
    aggregate.add(preprocessAnnotator);

    if (preprocessedDirectory!=null) {
      AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createPrimitiveDescription(
          XWriter.class,
          typeSystemDescription,
          XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
          preprocessedDirectory,
          XWriter.PARAM_FILE_NAMER_CLASS_NAME,
          CtakesFileNamer.class.getName()
      );
      aggregate.add(xWriter2);
      //    SimplePipeline.runPipeline(collectionReader, goldAnnotator, xWriter, xWriter2);
    }

    SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
    logger.info("Finished!");
  }
View Full Code Here

  throws ResourceInitializationException, UIMAException, IOException {

    TypeSystemDescription typeSystemDescription =
      TypeSystemDescriptionFactory.createTypeSystemDescription();

    AggregateBuilder aggregate = new AggregateBuilder();

    // input dir is hard-coded in AssertionConst
    CollectionReaderDescription collectionReader = CollectionReaderFactory.createDescription(
        NegExCorpusReader.class,
        typeSystemDescription
    );

    // fill in other values that are necessary for preprocessing
    AnalysisEngineDescription preprocessAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(
        "desc/analysis_engine/AttributeDiscoveryPreprocessor"
    );
    aggregate.add(preprocessAnnotator);

    if (preprocessedDirectory!=null) {
      AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createPrimitiveDescription(
          XWriter.class,
          typeSystemDescription,
          XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
          preprocessedDirectory,
          XWriter.PARAM_FILE_NAMER_CLASS_NAME,
          CtakesFileNamer.class.getName()
      );
      aggregate.add(xWriter2);
      //    SimplePipeline.runPipeline(collectionReader, goldAnnotator, xWriter, xWriter2);
    }

    SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
    logger.info("Finished!");
  }
View Full Code Here

    HashMap<File,File> splitMipacq = new HashMap<File,File>();
    splitMipacq.put(new File(inputDirectory+"/text/train"), preprocessedDirectory);
    splitMipacq.put(new File(inputDirectory+"/text/test"),  testDirectory);
    splitMipacq.put(new File(inputDirectory+"/text/dev"),   devDirectory);
    for (File inDir : splitMipacq.keySet() ) {
      AggregateBuilder aggregate = new AggregateBuilder();

      CollectionReaderDescription collectionReader = CollectionReaderFactory.createDescription(
          FilesInDirectoryCollectionReader.class,
          typeSystemDescription,
          "InputDirectory",
          inDir
          );

      // read the UMLS_CEM data from Knowtator
      AnalysisEngineDescription goldAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
          MiPACQKnowtatorXMLReader.class,
          typeSystemDescription,
          MiPACQKnowtatorXMLReader.PARAM_TEXT_DIRECTORY,
          inDir
          );

      aggregate.add(goldAnnotator);
      // fill in other values that are necessary for preprocessing
      AnalysisEngineDescription preprocessAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(
          "desc/analysis_engine/AttributeDiscoveryPreprocessor"
          );
      aggregate.add(preprocessAnnotator);

      if (preprocessedDirectory!=null) {
        AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createPrimitiveDescription(
            XWriter.class,
            typeSystemDescription,
            XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
            splitMipacq.get(inDir),
            XWriter.PARAM_FILE_NAMER_CLASS_NAME,
            CtakesFileNamer.class.getName()
            );
        aggregate.add(xWriter2);
        //    SimplePipeline.runPipeline(collectionReader, goldAnnotator, xWriter, xWriter2);
      }

      SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
    }
     
    logger.info("Finished!");
  }
View Full Code Here

    sents = JCasUtil.select(jcas, Sentence.class);
    assertEquals(sents.size(), 8);
  }
 
  private static AnalysisEngineDescription getUimaFitPipeline() throws ResourceInitializationException {
    AggregateBuilder builder = new AggregateBuilder();
    builder.add(SimpleSegmentAnnotator.createAnnotatorDescription());
    builder.add(SentenceDetector.createAnnotatorDescription());
    return builder.createAggregateDescription();
  }
View Full Code Here

    builder.add(SentenceDetector.createAnnotatorDescription());
    return builder.createAggregateDescription();
  }

  private static AnalysisEngine getSegmentingPipeline() throws ResourceInitializationException{
    AggregateBuilder aggregateBuilder = new AggregateBuilder();

    // identify segments; use simple segment annotator on non-mayo notes
    // aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CDASegmentAnnotator.class));

    // identify sentences
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        SentenceDetector.class,
        SentenceDetector.SD_MODEL_FILE_PARAM,
        "org/apache/ctakes/core/sentdetect/sd-med-model.zip",
        SentenceDetector.PARAM_SEGMENTS_TO_SKIP,
        new String[]{"2.16.840.1.113883.10.20.22.2.1.1" /*Medications*/, "2.16.840.1.113883.10.20.22.2.45" /*Instructions*/}));

    return aggregateBuilder.createAggregate();
  }
View Full Code Here

    return aggregateBuilder.createAggregate();
  }
 
  private static AnalysisEngine getBasicPipeline() throws ResourceInitializationException{
    AggregateBuilder aggregateBuilder = new AggregateBuilder();

    // identify segments; use simple segment annotator on non-mayo notes
    // aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));

    // identify sentences
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        SentenceDetector.class,
        SentenceDetector.SD_MODEL_FILE_PARAM,
        "org/apache/ctakes/core/sentdetect/sd-med-model.zip"
        ));

    return aggregateBuilder.createAggregate();
  }
View Full Code Here

 
  public void createTrainDescriptor() throws Exception
  {
    File trainDirectory = new File("/tmp/assertion_data/train");
    File directory = trainDirectory;
    AggregateBuilder builder = new AggregateBuilder();

////
    AnalysisEngineDescription goldCopierIdentifiedAnnotsAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceIdentifiedAnnotationsSystemToGoldCopier.class);
    builder.add(goldCopierIdentifiedAnnotsAnnotator);
   
    AnalysisEngineDescription goldCopierSupportingAnnotsAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceSupportingAnnotationsSystemToGoldCopier.class);
    builder.add(goldCopierSupportingAnnotsAnnotator);
   
    AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
    builder.add(assertionAttributeClearerAnnotator);
   
    String generalSectionRegexFileUri =
        "org/mitre/medfacts/zoner/section_regex.xml";
    AnalysisEngineDescription zonerAnnotator =
        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
            generalSectionRegexFileUri
            );
    builder.add(zonerAnnotator);

    String mayoSectionRegexFileUri =
        "org/mitre/medfacts/uima/mayo_sections.xml";
    AnalysisEngineDescription mayoZonerAnnotator =
        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
            mayoSectionRegexFileUri
            );
    builder.add(mayoZonerAnnotator);
   
   
    AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        polarityAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
        this.dataWriterFactoryClass.getName(),
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        new File(directory, "polarity").getPath()
        );
    builder.add(polarityAnnotator);

    AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ConditionalCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        conditionalAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
        this.dataWriterFactoryClass.getName(),
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        new File(directory, "conditional").getPath()
        );
    builder.add(conditionalAnnotator);

    AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(UncertaintyCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        uncertaintyAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
        this.dataWriterFactoryClass.getName(),
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        new File(directory, "uncertainty").getPath()
        );
    builder.add(uncertaintyAnnotator);

    AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createPrimitiveDescription(SubjectCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        subjectAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
        this.dataWriterFactoryClass.getName(),
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        new File(directory, "subject").getPath()
        );
    builder.add(subjectAnnotator);

    AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createPrimitiveDescription(GenericCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        genericAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
        this.dataWriterFactoryClass.getName(),
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        new File(directory, "generic").getPath()
        );
    builder.add(genericAnnotator);
   
////
   
    FileOutputStream outputStream = new FileOutputStream("desc/analysis_engine/assertion_train.xml");
   
    AnalysisEngineDescription description = builder.createAggregateDescription();
   
    description.toXML(outputStream);
  }
View Full Code Here

  public void createTestDescriptor() throws Exception
  {
    File testDirectory = new File("/tmp/assertion_data/test");
    File directory = testDirectory;
    File testOutputDirectory = new File("/tmp/assertion_data/test_output");
    AggregateBuilder builder = new AggregateBuilder();

////
    AnalysisEngineDescription goldCopierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceIdentifiedAnnotationsSystemToGoldCopier.class);
    builder.add(goldCopierAnnotator);
   
    AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
    builder.add(assertionAttributeClearerAnnotator);
   
    String generalSectionRegexFileUri =
      "org/mitre/medfacts/zoner/section_regex.xml";
    AnalysisEngineDescription zonerAnnotator =
        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
            generalSectionRegexFileUri
            );
    builder.add(zonerAnnotator);

    String mayoSectionRegexFileUri =
      "org/mitre/medfacts/uima/mayo_sections.xml";
    AnalysisEngineDescription mayoZonerAnnotator =
        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
            mayoSectionRegexFileUri
            );
    builder.add(mayoZonerAnnotator);
   
    AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        polarityAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(new File(directory, "polarity"), "model.jar").getPath()
        );
    builder.add(polarityAnnotator);

    AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ConditionalCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        conditionalAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(new File(directory, "conditional"), "model.jar").getPath()
        );
    builder.add(conditionalAnnotator);
 
    AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(UncertaintyCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        uncertaintyAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(new File(directory, "uncertainty"), "model.jar").getPath()
        );
    builder.add(uncertaintyAnnotator);

    AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createPrimitiveDescription(SubjectCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        subjectAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(new File(directory, "subject"), "model.jar").getPath()
        );
    builder.add(subjectAnnotator);

    AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createPrimitiveDescription(GenericCleartkAnalysisEngine.class); //,  this.additionalParamemters);
    ConfigurationParameterFactory.addConfigurationParameters(
        genericAnnotator,
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        new File(new File(directory, "generic"), "model.jar").getPath()
        );
    builder.add(genericAnnotator);

    AnalysisEngineDescription xwriter =
    AnalysisEngineFactory.createPrimitiveDescription(
          XWriter.class,
          AssertionComponents.CTAKES_CTS_TYPE_SYSTEM_DESCRIPTION,
          XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
          testOutputDirectory,
          XWriter.PARAM_XML_SCHEME_NAME,
          XWriter.XMI);
    builder.add(xwriter);
////
   
    FileOutputStream outputStream = new FileOutputStream("desc/analysis_engine/assertion_test.xml");
   
    AnalysisEngineDescription description = builder.createAggregateDescription();
   
    description.toXML(outputStream);
  }
View Full Code Here

TOP

Related Classes of org.uimafit.factory.AggregateBuilder

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.