Examples of AggregateBuilder

org.apache.uima.fit.factory.AggregateBuilder

This builder makes it easier to create an aggregate analysis engine. A typical use case involves initializing this builder with your preferred type system and type priorities (the latter may be null). This is followed by adding analysis engine descriptions one at a time until done. This makes it easy to have runtime decisions determine how the aggregate engine should be built. Finally, one of the create methods is called and an AnalysisEngine or AnalysisEngineDescription is returned.

This is an example taken from our test cases:

 import static org.apache.uima.fit.factory.AnalysisEngineFactory.createPrimitiveDescription;

 // Start from an empty builder and add one engine description at a time.
 AggregateBuilder builder = new AggregateBuilder();

 // NOTE(review): the trailing arguments look like pairs of view names
 // (component view name, aggregate view name) - confirm against the
 // AggregateBuilder.add documentation.
 builder.add(
     createPrimitiveDescription(Annotator1.class, typeSystemDescription),
     ViewNames.PARENTHESES_VIEW, "A");
 builder.add(
     createPrimitiveDescription(Annotator2.class, typeSystemDescription),
     ViewNames.SORTED_VIEW, "B",
     ViewNames.SORTED_PARENTHESES_VIEW, "C",
     ViewNames.PARENTHESES_VIEW, "A");
 builder.add(
     createPrimitiveDescription(Annotator3.class, typeSystemDescription),
     ViewNames.INITIAL_VIEW, "B");

 // Instantiate the aggregate engine from everything added above.
 AnalysisEngine aggregateEngine = builder.createAggregate();

org.uimafit.factory.AggregateBuilder

Examples of org.uimafit.factory.AggregateBuilder

        + "The patient was well till 6 months ago, when he started having a little blood with stool.";
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText(note);


    // Get the default pipeline
    AggregateBuilder builder = new AggregateBuilder();
    builder.add(ClinicalPipelineFactory.getTokenProcessingPipeline());
    builder.add(AnalysisEngineFactory
        .createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
    builder.add(AnalysisEngineFactory
        .createPrimitiveDescription(RemoveEnclosedLookupWindows.class));
    // Commented out the Dictionary lookup for the test
    // Uncomment and set -Dctakes.umlsuser and -Dctakes.umlspw env params if
    // needed
    // builder.add(UmlsDictionaryLookupAnnotator.createAnnotatorDescription());
    builder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());


    // Add EventAnnotator
    builder.add(EventAnnotator
        .createAnnotatorDescription("/org/apache/ctakes/temporal/ae/eventannotator/model.jar"));


    SimplePipeline.runPipeline(jcas, builder.createAggregateDescription());


    Collection<EventMention> mentions = JCasUtil.select(jcas,
        EventMention.class);


    ArrayList<String> temp = new ArrayList<>();

View Full Code Here

Examples of org.uimafit.factory.AggregateBuilder

        + "The patient was well till 6 months ago, when he started having a little blood with stool.";
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText(note);


    // Get the default pipeline with umls dictionary lookup
    AggregateBuilder builder = new AggregateBuilder();
    builder.add(ClinicalPipelineFactory.getTokenProcessingPipeline());
    builder.add(AnalysisEngineFactory
        .createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
    builder.add(AnalysisEngineFactory
        .createPrimitiveDescription(RemoveEnclosedLookupWindows.class));
    // Commented out the Dictionary lookup for the test
    // Uncomment and set -Dctakes.umlsuser and -Dctakes.umlspw env params if
    // needed
    // builder.add(UmlsDictionaryLookupAnnotator.createAnnotatorDescription());
    builder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());


    // Add BackwardsTimeAnnotator
    builder.add(BackwardsTimeAnnotator
        .createAnnotatorDescription("/org/apache/ctakes/temporal/ae/timeannotator/model.jar"));


    SimplePipeline.runPipeline(jcas, builder.createAggregateDescription());


    Collection<TimeMention> mentions = JCasUtil.select(jcas,
        TimeMention.class);


    ArrayList<String> temp = new ArrayList<>();

View Full Code Here

Examples of org.uimafit.factory.AggregateBuilder

        + "The patient was well till 6 months ago, when he started having a little blood with stool.";
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText(note);


    // Get the default pipeline with umls dictionary lookup
    AggregateBuilder builder = new AggregateBuilder();
    builder.add(ClinicalPipelineFactory.getTokenProcessingPipeline());
    builder.add(AnalysisEngineFactory
        .createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
    builder.add(AnalysisEngineFactory
        .createPrimitiveDescription(RemoveEnclosedLookupWindows.class));
    // Commented out the Dictionary lookup for the test
    // Uncomment and set -Dctakes.umlsuser and -Dctakes.umlspw env params if
    // needed
    // builder.add(UmlsDictionaryLookupAnnotator.createAnnotatorDescription());
    builder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());


    // Add BackwardsTimeAnnotator
    builder.add(BackwardsTimeAnnotator
        .createAnnotatorDescription("/org/apache/ctakes/temporal/ae/timeannotator/model.jar"));
    // Add EventAnnotator
    builder.add(EventAnnotator
        .createAnnotatorDescription("/org/apache/ctakes/temporal/ae/eventannotator/model.jar"));
    // Add Document Time Relative Annotator
    builder.add(DocTimeRelAnnotator
        .createAnnotatorDescription("/org/apache/ctakes/temporal/ae/doctimerel/model.jar"));


    SimplePipeline.runPipeline(jcas, builder.createAggregateDescription());


    Collection<EventMention> mentions = JCasUtil.select(jcas,
        EventMention.class);


    ArrayList<String> temp = new ArrayList<>();

View Full Code Here

Examples of org.uimafit.factory.AggregateBuilder


  }
  
  public void execute() throws Exception
  {
    AggregateBuilder builder = new AggregateBuilder();


//    AnalysisEngineDescription documentIdPrinter =
//        AnalysisEngineFactory.createPrimitiveDescription(DocumentIdPrinterAnalysisEngine.class);
//    builder.add(documentIdPrinter);
  
    URI generalSectionRegexFileUri =
      this.getClass().getClassLoader().getResource("org/mitre/medfacts/uima/section_regex.xml").toURI();
//    ExternalResourceDescription generalSectionRegexDescription = ExternalResourceFactory.createExternalResourceDescription(
//        SectionRegexConfigurationResource.class, new File(generalSectionRegexFileUri));
    AnalysisEngineDescription zonerAnnotator =
        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
            generalSectionRegexFileUri
            );
    builder.add(zonerAnnotator);


    URI mayoSectionRegexFileUri =
        this.getClass().getClassLoader().getResource("org/mitre/medfacts/uima/mayo_sections.xml").toURI();
//      ExternalResourceDescription mayoSectionRegexDescription = ExternalResourceFactory.createExternalResourceDescription(
//          SectionRegexConfigurationResource.class, new File(mayoSectionRegexFileUri));
    AnalysisEngineDescription mayoZonerAnnotator =
        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
            mayoSectionRegexFileUri
            );
    builder.add(mayoZonerAnnotator);
    
    FileOutputStream outputStream = new FileOutputStream("desc/aggregateAssertionZoner.xml");
    
    AnalysisEngineDescription description = builder.createAggregateDescription();
    
    description.toXML(outputStream);
  }

View Full Code Here

Examples of org.uimafit.factory.AggregateBuilder

   */
  @Test
  public void testSentenceDetection() throws UIMAException, IOException {
//      JCas jCas = JCasFactory.createJCasFromPath("src/main/resources/org/apache/ctakes/ytex/types/TypeSystem.xml");
      String text = "Dr. Doolitle asked patient\nto take a deep breath\nand exhale slowly.  Patient coughed.";
      AggregateBuilder builder = new AggregateBuilder();
      File directoryCtakes = new File("../ctakes-core/desc/analysis_engine");
      File fileCtakes = new File(directoryCtakes, "TokenizerAnnotator.xml");
      XMLParser parser = UIMAFramework.getXMLParser();
      XMLInputSource source = new XMLInputSource(fileCtakes);
      builder.add(parser.parseAnalysisEngineDescription(source));
      File directory = new File("desc/analysis_engine");
      File file = new File(directory, "SentenceDetectorAnnotator.xml");
      source = new XMLInputSource(file);
      builder.add(parser.parseAnalysisEngineDescription(source));
      AnalysisEngine engine = builder.createAggregate();
      JCas jCas = engine.newJCas();
      jCas.setDocumentText(text);
      Segment s = new Segment(jCas);
      s.setBegin(0);
      s.setEnd(text.length());

View Full Code Here

Examples of org.uimafit.factory.AggregateBuilder

        ? this.getXMIReadingPreprocessorAggregateBuilder()
        : this.getXMIWritingPreprocessorAggregateBuilder();
  }


  protected AggregateBuilder getXMIReadingPreprocessorAggregateBuilder() throws UIMAException {
    AggregateBuilder aggregateBuilder = new AggregateBuilder();
    aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        XMIReader.class,
        XMIReader.PARAM_XMI_DIRECTORY,
        this.xmiDirectory));
    return aggregateBuilder;
  }

View Full Code Here

Examples of org.uimafit.factory.AggregateBuilder

    return aggregateBuilder;
  }


  protected AggregateBuilder getXMIWritingPreprocessorAggregateBuilder()
      throws Exception {
    AggregateBuilder aggregateBuilder = new AggregateBuilder();
    aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());


    // read manual annotations into gold view
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        ViewCreatorAnnotator.class,
        ViewCreatorAnnotator.PARAM_VIEW_NAME,
        GOLD_VIEW_NAME));
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        ViewTextCopierAnnotator.class,
        ViewTextCopierAnnotator.PARAM_SOURCE_VIEW_NAME,
        CAS.NAME_DEFAULT_SOFA,
        ViewTextCopierAnnotator.PARAM_DESTINATION_VIEW_NAME,
        GOLD_VIEW_NAME));
    switch (this.xmlFormat) {
    case Anafora:
      aggregateBuilder.add(
          THYMEAnaforaXMLReader.getDescription(this.xmlDirectory),
          CAS.NAME_DEFAULT_SOFA,
          GOLD_VIEW_NAME);
      break;
    case Knowtator:
      aggregateBuilder.add(
          THYMEKnowtatorXMLReader.getDescription(this.xmlDirectory),
          CAS.NAME_DEFAULT_SOFA,
          GOLD_VIEW_NAME);
      break;
    case I2B2:
      aggregateBuilder.add(
          I2B2TemporalXMLReader.getDescription(this.xmlDirectory),
          CAS.NAME_DEFAULT_SOFA,
          GOLD_VIEW_NAME);
      break;
    }


    // identify segments
    if(this.xmlFormat == XMLFormat.I2B2){
      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
    }else{
      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
    }
    // identify sentences
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        SentenceDetector.class,
        SentenceDetector.SD_MODEL_FILE_PARAM,
        "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
    // identify tokens
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
    // merge some tokens
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));


    // identify part-of-speech tags
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        POSTagger.class,
        TypeSystemDescriptionFactory.createTypeSystemDescription(),
        TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
        POSTagger.POS_MODEL_FILE_PARAM,
        "org/apache/ctakes/postagger/models/mayo-pos.zip"));


    // identify chunks
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        Chunker.class,
        Chunker.CHUNKER_MODEL_FILE_PARAM,
        FileLocator.locateFile("org/apache/ctakes/chunker/models/chunker-model.zip"),
        Chunker.CHUNKER_CREATOR_CLASS_PARAM,
        DefaultChunkCreator.class));


    // identify UMLS named entities


    // adjust NP in NP NP to span both
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        ChunkAdjuster.class,
        ChunkAdjuster.PARAM_CHUNK_PATTERN,
        new String[] { "NP", "NP" },
        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
        1));
    // adjust NP in NP PP NP to span all three
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        ChunkAdjuster.class,
        ChunkAdjuster.PARAM_CHUNK_PATTERN,
        new String[] { "NP", "PP", "NP" },
        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
        2));
    // add lookup windows for each NP
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
    // maximize lookup windows
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        OverlapAnnotator.class,
        "A_ObjectClass",
        LookupWindowAnnotation.class,
        "B_ObjectClass",
        LookupWindowAnnotation.class,
        "OverlapType",
        "A_ENV_B",
        "ActionType",
        "DELETE",
        "DeleteAction",
        new String[] { "selector=B" }));
    // add UMLS on top of lookup windows
    aggregateBuilder.add(
        UmlsDictionaryLookupAnnotator.createAnnotatorDescription()
        );


    // add lvg annotator
    String[] XeroxTreebankMap = {
        "adj|JJ",
        "adv|RB",
        "aux|AUX",
        "compl|CS",
        "conj|CC",
        "det|DET",
        "modal|MD",
        "noun|NN",
        "prep|IN",
        "pron|PRP",
        "verb|VB" };
    String[] ExclusionSet = {
        "and",
        "And",
        "by",
        "By",
        "for",
        "For",
        "in",
        "In",
        "of",
        "Of",
        "on",
        "On",
        "the",
        "The",
        "to",
        "To",
        "with",
        "With" };
    AnalysisEngineDescription lvgAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
        LvgAnnotator.class,
        "UseSegments",
        false,
        "SegmentsToSkip",
        new String[0],
        "UseCmdCache",
        false,
        "CmdCacheFileLocation",
        "/org/apache/ctakes/lvg/2005_norm.voc",
        "CmdCacheFrequencyCutoff",
        20,
        "ExclusionSet",
        ExclusionSet,
        "XeroxTreebankMap",
        XeroxTreebankMap,
        "LemmaCacheFileLocation",
        "/org/apache/ctakes/lvg/2005_lemma.voc",
        "UseLemmaCache",
        false,
        "LemmaCacheFrequencyCutoff",
        20,
        "PostLemmas",
        false,
        "LvgCmdApi",
        ExternalResourceFactory.createExternalResourceDescription(
            LvgCmdApiResourceImpl.class,
            new File(LvgCmdApiResourceImpl.class.getResource(
                "/org/apache/ctakes/lvg/data/config/lvg.properties").toURI())));
    aggregateBuilder.add(lvgAnnotator);


    // add dependency parser
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));


    // add semantic role labeler
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));


    // add gold standard parses to gold view, and adjust gold view to correct a few annotation mis-steps
    if(this.treebankDirectory != null){
      aggregateBuilder.add(THYMETreebankReader.getDescription(this.treebankDirectory));
      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TimexAnnotationCorrector.class));
    }else{
      // add ctakes constituency parses to system view
      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class,
          ConstituencyParser.PARAM_MODEL_FILENAME,
          "org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin"));
//      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(BerkeleyParserWrapper.class,
//          BerkeleyParserWrapper.PARAM_MODEL_FILENAME,
//          
//        "org/apache/ctakes/constituency/parser/models/thyme.gcg.4sm.bin"));
//          "org/apache/ctakes/constituency/parser/models/thyme.4sm.bin"));
    }
    // write out the CAS after all the above annotations
    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
        XMIWriter.class,
        XMIWriter.PARAM_XMI_DIRECTORY,
        this.xmiDirectory));


    return aggregateBuilder;

View Full Code Here

Examples of org.uimafit.factory.AggregateBuilder

    return fileList;
  }


  public void execute() throws UIMAException, IOException, URISyntaxException
  {
    AggregateBuilder builder = new AggregateBuilder();
    
    TypeSystemDescription typeSystemDescription = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath();
    
//    CollectionReader reader = 
//        CollectionReaderFactory.createCollectionReader(
//          XMIReader.class,
//          typeSystemDescription,
//          XMIReader.PARAM_FILES,
//          inputFiles);


    CollectionReader reader = 
    CollectionReaderFactory.createCollectionReader(
      TextReader.class,
      typeSystemDescription,
      TextReader.PARAM_FILES,
      inputFiles);


    
      AnalysisEngineDescription documentIdPrinter =
          AnalysisEngineFactory.createPrimitiveDescription(DocumentIdPrinterAnalysisEngine.class);
      builder.add(documentIdPrinter);
    
      String generalSectionRegexFileUri =
        "org/mitre/medfacts/uima/section_regex.xml";
      //URI generalSectionRegexFileUri =
      //  this.getClass().getClassLoader().getResource("org/mitre/medfacts/zoner/section_regex.xml").toURI();
//      ExternalResourceDescription generalSectionRegexDescription = ExternalResourceFactory.createExternalResourceDescription(
//          SectionRegexConfigurationResource.class, new File(generalSectionRegexFileUri));
      AnalysisEngineDescription zonerAnnotator =
          AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
              ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
              generalSectionRegexFileUri
              );
      builder.add(zonerAnnotator);


      String mayoSectionRegexFileUri =
          "org/mitre/medfacts/uima/mayo_sections.xml";
//      URI mayoSectionRegexFileUri =
//          this.getClass().getClassLoader().getResource("org/mitre/medfacts/zoner/mayo_sections.xml").toURI();
//        ExternalResourceDescription mayoSectionRegexDescription = ExternalResourceFactory.createExternalResourceDescription(
//            SectionRegexConfigurationResource.class, new File(mayoSectionRegexFileUri));
      AnalysisEngineDescription mayoZonerAnnotator =
          AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
              ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
              mayoSectionRegexFileUri
              );
      builder.add(mayoZonerAnnotator);


      AnalysisEngineDescription xWriter = AnalysisEngineFactory.createPrimitiveDescription(
          XWriter.class,
          typeSystemDescription,
          XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
          outputDirectory.toString(),
          XWriter.PARAM_FILE_NAMER_CLASS_NAME,
          CtakesFileNamer.class.getName()
          );
      
      builder.add(xWriter);


    logger.info("BEFORE RUNNING PIPELINE...");
    SimplePipeline.runPipeline(reader,  builder.createAggregateDescription());
    logger.info("AFTER RUNNING PIPELINE...COMPLETED");
  }

View Full Code Here

Examples of org.uimafit.factory.AggregateBuilder

    createZonerMayoDescriptor();
  }
  
  public void createZonerAggregateDescriptor() throws Exception
  {
    AggregateBuilder builder = new AggregateBuilder();


////
    String generalSectionRegexFileUri =
      "org/mitre/medfacts/zoner/section_regex.xml";
    AnalysisEngineDescription zonerAnnotator =
        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
            generalSectionRegexFileUri
            );
    builder.add(zonerAnnotator);


    String mayoSectionRegexFileUri =
      "org/mitre/medfacts/uima/mayo_sections.xml";
    AnalysisEngineDescription mayoZonerAnnotator =
        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
            mayoSectionRegexFileUri
            );
    builder.add(mayoZonerAnnotator);
    
////
    
    File outputFile = new File("desc/analysis_engine/assertion_zoner__both_regular_and_mayo.xml");
    FileOutputStream outputStream = new FileOutputStream(outputFile);
    String outputFilePath = outputFile.getAbsolutePath();
    System.out.println("output descriptor file: " + outputFilePath);
    
    AnalysisEngineDescription description = builder.createAggregateDescription();
    
    description.toXML(outputStream);
  }

View Full Code Here

Examples of org.uimafit.factory.AggregateBuilder

    description.toXML(outputStream);
  }
  
  public void createZonerNormalDescriptor() throws Exception
  {
    AggregateBuilder builder = new AggregateBuilder();


    String generalSectionRegexFileUri =
      "org/mitre/medfacts/zoner/section_regex.xml";
    AnalysisEngineDescription zonerAnnotator =
        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
            generalSectionRegexFileUri
            );
    builder.add(zonerAnnotator);


    File outputFile = new File("desc/analysis_engine/assertion_zoner__normal.xml");
    FileOutputStream outputStream = new FileOutputStream(outputFile);
    String outputFilePath = outputFile.getAbsolutePath();
    System.out.println("output descriptor file: " + outputFilePath);

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.