Package org.apache.uima.analysis_engine

Examples of org.apache.uima.analysis_engine.AnalysisEngine


    }
   
    // we want to reuse these cases, so extend the type system in case a boundary-based learner is
    // called
    TextRulerToolkit.addBoundaryTypes(analysisEngineDescription, currentSlotNames);
    AnalysisEngine ae = TextRulerToolkit.loadAnalysisEngine(analysisEngineDescription);

    // preprocess input XMIs
    File inputFolder = new File(inFolder);
    File outputFolder = new File(tmpDir + docType);
    File[] files = inputFolder.listFiles(new FilenameFilter() {
      public boolean accept(File dir, String name) {
        return (name.endsWith(".xmi"));
      }
    });

    try {
      outputFolder.mkdir();
    } catch (Exception e) {
      TextRulerPlugin.error(e);
      return null;
    }

    CAS cas = null;
    for (File file : files) {
      if (delegate != null && delegate.shouldAbort()) {
        TextRulerToolkit.log("[PREPROCESSOR] ABORT");
        break;
      }
      TextRulerToolkit.log("Load INPUT XMI file: " + file.getName());
      if (delegate != null)
        delegate.preprocessorStatusUpdate(this,
                "Loading input XMI file (" + docType + "): " + file.getName());
      cas = TextRulerToolkit.readCASfromXMIFile(file, ae, cas);
      System.out.print("Processing...");
      try {
        ae.process(cas);
        TextRulerToolkit.log(" OK");
        TextRulerToolkit.writeCAStoXMIFile(cas, outputFolder + "/processed_" + file.getName());
      } catch (Exception e) {
        TextRulerPlugin.error(e);
        TextRulerToolkit.log(" ERROR!");
View Full Code Here


    Collection<TypeSystemDescription> tsds = new ArrayList<TypeSystemDescription>();
    tsds.add(basicTypeSystem);
    TypeSystemDescription mergeTypeSystems = CasCreationUtils.mergeTypeSystems(tsds);
    aed.getAnalysisEngineMetaData().setTypeSystem(mergeTypeSystems);

    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
    ae.setConfigParameterValue(RutaEngine.PARAM_SCRIPT_PATHS, new String[] { ruleFile
            .getParentFile().getPath() });
    String name = ruleFile.getName();
    if (name.endsWith(RutaEngine.SCRIPT_FILE_EXTENSION)) {
      name = name.substring(0, name.length() - 5);
    }
    ae.setConfigParameterValue(RutaEngine.PARAM_MAIN_SCRIPT, name);
    ae.setConfigParameterValue(RutaEngine.PARAM_SEEDERS, new String[0]);
    ae.setConfigParameterValue(RutaEngine.PARAM_DEFAULT_FILTERED_TYPES, new String[0]);

    ae.reconfigure();
    CAS cas = ae.newCAS();
    cas.setDocumentText(FileUtils.file2String(textFile, "UTF-8"));
   
    Type typeCW = cas.getTypeSystem().getType("org.apache.uima.ruta.type.CW");
    Type typeSW = cas.getTypeSystem().getType("org.apache.uima.ruta.type.SW");
    cas.addFsToIndexes(cas.createAnnotation(typeCW, 0, 5));
    cas.addFsToIndexes(cas.createAnnotation(typeCW, 7, 13));
    cas.addFsToIndexes(cas.createAnnotation(typeCW, 15, 18));
    cas.addFsToIndexes(cas.createAnnotation(typeSW, 19, 22));
    cas.addFsToIndexes(cas.createAnnotation(typeCW, 23, 28));
   
    ae.process(cas);

    Type t = null;
    AnnotationIndex<AnnotationFS> ai = null;
    FSIterator<AnnotationFS> iterator = null;
   
View Full Code Here

    TypeDescription_impl ti2 = new TypeDescription_impl(t2, "", "uima.tcas.Annotation");
    tsdi.setTypes(new TypeDescription[] {ti1, ti2});
   
    String script = "CW SW{-> MARK(Test1)};\n Test1 SW{-> MARK(Test2)};";
    AnalysisEngineDescription aed = Ruta.createAnalysisEngineDescription(script, tsdi);
    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(aed);
    CAS cas = ae.newCAS();
    cas.setDocumentText("Only some text.");
    ae.process(cas);

    Type type1 = cas.getTypeSystem().getType(t1);
    AnnotationIndex<AnnotationFS> ai1 = cas.getAnnotationIndex(type1);
    assertEquals(1, ai1.size());
    assertEquals("some", ai1.iterator().get().getCoveredText());
View Full Code Here

    }
    File tempFile = File.createTempFile("ViewWriterTest", ".xmi");
    XMLInputSource in = new XMLInputSource(url);
    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);

    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
    ae.setConfigParameterValue(ViewWriter.INPUT_VIEW, NEW_VIEW);
    ae.setConfigParameterValue(ViewWriter.OUTPUT_VIEW, CAS.NAME_DEFAULT_SOFA);
    ae.setConfigParameterValue(ViewWriter.OUTPUT, tempFile.getAbsolutePath());
    ae.reconfigure();

    CAS cas = ae.newCAS();
    cas.setDocumentText("This is the default view.");
    CAS newView = cas.createView(NEW_VIEW);
    newView.setDocumentText("This is a new view.");
    Type type = cas.getTypeSystem().getType(TEST_TYPE);
    AnnotationFS createAnnotation = newView.createAnnotation(type, 5, 7);
    newView.addFsToIndexes(createAnnotation);

    ae.process(cas);

    cas.reset();
    FileInputStream stream = new FileInputStream(tempFile);
    XmiCasDeserializer.deserialize(stream, cas, true);
View Full Code Here

  public static void main(String[] args) throws Exception {
    if (!parseCmdLineArgs(args)) {
      throw new IllegalArgumentException("Passed arguments are invalid!");
    }

    AnalysisEngine ae = Ruta.wrapAnalysisEngine(descriptor.toURI().toURL(), view, true, inputEncoding);
    configure(ae);
    CAS cas = ae.newCAS();

    List<File> inputFiles = getFiles(inputFolder, inputRecursive);
    for (File file : inputFiles) {
      processFile(file, ae, cas);
    }

    ae.batchProcessComplete(new ProcessTrace_impl());
    ae.collectionProcessComplete(new ProcessTrace_impl());
    cas.release();
    ae.destroy();
  }
View Full Code Here

      TextRulerStatisticsCollector c = new TextRulerStatisticsCollector();
      resetAndFillTestCAS(testCAS, target);
      CAS docCAS = getCAS();
      TypeSystem ts = docCAS.getTypeSystem();
      Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);
      AnalysisEngine analysisEngine = learner.getAnalysisEngine();
      try {
        analysisEngine.process(testCAS);
      } catch (AnalysisEngineProcessException e) {
        // TODO add log here
      }
      TextRulerTarget newTarget = new TextRulerTarget(target.slotNames, target.getLearner());
      if (target.isLeftCorrection()) {
View Full Code Here

      url = HtmlConverter.class.getClassLoader().getResource(
              "org/apache/uima/ruta/engine/HtmlConverter.xml");
    }
    XMLInputSource in = new XMLInputSource(url);
    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
    CAS cas = ae.newCAS();

    ae.setConfigParameterValue(HtmlConverter.OUTPUT_VIEW, outputViewName);
    ae.setConfigParameterValue(HtmlConverter.CONVERSION_POLICY, "explicit");
    ae.setConfigParameterValue(HtmlConverter.CONVERSION_PATTERNS, new String[] { "&nbsp;" });
    ae.setConfigParameterValue(HtmlConverter.CONVERSION_REPLACEMENTS, new String[] { " " });
    ae.reconfigure();
    cas.reset();
    cas.setDocumentText(htmlDecoding);

    // go:
    ae.process(cas);

    CAS modifiedView = cas.getView(outputViewName);
    String text = modifiedView.getDocumentText();

    String inputText = cas.getDocumentText();
View Full Code Here

      url = HtmlConverter.class.getClassLoader().getResource(
              "org/apache/uima/ruta/engine/HtmlConverter.xml");
    }
    XMLInputSource in = new XMLInputSource(url);
    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
    CAS cas = ae.newCAS();

    ae.setConfigParameterValue(HtmlConverter.OUTPUT_VIEW, outputViewName);
    ae.reconfigure();
    cas.reset();
    cas.setDocumentText(htmlDecoding);

    // go:
    ae.process(cas);

    CAS modifiedView = cas.getView(outputViewName);
    String text = modifiedView.getDocumentText();

    String inputText = cas.getDocumentText();
View Full Code Here

      url = HtmlConverter.class.getClassLoader().getResource(
              "org/apache/uima/ruta/engine/HtmlConverter.xml");
    }
    XMLInputSource in = new XMLInputSource(url);
    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
    CAS cas = ae.newCAS();

    ae.setConfigParameterValue(HtmlConverter.OUTPUT_VIEW, outputViewName);
    ae.reconfigure();
    cas.reset();
    cas.setDocumentText(htmlUnix);

    // go:
    ae.process(cas);

    CAS modifiedView = cas.getView(outputViewName);
    String text = modifiedView.getDocumentText();

    String expectedText = "start of body\nnormal bold\nend of body";
View Full Code Here

      url = HtmlConverter.class.getClassLoader().getResource(
              "org/apache/uima/ruta/engine/HtmlConverter.xml");
    }
    XMLInputSource in = new XMLInputSource(url);
    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
    CAS cas = ae.newCAS();

    ae.setConfigParameterValue(HtmlConverter.OUTPUT_VIEW, outputViewName);
    ae.reconfigure();
    cas.reset();
    cas.setDocumentText(htmlWin);

    // go:
    ae.process(cas);

    CAS modifiedView = cas.getView(outputViewName);
    String text = modifiedView.getDocumentText();

    String expectedText = "start of body\nnormal bold\nend of body";
View Full Code Here

TOP

Related Classes of org.apache.uima.analysis_engine.AnalysisEngine

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.