Package org.apache.uima.test.junit_extension

Source Code of org.apache.uima.test.junit_extension.AnnotatorPerformanceTester$FileFileFilter

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.uima.test.junit_extension;

import java.io.File;
import java.io.FileFilter;
import java.util.HashMap;
import java.util.logging.LogManager;

import junit.framework.Assert;

import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.CAS;
import org.apache.uima.internal.util.Timer;
import org.apache.uima.resource.ResourceManager;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.XMLInputSource;

/**
* AnnotatorPerfTester is a helper class to execute annotator performance tests. The performance
* test results are returned as {@link PerformanceTestResultImpl} object.
*
*/
public class AnnotatorPerformanceTester {

  private static class FileFileFilter implements FileFilter {

    private FileFileFilter() {
      super();
    }

    public boolean accept(File arg0) {
      return arg0.isFile();
    }

  }

  private static HashMap logLevels = new HashMap(9);
  static {
    logLevels.put("OFF", Level.OFF);
    logLevels.put("SEVERE", Level.SEVERE);
    logLevels.put("WARNING", Level.WARNING);
    logLevels.put("INFO", Level.INFO);
    logLevels.put("CONFIG", Level.CONFIG);
    logLevels.put("FINE", Level.FINE);
    logLevels.put("FINER", Level.FINER);
    logLevels.put("FINEST", Level.FINEST);
    logLevels.put("ALL", Level.ALL);
  }

  /**
   * runs an annotator performance test
   *
   * @param repeatSingle
   *          if true, every document is process "numsToRun" times before the next document is
   *          processed. If false, all documents are processed and this is repeated "numsToRun"
   *          times.
   *
   * @param numsToRun
   *          repeat count for the input documents
   * @param taeDescFilePath
   *          ae descriptor - absolute file path
   * @param testFileDir
   *          test file directory
   * @param dataPath
   *          ae datapath
   * @param doWarmup
   *          do warum for analysis engine - runs an short english sample document
   * @return PerformanceTestResult - returns the performance test results
   *
   * @throws Exception
   */
  public static PerformanceTestResult runPerformanceTest(boolean repeatSingle, int numsToRun,
          File taeDescFilePath, File testFileDir, String dataPath, boolean doWarmup)
          throws Exception {

    // create performance result object
    PerformanceTestResultImpl result = new PerformanceTestResultImpl();

    // check mandetory settings
    Assert.assertNotNull(taeDescFilePath);
    Assert.assertNotNull(testFileDir);

    // save settings
    result.setRepeatSingleMode(repeatSingle);
    result.setDoWarmup(doWarmup);
    result.setNumsToRun(numsToRun);
    result.setAeDescFilePath(taeDescFilePath);
    result.setTestFileDir(testFileDir);
    result.setDatapath(dataPath);

    // set and check test file directory
    if (testFileDir == null || !testFileDir.isDirectory() || !testFileDir.canRead()) {
      throw new Exception("test file directory not valid");
    }

    // get current log level setting
    Level defaultLogLevel = (Level) logLevels.get(LogManager.getLogManager()
            .getProperty(".level"));

    if (defaultLogLevel == null) {
      // no log level was specified, use default log level settings "INFO" that is also
      // used by the Java logging framework.
      defaultLogLevel = Level.INFO;
    }
    // turn of logging for the performance test
    Logger logger = UIMAFramework.getLogger();
    logger.setLevel(Level.OFF);

    //create timer
    Timer globalTimer = new Timer();
    Timer initTimer = new Timer();
    Timer warmupTimer = new Timer();
    Timer ioTimer = new Timer();
    Timer processResetTimer = new Timer();
    Timer cleanupTimer = new Timer();
    Timer documentPreparationTimer = new Timer();
   
    //start timer for global time
    globalTimer.start();

    // init analysis engine
    try {

      // start initialization timer  
      initTimer.start();
     
      // set datapath
      ResourceManager resMgr = UIMAFramework.newDefaultResourceManager();
      if (dataPath != null) {
        resMgr.setDataPath(dataPath);
      }

      AnalysisEngine ae = null;
      CAS cas = null;
      // get resource specifier from XML file
      XMLInputSource in = new XMLInputSource(taeDescFilePath);
      ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);

      // create analysis engine with resource manager
      ae = UIMAFramework.produceAnalysisEngine(specifier, resMgr, null);
      // check ae
      Assert.assertNotNull(ae);

      // create new cas
      cas = ae.newCAS();
      // check cas
      Assert.assertNotNull(cas);

      // access cas type system
      cas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_LANGUAGE);

      // stop initalization timer
      initTimer.stop();
      result.setInitTime(initTimer.getTimeSpan());

      if (doWarmup) {
        // start warmup timer    
        warmupTimer.start();

        // process dummy document
        cas.setDocumentLanguage("en");
        cas.setDocumentText("This is a test sentence.");
        ae.process(cas);
        cas.reset();

        // stop warmup timer
        warmupTimer.stop();
        result.setWarmupTime(warmupTimer.getTimeSpan());
      }

      // start io timer
      ioTimer.start();

      // read all files in the test file directory
      File[] inputFiles = testFileDir.listFiles(new FileFileFilter());
      // create string array for the file content and language
      String[] fileTexts = new String[inputFiles.length];
      String[] languages = new String[inputFiles.length];
      int numChars = 0;
      long fileSize = 0;
      // iterate of all input files and extract content and language
      for (int i = 0; i < inputFiles.length; i++) {
        // get file language
        languages[i] = inputFiles[i].getName().substring(0, 2);
        // get file content
        fileTexts[i] = FileUtils.file2String(inputFiles[i], "UTF-8");
        fileSize += inputFiles[i].length();
        // count characters
        numChars += fileTexts[i].length();
      }

      // stop io timer
      ioTimer.stop();

      // save results
      result.setNumberOfFiles(inputFiles.length);
      result.setNumberOfCharacters(numChars);
      result.setTotalFileSize(fileSize);
      result.setIoTime(ioTimer.getTimeSpan());

      // start real processing
      int numAnnot = 0;

      // check repeat single mode setting
      // repeatSingle=true: iterates of all files and repeat each file "numsToRun" times
      // repeatSingle=false: iterates of all files and repeat the collection "numsToRun" times
      if (repeatSingle) {
        // iterate over all text files (over the cached content)
        for (int i = 0; i < fileTexts.length; i++) {
          // file repeat mode
          // iterate over the current document "numsToRun" times
          for (int j = 0; j < numsToRun; j++) {
            documentPreparationTimer.start();
            // set cas data
            cas.setDocumentLanguage(languages[i]);
            cas.setDocumentText(fileTexts[i]);
            documentPreparationTimer.stop();
            processResetTimer.start();
            ae.process(cas);
            processResetTimer.stop();
            documentPreparationTimer.start();
            numAnnot += cas.getAnnotationIndex().size();
            cas.reset();
            documentPreparationTimer.stop();
          }
        }
      }
      // use collection repeat mode
      else {
        // process the file collection "numsToRun" times
        for (int j = 0; j < numsToRun; j++) {
          // iterate over all text files (over the cached content)
          for (int i = 0; i < fileTexts.length; i++) {
            documentPreparationTimer.start();
            // set cas data
            cas.setDocumentLanguage(languages[i]);
            cas.setDocumentText(fileTexts[i]);
            documentPreparationTimer.stop();
            processResetTimer.start();
            ae.process(cas);
            processResetTimer.stop();
            documentPreparationTimer.start();
            numAnnot += cas.getAnnotationIndex().size();
            cas.reset();
            documentPreparationTimer.stop();
          }
        }
      }

      // cleanup ae and stop global timer
      cleanupTimer.start();
      ae.destroy();
      ae = null;
      cleanupTimer.stop();
      globalTimer.stop();

      // save results
      result.setNumberOfCreatedAnnotations(numAnnot);
      result.setOverallTime(globalTimer.getTimeSpan());
      result.setProcessingTime(processResetTimer.getTimeSpan());
      result.setCleanupTime(cleanupTimer.getTimeSpan());
      result.setDocumentPreparationTime(documentPreparationTimer.getTimeSpan());

      // turn on logging as it was before
      logger.setLevel(defaultLogLevel);

      // return result object
      return result;

    } catch (Exception e) { // Bail out.
      throw e;
    }

  }

}
TOP

Related Classes of org.apache.uima.test.junit_extension.AnnotatorPerformanceTester$FileFileFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.