// turn off logging for the performance test
Logger logger = UIMAFramework.getLogger();
logger.setLevel(Level.OFF);
// create timers
Timer globalTimer = new Timer();
Timer initTimer = new Timer();
Timer warmupTimer = new Timer();
Timer ioTimer = new Timer();
Timer processResetTimer = new Timer();
Timer cleanupTimer = new Timer();
Timer documentPreparationTimer = new Timer();
//start timer for global time
globalTimer.start();
// init analysis engine
try {
// start initialization timer
initTimer.start();
// set datapath
ResourceManager resMgr = UIMAFramework.newDefaultResourceManager();
if (dataPath != null) {
resMgr.setDataPath(dataPath);
}
AnalysisEngine ae = null;
CAS cas = null;
// get resource specifier from XML file
XMLInputSource in = new XMLInputSource(taeDescFilePath);
ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
// create analysis engine with resource manager
ae = UIMAFramework.produceAnalysisEngine(specifier, resMgr, null);
// check ae
Assert.assertNotNull(ae);
// create new cas
cas = ae.newCAS();
// check cas
Assert.assertNotNull(cas);
// access cas type system
cas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_LANGUAGE);
// stop initialization timer
initTimer.stop();
result.setInitTime(initTimer.getTimeSpan());
if (doWarmup) {
// start warmup timer
warmupTimer.start();
// process dummy document
cas.setDocumentLanguage("en");
cas.setDocumentText("This is a test sentence.");
ae.process(cas);
cas.reset();
// stop warmup timer
warmupTimer.stop();
result.setWarmupTime(warmupTimer.getTimeSpan());
}
// start io timer
ioTimer.start();
// read all files in the test file directory
File[] inputFiles = testFileDir.listFiles(new FileFileFilter());
// create string array for the file content and language
String[] fileTexts = new String[inputFiles.length];
String[] languages = new String[inputFiles.length];
int numChars = 0;
long fileSize = 0;
// iterate over all input files and extract content and language
for (int i = 0; i < inputFiles.length; i++) {
// get file language
languages[i] = inputFiles[i].getName().substring(0, 2);
// get file content
fileTexts[i] = FileUtils.file2String(inputFiles[i], "UTF-8");
fileSize += inputFiles[i].length();
// count characters
numChars += fileTexts[i].length();
}
// stop io timer
ioTimer.stop();
// save results
result.setNumberOfFiles(inputFiles.length);
result.setNumberOfCharacters(numChars);
result.setTotalFileSize(fileSize);
result.setIoTime(ioTimer.getTimeSpan());
// start real processing
int numAnnot = 0;
// check repeat single mode setting
// repeatSingle=true: iterates over all files and repeats each file "numsToRun" times
// repeatSingle=false: iterates over all files and repeats the collection "numsToRun" times
if (repeatSingle) {
// iterate over all text files (over the cached content)
for (int i = 0; i < fileTexts.length; i++) {
// file repeat mode
// iterate over the current document "numsToRun" times
for (int j = 0; j < numsToRun; j++) {
documentPreparationTimer.start();
// set cas data
cas.setDocumentLanguage(languages[i]);
cas.setDocumentText(fileTexts[i]);
documentPreparationTimer.stop();
processResetTimer.start();
ae.process(cas);
processResetTimer.stop();
documentPreparationTimer.start();
numAnnot += cas.getAnnotationIndex().size();
cas.reset();
documentPreparationTimer.stop();
}
}
}
// use collection repeat mode
else {
// process the file collection "numsToRun" times
for (int j = 0; j < numsToRun; j++) {
// iterate over all text files (over the cached content)
for (int i = 0; i < fileTexts.length; i++) {
documentPreparationTimer.start();
// set cas data
cas.setDocumentLanguage(languages[i]);
cas.setDocumentText(fileTexts[i]);
documentPreparationTimer.stop();
processResetTimer.start();
ae.process(cas);
processResetTimer.stop();
documentPreparationTimer.start();
numAnnot += cas.getAnnotationIndex().size();
cas.reset();
documentPreparationTimer.stop();
}
}
}
// cleanup ae and stop global timer
cleanupTimer.start();
ae.destroy();
ae = null;
cleanupTimer.stop();
globalTimer.stop();
// save results
result.setNumberOfCreatedAnnotations(numAnnot);
result.setOverallTime(globalTimer.getTimeSpan());
result.setProcessingTime(processResetTimer.getTimeSpan());
result.setCleanupTime(cleanupTimer.getTimeSpan());
result.setDocumentPreparationTime(documentPreparationTimer.getTimeSpan());
// turn on logging as it was before
logger.setLevel(defaultLogLevel);
// return result object