String xmlTag, String language, String encoding) {
try {
// create and configure collection reader that will read input docs
CollectionReaderDescription collectionReaderDesc = FileSystemCollectionReader
.getDescription();
ConfigurationParameterSettings paramSettings = collectionReaderDesc.getMetaData()
.getConfigurationParameterSettings();
paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_INPUTDIR, inputDir
.getAbsolutePath());
paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_LANGUAGE, language);
paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_ENCODING, encoding);
collectionReader = (FileSystemCollectionReader) UIMAFramework
.produceCollectionReader(collectionReaderDesc);
// show progress Monitor
String progressMsg = " Processing " + collectionReader.getNumberOfDocuments()
+ " Documents.";
numDocs = collectionReader.getNumberOfDocuments();
progressMonitor = new ProgressMonitor(DocumentAnalyzer.this, progressMsg, "", 0, numDocs + 2);
String initial = "Initializing.... Please wait ";
progressMonitor.setNote(initial);
progressMonitor.setMillisToPopup(-1);
progressMonitor.setMillisToDecideToPopup(-1);
numDocsProcessed = 0;
progressTimer.start();
// set wait cursor
setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
// Disable frame while processing:
setEnabled(false);
// create CPM instance that will drive processing
mCPM = UIMAFramework.newCollectionProcessingManager();
// read AE descriptor from file
XMLInputSource in = new XMLInputSource(aeSpecifierFile);
ResourceSpecifier aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
// create and configure CAS consumer that will write the output (in
// XMI format)
CasConsumerDescription casConsumerDesc = XmiWriterCasConsumer.getDescription();
ConfigurationParameterSettings consumerParamSettings = casConsumerDesc.getMetaData()
.getConfigurationParameterSettings();
consumerParamSettings.setParameterValue(XmiWriterCasConsumer.PARAM_OUTPUTDIR, outputDir
.getAbsolutePath());
// declare uima.cas.TOP as an input so that ResultSpec on user's AE will be set to produce all
// types
casConsumerDesc.getCasConsumerMetaData().getCapabilities()[0].addInputType("uima.cas.TOP",
true);
// if XML tag was specified, also create XmlDetagger annotator that handles this
AnalysisEngineDescription xmlDetaggerDesc = null;
if (xmlTag != null && xmlTag.length() > 0) {
xmlDetaggerDesc = XmlDetagger.getDescription();
ConfigurationParameterSettings xmlDetaggerParamSettings = xmlDetaggerDesc.getMetaData()
.getConfigurationParameterSettings();
xmlDetaggerParamSettings.setParameterValue(XmlDetagger.PARAM_TEXT_TAG, xmlTag);
usingXmlDetagger = true;
}
else {
usingXmlDetagger = false;
}