// Enable schema validation (omit this to speed up initialization)
// UIMAFramework.getXMLParser().enableSchemaValidation(true);
// Build the Collection Processing Engine (CPE) descriptor that will drive processing.
// Every component configured below is registered on this descriptor.
CpeDescription cpeDesc = CpeDescriptorFactory.produceDescriptor();
// Add the collection reader that will read the input documents, referenced by the
// descriptor URL that FileSystemCollectionReader exposes.
cpeDesc.addCollectionReader(FileSystemCollectionReader.getDescriptorURL().toString());
// Create the configuration parameter settings for the collection reader.
CasProcessorConfigurationParameterSettings crSettings = CpeDescriptorFactory
.produceCasProcessorConfigurationParameterSettings();
// Fetch the first reader registered on the descriptor (the one added just above) and
// attach the settings object to it; the values are filled in afterwards on that
// same object.
CpeCollectionReader cpeCollRdr = cpeDesc.getAllCollectionCollectionReaders()[0];
cpeCollRdr.setConfigurationParameterSettings(crSettings);
// Input directory (as an absolute path), character encoding and document language.
crSettings.setParameterValue(FileSystemCollectionReader.PARAM_INPUTDIR, inputDir
.getAbsolutePath());
crSettings.setParameterValue(FileSystemCollectionReader.PARAM_ENCODING, encoding);
crSettings.setParameterValue(FileSystemCollectionReader.PARAM_LANGUAGE, language);
// Select the serialized-CAS input format. xcasInput takes precedence over xmiInput;
// when neither flag is set, PARAM_XCAS is left unset.
if (xcasInput) {
crSettings.setParameterValue(FileSystemCollectionReader.PARAM_XCAS, "XCAS");
} else if (xmiInput) {
crSettings.setParameterValue(FileSystemCollectionReader.PARAM_XCAS, "XMI");
}
// Only set the lenient flag when requested; otherwise PARAM_LENIENT is left unset.
// NOTE(review): presumably this relaxes deserialization of the XCAS/XMI input -
// confirm against FileSystemCollectionReader.
if (xLenient) {
crSettings.setParameterValue(FileSystemCollectionReader.PARAM_LENIENT, "true");
}
// If an XML tag name was specified (non-null and non-empty), configure the XmlDetagger
// annotator and add it to the CPE. xmlDetaggerCasProc stays null otherwise, which the
// code further down uses to tell whether the detagger is part of the pipeline.
CpeCasProcessor xmlDetaggerCasProc = null;
if (xmlTagName != null && xmlTagName.length() > 0) {
xmlDetaggerCasProc = CpeDescriptorFactory.produceCasProcessor("XmlDetagger");
// Point the CAS processor at the descriptor URL that XmlDetagger exposes.
CpeComponentDescriptor cpeComponentDescriptor =
CpeDescriptorFactory.produceComponentDescriptor(XmlDetagger.getDescriptorURL().toString());
xmlDetaggerCasProc.setCpeComponentDescriptor(cpeComponentDescriptor);
// Attach a settings object, then store the tag name in it as PARAM_TEXT_TAG.
CasProcessorConfigurationParameterSettings detaggerSettings = CpeDescriptorFactory
.produceCasProcessorConfigurationParameterSettings();
xmlDetaggerCasProc.setConfigurationParameterSettings(detaggerSettings);
detaggerSettings.setParameterValue(XmlDetagger.PARAM_TEXT_TAG, xmlTagName);
// NOTE(review): max error count 0 presumably means no processing errors are
// tolerated for this component - confirm against the CPE error-handling docs.
xmlDetaggerCasProc.setMaxErrorCount(0);
// Registered before the user's AE, which is added to the descriptor afterwards.
cpeDesc.addCasProcessor(xmlDetaggerCasProc);
}
// Add the user's analysis engine to the CPE as the CAS processor named "UserAE".
CpeCasProcessor casProc = CpeDescriptorFactory.produceCasProcessor("UserAE");
// The component descriptor is built from the absolute path of the AE specifier file.
// This variable is reassigned later when the CAS consumer is configured.
CpeComponentDescriptor cpeComponentDescriptor =
CpeDescriptorFactory.produceComponentDescriptor(aeSpecifierFile.getAbsolutePath());
casProc.setCpeComponentDescriptor(cpeComponentDescriptor);
// NOTE(review): max error count 0 presumably means no processing errors are
// tolerated for this component - confirm against the CPE error-handling docs.
casProc.setMaxErrorCount(0);
cpeDesc.addCasProcessor(casProc);
// Create and configure the CAS consumer that will write the output, and add it to the
// CPE. This only happens when an output directory was given; otherwise casCon stays
// null and no consumer is registered.
CpeCasProcessor casCon = null;
if (outputDir != null) {
casCon = CpeDescriptorFactory.produceCasProcessor("CasConsumer");
// Reuse the existing cpeComponentDescriptor variable, now pointing at the descriptor
// URL that InlineXmlCasConsumer exposes.
cpeComponentDescriptor =
CpeDescriptorFactory.produceComponentDescriptor(InlineXmlCasConsumer.getDescriptorURL().toString());
casCon.setCpeComponentDescriptor(cpeComponentDescriptor);
// Attach a settings object, then store the output directory (absolute path) in it.
CasProcessorConfigurationParameterSettings consumerSettings = CpeDescriptorFactory
.produceCasProcessorConfigurationParameterSettings();
casCon.setConfigurationParameterSettings(consumerSettings);
consumerSettings.setParameterValue(InlineXmlCasConsumer.PARAM_OUTPUTDIR, outputDir
.getAbsolutePath());
// The consumer's PARAM_XCAS is driven by the same xcasInput/xmiInput flags that select
// the input format; when neither flag is set, the parameter is left unset.
if (xcasInput) {
consumerSettings.setParameterValue(InlineXmlCasConsumer.PARAM_XCAS, "XCAS");
} else if (xmiInput) {
consumerSettings.setParameterValue(InlineXmlCasConsumer.PARAM_XCAS, "XMI");
}
// NOTE(review): max error count 0 presumably means no processing errors are
// tolerated for this component - confirm against the CPE error-handling docs.
casCon.setMaxErrorCount(0);
// Registered last, after the user's AE.
cpeDesc.addCasProcessor(casCon);
}
// if XML detagger is used, we need to configure sofa mappings for the CPE
if (xmlDetaggerCasProc != null) {
// For XML detagger map default sofa to "xmlDocument"