if (xLenient) {
crSettings.setParameterValue(FileSystemCollectionReader.PARAM_LENIENT, "true");
}
// if XML tag was specified, configure XmlDetagger annotator and add to CPE
CpeCasProcessor xmlDetaggerCasProc = null;
if (xmlTagName != null && xmlTagName.length() > 0) {
xmlDetaggerCasProc = CpeDescriptorFactory.produceCasProcessor("XmlDetagger");
CpeComponentDescriptor cpeComponentDescriptor =
CpeDescriptorFactory.produceComponentDescriptor(XmlDetagger.getDescriptorURL().toString());
xmlDetaggerCasProc.setCpeComponentDescriptor(cpeComponentDescriptor);
CasProcessorConfigurationParameterSettings detaggerSettings = CpeDescriptorFactory
.produceCasProcessorConfigurationParameterSettings();
xmlDetaggerCasProc.setConfigurationParameterSettings(detaggerSettings);
detaggerSettings.setParameterValue(XmlDetagger.PARAM_TEXT_TAG, xmlTagName);
xmlDetaggerCasProc.setMaxErrorCount(0);
cpeDesc.addCasProcessor(xmlDetaggerCasProc);
}
// add user's AE to CPE
CpeCasProcessor casProc = CpeDescriptorFactory.produceCasProcessor("UserAE");
CpeComponentDescriptor cpeComponentDescriptor =
CpeDescriptorFactory.produceComponentDescriptor(aeSpecifierFile.getAbsolutePath());
casProc.setCpeComponentDescriptor(cpeComponentDescriptor);
casProc.setMaxErrorCount(0);
cpeDesc.addCasProcessor(casProc);
// if an output directory was specified, create and configure the CAS Consumer
// that will write the output, and add it to the CPE
CpeCasProcessor casCon = null;
if (outputDir != null) {
casCon = CpeDescriptorFactory.produceCasProcessor("CasConsumer");
cpeComponentDescriptor =
CpeDescriptorFactory.produceComponentDescriptor(InlineXmlCasConsumer.getDescriptorURL().toString());
casCon.setCpeComponentDescriptor(cpeComponentDescriptor);
CasProcessorConfigurationParameterSettings consumerSettings = CpeDescriptorFactory
.produceCasProcessorConfigurationParameterSettings();
casCon.setConfigurationParameterSettings(consumerSettings);
consumerSettings.setParameterValue(InlineXmlCasConsumer.PARAM_OUTPUTDIR, outputDir
.getAbsolutePath());
if (xcasInput) {
consumerSettings.setParameterValue(InlineXmlCasConsumer.PARAM_XCAS, "XCAS");
} else if (xmiInput) {
consumerSettings.setParameterValue(InlineXmlCasConsumer.PARAM_XCAS, "XMI");
}
casCon.setMaxErrorCount(0);
cpeDesc.addCasProcessor(casCon);
}
// if XML detagger is used, we need to configure sofa mappings for the CPE
if (xmlDetaggerCasProc != null) {
// Map the XML detagger's "xmlDocument" component sofa to the CPE's default sofa
CpeSofaMapping sofaMapping = CpeDescriptorFactory.produceSofaMapping();
sofaMapping.setComponentSofaName("xmlDocument");
sofaMapping.setCpeSofaName(CAS.NAME_DEFAULT_SOFA);
CpeSofaMappings xmlDetaggerSofaMappings = CpeDescriptorFactory.produceSofaMappings();
xmlDetaggerSofaMappings.setSofaNameMappings(new CpeSofaMapping[] { sofaMapping });
xmlDetaggerCasProc.setSofaNameMappings(xmlDetaggerSofaMappings);
// User AE and InlineXmlCasConsumer (if present) operate on the "plainTextDocument"
// sofa produced by the XmlDetagger
CpeSofaMapping aeSofaMapping = CpeDescriptorFactory.produceSofaMapping();
aeSofaMapping.setCpeSofaName("plainTextDocument");
CpeSofaMappings userAeSofaMappings = CpeDescriptorFactory.produceSofaMappings();
userAeSofaMappings.setSofaNameMappings(new CpeSofaMapping[] { aeSofaMapping });
casProc.setSofaNameMappings(userAeSofaMappings);
if (casCon != null) {
CpeSofaMapping casConSofaMapping = CpeDescriptorFactory.produceSofaMapping();
casConSofaMapping.setCpeSofaName("plainTextDocument");
CpeSofaMappings consumerSofaMappings = CpeDescriptorFactory.produceSofaMappings();
consumerSofaMappings.setSofaNameMappings(new CpeSofaMapping[] { casConSofaMapping });
casCon.setSofaNameMappings(consumerSofaMappings);
}
}
// instantiate CPE
mCPE = UIMAFramework.produceCollectionProcessingEngine(cpeDesc);