*
* @throws ResourceConfigurationException if a required element of the CPE descriptor is missing
*/
public BaseCollectionReader getCollectionReader() throws ResourceConfigurationException {
checkForErrors();
BaseCollectionReader colreader = null;
try {
CpeCollectionReader reader = (getCpeDescriptor().getAllCollectionCollectionReaders())[0];
if (reader == null) {
throw new ResourceConfigurationException(InvalidXMLException.ELEMENT_NOT_FOUND,
new Object[] { "<collectionReader>", "<cpeDescriptor>" }, new Exception(
CpmLocalizedMessage.getLocalizedMessage(CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
"UIMA_CPM_EXP_missing_required_element__WARNING", new Object[] {
Thread.currentThread().getName(), "<collectionReader>" })));
}
CpeCollectionReaderIterator cit = reader.getCollectionIterator();
if (cit == null || cit.getDescriptor() == null || cit.getDescriptor().getInclude() == null) {
throw new ResourceConfigurationException(InvalidXMLException.ELEMENT_NOT_FOUND,
new Object[] { "<include>", "<collectionIterator>" }, new Exception(
CpmLocalizedMessage.getLocalizedMessage(CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
"UIMA_CPM_EXP_missing_required_element__WARNING", new Object[] {
Thread.currentThread().getName(), "<include>" })));
}
if (cit.getDescriptor().getInclude().get() == null) {
throw new ResourceConfigurationException(InvalidXMLException.ELEMENT_NOT_FOUND,
new Object[] { "<href>", "<collectionIterator>" }, new Exception(
CpmLocalizedMessage.getLocalizedMessage(CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
"UIMA_CPM_EXP_missing_attribute_from_xml_element__WARNING",
new Object[] { Thread.currentThread().getName(), "<href>",
"<collectionIterator>" })));
}
String descriptorPath = CPMUtils.convertToAbsolutePath(System.getProperty("CPM_HOME"),
CPM_HOME, cit.getDescriptor().getInclude().get());
// create new collection reader from the descriptor
XMLInputSource in1 = new XMLInputSource(descriptorPath);
ResourceSpecifier colReaderSp = UIMAFramework.getXMLParser()
.parseCollectionReaderDescription(in1);
overrideParameterSettings(colReaderSp, cit.getConfigurationParameterSettings(),
descriptorPath, "Collection Reader");
// compute sofa mapping for the CAS Initializer
CpeSofaMappings sofanamemappings = cit.getSofaNameMappings();
HashMap sofamap = new HashMap();
if (sofanamemappings != null) {
CpeSofaMapping[] sofaNameMappingArray = sofanamemappings.getSofaNameMappings();
for (int i = 0; sofaNameMappingArray != null && i < sofaNameMappingArray.length; i++) {
CpeSofaMapping aSofaMap = sofaNameMappingArray[i];
// if no component sofa name, then set it to default
if (aSofaMap.getComponentSofaName() == null)
aSofaMap.setComponentSofaName(CAS.NAME_DEFAULT_TEXT_SOFA);
sofamap.put(aSofaMap.getComponentSofaName(), aSofaMap.getCpeSofaName());
}
}
// create child UimaContext for the CollectionReader
UimaContextAdmin collectionReaderContext = uimaContext.createChild("_CollectionReader",
sofamap);
Map additionalParams = new HashMap();
additionalParams.put(Resource.PARAM_UIMA_CONTEXT, collectionReaderContext);
colreader = (BaseCollectionReader) UIMAFramework.produceResource(BaseCollectionReader.class,
colReaderSp, getResourceManager(), additionalParams);
CpeCollectionReaderCasInitializer casInit = reader.getCasInitializer();
if (casInit != null) {
if (casInit.getDescriptor() == null) {
throw new ResourceConfigurationException(InvalidXMLException.ELEMENT_NOT_FOUND,
new Object[] { "<descriptor>", "<casInitializer>" }, new Exception(
CpmLocalizedMessage.getLocalizedMessage(CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
"UIMA_CPM_EXP_missing_required_element__WARNING", new Object[] {
Thread.currentThread().getName(), "<descriptor>" })));
}
if (casInit.getDescriptor().getInclude() == null) {
throw new ResourceConfigurationException(InvalidXMLException.ELEMENT_NOT_FOUND,
new Object[] { "<include>", "<casInitializer>" }, new Exception(
CpmLocalizedMessage.getLocalizedMessage(CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
"UIMA_CPM_EXP_missing_required_element__WARNING", new Object[] {
Thread.currentThread().getName(), "<include>" })));
}
if (casInit.getDescriptor().getInclude().get() == null
|| casInit.getDescriptor().getInclude().get().length() == 0) {
throw new ResourceConfigurationException(InvalidXMLException.ELEMENT_NOT_FOUND,
new Object[] { "<href>", "<casInitializer>" }, new Exception(CpmLocalizedMessage
.getLocalizedMessage(CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
"UIMA_CPM_EXP_missing_attribute_from_xml_element__WARNING",
new Object[] { Thread.currentThread().getName(), "<href>",
"<casInitializer>" })));
}
descriptorPath = CPMUtils.convertToAbsolutePath(System.getProperty("CPM_HOME"), CPM_HOME,
casInit.getDescriptor().getInclude().get());
File casInitializerSpecifierFile = new File(descriptorPath);
XMLInputSource in4 = new XMLInputSource(casInitializerSpecifierFile);
ResourceSpecifier casIniSp = UIMAFramework.getXMLParser().parseCasInitializerDescription(
in4);
overrideParameterSettings(casIniSp, casInit.getConfigurationParameterSettings(),
descriptorPath, "Cas Initializer");
// compute sofa mapping for the CAS Initializer
CpeSofaMappings sofaNamemappings = casInit.getSofaNameMappings();
sofamap = new HashMap();
if (sofaNamemappings != null) {
CpeSofaMapping[] sofaNameMappingArray = sofaNamemappings.getSofaNameMappings();
for (int i = 0; sofaNameMappingArray != null && i < sofaNameMappingArray.length; i++) {
CpeSofaMapping aSofaMap = sofaNameMappingArray[i];
// if no component sofa name, then set it to default
if (aSofaMap.getComponentSofaName() == null)
aSofaMap.setComponentSofaName(CAS.NAME_DEFAULT_TEXT_SOFA);
sofamap.put(aSofaMap.getComponentSofaName(), aSofaMap.getCpeSofaName());
}
}
// create child UimaContext for the CAS Initializer
UimaContextAdmin initializerContext = uimaContext.createChild("_CasInitializer", sofamap);
additionalParams.put(Resource.PARAM_UIMA_CONTEXT, initializerContext);
Object initializer = produceInitializer(casIniSp, additionalParams);
if (initializer instanceof CasDataInitializer) {
((CasDataCollectionReader) colreader).setCasInitializer((CasDataInitializer) initializer);
} else if (initializer instanceof CasInitializer) {
((CollectionReader) colreader).setCasInitializer((CasInitializer) initializer);
} else {
throw new ResourceConfigurationException(InvalidXMLException.INVALID_ELEMENT_TYPE,
new Object[] { "CasDataInitializer", initializer.getClass().getName() },
new Exception(CpmLocalizedMessage.getLocalizedMessage(
CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
"UIMA_CPM_EXP_incompatible_component__WARNING", new Object[] {
Thread.currentThread().getName(), "CasInitializer",
"CasDataInitializer", initializer.getClass().getName() })));
}
}
// Retrieve number of entities to process from CPE configuration
long numDocs2Process = getCPEConfig().getNumToProcess();
if (UIMAFramework.getLogger().isLoggable(Level.CONFIG)) {
UIMAFramework.getLogger(this.getClass()).logrb(Level.CONFIG, this.getClass().getName(),
"process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
"UIMA_CPM_show_docs_to_process__CONFIG",
new Object[] { Thread.currentThread().getName(), String.valueOf(numDocs2Process) });
}
// Provide CollectionReader with the number of documents to process
((ConfigurableResource_ImplBase) colreader).setConfigParameterValue("processSize",
new Integer(new Long(numDocs2Process).intValue()));
CpeConfiguration cpeType = getCpeDescriptor().getCpeConfiguration();
if (cpeType != null && cpeType.getStartingEntityId() != null
&& cpeType.getStartingEntityId().trim().length() > 0) {
if (UIMAFramework.getLogger().isLoggable(Level.FINEST)) {
UIMAFramework.getLogger(this.getClass()).logrb(Level.FINEST, this.getClass().getName(),
"process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
"UIMA_CPM_show_start_doc_id__FINEST",
new Object[] { Thread.currentThread().getName(), cpeType.getStartingEntityId() });
}
colreader.getProcessingResourceMetaData().getConfigurationParameterSettings()
.setParameterValue("startNumber", cpeType.getStartingEntityId().trim());
if (UIMAFramework.getLogger().isLoggable(Level.FINEST)) {
UIMAFramework.getLogger(this.getClass()).log(
Level.FINEST,
"Retrieved Documents Starting with DocId ::"
+ colreader.getProcessingResourceMetaData()
.getConfigurationParameterSettings().getParameterValue(
"startNumber"));
}
}