/**
 * Loads the next file from {@code mFiles} into the supplied CAS and advances
 * {@code mCurrentIndex} by one.
 *
 * <p>When {@code mTEXT} is set, the file is treated as a text document: it is handed to the
 * CAS Initializer if one is configured, otherwise its contents are read using
 * {@code mEncoding} and set as the document text. The document language is set when
 * {@code mLanguage} is non-null, and a {@code SourceDocumentInformation} annotation describing
 * the file is added to the indexes. Otherwise the file is deserialized into the CAS as XMI
 * (when {@code mXCAS} equals "xmi", ignoring case) or as XCAS.
 *
 * @param aCAS the CAS to populate
 * @throws IOException if the file cannot be opened, read or closed
 * @throws CollectionException if a JCas cannot be obtained from {@code aCAS}, or if
 *         deserializing the XMI/XCAS file raises a {@code SAXException}
 */
public void getNext(CAS aCAS) throws IOException, CollectionException {
  JCas jcas;
  try {
    jcas = aCAS.getJCas();
  } catch (CASException e) {
    throw new CollectionException(e);
  }

  // The index is advanced before any I/O, so a file that fails to load is not retried
  // on the next call.
  File file = (File) mFiles.get(mCurrentIndex++);

  if (mTEXT) {
    if (getCasInitializer() != null) {
      // A CAS Initializer is configured: let it populate the CAS from the raw stream.
      FileInputStream fis = new FileInputStream(file);
      try {
        getCasInitializer().initializeCas(fis, aCAS);
      } finally {
        fis.close();
      }
    } else {
      // No CAS Initializer, so read the file and set the document text ourselves.
      // file2String works from the File, so no stream needs to be opened here.
      String text = FileUtils.file2String(file, mEncoding);
      jcas.setDocumentText(text);
    }

    // set language if it was explicitly specified as a configuration parameter
    if (mLanguage != null) {
      jcas.setDocumentLanguage(mLanguage);
    }

    // Also store location of source document in CAS. This information is critical
    // if CAS Consumers will need to know where the original document contents are located.
    // For example, the Semantic Search CAS Indexer writes this information into the
    // search index that it creates, which allows applications that use the search index to
    // locate the documents that satisfy their semantic queries.
    SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
    // NOTE(review): File.toURL() is deprecated because it does not escape characters that
    // are illegal in URLs. It is left as-is because toURI().toURL() would change the stored
    // URI string for such paths.
    srcDocInfo.setUri(file.getAbsoluteFile().toURL().toString());
    srcDocInfo.setOffsetInSource(0);
    srcDocInfo.setDocumentSize((int) file.length());
    // mCurrentIndex already points past this file, so equality with the list size means
    // this was the last file.
    srcDocInfo.setLastSegment(mCurrentIndex == mFiles.size());
    srcDocInfo.addToIndexes();
  } else {
    // XCAS / XMI input files
    FileInputStream fis = new FileInputStream(file);
    try {
      // Literal-first comparison: a null mXCAS falls through to the XCAS deserializer
      // instead of throwing a NullPointerException.
      if ("xmi".equalsIgnoreCase(mXCAS)) {
        XmiCasDeserializer.deserialize(fis, aCAS, lenient);
      } else {
        XCASDeserializer.deserialize(fis, aCAS, lenient);
      }
    } catch (SAXException e) {
      UIMAFramework.getLogger(FileSystemCollectionReader.class).log(Level.WARNING,
              "Problem with XML input file: " + file.getAbsolutePath());
      throw new CollectionException(e);
    } finally {
      fis.close();
    }
  }
}