String text;
if(iv_delimeter != null)
{
int delimeterLoc = line.indexOf(iv_delimeter);
if(delimeterLoc <= 0)
throw new CollectionException(new Exception("Line in file number "+lineNumber+" is not well formatted. " +
"\nIt should have the format:" +
"\n<doc_id>"+iv_delimeter+"<doc_text>"));
id = line.substring(0,delimeterLoc);
text = line.substring(delimeterLoc+iv_delimeter.length());
}
else
{
id = Integer.toString(lineNumber); //id will be one more than its index into iv_linesFromFile (iv_currentIndex has already been incremented)
text = line;
}
iv_logger.debug("id="+id);
iv_logger.debug("text="+text);
//if there's a CAS Initializer, call it
if (getCasInitializer() != null)
{
Reader reader = new StringReader(text);
getCasInitializer().initializeCas(reader, cas);
reader.close();
}
else //No CAS Initializer, so read file and set document text ourselves
{
jcas.setDocumentText(text);
}
//set language if it was explicitly specified as a configuration parameter
if (iv_language != null)
{
// ((DocumentAnnotation)jcas.getDocumentAnnotationFs()).setLanguage(iv_language);
}
DocumentID documentIDAnnotation = new DocumentID(jcas);
documentIDAnnotation.setDocumentID(id);
documentIDAnnotation.addToIndexes();
}
catch (CASException e)
{
throw new CollectionException(e);
}
finally
{
iv_currentIndex++;
}