// that substring.
if (endIndex > startIndex) {
String extractedContent =
line.substring(startIndex, endIndex);
extractedContent = cleanDoc(extractedContent);
return new StringDocument(extractedContent);
}
// Otherwise create a new builder seeded with everything
// appearing after the content tag.
else {
content = new StringBuilder(line.substring(
startIndex));
inContent = true;
}
} else if (line.contains("</content>")) {
inContent = false;
// If this is the end of the content, append everything
// before it and return all of the text accumulated so
// far as a document.
int endIndex = line.lastIndexOf("<");
content.append(line.substring(0, endIndex));
return new StringDocument(cleanDoc(content.toString()));
} else if (line.contains("<updated>") && content != null) {
// When the line has an updated tag and content is not
// null, we need to extract the date time and prepend it
// to the content.
int startIndex = line.indexOf(">")+1;
int endIndex = line.lastIndexOf("<");
String date = line.substring(startIndex, endIndex);
long dateTime = date.equals("")
? 0 :
Timestamp.valueOf(date).getTime();
String doc = String.format(
"%d %s", dateTime,
cleanDoc(content.toString()));
return new StringDocument(doc);
} else if (inContent && content != null) {
// If the content builder has been created, we know this
// line contains content. Add it to the builder.
content.append(line);
}