// Reads the mbox stream line by line and drives a three-state machine
// (START -> IN_HEADER <-> IN_CONTENT). Header lines are accumulated and
// handed to saveHeaderInMetadata(); body lines are emitted as XHTML.
BufferedReader reader = new BufferedReader(isr);
// Content type and encoding are set unconditionally, before any line is read.
metadata.set(Metadata.CONTENT_TYPE, MBOX_MIME_TYPE);
metadata.set(Metadata.CONTENT_ENCODING, "us-ascii");
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
ParseStates parseState = ParseStates.START;
// Header text gathered so far: one header line plus any continuation lines
// appended to it. Stays null until the first record divider is seen.
String multiLine = null;
// True while a <q> element is open in the current message body.
boolean inQuote = false;
// Number of lines seen so far that start with the record divider. It is
// incremented in every state, including IN_CONTENT.
int numEmails = 0;
// We're going to scan, line-by-line, for a line that starts with
// "From "
for (String curLine = reader.readLine(); curLine != null; curLine = reader.readLine()) {
boolean newMessage = curLine.startsWith(MBOX_RECORD_DIVIDER);
if (newMessage) {
numEmails += 1;
}
switch (parseState) {
case START:
// Everything before the first record divider is ignored.
if (newMessage) {
parseState = ParseStates.IN_HEADER;
// Clear the flag so IN_HEADER below handles the divider line through
// its non-divider branches instead of its "new message" branch.
newMessage = false;
// Fall through to IN_HEADER
} else {
break;
}
case IN_HEADER:
if (newMessage) {
// A new record started before any blank line: flush the pending header
// and start accumulating from the divider line.
saveHeaderInMetadata(numEmails, metadata, multiLine);
multiLine = curLine;
} else if (curLine.length() == 0) {
// Blank line is signal that we're transitioning to the content.
saveHeaderInMetadata(numEmails, metadata, multiLine);
parseState = ParseStates.IN_CONTENT;
// Mimic what PackageParser does between entries.
xhtml.startElement("div", "class", "email-entry");
xhtml.startElement("p");
inQuote = false;
} else if (curLine.startsWith(" ") || curLine.startsWith("\t")) {
// Continuation line: append it, trimmed, to the pending header with a
// single space as separator.
multiLine += " " + curLine.trim();
} else {
// Start of a different header line: flush the previous one and begin
// accumulating the new one.
// NOTE(review): when reached via the START fall-through, multiLine is
// still null here; saveHeaderInMetadata presumably tolerates null -
// confirm against its implementation.
saveHeaderInMetadata(numEmails, metadata, multiLine);
multiLine = curLine;
}
break;
// TODO - use real email parsing support so we can correctly handle
// things like multipart messages and quoted-printable encoding.
// We'd also want this for charset handling, where content isn't 7-bit
// ascii.
case IN_CONTENT:
if (newMessage) {
// Next record begins: finish the current message and go back to header
// parsing, with the divider line as the first pending header text.
// NOTE(review): endMessage is defined elsewhere; presumably it closes
// the elements opened for this message (and an open <q>) - confirm.
endMessage(xhtml, inQuote);
parseState = ParseStates.IN_HEADER;
multiLine = curLine;
} else {
// Lines starting with ">" are treated as quoted text. Open or close the
// <q> element only when the quoted status changes between lines.
boolean quoted = curLine.startsWith(">");
if (inQuote) {
if (!quoted) {
xhtml.endElement("q");
inQuote = false;
}
} else if (quoted) {
xhtml.startElement("q");
inQuote = true;
}
xhtml.characters(curLine);
// For plain text email, each line is a real break position.
xhtml.element("br", "");
}
// Last case of the switch, so no break is needed.
}
}
// End of input: flush whatever the final state left pending. If no divider
// was ever seen (still in START), there is nothing to flush.
if (parseState == ParseStates.IN_HEADER) {
saveHeaderInMetadata(numEmails, metadata, multiLine);
} else if (parseState == ParseStates.IN_CONTENT) {
endMessage(xhtml, inQuote);
}
xhtml.endDocument();
}