FetchedDatum fetchedDatum = new FetchedDatum(arguments.getTuple());
// Now, if the FetchedDatum mime-type is application/mbox, we want to parse it and
// output the results
if (fetchedDatum.getContentType().equals("application/mbox")) {
Metadata metadata = new Metadata();
ParseContext context = new ParseContext();
InputStream is = new ByteArrayInputStream(fetchedDatum.getContentBytes());
try {
_parser.parse(is, _handler, metadata, context);
// _content now has all of the body text, and metadata has the header info.
String messageId = metadata.get(Metadata.IDENTIFIER);
String emailAddress = metadata.get(Metadata.CREATOR);
if (emailAddress == null) {
LOGGER.warn("No email address for message: " + messageId);
return;
}
String address = null;
String name = null;
Matcher addressMatcher = FULL_EMAIL_ADDRESS_PATTERN.matcher(emailAddress);
if (addressMatcher.matches()) {
name = addressMatcher.group(1);
address = addressMatcher.group(2);
} else {
addressMatcher = SIMPLE_EMAIL_ADDRESS_PATTERN.matcher(emailAddress);
if (addressMatcher.matches()) {
address = addressMatcher.group(1);
} else {
LOGGER.warn("Email address has invalid format: " + emailAddress);
return;
}
}
// Now we might need to remap the address, if this user has aliases.
if (EMAIL_ALIASES.containsKey(address)) {
address = EMAIL_ALIASES.get(address);
}
Tuple tuple = new Tuple(messageId, address, name, 0.0);
functionCall.getOutputCollector().add(tuple);
String replyId = metadata.get(Metadata.RELATION);
if (replyId != null) {
double score = analyzeReply(_content.toString());
if (score > 0.0) {
tuple = new Tuple(replyId, null, null, score);