}
@Override
public void operate(FlowProcess process, FunctionCall<NullContext> functionCall) {
TupleEntry arguments = functionCall.getArguments();
FetchedDatum fetchedDatum = new FetchedDatum(arguments.getTuple());
if (fetchedDatum.getContentType().startsWith("text/html")) {
init();
Metadata metadata = new Metadata();
InputStream is = new ByteArrayInputStream(fetchedDatum.getContentBytes());
try {
_parser.parse(is, _handler, metadata, new ParseContext());
// _ids now has a list of the mailbox IDs that we use to create URLs.
for (String id : _ids) {
String url = String.format("%s/%s.mbox", fetchedDatum.getUrl(), id);
UrlDatum datum = new UrlDatum(url);
functionCall.getOutputCollector().add(datum.getTuple());
}
} catch (Exception e) {
LOGGER.error("Exception parsing mod_mbox page", e);