// Create the HTTP fetcher (configured with MAX_THREADS and userAgent) and wrap it in a FetchPipe
// that is fed by importPipe and also takes the scorer and NUM_REDUCERS.
BaseFetcher fetcher = new SimpleHttpFetcher(MAX_THREADS, userAgent);
FetchPipe fetchPagePipe = new FetchPipe(importPipe, scorer, fetcher, NUM_REDUCERS);
// Here's the pipe that will output UrlDatum tuples by extracting URLs from the mod_mbox-generated page.
// It hangs off the fetch pipe's content tail and applies ParseModMboxPageFunction to each tuple.
// NOTE(review): Fields.RESULTS presumably restricts the output to the function's own fields - confirm.
Pipe mboxPagePipe = new Each(fetchPagePipe.getContentTailPipe(), new ParseModMboxPageFunction(), Fields.RESULTS);
// Create a named pipe (MBOX_PAGE_STATUS_PIPE_NAME) for the status of the mod_mbox-generated pages,
// taken from the fetch pipe's status tail.
Pipe mboxPageStatusPipe = new Pipe(MBOX_PAGE_STATUS_PIPE_NAME, fetchPagePipe.getStatusTailPipe());
// Set up appropriate FetcherPolicy, where we increase the max content size (since mailbox files