protected void testFetchPipe(BixoPlatform platform) throws Exception {
// System.setProperty("bixo.root.level", "TRACE");
final int numPages = 10;
final int port = 8089;
Tap in = makeInputData(platform, "testFetchPipe", "localhost:" + port, numPages, new Payload());
Pipe pipe = new Pipe("urlSource");
BaseScoreGenerator scorer = new FixedScoreGenerator();
BaseFetcher fetcher = new SimpleHttpFetcher(ConfigUtils.BIXO_TEST_AGENT);
FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, 1);
String output = "build/test/FetchPipeTest/testFetchPipe";
BasePath outputPath = platform.makePath(output);
BasePath statusPath = platform.makePath(outputPath, "status");
BasePath contentPath = platform.makePath(outputPath, "content");
Tap status = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath, SinkMode.REPLACE);
Tap content = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath, SinkMode.REPLACE);
// Finally we can run it.
FlowConnector flowConnector = platform.makeFlowConnector();
Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(status, content), fetchPipe);
TestWebServer webServer = null;
try {
webServer = new TestWebServer(new NoRobotsResponseHandler(), port);
flow.complete();
} finally {
webServer.stop();
}
// Verify that numPages fetched-content entries and numPages status entries were saved.
Tap validate = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath);
TupleEntryIterator tupleEntryIterator = validate.openForRead(platform.makeFlowProcess());
int totalEntries = 0;
boolean[] fetchedPages = new boolean[numPages];
while (tupleEntryIterator.hasNext()) {
TupleEntry entry = tupleEntryIterator.next();
totalEntries += 1;
// Verify that the tuple entry can be converted to a FetchedDatum.
FetchedDatum datum = new FetchedDatum(entry);
String url = datum.getUrl();
Assert.assertNotNull(url);
// Verify that we got one of each page
int idOffset = url.indexOf(".html") - 1;
int pageId = Integer.parseInt(url.substring(idOffset, idOffset + 1));
Assert.assertFalse(fetchedPages[pageId]);
fetchedPages[pageId] = true;
}
Assert.assertEquals(numPages, totalEntries);
tupleEntryIterator.close();
validate = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath);
tupleEntryIterator = validate.openForRead(platform.makeFlowProcess());
totalEntries = 0;
fetchedPages = new boolean[numPages];
while (tupleEntryIterator.hasNext()) {
TupleEntry entry = tupleEntryIterator.next();
totalEntries += 1;