config.set("keep.original.url.on.redirect", "true");
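// Write the test site: an entry page linking to a second page whose URL
// contains a space, angle brackets, a query string and a fragment.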
TestUtils.writeFile(tmpDir+"/web/one.html", "<a href='page two.html?a <b> c#d'>two</a>");
TestUtils.writeFile(tmpDir+"/web/page two.html", "content");
WebServer server = null;
Crawler crawler = null;
Page in, one, two;
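// Build the seed page the crawler will start from.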
in = PageTest.randomPage();
in.setUrl(url);
try {
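// Start a local web server on port 8087 serving the test pages.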
server = new WebServer(8087);
server.addResourceHandler("/", tmpDir+"/web");
server.start();
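// Seed the page database with the single start page.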
PageDB db = new PageDB(tmpDir+"/testdb");
db.open(PageDB.WRITE);
db.addPage(in);
db.close();
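// Two crawl cycles: enough to fetch the seed page, extract its link,
// and then fetch the linked page.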
crawler = new Crawler();
crawler.crawl(2);
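// After crawling, the database should contain exactly the two test pages.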
db.open(PageDB.READ);
Iterator<Page> pages = db.iterator();
assertTrue("The crawler lost or discarded all test pages", pages.hasNext());
one = pages.next();
assertTrue("The crawler lost or discarded the second test page", pages.hasNext());
two = pages.next();
assertFalse("The crawler has more than two pages", pages.hasNext());
db.close();
} finally {
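// Clean up the crawler and stop the web server even if the test failed.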
if (null != crawler) {
crawler.cleanup();
}
// Only stop the server if it was actually created; otherwise this would
// throw a NullPointerException and mask the original failure.
if (null != server) {
    server.requestStop();
    while (!server.isStopped()) {
        Execute.sleep(20);
    }
}
}
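// Both pages must record at least one successful fetch.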
assertTrue("Failed in fetching both test pages", (one.getLastSuccess() > 0) && (two.getLastSuccess() > 0));