http://www.example.org/test
http://www.example.org/test/index.(html|json|xml)
http://www.example.org/test/index_de.(html|json|xml)
http://www.example.org/test/127.(html|json|xml)
341342343344345346347348349350351352353354
} catch (Exception e) { logger.error("Could not add seed: " + e.getMessage()); } } WebURL webUrl = new WebURL(); webUrl.setURL(canonicalUrl); webUrl.setDocid(docId); webUrl.setDepth((short) 0); if (robotstxtServer.allowedIn(webUrl) == null) { logger.info("Robots.txt does not allow this seed: " + pageUrl); } else { frontier.schedule(webUrl); }
272829303132333435
/**
 * Checks that resolving the absolute URL {@code http://bar.com} against the
 * parent {@code http://foo.com} produces {@code http://bar.com}.
 */
@Test
public void absolutePath() throws Exception {
    WebURL parent = new WebURL("http://foo.com");
    WebURL resolved = parent.resolve("http://bar.com");
    // assertEquals takes (expected, actual); keeping that order makes a
    // failure message label the two values correctly.
    assertEquals("http://bar.com", resolved.toASCIIString());
}
363738394041424344
/**
 * Checks that resolving the root-relative path {@code /def/ghi.html} against
 * the parent {@code http://foo.com/abc/def.html} produces
 * {@code http://foo.com/def/ghi.html}.
 */
@Test
public void relativePath() throws Exception {
    WebURL parent = new WebURL("http://foo.com/abc/def.html");
    WebURL resolved = parent.resolve("/def/ghi.html");
    // assertEquals takes (expected, actual); keeping that order makes a
    // failure message label the two values correctly.
    assertEquals("http://foo.com/def/ghi.html", resolved.toASCIIString());
}
454647484950515253
/**
 * Checks that resolving the bare document name {@code hello.pdf} against the
 * parent {@code http://foo.com} (which has no trailing slash) produces
 * {@code http://foo.com/hello.pdf}.
 */
@Test
public void noSlashDocument() throws Exception {
    WebURL parent = new WebURL("http://foo.com");
    WebURL resolved = parent.resolve("hello.pdf");
    // assertEquals takes (expected, actual); keeping that order makes a
    // failure message label the two values correctly.
    assertEquals("http://foo.com/hello.pdf", resolved.toASCIIString());
}
545556575859606162
/**
 * Checks that resolving the query-only reference {@code ?q=bar} against the
 * parent {@code http://foo.com/abc/def/ghi.asmx?q=foo} keeps the parent's
 * path and replaces its query, producing
 * {@code http://foo.com/abc/def/ghi.asmx?q=bar}.
 */
@Test
public void relativeQuery() throws Exception {
    WebURL parent = new WebURL("http://foo.com/abc/def/ghi.asmx?q=foo");
    WebURL resolved = parent.resolve("?q=bar");
    // assertEquals takes (expected, actual); keeping that order makes a
    // failure message label the two values correctly.
    assertEquals("http://foo.com/abc/def/ghi.asmx?q=bar", resolved.toASCIIString());
}
636465666768697071
/**
 * Checks that a percent-encoded query survives resolution unchanged:
 * resolving {@code ?q=bar%3Dfoo} against
 * {@code http://foo.com/abc/def/ghi.asmx?q=foo%3Dbar} must produce
 * {@code http://foo.com/abc/def/ghi.asmx?q=bar%3Dfoo}, with {@code %3D}
 * still encoded.
 */
@Test
public void queryEscaping() throws Exception {
    WebURL parent = new WebURL("http://foo.com/abc/def/ghi.asmx?q=foo%3Dbar");
    WebURL resolved = parent.resolve("?q=bar%3Dfoo");
    // assertEquals takes (expected, actual); keeping that order makes a
    // failure message label the two values correctly.
    assertEquals("http://foo.com/abc/def/ghi.asmx?q=bar%3Dfoo", resolved.toASCIIString());
}