assertTrue(httpRequestString(curi).startsWith("GET /url%20with%20spaces?query%20with%20spaces HTTP/1.0\r\n"));
runDefaultChecks(curi, "requestLine");
}
/**
 * Checks how the charset named in the content-type response header ends up
 * on the fetch recorder.
 *
 * <ul>
 * <li>{@code /cp1251}: the header names cp1251; the recorder must report
 * that charset, replay the raw bytes unchanged, and decode them to the
 * expected Cyrillic text.</li>
 * <li>{@code /unsupported-charset} and {@code /invalid-charset}: the header
 * names a charset that cannot be used; the URI must carry an
 * {@code unsatisfiableCharsetInHeader:} annotation and the recorder must
 * report latin1.</li>
 * </ul>
 *
 * @throws Exception if fetching or reading the recorded content fails
 */
public void testCharsets() throws Exception {
    // Text expected after decoding the cp1251 payload.
    String expectedText = "\u041A\u043E\u0447\u0430\u043D\u0438 \u041E\u0440\u043A"
            + "\u0435\u0441\u0442\u0430\u0440 \u0435 \u0435\u0434\u0435"
            + "\u043D \u043E\u0434 \u043D\u0430\u0458\u043F\u043E\u0437"
            + "\u043D\u0430\u0442\u0438\u0442\u0435 \u0438 \u043D\u0430"
            + "\u0458\u043F\u043E\u043F\u0443\u043B\u0430\u0440\u043D"
            + "\u0438\u0442\u0435 \u0431\u043B\u0435\u0445-\u043E\u0440"
            + "\u043A\u0435\u0441\u0442\u0440\u0438 \u0432\u043E \u0441"
            + "\u0432\u0435\u0442\u043E\u0442, \u043A\u043E\u0458 \u0433"
            + "\u043E \u0441\u043E\u0447\u0438\u043D\u0443\u0432\u0430"
            + "\u0430\u0442 \u0434\u0435\u0441\u0435\u0442\u043C\u0438"
            + "\u043D\u0430 \u0420\u043E\u043C\u0438-\u041C\u0430\u043A"
            + "\u0435\u0434\u043E\u043D\u0446\u0438 \u043F\u043E \u043F"
            + "\u043E\u0442\u0435\u043A\u043B\u043E \u043E\u0434 \u041A"
            + "\u043E\u0447\u0430\u043D\u0438, \u043F\u0440\u0435\u0434"
            + "\u0432\u043E\u0434\u0435\u043D\u0438 \u043E\u0434 \u0442"
            + "\u0440\u0443\u0431\u0430\u0447\u043E\u0442 \u041D\u0430"
            + "\u0430\u0442 (\u041D\u0435\u0430\u0442) \u0412\u0435\u043B"
            + "\u0438\u043E\u0432.\n";

    // --- Charset named in the header is usable ---
    CrawlURI cp1251Uri = makeCrawlURI("http://localhost:7777/cp1251");
    fetcher().process(cp1251Uri);

    String cp1251Header = cp1251Uri.getHttpResponseHeader("content-type");
    assertEquals("text/plain;charset=cp1251", cp1251Header);
    assertEquals(Charset.forName("cp1251"), cp1251Uri.getRecorder().getCharset());

    // Raw replayed bytes must match the payload constant exactly.
    byte[] expectedBytes = FetchHTTPTest.CP1251_PAYLOAD;
    byte[] replayedBytes = IOUtils.toByteArray(
            cp1251Uri.getRecorder().getContentReplayInputStream());
    assertTrue(Arrays.equals(expectedBytes, replayedBytes));

    // Character-level replay must match the expected text.
    String replayedText =
            cp1251Uri.getRecorder().getContentReplayCharSequence().toString();
    assertEquals(expectedText, replayedText);

    // --- Charset named in the header cannot be used ---
    // Each row: { url, expected content-type header, expected annotation }.
    String[][] fallbackCases = {
        {
            "http://localhost:7777/unsupported-charset",
            "text/plain;charset=UNSUPPORTED-CHARSET",
            "unsatisfiableCharsetInHeader:UNSUPPORTED-CHARSET",
        },
        {
            "http://localhost:7777/invalid-charset",
            "text/plain;charset=%%INVALID-CHARSET%%",
            "unsatisfiableCharsetInHeader:%%INVALID-CHARSET%%",
        },
    };
    for (String[] fallbackCase : fallbackCases) {
        CrawlURI fallbackUri = makeCrawlURI(fallbackCase[0]);
        fetcher().process(fallbackUri);

        assertEquals(fallbackCase[1], fallbackUri.getHttpResponseHeader("content-type"));
        assertTrue(fallbackUri.getAnnotations().contains(fallbackCase[2]));
        // default fallback
        assertEquals(Charset.forName("latin1"), fallbackUri.getRecorder().getCharset());
        runDefaultChecks(fallbackUri, "requestLine", "contentType");
    }
}