// Locals shared by every scenario below; each scenario reassigns them.
// NOTE(review): the two commented-out declarations (Metadata, Content) are
// dead code -- consider deleting them.
//Metadata metadata = new Metadata();
EncodingDetector detector;
// Content content;
String encoding;
// Scenario 1: a page that carries only a base URL, a "text/plain" content
// type and the raw content bytes -- no charset header and no extra clue.
WebPage page = new WebPage();
page.setBaseUrl(new Utf8("http://www.example.com/"));
page.setContentType(new Utf8("text/plain"));
page.setContent(ByteBuffer.wrap(contentInOctets));
detector = new EncodingDetector(conf);
detector.autoDetectClues(page, true);
// "windows-1252" is the default the detector is expected to hand back.
encoding = detector.guessEncoding(page, "windows-1252");
// no information is available, so it should return default encoding
// (compared in lower case, so the casing of the returned name is irrelevant)
assertEquals("windows-1252", encoding.toLowerCase());
// Scenario 2: same page as before, but with a header stored under
// EncodingDetector.CONTENT_TYPE_UTF8 that declares charset=UTF-16.
page = new WebPage();
page.setBaseUrl(new Utf8("http://www.example.com/"));
page.setContentType(new Utf8("text/plain"));
page.setContent(ByteBuffer.wrap(contentInOctets));
page.putToHeaders(EncodingDetector.CONTENT_TYPE_UTF8, new Utf8("text/plain; charset=UTF-16"));
// A fresh detector is built so no clue from an earlier scenario carries over.
detector = new EncodingDetector(conf);
detector.autoDetectClues(page, true);
encoding = detector.guessEncoding(page, "windows-1252");
// The charset declared in the header must win over the "windows-1252" default.
assertEquals("utf-16", encoding.toLowerCase());
// Scenario 3: page without any charset header; the only hint is a clue
// added by hand after autoDetectClues has run.
page = new WebPage();
page.setBaseUrl(new Utf8("http://www.example.com/"));
page.setContentType(new Utf8("text/plain"));
page.setContent(ByteBuffer.wrap(contentInOctets));
detector = new EncodingDetector(conf);
detector.autoDetectClues(page, true);
// Register "windows-1254" as a clue tagged "sniffed".
detector.addClue("windows-1254", "sniffed");
encoding = detector.guessEncoding(page, "windows-1252");
// The manually added clue must be preferred over the "windows-1252" default.
assertEquals("windows-1254", encoding.toLowerCase());
// Scenario 4: enable autodetection by setting the minimum-confidence
// threshold (EncodingDetector.MIN_CONFIDENCE_KEY) to 50.
conf.setInt(EncodingDetector.MIN_CONFIDENCE_KEY, 50);
page = new WebPage();
page.setBaseUrl(new Utf8("http://www.example.com/"));
page.setContentType(new Utf8("text/plain"));
page.setContent(ByteBuffer.wrap(contentInOctets));
// Store a Content-Type declaring charset=UTF-16 in the page metadata. The
// bytes are encoded with an explicit charset so the fixture does not depend
// on the JVM's platform-default encoding.
// NOTE(review): this uses putToMetadata(Response.CONTENT_TYPE, ...), whereas
// the header-based scenario earlier in this method uses
// putToHeaders(EncodingDetector.CONTENT_TYPE_UTF8, ...) -- confirm which of
// the two the detector actually reads.
page.putToMetadata(new Utf8(Response.CONTENT_TYPE), ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes(java.nio.charset.Charset.forName("UTF-8"))));
// The detector is created after the threshold change, with the updated conf.
detector = new EncodingDetector(conf);
detector.autoDetectClues(page, true);
// Add a competing clue ("utf-32", tagged "sniffed") on top of the above.
detector.addClue("utf-32", "sniffed");
encoding = detector.guessEncoding(page, "windows-1252");