public void testGuessing() {
// first disable auto detection
conf.setInt(EncodingDetector.MIN_CONFIDENCE_KEY, -1);
Metadata metadata = new Metadata();
EncodingDetector detector;
Content content;
String encoding;
content = new Content("http://www.example.com", "http://www.example.com/",
contentInOctets, "text/plain", metadata, conf);
detector = new EncodingDetector(conf);
detector.autoDetectClues(content, true);
encoding = detector.guessEncoding(content, "windows-1252");
// no information is available, so it should return default encoding
assertEquals("windows-1252", encoding.toLowerCase());
metadata.clear();
metadata.set(Response.CONTENT_TYPE, "text/plain; charset=UTF-16");
content = new Content("http://www.example.com", "http://www.example.com/",
contentInOctets, "text/plain", metadata, conf);
detector = new EncodingDetector(conf);
detector.autoDetectClues(content, true);
encoding = detector.guessEncoding(content, "windows-1252");
assertEquals("utf-16", encoding.toLowerCase());
metadata.clear();
content = new Content("http://www.example.com", "http://www.example.com/",
contentInOctets, "text/plain", metadata, conf);
detector = new EncodingDetector(conf);
detector.autoDetectClues(content, true);
detector.addClue("windows-1254", "sniffed");
encoding = detector.guessEncoding(content, "windows-1252");
assertEquals("windows-1254", encoding.toLowerCase());
// enable autodetection
conf.setInt(EncodingDetector.MIN_CONFIDENCE_KEY, 50);
metadata.clear();
metadata.set(Response.CONTENT_TYPE, "text/plain; charset=UTF-16");
content = new Content("http://www.example.com", "http://www.example.com/",
contentInOctets, "text/plain", metadata, conf);
detector = new EncodingDetector(conf);
detector.autoDetectClues(content, true);
detector.addClue("utf-32", "sniffed");