}
}
String encoding = metadata.get(Metadata.CONTENT_ENCODING);
if (encoding == null) {
throw new TikaException(
"Text encoding could not be detected and no encoding"
+ " hint is available in document metadata");
}
try {
Reader reader =
new BufferedReader(new InputStreamReader(stream, encoding));
// TIKA-240: Drop the BOM when extracting plain text
reader.mark(1);
int bom = reader.read();
if (bom != '\ufeff') { // zero-width no-break space
reader.reset();
}
XHTMLContentHandler xhtml =
new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
xhtml.startElement("p");
char[] buffer = new char[4096];
int n = reader.read(buffer);
while (n != -1) {
xhtml.characters(buffer, 0, n);
n = reader.read(buffer);
}
xhtml.endElement("p");
xhtml.endDocument();
} catch (UnsupportedEncodingException e) {
throw new TikaException(
"Unsupported text encoding: " + encoding, e);
}
}