// Small HTML document used as parser input: a title, one link and two paragraphs.
String html = "<html><head><title>The Big Brown Shoe</title></head>"
    + "<body><p>The best pizza place in the US is "
    + "<a href=\"http://antoniospizzas.com/\">Antonio's Pizza</a>.</p>"
    + "<p>It is located in Amherst, MA.</p></body></html>";
//<start id="tika-html"/>
byte[] utf8Bytes = html.getBytes(Charset.forName("UTF-8"));
InputStream input = new ByteArrayInputStream(utf8Bytes);
ContentHandler text = new BodyContentHandler();//<co id="html.text.co"/>
LinkContentHandler links = new LinkContentHandler();//<co id="html.link.co"/>
ContentHandler handler = new TeeContentHandler(links, text);//<co id="html.merge"/>
Metadata metadata = new Metadata();//<co id="html.store"/>
Parser parser = new HtmlParser();//<co id="html.parser"/>
ParseContext context = new ParseContext();
parser.parse(input, handler, metadata, context);//<co id="html.parse"/>
// Report what the parse produced: the title from the metadata, then each handler's output.
System.out.println("Title: " + metadata.get(Metadata.TITLE));
System.out.println("Body: " + text);
System.out.println("Links: " + links.getLinks());
/*
<calloutlist>
<callout arearefs="html.text.co"><para>Construct a ContentHandler that extracts just the text between the body tags.</para></callout>
<callout arearefs="html.link.co"><para>Construct a ContentHandler that knows about HTML links.</para></callout>