// Build an in-memory FetchedDatum for the HTML under test, then run it through SimpleParser.

// Record the content type as an HTTP response header on the datum's header set.
headers.add(HttpHeaderNames.CONTENT_TYPE, contentType);
// Encode the HTML text as UTF-8 bytes and wrap them as the datum's content.
// NOTE(review): the charset-name overload throws the checked UnsupportedEncodingException;
// getBytes(StandardCharsets.UTF_8) would avoid that - confirm no enclosing catch relies on it.
ContentBytes content = new ContentBytes(html.getBytes("utf-8"));
// The same url is passed for the first two arguments, and the current time is used as the timestamp.
// NOTE(review): presumably the two URLs are the requested and final (post-redirect) URL, and the
// trailing 0 is a response-rate style metric - confirm against the FetchedDatum constructor.
FetchedDatum fetchedDatum = new FetchedDatum(url, url, System.currentTimeMillis(), headers, content, contentType, 0);
// Configure the parser policy and parser, then parse the fetched datum.
// NOTE(review): going by the constant names, this policy sets no parse-duration limit and
// considers every link tag and link attribute type - confirm against ParserPolicy.
ParserPolicy policy = new ParserPolicy( ParserPolicy.NO_MAX_PARSE_DURATION,
BaseLinkExtractor.ALL_LINK_TAGS,
BaseLinkExtractor.ALL_LINK_ATTRIBUTE_TYPES);
// NOTE(review): the meaning of the final boolean argument (true) is not visible here - confirm
// against the SimpleParser constructor.
SimpleParser parser = new SimpleParser(new SimpleContentExtractor(), new SimpleLinkExtractor(), policy, true);
ParsedDatum parsedDatum = parser.parse(fetchedDatum);