* @see net.sf.sitstart.svc.responseparser.ResponseParser#parseHtml(java.net.URI, java.io.InputStream)
*/
public void parseHtml(URI uri, InputStream content)
{
ByteArrayOutputStream output = null; //####TODO: Add in MIME type detection to this stream
Parser parser = null;
NodeFilter filter = null;
NodeList list = null;
if (uriFilter.isURIInternal(uri) && content != null)
{
log.debug("Parsing HTML from URI " + uri.toString());
try
{
log.debug("Copying content.");
output = new ByteArrayOutputStream();
IOUtils.copy(content, output);
log.debug("Creating filter.");
//####TODO: Supply this filter via dependency injection instead of constructing it inline
filter = new AndFilter(new NodeClassFilter(LinkTag.class),
new NodeFilter()
{
public boolean accept(Node node)
{
return(!((LinkTag)node).isMailLink());
}
});
log.debug("Creating parser.");
parser = new Parser(output.toString());
log.debug("Extracting all nodes that match the filter.");
list = parser.extractAllNodesThatMatch(filter);
log.debug("About to iterate through the matching nodes, count=" + list.size());
for (int i = 0; i < list.size(); i++)
{