package net.sf.jpluck.spider;
import net.sf.jpluck.handlers.ContentHandler;
import net.sf.jpluck.handlers.HandlingException;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Base class for runnable parsing tasks that process a single {@link Resource}
 * on behalf of a {@link Spider}.
 *
 * <p>Subclasses supply the {@link ContentHandler} via {@link #getContentHandler()};
 * {@link #run()} drives the handler and passes every link it reports back to the
 * spider.
 */
abstract class Parser implements Runnable {
    /** Logger shared by name ("parser") across all parser instances. */
    protected final Logger logger = Logger.getLogger("parser");
    /** Spider that receives progress events and the links found while parsing. */
    protected Spider spider;
    /** Resource whose content this task parses. */
    protected Resource resource;

    /**
     * Creates a parsing task.
     *
     * @param spider   the spider to notify and to hand discovered links to
     * @param resource the resource to parse
     */
    protected Parser(Spider spider, Resource resource) {
        this.spider = spider;
        this.resource = resource;
    }

    /**
     * Parses the resource and submits the discovered links to the spider.
     *
     * <p>Does nothing if the spider is no longer running, and only logs a warning
     * if no content handler is available. Page links are submitted one level
     * below the current resource; image links are submitted at the same level.
     *
     * <p>Regardless of the outcome (including the early returns), the spider is
     * told that parsing completed and that the task is done.
     *
     * @throws RuntimeException wrapping the {@link HandlingException} if the
     *         content handler fails
     */
    public void run() {
        try {
            if (!spider.isRunning()) {
                return;
            }
            spider.fireParsingStarted(resource.getURI());

            ContentHandler contentHandler = getContentHandler();
            if (contentHandler == null) {
                logger.warning(resource.getMimeType() + " content not handled.");
                return;
            }
            contentHandler.handle();

            String[] pageLinks = contentHandler.getPageLinks();
            for (int i = 0; i < pageLinks.length; i++) {
                String link = pageLinks[i];
                spider.retrieve(link, LinkFilter.EXTERNAL_LINK, resource.getLevel() + 1, resource.getURI());
            }

            String[] imageLinks = contentHandler.getImageLinks();
            for (int i = 0; i < imageLinks.length; i++) {
                String image = imageLinks[i];
                spider.retrieve(image, LinkFilter.EMBEDDED_IMAGE_LINK, resource.getLevel(), resource.getURI());
            }
        } catch (HandlingException e) {
            String message = "Error handling content " + resource.getURI() + ". ";
            Throwable cause = e.getCause();
            if (cause != null) {
                // Throwable.toString() already contains the message, so it is
                // appended only once.
                message += cause;
            }
            // Route the stack trace through the logger instead of stderr.
            logger.log(Level.WARNING, message, e);
            throw new RuntimeException(e);
        } finally {
            // NOTE(review): the completed event is also fired on the early-return
            // paths, including when parsingStarted was never fired; kept as-is
            // because listeners may rely on it.
            try {
                spider.fireParsingCompleted(resource.getURI());
            } finally {
                // Must run even if firing the completed event throws; otherwise
                // the spider would never learn that this task finished.
                spider.notifyTaskDone();
            }
        }
    }

    /**
     * Returns the handler used to process the resource's content.
     *
     * @return the content handler, or {@code null} if the content cannot be
     *         handled (in which case {@link #run()} logs a warning and stops)
     */
    protected abstract ContentHandler getContentHandler();
}