/**
 * Converts the content fetched by each nested HTTP operation into serialized XML.
 *
 * <p>For every operation definition of this element an {@link HttpProcessor} is created and
 * run. The bytes it returns are parsed into a DOM {@link Document} by the
 * {@code org.lobobrowser} parser classes, which are looked up by name through reflection, and
 * the DOM is then written out with an identity {@link Transformer}. Each serialized document
 * is appended to the result as a {@code NodeVariable}.
 *
 * <p>When the element has no operation definitions, the result holds a single
 * {@code NodeVariable} built from the element's body text.
 *
 * <p>Every operation definition is cast to an HTTP definition/processor, so a non-HTTP nested
 * operation fails with a {@link ClassCastException}.
 *
 * @param scraper the running scraper; supplies the configuration used to create processors
 * @param context the scraper context; its {@code "http"} entry supplies the response charset
 * @return a list variable with one entry per nested operation (or one entry for the body text)
 * @throws PluginException if the parser classes cannot be loaded or invoked, or if a document
 *         cannot be parsed or serialized
 */
@Override
public Variable executePlugin(Scraper scraper, ScraperContext context) {
    IElementDef[] defs = elementDef.getOperationDefs();
    ListVariable result = new ListVariable();
    if (defs.length == 0) {
        // Nothing to fetch: the element's own body text is the only result.
        result.addVariable(new NodeVariable(elementDef.getBodyText()));
        return result;
    }

    String transformError = "Error Transforming document";
    try {
        // The parser classes are resolved by name at run time rather than referenced directly.
        Class<?> userAgentInterfaceClass = Class.forName("org.lobobrowser.html.UserAgentContext");
        Class<?> userAgentContextClass = Class.forName("org.lobobrowser.html.test.SimpleUserAgentContext");
        // getDeclaredConstructor().newInstance() wraps constructor failures in
        // InvocationTargetException instead of letting checked exceptions escape undeclared.
        Object userAgentContext = userAgentContextClass.getDeclaredConstructor().newInstance();
        Class<?> documentBuilderClass = Class.forName("org.lobobrowser.html.parser.DocumentBuilderImpl");
        Constructor<?> documentBuilderConstructor = documentBuilderClass.getConstructor(userAgentInterfaceClass);
        Object documentBuilder = documentBuilderConstructor.newInstance(userAgentContext);
        Class<?> inputSourceClass = Class.forName("org.lobobrowser.html.parser.InputSourceImpl");
        Constructor<?> inputSourceConstructor = inputSourceClass.getConstructor(
                InputStream.class, String.class, String.class);
        Method documentBuilderParse = documentBuilderClass.getMethod("parse", InputSource.class);
        // The factory lookup does not depend on the document, so it is done once.
        TransformerFactory factory = TransformerFactory.newInstance();

        for (IElementDef def : defs) {
            HttpProcessor processor = (HttpProcessor) ProcessorResolver.createProcessor(
                    def, scraper.getConfiguration(), scraper);
            String documentURI = ((HttpDef) processor.getElementDef()).getUrl();
            Variable content = processor.run(scraper, context);
            // Read after the request has run so the charset belongs to this response.
            // NOTE(review): presumably "http" is updated by the HTTP processor - confirm.
            HttpInfo httpInfo = (HttpInfo) context.get("http");
            try {
                // A document URI and a charset should be provided.
                Object inputSource = inputSourceConstructor.newInstance(
                        new ByteArrayInputStream(content.toBinary()), documentURI, httpInfo.charset);
                Document document = (Document) documentBuilderParse.invoke(documentBuilder, inputSource);
                Source source = new DOMSource(document);
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                Result domResult = new StreamResult(out);
                // A transformer created without a stylesheet copies the source to the result.
                Transformer transformer = factory.newTransformer();
                transformer.transform(source, domResult);
                result.addVariable(new NodeVariable(out.toByteArray()));
            } catch (TransformerException e) {
                log.error(transformError, e);
                throw new PluginException(e);
            } catch (IllegalAccessException e) {
                throw reflectionFailure(e);
            } catch (InstantiationException e) {
                throw reflectionFailure(e);
            } catch (Exception e) {
                throw contentFailure(e, documentURI);
            }
        }
    } catch (ClassNotFoundException e) {
        throw reflectionFailure(e);
    } catch (InstantiationException e) {
        throw reflectionFailure(e);
    } catch (IllegalAccessException e) {
        throw reflectionFailure(e);
    } catch (NoSuchMethodException e) {
        throw reflectionFailure(e);
    } catch (InvocationTargetException e) {
        throw reflectionFailure(e);
    }
    return result;
}

/**
 * Logs a failure of the reflective parser plumbing and wraps it for rethrowing.
 *
 * @param e the reflection-related exception
 * @return a {@code PluginException} carrying the generic {@code EXCEPTION} message and {@code e}
 */
private PluginException reflectionFailure(Exception e) {
    log.error(EXCEPTION, e);
    return new PluginException(EXCEPTION, e);
}

/**
 * Logs a failure that occurred while processing one document and wraps it for rethrowing.
 *
 * <p>Reflection wraps any exception thrown by the invoked constructor or method in an
 * {@link InvocationTargetException}, so a {@link SAXException} or {@link IOException} raised
 * by the parser is only visible as that wrapper's cause. Such a cause is unwrapped here so
 * that the URL-specific message is reported instead of the generic one.
 *
 * @param e the exception caught while parsing or serializing the document
 * @param documentURI the URL the content was retrieved from, included in the message
 * @return the {@code PluginException} to throw
 */
private PluginException contentFailure(Exception e, String documentURI) {
    Exception failure = e;
    if (e instanceof InvocationTargetException) {
        Throwable target = e.getCause();
        if (target instanceof SAXException || target instanceof IOException) {
            failure = (Exception) target;
        }
    }

    if (failure instanceof ClassNotFoundException || failure instanceof InvocationTargetException) {
        return reflectionFailure(failure);
    }

    String msg;
    if (failure instanceof SAXException) {
        msg = "Error parsing content retrieved from the url " + documentURI;
    } else if (failure instanceof IOException) {
        msg = "Error retrieving content from the url " + documentURI;
    } else {
        msg = "Error occurred with the content of " + documentURI;
    }
    log.error(msg, failure);
    return new PluginException(msg, failure);
}