Package org.wso2.carbon.mashup.javascript.hostobjects.scraper.plugins

Source Code of org.wso2.carbon.mashup.javascript.hostobjects.scraper.plugins.DhtmlToHtmlPlugin

/*
* Copyright 2006,2007 WSO2, Inc. http://www.wso2.org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.wso2.carbon.mashup.javascript.hostobjects.scraper.plugins;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.webharvest.definition.HttpDef;
import org.webharvest.definition.IElementDef;
import org.webharvest.exception.PluginException;
import org.webharvest.runtime.Scraper;
import org.webharvest.runtime.ScraperContext;
import org.webharvest.runtime.processors.HttpProcessor;
import org.webharvest.runtime.processors.ProcessorResolver;
import org.webharvest.runtime.processors.WebHarvestPlugin;
import org.webharvest.runtime.variables.ListVariable;
import org.webharvest.runtime.variables.NodeVariable;
import org.webharvest.runtime.variables.Variable;
import org.w3c.dom.Document;
import org.webharvest.runtime.web.HttpInfo;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

public class DhtmlToHtmlPlugin extends WebHarvestPlugin {

    private static final Log log = LogFactory.getLog(DhtmlToHtmlPlugin.class);

    private final String EXCEPTION = "Cannot find cobra library in the class path. Please make " +
                                     "sure its in the classpath or remove <dhtml-to-html/> from " +
                                     "the Scraper config";

    @Override
    public String getName() {
        return "dhtml-to-html";
    }

    @Override
    public Variable executePlugin(Scraper scraper, ScraperContext context) {

        IElementDef[] defs = elementDef.getOperationDefs();
        ListVariable result = new ListVariable();

        String transformError = "Error Transforming document";

        if (defs.length > 0) {

            try {
                Class userAgentInterfaceClass = Class.forName("org.lobobrowser.html.UserAgentContext");
                Class userAgentContextClass = Class.forName("org.lobobrowser.html.test.SimpleUserAgentContext");
                Object userAgentContext = userAgentContextClass.newInstance();

                Class documentBuilderClass = Class.forName("org.lobobrowser.html.parser.DocumentBuilderImpl");
                Constructor documentBuilderConstructor = documentBuilderClass.getConstructor(new Class[]{userAgentInterfaceClass});
                Object documentBuilder = documentBuilderConstructor.newInstance(new Object[]{userAgentContext});

                Class inputSourceClass = Class.forName("org.lobobrowser.html.parser.InputSourceImpl");

                Constructor inputSourceConstructor = inputSourceClass.getConstructor(
                        new Class[]{InputStream.class, String.class, String.class});
                Method documentBuilderParse = documentBuilderClass.getMethod("parse", InputSource.class);

                for (int i = 0; i < defs.length; i++) {
                    HttpProcessor processor = (HttpProcessor) ProcessorResolver.createProcessor(
                            defs[i], scraper.getConfiguration(), scraper);
                    String documentURI = ((HttpDef) processor.getElementDef()).getUrl();
                    HttpInfo httpInfo = (HttpInfo) context.get("http");
                    Variable content = processor.run(scraper, context);

                    try {
                        // A document URI and a charset should be provided.
                        Object inputSource = inputSourceConstructor.newInstance(
                                new Object[]{new ByteArrayInputStream(content.toBinary()), documentURI, httpInfo.charset});
                        Document document = (Document) documentBuilderParse.invoke(documentBuilder, inputSource);
                        Source source = new DOMSource(document);
                        ByteArrayOutputStream out = new ByteArrayOutputStream();
                        Result domResult = new StreamResult(out);
                        TransformerFactory factory = TransformerFactory.newInstance();
                        Transformer transformer = factory.newTransformer();
                        transformer.transform(source, domResult);
                        result.addVariable(new NodeVariable(out.toByteArray()));
                    } catch (TransformerException e) {
                        log.error(transformError, e);
                        throw new PluginException(e);
                    } catch (IllegalAccessException e) {
                        throw new PluginException(e);
                    } catch (InstantiationException e) {
                        throw new PluginException(e);
                    } catch (Exception e) {
                        if (e instanceof SAXException) {
                            String msg = "Error parsing content retrieved from the url" + documentURI;
                            log.error(msg, e);
                            throw new PluginException(msg, e);
                        } else if(e instanceof IOException) {
                            String msg = "Error retrieving content from the url" + documentURI;
                            log.error(msg, e);
                            throw new PluginException(msg, e);
                        } else if(e instanceof ClassNotFoundException) {
                            log.error(EXCEPTION, e);
                            throw new PluginException(EXCEPTION, e);
                        } else if(e instanceof InvocationTargetException) {
                            log.error(EXCEPTION, e);
                            throw new PluginException(EXCEPTION, e);
                        } else if(e instanceof IllegalAccessException) {
                            log.error(EXCEPTION, e);
                            throw new PluginException(EXCEPTION, e);
                        } else {
                            String msg = "Error occurred with the content of " + documentURI;
                            log.error(msg, e);
                            throw new PluginException(msg, e);
                        }
                    }
                }

            } catch (ClassNotFoundException e) {
                log.error(EXCEPTION, e);
                throw new PluginException(EXCEPTION, e);
            } catch (InstantiationException e) {
                log.error(EXCEPTION, e);
                throw new PluginException(EXCEPTION, e);
            } catch (IllegalAccessException e) {
                log.error(EXCEPTION, e);
                throw new PluginException(EXCEPTION, e);
            } catch (NoSuchMethodException e) {
                log.error(EXCEPTION, e);
                throw new PluginException(EXCEPTION, e);
            } catch (InvocationTargetException e) {
                log.error(EXCEPTION, e);
                throw new PluginException(EXCEPTION, e);
            }

        } else {
            result.addVariable(new NodeVariable(elementDef.getBodyText()));
        }
        return result;
    }

    public String[] getValidAttributes() {
        return new String[] {"scripting", "externalCSS"};
    }

    public String[] getAttributeValueSuggestions(String attributeName) {
        if ("scripting".equalsIgnoreCase(attributeName)) {
            return new String[] {"true", "false"};
        } else if ("externalCSS".equalsIgnoreCase(attributeName)) {
            return new String[] {"true", "false"};
        }
        return null;
    }

    public String[] getValidSubprocessors() {
        return null;
    }

    public Class[] getDependantProcessors() {
        return null;
    }
}
TOP

Related Classes of org.wso2.carbon.mashup.javascript.hostobjects.scraper.plugins.DhtmlToHtmlPlugin

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.