Package org.wso2.carbon.mashup.javascript.hostobjects.scraper

Source Code of org.wso2.carbon.mashup.javascript.hostobjects.scraper.ScraperHostObject

/*
* Copyright 2006,2007 WSO2, Inc. http://www.wso2.org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.wso2.carbon.mashup.javascript.hostobjects.scraper;

import org.apache.axiom.om.OMElement;
import org.apache.axiom.om.impl.builder.StAXOMBuilder;
import org.apache.log4j.Logger;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.Scriptable;
import org.mozilla.javascript.ScriptableObject;
import org.webharvest.definition.DefinitionResolver;
import org.webharvest.definition.ScraperConfiguration;
import org.webharvest.runtime.Scraper;
import org.webharvest.runtime.ScraperContext;
import org.webharvest.runtime.variables.Variable;
import org.wso2.carbon.CarbonException;
import org.wso2.javascript.xmlimpl.XML;
import org.xml.sax.InputSource;

import javax.xml.stream.XMLStreamException;
import java.io.*;

public class ScraperHostObject extends ScriptableObject {

    private static Logger log = Logger.getLogger(ScraperHostObject.class);
    private OMElement config;
    private ScraperContext scraperContext;

    public static Scriptable jsConstructor(Context cx, Object[] args, Function ctorObj,
                                           boolean inNewExpr) throws CarbonException {
        ScraperHostObject result = new ScraperHostObject();

        if (args.length == 1 && !(args[0] == Context.getUndefinedValue())) {
            OMElement configElement;
            if (args[0] instanceof XML) {
                XML xml = (XML) args[0];
                configElement = (OMElement) xml.getAxiomFromXML();
            } else if (args[0] instanceof File) {
                File configFile = (File) args[0];
                FileInputStream fis;
                try {
                    fis = new FileInputStream(configFile);
                } catch (FileNotFoundException e) {
                    throw new CarbonException(e);
                }
                StAXOMBuilder staxOMBuilder;
                try {
                    staxOMBuilder = new StAXOMBuilder(fis);
                } catch (XMLStreamException e) {
                    log.error("The configuration file should contain XML");
                    throw new CarbonException("The configuration file should contain XML");
                }
                configElement = staxOMBuilder.getDocumentElement();
            } else {
                log.error(
                        "Unsupported parameter type. The config should be XML or a File that has XML");
                throw new CarbonException(
                        "Unsupported parameter type. The config should be XML or a File that has XML");
            }
            result.setConfig(configElement);
            result.scrape();
            return result;
        }
        log.error("Invalid parameters. The configuration should be XML");
        throw new CarbonException("Invalid parameters. The configuration should be XML");
    }

    /**
     * This function does not get invoked explicitly. It is called by rhino internally.
     *
     * @return The name of this hostObject
     */
    public String getClassName() {
        return "Scraper";
    }

    private void scrape() throws CarbonException {
        //todo need to add proxy info
        InputStream in = new ByteArrayInputStream(config.toString().getBytes());
        InputSource inputSource = new InputSource(in);
        ScraperConfiguration scraperConfiguration = new ScraperConfiguration(inputSource);
        Scraper scraper = new Scraper(scraperConfiguration, "");
        // Execute the scraper config
        scraper.execute();
        scraperContext = scraper.getContext();
    }

    private void setConfig(OMElement config) {
        this.config = config;
    }

    /**
     * <p/>
     * <pre>
     * eg:
     *    var config =  <config>
     *                       <var-def name='response'>
     *                           <html-to-xml>
     *                               <http method='get' url='http://ww2.wso2.org/~builder/'/>
     *                           </html-to-xml>
     *                       </var-def>
     *                   </config>;
     *    var scraper = new Scraper(config);
     *    var response = scraper.response;
     * </pre>
     *
     * @param s          - The property requested for
     * @param scriptable - The Scriptable object that the property was requested from
     * @return String - The property if it exist
     */
    public Object get(String s, Scriptable scriptable) {
        String result = null;
        Variable var = scraperContext.getVar(s);
        if (var != null) {
            result = var.toString();
        }
        return result;
    }

    /**
     * This function does not get invoked explicitly. It is called by rhino internally when something like scraper.
     * response is attempted. Rhino uses this function to check whether the property is available.
     *
     * @param s          - The property requested for
     * @param scriptable - The Scriptable object that the property was requested from
     * @return boolean - Indicating whether the proty exist or not
     */
    public boolean has(String s, Scriptable scriptable) {
        Variable var = scraperContext.getVar(s);
        return var != null;
    }
}
TOP

Related Classes of org.wso2.carbon.mashup.javascript.hostobjects.scraper.ScraperHostObject

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.