Package org.apache.abdera.ext.html

Source Code of org.apache.abdera.ext.html.HtmlHelper

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.  For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.abdera.ext.html;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.abdera.Abdera;
import org.apache.abdera.model.Div;
import org.apache.abdera.model.Document;
import org.apache.abdera.model.Element;
import org.apache.abdera.protocol.client.AbderaClient;
import org.apache.abdera.protocol.client.ClientResponse;
import org.apache.abdera.util.MimeTypeHelper;
import org.apache.abdera.util.XmlRestrictedCharReader;

public class HtmlHelper {

    private HtmlHelper() {
    }

    public static Div parse(String value) {
        return parse(Abdera.getInstance(), value);
    }

    public static Div parse(InputStream in) {
        return parse(Abdera.getInstance(), in);
    }

    public static Div parse(InputStream in, String charset) {
        return parse(Abdera.getInstance(), in, charset);
    }

    public static Div parse(Reader in) {
        return parse(Abdera.getInstance(), in);
    }

    public static Div parse(Abdera abdera, String value) {
        return parse(abdera, new StringReader(value));
    }

    public static Div parse(Abdera abdera, InputStream in) {
        return parse(abdera, in, "UTF-8");
    }

    public static Div parse(Abdera abdera, InputStream in, String charset) {
        try {
            return parse(abdera, new InputStreamReader(in, charset));
        } catch (RuntimeException e) {
            throw e;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    public static Div parse(Abdera abdera, Reader in) {
        String result = null;
        Div div = abdera.getFactory().newDiv();
        try {
            div.setValue(HtmlCleaner.parse(in, true));
            return div;
        } catch (Exception e) {
            // this is a temporary hack. some html really
            // can't be parsed successfully. in that case,
            // we produce something that will likely render
            // rather ugly. but there's not much else we
            // can do
            if (result != null)
                div.setText(result);
            return div;
        }
    }

    public static Document<Element> parseDocument(Reader in) {
        return parseDocument(Abdera.getInstance(), in);
    }

    public static Document<Element> parseDocument(Abdera abdera, Reader in) {
        return abdera.getParser().parse(new StringReader(HtmlCleaner.parse(in, false)));
    }

    /**
     * This will search the element tree for elements named "link" with a rel attribute containing the value of rel and
     * a type attribute containg the value of type.
     */
    public static List<Element> discoverLinks(Element base, String type, String... rel) {
        List<Element> results = new ArrayList<Element>();
        walkElementForLinks(results, base, rel, type);
        return results;
    }

    private static void walkElementForLinks(List<Element> results, Element base, String[] rel, String type) {
        if (checkElementForLink(base, rel, type))
            results.add(base);
        for (Element child : base.getElements())
            walkElementForLinks(results, child, rel, type);
    }

    private static boolean checkElementForLink(Element base, String[] relvals, String type) {
        if (base.getQName().getLocalPart().equalsIgnoreCase("link")) {
            String relattr = base.getAttributeValue("rel");
            String typeattr = base.getAttributeValue("type");
            if (relattr != null) {
                String[] rels = relattr.split("\\s+");
                Arrays.sort(rels);
                for (String rel : relvals) {
                    if (Arrays.binarySearch(rels, rel) < 0)
                        return false;
                }
            }
            if (type != null && typeattr == null)
                return false;
            if (type == null && typeattr != null)
                return true; // assume possible match
            if (MimeTypeHelper.isMatch(type, typeattr))
                return true;
        }
        return false;
    }

    public static List<Element> discoverLinks(String uri, String type, String... rel) throws IOException {
        return discoverLinks(Abdera.getInstance(), uri, type, rel);
    }

    public static List<Element> discoverLinks(Abdera abdera, String uri, String type, String... rel) throws IOException {
        AbderaClient client = new AbderaClient(abdera);
        ClientResponse resp = client.get(uri);
        InputStream in = resp.getInputStream();
        InputStreamReader r = new InputStreamReader(in);
        XmlRestrictedCharReader x = new XmlRestrictedCharReader(r);
        Document<Element> doc = HtmlHelper.parseDocument(x);
        List<Element> list = discoverLinks(doc.getRoot(), type, rel);
        return list;
    }
}
TOP

Related Classes of org.apache.abdera.ext.html.HtmlHelper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.