// Package com.salas.bb.utils.parser.impl
//
// Source Code of com.salas.bb.utils.parser.impl.Atom10ParserV2

// BlogBridge -- RSS feed reader, manager, and web based service
// Copyright (C) 2002-2006 by R. Pito Salas
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free Software Foundation;
// either version 2 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along with this program;
// if not, write to the Free Software Foundation, Inc., 59 Temple Place,
// Suite 330, Boston, MA 02111-1307 USA
//
// Contact: R. Pito Salas
// mailto:pitosalas@users.sourceforge.net
// More information: about BlogBridge
// http://www.blogbridge.com
// http://sourceforge.net/projects/blogbridge
//
// $Id: Atom10ParserV2.java,v 1.1 2007/10/01 17:03:27 spyromus Exp $
//

package com.salas.bb.utils.parser.impl;

import com.sun.syndication.feed.WireFeed;
import com.sun.syndication.feed.atom.*;
import com.sun.syndication.feed.atom.Content;
import com.sun.syndication.io.FeedException;
import com.sun.syndication.io.impl.Atom10Parser;
import com.sun.syndication.io.impl.DateParser;
import org.jdom.*;
import org.jdom.output.XMLOutputter;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
* Custom Atom 1.0 parser fixing URL connection bugs.
*/
public class Atom10ParserV2 extends Atom10Parser
{
    /** Namespace URI used for Atom 1.0 elements. */
    private static final String ATOM_10_URI = "http://www.w3.org/2005/Atom";
    /** JDOM namespace built from {@link #ATOM_10_URI}; used by {@code findAtomLink} when looking up link elements. */
    Namespace ns = Namespace.getNamespace(ATOM_10_URI);

    protected WireFeed parseFeed(Element eFeed) throws FeedException
    {
        com.sun.syndication.feed.atom.Feed feed =
            new com.sun.syndication.feed.atom.Feed(getType());

        String baseURI;
        try
        {
            baseURI = findBaseURI(eFeed);
        } catch (Exception e)
        {
            throw new FeedException("ERROR while finding base URI of feed", e);
        }

        String xmlBase = eFeed.getAttributeValue("base", Namespace.XML_NAMESPACE);
        if (xmlBase != null)
        {
            feed.setXmlBase(xmlBase);
        }

        Element e = eFeed.getChild("title", getAtomNamespace());
        if (e != null)
        {
            Content c = new Content();
            c.setValue(parseTextConstructToString(e));
            c.setType(e.getAttributeValue("type")); //, Namespace.XML_NAMESPACE));
            feed.setTitleEx(c);
        }

        List<Element> eList = getChildren(eFeed, "link");
        feed.setAlternateLinks(parseAlternateLinks(feed, null, baseURI, eList));
        feed.setOtherLinks(parseOtherLinks(feed, null, baseURI, eList));

        List cList = eFeed.getChildren("category", getAtomNamespace());
        feed.setCategories(parseCategories(baseURI, cList));

        eList = getChildren(eFeed, "author");
        if (eList.size() > 0)
        {
            feed.setAuthors(parsePersons(baseURI, eList));
        }

        eList = getChildren(eFeed, "contributor");
        if (eList.size() > 0)
        {
            feed.setContributors(parsePersons(baseURI, eList));
        }

        e = eFeed.getChild("subtitle", getAtomNamespace());
        if (e != null)
        {
            Content subtitle = new Content();
            subtitle.setValue(parseTextConstructToString(e));
            subtitle.setType(e.getAttributeValue("type")); //, Namespace.XML_NAMESPACE));
            feed.setSubtitle(subtitle);
        }

        e = eFeed.getChild("id", getAtomNamespace());
        if (e != null)
        {
            feed.setId(e.getText());
        }

        e = eFeed.getChild("generator", getAtomNamespace());
        if (e != null)
        {
            Generator gen = new Generator();
            gen.setValue(e.getText());
            String att = e.getAttributeValue("uri");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
            if (att != null)
            {
                gen.setUrl(att);
            }
            att = e.getAttributeValue("version");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
            if (att != null)
            {
                gen.setVersion(att);
            }
            feed.setGenerator(gen);
        }

        e = eFeed.getChild("rights", getAtomNamespace());
        if (e != null)
        {
            feed.setRights(parseTextConstructToString(e));
        }

        e = eFeed.getChild("icon", getAtomNamespace());
        if (e != null)
        {
            feed.setIcon(e.getText());
        }

        e = eFeed.getChild("logo", getAtomNamespace());
        if (e != null)
        {
            feed.setLogo(e.getText());
        }

        e = eFeed.getChild("updated", getAtomNamespace());
        if (e != null)
        {
            feed.setUpdated(DateParser.parseDate(e.getText()));
        }

        feed.setModules(parseFeedModules(eFeed));

        eList = getChildren(eFeed, "entry");
        if (eList.size() > 0)
        {
            feed.setEntries(parseEntries(feed, baseURI, eList));
        }

        List foreignMarkup =
            extractForeignMarkup(eFeed, feed, getAtomNamespace());
        if (foreignMarkup.size() > 0)
        {
            feed.setForeignMarkup(foreignMarkup);
        }
        return feed;
    }

    private List<Element> getChildren(Element eFeed, String name)
    {
        return (List<Element>)eFeed.getChildren(name, getAtomNamespace());
    }

    private Link parseLink(Feed feed, Entry entry, String baseURI, Element eLink)
    {
        Link link = new Link();
        String att = eLink.getAttributeValue("rel");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
        if (att != null)
        {
            link.setRel(att);
        }
        att = eLink.getAttributeValue("type");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
        if (att != null)
        {
            link.setType(att);
        }
        att = eLink.getAttributeValue("href");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
        if (att != null)
        {
            if (isRelativeURI(att))
            {
                link.setHref(resolveURI(baseURI, eLink, att));
            } else
            {
                link.setHref(att);
            }
        }
        att = eLink.getAttributeValue("title");
        if (att != null)
        {
            link.setTitle(att);
        }
        att = eLink.getAttributeValue("hreflang");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
        if (att != null)
        {
            link.setHreflang(att);
        }
        att = eLink.getAttributeValue("length");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
        if (att != null)
        {
            link.setLength(Long.parseLong(att));
        }
        return link;
    }

    // List(Elements) -> List(Link)
    private List<Link> parseAlternateLinks(Feed feed, Entry entry, String baseURI, List<Element> eLinks)
    {
        List<Link> links = new ArrayList<Link>();
        for (Element eLink : eLinks)
        {
            Link link = parseLink(feed, entry, baseURI, eLink);
            if (link.getRel() == null
                || "".equals(link.getRel().trim())
                || "alternate".equals(link.getRel()))
            {
                links.add(link);
            }
        }
        return (links.size() > 0) ? links : null;
    }

    private List<Link> parseOtherLinks(Feed feed, Entry entry, String baseURI, List<Element> eLinks)
    {
        List<Link> links = new ArrayList<Link>();
        for (Element eLink : eLinks)
        {
            Link link = parseLink(feed, entry, baseURI, eLink);
            if (!"alternate".equals(link.getRel()))
            {
                links.add(link);
            }
        }
        return (links.size() > 0) ? links : null;
    }

    private Person parsePerson(String baseURI, Element ePerson)
    {
        Person person = new Person();
        Element e = ePerson.getChild("name", getAtomNamespace());
        if (e != null)
        {
            person.setName(e.getText());
        }
        e = ePerson.getChild("uri", getAtomNamespace());
        if (e != null)
        {
            person.setUri(resolveURI(baseURI, ePerson, e.getText()));
        }
        e = ePerson.getChild("email", getAtomNamespace());
        if (e != null)
        {
            person.setEmail(e.getText());
        }
        return person;
    }

    // List(Elements) -> List(Persons)
    private List<Person> parsePersons(String baseURI, List<Element> ePersons)
    {
        List<Person> persons = new ArrayList<Person>();
        for (Element ePerson : ePersons) persons.add(parsePerson(baseURI, ePerson));
        return (persons.size() > 0) ? persons : null;
    }

    private Content parseContent(Element e)
    {
        String value = parseTextConstructToString(e);
        String src = e.getAttributeValue("src");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
        String type = e.getAttributeValue("type");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
        Content content = new Content();
        content.setSrc(src);
        content.setType(type);
        content.setValue(value);
        return content;
    }

    private String parseTextConstructToString(Element e)
    {
        String value;
        String type = e.getAttributeValue("type");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
        type = (type != null) ? type : Content.TEXT;
        if (type.equals(Content.XHTML))
        {
            // XHTML content needs special handling
            XMLOutputter outputter = new XMLOutputter();
            List<org.jdom.Content> eContent = (List<org.jdom.Content>)e.getContent();
            for (org.jdom.Content c : eContent)
            {
                if (c instanceof Element)
                {
                    Element eC = (Element)c;
                    if (eC.getNamespace().equals(getAtomNamespace()))
                    {
                        ((Element)c).setNamespace(Namespace.NO_NAMESPACE);
                    }
                }
            }
            value = outputter.outputString(eContent);
        } else
        {
            // Everything else comes in verbatim
            value = e.getText();
        }
        return value;
    }

    // List(Elements) -> List(Entries)
    protected List parseEntries(Feed feed, String baseURI, List eEntries)
    {
        List<Entry> entries = new ArrayList<Entry>();
        for (Element eEntry : (List<Element>)eEntries)
        {
            entries.add(parseEntry(feed, eEntry, baseURI));
        }
        return (entries.size() > 0) ? entries : null;
    }

    protected Entry parseEntry(Feed feed, Element eEntry, String baseURI)
    {
        Entry entry = new Entry();

        String xmlBase = eEntry.getAttributeValue("base", Namespace.XML_NAMESPACE);
        if (xmlBase != null)
        {
            entry.setXmlBase(xmlBase);
        }

        Element e = eEntry.getChild("title", getAtomNamespace());
        if (e != null)
        {
            Content c = new Content();
            c.setValue(parseTextConstructToString(e));
            c.setType(e.getAttributeValue("type")); //, Namespace.XML_NAMESPACE));
            entry.setTitleEx(c);
        }

        List<Element> eList = getChildren(eEntry, "link");
        entry.setAlternateLinks(parseAlternateLinks(feed, entry, baseURI, eList));
        entry.setOtherLinks(parseOtherLinks(feed, entry, baseURI, eList));

        eList = getChildren(eEntry, "author");
        if (eList.size() > 0)
        {
            entry.setAuthors(parsePersons(baseURI, eList));
        }

        eList = getChildren(eEntry, "contributor");
        if (eList.size() > 0)
        {
            entry.setContributors(parsePersons(baseURI, eList));
        }

        e = eEntry.getChild("id", getAtomNamespace());
        if (e != null)
        {
            entry.setId(e.getText());
        }

        e = eEntry.getChild("updated", getAtomNamespace());
        if (e != null)
        {
            entry.setUpdated(DateParser.parseW3CDateTime(e.getText()));
        }

        e = eEntry.getChild("published", getAtomNamespace());
        if (e != null)
        {
            entry.setPublished(DateParser.parseW3CDateTime(e.getText()));
        }

        e = eEntry.getChild("summary", getAtomNamespace());
        if (e != null)
        {
            entry.setSummary(parseContent(e));
        }

        e = eEntry.getChild("content", getAtomNamespace());
        if (e != null)
        {
            List<Content> contents = new ArrayList<Content>();
            contents.add(parseContent(e));
            entry.setContents(contents);
        }

        e = eEntry.getChild("rights", getAtomNamespace());
        if (e != null)
        {
            entry.setRights(e.getText());
        }

        List<Element> cList = getChildren(eEntry, "category");
        entry.setCategories(parseCategories(baseURI, cList));

        // TODO: SHOULD handle Atom entry source element

        entry.setModules(parseItemModules(eEntry));

        List foreignMarkup =
            extractForeignMarkup(eEntry, entry, getAtomNamespace());
        if (foreignMarkup.size() > 0)
        {
            entry.setForeignMarkup(foreignMarkup);
        }
        return entry;
    }

    private List<Category> parseCategories(String baseURI, List<Element> eCategories)
    {
        List<Category> cats = new ArrayList<Category>();
        for (Element eCategory : eCategories) cats.add(parseCategory(baseURI, eCategory));
        return (cats.size() > 0) ? cats : null;
    }

    private Category parseCategory(String baseURI, Element eCategory)
    {
        Category category = new Category();
        String att = eCategory.getAttributeValue("term");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
        if (att != null)
        {
            category.setTerm(att);
        }
        att = eCategory.getAttributeValue("scheme");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
        if (att != null)
        {
            category.setScheme(resolveURI(baseURI, eCategory, att));
        }
        att = eCategory.getAttributeValue("label");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
        if (att != null)
        {
            category.setLabel(att);
        }
        return category;

    }


    // Fix for issue #34 "valid IRI href attributes are stripped for atom:link"
    // URI's that didn't start with http were being treated as relative URIs.
    // So now consider an absolute URI to be any alpha-numeric string followed
    // by a colon, followed by anything -- specified by this regex:
    static Pattern absoluteURIPattern = Pattern.compile("^[a-z0-9]*:.*$");

    private boolean isAbsoluteURI(String uri)
    {
        return absoluteURIPattern.matcher(uri).find();
    }

    private boolean isRelativeURI(String uri)
    {
        return !isAbsoluteURI(uri);
    }

    /**
     * Resolve URI based considering xml:base and baseURI.
     *
     * @param baseURI Base URI of feed
     * @param parent  Parent from which to consider xml:base
     * @param url     URL to be resolved
     *
     * @return URI.
     */
    private String resolveURI(String baseURI, Parent parent, String url)
    {
        if (isRelativeURI(url))
        {
            url = (!".".equals(url) && !"./".equals(url)) ? url : "";

            // Relative URI with parent
            if (parent != null && parent instanceof Element)
            {

                // Do we have an xml:base?
                String xmlbase = ((Element)parent).getAttributeValue(
                    "base", Namespace.XML_NAMESPACE);
                if (xmlbase != null && xmlbase.trim().length() > 0)
                {
                    if (isAbsoluteURI(xmlbase))
                    {
                        // Absolute xml:base, so form URI right now
                        if (url.startsWith("/"))
                        {
                            // Host relative URI
                            int slashslash = xmlbase.indexOf("//");
                            int nextslash = xmlbase.indexOf("/", slashslash + 2);
                            if (nextslash != -1) xmlbase = xmlbase.substring(0, nextslash);
                            return formURI(xmlbase, url);
                        }
                        if (!xmlbase.endsWith("/"))
                        {
                            // Base URI is filename, strip it off
                            xmlbase = xmlbase.substring(0, xmlbase.lastIndexOf("/") + 1);
                        }
                        return formURI(xmlbase, url);
                    } else
                    {
                        // Relative xml:base, so walk up tree
                        return resolveURI(baseURI, parent.getParent(),
                            stripTrailingSlash(xmlbase) + "/" + stripStartingSlash(url));
                    }
                }
                // No xml:base so walk up tree
                return resolveURI(baseURI, parent.getParent(), url);

                // Relative URI with no parent (i.e. top of tree), so form URI right now
            } else if (parent == null || parent instanceof Document)
            {
                return formURI(baseURI, url);
            }
        }
        return url;
    }

    /**
     * Find base URI of feed considering relative URIs.
     *
     * @param root Root element of feed.
     *
     * @return base URI.
     *
     * @throws java.net.MalformedURLException if error.
     */
    private String findBaseURI(Element root) throws MalformedURLException
    {
        String ret = findAtomLink(root, "alternate");
        if (ret != null && isRelativeURI(ret))
        {
            String self = findAtomLink(root, "self");
            if (self != null)
            {
                self = resolveURI(null, root, self);
                ret = resolveURI(self, root, ret);
            }
        }
        return ret;
    }

    /**
     * Return URL string of Atom link element under parent element.
     * Link with no rel attribute is considered to be rel="alternate"
     *
     * @param parent Consider only children of this parent element
     * @param rel    Consider only links with this relationship
     *
     * @return atom link.
     */
    private String findAtomLink(Element parent, String rel)
    {
        String ret = null;
        List<Element> linksList = (List<Element>)parent.getChildren("link", ns);
        if (linksList != null)
        {
            for (Element link : linksList)
            {
                Attribute relAtt = link.getAttribute("rel");
                Attribute hrefAtt = link.getAttribute("href");
                if ((relAtt == null && "alternate".equals(rel))
                    || (relAtt != null && relAtt.getValue().equals(rel)))
                {
                    ret = hrefAtt.getValue();
                    break;
                }
            }
        }
        return ret;
    }

    /**
     * Strip starting slash from beginning of string.
     */
    private static String stripStartingSlash(String s)
    {
        if (s != null && s.startsWith("/"))
        {
            s = s.substring(1, s.length());
        }
        return s;
    }

    /**
     * Strip trailing slash from end of string.
     */
    private static String stripTrailingSlash(String s)
    {
        if (s != null && s.endsWith("/"))
        {
            s = s.substring(0, s.length() - 1);
        }
        return s;
    }

    /**
     * Resolves two URI into one.
     *
     * @param base  base URI (optional).
     * @param child child URI (optional).
     *
     * @return resulting URI or <code>NULL</code> if errors out.
     */
    static String formURI(String base, String child)
    {
        if (base == null) return child;
        if (child == null) return base;

        try
        {
            return new URI(base).resolve(child).toString();
        } catch (URISyntaxException e)
        {
            return null;
        }
    }
}
// TOP
//
// Related Classes of com.salas.bb.utils.parser.impl.Atom10ParserV2
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.