Package org.exoplatform.services.rss.parser

Source Code of org.exoplatform.services.rss.parser.RSSParser

/**
* Copyright (C) 2009 eXo Platform SAS.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/

package org.exoplatform.services.rss.parser;

import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.exoplatform.services.common.DataBuffer;
import org.exoplatform.services.common.HttpClientImpl;
import org.exoplatform.services.html.refs.RefsDecoder;
import org.exoplatform.services.token.attribute.AttributeParser;
import org.exoplatform.services.token.attribute.Attributes;
import org.exoplatform.services.xml.parser.XMLDocument;
import org.exoplatform.services.xml.parser.XMLNode;
import org.exoplatform.services.xml.parser.XMLParser;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
* Created by The eXo Platform SARL .
*
* @author nhuthuan Email: nhudinhthuan@yahoo.com
*/
public class RSSParser
{

   private RefsDecoder decoder;

   public RSSParser()
   {
      decoder = new RefsDecoder();
   }

   public synchronized RSSDocument<DefaultRSSChannel, DefaultRSSItem> createDocument(XMLNode root) throws Exception
   {
      DefaultRSSChannel channel = createChannel(root);
      if (channel == null)
         return null;
      return new RSSDocument<DefaultRSSChannel, DefaultRSSItem>(channel, createItems(channel.getNode()));
   }

   public synchronized <T extends IRSSChannel, E extends IRSSItem> RSSDocument<T, E> createDocument(XMLNode root,
      Class<T> channelClazz, Class<E> itemClazz) throws Exception
   {
      T channel = createChannel(root, channelClazz);
      if (channel == null)
         return null;
      return new RSSDocument<T, E>(channel, createItems(channel.getNode(), itemClazz));
   }

   private DefaultRSSChannel createChannel(XMLNode root) throws Exception
   {
      return createChannel(root, DefaultRSSChannel.class);
   }

   private <T extends IRSSChannel> T createChannel(XMLNode root, Class<T> clazz) throws Exception
   {
      if (root.isNode("channel") || root.isNode("feed"))
         return createSingleChannel(root, clazz);
      List<XMLNode> list = root.getChildren();
      for (XMLNode ele : list)
      {
         T channel = createChannel(ele, clazz);
         if (channel != null)
            return channel;
      }
      return null;
   }

   private <T extends IRSSChannel> T createSingleChannel(XMLNode root, Class<T> clazz) throws Exception
   {
      T t = null;
      List<XMLNode> children = root.getChildren();
      t = createItem(root, clazz);
      for (XMLNode ele : children)
      {
         if (ele.getTotalChildren() < 1)
            continue;
         if (ele.isNode("generator"))
            t.setGenerator(ele.getChild(0).getNodeValue());
      }
      return t;
   }

   private List<DefaultRSSItem> createItems(XMLNode root) throws Exception
   {
      return createItems(root, DefaultRSSItem.class);
   }

   private <T extends IRSSItem> List<T> createItems(XMLNode root, Class<T> clazz) throws Exception
   {
      if (root == null)
         return new ArrayList<T>();
      List<XMLNode> list = root.getChildren();
      List<T> items = new ArrayList<T>();
      for (XMLNode ele : list)
      {
         if (ele.isNode("item") || ele.isNode("entry"))
            items.add(createItem(ele, clazz));
      }
      if (items.size() < 1)
         return createItems(root.getParent(), clazz);
      return items;
   }

   private <T extends IRSSItem> T createItem(XMLNode node, Class<T> clazz) throws Exception
   {
      T item = clazz.newInstance();
      item.setNode(node);
      List<XMLNode> children = node.getChildren();
      if (children.size() < 1)
         return null;
      for (XMLNode ele : children)
      {
         if (ele.isNode("title") && ele.getTotalChildren() > 0)
         {
            item.setTitle(removeCData(ele.getChild(0).getNodeValue().trim()));
         }
         else if ((ele.isNode("description") || ele.isNode("summary") || ele.isNode("content"))
            && ele.getTotalChildren() > 0)
         {
            if (item.getDesc() == null || item.getDesc().trim().length() < 1)
            {
               item.setDesc(decoder.decode(removeCData(ele.getChild(0).getNodeValue().trim())));
            }
         }
         else if (ele.isNode("link"))
         {
            if (ele.getTotalChildren() > 0)
            {
               // item.setLink(ele.getChild(0).getNodeValue());
               item.setLink(removeCData(ele.getChild(0).getNodeValue()));
            }
            else
            {
               Attributes attributes = AttributeParser.getAttributes(ele);
               // item.setLink(attributes.getAttributeValue("href"));
               item.setLink(removeCData(attributes.getAttributeValue("href")));
            }
         }
         else if ((ele.isNode("pubDate") || ele.isNode("issued")) && ele.getTotalChildren() > 0)
            item.setTime(removeCData(ele.getChild(0).getNodeValue().trim()));
         else if (ele.isNode("image") && ele.getTotalChildren() > 0)
            item.setImage(ele.getChild(0).getNodeValue());
         // TODO: set creator of content
         else if (ele.isNode("dc:creator") && ele.getTotalChildren() > 0)
            item.setCreator((ele.getChild(0).getNodeValue()));
      }
      return item;
   }

   private String removeCData(String text)
   {
      int idx = text.indexOf("[CDATA[");
      if (idx != 1)
         return text;
      text = text.substring(idx + 7);
      idx = text.lastIndexOf("]]");
      if (idx != text.length() - 2)
         return text;
      return text.substring(0, text.length() - 2);
   }

   public synchronized <T extends IRSSChannel, E extends IRSSItem> RSSDocument<T, E> createDocument(String text,
      Class<T> channelClazz, Class<E> itemClazz) throws Exception
   {
      XMLDocument document = XMLParser.createDocument(text);
      if (document == null)
         return null;
      return createDocument(document.getRoot(), channelClazz, itemClazz);
   }

   public synchronized RSSDocument<DefaultRSSChannel, DefaultRSSItem> createDocument(String text) throws Exception
   {
      XMLDocument document = XMLParser.createDocument(text);
      if (document == null)
         return null;
      return createDocument(document.getRoot(), DefaultRSSChannel.class, DefaultRSSItem.class);
   }

   public synchronized RSSDocument<DefaultRSSChannel, DefaultRSSItem> createDocument(byte[] data, String charset)
      throws Exception
   {
      return createDocument(new String(data, charset), DefaultRSSChannel.class, DefaultRSSItem.class);
   }

   public synchronized <T extends IRSSChannel, E extends IRSSItem> RSSDocument<T, E> createDocument(byte[] data,
      String charset, Class<T> channelClazz, Class<E> itemClazz) throws Exception
   {
      return createDocument(new String(data, charset), channelClazz, itemClazz);
   }

   public synchronized <T extends IRSSChannel, E extends IRSSItem> RSSDocument<T, E> createDocument(InputStream input,
      String charset, Class<T> channelClazz, Class<E> itemClazz) throws Exception
   {
      XMLDocument document = XMLParser.createDocument(input, charset);
      if (document == null)
         return null;
      return createDocument(document.getRoot(), channelClazz, itemClazz);
   }

   public synchronized RSSDocument<DefaultRSSChannel, DefaultRSSItem> createDocument(InputStream input, String charset)
      throws Exception
   {
      XMLDocument document = XMLParser.createDocument(input, charset);
      if (document == null)
         return null;
      return createDocument(document.getRoot(), DefaultRSSChannel.class, DefaultRSSItem.class);
   }

   public synchronized <T extends IRSSChannel, E extends IRSSItem> RSSDocument<T, E> createDocument(File file,
      String charset, Class<T> channelClazz, Class<E> itemClazz) throws Exception
   {
      return createDocument(new FileInputStream(file), charset, channelClazz, itemClazz);
   }

   public synchronized RSSDocument<DefaultRSSChannel, DefaultRSSItem> createDocument(File file, String charset)
      throws Exception
   {
      if (!file.exists())
         return null;
      return createDocument(new FileInputStream(file), charset, DefaultRSSChannel.class, DefaultRSSItem.class);
   }

   public synchronized <T extends IRSSChannel, E extends IRSSItem> RSSDocument<T, E> createDocument(URL url,
      String charset, Class<T> channelClazz, Class<E> itemClazz) throws Exception
   {
      GetMethod get = null;
      try
      {
         HttpClientImpl httpClientService = new HttpClientImpl(url);
         get = httpClientService.getMethod(url.getFile());
         get.setFollowRedirects(true);
         int statusCode = httpClientService.getHttpClient().executeMethod(get);
         if (statusCode != HttpStatus.SC_OK)
         {
            throw new MalformedURLException("Server response code " + statusCode);
         }
         InputStream input = get.getResponseBodyAsStream();
         DataBuffer buffer = new DataBuffer();
         byte[] data = buffer.loadInputStream(input).toByteArray();
         return createDocument(data, charset, channelClazz, itemClazz);
      }
      finally
      {
         if (get != null)
            get.releaseConnection();
      }
   }

   public synchronized <T extends IRSSChannel, E extends IRSSItem> RSSDocument<T, E> createDocument(URI uri,
      String charset, Class<T> channelClazz, Class<E> itemClazz) throws Exception
   {
      try
      {
         return createDocument(uri.toURL(), charset, channelClazz, itemClazz);
      }
      catch (Exception e)
      {
         try
         {
            File file = new File(uri);
            return createDocument(file, charset, channelClazz, itemClazz);
         }
         catch (Exception exp)
         {
            return null;
         }
      }
   }

   public synchronized RSSDocument<DefaultRSSChannel, DefaultRSSItem> createDocument(URI uri, String charset)
      throws Exception
   {
      return createDocument(uri, charset, DefaultRSSChannel.class, DefaultRSSItem.class);
   }

}
TOP

Related Classes of org.exoplatform.services.rss.parser.RSSParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.