Package net.hkionline.rss

Source Code of net.hkionline.rss.Reader

package net.hkionline.rss;


/* This is an XML reader and parser.
* It is used to read and parse an RSS feed.
* The reader uses StAX because it is efficient enough and ships with the Java runtime, which keeps dependencies down.
*
* There are several tutorials and guides on how best to parse XML and RSS. My intention was not to reinvent the wheel, so I mix and match
* these tutorials here. Lars Vogel's parsing tutorial was among the better ones, so I borrowed heavily from it in this class. I also extended the set of elements he used, and it could be extended further.
* Extending makes sense only up to a point, before the code here explodes.
*
* @author: Henri Kesseli
*/

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;

import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.XMLEvent;

import net.hkionline.rss.Channel;
import net.hkionline.rss.Item;

public class Reader {

  // The URI to the feed
 
  final URL uri;
 
  /* Here are the elements that are parsed from the feed.
   * These elements are represented as static class variables. They are shared properties so it's best to make them static.
   * Please check what elements should be parsed from the Channel and Item classes.
   */

  final static String _title = "title";
  final static String _description ="description";
  final static String _link = "link";
  final static String _author = "author";
  final static String _enclosure = "enclosure";
  final static String _guid ="guid";
  final static String _language ="language";
  final static String _copyright = "copyright";
  final static String _managingEditor ="managingEditor";
  final static String _webmaster = "webmaster";
  final static String _pubDate ="pubDate";
  final static String _source = "source";
  final static String _lastBuildDate ="lastBuildDate";
  final static String _category ="category";
  final static String _generator ="generator";
  final static String _ttl ="ttl";
  final static String _image ="image";
  final static String _item = "item";
 
 
  public Reader(String streamURI){
    try{
      this.uri = new URL(streamURI);
    }
    catch(MalformedURLException e){
      throw new RuntimeException(e);
    }
  }
 
 
  /* This method performs the actual feed parsing and returns a nice Channel-object for easy feed handling. */
  public Channel read(){
   
    Channel feed = null;
   
    try{
      // Items and the channel have XML elements using the same name.
      boolean isFeedHeader = true;
     
      String title = null;
      String link = null;
      String description = null;
      String author = null;
      String enclosure = null;
      String guid = null;
      String language = null;
      String copyright = null;
      String managingEditor = null;
      String webmaster = null;
      String pubDate = null;
      String source = null;
      String lastBuildDate = null;
      String category = null;
      String generator = null;
      String ttl = null;
      String image = null;
     
      // Lets create the factories and reader objects first to handle the XML stream
     
      InputStream inputStream = null;
      try {
        inputStream = uri.openStream();
      }
      catch (IOException e) {
        throw new RuntimeException(e);
      }

      XMLInputFactory inputFactory = XMLInputFactory.newInstance();
      XMLEventReader eventReader = inputFactory.createXMLEventReader(inputStream);
     
      // StAX is event based, so lets loop trough the events and while there is next
       while (eventReader.hasNext()) {
         XMLEvent event = eventReader.nextEvent();
        
         if (event.isStartElement()) {
                String localPart = event.asStartElement().getName().getLocalPart();
               
                switch (localPart) {
                case _item:
                  if (isFeedHeader) {
                    // This is done once to instantiate a channel object and save the channels/feed header/metadata
                    // This takes place right after the parser has hit the first start tag of an item-element in the RSS-feed
                    isFeedHeader = false;
                    feed = new Channel(title, link, description, language, copyright,  managingEditor,  webmaster,  pubDate, lastBuildDate, category, generator, ttl, image);
                  }
                  event = eventReader.nextEvent();
                  break;
                case _title:
                  title = getElementData(event, eventReader);
                  break;
                case _link:
                  link = getElementData(event, eventReader);
                  break;               
                case _description:
                  description = getElementData(event, eventReader);
                  break;
                case _author:
                  author = getElementData(event, eventReader);
                  break;
                case _enclosure:
                  enclosure = getElementData(event, eventReader);
                  break;
                case _guid:
                  guid = getElementData(event, eventReader);
                  break;
                case _language:
                  language = getElementData(event, eventReader);
                  break;
                case _copyright:
                  copyright = getElementData(event, eventReader);
                  break;
                case _managingEditor:
                  managingEditor = getElementData(event, eventReader);
                  break;
                case _webmaster:
                  webmaster = getElementData(event, eventReader);
                  break;
                case _pubDate:
                  pubDate = getElementData(event, eventReader);
                  break;
                case _source:
                  source = getElementData(event, eventReader);
                  break;
                case _lastBuildDate:
                  lastBuildDate = getElementData(event, eventReader);
                  break;
                case _category:
                  category = getElementData(event, eventReader);
                  break;
                case _generator:
                  generator = getElementData(event, eventReader);
                  break;
                case _ttl:
                  ttl = getElementData(event, eventReader);
                  break;
                case _image:
                  image = getElementData(event, eventReader);
                  break;
                }
         }
         else if (event.isEndElement()) {
           /*
            * This event happens when the parser hits the closing tag of an RSS-feed element.
            * We are interested at item-tags. When they close we want to save the information of the child elements to a new item-object and to the feed itself
            */
           if (event.asEndElement().getName().getLocalPart() == (_item)) {

             Item item = new Item();
            
            
             item.setTitle(title);
             item.setDescription(description);
             item.setLink(link);
             item.setAuthor(author);
             item.setCategory(category);
             item.setEnclosure(enclosure);
             item.setGuid(guid);
             item.setPubDate(pubDate);
             item.setSource(source);
                         
             feed.getItems().add(item);
            
             // We have now saved a new item  to our feed model so we can continue to the next item
             event = eventReader.nextEvent();
             continue;
                }
              }
       }
    }
    catch (XMLStreamException e) { // If all goes wrong!
        throw new RuntimeException(e);
      }
      return feed;
  }

  private String getElementData(XMLEvent event, XMLEventReader eventReader) throws XMLStreamException {
    /*
     * Here we reed the the event Character data and pass it as a String. This essentially gives us the data in a single XML element.
     * This is best done in a separate method because we want/need to manage possible exceptions also this keeps the code DRY as Rails developer would say.
     */
     
      String data = "";
      event = eventReader.nextEvent();
     
      if (event instanceof Characters) {
        data = event.asCharacters().getData();
      }
     
      return data;
  }
 
}
TOP

Related Classes of net.hkionline.rss.Reader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.