package net.hkionline.rss;
/* This is a XML reader and parser.
* It is used to read and parse an RSS feed.
* The reader uses StAX because it is efficient enough and comes with the runtime. This should keep dependencies down.
*
* There are several tutorials and guides on how best to parse XML and RSS. My intention was not to reinvent the wheel, so I mix and match
* these tutorials here. Lars Vogel's parsing tutorial was among the better ones, so I borrowed heavily from it in this class. I also extended the set of elements he used, but it could be extended further.
* Extending makes sense only so far before the code here explodes.
*
* @author: Henri Kesseli
*/
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.XMLEvent;
import net.hkionline.rss.Channel;
import net.hkionline.rss.Item;
/**
 * Reads an RSS feed from a URL with StAX and turns it into a {@link Channel}
 * holding one {@link Item} per {@code <item>} element.
 *
 * <p>Elements are matched by local name only. NOTE(review): this means prefixed
 * extension elements (for example {@code atom:link}) and the children of
 * {@code <image>} are treated like the core RSS elements of the same name and
 * can overwrite the channel values - confirm whether that is acceptable.
 */
public class Reader {

    // The URL of the feed
    final URL uri;

    /* Here are the elements that are parsed from the feed.
     * They are compile-time constants so they can be used as switch labels.
     * Please check what elements should be parsed from the Channel and Item classes.
     */
    final static String _title = "title";
    final static String _description = "description";
    final static String _link = "link";
    final static String _author = "author";
    final static String _enclosure = "enclosure";
    final static String _guid = "guid";
    final static String _language = "language";
    final static String _copyright = "copyright";
    final static String _managingEditor = "managingEditor";
    final static String _webmaster = "webmaster";
    final static String _pubDate = "pubDate";
    final static String _source = "source";
    final static String _lastBuildDate = "lastBuildDate";
    final static String _category = "category";
    final static String _generator = "generator";
    final static String _ttl = "ttl";
    final static String _image = "image";
    final static String _item = "item";

    // Only used to detect that the document really contains a channel.
    private final static String _channel = "channel";

    /**
     * Creates a reader for the feed at the given address.
     *
     * @param streamURI the URL of the RSS feed
     * @throws RuntimeException wrapping the {@link MalformedURLException} when
     *         {@code streamURI} is not a valid URL
     */
    public Reader(String streamURI) {
        try {
            this.uri = new URL(streamURI);
        }
        catch (MalformedURLException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Opens the feed, parses it and returns it as a {@link Channel}.
     *
     * <p>The channel object is created from the header values collected so far
     * when the first {@code <item>} start tag is reached, or after the whole
     * document has been read when the feed has a {@code <channel>} but no items.
     *
     * @return the parsed channel, or {@code null} when the document contains
     *         neither a {@code <channel>} nor an {@code <item>} element
     * @throws RuntimeException wrapping the {@link IOException} or
     *         {@link XMLStreamException} when the feed cannot be opened or parsed
     */
    public Channel read() {
        Channel feed = null;
        boolean channelSeen = false;

        // Items and the channel have XML elements using the same name, so the
        // same variables are used for both: header values until the first item
        // starts, item values afterwards.
        String title = null;
        String link = null;
        String description = null;
        String author = null;
        String enclosure = null;
        String guid = null;
        String language = null;
        String copyright = null;
        String managingEditor = null;
        String webmaster = null;
        String pubDate = null;
        String source = null;
        String lastBuildDate = null;
        String category = null;
        String generator = null;
        String ttl = null;
        String image = null;

        // The feed is remote, untrusted XML: do not process DTDs or resolve
        // external entities (XXE).
        XMLInputFactory inputFactory = XMLInputFactory.newInstance();
        inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

        // try-with-resources closes the network stream on every exit path.
        try (InputStream inputStream = uri.openStream()) {
            XMLEventReader eventReader = inputFactory.createXMLEventReader(inputStream);
            try {
                // StAX is event based, so loop through the events while there is a next one
                while (eventReader.hasNext()) {
                    XMLEvent event = eventReader.nextEvent();

                    if (event.isStartElement()) {
                        String localPart = event.asStartElement().getName().getLocalPart();
                        switch (localPart) {
                            case _channel:
                                channelSeen = true;
                                break;
                            case _item:
                                if (feed == null) {
                                    // Done once: the first item marks the end of the feed header/metadata.
                                    feed = new Channel(title, link, description, language, copyright,
                                            managingEditor, webmaster, pubDate, lastBuildDate,
                                            category, generator, ttl, image);
                                }
                                // Start every item from a clean slate so values of the header or of
                                // the previous item do not leak into an item that omits an element.
                                title = null;
                                description = null;
                                link = null;
                                author = null;
                                category = null;
                                enclosure = null;
                                guid = null;
                                pubDate = null;
                                source = null;
                                break;
                            case _title:
                                title = getElementData(eventReader);
                                break;
                            case _link:
                                link = getElementData(eventReader);
                                break;
                            case _description:
                                description = getElementData(eventReader);
                                break;
                            case _author:
                                author = getElementData(eventReader);
                                break;
                            case _enclosure:
                                enclosure = getElementData(eventReader);
                                break;
                            case _guid:
                                guid = getElementData(eventReader);
                                break;
                            case _language:
                                language = getElementData(eventReader);
                                break;
                            case _copyright:
                                copyright = getElementData(eventReader);
                                break;
                            case _managingEditor:
                                managingEditor = getElementData(eventReader);
                                break;
                            case _webmaster:
                                webmaster = getElementData(eventReader);
                                break;
                            case _pubDate:
                                pubDate = getElementData(eventReader);
                                break;
                            case _source:
                                source = getElementData(eventReader);
                                break;
                            case _lastBuildDate:
                                lastBuildDate = getElementData(eventReader);
                                break;
                            case _category:
                                category = getElementData(eventReader);
                                break;
                            case _generator:
                                generator = getElementData(eventReader);
                                break;
                            case _ttl:
                                ttl = getElementData(eventReader);
                                break;
                            case _image:
                                image = getElementData(eventReader);
                                break;
                            default:
                                // Element we do not model; ignore it.
                                break;
                        }
                    }
                    else if (event.isEndElement()
                            && _item.equals(event.asEndElement().getName().getLocalPart())) {
                        // The item is complete: store the collected child values in a
                        // new item object and add it to the feed. In well-formed XML the
                        // matching <item> start tag has already created the feed.
                        Item item = new Item();
                        item.setTitle(title);
                        item.setDescription(description);
                        item.setLink(link);
                        item.setAuthor(author);
                        item.setCategory(category);
                        item.setEnclosure(enclosure);
                        item.setGuid(guid);
                        item.setPubDate(pubDate);
                        item.setSource(source);
                        feed.getItems().add(item);
                    }
                }
            }
            finally {
                // XMLEventReader.close() does not close the underlying stream;
                // that is handled by the try-with-resources above.
                eventReader.close();
            }
        }
        catch (IOException | XMLStreamException e) { // If all goes wrong!
            throw new RuntimeException(e);
        }

        if (feed == null && channelSeen) {
            // A channel without any items is still a feed; keep its header.
            feed = new Channel(title, link, description, language, copyright,
                    managingEditor, webmaster, pubDate, lastBuildDate,
                    category, generator, ttl, image);
        }
        return feed;
    }

    /**
     * Returns the character data that directly follows the start tag which was
     * just read from {@code eventReader}.
     *
     * <p>Consecutive character events are concatenated, because a parser may
     * deliver one text node in several chunks. Reading stops at the first
     * non-character event, which is left in the reader so the main loop still
     * sees it (for example a nested start tag or the closing tag).
     *
     * @param eventReader the reader, positioned right after a start element
     * @return the text found, or an empty string when there is none
     * @throws XMLStreamException when the underlying stream cannot be parsed
     */
    private String getElementData(XMLEventReader eventReader) throws XMLStreamException {
        StringBuilder data = new StringBuilder();
        while (eventReader.hasNext() && eventReader.peek().isCharacters()) {
            Characters text = eventReader.nextEvent().asCharacters();
            data.append(text.getData());
        }
        return data.toString();
    }
}