/**
* Copyright (c) 2013, Institute of Information Systems (Sven Groppe and contributors of LUPOSDATE), University of Luebeck
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
* following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice, this list of conditions and the following
* disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
* following disclaimer in the documentation and/or other materials provided with the distribution.
* - Neither the name of the University of Luebeck nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package lupos.event.producer.rsssemantics;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
/**
 * Reads an RSS document from a URL with a StAX event reader and turns it into
 * a {@link Feed} that holds one {@link FeedMessage} per {@code <item>} element.
 * <p>
 * Elements are matched by their local name only, so a namespace prefix on an
 * element is ignored when deciding which field it fills.
 */
public class RSSReader {
    static final String TITLE = "title";
    static final String DESCRIPTION = "description";
    static final String CHANNEL = "channel";
    static final String LANGUAGE = "language";
    static final String COPYRIGHT = "copyright";
    static final String LINK = "link";
    static final String AUTHOR = "author";
    static final String ITEM = "item";
    static final String PUB_DATE = "pubDate";
    static final String GUID = "guid";

    /** Location the feed is read from. */
    final URL url;

    /**
     * Creates a reader for the given feed address.
     *
     * @param feedUrl
     *            textual URL of the RSS document
     * @throws RuntimeException
     *             wrapping the {@link MalformedURLException} if
     *             {@code feedUrl} cannot be parsed as a URL
     */
    public RSSReader(String feedUrl) {
        try {
            this.url = new URL(feedUrl);
        } catch (MalformedURLException e) {
            throw new RuntimeException("Malformed feed URL: " + feedUrl, e);
        }
    }

    /**
     * Parses the document behind {@link #url}. The values collected before the
     * first {@code <item>} start tag become the feed header; every completed
     * {@code <item>} is appended to the feed as a {@link FeedMessage}.
     *
     * @return the parsed feed, or {@code null} if the document contains no
     *         {@code <item>} element (the feed is only created when the first
     *         item starts)
     * @throws RuntimeException
     *             if the stream cannot be opened or the XML cannot be parsed
     */
    public Feed readFeed() {
        Feed feed = null;
        InputStream in = read();
        XMLEventReader eventReader = null;
        try {
            boolean isFeedHeader = true;

            // Header and item values start out as the empty string.
            String description = "";
            String title = "";
            String link = "";
            String language = "";
            String copyright = "";
            String author = "";
            String pubdate = "";
            String guid = "";

            XMLInputFactory inputFactory = XMLInputFactory.newInstance();
            // The document comes from a remote host: do not process DTDs or
            // resolve external entities while parsing it.
            inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
            inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
            eventReader = inputFactory.createXMLEventReader(in);

            while (eventReader.hasNext()) {
                XMLEvent event = eventReader.nextEvent();
                if (event.isStartElement()) {
                    String name = event.asStartElement().getName().getLocalPart();
                    if (ITEM.equals(name)) {
                        if (isFeedHeader) {
                            isFeedHeader = false;
                            feed = new Feed(title, link, description, language,
                                    copyright, pubdate);
                        }
                        // Start every item with empty fields so that an
                        // element missing from this item does not inherit the
                        // header's or the previous item's value.
                        title = "";
                        description = "";
                        link = "";
                        guid = "";
                        author = "";
                    } else if (TITLE.equals(name)) {
                        title = readText(eventReader);
                    } else if (DESCRIPTION.equals(name)) {
                        description = readText(eventReader);
                    } else if (LINK.equals(name)) {
                        link = readText(eventReader);
                    } else if (GUID.equals(name)) {
                        guid = readText(eventReader);
                    } else if (LANGUAGE.equals(name)) {
                        language = readText(eventReader);
                    } else if (AUTHOR.equals(name)) {
                        author = readText(eventReader);
                    } else if (PUB_DATE.equals(name)) {
                        pubdate = readText(eventReader);
                    } else if (COPYRIGHT.equals(name)) {
                        copyright = readText(eventReader);
                    }
                } else if (event.isEndElement()
                        && ITEM.equals(event.asEndElement().getName().getLocalPart())
                        && feed != null) {
                    FeedMessage message = new FeedMessage();
                    message.setAuthor(author);
                    message.setDescription(description);
                    message.setGuid(guid);
                    message.setLink(link);
                    message.setTitle(title);
                    feed.getMessages().add(message);
                }
            }
        } catch (XMLStreamException e) {
            throw new RuntimeException(e);
        } finally {
            closeQuietly(eventReader, in);
        }
        return feed;
    }

    /**
     * Collects the character data that directly follows the start tag that was
     * just consumed. Consecutive character events are concatenated, because a
     * parser may deliver one text node in several pieces.
     *
     * @param eventReader
     *            reader positioned right after a start element
     * @return the text, or the empty string if the next event is not character
     *         data (for example for an empty element)
     * @throws XMLStreamException
     *             if the underlying parser fails
     */
    private static String readText(XMLEventReader eventReader) throws XMLStreamException {
        StringBuilder text = new StringBuilder();
        while (eventReader.hasNext() && eventReader.peek().isCharacters()) {
            text.append(eventReader.nextEvent().asCharacters().getData());
        }
        return text.toString();
    }

    /**
     * Releases the parser and the stream it reads from. Closing the event
     * reader does not close its input source, so the stream is closed
     * separately.
     *
     * @param eventReader
     *            reader to close, may be {@code null}
     * @param in
     *            stream to close, may be {@code null}
     */
    private static void closeQuietly(XMLEventReader eventReader, InputStream in) {
        if (eventReader != null) {
            try {
                eventReader.close();
            } catch (XMLStreamException ignored) {
                // Nothing useful can be done here; must not mask an
                // exception thrown while parsing.
            }
        }
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Same as above: a failure to close is not actionable.
            }
        }
    }

    /**
     * Opens the stream for {@link #url}.
     *
     * @return an input stream for reading from the URL connection
     * @throws RuntimeException
     *             wrapping the {@link IOException} if the stream cannot be
     *             opened
     */
    private InputStream read() {
        try {
            return this.url.openStream();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}