throws IOException, CauseMemoryException, CauseException, Exception {
/** Initialize item collection */
Vector rssFeeds = new Vector();
/** Initialize XML parser and parse OPML XML */
HTMLParser parser = new HTMLParser(encodingUtil);
try {
// The first element is the main tag.
int elementType = parser.parse();
// If we found the prologue, get the next entry.
if( elementType == XmlParser.PROLOGUE ) {
elementType = parser.parse();
}
if (elementType == XmlParser.END_DOCUMENT ) {
return null;
}
boolean windows = parser.isWindows();
boolean utf = parser.isUtf();
boolean process = true;
boolean bodyFound = false;
do {
/** RSS item properties */
String title = "";
String link = "";
String tagName = parser.getName();
//#ifdef DLOGGING
//@ if (finerLoggable) {logger.finer("tagname: " + tagName);}
//#endif
switch (tagName.charAt(0)) {
case 'b':
case 'B':
if (bodyFound) {
continue;
}
bodyFound = parser.isBodyFound();
if (bodyFound) {
windows = parser.isWindows();
utf = parser.isUtf();
}
// If looking for OPML link, it is in header.
if ((!needRss || needFirstRss) && bodyFound) {
process = false;
break;
}
break;
case 'l':
case 'L':
if (!tagName.toLowerCase().equals("link")) {
break;
}
//#ifdef DLOGGING
//@ if (finerLoggable) {logger.finer("Parsing <link> tag");}
//#endif
// TODO base
String type = parser.getAttributeValue( "type" );
if (type == null) {
continue;
}
if (!needRss && (type.toLowerCase().indexOf("opml") < 0)) {
continue;
}
if (needRss &&
((type.toLowerCase().indexOf("rss") < 0) &&
(type.toLowerCase().indexOf("atom") < 0))) {
continue;
}
title = parser.getAttributeValue( "title" );
// Allow null title so that the caller can
// check if it needs to get the title another way.
if (title != null) {
title = EncodingUtil.replaceAlphaEntities(true,
title);
title = EncodingUtil.replaceNumEntity(title);
// Replace special chars like left quote, etc.
// Since we have already converted to unicode, we want
// to replace with uni chars.
title = encodingUtil.replaceSpChars(title);
title = StringUtil.removeHtml(title);
}
if (((link = parser.getAttributeValue( "href" ))
== null) || ( link.length() == 0 )) {
continue;
}
if (link.charAt(0) == '/') {
link = url + link;
}
/** Debugging information */
System.out.println("Title: " + title);
System.out.println("Link: " + link);
/**
* Create a new RSS item and add it to the RSS document's item
* collection. Account for malformed OPML, i.e. an
* OPML composed of other OPML. These have a url attribute
* instead of a link attribute.
*/
if (!needRss || needFirstRss) {
RssItunesFeed feed = new RssItunesFeed(title, link, "", "");
rssFeeds.addElement( feed );
process = false;
break;
}
if (( feedURLFilter != null) &&
( link.toLowerCase().indexOf(feedURLFilter) < 0)) {
continue;
}
if (( feedNameFilter != null) &&
((title != null) &&
(title.toLowerCase().indexOf(feedNameFilter) < 0))) {
continue;
}
RssItunesFeed feed = new RssItunesFeed(title, link, "", "");
rssFeeds.addElement( feed );
break;
default:
}
}
while( process && (parser.parse() != XmlParser.END_DOCUMENT) );
} catch (CauseMemoryException ex) {
CauseMemoryException cex = new CauseMemoryException(
"Out of memory error while parsing HTML auto link feed " +
url, ex);