// transform cat list & hierarchy into list of CategoryIF elements
Enumeration<String> enumCategories = catTable.keys();
while (enumCategories.hasMoreElements()) {
String key = enumCategories.nextElement();
// build category list: getCategoryList(parent, title, children)
CategoryIF cat = getCategoryList(null, key, catTable.get(key));
catList.add(cat);
}
if (catList.size() > 0) {
// if categories were actually created, then add list to item node
rssItem.setCategories(catList);
}
}
// publication date: prefer the <pubDate> child, otherwise use dc:date
Element elDate = item.getChild("pubDate", defNS);
elDate = (elDate == null) ? item.getChild("date", dcNS) : elDate;
if (elDate != null) {
  // hand the trimmed element text to ParserUtils.getDate and store the result
  rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim()));
}
// executed whether or not a date element was found
// NOTE(review): dateParsed is assigned outside this section - presumably the
// time of the parse run; confirm at its declaration
rssItem.setFound(dateParsed);
// author: prefer the <author> child, otherwise use dc:creator
Element elAuthor = item.getChild("author", defNS);
elAuthor = (elAuthor == null) ? item.getChild("creator", dcNS) : elAuthor;
if (elAuthor != null) {
  // the creator is left untouched when neither element exists
  rssItem.setCreator(elAuthor.getTextTrim());
}
// comments link: an absent <comments> element is handed to ParserUtils.getURL
// as the empty string, so setComments is always called
Element elComments = item.getChild("comments", defNS);
String strComments = (elComments == null) ? "" : elComments.getTextTrim();
rssItem.setComments(ParserUtils.getURL(strComments));
// guid: build an ItemGuidIF from the element text and its isPermaLink flag
Element elGuid = item.getChild("guid", defNS);
if (elGuid != null) {
  String guidUrl = elGuid.getTextTrim();
  if (guidUrl != null) {
    // the flag stays true when the attribute is missing
    boolean permaLink = true;
    Attribute permaLinkAttribute = elGuid.getAttribute("isPermaLink", defNS);
    if (permaLinkAttribute == null) {
      // An unprefixed attribute is never in the default namespace, so the
      // defNS-qualified lookup finds nothing when the feed declares one.
      // Retry without a namespace instead of silently keeping the default.
      permaLinkAttribute = elGuid.getAttribute("isPermaLink");
    }
    if (permaLinkAttribute != null) {
      String permaLinkStr = permaLinkAttribute.getValue();
      if (permaLinkStr != null) {
        // trimmed like the other attribute values read for this item;
        // anything other than "true" (case-insensitive) yields false
        permaLink = Boolean.parseBoolean(permaLinkStr.trim());
      }
    }
    ItemGuidIF itemGuid = cBuilder.createItemGuid(rssItem, guidUrl,
        permaLink);
    rssItem.setGuid(itemGuid);
  }
}
// source: an ItemSourceIF is created only when the element carries a url
// attribute; the element text is used as the source name
Element elSource = item.getChild("source", defNS);
if (elSource != null) {
  String sourceName = elSource.getTextTrim();
  Attribute sourceAttribute = elSource.getAttribute("url", defNS);
  if (sourceAttribute == null) {
    // An unprefixed attribute is never in the default namespace, so the
    // defNS-qualified lookup finds nothing when the feed declares one.
    // Retry without a namespace so the source is not dropped.
    sourceAttribute = elSource.getAttribute("url");
  }
  if (sourceAttribute != null) {
    String sourceLocation = sourceAttribute.getValue().trim();
    ItemSourceIF itemSource = cBuilder.createItemSource(rssItem,
        sourceName, sourceLocation, null);
    rssItem.setSource(itemSource);
  }
}
// enclosure: read the url, type and length attributes and attach an
// ItemEnclosureIF to the item. Missing attributes leave the defaults below.
Element elEnclosure = item.getChild("enclosure", defNS);
if (elEnclosure != null) {
  URL location = null;
  String type = null;
  int length = -1;
  // An unprefixed attribute is never in the default namespace, so each
  // defNS-qualified lookup is retried without a namespace when it finds
  // nothing (which happens when the feed declares a default namespace).
  Attribute urlAttribute = elEnclosure.getAttribute("url", defNS);
  if (urlAttribute == null) {
    urlAttribute = elEnclosure.getAttribute("url");
  }
  if (urlAttribute != null) {
    location = ParserUtils.getURL(urlAttribute.getValue().trim());
  }
  Attribute typeAttribute = elEnclosure.getAttribute("type", defNS);
  if (typeAttribute == null) {
    typeAttribute = elEnclosure.getAttribute("type");
  }
  if (typeAttribute != null) {
    type = typeAttribute.getValue().trim();
  }
  Attribute lengthAttribute = elEnclosure.getAttribute("length", defNS);
  if (lengthAttribute == null) {
    lengthAttribute = elEnclosure.getAttribute("length");
  }
  if (lengthAttribute != null) {
    try {
      length = Integer.parseInt(lengthAttribute.getValue().trim());
    } catch (NumberFormatException e) {
      // length keeps its -1 default; message format matches the image
      // width/height warnings elsewhere in this parser
      logger.warn("Error parsing enclosure length: " + e.getMessage());
    }
  }
  ItemEnclosureIF itemEnclosure = cBuilder.createItemEnclosure(rssItem,
      location, type, length);
  rssItem.setEnclosure(itemEnclosure);
}
}
// optional channel <image>: title, url and link are passed to createImage;
// width, height and description are applied afterwards, each only if present
Element image = channel.getChild("image", defNS);
if (image != null) {
  String[] imageChildNames = { "title", "url", "link", "width", "height",
      "description" };
  ParserUtils.matchCaseOfChildren(image, imageChildNames);

  String imageTitle = image.getChildTextTrim("title", defNS);
  URL imageLocation = ParserUtils.getURL(image.getChildTextTrim("url", defNS));
  URL imageLink = ParserUtils.getURL(image.getChildTextTrim("link", defNS));
  ImageIF rssImage = cBuilder.createImage(imageTitle, imageLocation, imageLink);

  Element imgWidth = image.getChild("width", defNS);
  if (imgWidth != null) {
    try {
      int widthValue = Integer.parseInt(imgWidth.getTextTrim());
      rssImage.setWidth(widthValue);
    } catch (NumberFormatException e) {
      // a non-numeric width is logged and otherwise ignored
      logger.warn("Error parsing width: " + e.getMessage());
    }
  }

  Element imgHeight = image.getChild("height", defNS);
  if (imgHeight != null) {
    try {
      int heightValue = Integer.parseInt(imgHeight.getTextTrim());
      rssImage.setHeight(heightValue);
    } catch (NumberFormatException e) {
      // a non-numeric height is logged and otherwise ignored
      logger.warn("Error parsing height: " + e.getMessage());
    }
  }

  Element imgDescr = image.getChild("description", defNS);
  if (imgDescr != null) {
    rssImage.setDescription(imgDescr.getTextTrim());
  }

  chnl.setImage(rssImage);
}
// optional channel text-input element: title, description, name and link
// are read from its children and passed to createTextInput
// NOTE(review): RSS 2.0 spells this element "textInput"; presumably the
// channel's children were case-normalised before this point - confirm
Element txtinp = channel.getChild("textinput", defNS);
if (txtinp != null) {
  String[] textInputChildNames = { "title", "description", "name", "link" };
  ParserUtils.matchCaseOfChildren(txtinp, textInputChildNames);

  String inputTitle = txtinp.getChildTextTrim("title", defNS);
  String inputDescription = txtinp.getChildTextTrim("description", defNS);
  String inputName = txtinp.getChildTextTrim("name", defNS);
  URL inputLink = ParserUtils.getURL(txtinp.getChildTextTrim("link", defNS));

  TextInputIF rssTextInput = cBuilder.createTextInput(inputTitle,
      inputDescription, inputName, inputLink);
  chnl.setTextInput(rssTextInput);
}
// The next four channel children are optional plain-text values. Each one is
// looked up in the default namespace and, only when present, its trimmed text
// is copied onto the channel; an absent element leaves the channel untouched.

// 0..1 copyright element
Element copyright = channel.getChild("copyright", defNS);
if (copyright != null) {
chnl.setCopyright(copyright.getTextTrim());
}
// 0..1 Rating element
Element rating = channel.getChild("rating", defNS);
if (rating != null) {
chnl.setRating(rating.getTextTrim());
}
// 0..1 Docs element (stored as the raw trimmed text, not converted to a URL)
Element docs = channel.getChild("docs", defNS);
if (docs != null) {
chnl.setDocs(docs.getTextTrim());
}
// 0..1 Generator element
Element generator = channel.getChild("generator", defNS);
if (generator != null) {
chnl.setGenerator(generator.getTextTrim());
}
// 0..1 ttl element: applied to the channel only when its trimmed text
// parses as an int
Element ttl = channel.getChild("ttl", defNS);
if (ttl != null) {
String ttlValue = ttl.getTextTrim();
try {
chnl.setTtl(Integer.parseInt(ttlValue));
} catch (NumberFormatException e) {
// a malformed value is logged (with the offending text) and skipped,
// so setTtl is not called for this channel
logger.warn("Invalid TTL format: '" + ttlValue + "'");
}
}
// 0..1 pubDate element: when present, its trimmed text is converted with
// ParserUtils.getDate and stored as the channel's publication date
Element pubDate = channel.getChild("pubDate", defNS);
if (pubDate != null) {
chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim()));
}
// 0..1 lastBuildDate element: handled the same way as pubDate, stored as
// the channel's last build date
Element lastBuildDate = channel.getChild("lastBuildDate", defNS);
if (lastBuildDate != null) {
chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim()));
}
// get category list
// get list of <category> elements
List listCategory = channel.getChildren("category", defNS);
if (listCategory.size() < 1) {
// fallback mechanism: get dc:category element
listCategory = channel.getChildren("category", dcNS);
}
if (listCategory.size() > 0) {
RecursiveHashtable<String> catTable = new RecursiveHashtable<String>();
// for each category, parse hierarchy
Iterator itCat = listCategory.iterator();
while (itCat.hasNext()) {
RecursiveHashtable<String> currTable = catTable;
Element elCategory = (Element) itCat.next();
// get contents of category element
String[] titles = elCategory.getTextNormalize().split("/");
for (int x = 0; x < titles.length; x++) {
// tokenize category string to extract out hierarchy
if (currTable.containsKey(titles[x]) == false) {
// if token does not exist in current map, add it with child Hashtable
currTable.put(titles[x], new RecursiveHashtable<String>());
}
// reset current Hashtable to child's Hashtable then iterate to next token
currTable = currTable.get(titles[x]);
}
}
ArrayList<CategoryIF> catList = new ArrayList<CategoryIF>();
// transform cat list & hierarchy into list of CategoryIF elements
Enumeration<String> enumCategories = catTable.keys();
while (enumCategories.hasMoreElements()) {
String key = enumCategories.nextElement();
// build category list: getCategoryList(parent, title, children)
CategoryIF cat = getCategoryList(null, key, catTable.get(key));
catList.add(cat);
}
if (catList.size() > 0) {
// if categories were actually created, then add list to the channel
chnl.setCategories(catList);