Package de.nava.informa.parsers

Source Code of de.nava.informa.parsers.RSS_0_91_Parser

//
// Informa -- RSS Library for Java
// Copyright (c) 2002 by Niko Schmuck
//
// Niko Schmuck
// http://sourceforge.net/projects/informa
// mailto:niko_schmuck@users.sourceforge.net
//
// This library is free software.
//
// You may redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation.
//
// Version 2.1 of the license should be included with this distribution in
// the file LICENSE. If the license is not included with this distribution,
// you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
// or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
// MA 02139 USA.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied waranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//

// $Id: RSS_0_91_Parser.java,v 1.35 2007/01/06 22:18:12 niko_schmuck Exp $

package de.nava.informa.parsers;

import java.net.URL;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jdom.Attribute;
import org.jdom.Element;

import de.nava.informa.core.ChannelBuilderIF;
import de.nava.informa.core.ChannelFormat;
import de.nava.informa.core.ChannelIF;
import de.nava.informa.core.ChannelParserIF;
import de.nava.informa.core.ImageIF;
import de.nava.informa.core.ItemEnclosureIF;
import de.nava.informa.core.ItemIF;
import de.nava.informa.core.ItemSourceIF;
import de.nava.informa.core.ParseException;
import de.nava.informa.core.TextInputIF;
import de.nava.informa.utils.ParserUtils;

/**
* Parser which reads in document instances according to the RSS 0.91
* specification and generates a news channel object.
*
* @author Niko Schmuck
*/
class RSS_0_91_Parser implements ChannelParserIF {

  private static Log logger = LogFactory.getLog(RSS_0_91_Parser.class);

  /**
   * Private constructor suppresses generation of a (public) default constructor.
   */
  private RSS_0_91_Parser() {}

  /**
   * Holder of the RSS_0_91_Parser instance.
   */
  private static class RSS_0_91_ParserHolder {
    private static RSS_0_91_Parser instance = new RSS_0_91_Parser();
  }

  /**
   * Get the RSS_0_91_Parser instance.
   */
  public static RSS_0_91_Parser getInstance() {
    return RSS_0_91_ParserHolder.instance;
  }
 
  /**
   * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom.Element)
   */
  public ChannelIF parse(ChannelBuilderIF cBuilder, Element root)
    throws ParseException {
    if (cBuilder == null) {
      throw new RuntimeException(
        "Without builder no channel can " + "be created.");
    }
    Date dateParsed = new Date();
    logger.debug("start parsing.");

    // Get the channel element (only one occurs)
    ParserUtils.matchCaseOfChildren(root, "channel");
    Element channel = root.getChild("channel");
    if (channel == null) {
      logger.warn("Channel element could not be retrieved from feed.");
      throw new ParseException("No channel element found in feed.");
    }

    // --- read in channel information

    ParserUtils.matchCaseOfChildren(channel, new String[] {
      "title", "description", "link", "language", "item", "image", "textinput", "copyright",
      "rating", "pubDate", "lastBuildDate", "docs", "managingEditor", "webMaster", "cloud" });

    // 1 title element
    ChannelIF chnl =
      cBuilder.createChannel(channel, channel.getChildTextTrim("title"));

    chnl.setFormat(ChannelFormat.RSS_0_91);

    // 1 description element
    chnl.setDescription(channel.getChildTextTrim("description"));

    // 1 link element
    chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link")));

    // 1 language element
    chnl.setLanguage(channel.getChildTextTrim("language"));

    // 1..n item elements
    List items = channel.getChildren("item");
    Iterator i = items.iterator();
    while (i.hasNext()) {
      Element item = (Element) i.next();

      ParserUtils.matchCaseOfChildren(item, new String[] {
        "title", "link", "description", "source", "enclosure" });

      // get title element
      Element elTitle = item.getChild("title");
      String strTitle = "<No Title>";
      if (elTitle != null) {
        strTitle = elTitle.getTextTrim();
      }
      if (logger.isDebugEnabled()) {
        logger.debug("Item element found (" + strTitle + ").");
      }

      // get link element
      Element elLink = item.getChild("link");
      String strLink = "";
      if (elLink != null) {
        strLink = elLink.getTextTrim();
      }

      // get description element
      Element elDesc = item.getChild("description");
      String strDesc = "";
      if (elDesc != null) {
        strDesc = elDesc.getTextTrim();
      }

      // generate new RSS item (link to article)
      ItemIF rssItem =
        cBuilder.createItem(
          item,
          chnl,
          strTitle,
          strDesc,
          ParserUtils.getURL(strLink));
      rssItem.setFound(dateParsed);

      // get source element (an RSS 0.92 element)
      Element source = item.getChild("source");
      if (source != null) {
        String sourceName = source.getTextTrim();
        Attribute sourceAttribute = source.getAttribute("url");
        if (sourceAttribute != null) {
          String location = sourceAttribute.getValue().trim();
          ItemSourceIF itemSource =
            cBuilder.createItemSource(rssItem, sourceName, location, null);
          rssItem.setSource(itemSource);
        }
      }

      // get enclosure element (an RSS 0.92 element)
      Element enclosure = item.getChild("enclosure");
      if (enclosure != null) {
        URL location = null;
        String type = null;
        int length = -1;
        Attribute urlAttribute = enclosure.getAttribute("url");
        if (urlAttribute != null) {
          location = ParserUtils.getURL(urlAttribute.getValue().trim());
        }
        Attribute typeAttribute = enclosure.getAttribute("type");
        if (typeAttribute != null) {
          type = typeAttribute.getValue().trim();
        }
        Attribute lengthAttribute = enclosure.getAttribute("length");
        if (lengthAttribute != null) {
          try {
            length = Integer.parseInt(lengthAttribute.getValue().trim());
          } catch (NumberFormatException e) {
            logger.warn(e);
          }
        }
        ItemEnclosureIF itemEnclosure =
          cBuilder.createItemEnclosure(rssItem, location, type, length);
        rssItem.setEnclosure(itemEnclosure);
      }
    }

    // 0..1 image element
    Element image = channel.getChild("image");
    if (image != null) {

      ParserUtils.matchCaseOfChildren(image, new String[] {
        "title", "url", "link", "width", "height", "description" });

      ImageIF rssImage =
        cBuilder.createImage(
          image.getChildTextTrim("title"),
          ParserUtils.getURL(image.getChildTextTrim("url")),
          ParserUtils.getURL(image.getChildTextTrim("link")));
      Element imgWidth = image.getChild("width");
      if (imgWidth != null) {
        try {
          rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
        } catch (NumberFormatException e) {
          logger.warn(e);
        }
      }
      Element imgHeight = image.getChild("height");
      if (imgHeight != null) {
        try {
          rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
        } catch (NumberFormatException e) {
          logger.warn(e);
        }
      }
      Element imgDescr = image.getChild("description");
      if (imgDescr != null) {
        rssImage.setDescription(imgDescr.getTextTrim());
      }
      chnl.setImage(rssImage);
    }

    // 0..1 textinput element
    Element txtinp = channel.getChild("textinput");
    if (txtinp != null) {

      ParserUtils.matchCaseOfChildren(txtinp, new String[] {
        "title", "description", "name", "link"});

      TextInputIF rssTextInput =
        cBuilder.createTextInput(
          txtinp.getChild("title").getTextTrim(),
          txtinp.getChild("description").getTextTrim(),
          txtinp.getChild("name").getTextTrim(),
          ParserUtils.getURL(txtinp.getChild("link").getTextTrim()));
      chnl.setTextInput(rssTextInput);
    }

    // 0..1 copyright element
    Element copyright = channel.getChild("copyright");
    if (copyright != null) {
      chnl.setCopyright(copyright.getTextTrim());
    }

    // 0..1 rating element
    Element rating = channel.getChild("rating");
    if (rating != null) {
      chnl.setRating(rating.getTextTrim());
    }

    // 0..1 pubDate element
    Element pubDate = channel.getChild("pubDate");
    if (pubDate != null) {
      chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim()));
    }

    // 0..1 lastBuildDate element
    Element lastBuildDate = channel.getChild("lastBuildDate");
    if (lastBuildDate != null) {
      chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim()));
    }

    // 0..1 docs element
    Element docs = channel.getChild("docs");
    if (docs != null) {
      chnl.setDocs(docs.getTextTrim());
    }

    // 0..1 managingEditor element
    Element managingEditor = channel.getChild("managingEditor");
    if (managingEditor != null) {
      chnl.setCreator(managingEditor.getTextTrim());
    }

    // 0..1 webMaster element
    Element webMaster = channel.getChild("webMaster");
    if (webMaster != null) {
      chnl.setPublisher(webMaster.getTextTrim());
    }

    // 0..1 cloud element
    Element cloud = channel.getChild("cloud");
    if (cloud != null) {
      String _port = cloud.getAttributeValue("port");
      int port = -1;
      if (_port != null) {
        try {
          port = Integer.parseInt(_port);
        } catch (NumberFormatException e) {
          logger.warn(e);
        }
      }
      chnl.setCloud(
        cBuilder.createCloud(
          cloud.getAttributeValue("domain"),
          port,
          cloud.getAttributeValue("path"),
          cloud.getAttributeValue("registerProcedure"),
          cloud.getAttributeValue("protocol")));
    }

    chnl.setLastUpdated(dateParsed);
    // 0..1 skipHours element
    // 0..1 skipDays element

    return chnl;
  }

}
TOP

Related Classes of de.nava.informa.parsers.RSS_0_91_Parser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.