Package de.nava.informa.parsers

Source Code of de.nava.informa.parsers.RSS_1_0_Parser$RSS_1_0_ParserHolder

//
// Informa -- RSS Library for Java
// Copyright (c) 2002 by Niko Schmuck
//
// Niko Schmuck
// http://sourceforge.net/projects/informa
// mailto:niko_schmuck@users.sourceforge.net
//
// This library is free software.
//
// You may redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation.
//
// Version 2.1 of the license should be included with this distribution in
// the file LICENSE. If the license is not included with this distribution,
// you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
// or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
// MA 02139 USA.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//

// $Id: RSS_1_0_Parser.java,v 1.44 2007/01/06 22:18:12 niko_schmuck Exp $

package de.nava.informa.parsers;

import java.net.URL;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jdom.Attribute;
import org.jdom.Element;
import org.jdom.Namespace;

import de.nava.informa.core.ChannelBuilderIF;
import de.nava.informa.core.ChannelFormat;
import de.nava.informa.core.ChannelIF;
import de.nava.informa.core.ChannelParserIF;
import de.nava.informa.core.ChannelUpdatePeriod;
import de.nava.informa.core.ImageIF;
import de.nava.informa.core.ItemIF;
import de.nava.informa.core.ItemSourceIF;
import de.nava.informa.core.ParseException;
import de.nava.informa.core.TextInputIF;
import de.nava.informa.utils.ParserUtils;

/**
* Parser which reads in document instances according to the RSS 1.0
* (RDF) specification and generates a news channel object.
*
* @author Niko Schmuck
*/
class RSS_1_0_Parser implements ChannelParserIF {

  private static Log logger = LogFactory.getLog(RSS_1_0_Parser.class);
 
  /**
   * Private constructor suppresses generation of a (public) default constructor.
   */
  private RSS_1_0_Parser() {}

  /**
   * Holder of the RSS_1_0_Parser instance.
   */
  private static class RSS_1_0_ParserHolder {
    private static RSS_1_0_Parser instance = new RSS_1_0_Parser();
  }

  /**
   * Get the RSS_1_0_Parser instance.
   */
  public static RSS_1_0_Parser getInstance() {
    return RSS_1_0_ParserHolder.instance;
  }

  public ChannelIF parse(ChannelBuilderIF cBuilder, Element root)
      throws ParseException {
    if (cBuilder == null) {
      throw new RuntimeException("Without builder no channel can "
          + "be created.");
    }
    Date dateParsed = new Date();
    Namespace defNS = ParserUtils.getDefaultNS(root);
    if (defNS == null) {
      defNS = Namespace.NO_NAMESPACE;
      logger.info("No default namespace found.");
    }

    // RSS 1.0 Dublin Core Module namespace
    Namespace dcNS = ParserUtils.getNamespace(root, "dc");
    // fall back to default name space (for retrieving descriptions)
    if (dcNS == null) {
      dcNS = defNS;
    }

    // RSS 1.0 Syndication Module namespace
    Namespace syNS = ParserUtils.getNamespace(root, "sy");

    // RSS 1.0 Aggregation Module namespace
    Namespace agNS = ParserUtils.getNamespace(root, "ag");

    // RSS 1.0 Administration Module namespace
    Namespace adminNS = ParserUtils.getNamespace(root, "admin");

    // RSS 1.0 DCTerms Module namespace
    Namespace dctermsNS = ParserUtils.getNamespace(root, "dcterms");

    // RSS 1.0 Annotation Module namespace
    Namespace annotateNS = ParserUtils.getNamespace(root, "annotate");

    // RSS091 Module namespace
    Namespace rss091NS = ParserUtils.getNamespace(root, "rss091");

    // Content namespace
    Namespace contentNS = ParserUtils.getNamespace(root, "content");

    ParserUtils.matchCaseOfChildren(root, new String[] { "channel", "item",
        "image", "textinput" });

    // Get the channel element (only one occurs)
    Element channel = root.getChild("channel", defNS);
    if (channel == null) {
      logger.warn("Channel element could not be retrieved from feed.");
      throw new ParseException("No channel element found in feed.");
    }

    // ----------------------- read in channel information

    ParserUtils.matchCaseOfChildren(channel, new String[] { "title",
        "description", "link", "creator", "managingEditor", "publisher",
        "errorReportsTo", "webMaster", "language", "rights", "copyright",
        "rating", "date", "issued", "pubdate", "lastBuildDate", "modified",
        "generatorAgent", "updatePeriod", "updateFrequency", "updateBase" });

    // title element
    ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim(
        "title", defNS));

    // set channel format
    chnl.setFormat(ChannelFormat.RSS_1_0);

    // description element
    chnl.setDescription(channel.getChildTextTrim("description", defNS));

    // link element
    chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS)));

    // creator element
    Element creator = channel.getChild("creator", dcNS);
    if (creator == null) {
      creator = channel.getChild("managingEditor", rss091NS);
    }
    if (creator != null) {
      chnl.setCreator(creator.getTextTrim());
    }

    // publisher element
    String publisher = channel.getChildTextTrim("publisher", dcNS);
    if (publisher == null) {
      Element elErrorReportsTo = channel.getChild("errorReportsTo", adminNS);
      if (elErrorReportsTo != null) {
        publisher = elErrorReportsTo.getAttributeValue("resource", ParserUtils
            .getNamespace(elErrorReportsTo, "rdf"));
      }
    }
    if (publisher == null) {
      publisher = channel.getChildTextTrim("webMaster", rss091NS);
    }
    chnl.setPublisher(publisher);

    // language element
    Element language = channel.getChild("language", dcNS);
    if (language == null) {
      language = channel.getChild("language", rss091NS);
    }
    if (language != null) {
      chnl.setLanguage(language.getTextTrim());
    }

    // rights element
    Element copyright = channel.getChild("rights", dcNS);
    if (copyright == null) {
      copyright = channel.getChild("copyright", rss091NS);
    }
    if (copyright != null) {
      chnl.setCopyright(copyright.getTextTrim());
    }

    // 0..1 Rating element
    Element rating = channel.getChild("rating", rss091NS);
    if (rating != null) {
      chnl.setRating(rating.getTextTrim());
    }

    // 0..1 Docs element
    // use namespace URI
    chnl.setDocs(defNS.getURI());

    // 0..1 pubDate element
    Element pubDate = channel.getChild("date", dcNS);
    if (pubDate == null) {
      pubDate = channel.getChild("issued", dctermsNS);
    }
    if (pubDate == null) {
      pubDate = channel.getChild("pubdate", rss091NS);
    }
    if (pubDate != null) {
      chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim()));
    }

    // 0..1 lastBuildDate element
    Element lastBuildDate = channel.getChild("lastBuildDate");
    if (lastBuildDate == null) {
      lastBuildDate = channel.getChild("modified", dctermsNS);
    }
    if (lastBuildDate == null) {
      lastBuildDate = channel.getChild("lastBuildDate", rss091NS);
    }
    if (lastBuildDate != null) {
      chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim()));
    }

    // RSS 1.0 Administration Module support

    // 0..1 generator element
    Element elGenerator = channel.getChild("generatorAgent", adminNS);
    if (elGenerator != null) {
      Attribute generator = elGenerator.getAttribute("resource", ParserUtils
          .getNamespace(elGenerator, "rdf"));
      if (generator != null) {
        chnl.setGenerator(generator.getValue());
      }
    }

    // RSS 1.0 Syndication Module support

    // 0..1 update period element
    Element updatePeriod = channel.getChild("updatePeriod", syNS);
    if (updatePeriod != null) {
      try {
        ChannelUpdatePeriod channelUpdatePeriod = ChannelUpdatePeriod
            .valueFromText(updatePeriod.getTextTrim());
        chnl.setUpdatePeriod(channelUpdatePeriod);
      } catch (IllegalArgumentException ex) {
        logger.warn(updatePeriod.getTextTrim(), ex);
      }
    }

    // 0..1 update frequency element
    Element updateFrequency = channel.getChild("updateFrequency", syNS);
    if (updateFrequency != null) {
      chnl.setUpdateFrequency((new Integer(updateFrequency.getTextTrim()))
          .intValue());
    }

    // 0..1 update base element
    Element updateBase = channel.getChild("updateBase", syNS);
    if (updateBase != null) {
      chnl.setUpdateBase(ParserUtils.getDate(updateBase.getTextTrim()));
    }

    if ((updatePeriod != null) && updateFrequency != null) {
      int ttl = getTTL(chnl.getUpdatePeriod(), chnl.getUpdateFrequency());
      chnl.setTtl(ttl);
    }

    // item elements
    List items = root.getChildren("item", defNS);
    Iterator i = items.iterator();
    while (i.hasNext()) {
      Element item = (Element) i.next();

      ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link",
          "encoded", "description", "creator", "subject", "date", "sourceURL",
          "source", "timestamp", "reference" });

      // get title element
      Element elTitle = item.getChild("title", defNS);
      String strTitle = "<No Title>";
      if (elTitle != null) {
        strTitle = elTitle.getTextTrim();
      }
      if (logger.isDebugEnabled()) {
        logger.debug("Item element found (" + strTitle + ").");
      }

      // get link element
      Element elLink = item.getChild("link", defNS);
      String strLink = "";
      if (elLink != null) {
        strLink = elLink.getTextTrim();
      }

      // get description element
      Element elDesc = item.getChild("encoded", contentNS);
      if (elDesc == null) {
        elDesc = item.getChild("description", defNS);
      }
      if (elDesc == null) {
        elDesc = item.getChild("description", dcNS);
      }
      String strDesc = "";
      if (elDesc != null) {
        strDesc = elDesc.getTextTrim();
      }

      // generate new RSS item (link to article)
      ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc,
          ParserUtils.getURL(strLink));
      rssItem.setFound(dateParsed);

      // get creator element
      Element elCreator = item.getChild("creator", dcNS);
      if (elCreator != null) {
        rssItem.setCreator(elCreator.getTextTrim());
      }

      // get subject element
      Element elSubject = item.getChild("subject", dcNS);
      if (elSubject != null) {
        // TODO: Mulitple subject elements not handled currently
        rssItem.setSubject(elSubject.getTextTrim());
      }

      // get date element
      Element elDate = item.getChild("date", dcNS);
      if (elDate != null) {
        rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim()));
      }

      // get source element - default to Aggregation module, then try Dublin Core
      String sourceName = null;
      String sourceLocation = null;
      Date sourceTimestamp = null;

      Element elSourceURL = item.getChild("sourceURL", agNS);
      if (elSourceURL == null) { //  No Aggregation module - try Dublin Core
        elSourceURL = item.getChild("source", dcNS);
        if (elSourceURL != null) {
          sourceLocation = elSourceURL.getTextTrim();
          sourceName = "Source";
        }
      } else { // Aggregation module
        sourceLocation = elSourceURL.getTextTrim();
        Element elSourceName = item.getChild("source", agNS);
        if (elSourceName != null) {
          sourceName = elSourceName.getTextTrim();
        }
        Element elSourceTimestamp = item.getChild("timestamp", agNS);
        if (elSourceTimestamp != null) {
          sourceTimestamp = ParserUtils
              .getDate(elSourceTimestamp.getTextTrim());
        }
      }

      if (sourceLocation != null) {
        ItemSourceIF itemSource = cBuilder.createItemSource(rssItem,
            sourceName, sourceLocation, sourceTimestamp);
        rssItem.setSource(itemSource);
      }

      // comments element - use Annotation module
      Element elReference = item.getChild("reference", annotateNS);
      if (elReference != null) {
        Attribute resource = elReference.getAttribute("resource", ParserUtils
            .getNamespace(elReference, "rdf"));
        if (resource != null) {
          URL resourceURL = ParserUtils.getURL(resource.getValue());
          if (resourceURL != null) {
            rssItem.setComments(resourceURL);
          }
        }
      }

    }

    // image element
    Element image = root.getChild("image", defNS);
    if (image != null) {

      ParserUtils.matchCaseOfChildren(image, new String[] { "title", "url",
          "link", "width", "height", "description" });

      ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title",
          defNS), ParserUtils.getURL(image.getChildTextTrim("url", defNS)),
          ParserUtils.getURL(image.getChildTextTrim("link", defNS)));
      Element imgWidth = image.getChild("width", defNS);
      if (imgWidth != null) {
        try {
          rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
        } catch (NumberFormatException e) {
          logger.warn(e);
        }
      }
      Element imgHeight = image.getChild("height", defNS);
      if (imgHeight != null) {
        try {
          rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
        } catch (NumberFormatException e) {
          logger.warn(e);
        }
      }
      Element imgDescr = image.getChild("description", defNS);
      if (imgDescr != null) {
        rssImage.setDescription(imgDescr.getTextTrim());
      }
      chnl.setImage(rssImage);
    }

    // textinput element
    Element txtinp = root.getChild("textinput", defNS);
    if (txtinp != null) {

      ParserUtils.matchCaseOfChildren(image, new String[] { "title",
          "description", "name", "link" });

      String tiTitle = null;
      if (txtinp.getChild("title", defNS) != null) {
        tiTitle = txtinp.getChild("title", defNS).getTextTrim();
      }
      String tiDescr = null;
      if (txtinp.getChild("description", defNS) != null) {
        tiDescr = txtinp.getChild("description", defNS).getTextTrim();
      }
      String tiName = null;
      if (txtinp.getChild("name", defNS) != null) {
        tiName = txtinp.getChild("name", defNS).getTextTrim();
      }
      URL tiLink = null;
      if (txtinp.getChild("link", defNS) != null) {
        tiLink = ParserUtils.getURL(txtinp.getChild("link", defNS)
            .getTextTrim());
      }
      TextInputIF rssTextInput = cBuilder.createTextInput(tiTitle, tiDescr,
          tiName, tiLink);
      chnl.setTextInput(rssTextInput);
    }

    chnl.setLastUpdated(dateParsed);

    return chnl;
  }

  /**
   * Returns the TTL value corresponding to updatePeriod and updateFrequency.
   * @param updatePeriod Channel update period.
   * @param updateFrequency the update frequency.
   * @return the TTL value.
   */
  private int getTTL(final ChannelUpdatePeriod updatePeriod, int updateFrequency) {

    int minutes;
    if (updatePeriod != null) {
      minutes = updatePeriod.getMinutesInPeriod();
    } else {
      minutes = 24 * 60;
    }

    return updateFrequency == 0 ? minutes : minutes / updateFrequency;
  }
}
TOP

Related Classes of de.nava.informa.parsers.RSS_1_0_Parser$RSS_1_0_ParserHolder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.