Source Code of churchillobjects.rss4j.parser.RssParser

/*
 *  Copyright (c) 1999-2002 ChurchillObjects.com  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are
 *  met: Redistributions of source code must retain the above copyright notice,
 *  this list of conditions and the following disclaimer. Redistributions in
 *  binary form must reproduce the above copyright notice, this list of
 *  conditions and the following disclaimer in the documentation and/or other
 *  materials provided with the distribution. Neither the name of the copyright
 *  holder nor the names of its contributors may be used to endorse or promote
 *  products derived from this software without specific prior written
 *  permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 *  ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
 *  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 *  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 *  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 *  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT, INCLUDING NEGLIGENCE OR OTHERWISE, ARISING IN ANY WAY
 *  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 *  DAMAGE.
 *
 */


package churchillobjects.rss4j.parser;


import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Vector;
//import org.apache.xerces.parsers.SAXParser;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import churchillobjects.rss4j.RssChannel;
import churchillobjects.rss4j.RssChannelImage;
import churchillobjects.rss4j.RssChannelItem;
import churchillobjects.rss4j.RssChannelTextInput;
import churchillobjects.rss4j.RssDocument;
import churchillobjects.rss4j.model.RssNamespace;
import churchillobjects.rss4j.model.RssVersion;
import org.apache.xerces.parsers.SAXParser;




/**
 * This parser is really a 'bootstrap' parser, whose only real purpose
 * is to identify what version the RSS document is and then instantiate
 * and pass processing along to a parser appropriate to that version. Its
 * functionality is available to client code only through its set of public
 * static parse methods, each of which takes a different variety of input and
 * converts it to an InputSource for the SAX handler.
 */
public class RssParser extends DefaultHandler implements RssVersion{


  /**
   * The xml reader object
   */
  private XMLReader parser;


  /**
   * The main document object to be used as the parser runs through its events.
   */
  protected RssDocument document;


  /**
   * Holds the current channel object while the parser executes.
   */
  protected RssChannel currentChannel;


  /**
   * Holds the current item object while the parser executes.
   */
  protected RssChannelItem currentItem;


  /**
   * Holds the current text input object while the parser executes.
   */
  protected RssChannelTextInput currentTextInput;


  /**
   * Holds the current image object while the parser executes.
   */
  protected RssChannelImage currentImage;


  /**
   * Holds a buffer of characters for the current element.
   */
  private StringBuffer characters;


  /**
   * Holds the namespaces that were found in the document.
   */
  protected Vector namespaces;


  /**
   * Invokes a parse of an RSS document contained in a String as unicode text.
   * An input source will be created from a string reader based off of that
   * string which will be used by the xml parser.
   * @param rss
   * @return
   * @throws RssParseException
   */
  public static RssDocument parseRss(String rss) throws RssParseException{
    StringReader sr = new StringReader(rss);
    InputSource inputSource = new InputSource(sr);
    RssParser parser = new RssParser();
    return parser.parseRss(inputSource);
  }


  /**
   * Invokes a parse of an RSS document made available from an input stream.
   * An input source will be created from the input stream which will be used
   * by the xml parser.
   * @param is
   * @return
   * @throws RssParseException
   */
  public static RssDocument parseRss(InputStream is) throws RssParseException{
    InputSource inputSource = new InputSource(is);
    RssParser parser = new RssParser();
    return parser.parseRss(inputSource);
  }


  /**
   * Invokes a parse of an RSS document made available from a reader object.
   * An input source will be created from the reader which will be used
   * by the xml parser.
   * @param r
   * @return
   * @throws RssParseException
   */
  public static RssDocument parseRss(Reader r) throws RssParseException{
    InputSource inputSource = new InputSource(r);
    RssParser parser = new RssParser();
    return parser.parseRss(inputSource);
  }


  /**
   * Invokes a parse of an RSS document made available from a file object.
   * An input source will be created from in input stream off of the file
   * which will be used by the xml parser.
   * @param file
   * @return
   * @throws RssParseException
   */
  public static RssDocument parseRss(File file) throws RssParseException{
    FileInputStream fis;
    try{
      fis = new FileInputStream(file);
    }
    catch(FileNotFoundException e){
      throw new RssParseException(e);
    }
    InputSource inputSource = new InputSource(fis);
    RssParser parser = new RssParser();
    return parser.parseRss(inputSource);
  }


  /**
   * Blank constructor, used by the static parse methods before the version of
   * the document is known.
   */
  RssParser(){
    namespaces = new Vector();
  }


  /**
   * In-package constructor, used by subclasses to set their already set-up
   * document and namespace objects.
   * @param document
   * @param namespaces
   */
  RssParser(RssDocument document, Vector namespaces){
    this.document = document;
    this.namespaces = namespaces;
  }


  /**
   * Starts the parsing of a document from the specified input source. Through
   * the SAX events, the RSS document object model is created and returned.
   * If something should go wrong, then the exception is caught and thrown as
   * an RssParseException, unless of course it is a SAXException wrapping an
   * RssParseException, in which case the RssParseException will be unwrapped
   * and rethrown. Either way, the client code only needs to worry about the
   * RssParseException.
   * @param is The input source to parse.
   * @return The RssDocument object structure.
   * @throws RssParseException
   */
  RssDocument parseRss(InputSource is) throws RssParseException{
    document = new RssDocument();
    namespaces = new Vector();
    characters = new StringBuffer();
    parser = new SAXParser();
    parser.setContentHandler(this);
    parser.setErrorHandler(this);
    try{
      parser.parse(is);
    }
    catch(SAXException e){
      cleanUp();
      if(e.getException() instanceof RssParseException){
        throw (RssParseException)e.getException();
      }
      else{
        throw new RssParseException(e.getException());
      }
    }
    catch(IOException e){
      cleanUp();
      throw new RssParseException(e);
    }
    // local reference to the doc that will be returned, before cleanup
    RssDocument returnDoc = document;
    // release all local objects for gc
    cleanUp();
    return returnDoc;
  }


  /**
   * Releases local attributes of the parser for gc
   */
  protected void cleanUp(){
    parser = null;
    document = null;
    currentChannel = null;
    currentItem = null;
    currentTextInput = null;
    currentImage = null;
    characters = null;
    namespaces = null;
  }


  /**
   * SAX event. Signifies that a prefix mapping has been encountered. This is used to identify
   * RSS document versions .90 and 1.0.
   * @param prefix
   * @param uri
   * @throws SAXException
   */
  public void startPrefixMapping(String prefix, String uri) throws SAXException{
    namespaces.addElement(new RssNamespace(prefix, uri));
    if(prefix.length()==0){
      if("http://my.netscape.com/rdf/simple/0.9/".equals(uri)){
        document.setVersion(VERSION_90);
        DefaultHandler handler = new RssParserImpl090(document, namespaces);
        parser.setContentHandler(handler);
        parser.setErrorHandler(handler);
      }
      else if("http://purl.org/rss/1.0/".equals(uri)){
        document.setVersion(VERSION_10);
        DefaultHandler handler = new RssParserImpl100(document, namespaces);
        parser.setContentHandler(handler);
        parser.setErrorHandler(handler);
      }
      else{
        throw new SAXException(new RssParseException("Could not determine the RSS version from namespace: " + uri));
      }
    }
  }


  /**
   * SAX event. Signifies the start of an element. This method is only used
   * to get the version of the RSS document being parsed; subclassed events
   * will handle the data values and add them to the RSS document object model.
   * @param uri
   * @param name
   * @param qName
   * @param atts
   */
  public void startElement(String uri, String name, String qName, Attributes atts) throws SAXException{
    if(name.equals("rss")){
      String version = atts.getValue("version");
      if(version.equals("0.91")){
        document.setVersion(VERSION_91);
        DefaultHandler handler = new RssParserImpl091(document, null);
        parser.setContentHandler(handler);
        parser.setErrorHandler(handler);
      }
      else if(version.equals("0.92")){
        throw new SAXException(new RssParseException("RSS 0.92 not supported by this toolkit"));
      }
      else{
        throw new SAXException(new RssParseException("Could not determine the RSS version: " + version));
      }
    }
    if(name.equals("channel")){
      // we've gone too far and not determined the version...
      throw new SAXException(new RssParseException("Could not determine the RSS version of this document."));
    }
  }


  /**
   * SAX event. Adds characters to the string buffer. This may be called several
   * times for an element even though only a few characters are present.
   * @param ch
   * @param start
   * @param length
   */
  public void characters(char ch[], int start, int length){
    if(characters==null){
      characters = new StringBuffer();
    }
    characters.append(ch, start, length);
  }


  /**
   * Returns the characters string buffer as a string, then resets it
   * for the next element. This is a read-once value, then the value is gone.
   * @return
   */
  protected String getChars(){
    if(characters==null){
      return "";
    }
    String s = characters.toString().trim();
    resetChars();
    return s;
  }


  /**
   * Indicates that characters are available. This is different from getChars
   * because it does not clear the string buffer.
   * @return
   */
  protected boolean hasChars(){
    if(characters==null){
      return false;
    }
    return characters.toString().trim().length() > 0;
  }


  /**
   * Resets the characters string buffer. This is done after parsing an
   * element so that tht next element gets a clean buffer to add to.
   */
  protected void resetChars(){
    characters = null;
  }


  /**
   * Returns the value of the attribute if it is found in the hashmap.
   * If not, then returns null.
   * @param attrs
   * @param name
   * @return
   */
  protected String getAttribute(Attributes attrs, String name){
    for(int i=0;i<attrs.getLength();i++){
      String qname = attrs.getQName(i);
      if(qname.equals(name)){
        return attrs.getValue(i);
      }
    }
    return null;
  }


}
Source Code of churchillobjects.rss4j.parser.RssParser

Related Classes of churchillobjects.rss4j.parser.RssParser