Package com.substanceofcode.rssreader.businesslogic

Source Code of com.substanceofcode.rssreader.businesslogic.RssFormatParser

/*
   TODO remove extra spaces between description item 7 of imbed
* RssFormatParser.java
*
* Copyright (C) 2005-2006 Tommi Laukkanen
* http://www.substanceofcode.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*
*/

// Expand to define logging define
//#define DNOLOGGING
package com.substanceofcode.rssreader.businesslogic;
//#ifdef DLOGGING
//@import net.sf.jlogmicro.util.logging.Logger;
//@import net.sf.jlogmicro.util.logging.Level;
//#endif

import com.substanceofcode.rssreader.businessentities.RssItunesFeed;
import com.substanceofcode.rssreader.businessentities.RssItunesItem;
import com.substanceofcode.utils.StringUtil;
import com.substanceofcode.utils.EncodingUtil;
import com.substanceofcode.utils.XmlParser;
import com.substanceofcode.utils.CauseException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Calendar;
import java.util.Date;
import java.util.Vector;

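/**
* Parses an RSS feed read through an XmlParser into an RssItunesFeed and
* its RssItunesItem entries, and converts RSS style and Dublin Core
* (dc:date) date strings into Date objects.
*
* @author Tommi
*/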
public class RssFormatParser implements FeedFormatParser {
   
  //#ifdef DLOGGING
//@    private Logger logger = Logger.getLogger("RssFormatParser");
  //#endif
  /** Result of m_extParser.isHasExt(), captured in parse(). */
  private boolean m_hasExt = false;
  //#ifdef DLOGGING
//@    private boolean fineLoggable = logger.isLoggable(Level.FINE);
//@    private boolean finestLoggable = logger.isLoggable(Level.FINEST);
  //#endif
  /**
   * Copy of the convXmlEnts argument of parse(); handed to
   * parser.getText() when reading titles and descriptions.
   */
  private boolean m_convXmlEnts;
  // Values collected for the feed header or for the item currently being
  // parsed. All of them are set back to "" by reset().
  private String m_title = "";
  private String m_author = "";
  private String m_description = "";
  private String m_link = "";
  private String m_language = "";
  private String m_date = "";
  private String m_enclosure = "";
  /** Parser for extension elements; created at the start of parse(). */
  private ExtParser m_extParser;

    /**
     * Creates a new instance of RssFormatParser.
     * No per-feed state is prepared here; the extension parser and the
     * conversion flag are set up on each call to parse().
     */
    public RssFormatParser() {
    }
   
    /**
     * Parses an RSS document in two phases: first the feed level elements
     * up to the first "item" element, then the items themselves.
     * <p>
     * If the document ends before any "item" element is seen, the feed is
     * returned as is (link and date are not set in that case).
     *
     * @param parser       XML parser supplying the elements
     * @param cfeed        feed object that receives the item vector, link
     *                     and date
     * @param convXmlEnts  passed to the ExtParser constructor and to
     *                     parser.getText() for titles and descriptions
     * @param maxItemCount parsing stops as soon as this many items have
     *                     been collected
     * @param getTitleOnly if true, return right after the feed title was
     *                     read, with the title stored as the feed name
     * @return cfeed, or the instance returned by the extension parser's
     *         getFeedInstance() when m_extParser.isItunes() is true
     * @throws IOException    declared by this method; presumably raised by
     *                        the underlying parser calls
     * @throws CauseException declared by this method; presumably raised by
     *                        the underlying parser calls
     */
    public RssItunesFeed parse(XmlParser parser, RssItunesFeed cfeed,
                 
                 final boolean convXmlEnts,
                 final int maxItemCount, boolean getTitleOnly)
  throws IOException, CauseException {
       
        Vector items = new Vector();
    // Fresh extension parser for every feed; it inspects the namespaces
    // first so that m_hasExt is known before any element is dispatched.
    m_extParser = new ExtParser(convXmlEnts);
    m_extParser.parseNamespaces(parser);
    m_hasExt = m_extParser.isHasExt();
    m_convXmlEnts = convXmlEnts;
    RssItunesFeed feed = cfeed;
        // The feed shares the vector that is filled in the item loop below.
        feed.setItems(items);
       
        /** Parse to first entry element */
        while(!parser.getName().equals("item")) {
            switch (parser.parse()) {
        case XmlParser.END_DOCUMENT:
          // Document ended before any item was found.
          System.out.println("No entries found.");
          return feed;
        case XmlParser.ELEMENT:
          String elementName = parser.getName();
          if (elementName.length() == 0) {
            // Nothing to dispatch on; go on with the next parse step.
            continue;
          }
          char elemChar = elementName.charAt(0);
          // parseCommon stores title, description, link and pubDate in
          // the m_* fields and returns true when it handled the element.
          if (parseCommon(parser, elemChar, elementName)) {
            if ((elemChar == 't') &&
                getTitleOnly && elementName.equals("title") ) {
              // Caller only wants the feed title: stop here.
              feed.setName(m_title);
              return feed;
            }
            continue;
          }
          switch (elemChar) {
            //#ifdef DITUNES
//@            case 'l':
//@               if (elementName.equals("language")) {
//@                 m_language = parser.getText();
                 //#ifdef DLOGGING
//@                 if (finestLoggable) {logger.finest("m_language=" + m_language);}
                 //#endif
//@                 continue;
//@               }
//@               break;
            //#endif
            case 'i':
               if (elementName.equals("image")) {
                 // Skip image text as it includes link
                 // and title.
                 String itext = parser.getText(false);
                 //#ifdef DLOGGING
//@                 if (finestLoggable) {logger.finest("image=" + itext);}
                 //#endif
                 continue;
               }
               break;
            default:
               break;
          }
          // Elements not handled above are offered to the extension parser.
          if (m_hasExt) {
            m_extParser.parseExtItem(parser, elemChar, elementName);
          }
          break;
        default:
          break;
            }
        }
    // Header is complete: copy the collected values to the feed.
    feed.setLink(m_link);
    if (m_date.length() > 0) {
      Date pubDate = parseRssDate(m_date);
      feed.setDate(pubDate);
    } else {
      feed.setDate(null);
    }
    if (m_extParser.isItunes()) {
      feed = m_extParser.getFeedInstance(feed, m_language, m_title,
          m_description);
    }
       
    // Clear the header values so they are not taken as values of the
    // first item.
    reset();

    /** Parse next element */           
        int parsingResult;
        while( (parsingResult = parser.parse()) !=XmlParser.END_DOCUMENT ) {
            String elementName = parser.getName();
            if (elementName.length() == 0) {
        continue;
      }
           
      char elemChar = elementName.charAt(0);
            switch (elemChar) {
        case 'i':
          if (elementName.equals("item") ) {
            /** Save previous entry */
            // createItem() returns null when neither a title nor a
            // description was collected.
            RssItunesItem item = createItem();
            if ( item != null) {
              items.addElement( item );
              if(items.size()==maxItemCount) {
                return feed;
              }
            }

            /** New entry */
            /** reset */
            reset();
            continue;
          }
          break;
        case 't':
          // Textinput has required sub element description.
          // We don't want the overriding description.
          if (elementName.equals("textinput") ) {
            String textData = parser.getText(false);
            //#ifdef DLOGGING
//@            if (finestLoggable) {logger.finest("skipping textinput data=" + textData);}
            //#endif
            continue;
          }
          break;
        default:
      }
      // Every other element is treated as a field of the current item.
      parseItem(parser, elemChar, elementName);
           
        }

        /** Save previous entry */
    // The last item is not followed by another "item" element, so it is
    // stored here after the document ended.
    RssItunesItem item = createItem();
    if ( item != null) {
            items.addElement( item );
        }       
                       
    return feed;
    }
   
  /** Save previous entry */
  final private RssItunesItem createItem() {
    boolean hasTitle = (m_title.length()>0);
    boolean hasDesc = (m_description.length()>0);
    if(hasTitle || hasDesc) {
      if (hasTitle && hasDesc) {
        m_title = m_title.replace('\n', ' ');
        // If we were not converting HTML, do so now since the
        // title should not have HTML.
        if (!m_convXmlEnts) {
          m_title = EncodingUtil.replaceAlphaEntities(
              true, m_title );
          m_title = StringUtil.removeHtml( m_title );
        }
      }
      Date pubDate = null;
      // Check date in case we cannot find it.
      if ((m_date.length() == 0) &&
          m_extParser.isHasExt()) {
        m_date = m_extParser.getDate();
      }
      if (m_date.length() > 0) {
        pubDate = parseRssDate(m_date);
      }
      RssItunesItem item;
      if (m_hasExt) {
        item = m_extParser.createItem(m_title, m_link,
            m_description, pubDate, m_enclosure, true,
            m_author);
      } else {
        item = new RssItunesItem(m_title, m_link,
            m_description, pubDate,
            m_enclosure, true);
      }
      return item;
    }
    return null;
  }

  private void reset() {
    m_title = "";
    m_author = "";
    m_description = "";
    m_link = "";
    m_language = "";
    m_date = "";
    m_enclosure = "";
    if (m_hasExt) {
      m_extParser.reset();
    }
  }

  /* Parse the fields common to feed and item. */
  private boolean parseCommon(XmlParser parser, char elemChar,
      String elementName)
  throws IOException, CauseException {
    switch (elemChar) {
      case 'p':
        if( elementName.equals("pubDate")) {
          m_date = parser.getText();
          //#ifdef DLOGGING
//@          if (finestLoggable) {logger.finest("m_date=" + m_date);}
          //#endif
          return true;
        }
        break;
      case 't':
        if( elementName.equals("title") ) {
          m_title = parser.getText(m_convXmlEnts);
          //#ifdef DLOGGING
//@          if (finestLoggable) {logger.finest("m_title=" + m_title);}
          //#endif
          return true;
        }
        break;
      case 'd':
        if( elementName.equals("description")) {
          m_description = parser.getText(m_convXmlEnts);
          if (m_convXmlEnts) {
            m_description = StringUtil.removeHtml( m_description );
          }
          //#ifdef DLOGGING
//@          if (finestLoggable) {logger.finest("m_description=" + m_description);}
          //#endif
          return true;
        }
        break;
      case 'l':
        if( elementName.equals("link") ) {
          m_link = parser.getText();
          m_link = StringUtil.removeHtml( m_link );
          //#ifdef DLOGGING
//@          if (finestLoggable) {logger.finest("m_link=" + m_link);}
          //#endif
          return true;
        }
        break;
      default:
    }
    return false;
  }

  /* Parse the item to get it's fields */
  void parseItem(XmlParser parser, char elemChar, String elementName)
  throws IOException, CauseException {
    switch (elemChar) {
      //#ifdef DITUNES
//@      case 'a':
//@        if( elementName.equals("author")) {
//@          m_author = parser.getText(m_convXmlEnts);
//@          return;
//@        }
//@        break;
      //#endif
      case 'e':
        if( elementName.equals("enclosure") ) {
          String cenclosure = parser.getAttributeValue("url");
          if (cenclosure != null) {
            m_enclosure = cenclosure;
            return;
          }
          return;
        }
        break;
      default:
    }
    if (parseCommon(parser, elemChar, elementName)) {
      return;
    }
    if (m_hasExt) {
      m_extParser.parseExtItem(parser, elemChar, elementName);
    }
  }

  /** Get calendar date. **/
  public static Date getCal(int dayOfMonth, int month, int year, int hours,
                 int minutes, int seconds) throws Exception {
    // Create calendar object from date values
    Calendar cal = Calendar.getInstance();
    cal.set(Calendar.DAY_OF_MONTH, dayOfMonth);
    cal.set(Calendar.MONTH, month);
    cal.set(Calendar.YEAR, year);
    cal.set(Calendar.HOUR_OF_DAY, hours);
    cal.set(Calendar.MINUTE, minutes);
    cal.set(Calendar.SECOND, seconds);
   
    return cal.getTime();
  }

  /**  Parse the standard RSS date and Dublin Core (dc) date. */
  static Date parseRssDate(String date) {
    Date pubDate = null;
    date = date.trim();
    int dpos = date.indexOf('-', 2);
    if ((dpos > 0) && (date.indexOf('-', dpos + 1) > 0)) {
      pubDate = parseDcDate(date);
    } else {
      pubDate = parseStdDate(date);
    }
    //#ifdef DLOGGING
//@    Logger logger = Logger.getLogger("RssFormatParser");
//@    logger.finest("date,pubDate=" + date + "," + pubDate);
    //#endif
    return pubDate;
  }

    /**
     * Parse RSS date format to Date object.
     * Example of RSS date:
     * Sat, 23 Sep 2006 22:25:11 +0000
     */
    public static Date parseStdDate(String dateString) {
        Date pubDate = null;
        try {
            // Split date string to values
            // 0 = week day
            // 1 = day of month
            // 2 = month
            // 3 = year (could be with either 4 or 2 digits)
            // 4 = time
            // 5 = GMT
            int weekDayIndex = 0;
            int dayOfMonthIndex = 1;
            int monthIndex = 2;
            int yearIndex = 3;
            int timeIndex = 4;
            int gmtIndex = 5;
           
      int kc = 0;
            while ((dateString.indexOf("  ") >= 0) &&
          (kc++ < dateString.length())) {
        dateString = StringUtil.replace(dateString, "  ", " ");
      }
            String[] values = StringUtil.split(dateString, ' ');
            int columnCount = values.length;
            if( columnCount==5 ) {
                // Expected format:
                // 09 Nov 2006 23:18:49 EST
                dayOfMonthIndex = 0;
                monthIndex = 1;
                yearIndex = 2;
                timeIndex = 3;
                gmtIndex = 4;
      } else if( columnCount==7 ) {
                // Expected format:
                // Thu, 19 Jul  2007 00:00:00 N
                yearIndex = 4;
                timeIndex = 5;
                gmtIndex = 6;
            } else if( columnCount<5 || columnCount>6 ) {
        //#ifdef DLOGGING
//@        Logger logger = Logger.getLogger("RssFormatParser");
//@        logger.warning("Invalid date format: " + dateString);
        //#endif
        for (int ic = 0; ic < dateString.length(); ic++) {
          System.out.println("date=" + ic + "," + dateString.charAt(ic) + "," + (int)dateString.charAt(ic));
        }
                throw new Exception("Invalid date format: " + dateString);
            }
           
            // Day of month
            int dayOfMonth = Integer.parseInt( values[ dayOfMonthIndex ] );
           
            // Month
            String[] months =  {
                "Jan", "Feb", "Mar", "Apr", "May", "Jun",
                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
            String monthString = values[ monthIndex ];
            int month=0;
            for(int monthEnumIndex=0; monthEnumIndex<11; monthEnumIndex++) {
                if( monthString.equals( months[ monthEnumIndex ] )) {
                    month = monthEnumIndex;
                }
            }
           
            // Year
            int year = Integer.parseInt(values[ yearIndex ]);
            if(year<100) {
                year += 2000;
            }
           
            // Time
            String[] timeValues = StringUtil.split(values[ timeIndex ], ':');
            int hours = Integer.parseInt( timeValues[0] );
            int minutes = Integer.parseInt( timeValues[1] );
            int seconds = Integer.parseInt( timeValues[2] );
           
            pubDate = getCal(dayOfMonth, month, year, hours,
          minutes, seconds);
           
        } catch(Exception ex) {
            // TODO: Add exception handling code
            System.err.println("parseStdDate error while converting date string to object: " +
                    dateString + "," + ex.toString());
      //#ifdef DLOGGING
//@      Logger logger = Logger.getLogger("RssFormatParser");
//@      logger.severe("parseStdDate  error while converting date " +
//@               "string to object: " +
//@                    dateString, ex);
      //#endif
        } catch(Throwable t) {
            // TODO: Add exception handling code
            System.err.println("parseStdDate error while converting date string to object: " +
                    dateString + "," + t.toString());
      //#ifdef DLOGGING
//@      Logger logger = Logger.getLogger("RssFormatParser");
//@      logger.severe("parseStdDate  error while converting date " +
//@               "string to object: " +
//@                    dateString, t);
      //#endif
        }
        return pubDate;
    }
   
    /**
     * Parse RSS date dc:date or atom format to Date object.
     * Example of RSS dc:date:
     * 2007-07-31T02:02:00+00:00
   * atom date
     * [-]CCYY-MM-DDThh:mm:ss[Z|(+|-)hh:mm]
     */
    public static Date parseDcDate(String dateString) {
        Date pubDate = null;
        try {
            // Split date string to values
            // 0 = year (could be with either 4 or 2 digits)
            // 1 = month
            // 2 = day of month/time
            int yearIndex = 0;
            int monthIndex = 1;
            int dayOfMonthTimeIndex = 2;
      if (dateString.charAt(0) == '-') {
        dateString = dateString.substring(1);
      }
           
            String[] values = StringUtil.split(dateString, '-');

            if( values.length<3 ) {
                throw new Exception("Invalid date format: " + dateString);
            }
           
            int year = Integer.parseInt(values[ yearIndex ]);
           
            // Month
            int month = Integer.parseInt( values[ monthIndex ] );
           
            // Day of month
            String sdayOfMonth = values[ dayOfMonthTimeIndex ].substring(0, 2);

            int dayOfMonth = Integer.parseInt( sdayOfMonth );
           
            String time = values[ dayOfMonthTimeIndex ].substring(3);
            String [] timeValues = StringUtil.split(time, ':');

            int hours = Integer.parseInt( timeValues[0] );
            int minutes = Integer.parseInt( timeValues[1] );
            timeValues[2] = timeValues[2].substring( 0, 2 );
            int seconds = Integer.parseInt( timeValues[2] );
           
            pubDate = getCal(dayOfMonth, month - 1 + Calendar.JANUARY, year,
          hours, minutes, seconds);
           
        } catch(Exception ex) {
      //#ifdef DLOGGING
//@      Logger logger = Logger.getLogger("RssFormatParser");
//@      logger.warning("parseDcDate error while converting date " +
//@               "string to object: " +
//@                    dateString, ex);
      //#endif
            // TODO: Add exception handling code
            System.err.println("parseDcDate error while converting date string to object: " +
                    dateString + "," + ex.toString());
        } catch(Throwable t) {
      //#ifdef DLOGGING
//@      Logger logger = Logger.getLogger("RssFormatParser");
//@      logger.severe("parseDcDate error while converting date " +
//@               "string to object: " +
//@                    dateString, t);
      //#endif
            // TODO: Add exception handling code
            System.err.println("parseDcDate error while converting date string to object: " +
                    dateString + "," + t.toString());
        }
        return pubDate;
    }
   
}
TOP

Related Classes of com.substanceofcode.rssreader.businesslogic.RssFormatParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.