Package com.substanceofcode.rssreader.businesslogic

Source Code of com.substanceofcode.rssreader.businesslogic.HTMLAutoLinkParser

/*
* HTMLAutoLinkParser.java
*
* Copyright (C) 2007-2008 Tommi Laukkanen
* Copyright (C) 2007-2008 Irving Bunton
* http://www.substanceofcode.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*
*/

// Expand to define memory size define
@DMEMSIZEDEF@
// Expand to define logging define
@DLOGDEF@

/* This functionality adds to jar size, so don't do it for small memory */
/* devices. */
//#ifndef DSMALLMEM
package com.substanceofcode.rssreader.businesslogic;

import com.substanceofcode.rssreader.businessentities.RssItunesFeed;
import com.substanceofcode.utils.HTMLParser;
import com.substanceofcode.utils.XmlParser;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Vector;

import com.substanceofcode.utils.EncodingUtil;
import com.substanceofcode.utils.StringUtil;
import com.substanceofcode.utils.CauseException;
import com.substanceofcode.utils.CauseMemoryException;
//#ifdef DLOGGING
import net.sf.jlogmicro.util.logging.Logger;
import net.sf.jlogmicro.util.logging.LogManager;
import net.sf.jlogmicro.util.logging.Level;
//#endif

/**
* HTMLAutoLinkParser class is used when we are parsing an RSS feed list
* from HTML autodiscovery links of the form
* &lt;link href="link" title="Name"/&gt;.
* Only link tags whose type attribute contains "rss" or "atom" are used
* (or "opml" when RSS feeds are not wanted).
*
* @author Irving Bunton
*/
public class HTMLAutoLinkParser extends FeedListParser {
   
  boolean m_needRss = true;
  boolean m_needFirstRss = false;
  //#ifdef DLOGGING
    private Logger logger = Logger.getLogger("HTMLAutoLinkParser");
    private boolean fineLoggable = logger.isLoggable(Level.FINE);
    private boolean finerLoggable = logger.isLoggable(Level.FINER);
    private boolean finestLoggable = logger.isLoggable(Level.FINEST);
  //#endif

    /** Creates a new instance of HTMLAutoLinkParser */
    public HTMLAutoLinkParser(String url, String username, String password) {
        super(url, username, password);
    }

    public RssItunesFeed[] parseFeeds(InputStream is)
    throws IOException, CauseMemoryException, CauseException, Exception {
    // Init in case we get a severe error.
    try {
      return HTMLAutoLinkParser.parseFeeds(new EncodingUtil(is),
                      m_url,
                      m_needRss,
                      m_needFirstRss,
                      m_feedNameFilter,
                      m_feedURLFilter
                      //#ifdef DLOGGING
                      ,logger
                      ,fineLoggable
                      ,finerLoggable
                      ,finestLoggable
                      //#endif
                      );
        } catch (CauseException ex) {
      throw ex;
    } catch (Throwable t) {
      CauseException cex = new CauseException(
          "Error while parsing HTML auto link feed " + m_url, t);
//#ifdef DLOGGING
      logger.severe(cex.getMessage(), cex);
//#endif
      System.out.println(cex.getMessage() + " " + t + " " + t.getMessage());
      throw cex;
    }
  }
       
  // Parse feeds.  Allow null title.
    static public RssItunesFeed[] parseFeeds(EncodingUtil encodingUtil,
                    String url,
                    boolean needRss,
                    boolean needFirstRss,
                    String feedNameFilter,
                    String feedURLFilter
                    //#ifdef DLOGGING
                    ,Logger logger,
                     boolean fineLoggable,
                     boolean finerLoggable,
                     boolean finestLoggable
                    //#endif
                                 )
    throws IOException, CauseMemoryException, CauseException, Exception {
        /** Initialize item collection */
        Vector rssFeeds = new Vector();
       
        /** Initialize XML parser and parse OPML XML */
        HTMLParser parser = new HTMLParser(encodingUtil);
        try {
           
      // The first element is the main tag.
            int elementType = parser.parse();
      // If we found the prologue, get the next entry.
      if( elementType == XmlParser.PROLOGUE ) {
        elementType = parser.parse();
      }
      if (elementType == XmlParser.END_DOCUMENT ) {
        return null;
      }
           
      boolean windows = parser.isWindows();
      boolean utf = parser.isUtf();
      boolean process = true;
      boolean bodyFound = false;
            do {
        /** RSS item properties */
        String title = "";
        String link = "";
                       
        String tagName = parser.getName();
        //#ifdef DLOGGING
        if (finerLoggable) {logger.finer("tagname: " + tagName);}
        //#endif
        switch (tagName.charAt(0)) {
          case 'b':
          case 'B':
            if (bodyFound) {
              continue;
            }
            bodyFound = parser.isBodyFound();
            if (bodyFound) {
              windows = parser.isWindows();
              utf = parser.isUtf();
            }
            // If looking for OPML link, it is in header.
            if ((!needRss || needFirstRss) && bodyFound) {
              process = false;
              break;
            }
            break;
          case 'l':
          case 'L':
            if (!tagName.toLowerCase().equals("link")) {
              break;
            }
            //#ifdef DLOGGING
            if (finerLoggable) {logger.finer("Parsing <link> tag");}
            //#endif
           
            // TODO base
            String type = parser.getAttributeValue( "type" );
            if (type == null) {
              continue;
            }
            if (!needRss && (type.toLowerCase().indexOf("opml") < 0)) {
              continue;
            }
            if (needRss &&
                ((type.toLowerCase().indexOf("rss") < 0) &&
                (type.toLowerCase().indexOf("atom") < 0))) {
              continue;
            }
            title = parser.getAttributeValue( "title" );
            // Allow null title so that the caller can
            // check if it needs to get the title another way.
            if (title != null) {
              title = EncodingUtil.replaceAlphaEntities(true,
                  title);
              title = EncodingUtil.replaceNumEntity(title);
              // Replace special chars like left quote, etc.
              // Since we have already converted to unicode, we want
              // to replace with uni chars.
              title = encodingUtil.replaceSpChars(title);

              title = StringUtil.removeHtml(title);
            }
            if (((link = parser.getAttributeValue( "href" ))
                  == null) || ( link.length() == 0 )) {
              continue;
            }
            if (link.charAt(0) == '/') {
              link = url + link;
            }
           
            /** Debugging information */
            System.out.println("Title:       " + title);
            System.out.println("Link:        " + link);
           
            /**
             * Create new RSS item and add it do RSS document's item
             * collection.  Account for wrong OPML which is an
             * OPML composed of other OPML.  These have url attribute
             * instead of link attribute.
             */
            if (!needRss || needFirstRss) {
              RssItunesFeed feed = new RssItunesFeed(title, link, "", "");
              rssFeeds.addElement( feed );
              process = false;
              break;
            }
            if (( feedURLFilter != null) &&
              ( link.toLowerCase().indexOf(feedURLFilter) < 0)) {
              continue;
            }
            if (( feedNameFilter != null) &&
              ((title != null) &&
              (title.toLowerCase().indexOf(feedNameFilter) < 0))) {
              continue;
            }
            RssItunesFeed feed = new RssItunesFeed(title, link, "", "");
            rssFeeds.addElement( feed );
            break;
          default:
        }
      }
            while( process && (parser.parse() != XmlParser.END_DOCUMENT) );
           
        } catch (CauseMemoryException ex) {
      CauseMemoryException cex = new CauseMemoryException(
          "Out of memory error while parsing HTML auto link feed " +
          url, ex);
      throw cex;
        } catch (Exception ex) {
      CauseException cex = new CauseException(
          "Error while parsing HTML auto link feed " + url, ex);
//#ifdef DLOGGING
      logger.severe(cex.getMessage(), cex);
//#endif
      System.err.println(cex.getMessage() + " " + ex + " " + ex.getMessage());
      ex.printStackTrace();
      throw cex;
        } catch (Throwable t) {
      CauseException cex = new CauseException(
          "Error while parsing HTML auto link feed " + url, t);
//#ifdef DLOGGING
      logger.severe(cex.getMessage(), cex);
//#endif
      System.err.println(cex.getMessage() + " " + t + " " + t.getMessage());
      t.printStackTrace();
      throw cex;
        }
       
        /** Create array */
        RssItunesFeed[] feeds = new RssItunesFeed[ rssFeeds.size() ];
        if (feeds.length > 0) {
      rssFeeds.copyInto(feeds);
    }
        return feeds;
    }
   
    public void setNeedRss(boolean needRss) {
        this.m_needRss = needRss;
    }

    public boolean isNeedRss() {
        return (m_needRss);
    }

}
//#endif
TOP

Related Classes of com.substanceofcode.rssreader.businesslogic.HTMLAutoLinkParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.