Package com.gnizr.core.robot.rss

Source Code of com.gnizr.core.robot.rss.DefaultFeedCrawler

/*
* gnizr is a trademark of Image Matters LLC in the United States.
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either expressed or implied. See the License
* for the specific language governing rights and limitations under the License.
*
* The Initial Contributor of the Original Code is Image Matters LLC.
* Portions created by the Initial Contributor are Copyright (C) 2007
* Image Matters LLC. All Rights Reserved.
*/
package com.gnizr.core.robot.rss;

import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.log4j.Logger;

import com.gnizr.core.util.GnizrDaoUtil;
import com.gnizr.db.dao.FeedSubscription;
import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.io.SyndFeedInput;
import com.sun.syndication.io.XmlReader;

public class DefaultFeedCrawler implements FeedCrawler{

  private static final Logger logger = Logger.getLogger(DefaultFeedCrawler.class);
 
  private BookmarkEntryFactory entryFactory;
 
  public DefaultFeedCrawler(){
    entryFactory = new BookmarkEntryFactory();
  }
 
  public DefaultFeedCrawler(BookmarkEntryFactory factory){
    this.entryFactory = factory;
  }
 
  public FeedCrawlResult doCrawl(FeedSubscription feed) {
    logger.info("Trying to crawl: " + feed.getBookmark().getLink().getUrl());
    List<BookmarkEntry> entries = null;
    Date feedLastUpdated = GnizrDaoUtil.getNow();
    Date feedPubDate = null;
    String feedUrl = feed.getBookmark().getLink().getUrl();
    SyndFeed syndFeed = fetchFeed(feedUrl);
    if (syndFeed != null) {
      logger.info("Successfully fetched: " + feedUrl);
      try {
        if(syndFeed.getPublishedDate() != null){       
          feedPubDate = syndFeed.getPublishedDate();
        }else{
          feedPubDate = feedLastUpdated;
        }
        if(feed.getLastSync() == null || feed.getPubDate() == null){
          logger.info("doCrawl for the very first time: " + feedUrl);
          entries = processSyndFeed(syndFeed, feed);
        }else if(feed.getPubDate() != null){                   
          if(feedPubDate.after(feed.getPubDate())){
            logger.info("doCrawl because remote server pubDate is newer: " + feedUrl + ". Recorded PubDate="
                +feed.getPubDate() + " Server PubDate="+feedPubDate);
            entries = processSyndFeed(syndFeed, feed);
          }else{
            logger.info("doCrawl skip because pubDate is up-to-date: " + ". Recorded PubDate="
                +feed.getPubDate() + " Server PubDate="+feedPubDate);
          }
        }
      } catch (Exception e) {
        logger.error("Error processing feed: "
            + feed.getBookmark().getLink().getUrl(), e);
      }
    }else{
      logger.debug("RSS feed doesn't seem to be valid: " + feed.getBookmark().getLink().getUrl());
    }
    if(entries != null){
      logger.info("Total number of BookmarkEntry created: " + entries.size());
    }
    return new FeedCrawlResult(entries,feedLastUpdated,feedPubDate);
  }
 
  private SyndFeed fetchFeed(String feedUrl){   
    SyndFeed feed = null;
    try{
      SyndFeedInput input = new SyndFeedInput();
      feed = input.build(new XmlReader(new URL(feedUrl)));
    }catch(Exception e){
      logger.error("error reading feed: " + feedUrl);
      logger.debug("error reading feed exception: " + feedUrl,e);
    }   
    return feed;
  }

  @SuppressWarnings("unchecked")
  private List<BookmarkEntry> processSyndFeed(SyndFeed syndFeed, FeedSubscription fromSubs){ 
    List<BookmarkEntry> entries = new ArrayList<BookmarkEntry>();
    List<SyndEntry> syndEntries = syndFeed.getEntries();
    for(SyndEntry e : syndEntries){
      logger.debug("Creating BookmarkEntry for SyndEntry: " + e.getTitle());
      BookmarkEntry newEntry = entryFactory.createEntry(e, syndFeed, fromSubs);
      if(newEntry != null){
        entries.add(newEntry);
      }
    }
    return entries;
  }
}
TOP

Related Classes of com.gnizr.core.robot.rss.DefaultFeedCrawler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.