Package talkfeed

Source Code of talkfeed.BlogManager

/*
Copyright 2010 - Jean-Baptiste Vovau

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

package talkfeed;

import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.jdo.PersistenceManager;
import javax.jdo.Query;

import talkfeed.QueuedTask.TaskType;
import talkfeed.cache.BlogCache;
import talkfeed.cache.SubscriptionCache;
import talkfeed.cache.UserPresence;
import talkfeed.data.Blog;
import talkfeed.data.BlogEntry;
import talkfeed.data.DataManager;
import talkfeed.data.DataManagerFactory;
import talkfeed.data.Subscription;
import talkfeed.feed.Channel;
import talkfeed.feed.FeedItem;
import talkfeed.feed.FeedManager;
import talkfeed.utils.DocumentLoader;
import talkfeed.utils.TextTools;

/**
* Managing web sources updates.
*
* @author JBVovau
*
*/
public final class BlogManager {

  private static final int MIN_INTERVAL = 60 ; //1 hour
  private static final int MAX_INTERVAL = (60 * 4); //4 days
 
  /**
   * Get instance of BlogManager
   * @return
   */
  public static BlogManager getInstance() {
    return new BlogManager();
  }

  /** No public instance */
  private BlogManager() {
  }

  /**
   * get or create web source from given link (could be RSS, Atom feed or URL)
   */
  public Blog getOrCreateSource(String link) {
   
    Date now = Calendar.getInstance().getTime();
   
    //long time
    Calendar calLongTime = Calendar.getInstance();
    calLongTime.set(Calendar.YEAR, 1900);
   
    //prepare link
    link = TextTools.purgeLink(link);
   
    DataManager dm = DataManagerFactory.getInstance();
    PersistenceManager pm = dm.newPersistenceManager();
   
    //find if blog already exists in database
    Blog blog = dm.getBlogFromLink(pm , link);

    if (blog == null) {
      //blog does not exist : load content to parse it
      String content = DocumentLoader.loadPage(link);
      String rss = null;
     
      //test if given link is already RSS or Atom FEED
      if (FeedManager.isFeed(content)){
        rss = link;
      }else {
        //extract RSS link from page content
        rss = TextTools.extractRssFromPage(content);
      }

      //test if rss information hase been found
      if (rss != null){
        //check again if blog exists with given link
        blog = dm.getBlogFromLink(pm , rss);
        //actually create new blog in database
        if (blog == null){
          blog = new Blog();
          blog.setLatestEntry(calLongTime.getTime());
          blog.setNextUpdate(now);
          blog.setLink(link);
          blog.setRss(rss);
          pm.currentTransaction().begin();
          pm.makePersistent(blog);
          pm.currentTransaction().commit();
        }
      } else {
        Logger.getLogger("updateBlog").log(Level.WARNING,
            "blog not found : "  + link);
      }
    }
   
    pm.close();
    pm = null;

    return blog;
  }


  /**
   * Update feed's blogs.
   */
  @Deprecated
  public void updateBlogs(int nbMax){
    if (nbMax <=0) return;
   
    DataManager dm = DataManagerFactory.getInstance();
    PersistenceManager pm = dm.newPersistenceManager();
   
    //find blogs where "nextUpdate" is before now, ordered by "nextUpdate" date
    Query q = pm.newQuery(Blog.class);
    q.setRange(0, nbMax);
    q.setFilter("nextUpdate <= date");
    q.declareParameters("java.util.Date date");
    q.setOrdering("nextUpdate");
   
    Date now = Calendar.getInstance().getTime();
   
    @SuppressWarnings("unchecked")
    List<Blog> blogs = (List<Blog>) q.execute(now);
   
    for(Blog blog : blogs){
      //add treatment to queue
      QueuedTask task = new QueuedTask();
      task.setType(TaskType.updateblog);
      task.addParam("id", blog.getKey().getId());
      QueuedTask.enqueue(task);
     
    }
   
    q.closeAll();
    pm.close();
  }
 
  /**
   * Update blog only when user is present
   * @param max
   */
  public void updateActiveBlogs(int max){
   
    //fetch list from Cache
    List<Long> blogToUpdate = BlogCache.getActiveBlogsToUpdate(max);
   
    //to update
    for(Long id : blogToUpdate){
      //add treatment to queue
      QueuedTask task = new QueuedTask();
      task.setType(TaskType.updateblog);
      task.addParam("id", id);
      QueuedTask.enqueue(task);
     
    }
  }
 
  /**
   * Update single blog (for queued task usage)
   * @param id
   */
  public void updateBlog(long id){
   
    DataManager dm = DataManagerFactory.getInstance();
    PersistenceManager pm = dm.newPersistenceManager();
   
    //find blog
    Blog blog = pm.getObjectById(Blog.class,id);
   
    //check if blog has any updates
    BlogUpdateResult result = null;
   
    //fetch channel on the web via RSS
    Channel chan = FeedManager.loadRss(blog.getRss());
   
    if (chan != null){
      //update blog. Returns true if new items were added
      result = updateBlog(pm, blog, chan);
     
      //test url and title and correct if worng
      if (chan.getLink() != null && !chan.getLink().equalsIgnoreCase(blog.getLink())){
        blog.setLink(chan.getLink());
      }
     
      if (chan.getTitle() != null && !chan.getTitle().equalsIgnoreCase(blog.getTitle())){
        blog.setTitle(chan.getTitle());
      }
    }
   
    //Set last post entry date to blog
    if (result != null && result.isUpdate())
      blog.setLatestEntry(result.getLastestEntryDate());
   
    String messageLog = "";
   
    //build nextUpdate
    //if no new update : increase interval
    int newInterval = blog.getRefreshInterval();
    if (result != null && result.isUpdate()){
      newInterval = newInterval / 2;
      messageLog = "Blog updated [" + blog.getTitle() + "]";
    } else {
      newInterval = newInterval * 2;
      messageLog = "No new update [" + blog.getTitle() + "]";
    }
   
    if (newInterval <= MIN_INTERVAL) newInterval = MIN_INTERVAL;
    if (newInterval > MAX_INTERVAL) newInterval = MAX_INTERVAL;
   
    Calendar cal = Calendar.getInstance();
    cal.add(Calendar.MINUTE, newInterval);
   
    blog.setRefreshInterval(newInterval);
    blog.setNextUpdate(cal.getTime());
   
    pm.flush();
    pm.close();
   
    BlogCache.setNextUpdate(blog.getKey().getId(), newInterval);
   
    Logger.getLogger("updateBlog").log(Level.INFO,
        messageLog);
  }
 
  /**
   * Active blogs regarding users presence.
   */
  public void activeBlogsFromUserPresence(){
   
    int nb = 0;
   
    try {
      Collection<String> users = UserPresence.listUsers();
     
      //for each present user : fetch subscriptions and active blog
      for(String user : users){
        //fetch subscriptions
        Collection<Long> blogs = SubscriptionCache.getUserBlogs(user);
        for(long id : blogs){
          BlogCache.setBlogIsActive(id);
          nb++;
        }
      }
     
    }  finally {
      //release datastore
      SubscriptionCache.releaseDataStore();
    }
   
    Logger.getLogger("UserService").info(
        nb +  " blog(s) active");
   
  }
 
  /**
   * Remove all blogs without subscription
   * @return
   */
  public int removeBlogWithoutSubscription(){
    int nb = 0;
   
    DataManager dm = DataManagerFactory.getInstance();
   
    PersistenceManager pm = dm.newPersistenceManager();
   
    Query q = pm.newQuery(Blog.class);
   
    //fetch ALL blogs
    @SuppressWarnings("unchecked")
    List<Blog> allBlogs = (List<Blog>) q.execute();
   
    for(Blog b : allBlogs){
      //fetch sub
      Query sq = pm.newQuery(Subscription.class);
      sq.setFilter("blogKey == bk");
      sq.declareParameters("com.google.appengine.api.datastore.Key bk");
      sq.setRange(0, 1);
     
      @SuppressWarnings("unchecked")
      List<Subscription> ls = (List<Subscription>) sq.execute(b.getKey());
     
      if (ls.size() == 0){
        //delete blog without subscription
        pm.currentTransaction().begin();
        pm.deletePersistent(b);
        pm.currentTransaction().commit();
        nb++;
      }
    }
   
    pm.close();
   
    return nb;
  }
 
 
  /**
   * Remove old entries
   * @param nbdays
   * @return
   */
  public int removeOldestEntries(int nbdays){
    int nb = 0;
   
    DataManager dm = DataManagerFactory.getInstance();
   
    PersistenceManager pm = dm.newPersistenceManager();
   
    Query q = pm.newQuery(BlogEntry.class);
    q.setFilter("creaDate < date");
    q.setRange(0,50);
    q.declareParameters("java.util.Date date");
   
    Calendar cdat = Calendar.getInstance();
    cdat.add(Calendar.DATE , -nbdays);
   
    //fetch ALL blogs
    @SuppressWarnings("unchecked")
    List<BlogEntry> oldest = (List<BlogEntry>) q.execute(cdat.getTime());
   
    for(BlogEntry be : oldest){
      pm.currentTransaction().begin();
      pm.deletePersistent(be);
      pm.currentTransaction().commit();
      nb++;
    }

    pm.close();
   
    return nb;
  }
 
  /**
   * update entries for a blog with given channel
   * @param blog
   * @param chan
   * @return True if any change has occurred (new items found)
   */
  private BlogUpdateResult updateBlog(PersistenceManager pm ,Blog blog, Channel chan){
   
    BlogUpdateResult result = new BlogUpdateResult();
   
    Date recentDate = blog.getLatestEntry();

    //if most recent entry found, pubDate becomes its date
    if (recentDate == null){
      //if no most recent entry, pubDate is minimum
      Calendar c = Calendar.getInstance();
      c.set(Calendar.YEAR,1900);
      recentDate = c.getTime();
    }
   
    //query to find blog entry by GUID
    Query queryBlogEntry = pm.newQuery(BlogEntry.class);
    queryBlogEntry.setFilter("guid == g");
    queryBlogEntry.declareParameters("java.lang.String g");
    queryBlogEntry.setUnique(true);
   
    //compare item
    for(FeedItem item : chan.items()){
     
      //parse error
      if (item.getLink() == null) continue;
     
      //set creaDate
      Date pubDate = item.getPubDate();
      if (pubDate == null) pubDate = Calendar.getInstance().getTime();
     
      //date
      if(recentDate.after(pubDate)) continue;

      BlogEntry entry = (BlogEntry) queryBlogEntry.execute(item.getGuid());
     
      if (entry == null){
        //new item
        entry = new BlogEntry();
        entry.setBlogKey(blog.getKey());
        entry.setCreaDate(new Date());
        entry.setPubDate(pubDate);
        entry.setGuid(item.getGuid());
        entry.setLink(TextTools.limitText(item.getLink(),500));
        entry.setTitle(TextTools.limitText(item.getTitle(),500));
        entry.setBlogTitle(TextTools.limitText(chan.getTitle(),500));
        pm.makePersistent(entry);
        result.setUpdate(true);
        result.checkEntryDate(pubDate);
      }

      queryBlogEntry.close(entry);
    }
   
    queryBlogEntry.closeAll();
   
    return result;
  }
 
 
  /**
   * Result of a blog update
   * @author JB Balmeyer
   *
   */
  private class BlogUpdateResult{
    /**
     * are new entries found ?
     */
    private boolean hasUpdate ;
    /**
     * The date of the latest entry
     */
    private Date lastestEntryDate;
   
    public boolean isUpdate() {
      return hasUpdate;
    }
    public void setUpdate(boolean hasUpdate) {
      this.hasUpdate = hasUpdate;
    }
   
    /**
     * Returns the latest entry date
     * @return
     */
    public Date getLastestEntryDate() {
      return lastestEntryDate;
    }
   
    /**
     * check a new date
     * @param lastestEntryDate
     */
    public void checkEntryDate(Date newEntryDate) {
     
      if (this.lastestEntryDate == null) this.lastestEntryDate = newEntryDate;
      else {
        if (this.lastestEntryDate.before(newEntryDate)) {
          this.lastestEntryDate = newEntryDate;
        }
      }
    }
 
  }
 
}
TOP

Related Classes of talkfeed.BlogManager

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.