Source Code of de.nava.informa.search.ChannelIndexer

//
// Informa -- RSS Library for Java
// Copyright (c) 2002 by Niko Schmuck
//
// Niko Schmuck
// http://sourceforge.net/projects/informa
// mailto:niko_schmuck@users.sourceforge.net
//
// This library is free software.
//
// You may redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation.
//
// Version 2.1 of the license should be included with this distribution in
// the file LICENSE. If the license is not included with this distribution,
// you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
// or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge, 
// MA 02139 USA.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//




// $Id: ChannelIndexer.java,v 1.8 2006/12/04 23:43:28 italobb Exp $


package de.nava.informa.search;


import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;


import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;


import de.nava.informa.core.ChannelIF;
import de.nava.informa.core.ItemIF;


/**
 * Utility class which allows to generate respective maintain a
 * fulltext index for news items.
 *
 * @author Niko Schmuck (niko@nava.de)
 */
public final class ChannelIndexer {


  private static Log logger = LogFactory.getLog(ChannelIndexer.class);


  private String indexDir;
  private int nrOfIndexedItems;
  private Analyzer analyzer;
  
  /**
   * Constructor which allows to specify the index directory.
   * For the full-text indexing process the lucene
   * {@link org.apache.lucene.analysis.standard.StandardAnalyzer}
   * is used.
   *
   * @param indexDir - The directory in which the index files are stored.
   */
  public ChannelIndexer(String indexDir) {
    this.indexDir = indexDir;
    this.nrOfIndexedItems = 0;
    this.analyzer = new StandardAnalyzer();
  }
  
  /**
   * Index all news items contained in the given channels.
   *
   * @param createNewIndex - wether a new index should be generated or
   *        an existant one should be taken into account
   * @param channels - a collection of ChannelIF objects
   */
  public void indexChannels(boolean createNewIndex, Collection channels)
    throws java.io.IOException {


    Collection<ItemIF> items = new ArrayList<ItemIF>();
    Iterator itC = channels.iterator();
    while (itC.hasNext()) {
      ChannelIF channel = (ChannelIF) itC.next();
      if (logger.isDebugEnabled()) {
        logger.debug("Searching channel " + channel + " for items.");
      }
      items.addAll(channel.getItems());
    }
    if (!items.isEmpty()) {
      indexItems(createNewIndex, items);
    } else {
      logger.info("No items found for indexing.");
    }
  }


  /**
   * Index all given news items.
   *
   * @param createNewIndex - Wether a new index should be generated or
   *        an existant one should be taken into account.
   * @param items - A collection of ItemIF objects.
   */
  public void indexItems(boolean createNewIndex, Collection<ItemIF> items)
    throws java.io.IOException {
    
    logger.info("Start writing index.");
    IndexWriter writer = new IndexWriter(indexDir, analyzer, createNewIndex);
    Iterator<ItemIF> itI = items.iterator();
    while (itI.hasNext()) {
      ItemIF item = itI.next();
      if (logger.isDebugEnabled()) {
        logger.debug("Add item " + item + " to index.");
      }
      writer.addDocument(ItemDocument.makeDocument(item));
    }
    writer.optimize();
    nrOfIndexedItems = writer.docCount();
    writer.close();
    logger.info("Finished writing index.");
  }


  /**
   * Returns the number of documents that were in the index last time
   * the index operation was performed.
   *
   * Note: Use only directly after the indexing process, otherwise the
   * return value may be wrong.
   */
  public int getNrOfIndexedItems() {
    return nrOfIndexedItems;
  }


  public void setIndexDir(String indexDir) {
    this.indexDir = indexDir;
  }


  public String getIndexDir() {
    return indexDir;
  }
  
}
Source Code of de.nava.informa.search.ChannelIndexer

Related Classes of de.nava.informa.search.ChannelIndexer