Source Code of net.sourceforge.pebble.index.SearchIndex

/*
 * Copyright (c) 2003-2011, Simon Brown
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *
 *   - Neither the name of Pebble nor the names of its contributors may
 *     be used to endorse or promote products derived from this software
 *     without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
package net.sourceforge.pebble.index;


import net.sourceforge.pebble.domain.*;
import net.sourceforge.pebble.search.SearchException;
import net.sourceforge.pebble.search.SearchHit;
import net.sourceforge.pebble.search.SearchResults;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;


import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Collection;


/**
 * Wraps up the functionality to index blog entries. This is really just
 * a convenient wrapper around Lucene.
 *
 * @author    Simon Brown
 */
public class SearchIndex {


  /** the log used by this class */
  private static final Log log = LogFactory.getLog(SearchIndex.class);


  private final Blog blog;


  public SearchIndex(Blog blog) {
    this.blog = blog;
  }


  /**
   * Clears the index.
   */
  public void clear() {
    File searchDirectory = new File(blog.getSearchIndexDirectory());
    if (!searchDirectory.exists()) {
      searchDirectory.mkdirs();
    }


    synchronized (blog) {
      try {
        Analyzer analyzer = getAnalyzer();
        IndexWriter writer = new IndexWriter(searchDirectory, analyzer, true);
        writer.close();
      } catch (Exception e) {
        log.error(e.getMessage(), e);
      }
    }
  }


  /**
   * Allows a collection of blog entries to be indexed.
   */
  public void indexBlogEntries(Collection<BlogEntry> blogEntries) {
    synchronized (blog) {
      try {
        Analyzer analyzer = getAnalyzer();
        IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);


        for (BlogEntry blogEntry : blogEntries) {
          index(blogEntry, writer);
        }


        writer.close();
      } catch (Exception e) {
        log.error(e.getMessage(), e);
      }
    }
  }


  /**
   * Allows a collection of static pages to be indexed.
   */
  public void indexStaticPages(Collection<StaticPage> staticPages) {
    synchronized (blog) {
      try {
        Analyzer analyzer = getAnalyzer();
        IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);


        for (StaticPage staticPage : staticPages) {
          index(staticPage, writer);
        }


        writer.close();
      } catch (Exception e) {
        log.error(e.getMessage(), e);
      }
    }
  }


  /**
   * Allows a single blog entry to be (re)indexed. If the entry is already
   * indexed, this method deletes the previous index before adding the new
   * one.
   *
   * @param blogEntry   the BlogEntry instance to index
   */
  public void index(BlogEntry blogEntry) {
    try {
      synchronized (blog) {
        // first delete the blog entry from the index (if it was there)
        unindex(blogEntry);


        Analyzer analyzer = getAnalyzer();
        IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
        index(blogEntry, writer);
        writer.close();
      }
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }


  /**
   * Allows a single static page to be (re)indexed. If the page is already
   * indexed, this method deletes the previous index before adding the new
   * one.
   *
   * @param staticPage    the StaticPage instance to index
   */
  public void index(StaticPage staticPage) {
    try {
      synchronized (blog) {
        // first delete the static page from the index (if it was there)
        unindex(staticPage);


        Analyzer analyzer = getAnalyzer();
        IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
        index(staticPage, writer);
        writer.close();
      }
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }


  /**
   * Gets the Analyzer implementation to use.
   *
   * @return  an Analyzer instance
   * @throws Exception
   */
  private Analyzer getAnalyzer() throws Exception {
    Class c = Class.forName(blog.getLuceneAnalyzer());
    return (Analyzer)c.newInstance();
  }


  /**
   * Removes the index for a single blog entry to be removed.
   *
   * @param blogEntry   the BlogEntry instance to be removed
   */
  public void unindex(BlogEntry blogEntry) {
    try {
      synchronized (blog) {
        log.debug("Attempting to delete index for " + blogEntry.getTitle());
        IndexReader reader = IndexReader.open(blog.getSearchIndexDirectory());
        Term term = new Term("id", blogEntry.getId());
        log.debug("Deleted " + reader.delete(term) + " document(s) from the index");
        reader.close();
      }
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }


  /**
   * Removes the index for a single blog entry to be removed.
   *
   * @param staticPage    the StaticPage instance to be removed
   */
  public void unindex(StaticPage staticPage) {
    try {
      synchronized (blog) {
        log.debug("Attempting to delete index for " + staticPage.getTitle());
        IndexReader reader = IndexReader.open(blog.getSearchIndexDirectory());
        Term term = new Term("id", staticPage.getId());
        log.debug("Deleted " + reader.delete(term) + " document(s) from the index");
        reader.close();
      }
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }


  /**
   * Helper method to index an individual blog entry.
   *
   * @param blogEntry   the BlogEntry instance to index
   * @param writer      the IndexWriter to index with
   */
  private void index(BlogEntry blogEntry, IndexWriter writer) {
    if (!blogEntry.isPublished()) {
      return;
    }


    try {
      log.debug("Indexing " + blogEntry.getTitle());
      Document document = new Document();
      document.add(Field.Keyword("id", blogEntry.getId()));
      if (blogEntry.getTitle() != null) {
        document.add(Field.Text("title", blogEntry.getTitle()));
      } else {
        document.add(Field.Text("title", ""));
      }
      document.add(Field.Keyword("permalink", blogEntry.getPermalink()));
      document.add(Field.UnIndexed("date", DateField.dateToString(blogEntry.getDate())));
      if (blogEntry.getBody() != null) {
        document.add(Field.UnStored("body", blogEntry.getBody()));
      } else {
        document.add(Field.UnStored("body", ""));
      }
      if (blogEntry.getTruncatedContent() != null) {
        document.add(Field.Text("truncatedBody", blogEntry.getTruncatedContent()));
      } else {
        document.add(Field.Text("truncatedBody", ""));
      }


      if (blogEntry.getAuthor() != null) {
        document.add(Field.Text("author", blogEntry.getAuthor()));
      }


      // build up one large string with all searchable content
      // i.e. entry title, entry body and all response bodies
      StringBuffer searchableContent = new StringBuffer();
      searchableContent.append(blogEntry.getTitle());
      searchableContent.append(" ");
      searchableContent.append(blogEntry.getBody());


      for (Category category : blogEntry.getCategories()) {
        document.add(Field.Text("category", category.getId()));
      }


      for (Tag tag : blogEntry.getAllTags()) {
        document.add(Field.Text("tag", tag.getName()));
      }


      searchableContent.append(" ");
      Iterator it = blogEntry.getComments().iterator();
      while (it.hasNext()) {
        Comment comment = (Comment)it.next();
        if (comment.isApproved()) {
          searchableContent.append(comment.getBody());
          searchableContent.append(" ");
        }
      }
      it = blogEntry.getTrackBacks().iterator();
      while (it.hasNext()) {
        TrackBack trackBack = (TrackBack)it.next();
        if (trackBack.isApproved()) {
          searchableContent.append(trackBack.getExcerpt());
          searchableContent.append(" ");
        }
      }


      // join the title and body together to make searching on them both easier
      document.add(Field.UnStored("blogEntry", searchableContent.toString()));


      writer.addDocument(document);
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }
  /**
   * Helper method to index an individual blog entry.
   *
   * @param staticPage    the Page instance instance to index
   * @param writer      the IndexWriter to index with
   */
  private void index(StaticPage staticPage, IndexWriter writer) {
    try {
      log.debug("Indexing " + staticPage.getTitle());
      Document document = new Document();
      document.add(Field.Keyword("id", staticPage.getId()));
      if (staticPage.getTitle() != null) {
        document.add(Field.Text("title", staticPage.getTitle()));
      } else {
        document.add(Field.Text("title", ""));
      }
      document.add(Field.Keyword("permalink", staticPage.getPermalink()));
      document.add(Field.UnIndexed("date", DateField.dateToString(staticPage.getDate())));
      if (staticPage.getBody() != null) {
        document.add(Field.UnStored("body", staticPage.getBody()));
      } else {
        document.add(Field.UnStored("body", ""));
      }
      if (staticPage.getTruncatedContent() != null) {
        document.add(Field.Text("truncatedBody", staticPage.getTruncatedContent()));
      } else {
        document.add(Field.Text("truncatedBody", ""));
      }


      if (staticPage.getAuthor() != null) {
        document.add(Field.Text("author", staticPage.getAuthor()));
      }


      // build up one large string with all searchable content
      // i.e. entry title, entry body and all response bodies
      StringBuffer searchableContent = new StringBuffer();
      searchableContent.append(staticPage.getTitle());
      searchableContent.append(" ");
      searchableContent.append(staticPage.getBody());


      // join the title and body together to make searching on them both easier
      document.add(Field.UnStored("blogEntry", searchableContent.toString()));


      writer.addDocument(document);
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }


  public SearchResults search(String queryString) throws SearchException {


    log.debug("Performing search : " + queryString);


    SearchResults searchResults = new SearchResults();
    searchResults.setQuery(queryString);


    if (queryString != null && queryString.length() > 0) {
      Searcher searcher = null;


      try {
        searcher = new IndexSearcher(blog.getSearchIndexDirectory());
        Query query = QueryParser.parse(queryString, "blogEntry", getAnalyzer());
        Hits hits = searcher.search(query);


        for (int i = 0; i < hits.length(); i++) {
          Document doc = hits.doc(i);
          SearchHit result = new SearchHit(
              blog,
              doc.get("id"),
              doc.get("permalink"),
              doc.get("title"),
              doc.get("truncatedBody"),
              DateField.stringToDate(doc.get("date")),
              hits.score(i));
          searchResults.add(result);
        }
      } catch (ParseException pe) {
        pe.printStackTrace();
        searchResults.setMessage("Sorry, but there was an error. Please try another search");
      } catch (Exception e) {
        e.printStackTrace();
        throw new SearchException(e.getMessage());
      } finally {
        if (searcher != null) {
          try {
            searcher.close();
          } catch (IOException e) {
            // can't do much now! ;-)
          }
        }
      }
    }


    return searchResults;
  }


}
Source Code of net.sourceforge.pebble.index.SearchIndex

Related Classes of net.sourceforge.pebble.index.SearchIndex