Package net.sourceforge.pebble.index

Source Code of net.sourceforge.pebble.index.SearchIndex

/*
* Copyright (c) 2003-2011, Simon Brown
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*   - Redistributions of source code must retain the above copyright
*     notice, this list of conditions and the following disclaimer.
*
*   - Redistributions in binary form must reproduce the above copyright
*     notice, this list of conditions and the following disclaimer in
*     the documentation and/or other materials provided with the
*     distribution.
*
*   - Neither the name of Pebble nor the names of its contributors may
*     be used to endorse or promote products derived from this software
*     without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package net.sourceforge.pebble.index;

import net.sourceforge.pebble.domain.*;
import net.sourceforge.pebble.search.SearchException;
import net.sourceforge.pebble.search.SearchHit;
import net.sourceforge.pebble.search.SearchResults;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Collection;

/**
* Wraps up the functionality to index blog entries. This is really just
* a convenient wrapper around Lucene.
*
* @author    Simon Brown
*/
public class SearchIndex {

  /** the log used by this class */
  private static final Log log = LogFactory.getLog(SearchIndex.class);

  private final Blog blog;

  public SearchIndex(Blog blog) {
    this.blog = blog;
  }

  /**
   * Clears the index.
   */
  public void clear() {
    File searchDirectory = new File(blog.getSearchIndexDirectory());
    if (!searchDirectory.exists()) {
      searchDirectory.mkdirs();
    }

    synchronized (blog) {
      try {
        Analyzer analyzer = getAnalyzer();
        IndexWriter writer = new IndexWriter(searchDirectory, analyzer, true);
        writer.close();
      } catch (Exception e) {
        log.error(e.getMessage(), e);
      }
    }
  }

  /**
   * Allows a collection of blog entries to be indexed.
   */
  public void indexBlogEntries(Collection<BlogEntry> blogEntries) {
    synchronized (blog) {
      try {
        Analyzer analyzer = getAnalyzer();
        IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);

        for (BlogEntry blogEntry : blogEntries) {
          index(blogEntry, writer);
        }

        writer.close();
      } catch (Exception e) {
        log.error(e.getMessage(), e);
      }
    }
  }

  /**
   * Allows a collection of static pages to be indexed.
   */
  public void indexStaticPages(Collection<StaticPage> staticPages) {
    synchronized (blog) {
      try {
        Analyzer analyzer = getAnalyzer();
        IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);

        for (StaticPage staticPage : staticPages) {
          index(staticPage, writer);
        }

        writer.close();
      } catch (Exception e) {
        log.error(e.getMessage(), e);
      }
    }
  }

  /**
   * Allows a single blog entry to be (re)indexed. If the entry is already
   * indexed, this method deletes the previous index before adding the new
   * one.
   *
   * @param blogEntry   the BlogEntry instance to index
   */
  public void index(BlogEntry blogEntry) {
    try {
      synchronized (blog) {
        // first delete the blog entry from the index (if it was there)
        unindex(blogEntry);

        Analyzer analyzer = getAnalyzer();
        IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
        index(blogEntry, writer);
        writer.close();
      }
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  /**
   * Allows a single static page to be (re)indexed. If the page is already
   * indexed, this method deletes the previous index before adding the new
   * one.
   *
   * @param staticPage    the StaticPage instance to index
   */
  public void index(StaticPage staticPage) {
    try {
      synchronized (blog) {
        // first delete the static page from the index (if it was there)
        unindex(staticPage);

        Analyzer analyzer = getAnalyzer();
        IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
        index(staticPage, writer);
        writer.close();
      }
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  /**
   * Gets the Analyzer implementation to use.
   *
   * @return  an Analyzer instance
   * @throws Exception
   */
  private Analyzer getAnalyzer() throws Exception {
    Class c = Class.forName(blog.getLuceneAnalyzer());
    return (Analyzer)c.newInstance();
  }

  /**
   * Removes the index for a single blog entry to be removed.
   *
   * @param blogEntry   the BlogEntry instance to be removed
   */
  public void unindex(BlogEntry blogEntry) {
    try {
      synchronized (blog) {
        log.debug("Attempting to delete index for " + blogEntry.getTitle());
        IndexReader reader = IndexReader.open(blog.getSearchIndexDirectory());
        Term term = new Term("id", blogEntry.getId());
        log.debug("Deleted " + reader.delete(term) + " document(s) from the index");
        reader.close();
      }
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  /**
   * Removes the index for a single blog entry to be removed.
   *
   * @param staticPage    the StaticPage instance to be removed
   */
  public void unindex(StaticPage staticPage) {
    try {
      synchronized (blog) {
        log.debug("Attempting to delete index for " + staticPage.getTitle());
        IndexReader reader = IndexReader.open(blog.getSearchIndexDirectory());
        Term term = new Term("id", staticPage.getId());
        log.debug("Deleted " + reader.delete(term) + " document(s) from the index");
        reader.close();
      }
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  /**
   * Helper method to index an individual blog entry.
   *
   * @param blogEntry   the BlogEntry instance to index
   * @param writer      the IndexWriter to index with
   */
  private void index(BlogEntry blogEntry, IndexWriter writer) {
    if (!blogEntry.isPublished()) {
      return;
    }

    try {
      log.debug("Indexing " + blogEntry.getTitle());
      Document document = new Document();
      document.add(Field.Keyword("id", blogEntry.getId()));
      if (blogEntry.getTitle() != null) {
        document.add(Field.Text("title", blogEntry.getTitle()));
      } else {
        document.add(Field.Text("title", ""));
      }
      if (blogEntry.getSubtitle() != null) {
        document.add(Field.Text("subtitle", blogEntry.getSubtitle()));
      } else {
        document.add(Field.Text("subtitle", ""));
      }
      document.add(Field.Keyword("permalink", blogEntry.getPermalink()));
      document.add(Field.UnIndexed("date", DateField.dateToString(blogEntry.getDate())));
      if (blogEntry.getBody() != null) {
        document.add(Field.UnStored("body", blogEntry.getBody()));
      } else {
        document.add(Field.UnStored("body", ""));
      }
      if (blogEntry.getTruncatedContent() != null) {
        document.add(Field.Text("truncatedBody", blogEntry.getTruncatedContent()));
      } else {
        document.add(Field.Text("truncatedBody", ""));
      }

      if (blogEntry.getAuthor() != null) {
        document.add(Field.Text("author", blogEntry.getAuthor()));
      }

      // build up one large string with all searchable content
      // i.e. entry title, entry body and all response bodies
      StringBuffer searchableContent = new StringBuffer();
      searchableContent.append(blogEntry.getTitle());
      searchableContent.append(" ");
      searchableContent.append(blogEntry.getBody());

      for (Category category : blogEntry.getCategories()) {
        document.add(Field.Text("category", category.getId()));
      }

      for (Tag tag : blogEntry.getAllTags()) {
        document.add(Field.Text("tag", tag.getName()));
      }

      searchableContent.append(" ");
      Iterator it = blogEntry.getComments().iterator();
      while (it.hasNext()) {
        Comment comment = (Comment)it.next();
        if (comment.isApproved()) {
          searchableContent.append(comment.getBody());
          searchableContent.append(" ");
        }
      }
      it = blogEntry.getTrackBacks().iterator();
      while (it.hasNext()) {
        TrackBack trackBack = (TrackBack)it.next();
        if (trackBack.isApproved()) {
          searchableContent.append(trackBack.getExcerpt());
          searchableContent.append(" ");
        }
      }

      // join the title and body together to make searching on them both easier
      document.add(Field.UnStored("blogEntry", searchableContent.toString()));

      writer.addDocument(document);
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }
  /**
   * Helper method to index an individual blog entry.
   *
   * @param staticPage    the Page instance instance to index
   * @param writer      the IndexWriter to index with
   */
  private void index(StaticPage staticPage, IndexWriter writer) {
    try {
      log.debug("Indexing " + staticPage.getTitle());
      Document document = new Document();
      document.add(Field.Keyword("id", staticPage.getId()));
      if (staticPage.getTitle() != null) {
        document.add(Field.Text("title", staticPage.getTitle()));
      } else {
        document.add(Field.Text("title", ""));
      }
      document.add(Field.Keyword("permalink", staticPage.getPermalink()));
      document.add(Field.UnIndexed("date", DateField.dateToString(staticPage.getDate())));
      if (staticPage.getBody() != null) {
        document.add(Field.UnStored("body", staticPage.getBody()));
      } else {
        document.add(Field.UnStored("body", ""));
      }
      if (staticPage.getTruncatedContent() != null) {
        document.add(Field.Text("truncatedBody", staticPage.getTruncatedContent()));
      } else {
        document.add(Field.Text("truncatedBody", ""));
      }

      if (staticPage.getAuthor() != null) {
        document.add(Field.Text("author", staticPage.getAuthor()));
      }

      // build up one large string with all searchable content
      // i.e. entry title, entry body and all response bodies
      StringBuffer searchableContent = new StringBuffer();
      searchableContent.append(staticPage.getTitle());
      searchableContent.append(" ");
      searchableContent.append(staticPage.getBody());

      // join the title and body together to make searching on them both easier
      document.add(Field.UnStored("blogEntry", searchableContent.toString()));

      writer.addDocument(document);
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  public SearchResults search(String queryString) throws SearchException {

    log.debug("Performing search : " + queryString);

    SearchResults searchResults = new SearchResults();
    searchResults.setQuery(queryString);

    if (queryString != null && queryString.length() > 0) {
      Searcher searcher = null;

      try {
        searcher = new IndexSearcher(blog.getSearchIndexDirectory());
        Query query = QueryParser.parse(queryString, "blogEntry", getAnalyzer());
        Hits hits = searcher.search(query);

        for (int i = 0; i < hits.length(); i++) {
          Document doc = hits.doc(i);
          SearchHit result = new SearchHit(
              blog,
              doc.get("id"),
              doc.get("permalink"),
              doc.get("title"),
              doc.get("subtitle"),
              doc.get("truncatedBody"),
              DateField.stringToDate(doc.get("date")),
              hits.score(i));
          searchResults.add(result);
        }
      } catch (ParseException pe) {
        pe.printStackTrace();
        searchResults.setMessage("Sorry, but there was an error. Please try another search");
      } catch (Exception e) {
        e.printStackTrace();
        throw new SearchException(e.getMessage());
      } finally {
        if (searcher != null) {
          try {
            searcher.close();
          } catch (IOException e) {
            // can't do much now! ;-)
          }
        }
      }
    }

    return searchResults;
  }

}
TOP

Related Classes of net.sourceforge.pebble.index.SearchIndex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.