Source Code of de.chris_soft.utilities.FulltextIndexAndSearchUtils

/**
 * NanoDoA - File based document archive
 *
 * Copyright (C) 2011-2012 Christian Packenius, christian.packenius@googlemail.com
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.chris_soft.utilities;


import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;


/**
 * A very (!) basic implementation of a full text indexing and search, based on
 * Apache Lucene.
 * @author Christian Packenius.
 */
public class FulltextIndexAndSearchUtils {
  private static final Version VERSION = Version.LUCENE_35;


  private static final String FIELD_DOCUMENT_ID = "documentID";


  private static final String FIELD_FULLTEXT = "fulltext";


  private final Analyzer analyzer = new SimpleAnalyzer(VERSION);


  private final FSDirectory indexDirectory;


  /**
   * Constructor.
   * @param indexDirectory Directory for Lucene index files. Will be created if
   *          it does not exist.
   * @throws IOException
   */
  public FulltextIndexAndSearchUtils(File indexDirectory) throws IOException {
    indexDirectory.mkdirs();
    this.indexDirectory = FSDirectory.open(indexDirectory);
  }


  /**
   * Adds a document full text to the search index.
   * @param documentID ID of the document containing the full text.
   * @param fulltext Full document text.
   * @throws CorruptIndexException
   * @throws IOException
   */
  public void add(String documentID, String fulltext) throws CorruptIndexException, IOException {
    IndexWriterConfig indexWriterConfiguration = new IndexWriterConfig(VERSION, analyzer);
    indexWriterConfiguration.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter iwriter = new IndexWriter(indexDirectory, indexWriterConfiguration);
    Document doc = new Document();
    doc.add(new Field(FIELD_FULLTEXT, fulltext, Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field(FIELD_DOCUMENT_ID, documentID, Field.Store.YES, Field.Index.NO));
    iwriter.addDocument(doc);
    iwriter.close();
  }


  /**
   * Searches a search item in full index.
   * @param searchItem Item to search.
   * @return List of document IDs that match the search item.
   * @throws IOException
   * @throws ParseException
   */
  public List<String> search(String searchItem) throws IOException, ParseException {
    List<String> results = new ArrayList<String>();
    IndexReader ireader = IndexReader.open(indexDirectory);
    IndexSearcher isearcher = new IndexSearcher(ireader);
    QueryParser parser = new QueryParser(VERSION, FIELD_FULLTEXT, analyzer);
    parser.setLowercaseExpandedTerms(true);
    Query query = parser.parse(searchItem);


    TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false);


    isearcher.search(query, collector);// .scoreDocs;
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    for (ScoreDoc hit : hits) {
      Document hitDoc = isearcher.doc(hit.doc);
      results.add(hitDoc.get(FIELD_DOCUMENT_ID));
    }
    isearcher.close();
    ireader.close();
    return results;
  }


  /**
   * Close index.
   */
  public void close() {
    indexDirectory.close();
  }


  /**
   * Remove a document from the fulltext index.
   * @param documentID Document identifier.
   * @throws IOException
   */
  public void remove(String documentID) throws IOException {
    Term term = new Term(FIELD_DOCUMENT_ID, documentID);
    IndexReader indexReader = IndexReader.open(indexDirectory);
    indexReader.deleteDocuments(term);
    indexReader.close();
  }
}
Source Code of de.chris_soft.utilities.FulltextIndexAndSearchUtils

Related Classes of de.chris_soft.utilities.FulltextIndexAndSearchUtils