Package de.chris_soft.utilities

Source Code of de.chris_soft.utilities.FulltextIndexAndSearchUtils

/**
* NanoDoA - File based document archive
*
* Copyright (C) 2011-2012 Christian Packenius, christian.packenius@googlemail.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package de.chris_soft.utilities;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
* A very (!) basic implementation of a full text indexing and search, based on
* Apache Lucene.
* @author Christian Packenius.
*/
public class FulltextIndexAndSearchUtils {
  private static final Version VERSION = Version.LUCENE_35;

  private static final String FIELD_DOCUMENT_ID = "documentID";

  private static final String FIELD_FULLTEXT = "fulltext";

  private final Analyzer analyzer = new SimpleAnalyzer(VERSION);

  private final FSDirectory indexDirectory;

  /**
   * Constructor.
   * @param indexDirectory Directory for Lucene index files. Will be created if
   *          it does not exist.
   * @throws IOException
   */
  public FulltextIndexAndSearchUtils(File indexDirectory) throws IOException {
    indexDirectory.mkdirs();
    this.indexDirectory = FSDirectory.open(indexDirectory);
  }

  /**
   * Adds a document full text to the search index.
   * @param documentID ID of the document containing the full text.
   * @param fulltext Full document text.
   * @throws CorruptIndexException
   * @throws IOException
   */
  public void add(String documentID, String fulltext) throws CorruptIndexException, IOException {
    IndexWriterConfig indexWriterConfiguration = new IndexWriterConfig(VERSION, analyzer);
    indexWriterConfiguration.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter iwriter = new IndexWriter(indexDirectory, indexWriterConfiguration);
    Document doc = new Document();
    doc.add(new Field(FIELD_FULLTEXT, fulltext, Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field(FIELD_DOCUMENT_ID, documentID, Field.Store.YES, Field.Index.NO));
    iwriter.addDocument(doc);
    iwriter.close();
  }

  /**
   * Searches a search item in full index.
   * @param searchItem Item to search.
   * @return List of document IDs that match the search item.
   * @throws IOException
   * @throws ParseException
   */
  public List<String> search(String searchItem) throws IOException, ParseException {
    List<String> results = new ArrayList<String>();
    IndexReader ireader = IndexReader.open(indexDirectory);
    IndexSearcher isearcher = new IndexSearcher(ireader);
    QueryParser parser = new QueryParser(VERSION, FIELD_FULLTEXT, analyzer);
    parser.setLowercaseExpandedTerms(true);
    Query query = parser.parse(searchItem);

    TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false);

    isearcher.search(query, collector);// .scoreDocs;
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    for (ScoreDoc hit : hits) {
      Document hitDoc = isearcher.doc(hit.doc);
      results.add(hitDoc.get(FIELD_DOCUMENT_ID));
    }
    isearcher.close();
    ireader.close();
    return results;
  }

  /**
   * Close index.
   */
  public void close() {
    indexDirectory.close();
  }

  /**
   * Remove a document from the fulltext index.
   * @param documentID Document identifier.
   * @throws IOException
   */
  public void remove(String documentID) throws IOException {
    Term term = new Term(FIELD_DOCUMENT_ID, documentID);
    IndexReader indexReader = IndexReader.open(indexDirectory);
    indexReader.deleteDocuments(term);
    indexReader.close();
  }
}
TOP

Related Classes of de.chris_soft.utilities.FulltextIndexAndSearchUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.