Package com.gentics.cr.lucene.autocomplete

Source Code of com.gentics.cr.lucene.autocomplete.Autocompleter

package com.gentics.cr.lucene.autocomplete;

import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.store.Directory;

import com.gentics.cr.CRConfig;
import com.gentics.cr.CRConfigUtil;
import com.gentics.cr.CRRequest;
import com.gentics.cr.CRResolvableBean;
import com.gentics.cr.configuration.GenericConfiguration;
import com.gentics.cr.events.Event;
import com.gentics.cr.events.EventManager;
import com.gentics.cr.events.IEventReceiver;
import com.gentics.cr.lucene.events.IndexingFinishedEvent;
import com.gentics.cr.lucene.indexaccessor.IndexAccessor;
import com.gentics.cr.lucene.indexer.index.LuceneIndexLocation;
import com.gentics.cr.monitoring.MonitorFactory;
import com.gentics.cr.monitoring.UseCase;
import com.gentics.cr.util.indexing.IReIndexStrategy;
import com.gentics.cr.util.indexing.IndexLocation;
import com.gentics.cr.util.indexing.ReIndexNoSkipStrategy;

/**
* This class can be used to build an autocomplete index over an existing lucene
* index.
*
* from version 2.0.0 the {@link AutocompleteIndexExtension} is used for all
* Index related tasks and the {@link Autocompleter} will only handle search
* requests.
*
* Last changed: $Date: 2010-04-01 15:20:21 +0200 (Do, 01 Apr 2010) $
*
* @version $Revision: 528 $
* @author $Author: supnig@constantinopel.at $
* @author Sebastian Vogel <s.vogel@gentics.com>
*/
public class Autocompleter implements IEventReceiver, AutocompleteConfigurationKeys {

  protected static final Logger log = Logger.getLogger(Autocompleter.class);
  @Deprecated
  private LuceneIndexLocation source;
  private LuceneIndexLocation autocompleteLocation;

  private String autocompletefield = "content";

  @Deprecated
  private boolean autocompletereopenupdate = false;

  @Deprecated
  private long lastupdatestored = 0;

  @Deprecated
  private IReIndexStrategy reindexStrategy;

  /**
   * to keep backward compatibility - new implementations must declare in the
   * config if they use the new {@link AutocompleteIndexExtension} Class
   */
  @Deprecated
  private boolean useAutocompleteIndexExtension = false;

  public Autocompleter(CRConfig config) {
    GenericConfiguration srcConf = (GenericConfiguration) config.get(SOURCE_INDEX_KEY);
    GenericConfiguration autoConf = (GenericConfiguration) config.get(AUTOCOMPLETE_INDEX_KEY);
    useAutocompleteIndexExtension = config.getBoolean(
      AUTOCOMPLETE_USE_AUTCOMPLETE_INDEXER,
      useAutocompleteIndexExtension);

    source = null;
    if (!useAutocompleteIndexExtension) {
      source = LuceneIndexLocation.getIndexLocation(new CRConfigUtil(srcConf, "SOURCE_INDEX_KEY"));
    }
    autocompleteLocation = LuceneIndexLocation
        .getIndexLocation(new CRConfigUtil(autoConf, AUTOCOMPLETE_INDEX_KEY));
    autocompleteLocation.registerDirectoriesSpecial();
    String s_autofield = config.getString(AUTOCOMPLETE_FIELD_KEY);

    if (!useAutocompleteIndexExtension) {
      reindexStrategy = initReindexStrategy(config);
    }
    if (s_autofield != null)
      this.autocompletefield = s_autofield;

    String sReopenUpdate = config.getString(AUTOCOMPLETE_REOPEN_UPDATE);
    if (sReopenUpdate != null) {
      autocompletereopenupdate = Boolean.parseBoolean(sReopenUpdate);
    }

    if (!useAutocompleteIndexExtension) {
      try {
        // CHECK AND REMOVE LOCKING
        autocompleteLocation.forceRemoveLock();
        // REINDEX
        reIndex();
      } catch (IOException e) {
        log.error("Could not create autocomplete index.", e);
      }
      EventManager.getInstance().register(this);
    }
  }

  /**
   * from version 2.0.0 the {@link AutocompleteIndexExtension} is used for all
   * Index related tasks and the {@link Autocompleter} will only handle search
   * requests
   */
  @Deprecated
  public void processEvent(Event event) {
    if (IndexingFinishedEvent.INDEXING_FINISHED_EVENT_TYPE.equals(event.getType())) {
      IndexLocation il = (IndexLocation) event.getData();
      if (!reindexStrategy.skipReIndex(il)) {
        try {
          reIndex();
        } catch (IOException e) {
          log.error("Could not reindex autocomplete index.", e);
        }
      }
    }
  }

  public Collection<CRResolvableBean> suggestWords(CRRequest request) throws IOException {
    ArrayList<CRResolvableBean> result = new ArrayList<CRResolvableBean>();
    String term = request.getRequestFilter();
    // get the top 5 terms for query

    if (autocompletereopenupdate || useAutocompleteIndexExtension) {
      checkForUpdate();
    }

    IndexAccessor ia = autocompleteLocation.getAccessor();
    Searcher autoCompleteSearcher = ia.getPrioritizedSearcher();
    IndexReader autoCompleteReader = ia.getReader(false);
    try {
      Query query = new TermQuery(new Term(GRAMMED_WORDS_FIELD, term));
      Sort sort = new Sort(new SortField(COUNT_FIELD, SortField.LONG, true));
      TopDocs docs = autoCompleteSearcher.search(query, null, 5, sort);
      int id = 1;
      for (ScoreDoc doc : docs.scoreDocs) {
        CRResolvableBean bean = new CRResolvableBean(id++);
        Document d = autoCompleteReader.document(doc.doc);
        bean.set(SOURCE_WORD_FIELD, d.get(SOURCE_WORD_FIELD));
        bean.set(COUNT_FIELD, d.get(COUNT_FIELD));
        result.add(bean);
      }
    } finally {
      ia.release(autoCompleteSearcher);
      ia.release(autoCompleteReader, false);
    }

    return result;
  }

  private void checkForUpdate() {

    // Use the old checkForUpdate logic for backward compatibility if the
    // AutocompleteIndexExtension is not used
    if (!useAutocompleteIndexExtension) {
      IndexAccessor ia = source.getAccessor();
      boolean reopened = false;
      try {
        IndexReader reader = ia.getReader(false);
        Directory dir = reader.directory();
        try {
          if (dir.fileExists("reopen")) {
            long lastupdate = dir.fileModified("reopen");
            if (lastupdate != lastupdatestored) {
              reopened = true;
              lastupdatestored = lastupdate;
            }
          }
        } finally {
          ia.release(reader, false);
        }
        if (reopened) {

          reIndex();

        }
      } catch (IOException e) {
        log.debug("Could not reIndex autocomplete index.", e);
      }
    } else {
      // the new checkForUpdate Logic only calls reopenCheck on the
      // IndexLocation
      IndexAccessor ia = autocompleteLocation.getAccessor();
      autocompleteLocation.reopenCheck(ia, null);
    }
  }

  /**
   * from version 2.0.0 the {@link AutocompleteIndexExtension} is used for all
   * Index related tasks and the {@link Autocompleter} will only handle search
   * requests
   */
  @Deprecated
  private synchronized void reIndex() throws IOException {
    UseCase ucReIndex = MonitorFactory.startUseCase("reIndex()");
    // build a dictionary (from the spell package)
    log.debug("Starting to reindex autocomplete index.");
    IndexAccessor sia = this.source.getAccessor();
    IndexReader sourceReader = sia.getReader(false);
    LuceneDictionary dict = new LuceneDictionary(sourceReader, this.autocompletefield);
    IndexAccessor aia = this.autocompleteLocation.getAccessor();
    // IndexReader reader = aia.getReader(false);
    IndexWriter writer = aia.getWriter();

    try {
      writer.setMergeFactor(300);
      writer.setMaxBufferedDocs(150);
      // go through every word, storing the original word (incl. n-grams)
      // and the number of times it occurs
      // CREATE WORD LIST FROM SOURCE INDEX
      Map<String, Integer> wordsMap = new HashMap<String, Integer>();
      Iterator<String> iter = (Iterator<String>) dict.getWordsIterator();
      while (iter.hasNext()) {
        String word = iter.next();
        int len = word.length();
        if (len < 3) {
          continue; // too short we bail but "too long" is fine...
        }
        if (wordsMap.containsKey(word)) {
          throw new IllegalStateException("Lucene returned a bad word list");
        } else {
          // use the number of documents this word appears in
          wordsMap.put(word, sourceReader.docFreq(new Term(autocompletefield, word)));
        }
      }
      // DELETE OLD OBJECTS FROM INDEX
      writer.deleteAll();

      // UPDATE DOCUMENTS IN AUTOCOMPLETE INDEX
      for (String word : wordsMap.keySet()) {
        // ok index the word
        Document doc = new Document();
        doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); // orig term
        doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, Field.Index.ANALYZED)); // grammed
        doc.add(new Field(COUNT_FIELD, Integer.toString(wordsMap.get(word)), Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS)); // count
        writer.addDocument(doc);
      }
      writer.optimize();
    } finally {

      sia.release(sourceReader, false);
      // close writer

      aia.release(writer);
      // aia.release(reader,false);
    }
    autocompleteLocation.createReopenFile();
    log.debug("Finished reindexing autocomplete index.");
    ucReIndex.stop();
  }

  public void finalize() {
    autocompleteLocation.stop();
    if (!useAutocompleteIndexExtension) {
      source.stop();
      EventManager.getInstance().unregister(this);
    }
  }

  /**
   * from version 2.0.0 the {@link AutocompleteIndexExtension} is used for all
   * Index related tasks and the {@link Autocompleter} will only handle search
   * requests
   *
   * Initialize a config class for the periodical execution flag of the
   * indexer. If init of the configured class fails, a fallback class is
   * returned.
   *
   * @return configclass
   * @param config
   */
  @Deprecated
  private IReIndexStrategy initReindexStrategy(final CRConfig config) {
    String className = config.getString(REINDEXSTRATEGYCLASS_KEY);

    if (className != null && className.length() != 0) {
      try {
        Class<?> clazz = Class.forName(className);
        Constructor<?> constructor = clazz.getConstructor(CRConfig.class);
        return (IReIndexStrategy) constructor.newInstance(config);
      } catch (Exception e) {
        log.warn("Cound not init configured " + REINDEXSTRATEGYCLASS_KEY + ": " + className, e);
      }
    }
    return new ReIndexNoSkipStrategy(config);
  }

}
TOP

Related Classes of com.gentics.cr.lucene.autocomplete.Autocompleter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.