Package com.tigertext.lucene

Source Code of com.tigertext.lucene.LuceneServer

package com.tigertext.lucene;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.security.InvalidParameterException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNameFilter;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ext.Extensions;
import org.apache.lucene.search.ChainedFilter;
import org.apache.lucene.search.FieldCacheTermsFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import com.ericsson.otp.erlang.OtpErlangAtom;
import com.ericsson.otp.erlang.OtpErlangBinary;
import com.ericsson.otp.erlang.OtpErlangException;
import com.ericsson.otp.erlang.OtpErlangList;
import com.ericsson.otp.erlang.OtpErlangLong;
import com.ericsson.otp.erlang.OtpErlangObject;
import com.ericsson.otp.erlang.OtpErlangString;
import com.ericsson.otp.erlang.OtpErlangTuple;
import com.ericsson.otp.erlang.OtpNode;
import com.ericsson.otp.stdlib.OtpContinueException;
import com.ericsson.otp.stdlib.OtpGenServer;
import com.ericsson.otp.stdlib.OtpStopException;
import com.tigertext.lucene.DocumentTranslator.UnsupportedFieldTypeException;
import com.tigertext.lucene.ext.ErlangParserExtension;
import com.tigertext.lucene.ext.NearParserExtension;
import com.tigertext.lucene.ext.RedisParserExtension;

/**
* @author Fernando Benavides <elbrujohalcon@inaka.net>
*
*/
public class LuceneServer extends OtpGenServer {
  private static final Logger jlog = Logger.getLogger(LuceneServer.class
      .getName());

  protected Analyzer analyzer;
  protected Directory index;
  protected IndexWriter writer;
  protected DocumentTranslator translator;
  protected Extensions extensions;

  private int allowedThreads;

  private int initialThreads;

  private SearcherManager searcherManager;

  // TODO: Let the user configure the internal parameters (i.e. analyzer,
  // index, writer)
  /**
   * @param host
   *            Host node
   * @param allowedThreads
   *            Number of threads allowed to run queries at the same time
   * @throws CorruptIndexException
   *             Check Lucene docs for a description
   * @throws LockObtainFailedException
   *             Check Lucene docs for a description
   * @throws IOException
   *             Check Lucene docs for a description
   */
  public LuceneServer(OtpNode host, int allowedThreads)
      throws CorruptIndexException, LockObtainFailedException,
      IOException {
    super(host, "lucene_server");

    this.allowedThreads = allowedThreads;
    this.initialThreads = Thread.activeCount() + 3;

    this.analyzer = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName,
          Reader reader) {
        WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(
            Version.LUCENE_36, reader);
        TokenStream result = new LowerCaseFilter(Version.LUCENE_36,
            tokenStream);
        return new TokenStreamComponents(tokenStream, result);
      }
    };
    this.index = new RAMDirectory();
    this.writer = new IndexWriter(this.index, new IndexWriterConfig(
        Version.LUCENE_36, this.analyzer));
    this.translator = new DocumentTranslator();
    Extensions ext = new Extensions('.');
    ext.add("near", new NearParserExtension());
    ext.add("erlang", new ErlangParserExtension(this.translator));
    ext.add("redis", new RedisParserExtension());
    this.extensions = ext;
    try {
      add(this.translator.convert(new OtpErlangList(new OtpErlangList(
          new OtpErlangTuple(new OtpErlangObject[] {
              new OtpErlangAtom("to"),
              new OtpErlangAtom("delete") })))));
      del("to:delete");
    } catch (UnsupportedFieldTypeException e) {
    }
    final SearcherManager searcherManager = new SearcherManager(
        this.writer, true, null);
    this.searcherManager = searcherManager;
    new Thread("SearcherManager-refresher") {
      @Override
      public void run() {
        try {
          while (true) {
            searcherManager.maybeRefresh();
            sleep(10000);
          }
        } catch (IOException ioe) {
          jlog.severe("Couldn't refresh searcher:\n\t" + ioe);
          ioe.printStackTrace();
        } catch (InterruptedException e) {
          return;
        }
      };
    }.start();
  }

  @Override
  protected OtpErlangObject handleCall(OtpErlangObject cmd,
      OtpErlangTuple from) throws OtpStopException, OtpContinueException,
      OtpErlangException {

    OtpErlangTuple cmdTuple = (OtpErlangTuple) cmd;
    OtpErlangAtom cmdName = (OtpErlangAtom) cmdTuple.elementAt(0);
    if (cmdName.atomValue().equals("pid")) { // {pid}
      return super.getSelf();

    } else if (cmdName.atomValue().equals("match")) {
      // {match, Query :: binary(), PageSize :: integer(), SortFields,
      // Filters}
      try {

        String queryString = new String(
            ((OtpErlangBinary) cmdTuple.elementAt(1)).binaryValue(),
            "ISO-8859-1");
        int pageSize = ((OtpErlangLong) cmdTuple.elementAt(2))
            .intValue();
        OtpErlangObject[] sortFieldNames = ((OtpErlangList) cmdTuple
            .elementAt(3)).elements();
        SortField[] sortFields = new SortField[sortFieldNames.length];
        for (int i = 0; i < sortFields.length; i++) {
          sortFields[i] = this.translator
              .createSortField(sortFieldNames[i]);
        }
        Filter filter = null;
        if (cmdTuple.arity() > 4) {
          OtpErlangObject[] filterDescriptors = ((OtpErlangList) cmdTuple
              .elementAt(4)).elements();
          if (filterDescriptors.length > 0) {
            Filter[] filters = new FieldCacheTermsFilter[filterDescriptors.length];
            for (int i = 0; i < filterDescriptors.length; i++) {
              OtpErlangTuple fd = (OtpErlangTuple) filterDescriptors[i];
              if (fd.arity() != 2) {
                jlog.warning("Invalid filter description: "
                    + fd);
                throw new InvalidParameterException(
                    "Invalid filter description: " + fd);
              }
              OtpErlangAtom fieldName = (OtpErlangAtom) fd
                  .elementAt(0);
              OtpErlangList fieldValues = (OtpErlangList) fd
                  .elementAt(1);
              if (fieldValues.arity() == 0) {
                jlog.warning("Invalid filter description: "
                    + fd);
                throw new OtpErlangException(
                    "Invalid filter description: " + fd);
              }
              filters[i] = this.translator.createFilter(
                  fieldName, fieldValues);
            }
            filter = new org.apache.lucene.search.ChainedFilter(
                filters, ChainedFilter.AND);
          }
        }
        runMatch(queryString, pageSize, sortFields, filter, from);
        throw new OtpContinueException();
      } catch (InvalidParameterException ipe) {
        ipe.printStackTrace();
        OtpErlangTuple reply = new OtpErlangTuple(
            new OtpErlangObject[] { new OtpErlangAtom("error"),
                new OtpErlangString(ipe.getMessage()) });
        return reply;
      } catch (ClassCastException cce) {
        cce.printStackTrace();
        OtpErlangTuple reply = new OtpErlangTuple(
            new OtpErlangObject[] { new OtpErlangAtom("error"),
                new OtpErlangString(cce.getMessage()) });
        return reply;
      } catch (UnsupportedFieldTypeException ufte) {
        ufte.printStackTrace();
        OtpErlangTuple reply = new OtpErlangTuple(
            new OtpErlangObject[] { new OtpErlangAtom("error"),
                new OtpErlangString(ufte.getMessage()) });
        return reply;
      } catch (UnsupportedEncodingException uee) {
        uee.printStackTrace();
        OtpErlangTuple reply = new OtpErlangTuple(
            new OtpErlangObject[] { new OtpErlangAtom("error"),
                new OtpErlangString(uee.getMessage()) });
        return reply;
      }
    } else if (cmdName.atomValue().equals("continue")) {
      // {continue, Token :: binary(), PageSize :: integer()}
      Object pageToken = ((OtpErlangBinary) cmdTuple.elementAt(1))
          .getObject();
      int pageSize = ((OtpErlangLong) cmdTuple.elementAt(2)).intValue();
      runContinue(pageToken, pageSize, from);
      throw new OtpContinueException();

    } else {
      return new OtpErlangTuple(new OtpErlangObject[] {
          new OtpErlangAtom("error"),
          new OtpErlangAtom("unknown command") });
    }
  }

  @Override
  protected void handleCast(OtpErlangObject cmd) throws OtpStopException,
      OtpErlangException {
    OtpErlangTuple cmdTuple = (OtpErlangTuple) cmd;
    String cmdName = ((OtpErlangAtom) cmdTuple.elementAt(0)).atomValue();
    if (cmdName.equals("save")) { // {save, Path}
      save(((OtpErlangString) cmdTuple.elementAt(1)).stringValue());
    } else if (cmdName.equals("clear")) { // {clear}
      clear();
    } else if (cmdName.equals("del")) {
      // {del, Query :: string()}
      if (cmdTuple.elementAt(1) instanceof OtpErlangBinary) {
        String queryString = new String(
            ((OtpErlangBinary) cmdTuple.elementAt(1)).binaryValue());
        del(queryString);
      } else {
        jlog.severe("Received " + cmdTuple.elementAt(1)
            + " instead of a binary in " + cmdTuple);
        throw new OtpStopException();
      }
    } else if (cmdName.equals("add")) {
      // {add, Docs :: [[{atom(), string()}]]}
      try {
        add(this.translator.convert((OtpErlangList) cmdTuple
            .elementAt(1)));
      } catch (UnsupportedFieldTypeException ufte) {
        jlog.severe(ufte.getMessage());
        ufte.printStackTrace();
      }
    } else if (cmdName.equals("stop")) { // {stop}
      jlog.info("Stopping");
      throw new OtpStopException();
    }
  }

  private void save(String dir) {
    File folder = new File(dir);
    try {
      Directory dest = FSDirectory.open(folder);
      IndexFileNameFilter filter = IndexFileNameFilter.getFilter();
      for (String file : this.index.listAll()) {
        if (filter.accept(null, file)) {
          this.index.copy(dest, file, file);
        }
      }
    } catch (IOException ioe) {
      jlog.severe("Couldn't copy index:\n\t" + ioe);
      ioe.printStackTrace();
    }
  }

  @Override
  protected void handleInfo(OtpErlangObject cmd) throws OtpStopException {
  }

  @Override
  protected void terminate(OtpErlangException oee) {
    if (oee != null) {
      jlog.warning("Lucene Server terminating with reason " + oee);
      System.exit(1);
    } else {
      jlog.info("Lucene Server terminating normally");
      System.exit(0);
    }
  }

  private QueryParser queryParser() {
    return new LuceneQueryParser(Version.LUCENE_36, this.analyzer,
        this.translator, this.extensions);
  }

  protected void add(List<Document> docs) {
    try {
      this.writer.addDocuments(docs);
      this.writer.commit();
      jlog.info("" + docs.size() + " docs added");
    } catch (CorruptIndexException cie) {
      jlog.severe("Corrupt index!");
      cie.printStackTrace();
    } catch (IOException ioe) {
      jlog.severe("Couldn't write docs:\n\t" + ioe);
      ioe.printStackTrace();
    }
  }

  private void del(String queryString) {
    jlog.info("Deleting " + queryString);
    try {
      Query q = this.queryParser().parse(queryString);
      this.writer.deleteDocuments(q);
      this.writer.commit();
      jlog.info("Several docs deleted");
    } catch (CorruptIndexException cie) {
      jlog.severe("Corrupt index!");
      cie.printStackTrace();
    } catch (IOException ioe) {
      jlog.severe("Couldn't del values docs:\n\t" + ioe);
      ioe.printStackTrace();
    } catch (ParseException pe) {
      jlog.severe("Couldn't parse del query:\n\t" + pe);
      pe.printStackTrace();
    }
  }

  protected void clear() {
    try {
      this.writer.deleteAll();
      this.writer.commit();
      jlog.info("All docs deleted");
    } catch (IOException ioe) {
      jlog.severe("Couldn't del all values:\n\t" + ioe);
      ioe.printStackTrace();
    }
  }

  protected OtpErlangObject continueMatch(Object pageTokenAsObject,
      int pageSize) throws IOException, ParseException {
    LucenePageToken pageToken = (LucenePageToken) pageTokenAsObject;
    return match(pageToken, pageSize);
  }

  private OtpErlangObject match(LucenePageToken pageToken, int pageSize)
      throws IOException, ParseException {

    long t0 = System.nanoTime();

    this.searcherManager.maybeRefresh();
    IndexSearcher searcher = this.searcherManager.acquire();

    Query q = this.queryParser().parse(pageToken.getQueryString());
    TopDocs topDocs;

    Sort sort = new Sort(pageToken.getSortFields());
    TopFieldCollector collector = TopFieldCollector.create(sort,
        pageToken.getNextFirstHit() + pageSize - 1, true, true, true,
        true);

    long t1 = System.nanoTime();
    searcher.search(q, pageToken.getFilter(), collector);
    long t2 = System.nanoTime();

    topDocs = collector.topDocs(pageToken.getNextFirstHit() - 1);

    ScoreDoc[] hits = topDocs.scoreDocs;
    // jlog.info("Sort: " + sort + "; topDocs: " + topDocs + "; hits: + " +
    // hits);
    int firstHit = 0;
    if (hits.length > 0) {
      firstHit = pageToken.getNextFirstHit();
    }

    List<Document> docs = new ArrayList<Document>(hits.length);
    for (ScoreDoc sd : hits) {
      docs.add(searcher.doc(sd.doc));
    }
    searcher.close();
    searcherManager.release(searcher);
    searcher = null;

    boolean nextPage = hits.length == pageSize
        && pageToken.incrementFirstHit(pageSize) <= topDocs.totalHits;

    OtpErlangList valuesAsList = this.translator.convert(docs, hits);

    long t3 = System.nanoTime();

    long queryTime = (t3 - t0) / 1000;
    long searchTime = (t2 - t1) / 1000;

    // Metadata as a proplist

    OtpErlangObject[] metadata = new OtpErlangObject[nextPage ? 5 : 4];
    metadata[0] = new OtpErlangTuple(new OtpErlangObject[] {
        new OtpErlangAtom("total_hits"),
        new OtpErlangLong(topDocs.totalHits) });
    metadata[1] = new OtpErlangTuple(new OtpErlangObject[] {
        new OtpErlangAtom("first_hit"), new OtpErlangLong(firstHit) });
    metadata[2] = new OtpErlangTuple(new OtpErlangObject[] {
        new OtpErlangAtom("query_time"), new OtpErlangLong(queryTime) });
    metadata[3] = new OtpErlangTuple(
        new OtpErlangObject[] { new OtpErlangAtom("search_time"),
            new OtpErlangLong(searchTime) });
    if (nextPage) {
      metadata[4] = new OtpErlangTuple(new OtpErlangObject[] {
          new OtpErlangAtom("next_page"),
          new OtpErlangBinary(pageToken) });
    }

    OtpErlangList metadataAsList = new OtpErlangList(metadata);

    // Final result
    return new OtpErlangTuple(new OtpErlangObject[] { valuesAsList,
        metadataAsList });
  }

  /**
   * @param pageToken
   *            From where to continue
   * @param pageSize
   *            Number of results per page
   * @param from
   *            Process expecting the response
   */
  protected void runContinue(final Object pageToken, final int pageSize,
      final OtpErlangTuple from) {
    new Thread("continue-runner") {
      @Override
      public void run() {
        OtpErlangObject reply = null;
        try {
          reply = new OtpErlangTuple(new OtpErlangObject[] {
              new OtpErlangAtom("ok"),
              continueMatch(pageToken, pageSize) });
        } catch (IOException ioe) {
          jlog.severe("Couldn't search the index: " + ioe);
          ioe.printStackTrace();
          reply = new OtpErlangTuple(new OtpErlangObject[] {
              new OtpErlangAtom("error"),
              new OtpErlangString(ioe.getMessage()) });
        } catch (ParseException pe) {
          jlog.severe("Bad Formatted Query");
          pe.printStackTrace();
          reply = new OtpErlangTuple(new OtpErlangObject[] {
              new OtpErlangAtom("error"),
              new OtpErlangString(pe.getMessage()) });
        }
        OtpGenServer.reply(LuceneNode.NODE, from, reply);
      }
    }.start();
  }

  /**
   * @param queryString
   *            Query to match indexed docs against
   * @param sortFields
   *            Fields to sort the result
   * @param pageSize
   *            Number of results per page
   * @param from
   *            Process expecting the response
   */
  protected void runMatch(final String queryString, final int pageSize,
      final SortField[] sortFields, final Filter filter,
      final OtpErlangTuple from) {
    int threadCount = Thread.activeCount() - this.initialThreads;
    jlog.info("Currently using " + threadCount + " threads");
    if (threadCount <= this.allowedThreads) {
      new Thread("query-runner") {
        @Override
        public void run() {
          doRunMatch(queryString, pageSize, sortFields, filter, from);
        }
      }.start();
    } else {
      jlog.warning("More than " + this.allowedThreads
          + " threads... waiting...");
      try {
        Thread.sleep(500);
      } catch (InterruptedException e) {
      }
      runMatch(queryString, pageSize, sortFields, filter, from);
    }
  }

  /**
   * @param queryString
   *            Query to match indexed docs against
   * @param sortFields
   *            Fields to sort the result
   * @param pageSize
   *            Number of results per page
   * @param from
   *            Process expecting the response
   */
  protected void doRunMatch(final String queryString, final int pageSize,
      final SortField[] sortFields, final Filter filter,
      final OtpErlangTuple from) {
    OtpErlangObject reply = null;
    try {
      reply = new OtpErlangTuple(new OtpErlangObject[] {
          new OtpErlangAtom("ok"),
          match(new LucenePageToken(queryString, sortFields, filter),
              pageSize) });
    } catch (IOException ioe) {
      jlog.severe("Couldn't search the index: " + ioe);
      ioe.printStackTrace();
      reply = new OtpErlangTuple(new OtpErlangObject[] {
          new OtpErlangAtom("error"),
          new OtpErlangString(ioe.getMessage()) });
    } catch (ParseException pe) {
      jlog.severe("Bad Formatted Query");
      pe.printStackTrace();
      reply = new OtpErlangTuple(new OtpErlangObject[] {
          new OtpErlangAtom("error"),
          new OtpErlangString(pe.getMessage()) });
    }
    OtpGenServer.reply(LuceneNode.NODE, from, reply);
  }
}
TOP

Related Classes of com.tigertext.lucene.LuceneServer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.