/*
* Copyright 2014, Stratio.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.index;
import com.stratio.cassandra.index.util.Log;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
import org.apache.lucene.index.sorter.SortingMergePolicy;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
/**
 * Class wrapping a Lucene directory and its readers, writers and searchers for near real-time (NRT) search.
*
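 * <p> A minimal lifecycle sketch; the path, timings, analyzer, field names and query below are illustrative
 * assumptions rather than values required by this class:
 * <pre>{@code
 * LuceneIndex index = new LuceneIndex("/tmp/lucene", 0.1D, 64, 5, 30,
 *                                     new StandardAnalyzer(Version.LUCENE_48));
 * index.init(new Sort(new SortField("key", SortField.Type.STRING)));
 *
 * Document document = new Document();
 * document.add(new StringField("key", "1", Field.Store.YES));
 * index.upsert(new Term("key", "1"), document); // add or replace
 * index.commit();
 *
 * Set<String> fields = Collections.singleton("key");
 * List<ScoredDocument> hits = index.search(new MatchAllDocsQuery(), null, null, 10, fields);
 * index.close();
 * }</pre>
 *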
* @author Andres de la Pena <adelapena@stratio.com>
*/
public class LuceneIndex
{
private final String path;
private final Double refreshSeconds;
private final Integer ramBufferMB;
private final Integer maxMergeMB;
private final Integer maxCachedMB;
private final Analyzer analyzer;
private File file;
private Directory directory;
private IndexWriter indexWriter;
private SearcherManager searcherManager;
private ControlledRealTimeReopenThread<IndexSearcher> searcherReopener;
private Sort sort;
/**
     * Builds a new {@code LuceneIndex} using the specified directory path and analyzer.
*
     * @param path The path of the directory where the Lucene files will be stored.
     * @param refreshSeconds The index readers refresh time in seconds. Writes are not guaranteed to be visible until
     * this time has elapsed.
* @param ramBufferMB The index writer buffer size in MB.
* @param maxMergeMB NRTCachingDirectory max merge size in MB.
* @param maxCachedMB NRTCachingDirectory max cached MB.
* @param analyzer The default {@link Analyzer}.
*/
public LuceneIndex(String path,
Double refreshSeconds,
Integer ramBufferMB,
Integer maxMergeMB,
Integer maxCachedMB,
Analyzer analyzer)
{
this.path = path;
this.refreshSeconds = refreshSeconds;
this.ramBufferMB = ramBufferMB;
this.maxMergeMB = maxMergeMB;
this.maxCachedMB = maxCachedMB;
this.analyzer = analyzer;
}
/**
     * Initializes this index using the specified {@link Sort} to try to keep the {@link Document}s sorted. This method
     * must be called before performing any other operation.
*
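     * <p> A call sketch, assuming the rows should be kept ordered by a hypothetical {@code "token"} field:
     * <pre>{@code
     * index.init(new Sort(new SortField("token", SortField.Type.LONG)));
     * }</pre>
     *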
* @param sort The {@link Sort} to be used.
*/
public void init(Sort sort) throws IOException
{
this.sort = sort;
// Get directory file
file = new File(path);
// Open or create directory
FSDirectory fsDirectory = FSDirectory.open(file);
directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);
// Setup index writer
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer);
config.setRAMBufferSizeMB(ramBufferMB);
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
config.setUseCompoundFile(true);
config.setMergePolicy(new SortingMergePolicy(config.getMergePolicy(), sort));
indexWriter = new IndexWriter(directory, config);
// Setup NRT search
SearcherFactory searcherFactory = new SearcherFactory()
{
            @Override
            public IndexSearcher newSearcher(IndexReader reader) throws IOException
{
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarity(new NoIDFSimilarity());
return searcher;
}
};
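        // The tracking writer records a generation for every change, allowing the reopener
        // thread below to refresh the searchers periodically (roughly every refreshSeconds)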
TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter);
searcherManager = new SearcherManager(indexWriter, true, searcherFactory);
searcherReopener = new ControlledRealTimeReopenThread<>(trackingIndexWriter,
searcherManager,
refreshSeconds,
refreshSeconds);
searcherReopener.start(); // Start the refresher thread
}
/**
     * Updates the specified {@link Document} by first deleting the documents containing the specified {@link Term} and then adding
* the new document. The delete and then add are atomic as seen by a reader on the same index (flush may happen only
* after the add).
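     *
     * <p> A call sketch with a hypothetical {@code "key"} field:
     * <pre>{@code
     * Document document = new Document();
     * document.add(new StringField("key", "1", Field.Store.YES));
     * index.upsert(new Term("key", "1"), document);
     * }</pre>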
*
* @param term The {@link Term} to identify the document(s) to be deleted.
* @param document The {@link Document} to be added.
*/
public void upsert(Term term, Document document) throws IOException
{
// Log.debug("Updating document %s with term %s", document, term);
indexWriter.updateDocument(term, document);
}
/**
* Deletes all the {@link Document}s containing the specified {@link Term}.
*
* @param term The {@link Term} to identify the documents to be deleted.
*/
public void delete(Term term) throws IOException
{
// Log.debug(String.format("Deleting by term %s", term));
indexWriter.deleteDocuments(term);
}
/**
* Deletes all the {@link Document}s satisfying the specified {@link Query}.
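     *
     * <p> For example, assuming a hypothetical {@code "key"} field:
     * <pre>{@code
     * index.delete(new TermQuery(new Term("key", "1")));
     * }</pre>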
*
* @param query The {@link Query} to identify the documents to be deleted.
*/
public void delete(Query query) throws IOException
{
// Log.debug("Deleting by query %s", query);
indexWriter.deleteDocuments(query);
}
/**
* Deletes all the {@link Document}s.
*/
public void truncate() throws IOException
{
Log.info("Deleting all");
indexWriter.deleteAll();
}
/**
* Commits the pending changes.
*/
public void commit() throws IOException
{
Log.info("Committing");
indexWriter.commit();
}
/**
* Commits all changes to the index, waits for pending merges to complete, and closes all associated resources.
*/
public void close() throws IOException
{
Log.info("Closing");
searcherReopener.interrupt();
searcherManager.close();
indexWriter.close();
directory.close();
analyzer.close();
}
/**
* Closes the index and removes all its files.
*/
public void drop() throws IOException
{
Log.info("Removing");
close();
FileUtils.deleteRecursive(file);
}
/**
     * Finds the top {@code count} hits for {@code query}, sorting them by the criteria in {@code sort} when it is
     * non-null, and starting after the optional {@code after} document.
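     * <p> A paging sketch; the query and field set are assumed to be built elsewhere:
     * <pre>{@code
     * List<ScoredDocument> firstPage = index.search(query, null, null, 100, fields);
     * ScoredDocument last = firstPage.get(firstPage.size() - 1); // assumes a non-empty page
     * List<ScoredDocument> nextPage = index.search(query, null, last, 100, fields);
     * }</pre>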
*
* @param query The {@link Query} to search for.
     * @param sort The {@link Sort} to be applied, or {@code null} to use relevance or the index's natural order.
     * @param after The last {@link ScoredDocument} of the previous page, or {@code null} to start from the beginning.
     * @param count The maximum number of results to be returned.
* @param fieldsToLoad The name of the fields to be loaded.
* @return The found documents, sorted according to the supplied {@link Sort} instance.
*/
public List<ScoredDocument> search(Query query,
Sort sort,
ScoredDocument after,
Integer count,
Set<String> fieldsToLoad) throws IOException
{
// Validate
        if (count == null || count <= 0)
{
throw new IllegalArgumentException("Positive count required");
}
if (fieldsToLoad == null || fieldsToLoad.isEmpty())
{
throw new IllegalArgumentException("Fields to load required");
}
IndexSearcher searcher = searcherManager.acquire();
try
{
// Search
ScoreDoc start = after == null ? null : after.getScoreDoc();
TopDocs topDocs = topDocs(searcher, query, sort, start, count);
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
// Collect the documents from query result
List<ScoredDocument> scoredDocuments = new ArrayList<>(scoreDocs.length);
for (ScoreDoc scoreDoc : scoreDocs)
{
Document document = searcher.doc(scoreDoc.doc, fieldsToLoad);
ScoredDocument scoredDocument = new ScoredDocument(document, scoreDoc);
scoredDocuments.add(scoredDocument);
}
return scoredDocuments;
}
finally
{
searcherManager.release(searcher);
}
}
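    /**
     * Returns the top {@code count} hits for {@code query} starting after {@code after}. When no {@code sort} is
     * specified and the query is a {@link ConstantScoreQuery}, relevance scoring is meaningless, so results are
     * collected in the index's natural sort order with early termination over the segments kept sorted by the
     * {@link SortingMergePolicy}.
     */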
private TopDocs topDocs(IndexSearcher searcher, Query query, Sort sort, ScoreDoc after, int count)
throws IOException
{
        // When no sort is specified, relevance scoring is meaningless for constant score queries,
        // so the index's natural sort is used instead, allowing early termination of the collection
if (sort == null)
{
if (query instanceof ConstantScoreQuery)
{
FieldDoc start = after == null ? null : (FieldDoc) after;
TopFieldCollector tfc = TopFieldCollector.create(this.sort, count, start, true, false, false, false);
Collector collector = new EarlyTerminatingSortingCollector(tfc, this.sort, count);
searcher.search(query, collector);
return tfc.topDocs();
}
else
{
return searcher.searchAfter(after, query, count);
}
}
else
{
return searcher.searchAfter(after, query, count, sort);
}
}
/**
     * Optimizes the index by forcing a merge of all its segments into a single one. This operation blocks until the
     * merging completes.
*
     * @throws IOException If there is an I/O error while merging.
*/
public void optimize() throws IOException
{
indexWriter.forceMerge(1, true);
indexWriter.commit();
}
/**
* Returns the total number of {@link Document}s in this index.
*
* @return The total number of {@link Document}s in this index.
     * @throws IOException If there is an I/O error while acquiring the searcher.
*/
public long getNumDocs() throws IOException
{
IndexSearcher searcher = searcherManager.acquire();
try
{
return searcher.getIndexReader().numDocs();
}
finally
{
searcherManager.release(searcher);
}
}
}