Package com.stratio.cassandra.index

Source Code of com.stratio.cassandra.index.RowService

* Copyright 2014, Stratio.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package com.stratio.cassandra.index;

import com.stratio.cassandra.index.query.Search;
import com.stratio.cassandra.index.schema.Column;
import com.stratio.cassandra.index.schema.Columns;
import com.stratio.cassandra.index.schema.Schema;
import com.stratio.cassandra.index.util.Log;
import com.stratio.cassandra.index.util.TaskQueue;
import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.ColumnDefinition;
import org.apache.cassandra.cql3.ColumnIdentifier;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.composites.CellName;
import org.apache.cassandra.db.composites.CellNameType;
import org.apache.cassandra.db.filter.SliceQueryFilter;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.UTF8Type;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.lucene.document.Document;

import java.nio.ByteBuffer;
import java.util.*;

* Class for mapping rows between Cassandra and Lucene.
* @author Andres de la Pena <>
public abstract class RowService

    protected final ColumnFamilyStore baseCfs;
    protected final ColumnDefinition columnDefinition;
    protected final RowMapper rowMapper;
    protected final CFMetaData metadata;
    protected final CellNameType nameType;
    protected final ColumnIdentifier indexedColumnName;
    protected final Schema schema;
    protected final LuceneIndex luceneIndex;

     * The max number of rows to be read per iteration
    private static final int MAX_PAGE_SIZE = 100000;
    private static final int FILTERING_PAGE_SIZE = 1000;

    private TaskQueue indexQueue;

     * Returns a new {@code RowService}.
     * @param baseCfs          The base column family store.
     * @param columnDefinition The indexed column definition.
    protected RowService(ColumnFamilyStore baseCfs, ColumnDefinition columnDefinition)

        this.baseCfs = baseCfs;
        this.columnDefinition = columnDefinition;
        this.metadata = baseCfs.metadata;
        this.nameType = metadata.comparator;
        this.indexedColumnName =;

        RowIndexConfig config = new RowIndexConfig(metadata, columnDefinition.getIndexOptions());

        this.schema = config.getSchema();
        this.rowMapper =, columnDefinition, schema);

        this.luceneIndex = new LuceneIndex(config.getPath(),

        this.indexQueue = new TaskQueue(config.getIndexingThreads(), config.getIndexingQueuesSize());

     * Returns a new {@link RowService} for the specified {@link ColumnFamilyStore} and {@link ColumnDefinition}.
     * @param baseCfs          The {@link ColumnFamilyStore} associated to the managed index.
     * @param columnDefinition The {@link ColumnDefinition} of the indexed column.
     * @return A new {@link RowService} for the specified {@link ColumnFamilyStore} and {@link ColumnDefinition}.
    public static RowService build(ColumnFamilyStore baseCfs, ColumnDefinition columnDefinition) throws IOException
        // Factory method: the concrete RowService is chosen from the number of clustering columns
        // declared in the base column family metadata.
        int clusteringPosition = baseCfs.metadata.clusteringColumns().size();
        if (clusteringPosition > 0)
            // At least one clustering column: use RowServiceWide.
            return new RowServiceWide(baseCfs, columnDefinition);
            // NOTE(review): presumably the `else` branch (no clustering columns) — the keyword is not visible
            // in this copy of the file; confirm against the full source.
            return new RowServiceSkinny(baseCfs, columnDefinition);

     * Returns the used {@link Schema}.
     * @return The used {@link Schema}.
    protected final Schema getSchema()
        return schema;

     * Returns the names of the document fields to be loaded when reading a Lucene index.
     * @return The names of the document fields to be loaded.
    protected abstract Set<String> fieldsToLoad();

     * Indexes the logical {@link Row} identified by the specified key and column family using the specified time stamp.
     * The row must be read from the {@link ColumnFamilyStore} because it could already exist with more columns than
     * the specified ones. The specified {@link ColumnFamily} is used to determine the cluster key. This operation is
     * performed asynchronously.
     * @param key          A partition key.
     * @param columnFamily A {@link ColumnFamily} with a single common cluster key.
     * @param timestamp    The insertion time.
    protected void index(final ByteBuffer key, final ColumnFamily columnFamily, final long timestamp)
        indexQueue.submitAsynchronous(key, new Runnable()
            public void run()
                    indexInner(key, columnFamily, timestamp);
                catch (Exception e)
                    Log.error(e, "Error while running indexing task");

     * Puts in the Lucene index the Cassandra row identified by the specified partition key and the clustering
     * keys contained in the specified {@link ColumnFamily}.
     * @param key          The partition key.
     * @param columnFamily The column family containing the clustering keys.
     * @param timestamp    The operation time stamp.
    protected abstract void indexInner(ByteBuffer key, ColumnFamily columnFamily, long timestamp) throws IOException;

     * Deletes the partition identified by the specified partition key. This operation is performed asynchronously.
     * @param partitionKey The partition key identifying the partition to be deleted.
    public void delete(final DecoratedKey partitionKey)
        indexQueue.submitAsynchronous(partitionKey, new Runnable()
            public void run()
                catch (Exception e)
                    Log.error(e, "Error while running deletion task");

     * Deletes the partition identified by the specified partition key.
     * @param partitionKey The partition key identifying the partition to be deleted.
    protected abstract void deleteInner(DecoratedKey partitionKey) throws IOException;

     * Deletes all the {@link Document}s.
    public final void truncate() throws IOException

     * Closes and removes all the index files.
    public final void delete() throws IOException

     * Commits the pending changes. This operation is performed synchronously.
    public final void commit()
        indexQueue.submitSynchronous(new Runnable()
            public void run()
                catch (Exception e)
                    Log.error(e, "Error while running commit task");

     * Returns the stored and indexed {@link Row}s satisfying the specified restrictions.
     * @param search      The {@link Search} to be performed.
     * @param expressions A list of filtering {@link IndexExpression}s to be satisfied.
     * @param dataRange   A {@link DataRange} to be satisfied.
     * @param limit       The max number of {@link Row}s to be returned.
     * @param timestamp   The operation time stamp.
     * @return The {@link Row}s satisfying the specified restrictions.
    public final List<Row> search(Search search,
                                  List<IndexExpression> expressions,
                                  DataRange dataRange,
                                  final int limit,
                                  long timestamp) throws IOException
        // Paginated search: each page asks Lucene for scored documents, maps them to rows through rows(...),
        // keeps the rows that pass accepted(...), and repeats until `limit` rows are collected or a page
        // comes back short.
        // Log.debug("Searching with search %s ", search);

        // Build the Lucene filter, query and sort from the data range and the search
        Filter filter = rowMapper.filter(dataRange);
        Query query = search.filteredQuery(schema, filter);
        Sort sort = search.sort(schema);

        // Pagination state and timing counters (the counters are only used by the debug logs below)
        List<Row> rows = new LinkedList<>(); // The row list to be returned
        ScoredDocument lastDoc = null; // Last document of the previous page, passed to the next page request
        int collectedDocs = 0;
        long searchTime = 0;
        long collectTime = 0;
        // NOTE(review): numPages is never incremented in the visible lines, so the final debug message would
        // always report 0 pages — the increment may have been lost from this copy; confirm.
        int numPages = 0;

        // Paginate search collecting documents
        List<ScoredDocument> scoredDocuments;
        // The first page never asks for more than MAX_PAGE_SIZE documents
        int pageSize = Math.min(limit, MAX_PAGE_SIZE);
        boolean maybeMore;
            // Search rows identifiers in Lucene
            // NOTE(review): the loop opening and the receiver of the search call below are not visible in
            // this copy of the file; the trailing `while` shows this section runs at least once.
            long searchStartTime = System.currentTimeMillis();
            scoredDocuments =, sort, lastDoc, pageSize, fieldsToLoad());
            collectedDocs += scoredDocuments.size();
            // An empty page resets lastDoc to null
            lastDoc = scoredDocuments.isEmpty() ? null : scoredDocuments.get(scoredDocuments.size() - 1);
            searchTime += System.currentTimeMillis() - searchStartTime;

            // Collect rows from Cassandra
            long collectStartTime = System.currentTimeMillis();
            for (Row row : rows(scoredDocuments, timestamp))
                // Null rows and rows failing any of the filtering expressions are discarded
                // NOTE(review): the statement executed for accepted rows is not visible in this copy.
                if (row != null && accepted(row, expressions))
            collectTime += System.currentTimeMillis() - collectStartTime;

            // Setup next iteration
            // A page shorter than requested means there are no more documents to read
            maybeMore = scoredDocuments.size() == pageSize;
            // NOTE(review): another page is only fetched while rows.size() < limit, so rows.size() - limit is
            // negative whenever this value is actually used and the max() always yields FILTERING_PAGE_SIZE.
            // `limit - rows.size()` (the number of rows still missing) may have been intended — confirm.
            pageSize = Math.min(Math.max(FILTERING_PAGE_SIZE, rows.size() - limit), MAX_PAGE_SIZE);

            // Iterate while there are still documents to read and we don't have enough rows
        } while (maybeMore && rows.size() < limit);

        Log.debug("Lucene time: %d ms", searchTime);
        Log.debug("Cassandra time: %d ms", collectTime);
        Log.debug("Collected %d docs and %d rows in %d pages", collectedDocs, rows.size(), numPages);

        // The result is ordered with the no-argument comparator() before being returned
        Collections.sort(rows, comparator());
        return rows;

     * Returns {@code true} if the specified {@link Row} satisfies all the specified {@link IndexExpression}s,
     * {@code false} otherwise.
     * @param row         A {@link Row}.
     * @param expressions A list of {@link IndexExpression}s to be satisfied by {@code row}.
     * @return {@code true} if the specified {@link Row} satisfies all the specified {@link IndexExpression}s,
     * {@code false} otherwise.
    private boolean accepted(Row row, List<IndexExpression> expressions)
        // With an empty expression list the row is accepted without extracting its columns.
        if (!expressions.isEmpty())
            // Columns are extracted once and reused for every expression.
            Columns columns = rowMapper.columns(row);
            for (IndexExpression expression : expressions)
                // A single unsatisfied expression rejects the row.
                if (!accepted(columns, expression))
                    return false;
        return true;

     * Returns {@code true} if the specified {@link com.stratio.cassandra.index.schema.Columns} satisfies the specified {@link IndexExpression},
     * {@code false} otherwise.
     * @param columns    A {@link com.stratio.cassandra.index.schema.Columns}
     * @param expression A {@link IndexExpression}s to be satisfied by {@code columns}.
     * @return {@code true} if the specified {@link com.stratio.cassandra.index.schema.Columns} satisfies the specified {@link IndexExpression},
     * {@code false} otherwise.
    private boolean accepted(Columns columns, IndexExpression expression)
        // Checks one expression against the column named by that expression.

        ByteBuffer expectedValue = expression.value;

        ColumnDefinition def = metadata.getColumnDefinition(expression.column);
        // NOTE(review): the right-hand side of this assignment is not visible in this copy of the file;
        // presumably the column name is derived from `def` — confirm.
        String name =;

        // A missing column cannot satisfy the expression
        Column column = columns.getCell(name);
        if (column == null)
            return false;

        // Neither can a column without a raw value
        ByteBuffer actualValue = column.getRawValue();
        if (actualValue == null)
            return false;

        AbstractType<?> validator = def.type;
        // NOTE(review): the call producing `comparison` is truncated in this copy; given the operator checks
        // below it presumably compares actualValue against expectedValue through `validator` — confirm.
        int comparison =, expectedValue);
        // Map the comparison sign to the expression operator
        switch (expression.operator)
            case EQ:
                return comparison == 0;
            case GTE:
                return comparison >= 0;
            case GT:
                return comparison > 0;
            case LTE:
                return comparison <= 0;
            case LT:
                return comparison < 0;
                // NOTE(review): presumably the default branch for any other operator — its label is not
                // visible in this copy.
                throw new IllegalStateException();

     * Returns the {@link Row}s identified by the specified {@link Document}s, using the specified time stamp to ignore
     * deleted columns. The {@link Row}s are retrieved from the storage engine, so it involves IO operations.
     * @param scoredDocuments The {@link ScoredDocument}s
     * @param timestamp       The time stamp to ignore deleted columns.
     * @return The {@link Row} identified by the specified {@link Document}s
    protected abstract List<Row> rows(List<ScoredDocument> scoredDocuments, long timestamp) throws IOException;

     * Returns a {@link ColumnFamily} composed by the non-expired {@link Cell}s of the specified {@link ColumnFamily}.
     * @param columnFamily A {@link ColumnFamily}.
     * @param timestamp    The max allowed timestamp for the {@link Cell}s.
     * @return A {@link ColumnFamily} composed by the non-expired {@link Cell}s of the specified {@link ColumnFamily}.
    protected ColumnFamily cleanExpired(ColumnFamily columnFamily, long timestamp)
        ColumnFamily cleanColumnFamily = ArrayBackedSortedColumns.factory.create(baseCfs.metadata);
        for (Cell cell : columnFamily)
            if (cell.isLive(timestamp))
        return cleanColumnFamily;

     * Adds to the specified {@link Row} the specified Lucene score column.
     * @param row       A {@link Row}.
     * @param timestamp The score column timestamp.
     * @param score     The score column value.
     * @return The {@link Row} with the score.
    protected Row addScoreColumn(Row row, long timestamp, Float score)
        // NOTE(review): the right-hand side of this assignment is not visible in this copy of the file.
        ColumnFamily cf =;
        // The cell name for the score is produced by the row mapper from `cf`
        CellName cellName = rowMapper.makeCellName(cf);
        // The score is stored as the UTF-8 encoding of its string form
        ByteBuffer cellValue = UTF8Type.instance.decompose(score.toString());

        // The returned row is backed by a newly created column family holding the score cell
        // NOTE(review): in the visible lines only the score cell is added to `dcf`; confirm whether the
        // cells of the input row are also copied in lines missing from this copy.
        ColumnFamily dcf = ArrayBackedSortedColumns.factory.create(baseCfs.metadata);
        dcf.addColumn(cellName, cellValue, timestamp);

        return new Row(row.key, dcf);

//    /**
//     * Returns a Lucene's {@link Filter} representing the specified Cassandra's {@link DataRange} using caching.
//     *
//     * @param dataRange The Cassandra's {@link DataRange} to be mapped.
//     * @return A Lucene's {@link Filter} representing the specified Cassandra's {@link DataRange}.
//     */
//    protected final Filter cachedFilter(DataRange dataRange)
//    {
//        AbstractBounds<RowPosition> keyRange = dataRange.keyRange();
//        if (filterCache == null)
//        {
//            Log.debug("Filter cache not present for range %s", keyRange);
//            return rowMapper.filter(dataRange);
//        }
//        Filter filter = filterCache.get(dataRange);
//        if (filter == null)
//        {
//            filter = rowMapper.filter(dataRange);
//            if (filter != null)
//            {
//                Log.debug("Filter cache fails for range %s", keyRange);
//                filterCache.put(dataRange, filter);
//            }
//            else
//            {
//                Log.debug("Filter cache unneeded for range %s", keyRange);
//            }
//        }
//        else
//        {
//            Log.debug("Filter cache hits for range %s", keyRange);
//        }
//        return filter;
//    }

     * Returns the {@link RowComparator} to be used for ordering the {@link Row}s obtained from the specified
     * {@link Search}. This {@link Comparator} is useful for merging the partial results obtained from running the
     * specified {@link Search} against several indexes.
     * @param search A {@link Search}.
     * @return The {@link RowComparator} to be used for ordering the {@link Row}s obtained from the specified
     * {@link Search}.
    public RowComparator comparator(Search search)
        // Sorting takes precedence over relevance; a null search, or one using neither, falls through to
        // the row mapper's natural comparator.
        if (search != null)
            if (search.usesSorting()) // Sort with search itself
                return new RowComparatorSorting(rowMapper, search.getSorting());
            else if (search.usesRelevance()) // Sort with row's score
                return new RowComparatorScoring(this);
        return rowMapper.naturalComparator();

     * Returns the default {@link Row} comparator. This comparator is based on Cassandra's natural order.
     * @return The default {@link Row} comparator.
    public RowComparator comparator()
        return rowMapper.naturalComparator();

     * Returns the score of the specified {@link Row}.
     * @param row A {@link Row}.
     * @return The score of the specified {@link Row}.
    protected Float score(Row row)
        // NOTE(review): the right-hand side of this assignment is not visible in this copy of the file.
        ColumnFamily cf =;
        // Look the cell up under the name the row mapper produces for `cf`
        CellName cellName = rowMapper.makeCellName(cf);
        // NOTE(review): if the cell is absent and getColumn returns null, cell.value() below would throw —
        // confirm callers only pass rows that carry a score cell.
        Cell cell = cf.getColumn(cellName);
        // The cell value is decoded as UTF-8 text and parsed back into a float
        String value = UTF8Type.instance.compose(cell.value());
        return Float.parseFloat(value);

     * Optimizes the managed Lucene's index. It can be a very heavy operation.
    public void optimize() throws IOException

     * Returns the total number of {@link Document}s in the index.
     * @return The total number of {@link Document}s in the index.
     * @throws IOException
    public long getIndexSize() throws IOException
        return luceneIndex.getNumDocs();

     * Groups the specified CQL3 {@link Row}s into a list of physical storage {@link Row}s. This grouping is based on the rows' keys.
     * @param rows A list of CQL3 {@link Row}s.
     * @return The specified CQL3 {@link Row}s grouped into a list of physical storage {@link Row}s.
    public List<Row> group(List<Row> rows)
        // Walks the rows in the given order, comparing each row's key with the key of the row remembered
        // in lastRow.
        LinkedList<Row> result = new LinkedList<>();
        Row lastRow = null;
        for (Row row : rows)
            // NOTE(review): the statements that merge a row sharing the previous key and that append to
            // `result` are not visible in this copy; as shown, nothing is ever added to `result` —
            // confirm against the full source.
            if (lastRow != null && row.key.equals(lastRow.key))
                lastRow = row;
        return result;


Related Classes of com.stratio.cassandra.index.RowService

Copyright © 2018 All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact