Package lucandra

Source Code of lucandra.TermCache

package lucandra;

import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.ConcurrentSkipListMap;

import org.apache.cassandra.db.*;
import org.apache.cassandra.thrift.ColumnParent;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.log4j.Logger;
import org.apache.lucene.index.Term;

/**
 * Per-index cache of Lucene terms.
 *
 * Holds two sorted concurrent maps: {@code termList} (term to its
 * {@code LucandraTermInfo[]}) and {@code termQueryBoundries} (term to a
 * {@code Pair<Term, Term>} describing a buffered range).
 *
 * NOTE(review): this listing looks extraction-damaged. No braces are present
 * anywhere in the class and several expressions are cut off mid-line; restore
 * it from the upstream file before compiling or changing behaviour.
 */
public class TermCache

    // Sentinel stored as the right end of a range when no term was found for it
    // (see the uses in skipTo and bufferTerms).
    private final static Term                                             emptyTerm         = new Term("");
    // Shared empty result returned by skipTo.
    // NOTE(review): this is a mutable map instance handed out to callers — confirm nobody writes to it.
    private final static ConcurrentNavigableMap<Term, LucandraTermInfo[]> emptyMap          = new ConcurrentSkipListMap<Term, LucandraTermInfo[]>();
    // Column parent used for the term-list slice read in bufferTerms.
    // NOTE(review): the constructor argument is cut off in this listing.
    private final static ColumnParent                                     fieldColumnFamily = new ColumnParent(
    // NOTE(review): the logger initializer is cut off in this listing.
    private final static Logger                                           logger            = Logger

    // Name of the index this cache serves; used when building row keys.
    public final String                                                   indexName;
    // Row key used by bufferTerms for the term-list slice read.
    public final ByteBuffer                                               termsListKey;
    // Cached terms, sorted, each mapped to its term info array.
    public final ConcurrentSkipListMap<Term, LucandraTermInfo[]>          termList;
    // Recorded query ranges, keyed by term; each value is a (left, right) pair of terms.
    public final ConcurrentSkipListMap<Term, Pair<Term, Term>>            termQueryBoundries;

    /**
     * Creates an empty cache for one index.
     *
     * Stores the index name, derives {@code termsListKey} through
     * {@code CassandraUtils.hashKeyBytes} from the UTF-8 bytes of the index
     * name, the delimiter bytes and the literal "terms", and creates both maps
     * empty. Nothing is read here.
     *
     * @param indexName name of the index; must not be null, since
     *                  {@code getBytes} is called on it
     * @throws IOException declared by the signature; {@code String.getBytes(String)}
     *                     can raise UnsupportedEncodingException, which is an IOException
     */
    public TermCache(String indexName) throws IOException
        this.indexName = indexName;
        // NOTE(review): the call below is cut off after "terms" in this listing; the
        // remaining argument text and the closing parenthesis are missing.
        termsListKey = CassandraUtils.hashKeyBytes(indexName.getBytes("UTF-8"), CassandraUtils.delimeterBytes, "terms"
        termList = new ConcurrentSkipListMap<Term, LucandraTermInfo[]>();

        // Get the boundries of terms each term
        termQueryBoundries = new ConcurrentSkipListMap<Term, Pair<Term, Term>>();

    /**
     * Looks a term up in {@code termList} without triggering any read.
     *
     * @param term term to look up
     * @return the cached term info array, or null when the term is not in the map
     */
    // Cache check only
    public LucandraTermInfo[] get(Term term)
        return termList.get(term);

    /**
     * Returns the cached terms starting at {@code skip}, calling
     * {@code bufferTerms} first when {@code skip} is not inside an
     * already-recorded range.
     *
     * NOTE(review): braces are missing from this listing; the nesting described
     * in the comments below follows the indentation only.
     *
     * @param skip term to start from
     * @return a sub-map view of {@code termList} from {@code skip} to the
     *         range's right end, both inclusive; or the shared empty map when
     *         {@code skip} is outside the range or the range's right end is
     *         the empty-term sentinel
     * @throws IOException declared by the signature; {@code bufferTerms} is declared to throw it
     */
    public ConcurrentNavigableMap<Term, LucandraTermInfo[]> skipTo(Term skip) throws IOException

        Pair<Term, Term> range = null;

        // Read size handed to bufferTerms: 4 while nothing is cached yet, 64 afterwards.
        int bufferSize = termList.isEmpty() ? 4 : 64;

        // verify we've buffered sufficiently
        // ceilingEntry gives the recorded boundary with the smallest key >= skip, or null if none.
        Map.Entry<Term, Pair<Term, Term>> tailEntry = termQueryBoundries.ceilingEntry(skip);
        boolean needsBuffering = true;

        if (tailEntry != null)
            range = tailEntry.getValue();          
            //skip term must be within a buffered range avoid rebuffering
            // The upper bound is exclusive here (< 0) while the sub-map check further down is
            // inclusive (<= 0), so skip == range.right still leads to a bufferTerms call.
            // NOTE(review): confirm that asymmetry is intended.
            if (skip.compareTo(range.left) >= 0 && (!range.right.equals(emptyTerm) && skip.compareTo(range.right) < 0))
                needsBuffering = false;

        ConcurrentNavigableMap<Term, LucandraTermInfo[]> subList = emptyMap;

        if (needsBuffering)
            range = bufferTerms(skip, bufferSize);

        //" rebuffered "+needsBuffering+" "+range);
        // NOTE(review): bufferTerms creates its range with a null right end and fills it in
        // later; if a range can come back with right still null, range.right.equals(...) throws
        // NullPointerException here — confirm.
        if (skip.compareTo(range.left) >= 0 && (!range.right.equals(emptyTerm)) && skip.compareTo(range.right) <= 0)
            subList = termList.subMap(skip, true, range.right, true);

        return subList;

    /**
     * Converts a collection of columns into an array of {@code LucandraTermInfo},
     * sized to {@code docs.size()}.
     *
     * NOTE(review): this listing is extraction-damaged. Braces are missing, the
     * statement guarded by {@code i == termInfo.length} is absent, an operand is
     * missing from the null check, an argument is missing from the
     * {@code readVInt} call (whose parentheses do not balance), and no increment
     * of {@code i} is visible. Restore from the upstream file before relying on it.
     *
     * @param docs columns to convert; must not be null
     * @return array with one slot per element of {@code docs}
     * @throws IllegalStateException when a column checked at {@code i == 0} is a
     *         SuperColumn, or when a column or its value is null
     */
    public static LucandraTermInfo[] convertTermInfo(Collection<IColumn> docs)

        LucandraTermInfo termInfo[] = new LucandraTermInfo[docs.size()];

        int i = 0;
        for (IColumn col : docs)
            // NOTE(review): the body of this guard is missing from the listing.
            if (i == termInfo.length)

            if (i == 0 && col instanceof SuperColumn)
                throw new IllegalStateException(
                        "TermInfo ColumnFamily is a of type Super: This is no longer supported, please see NEWS.txt");

            // NOTE(review): the middle operand of this condition is missing from the listing.
            if (col == null || == null || col.value() == null)
                throw new IllegalStateException("Encountered missing column: " + col);

            // NOTE(review): the first argument of readVInt is missing from the listing.
            termInfo[i] = new LucandraTermInfo(CassandraUtils.readVInt(, col.value());

        return termInfo;

    /**
     * Reads a batch of terms starting at {@code startTerm} into {@code termList}
     * and returns the range that was covered.
     *
     * Visible steps:
     * 1. Slice-read up to {@code bufferSize} columns from the {@code termsListKey}
     *    row, starting at the column name built from {@code startTerm}.
     * 2. If that yields no columns, record and return the range
     *    {@code (startTerm, emptyTerm)}.
     * 3. Otherwise build one whole-row slice read per column, keyed by a hash of
     *    index name, term field and term text, and execute them together.
     * 4. For each returned row, parse the term from the row key, store its
     *    columns in {@code termList} via {@code convertTermInfo}, and push the
     *    range's right end up to the greatest term stored.
     *
     * NOTE(review): this listing is extraction-damaged. Braces are missing
     * throughout, several lines that look like else/try/continue/removal
     * statements are absent, and some operands are missing. The nesting
     * described here follows indentation only; restore from the upstream file
     * before changing behaviour.
     *
     * @param startTerm  term to start reading from; becomes the left end of the range
     * @param bufferSize maximum number of columns requested in the first read;
     *                   checked with an assert only, so not enforced without -ea
     * @return the range starting at {@code startTerm}; its right end is the
     *         greatest term stored, or {@code emptyTerm} when nothing was stored
     * @throws IOException declared by the signature
     */
    public Pair<Term, Term> bufferTerms(Term startTerm, int bufferSize) throws IOException
        assert bufferSize > 0;

        // Start of the timing that the debug logging below reports.
        long start = System.currentTimeMillis();

        // Scan range of terms in this field (reversed, so we have a exit point)
        // NOTE(review): the comment above says "reversed" but the boolean passed below is
        // false — confirm against SliceFromReadCommand's parameter order.
        List<Row> rows = CassandraUtils.robustRead(CassandraUtils.consistency, new SliceFromReadCommand(
                CassandraUtils.keySpace, termsListKey, fieldColumnFamily, CassandraUtils.createColumnName(startTerm),
                ByteBufferUtil.EMPTY_BYTE_BUFFER, false, bufferSize));

        // Column parent for the per-term row reads built further down.
        ColumnParent columnParent = new ColumnParent(CassandraUtils.termVecColumnFamily);

        // Collect read commands
        Collection<IColumn> columns;

        // NOTE(review): as listed, both assignments below run unconditionally one after the
        // other; an else between them appears to be missing.
        if (rows == null || rows.size() != 1 || rows.get(0).cf == null)
            columns = new ArrayList<IColumn>();
            columns = rows.get(0).cf.getSortedColumns();

            if (logger.isDebugEnabled())
                logger.debug("Found " + columns.size() + " terms under field " + startTerm.field());

        Pair<Term, Term> queryRange;

        // NOTE(review): an else appears to be missing between the two Pair constructions; by
        // indentation, the put and the early return belong to the "no columns" case.
        if (!columns.isEmpty())
            //end of range will get filled in later
            queryRange = new Pair<Term, Term>(startTerm, null);
            queryRange = new Pair<Term, Term>(startTerm, emptyTerm);

            termQueryBoundries.put(startTerm, queryRange);

            return queryRange;

        // Every term seen in this batch is mapped to the same range object, so later updates
        // to queryRange.right are visible through all of these entries.
        Map<Term, Pair<Term, Term>> localRanges = new HashMap<Term, Pair<Term, Term>>(columns.size());
        localRanges.put(startTerm, queryRange);

        List<ReadCommand> reads = new ArrayList<ReadCommand>(columns.size());
        for (IColumn column : columns)
            // NOTE(review): the statement guarded by this condition is missing from the listing.
            if(!column.isLive() || column instanceof DeletedColumn)
            // NOTE(review): the first argument of ByteBufferUtil.string is missing from the listing.
            Term term = CassandraUtils.parseTerm(ByteBufferUtil.string(, CassandraUtils.UTF_8));

            localRanges.put(term, queryRange);

            // Row key is a hash over index name, term field and term text, joined by the delimiter bytes.
            // NOTE(review): the try keyword matching the catch below is missing from the listing.
            ByteBuffer rowKey;
                rowKey = CassandraUtils.hashKeyBytes(indexName.getBytes("UTF-8"), CassandraUtils.delimeterBytes, term
                        .field().getBytes("UTF-8"), CassandraUtils.delimeterBytes, term.text().getBytes("UTF-8"));
            catch (UnsupportedEncodingException e)
                // Rethrown unchecked; the original exception is not attached as the cause.
                throw new RuntimeException("This JVM doesn't support UTF-8");

            if (logger.isDebugEnabled())
                logger.debug("scanning row: " + ByteBufferUtil.string(rowKey));

            // Whole-row slice: empty start and finish names, count of Integer.MAX_VALUE.
            reads.add((ReadCommand) new SliceFromReadCommand(CassandraUtils.keySpace, rowKey, columnParent,
                    ByteBufferUtil.EMPTY_BYTE_BUFFER, ByteBufferUtil.EMPTY_BYTE_BUFFER, false, Integer.MAX_VALUE));

        // Execute all per-term reads in one call.
        rows = CassandraUtils.robustRead(CassandraUtils.consistency, reads.toArray(new ReadCommand[] {}));

        // term to start with next time
        int actualReadSize = rows.size();

        // The end of the range is logged as an empty string here.
        if (logger.isDebugEnabled())
            logger.debug("Found " + rows.size() + " rows in range:" + startTerm + " to " + "" + " in "
                    + (System.currentTimeMillis() - start) + "ms");

        if (actualReadSize > 0)
            for (Row row : rows)

                // NOTE(review): the left operand of this null check and the guarded statement
                // are missing from the listing.
                if ( == null)
                    //"Encountered deleted row");
                String key = ByteBufferUtil.string(row.key.key, CassandraUtils.UTF_8);

                // term keys look like wikipedia/body/wiki
                // Everything after the first delimiter in the key is parsed as the term.
                String termStr = key.substring(key.indexOf(CassandraUtils.delimeter)
                        + CassandraUtils.delimeter.length());
                Term term = CassandraUtils.parseTerm(termStr);

                // NOTE(review): the right-hand side of this assignment is missing from the listing.
                columns =;

                if (logger.isDebugEnabled())
                    logger.debug(term + " has " + columns.size());

                // remove any deleted columns
                // The list is created lazily, only when a non-live column is seen.
                Collection<IColumn> columnsToRemove = null;

                for (IColumn col : columns)
                    if (!col.isLive())
                        if (columnsToRemove == null)
                            columnsToRemove = new ArrayList<IColumn>();

                            // NOTE(review): no statement adding col to columnsToRemove is
                            // visible around here; lines appear to be missing.
                            logger.debug("Removing "+col+" documents from "+term);


                    if (logger.isDebugEnabled())
                        // NOTE(review): the argument of readVInt and the closing parentheses
                        // are missing from the listing.
                        logger.debug("Kept DocId " + CassandraUtils.readVInt(;

                // NOTE(review): the body of this check is missing from the listing.
                if (columnsToRemove != null)

                if (!columns.isEmpty())
                    if (logger.isDebugEnabled())
                        logger.debug("saving term: " + term + " with " + columns.size() + " docs");

                    termList.put(term, convertTermInfo(columns));
                    //update end of range
                    // Keep the greatest term stored so far as the right end of the range.
                    if(queryRange.right == null || queryRange.right.compareTo(term) < 0)
                        queryRange.right = term;
                    // NOTE(review): an else appears to be missing before this "Skipped term" log.
                    if (logger.isDebugEnabled())
                        logger.debug("Skipped term: " + term);

            // No term was stored: mark the range with the empty-term sentinel.
            // NOTE(review): by indentation this sits inside the actualReadSize > 0 check, so a
            // range could be returned with right still null when no rows come back — confirm.
            if(queryRange.right == null)
                queryRange.right = emptyTerm;
            // to recall we did this query
            // NOTE(review): no statement follows the comment above in this listing, and
            // localRanges is never read in the visible code; a line appears to be missing.

        long end = System.currentTimeMillis();

        if (logger.isDebugEnabled())
            logger.debug("loadTerms: " + startTerm + "(" + actualReadSize + ") took " + (end - start) + "ms");

        return queryRange;

Related Classes of lucandra.TermCache

Copyright © 2018 All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact