package lucandra;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.ConcurrentSkipListMap;
import org.apache.cassandra.db.*;
import org.apache.cassandra.thrift.ColumnParent;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.log4j.Logger;
import org.apache.lucene.index.Term;
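/**
 * Per-index cache of term postings read from Cassandra.
 *
 * Terms are buffered in contiguous ranges: a cache miss pulls a slice of the
 * field's term list, loads the live postings for each term into
 * {@link #termList}, and records the covered range in
 * {@link #termQueryBoundries} so later lookups inside that range are served
 * without another round trip.
 *
 * Illustrative usage (the index name and term below are assumptions, not part
 * of this class):
 *
 * <pre>
 * TermCache cache = new TermCache("wikipedia");
 * for (Map.Entry&lt;Term, LucandraTermInfo[]&gt; e : cache.skipTo(new Term("body", "wiki")).entrySet())
 * {
 *     LucandraTermInfo[] postings = e.getValue(); // one entry per matching document
 * }
 * </pre>
 */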
public class TermCache
{
private final static Term emptyTerm = new Term("");
private final static ConcurrentNavigableMap<Term, LucandraTermInfo[]> emptyMap = new ConcurrentSkipListMap<Term, LucandraTermInfo[]>();
private final static ColumnParent fieldColumnFamily = new ColumnParent(CassandraUtils.metaInfoColumnFamily);
private final static Logger logger = Logger.getLogger(TermCache.class);
public final String indexName;
public final ByteBuffer termsListKey;
public final ConcurrentSkipListMap<Term, LucandraTermInfo[]> termList;
public final ConcurrentSkipListMap<Term, Pair<Term, Term>> termQueryBoundries;
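/**
 * @param indexName the Lucandra index this cache serves; used to derive the
 *            row key of the index's terms list
 * @throws IOException if the terms-list key bytes cannot be computed
 */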
public TermCache(String indexName) throws IOException
{
this.indexName = indexName;
termsListKey = CassandraUtils.hashKeyBytes(indexName.getBytes("UTF-8"), CassandraUtils.delimeterBytes, "terms".getBytes("UTF-8"));
termList = new ConcurrentSkipListMap<Term, LucandraTermInfo[]>();
// Maps each buffered term to the [start, end] range of the query that fetched it
termQueryBoundries = new ConcurrentSkipListMap<Term, Pair<Term, Term>>();
}
/**
 * Cache-only lookup: returns the buffered postings for the term, or null if
 * the term has not been buffered yet. Performs no Cassandra read.
 */
public LucandraTermInfo[] get(Term term)
{
return termList.get(term);
}
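/**
 * Returns the buffered sub-map of terms from {@code skip} (inclusive) to the
 * end of the range that covers it, buffering a fresh range from Cassandra
 * first when {@code skip} falls outside every range seen so far. Uses a small
 * buffer (4 terms) for a cold cache and a larger one (64) once warm.
 */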
public ConcurrentNavigableMap<Term, LucandraTermInfo[]> skipTo(Term skip) throws IOException
{
Pair<Term, Term> range = null;
int bufferSize = termList.isEmpty() ? 4 : 64;
// verify we've buffered sufficiently
Map.Entry<Term, Pair<Term, Term>> tailEntry = termQueryBoundries.ceilingEntry(skip);
boolean needsBuffering = true;
if (tailEntry != null)
{
range = tailEntry.getValue();
// if the skip term falls inside an already-buffered range, avoid rebuffering
if (skip.compareTo(range.left) >= 0 && (!range.right.equals(emptyTerm) && skip.compareTo(range.right) < 0))
{
needsBuffering = false;
}
}
ConcurrentNavigableMap<Term, LucandraTermInfo[]> subList = emptyMap;
if (needsBuffering)
{
range = bufferTerms(skip, bufferSize);
}
if (skip.compareTo(range.left) >= 0 && (!range.right.equals(emptyTerm)) && skip.compareTo(range.right) <= 0)
{
subList = termList.subMap(skip, true, range.right, true);
}
return subList;
}
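/**
 * Converts the columns of a term-vector row into a {@link LucandraTermInfo}
 * array, one entry per document column (the doc id is decoded from the column
 * name, the payload taken from the column value). Fails fast on SuperColumn
 * rows and on columns with a missing name or value.
 */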
public static LucandraTermInfo[] convertTermInfo(Collection<IColumn> docs)
{
LucandraTermInfo[] termInfo = new LucandraTermInfo[docs.size()];
int i = 0;
for (IColumn col : docs)
{
if (i == termInfo.length)
break;
if (i == 0 && col instanceof SuperColumn)
throw new IllegalStateException(
"TermInfo ColumnFamily is of type Super: this is no longer supported, please see NEWS.txt");
if (col == null || col.name() == null || col.value() == null)
throw new IllegalStateException("Encountered missing column: " + col);
termInfo[i] = new LucandraTermInfo(CassandraUtils.readVInt(col.name()), col.value());
i++;
}
return termInfo;
}
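/**
 * Reads up to {@code bufferSize} terms from this field's term list starting
 * at {@code startTerm}, loads the live postings of each into the cache, and
 * returns the covered range. A right bound of {@code emptyTerm} means no
 * further terms were found past {@code startTerm}.
 */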
public Pair<Term, Term> bufferTerms(Term startTerm, int bufferSize) throws IOException
{
assert bufferSize > 0;
long start = System.currentTimeMillis();
// Scan a slice of this field's term list, starting at startTerm (the finish bound is open-ended)
List<Row> rows = CassandraUtils.robustRead(CassandraUtils.consistency, new SliceFromReadCommand(
CassandraUtils.keySpace, termsListKey, fieldColumnFamily, CassandraUtils.createColumnName(startTerm),
ByteBufferUtil.EMPTY_BYTE_BUFFER, false, bufferSize));
ColumnParent columnParent = new ColumnParent(CassandraUtils.termVecColumnFamily);
// Extract the term columns from the meta row
Collection<IColumn> columns;
if (rows == null || rows.size() != 1 || rows.get(0).cf == null)
{
columns = new ArrayList<IColumn>();
}
else
{
columns = rows.get(0).cf.getSortedColumns();
if (logger.isDebugEnabled())
logger.debug("Found " + columns.size() + " terms under field " + startTerm.field());
}
Pair<Term, Term> queryRange;
if (!columns.isEmpty())
{
//end of range will get filled in later
queryRange = new Pair<Term, Term>(startTerm, null);
}
else
{
queryRange = new Pair<Term, Term>(startTerm, emptyTerm);
termQueryBoundries.put(startTerm, queryRange);
return queryRange;
}
Map<Term, Pair<Term, Term>> localRanges = new HashMap<Term, Pair<Term, Term>>(columns.size());
localRanges.put(startTerm, queryRange);
List<ReadCommand> reads = new ArrayList<ReadCommand>(columns.size());
for (IColumn column : columns)
{
if (!column.isLive() || column instanceof DeletedColumn)
continue;
Term term = CassandraUtils.parseTerm(ByteBufferUtil.string(column.name(), CassandraUtils.UTF_8));
localRanges.put(term, queryRange);
ByteBuffer rowKey;
try
{
rowKey = CassandraUtils.hashKeyBytes(indexName.getBytes("UTF-8"), CassandraUtils.delimeterBytes,
term.field().getBytes("UTF-8"), CassandraUtils.delimeterBytes, term.text().getBytes("UTF-8"));
}
catch (UnsupportedEncodingException e)
{
throw new RuntimeException("This JVM doesn't support UTF-8", e);
}
if (logger.isDebugEnabled())
logger.debug("scanning row: " + ByteBufferUtil.string(rowKey));
reads.add(new SliceFromReadCommand(CassandraUtils.keySpace, rowKey, columnParent,
ByteBufferUtil.EMPTY_BYTE_BUFFER, ByteBufferUtil.EMPTY_BYTE_BUFFER, false, Integer.MAX_VALUE));
}
rows = CassandraUtils.robustRead(CassandraUtils.consistency, reads.toArray(new ReadCommand[reads.size()]));
int actualReadSize = rows.size();
if (logger.isDebugEnabled())
{
logger.debug("Found " + rows.size() + " rows in range:" + startTerm + " to " + "" + " in "
+ (System.currentTimeMillis() - start) + "ms");
}
if (actualReadSize > 0)
{
for (Row row : rows)
{
if (row.cf == null)
{
// row was deleted, nothing to buffer
continue;
}
String key = ByteBufferUtil.string(row.key.key, CassandraUtils.UTF_8);
// term keys look like wikipedia/body/wiki
String termStr = key.substring(key.indexOf(CassandraUtils.delimeter)
+ CassandraUtils.delimeter.length());
Term term = CassandraUtils.parseTerm(termStr);
columns = row.cf.getSortedColumns();
if (logger.isDebugEnabled())
logger.debug(term + " has " + columns.size() + " columns");
// remove any deleted columns
Collection<IColumn> columnsToRemove = null;
for (IColumn col : columns)
{
if (!col.isLive())
{
if (columnsToRemove == null)
columnsToRemove = new ArrayList<IColumn>();
if (logger.isDebugEnabled())
logger.debug("Removing " + col + " documents from " + term);
columnsToRemove.add(col);
}
else if (logger.isDebugEnabled())
{
logger.debug("Kept DocId " + CassandraUtils.readVInt(col.name()));
}
}
if (columnsToRemove != null)
{
columns.removeAll(columnsToRemove);
}
if (!columns.isEmpty())
{
if (logger.isDebugEnabled())
logger.debug("saving term: " + term + " with " + columns.size() + " docs");
termList.put(term, convertTermInfo(columns));
// update end of range
if (queryRange.right == null || queryRange.right.compareTo(term) < 0)
queryRange.right = term;
}
else
{
if (logger.isDebugEnabled())
logger.debug("Skipped term: " + term);
}
}
if (queryRange.right == null)
queryRange.right = emptyTerm;
// to recall we did this query
termQueryBoundries.putAll(localRanges);
}
long end = System.currentTimeMillis();
if (logger.isDebugEnabled())
{
logger.debug("loadTerms: " + startTerm + "(" + actualReadSize + ") took " + (end - start) + "ms");
}
return queryRange;
}
}