Package lucandra

Source Code of lucandra.TermCache

package lucandra;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.ConcurrentSkipListMap;

import org.apache.cassandra.db.*;
import org.apache.cassandra.thrift.ColumnParent;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.log4j.Logger;
import org.apache.lucene.index.Term;

public class TermCache
{

    private final static Term                                             emptyTerm         = new Term("");
    private final static ConcurrentNavigableMap<Term, LucandraTermInfo[]> emptyMap          = new ConcurrentSkipListMap<Term, LucandraTermInfo[]>();
    private final static ColumnParent                                     fieldColumnFamily = new ColumnParent(
                                                                                                    CassandraUtils.metaInfoColumnFamily);
    private final static Logger                                           logger            = Logger
                                                                                                    .getLogger(TermCache.class);

    public final String                                                   indexName;
    public final ByteBuffer                                               termsListKey;
    public final ConcurrentSkipListMap<Term, LucandraTermInfo[]>          termList;
    public final ConcurrentSkipListMap<Term, Pair<Term, Term>>            termQueryBoundries;

    public TermCache(String indexName) throws IOException
    {
        this.indexName = indexName;
        termsListKey = CassandraUtils.hashKeyBytes(indexName.getBytes("UTF-8"), CassandraUtils.delimeterBytes, "terms"
                .getBytes("UTF-8"));
        termList = new ConcurrentSkipListMap<Term, LucandraTermInfo[]>();

        // Get the boundries of terms each term
        termQueryBoundries = new ConcurrentSkipListMap<Term, Pair<Term, Term>>();
    }

    // Cache check only
    public LucandraTermInfo[] get(Term term)
    {
        return termList.get(term);
    }

    public ConcurrentNavigableMap<Term, LucandraTermInfo[]> skipTo(Term skip) throws IOException
    {

        Pair<Term, Term> range = null;

        int bufferSize = termList.isEmpty() ? 4 : 64;

        // verify we've buffered sufficiently
        Map.Entry<Term, Pair<Term, Term>> tailEntry = termQueryBoundries.ceilingEntry(skip);
        boolean needsBuffering = true;

        if (tailEntry != null)
        {
            range = tailEntry.getValue();          
           
            //skip term must be within a buffered range avoid rebuffering
            if (skip.compareTo(range.left) >= 0 && (!range.right.equals(emptyTerm) && skip.compareTo(range.right) < 0))
            {               
                needsBuffering = false;
            }
        }

        ConcurrentNavigableMap<Term, LucandraTermInfo[]> subList = emptyMap;

        if (needsBuffering)
        {
            range = bufferTerms(skip, bufferSize);
        }

        //logger.info(Thread.currentThread().getName()+" rebuffered "+needsBuffering+" "+range);
       
        if (skip.compareTo(range.left) >= 0 && (!range.right.equals(emptyTerm)) && skip.compareTo(range.right) <= 0)
        {
            subList = termList.subMap(skip, true, range.right, true);
        }

        return subList;
    }

    public static LucandraTermInfo[] convertTermInfo(Collection<IColumn> docs)
    {

        LucandraTermInfo termInfo[] = new LucandraTermInfo[docs.size()];

        int i = 0;
        for (IColumn col : docs)
        {
            if (i == termInfo.length)
                break;

            if (i == 0 && col instanceof SuperColumn)
                throw new IllegalStateException(
                        "TermInfo ColumnFamily is a of type Super: This is no longer supported, please see NEWS.txt");

            if (col == null || col.name() == null || col.value() == null)
                throw new IllegalStateException("Encountered missing column: " + col);

            termInfo[i] = new LucandraTermInfo(CassandraUtils.readVInt(col.name()), col.value());
            i++;
        }

        return termInfo;
    }

    public Pair<Term, Term> bufferTerms(Term startTerm, int bufferSize) throws IOException
    {
        assert bufferSize > 0;

        long start = System.currentTimeMillis();

        // Scan range of terms in this field (reversed, so we have a exit point)
        List<Row> rows = CassandraUtils.robustRead(CassandraUtils.consistency, new SliceFromReadCommand(
                CassandraUtils.keySpace, termsListKey, fieldColumnFamily, CassandraUtils.createColumnName(startTerm),
                ByteBufferUtil.EMPTY_BYTE_BUFFER, false, bufferSize));

        ColumnParent columnParent = new ColumnParent(CassandraUtils.termVecColumnFamily);

        // Collect read commands
        Collection<IColumn> columns;

        if (rows == null || rows.size() != 1 || rows.get(0).cf == null)
        {
            columns = new ArrayList<IColumn>();
        }
        else
        {
            columns = rows.get(0).cf.getSortedColumns();

            if (logger.isDebugEnabled())
                logger.debug("Found " + columns.size() + " terms under field " + startTerm.field());
        }

        Pair<Term, Term> queryRange;

        if (!columns.isEmpty())
        {
            //end of range will get filled in later
            queryRange = new Pair<Term, Term>(startTerm, null);
        }
        else
        {
            queryRange = new Pair<Term, Term>(startTerm, emptyTerm);

            termQueryBoundries.put(startTerm, queryRange);

            return queryRange;
        }

        Map<Term, Pair<Term, Term>> localRanges = new HashMap<Term, Pair<Term, Term>>(columns.size());
        localRanges.put(startTerm, queryRange);

        List<ReadCommand> reads = new ArrayList<ReadCommand>(columns.size());
        for (IColumn column : columns)
        {
            if(!column.isLive() || column instanceof DeletedColumn)
                continue;
           
            Term term = CassandraUtils.parseTerm(ByteBufferUtil.string(column.name(), CassandraUtils.UTF_8));

            localRanges.put(term, queryRange);

            ByteBuffer rowKey;
            try
            {
                rowKey = CassandraUtils.hashKeyBytes(indexName.getBytes("UTF-8"), CassandraUtils.delimeterBytes, term
                        .field().getBytes("UTF-8"), CassandraUtils.delimeterBytes, term.text().getBytes("UTF-8"));
            }
            catch (UnsupportedEncodingException e)
            {
                throw new RuntimeException("This JVM doesn't support UTF-8");
            }

            if (logger.isDebugEnabled())
                logger.debug("scanning row: " + ByteBufferUtil.string(rowKey));

            reads.add((ReadCommand) new SliceFromReadCommand(CassandraUtils.keySpace, rowKey, columnParent,
                    ByteBufferUtil.EMPTY_BYTE_BUFFER, ByteBufferUtil.EMPTY_BYTE_BUFFER, false, Integer.MAX_VALUE));
        }

        rows = CassandraUtils.robustRead(CassandraUtils.consistency, reads.toArray(new ReadCommand[] {}));

        // term to start with next time
        int actualReadSize = rows.size();

        if (logger.isDebugEnabled())
        {
            logger.debug("Found " + rows.size() + " rows in range:" + startTerm + " to " + "" + " in "
                    + (System.currentTimeMillis() - start) + "ms");

        }
       
        if (actualReadSize > 0)
        {
            for (Row row : rows)
            {

                if (row.cf == null)
                {
                    //logger.info("Encountered deleted row");
                    continue;
                }
                String key = ByteBufferUtil.string(row.key.key, CassandraUtils.UTF_8);

                // term keys look like wikipedia/body/wiki
                String termStr = key.substring(key.indexOf(CassandraUtils.delimeter)
                        + CassandraUtils.delimeter.length());
                Term term = CassandraUtils.parseTerm(termStr);

                columns = row.cf.getSortedColumns();

                if (logger.isDebugEnabled())
                    logger.debug(term + " has " + columns.size());

                // remove any deleted columns
                Collection<IColumn> columnsToRemove = null;

                for (IColumn col : columns)
                {
                    if (!col.isLive())
                    {
                        if (columnsToRemove == null)
                            columnsToRemove = new ArrayList<IColumn>();

                        if(logger.isDebugEnabled())
                            logger.debug("Removing "+col+" documents from "+term);

                       
                        columnsToRemove.add(col);
                    }

                    if (logger.isDebugEnabled())
                        logger.debug("Kept DocId " + CassandraUtils.readVInt(col.name()));
                }

                if (columnsToRemove != null)
                {                  
                    columns.removeAll(columnsToRemove);
                }

                if (!columns.isEmpty())
                {
                    if (logger.isDebugEnabled())
                        logger.debug("saving term: " + term + " with " + columns.size() + " docs");

                    termList.put(term, convertTermInfo(columns));
                   
                    //update end of range
                    if(queryRange.right == null || queryRange.right.compareTo(term) < 0)
                        queryRange.right = term;
                   
                }
                else
                {
                    if (logger.isDebugEnabled())
                        logger.debug("Skipped term: " + term);
                }
            }

            if(queryRange.right == null)
                queryRange.right = emptyTerm;
           
            // to recall we did this query
            termQueryBoundries.putAll(localRanges);
        }

        long end = System.currentTimeMillis();

        if (logger.isDebugEnabled())
        {
            logger.debug("loadTerms: " + startTerm + "(" + actualReadSize + ") took " + (end - start) + "ms");
        }

        return queryRange;
    }
}
TOP

Related Classes of lucandra.TermCache

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.