package lucandra;
import java.io.IOError;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.ConcurrentMap;
import lucandra.cluster.CassandraIndexManager;
import lucandra.serializers.thrift.DocumentMetadata;
import lucandra.serializers.thrift.ThriftTerm;
import com.google.common.collect.MapMaker;
import org.apache.cassandra.db.*;
import org.apache.cassandra.thrift.ColumnParent;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.*;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.LucandraFieldCache;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.OpenBitSet;
import solandra.SolandraFieldSelector;
/**
 * Lucene {@code IndexReader} subclass used by Lucandra. The methods in this
 * class read document rows through {@code CassandraUtils.robustRead} and keep
 * per-index state in {@code ReaderCache} instances.
 */
public class IndexReader extends org.apache.lucene.index.IndexReader
// Document count reported by numDocs(); maxDoc() reports one more than this.
private final static int numDocs = CassandraIndexManager.maxDocsPerShard;
// Norm byte substituted when a term info carries no norm: the encoding of 1.0f.
private final static byte defaultNorm = Similarity.encodeNorm(1.0f);
// In-memory directory handed out by directory().
private final static Directory mockDirectory = new RAMDirectory();
// Opens an IndexWriter on mockDirectory, passing true as the third argument
// (Lucene's "create" flag). Each checked exception below is rethrown as a
// RuntimeException.
// NOTE(review): the writer is neither assigned nor closed in the visible line,
// and the enclosing static/try structure is not visible in this view -- confirm.
new IndexWriter(mockDirectory, new SimpleAnalyzer(), true, MaxFieldLength.LIMITED);
catch (CorruptIndexException e)
throw new RuntimeException(e);
catch (LockObtainFailedException e)
throw new RuntimeException(e);
catch (IOException e)
throw new RuntimeException(e);
// Per-thread index name, read by getIndexName().
private final static ThreadLocal<String> indexName = new ThreadLocal<String>();
// Per-thread ReaderCache; getCache() returns it directly when it is set.
private final static ThreadLocal<ReaderCache> activeCache = new ThreadLocal<ReaderCache>();
// ReaderCache instances shared between threads, keyed by index name.
private final static ConcurrentMap<String, ReaderCache> globalCache = new MapMaker().makeMap();
private static final Logger logger = Logger.getLogger(IndexReader.class);
/**
 * Creates a reader.
 *
 * @param name NOTE(review): presumably the index name this reader serves; the
 *             constructor body is not visible in this view -- confirm.
 */
public IndexReader(String name)
/**
 * Returns this same instance; the visible code does not create a new reader.
 * Declared synchronized.
 */
public synchronized IndexReader reopen() throws CorruptIndexException, IOException
return this;
/**
 * Delegates to {@link #reopen()}; {@code openReadOnly} is not consulted.
 */
public synchronized IndexReader reopen(boolean openReadOnly) throws CorruptIndexException, IOException
return reopen();
/**
 * Delegates to {@link #reopen()}; {@code commit} is not consulted.
 */
public synchronized IndexReader reopen(IndexCommit commit) throws CorruptIndexException, IOException
return reopen();
/**
 * Clears cached state for the current index.
 *
 * NOTE(review): only the two conditions are visible in this view; the
 * statements they guard are not shown, so what exactly is cleared must be
 * confirmed against the full file.
 */
public void clearCache()
String activeIndex = getIndexName();
// NOTE(review): getIndexName() substitutes "" for an unset name, so this
// condition appears to be always true.
if (activeIndex != null)
// Extra handling when the default Lucene field cache is the Lucandra one.
if(FieldCache.DEFAULT instanceof LucandraFieldCache)
/**
 * Returns the {@code ReaderCache} for the current index name.
 *
 * <p>The thread-local {@code activeCache} is consulted first. If it is unset,
 * the shared {@code globalCache} is checked, and a new {@code ReaderCache} is
 * created and stored there when no entry exists.
 *
 * @return the cache for the current index
 * @throws IllegalStateException if the current index name is null
 * @throws IOException declared; no visible statement here throws it directly
 */
public ReaderCache getCache() throws IOException
String activeIndex = getIndexName();
// NOTE(review): getIndexName() returns "" instead of null, so this guard looks
// unreachable as written.
if (activeIndex == null)
throw new IllegalStateException();
// NOTE(review): no visible line in this method stores into activeCache --
// confirm where the thread-local is populated.
ReaderCache cache = activeCache.get();
if (cache != null)
return cache;
cache = globalCache.get(activeIndex);
if (cache == null)
// Lock on the interned name so every thread asking for the same index name
// contends on the same monitor, then look the entry up again before creating.
synchronized (activeIndex.intern())
cache = globalCache.get(activeIndex);
if (cache == null)
cache = new ReaderCache(activeIndex);
globalCache.put(activeIndex, cache);
return cache;
// The five protected methods below have no body visible in this view.
// NOTE(review): presumably overrides of the Lucene IndexReader write-side
// hooks -- confirm against the full file.

/** Close hook; body not visible in this view. */
protected void doClose() throws IOException
/** Commit hook; body not visible in this view. */
protected void doCommit() throws IOException
/** Delete hook for a single document number; body not visible in this view. */
protected void doDelete(int arg0) throws CorruptIndexException, IOException
/** Norm-update hook (doc number, field, norm byte); body not visible in this view. */
protected void doSetNorm(int arg0, String arg1, byte arg2) throws CorruptIndexException, IOException
/** Undelete-all hook; body not visible in this view. */
protected void doUndeleteAll() throws CorruptIndexException, IOException
public int docFreq(Term term) throws IOException
LucandraTermInfo[] docs = getCache().termCache.get(term);
if (docs != null)
return docs.length;
LucandraTermEnum termEnum = new LucandraTermEnum(this);
if (termEnum.skipTo(term) && termEnum.term().equals(term))
return termEnum.docFreq();
return 0;
/**
 * Loads the stored document for {@code docNum}.
 *
 * <p>A hit in the reader cache's document map is returned directly. Otherwise
 * a row key is built for the requested doc (and for any extra doc ids supplied
 * by a {@code SolandraFieldSelector}), one read command per key is sent through
 * {@code CassandraUtils.robustRead}, and each returned row is turned back into
 * a Lucene {@code Document} from the thrift {@code DocumentMetadata} stored in
 * its column values.
 *
 * @param docNum   document number to load
 * @param selector optional; when it is a {@code SolandraFieldSelector} it
 *                 supplies additional doc ids to load in the same read and
 *                 the column names to fetch
 * @return the document built for {@code docNum}
 *         (NOTE(review): appears to stay null if no row matched -- confirm)
 * @throws IOException wrapping any exception caught by the handler at the end
 *         of the method
 */
public Document document(int docNum, FieldSelector selector) throws CorruptIndexException, IOException
Map<Integer, Document> documentCache = getCache().documents;
// Serve from the per-index document cache when possible.
Document doc = documentCache.get(docNum);
if (doc != null)
if (logger.isDebugEnabled())
logger.debug("Found doc in cache");
return doc;
String indexName = getIndexName();
// null or empty means "fetch every column" further down.
List<ByteBuffer> fieldNames = null;
// doc number -> Cassandra row key for every document to fetch in this call.
Map<Integer, ByteBuffer> keyMap = new HashMap<Integer, ByteBuffer>();
// Row key for the requested doc, built from the index name and the delimiter.
// NOTE(review): this statement is cut off in this view; the remaining
// arguments cannot be confirmed from here.
keyMap.put(docNum, CassandraUtils.hashKeyBytes(indexName.getBytes("UTF-8"), CassandraUtils.delimeterBytes, Integer
// Special field selector used to carry list of other docIds to cache in
// Parallel for Solr Performance
if (selector != null && selector instanceof SolandraFieldSelector)
List<Integer> otherDocIds = ((SolandraFieldSelector) selector).getOtherDocsToCache();
fieldNames = ((SolandraFieldSelector) selector).getFieldNames();
if (logger.isDebugEnabled())
logger.debug("Going to bulk load " + otherDocIds.size() + " documents");
for (Integer otherDocNum : otherDocIds)
// The requested doc and already-cached docs are special-cased here; the
// statements these two conditions guard are not visible in this view.
if (otherDocNum == docNum)
if (documentCache.containsKey(otherDocNum))
// Key suffix is the hex form of the doc number, UTF-8 encoded.
byte[] docKey = Integer.toHexString(otherDocNum).getBytes("UTF-8");
// NOTE(review): String.getBytes does not return null, so this check looks
// unreachable.
if (docKey == null)
keyMap.put(otherDocNum, CassandraUtils.hashKeyBytes(indexName.getBytes("UTF-8"),
CassandraUtils.delimeterBytes, docKey));
ColumnParent columnParent = new ColumnParent();
// Wall-clock start, used only for the debug timing message at the end.
long start = System.currentTimeMillis();
List<Row> rows = null;
// One read command per row key.
List<ReadCommand> readCommands = new ArrayList<ReadCommand>();
for (ByteBuffer key : keyMap.values())
if (fieldNames == null || fieldNames.size() == 0)
// get all columns ( except this skips meta info )
readCommands.add(new SliceFromReadCommand(CassandraUtils.keySpace, key, columnParent,
ByteBufferUtil.EMPTY_BYTE_BUFFER, CassandraUtils.finalTokenBytes, false, Integer.MAX_VALUE));
// Otherwise fetch only the named columns.
// NOTE(review): the receiver of this call is on a line not visible here.
.add(new SliceByNamesReadCommand(CassandraUtils.keySpace, key, columnParent, fieldNames));
rows = CassandraUtils.robustRead(CassandraUtils.consistency, readCommands.toArray(new ReadCommand[]{}));
// allow lookup by row
Map<ByteBuffer, Row> rowMap = new HashMap<ByteBuffer, Row>(keyMap.size());
for (Row row : rows)
rowMap.put(row.key.key, row);
// Rebuild one Document per requested key from its row.
for (Map.Entry<Integer, ByteBuffer> key : keyMap.entrySet())
Document cacheDoc = new Document();
Row row = rowMap.get(key.getValue());
if (row == null || row.cf == null)
logger.warn("Missing document in multiget_slice for: "
+ ByteBufferUtil.string(key.getValue(), CassandraUtils.UTF_8) + " " + rowMap);
for (IColumn col : row.cf.getSortedColumns())
Field field = null;
String fieldName = ByteBufferUtil.string(col.name());
// In case __META__ slips through
if (ByteBufferUtil.compare(col.name(), CassandraUtils.documentMetaFieldBytes.array()) == 0)
logger.warn("Filtering out __META__ key");
// The column value holds a thrift-serialized DocumentMetadata.
DocumentMetadata dm = lucandra.IndexWriter.fromBytesUsingThrift(col.value());
// Recreate one Lucene field per thrift term, chosen by which value is set.
for(ThriftTerm term : dm.getTerms())
Fieldable f = null;
if( term.isSetLongVal() )
f = new NumericField(term.getField()).setLongValue(term.getLongVal());
else if(term.isSetDoubleVal())
f = new NumericField(term.getField()).setDoubleValue(term.getDoubleVal());
else if(term.isSetIntVal())
f = new NumericField(term.getField()).setIntValue(term.getIntVal());
else if(term.isSetFloatVal())
f = new NumericField(term.getField()).setFloatValue((float)term.getFloatVal());
else if(term.isSetIs_binary())
// Binary field built from the raw term text.
f = new Field(term.getField(), term.getText());
// Text field: stored and analyzed.
// NOTE(review): if getText() returns byte[], new String(...) decodes with the
// platform default charset, while the row keys above use UTF-8 -- confirm.
f = new Field(term.getField(), new String(term.getText()), Store.YES, Index.ANALYZED);
// NOTE(review): presumably the fallback when no branch above applies; the
// branching structure around these lines is not visible in this view.
throw new RuntimeException("Malformed term");
// Mark the required doc
if (key.getKey().equals(docNum))
doc = cacheDoc;
// only cache complete docs
if (fieldNames == null || fieldNames.size() == 0)
documentCache.put(key.getKey(), cacheDoc);
long end = System.currentTimeMillis();
if (logger.isDebugEnabled())
logger.debug("Document read took: " + (end - start) + "ms");
return doc;
// Any failure above is surfaced to the caller as an IOException.
catch (Exception e)
throw new IOException(e);
/**
 * Returns the {@code fieldCacheKey} of the current index's {@code ReaderCache}.
 *
 * @throws RuntimeException wrapping an IOException raised while obtaining the cache
 */
public Object getCoreCacheKey()
return getCache().fieldCacheKey;
catch (IOException e)
throw new RuntimeException(e);
/**
 * Registers a reader-finished listener.
 *
 * NOTE(review): the statements that use {@code listener} are not visible in
 * this view; only the error handling is shown.
 *
 * @throws IOError wrapping an IOException raised by the (not visible) body
 */
public void addReaderFinishedListener(ReaderFinishedListener listener)
catch(IOException e)
throw new IOError(e);
public Collection getFieldNames(FieldOption fieldOption)
return Arrays.asList(new String[] {});
/**
 * Builds a {@code lucandra.TermFreqVector} for one field of one document in
 * the current index.
 *
 * @param docNum document number
 * @param field  field name
 * @return a newly constructed term frequency vector
 */
public TermFreqVector getTermFreqVector(int docNum, String field) throws IOException
TermFreqVector termVector = new lucandra.TermFreqVector(getIndexName(), field, docNum);
return termVector;
/**
 * Mapper-based term vector access is not implemented.
 *
 * @throws RuntimeException always (no message)
 */
public void getTermFreqVector(int arg0, TermVectorMapper arg1) throws IOException
throw new RuntimeException();
/**
 * Mapper-based term vector access for a single field is not implemented.
 *
 * @throws RuntimeException always (no message)
 */
public void getTermFreqVector(int arg0, String arg1, TermVectorMapper arg2) throws IOException
throw new RuntimeException();
/**
 * Fetching all term vectors of a document is not implemented.
 *
 * @throws RuntimeException always (no message)
 */
public TermFreqVector[] getTermFreqVectors(int arg0) throws IOException
throw new RuntimeException();
/** Always reports {@code false}. */
public boolean hasDeletions()
return false;
/** Always reports {@code false}; the document number is not consulted. */
public boolean isDeleted(int arg0)
return false;
/** Returns the fixed {@code numDocs} constant plus one. */
public int maxDoc()
return numDocs + 1;
/**
 * Returns the norms array held in the current reader cache for {@code field}.
 *
 * NOTE(review): presumably null when nothing has been stored for the field;
 * the type of {@code fieldNorms} is not visible here -- confirm.
 */
public byte[] norms(String field) throws IOException
return getCache().fieldNorms.get(field);
/**
 * Copying norms into a caller-supplied buffer is not supported.
 *
 * @throws RuntimeException always
 */
public void norms(String arg0, byte[] arg1, int arg2) throws IOException
throw new RuntimeException("This operation is not supported");
/** Returns the fixed {@code numDocs} constant (CassandraIndexManager.maxDocsPerShard). */
public int numDocs()
return numDocs;
public TermDocs termDocs(Term term) throws IOException
if (term == null)
return new LucandraAllTermDocs(this);
return super.termDocs(term);
/** Returns a new {@code LucandraTermDocs} bound to this reader. */
public TermDocs termDocs() throws IOException
return new LucandraTermDocs(this);
/**
 * Returns a new {@code LucandraTermDocs} bound to this reader, the same type
 * that {@code termDocs()} hands out.
 */
public TermPositions termPositions() throws IOException
return new LucandraTermDocs(this);
/** Returns a new {@code LucandraTermEnum} bound to this reader. */
public TermEnum terms() throws IOException
return new LucandraTermEnum(this);
/**
 * Returns a new {@code LucandraTermEnum} bound to this reader.
 *
 * NOTE(review): {@code term} is not used in the visible lines, so the enum is
 * not positioned on it here -- confirm whether a seek step is missing.
 */
public TermEnum terms(Term term) throws IOException
LucandraTermEnum termEnum = new LucandraTermEnum(this);
return termEnum;
/**
 * Applies the norms carried by {@code allDocs} to the cached norms array for
 * {@code field} and stores the array back into {@code cache.fieldNorms}.
 *
 * @param allDocs term infos whose docId/norm pairs are applied
 * @param field   field whose norms array is updated
 * @param cache   reader cache providing {@code fieldNorms} and {@code docHits}
 * @throws IllegalStateException if a docId is greater than {@code numDocs}
 */
public void addDocumentNormalizations(LucandraTermInfo[] allDocs, String field, ReaderCache cache)
byte[] norms = cache.fieldNorms.get(field);
// NOTE(review): docHits is read here but never used in the visible lines --
// confirm whether an update to it is missing from this view.
OpenBitSet docHits = cache.docHits;
for (LucandraTermInfo docInfo : allDocs)
int idx = docInfo.docId;
// NOTE(review): docId == numDocs passes this guard but is out of range for
// the byte[numDocs] array allocated below.
if (idx > numDocs)
throw new IllegalStateException("numDocs reached");
Byte norm = docInfo.norm;
// Term infos without a norm fall back to the encoded 1.0f default.
if (norm == null)
norm = defaultNorm;
// Check for cached reads
// (the statement taken when the cached byte already matches is not visible
// in this view)
if (norms != null && norms.length > idx && norms[idx] == norm)
// Allocate the norms array on first use.
if (norms == null)
norms = new byte[numDocs];
norms[idx] = norm;
cache.fieldNorms.put(field, norms);
public String getIndexName()
String name = indexName.get();
return name == null ? "" : name;
/**
 * Sets the index name.
 *
 * NOTE(review): the body is not visible in this view; presumably it stores
 * {@code name} in the thread-local read by getIndexName() -- confirm.
 */
public void setIndexName(String name)
/** Returns the shared static {@code mockDirectory} (a {@code RAMDirectory}). */
public Directory directory()
return mockDirectory;
/** Always returns {@code Long.MAX_VALUE}. */
public long getVersion()
return Long.MAX_VALUE;
/** Always reports {@code true}. */
public boolean isOptimized()
return true;
/** Always reports {@code true}. */
public boolean isCurrent()
return true;
/**
 * Returns the {@code docHits} bit set of the current index's {@code ReaderCache}.
 *
 * @throws RuntimeException wrapping an IOException raised while obtaining the cache
 */
public OpenBitSet getDocsHit()
return getCache().docHits;
catch (IOException e)
throw new RuntimeException(e);
/**
 * Commit hook taking a String-to-String map; body not visible in this view.
 * NOTE(review): presumably the Lucene commit-user-data variant -- confirm.
 */
protected void doCommit(Map<String, String> arg0) throws IOException
