/**
* Copyright T Jake Luciani
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package lucandra;
import java.io.IOError;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.ConcurrentMap;
import lucandra.cluster.CassandraIndexManager;
import lucandra.serializers.thrift.DocumentMetadata;
import lucandra.serializers.thrift.ThriftTerm;
import com.google.common.collect.MapMaker;
import org.apache.cassandra.db.*;
import org.apache.cassandra.thrift.ColumnParent;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.*;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.LucandraFieldCache;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.OpenBitSet;
import solandra.SolandraFieldSelector;
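/**
 * A read-only Lucene IndexReader backed by Cassandra rather than an on-disk
 * Directory. The active index name and its ReaderCache are thread-local, so a
 * single instance can be re-pointed at a different Solandra shard per request
 * via setIndexName().
 *
 * A minimal usage sketch (the shard name "myindex~0" is only illustrative):
 *
 * <pre>
 * IndexReader reader = new IndexReader("myindex~0");
 * int df = reader.docFreq(new Term("body", "cassandra"));
 * Document doc = reader.document(0, null);
 * reader.clearCache(); // drop cached state when the request completes
 * </pre>
 */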
public class IndexReader extends org.apache.lucene.index.IndexReader
{
private final static int numDocs = CassandraIndexManager.maxDocsPerShard;
private final static byte defaultNorm = Similarity.encodeNorm(1.0f);
private final static Directory mockDirectory = new RAMDirectory();
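    // Commit an empty index into the mock directory so directory() hands back
    // something that passes Lucene's validity checks.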
    static
    {
        try
        {
            // create the empty index, then release the write lock
            new IndexWriter(mockDirectory, new SimpleAnalyzer(), true, MaxFieldLength.LIMITED).close();
        }
        catch (IOException e)
        {
            // CorruptIndexException and LockObtainFailedException are both
            // IOException subclasses, so a single catch clause covers them
            throw new RuntimeException(e);
        }
    }
private final static ThreadLocal<String> indexName = new ThreadLocal<String>();
private final static ThreadLocal<ReaderCache> activeCache = new ThreadLocal<ReaderCache>();
private final static ConcurrentMap<String, ReaderCache> globalCache = new MapMaker().makeMap();
private static final Logger logger = Logger.getLogger(IndexReader.class);
public IndexReader(String name)
{
super();
setIndexName(name);
}
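    // Reopening never creates a new reader: it just drops the cached state so
    // the next read repopulates it from Cassandra.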
    @Override
    public synchronized IndexReader reopen() throws CorruptIndexException, IOException
{
clearCache();
return this;
}
@Override
public synchronized IndexReader reopen(boolean openReadOnly) throws CorruptIndexException, IOException
{
return reopen();
}
@Override
public synchronized IndexReader reopen(IndexCommit commit) throws CorruptIndexException, IOException
{
return reopen();
}
public void clearCache()
{
String activeIndex = getIndexName();
if (activeIndex != null)
{
if(FieldCache.DEFAULT instanceof LucandraFieldCache)
{
LucandraFieldCache.purgeReader.finished(this);
}
globalCache.remove(activeIndex);
}
activeCache.remove();
}
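    /**
     * Returns the cache for the active index: the thread-local reference when
     * set, otherwise the global map, creating the entry under a lock on the
     * interned index name so concurrent threads build it exactly once.
     */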
public ReaderCache getCache() throws IOException
{
String activeIndex = getIndexName();
if (activeIndex == null)
throw new IllegalStateException();
ReaderCache cache = activeCache.get();
        if (cache != null)
            return cache;
        cache = globalCache.get(activeIndex);
if (cache == null)
{
synchronized (activeIndex.intern())
{
cache = globalCache.get(activeIndex);
if (cache == null)
{
cache = new ReaderCache(activeIndex);
globalCache.put(activeIndex, cache);
}
}
}
activeCache.set(cache);
return cache;
}
    @Override
    protected void doClose() throws IOException
{
clearCache();
}
protected void doCommit() throws IOException
{
clearCache();
}
    @Override
    protected void doDelete(int arg0) throws CorruptIndexException, IOException
    {
        // no-op: this reader never exposes deletions (see isDeleted/hasDeletions)
    }
    @Override
    protected void doSetNorm(int arg0, String arg1, byte arg2) throws CorruptIndexException, IOException
    {
        // no-op: norms are only populated read-side via addDocumentNormalizations()
    }
    @Override
    protected void doUndeleteAll() throws CorruptIndexException, IOException
    {
        // no-op: nothing is ever deleted
    }
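    // Answer docFreq from the per-index term cache when possible; otherwise
    // seek the term with a LucandraTermEnum and use its posting count.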
    @Override
    public int docFreq(Term term) throws IOException
{
LucandraTermInfo[] docs = getCache().termCache.get(term);
if (docs != null)
return docs.length;
LucandraTermEnum termEnum = new LucandraTermEnum(this);
if (termEnum.skipTo(term) && termEnum.term().equals(term))
{
return termEnum.docFreq();
}
return 0;
}
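    /**
     * Loads a document from the document column family. A SolandraFieldSelector
     * may carry additional docIds to bulk-read in the same round trip, and
     * complete documents are cached for later lookups in this request.
     */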
    @Override
    public Document document(int docNum, FieldSelector selector) throws CorruptIndexException, IOException
{
Map<Integer, Document> documentCache = getCache().documents;
Document doc = documentCache.get(docNum);
if (doc != null)
{
if (logger.isDebugEnabled())
logger.debug("Found doc in cache");
return doc;
}
String indexName = getIndexName();
List<ByteBuffer> fieldNames = null;
Map<Integer, ByteBuffer> keyMap = new HashMap<Integer, ByteBuffer>();
        keyMap.put(docNum, CassandraUtils.hashKeyBytes(indexName.getBytes("UTF-8"), CassandraUtils.delimeterBytes,
                Integer.toHexString(docNum).getBytes("UTF-8")));
        // Special field selector used to carry a list of other docIds to
        // bulk-load into the cache alongside this one, for Solr performance
        if (selector instanceof SolandraFieldSelector)
{
List<Integer> otherDocIds = ((SolandraFieldSelector) selector).getOtherDocsToCache();
fieldNames = ((SolandraFieldSelector) selector).getFieldNames();
if (logger.isDebugEnabled())
logger.debug("Going to bulk load " + otherDocIds.size() + " documents");
for (Integer otherDocNum : otherDocIds)
{
if (otherDocNum == docNum)
continue;
if (documentCache.containsKey(otherDocNum))
continue;
                byte[] docKey = Integer.toHexString(otherDocNum).getBytes("UTF-8");
keyMap.put(otherDocNum, CassandraUtils.hashKeyBytes(indexName.getBytes("UTF-8"),
CassandraUtils.delimeterBytes, docKey));
}
}
ColumnParent columnParent = new ColumnParent();
columnParent.setColumn_family(CassandraUtils.docColumnFamily);
long start = System.currentTimeMillis();
try
{
List<Row> rows = null;
List<ReadCommand> readCommands = new ArrayList<ReadCommand>();
for (ByteBuffer key : keyMap.values())
{
if (fieldNames == null || fieldNames.size() == 0)
{
                    // fetch all columns; this slice range is meant to exclude the meta column
readCommands.add(new SliceFromReadCommand(CassandraUtils.keySpace, key, columnParent,
ByteBufferUtil.EMPTY_BYTE_BUFFER, CassandraUtils.finalTokenBytes, false, Integer.MAX_VALUE));
}
else
{
                    readCommands.add(new SliceByNamesReadCommand(CassandraUtils.keySpace, key, columnParent,
                            fieldNames));
}
}
rows = CassandraUtils.robustRead(CassandraUtils.consistency, readCommands.toArray(new ReadCommand[]{}));
// allow lookup by row
Map<ByteBuffer, Row> rowMap = new HashMap<ByteBuffer, Row>(keyMap.size());
for (Row row : rows)
{
rowMap.put(row.key.key, row);
}
for (Map.Entry<Integer, ByteBuffer> key : keyMap.entrySet())
{
Document cacheDoc = new Document();
Row row = rowMap.get(key.getValue());
if (row == null || row.cf == null)
{
logger.warn("Missing document in multiget_slice for: "
+ ByteBufferUtil.string(key.getValue(), CassandraUtils.UTF_8) + " " + rowMap);
}
else
{
for (IColumn col : row.cf.getSortedColumns())
{
                        // in case the __META__ column slips through the slice range
if (ByteBufferUtil.compare(col.name(), CassandraUtils.documentMetaFieldBytes.array()) == 0)
{
logger.warn("Filtering out __META__ key");
continue;
}
DocumentMetadata dm = lucandra.IndexWriter.fromBytesUsingThrift(col.value());
for(ThriftTerm term : dm.getTerms())
{
Fieldable f = null;
if( term.isSetLongVal() )
{
f = new NumericField(term.getField()).setLongValue(term.getLongVal());
}
else if(term.isSetDoubleVal())
{
f = new NumericField(term.getField()).setDoubleValue(term.getDoubleVal());
}
else if(term.isSetIntVal())
{
f = new NumericField(term.getField()).setIntValue(term.getIntVal());
}
else if(term.isSetFloatVal())
{
f = new NumericField(term.getField()).setFloatValue((float)term.getFloatVal());
}
else if(term.isSetIs_binary())
{
                                if (term.is_binary)
                                    f = new Field(term.getField(), term.getText());
                                else
                                    // decode explicitly as UTF-8 rather than the platform default charset
                                    f = new Field(term.getField(), new String(term.getText(), "UTF-8"), Store.YES, Index.ANALYZED);
}
else
throw new RuntimeException("Malformed term");
cacheDoc.add(f);
}
}
}
// Mark the required doc
if (key.getKey().equals(docNum))
doc = cacheDoc;
// only cache complete docs
if (fieldNames == null || fieldNames.size() == 0)
documentCache.put(key.getKey(), cacheDoc);
}
long end = System.currentTimeMillis();
if (logger.isDebugEnabled())
logger.debug("Document read took: " + (end - start) + "ms");
return doc;
}
catch (Exception e)
{
throw new IOException(e);
}
}
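    // The field-cache key lives in the per-index ReaderCache, so reader
    // instances pointing at the same index share one set of FieldCache entries.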
@Override
public Object getCoreCacheKey()
{
try
{
return getCache().fieldCacheKey;
}
catch (IOException e)
{
throw new RuntimeException(e);
}
}
public void addReaderFinishedListener(ReaderFinishedListener listener)
{
try
{
getCache().readerFinishedListeners.add(listener);
}
catch(IOException e)
{
throw new IOError(e);
}
}
    @Override
    public Collection<String> getFieldNames(FieldOption fieldOption)
    {
        // field names are not tracked per reader
        return Collections.emptyList();
    }
@Override
public TermFreqVector getTermFreqVector(int docNum, String field) throws IOException
{
        return new lucandra.TermFreqVector(getIndexName(), field, docNum);
}
    @Override
    public void getTermFreqVector(int arg0, TermVectorMapper arg1) throws IOException
    {
        throw new UnsupportedOperationException();
    }
    @Override
    public void getTermFreqVector(int arg0, String arg1, TermVectorMapper arg2) throws IOException
    {
        throw new UnsupportedOperationException();
    }
    @Override
    public TermFreqVector[] getTermFreqVectors(int arg0) throws IOException
    {
        throw new UnsupportedOperationException();
    }
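    // Deletions are never surfaced through this reader; doDelete() above is a no-op.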
@Override
public boolean hasDeletions()
{
return false;
}
@Override
public boolean isDeleted(int arg0)
{
return false;
}
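    // Valid docIds in a shard run from 0 to numDocs inclusive, and Lucene
    // defines maxDoc() as one greater than the largest possible docId.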
@Override
public int maxDoc()
{
return numDocs + 1;
}
@Override
public byte[] norms(String field) throws IOException
{
return getCache().fieldNorms.get(field);
}
@Override
public void norms(String arg0, byte[] arg1, int arg2) throws IOException
{
throw new RuntimeException("This operation is not supported");
}
@Override
public int numDocs()
{
return numDocs;
}
@Override
public TermDocs termDocs(Term term) throws IOException
{
        // per the Lucene contract, a null term enumerates all documents
        if (term == null)
            return new LucandraAllTermDocs(this);
return super.termDocs(term);
}
@Override
public TermDocs termDocs() throws IOException
{
return new LucandraTermDocs(this);
}
@Override
public TermPositions termPositions() throws IOException
{
return new LucandraTermDocs(this);
}
@Override
public TermEnum terms() throws IOException
{
return new LucandraTermEnum(this);
}
@Override
public TermEnum terms(Term term) throws IOException
{
LucandraTermEnum termEnum = new LucandraTermEnum(this);
termEnum.skipTo(term);
return termEnum;
}
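    /**
     * Folds the norms carried in the given postings into the cached per-field
     * norm array, marking each touched docId in the shared docHits bitset.
     */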
public void addDocumentNormalizations(LucandraTermInfo[] allDocs, String field, ReaderCache cache)
{
byte[] norms = cache.fieldNorms.get(field);
OpenBitSet docHits = cache.docHits;
for (LucandraTermInfo docInfo : allDocs)
{
int idx = docInfo.docId;
            if (idx > numDocs)
                throw new IllegalStateException("docId " + idx + " exceeds the maximum docId (" + numDocs + ") for this shard");
Byte norm = docInfo.norm;
if (norm == null)
norm = defaultNorm;
            // skip docs whose norm is already cached with the same value
if (norms != null && norms.length > idx && norms[idx] == norm)
continue;
docHits.fastSet(idx);
            if (norms == null)
                norms = new byte[maxDoc()]; // sized to maxDoc(): valid docIds run up to numDocs inclusive
norms[idx] = norm;
}
cache.fieldNorms.put(field, norms);
}
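    // The active index name is thread-local: one IndexReader instance can serve
    // a different shard per thread, switched via setIndexName().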
public String getIndexName()
{
String name = indexName.get();
return name == null ? "" : name;
}
public void setIndexName(String name)
{
activeCache.remove();
indexName.set(name);
}
@Override
public Directory directory()
{
return mockDirectory;
}
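    // There is no on-disk version to track, so the reader always reports itself
    // as current and fully optimized.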
@Override
public long getVersion()
{
return Long.MAX_VALUE;
}
@Override
public boolean isOptimized()
{
return true;
}
@Override
public boolean isCurrent()
{
return true;
}
public OpenBitSet getDocsHit()
{
try
{
return getCache().docHits;
}
catch (IOException e)
{
throw new RuntimeException(e);
}
}
    @Override
    protected void doCommit(Map<String, String> arg0) throws IOException
    {
        // mirror the no-arg doCommit(): drop cached state so it is re-read from Cassandra
        clearCache();
    }
}