// next, the key cache (only make sense for valid row key)
if ((op == Operator.EQ || op == Operator.GE) && (key instanceof DecoratedKey))
{
DecoratedKey decoratedKey = (DecoratedKey)key;
KeyCacheKey cacheKey = new KeyCacheKey(metadata.cfId, descriptor, decoratedKey.getKey());
RowIndexEntry cachedPosition = getCachedPosition(cacheKey, updateCacheAndStats);
if (cachedPosition != null)
{
Tracing.trace("Key cache hit for sstable {}", descriptor.generation);
return cachedPosition;
}
}
// check the smallest and greatest keys in the sstable to see if it can't be present
if (first.compareTo(key) > 0 || last.compareTo(key) < 0)
{
if (op == Operator.EQ && updateCacheAndStats)
bloomFilterTracker.addFalsePositive();
if (op.apply(1) < 0)
{
Tracing.trace("Check against min and max keys allows skipping sstable {}", descriptor.generation);
return null;
}
}
int binarySearchResult = indexSummary.binarySearch(key);
long sampledPosition = getIndexScanPositionFromBinarySearchResult(binarySearchResult, indexSummary);
int sampledIndex = getIndexSummaryIndexFromBinarySearchResult(binarySearchResult);
// if we matched the -1th position, we'll start at the first position
sampledPosition = sampledPosition == -1 ? 0 : sampledPosition;
int effectiveInterval = indexSummary.getEffectiveIndexIntervalAfterIndex(sampledIndex);
// scan the on-disk index, starting at the nearest sampled position.
// The check against IndexInterval is to be exit the loop in the EQ case when the key looked for is not present
// (bloom filter false positive). But note that for non-EQ cases, we might need to check the first key of the
// next index position because the searched key can be greater the last key of the index interval checked if it
// is lesser than the first key of next interval (and in that case we must return the position of the first key
// of the next interval).
int i = 0;
Iterator<FileDataInput> segments = ifile.iterator(sampledPosition);
while (segments.hasNext() && i <= effectiveInterval)
{
FileDataInput in = segments.next();
try
{
while (!in.isEOF() && i <= effectiveInterval)
{
i++;
ByteBuffer indexKey = ByteBufferUtil.readWithShortLength(in);
boolean opSatisfied; // did we find an appropriate position for the op requested
boolean exactMatch; // is the current position an exact match for the key, suitable for caching
// Compare raw keys if possible for performance, otherwise compare decorated keys.
if (op == Operator.EQ)
{
opSatisfied = exactMatch = indexKey.equals(((DecoratedKey) key).getKey());
}
else
{
DecoratedKey indexDecoratedKey = partitioner.decorateKey(indexKey);
int comparison = indexDecoratedKey.compareTo(key);
int v = op.apply(comparison);
opSatisfied = (v == 0);
exactMatch = (comparison == 0);
if (v < 0)
{
Tracing.trace("Partition index lookup allows skipping sstable {}", descriptor.generation);
return null;
}
}
if (opSatisfied)
{
// read data position from index entry
RowIndexEntry indexEntry = rowIndexEntrySerializer.deserialize(in, descriptor.version);
if (exactMatch && updateCacheAndStats)
{
assert key instanceof DecoratedKey; // key can be == to the index key only if it's a true row key
DecoratedKey decoratedKey = (DecoratedKey)key;
if (logger.isTraceEnabled())
{
// expensive sanity check! see CASSANDRA-4687
FileDataInput fdi = dfile.getSegment(indexEntry.position);
DecoratedKey keyInDisk = partitioner.decorateKey(ByteBufferUtil.readWithShortLength(fdi));
if (!keyInDisk.equals(key))
throw new AssertionError(String.format("%s != %s in %s", keyInDisk, key, fdi.getPath()));
fdi.close();
}
// store exact match for the key
cacheKey(decoratedKey, indexEntry);
}
if (op == Operator.EQ && updateCacheAndStats)
bloomFilterTracker.addTruePositive();
Tracing.trace("Partition index with {} entries found for sstable {}", indexEntry.columnsIndex().size(), descriptor.generation);
return indexEntry;
}
RowIndexEntry.Serializer.skip(in);
}