*/
final AbstractBounds<RowPosition> range = filter.dataRange.keyRange();
ByteBuffer startKey = range.left instanceof DecoratedKey ? ((DecoratedKey)range.left).getKey() : ByteBufferUtil.EMPTY_BYTE_BUFFER;
ByteBuffer endKey = range.right instanceof DecoratedKey ? ((DecoratedKey)range.right).getKey() : ByteBufferUtil.EMPTY_BYTE_BUFFER;
final CellNameType baseComparator = baseCfs.getComparator();
final CellNameType indexComparator = index.getIndexCfs().getComparator();
final Composite startPrefix = makePrefix(index, startKey, filter, true);
final Composite endPrefix = makePrefix(index, endKey, filter, false);
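// Scan the index partition in pages from startPrefix to endPrefix, decoding each index cell back
// into a base-table partition key and accumulating the matching columns one base row at a time.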
return new ColumnFamilyStore.AbstractScanIterator()
{
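// Paging state: the last index cell name seen, the cells buffered from the current page, and
// counters used to detect a short page and to enforce the user-provided column limit.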
private Composite lastSeenPrefix = startPrefix;
private Deque<Cell> indexCells;
private int columnsRead = Integer.MAX_VALUE;
private int limit = filter.currentLimit();
private int columnsCount = 0;
private int meanColumns = Math.max(index.getIndexCfs().getMeanColumns(), 1);
// We shouldn't fetch only 1 row, as that leads to buggy paging when the first row doesn't satisfy all clauses
private int rowsPerQuery = Math.max(Math.min(filter.maxRows(), filter.maxColumns() / meanColumns), 2);
public boolean needsFiltering()
{
return false;
}
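// Wraps the data accumulated for one base-table partition, or signals the end of the scan when
// there is nothing left to return.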
private Row makeReturn(DecoratedKey key, ColumnFamily data)
{
if (data == null)
return endOfData();
assert key != null;
return new Row(key, data);
}
protected Row computeNext()
{
/*
* Our internal index code is wired toward internal rows, so we need to accumulate all results for a given
* row before returning from this method. Unfortunately, that means this method has to do what
* CFS.filter does for KeysIndex.
*/
DecoratedKey currentKey = null;
ColumnFamily data = null;
Composite previousPrefix = null;
while (true)
{
// Did we get more columns than needed to respect the user limit?
// (but we still need to return what has been fetched already)
if (columnsCount >= limit)
return makeReturn(currentKey, data);
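// No buffered index cells left: fetch the next page, unless the previous page came back short,
// which means the index partition is exhausted.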
if (indexCells == null || indexCells.isEmpty())
{
if (columnsRead < rowsPerQuery)
{
logger.trace("Read only {} (< {}) last page through, must be done", columnsRead, rowsPerQuery);
return makeReturn(currentKey, data);
}
if (logger.isTraceEnabled())
logger.trace("Scanning index {} starting with {}",
index.expressionString(primary), indexComparator.getString(startPrefix));
QueryFilter indexFilter = QueryFilter.getSliceFilter(indexKey,
index.getIndexCfs().name,
lastSeenPrefix,
endPrefix,
false,
rowsPerQuery,
filter.timestamp);
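// Read one page of index cells; an empty result means there is nothing more to scan.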
ColumnFamily indexRow = index.getIndexCfs().getColumnFamily(indexFilter);
if (indexRow == null || !indexRow.hasColumns())
return makeReturn(currentKey, data);
Collection<Cell> sortedCells = indexRow.getSortedColumns();
columnsRead = sortedCells.size();
indexCells = new ArrayDeque<>(sortedCells);
Cell firstCell = sortedCells.iterator().next();
// Paging is racy, so it is possible the first column of a page is not the last seen one.
if (lastSeenPrefix != startPrefix && lastSeenPrefix.equals(firstCell.name()))
{
// skip the row we already saw w/ the last page of results
indexCells.poll();
logger.trace("Skipping {}", indexComparator.getString(firstCell.name()));
}
}
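// Drain the buffered index cells, accumulating hits for the current base partition key until
// either the buffer runs out or the user limit is reached.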
while (!indexCells.isEmpty() && columnsCount <= limit)
{
Cell cell = indexCells.poll();
lastSeenPrefix = cell.name();
if (!cell.isLive(filter.timestamp))
{
logger.trace("skipping {}", cell.name());
continue;
}
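// Decode the index cell back into the base-table partition key and clustering prefix it points to.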
CompositesIndex.IndexedEntry entry = index.decodeEntry(indexKey, cell);
DecoratedKey dk = baseCfs.partitioner.decorateKey(entry.indexedKey);
// Are we done for this row?
if (currentKey == null)
{
currentKey = dk;
}
else if (!currentKey.equals(dk))
{
DecoratedKey previousKey = currentKey;
currentKey = dk;
// We're done with the previous row, return it if it had data, continue otherwise
indexCells.addFirst(cell);
if (data == null)
continue;
else
return makeReturn(previousKey, data);
}
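// The decoded key may fall outside the requested range (e.g. when the query used a token range,
// the index slice could not be restricted to it), so check it explicitly here.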
if (!range.contains(dk))
{
// Either we're not yet in the range because the range start is exclusive, or we're
// already past it.
if (!range.right.isMinimum(baseCfs.partitioner) && range.right.compareTo(dk) < 0)
{
logger.trace("Reached end of assigned scan range");
return endOfData();
}
else
{
logger.debug("Skipping entry {} before assigned scan range", dk.getToken());
continue;
}
}
// Check if this entry cannot be a hit due to the original cell filter
Composite start = entry.indexedEntryPrefix;
if (!filter.columnFilter(dk.getKey()).maySelectPrefix(baseComparator, start))
continue;
// If we've recorded a previous prefix, it means we're dealing with an index on a collection value, in
// which case a single CQL3 row can have multiple index prefixes. We only want to add the CQL3 row
// once: requesting the data multiple times would be inefficient, and more importantly we shouldn't
// count its columns multiple times with the lastCounted() call at the end of this method.
if (previousPrefix != null && previousPrefix.equals(start))
continue;
else
previousPrefix = null;
logger.trace("Adding index hit to current row for {}", indexComparator.getString(cell.name()));
// We always query the whole CQL3 row. If the original filter was a name filter, this might be
// slightly wasteful, but it probably doesn't matter in practice and it simplifies things.
ColumnSlice dataSlice = new ColumnSlice(start, entry.indexedEntryPrefix.end());
// If the table has static columns, we must fetch them as well, since they may need to be returned.