/*
* Copyright 2010 Outerthought bvba
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.hbaseindex;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.gotometrics.orderly.RowKey;
import com.gotometrics.orderly.StructBuilder;
import com.gotometrics.orderly.StructRowKey;
import com.gotometrics.orderly.Termination;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryPrefixComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.WhileMatchFilter;
import org.lilyproject.hbaseindex.filter.IndexFilterHbaseImpl;
import org.lilyproject.util.ArgumentValidator;
import org.lilyproject.util.ByteArrayKey;
/**
 * Provides query access to a single index, and allows adding entries to it or removing
 * entries from it.
 *
 * <p>Every index entry is stored as one row of the backing HBase table; the row key is the
 * serialized form of the entry's field values (see {@link #buildRowKey(IndexEntry)}).
 *
 * <p>An Index instance can be obtained from {@link IndexManager#getIndex(String)}.
 */
public class Index {
    /** Qualifier of the placeholder column written for entries that carry no data. */
    private static final byte[] DUMMY_QUALIFIER = new byte[]{0};

    /** Value of the placeholder column written for entries that carry no data. */
    private static final byte[] DUMMY_VALUE = new byte[]{0};

    /** Value handed to {@link Scan#setCaching(int)} for the scans created by queries. */
    private static final int SCANNER_CACHING = 30;

    private final HTableInterface htable;
    private final IndexDefinition definition;

    /**
     * Creates an index backed by the given table.
     *
     * @param htable     the HBase table holding the index rows
     * @param definition the definition describing the fields of this index
     */
    protected Index(HTableInterface htable, IndexDefinition definition) {
        this.htable = htable;
        this.definition = definition;
    }

    /**
     * Returns the definition this index was created with.
     */
    public IndexDefinition getDefinition() {
        return definition;
    }

    /**
     * Adds an entry to this index. See {@link IndexEntry} for more information.
     *
     * @param entry the values to be part of the index key, should correspond to the fields
     *              defined in the {@link IndexDefinition}
     * @throws IOException if writing to the underlying table fails
     */
    public void addEntry(IndexEntry entry) throws IOException {
        ArgumentValidator.notNull(entry, "entry");
        entry.validate();
        htable.put(createAddEntryPut(entry));
    }

    /**
     * Adds multiple entries to the index. All puts are handed to the table in a single
     * call, which is more efficient than adding each entry individually.
     *
     * @param entries the entries to add, each is validated before any put is sent
     * @throws IOException if writing to the underlying table fails
     */
    public void addEntries(List<IndexEntry> entries) throws IOException {
        // Same argument check as removeEntries, instead of failing with a bare NPE in the loop.
        ArgumentValidator.notNull(entries, "entries");

        final List<Put> puts = new ArrayList<Put>(entries.size());
        for (IndexEntry entry : entries) {
            entry.validate();
            puts.add(createAddEntryPut(entry));
        }
        htable.put(puts);
    }

    /**
     * Builds the put which stores the given entry: the row key is the serialized index key,
     * the columns are the entry's data items in {@link IndexDefinition#DATA_FAMILY}.
     */
    private Put createAddEntryPut(IndexEntry entry) throws IOException {
        final Put put = new Put(buildRowKey(entry));

        final Map<ByteArrayKey, byte[]> data = entry.getData();
        if (!data.isEmpty()) {
            for (Map.Entry<ByteArrayKey, byte[]> item : data.entrySet()) {
                put.add(IndexDefinition.DATA_FAMILY, item.getKey().getKey(), item.getValue());
            }
        } else {
            // HBase does not allow to create a row without columns, so add a dummy column
            put.add(IndexDefinition.DATA_FAMILY, DUMMY_QUALIFIER, DUMMY_VALUE);
        }

        return put;
    }

    /**
     * Removes an entry from the index. The contents of the supplied
     * entry and the identifier should exactly match those supplied
     * when creating the index entry.
     *
     * @param entry the entry to remove
     * @throws IOException if deleting from the underlying table fails
     */
    public void removeEntry(IndexEntry entry) throws IOException {
        ArgumentValidator.notNull(entry, "entry");
        entry.validate();
        htable.delete(new Delete(buildRowKey(entry)));
    }

    /**
     * Removes multiple entries from the index, handing all deletes to the table in a
     * single call. The same matching rules as for {@link #removeEntry(IndexEntry)} apply.
     *
     * @param entries the entries to remove, each is validated before any delete is sent
     * @throws IOException if deleting from the underlying table fails
     */
    public void removeEntries(List<IndexEntry> entries) throws IOException {
        ArgumentValidator.notNull(entries, "entries");

        final List<Delete> deletes = new ArrayList<Delete>(entries.size());
        for (IndexEntry entry : entries) {
            entry.validate();
            deletes.add(new Delete(buildRowKey(entry)));
        }
        htable.delete(deletes);
    }

    /**
     * Build the index row key.
     *
     * <p>The format is as follows:
     *
     * <pre>
     * ([encoded value][terminator for variable length fields])*[identifier]
     * </pre>
     */
    private byte[] buildRowKey(IndexEntry entry) throws IOException {
        final StructRowKey indexEntryRowKeySerializer = definition.asStructRowKey();
        return indexEntryRowKeySerializer.serialize(entry.getFieldValuesInSerializationOrder());
    }

    /**
     * Executes a query against this index.
     *
     * <p>The index fields are walked in definition order. Each field with an equals condition
     * contributes its value to the scan start key. The first field without an equals condition
     * must be the one carrying the range condition (if the query has one); after it, no further
     * conditions can be used.
     *
     * @param query the query to execute
     * @return a result backed by an HBase scanner over the matching index rows
     * @throws MalformedQueryException if the query refers to unknown fields, contains a value
     *                                 of the wrong type, or has conditions which cannot be
     *                                 mapped onto a contiguous prefix of the index fields
     * @throws IOException             if serializing the keys or opening the scanner fails
     */
    public QueryResult performQuery(Query query) throws IOException {
        ArgumentValidator.notNull(query, "query");
        validateQuery(query);

        final int fieldCount = definition.getFields().size();
        final Query.RangeCondition rangeCond = query.getRangeCondition();

        // Construct from and to keys
        final StructBuilder fromKeyStructBuilder = new StructBuilder();
        final List<Object> fromKeyComponents = new ArrayList<Object>(fieldCount);

        byte[] fromKey = null;
        byte[] toKey = null;

        boolean rangeCondSet = false;
        int usedConditionsCount = 0;
        int definedFieldsIndex = 0;

        // loop through all defined index fields, and see if they occur in the query
        for (; definedFieldsIndex < fieldCount; definedFieldsIndex++) {
            final IndexFieldDefinition fieldDef = definition.getFields().get(definedFieldsIndex);
            final Query.EqualsCondition eqCond = query.getCondition(fieldDef.getName());

            if (eqCond != null) {
                // there is an equality condition for this field
                checkQueryValueType(fieldDef, eqCond.getValue());

                // explicit termination is requested for every equality component
                final RowKey key = fieldDef.asRowKey();
                key.setTermination(Termination.MUST);
                fromKeyStructBuilder.add(key);
                fromKeyComponents.add(eqCond.getValue());

                usedConditionsCount++;
            } else if (rangeCond != null) {
                // no equality condition for this field, but there is a range condition
                if (!rangeCond.getName().equals(fieldDef.getName())) {
                    throw new MalformedQueryException("Query defines range condition on field " + rangeCond.getName() +
                            " but has no equals condition on field " + fieldDef.getName() +
                            " which comes earlier in the index definition.");
                }

                // The to-key starts out as a copy of everything collected for the from-key so far
                final StructBuilder toKeyStructBuilder = new StructBuilder();
                final List<Object> toKeyComponents = new ArrayList<Object>(fromKeyComponents.size() + 1);
                toKeyComponents.addAll(fromKeyComponents);
                for (RowKey rowKey : fromKeyStructBuilder.getFields()) {
                    toKeyStructBuilder.add(rowKey);
                }

                final Object fromValue = rangeCond.getFromValue();
                final Object toValue = rangeCond.getToValue();

                // For MIN_VALUE just leave off the value, a shorter key is smaller than anything else
                if (fromValue != Query.MIN_VALUE) {
                    checkQueryValueType(fieldDef, fromValue);
                    fromKeyComponents.add(fromValue);
                    fromKeyStructBuilder.add(fieldDef.asRowKeyWithoutTermination());
                }

                // Searching to max value is equal to a prefix search (assumes always exclusive interval,
                // since max value is bigger than anything else). So, append nothing to the search key.
                if (toValue != Query.MAX_VALUE) {
                    checkQueryValueType(fieldDef, toValue);
                    toKeyComponents.add(toValue);
                    toKeyStructBuilder.add(fieldDef.asRowKeyWithoutTermination());
                }

                fromKey = fromKeyStructBuilder.toRowKey().serialize(fromKeyComponents.toArray());
                toKey = toKeyStructBuilder.toRowKey().serialize(toKeyComponents.toArray());

                rangeCondSet = true;
                usedConditionsCount++;

                // nothing can be used after a range condition
                break;
            } else {
                // we're done
                break;
            }
        }

        // Check if we have used all conditions defined in the query
        // NOTE(review): when every index field has an equals condition the loop runs to completion
        // and this check is skipped, so a range condition in such a query appears to be silently
        // ignored -- confirm whether Query allows that combination.
        final int definedConditionsCount = query.getEqConditions().size() + (rangeCond != null ? 1 : 0);
        if (definedFieldsIndex < fieldCount && usedConditionsCount < definedConditionsCount) {
            // After a consumed range condition the index still points at the range field itself,
            // which was used correctly and hence must not be reported.
            final int firstUnusedFieldIndex = rangeCondSet ? definedFieldsIndex + 1 : definedFieldsIndex;
            throw new MalformedQueryException(
                    buildUnusedConditionsMessage(query, rangeCond, firstUnusedFieldIndex));
        }

        if (!rangeCondSet) {
            // Construct fromKey/toKey for the case there were only equals conditions
            final StructRowKey rk = fromKeyStructBuilder.toRowKey();
            rk.setTermination(Termination.MUST);
            fromKey = rk.serialize(fromKeyComponents.toArray());
            toKey = fromKey;
        }

        final Scan scan = createScan(query, rangeCond, rangeCondSet, fromKey, toKey);
        return new ScannerQueryResult(htable.getScanner(scan), definition);
    }

    /**
     * Builds the error message listing the fields, starting from the given position in the
     * index definition, on which the query has a condition that could not be used.
     */
    private String buildUnusedConditionsMessage(Query query, Query.RangeCondition rangeCond, int startIndex) {
        final StringBuilder message = new StringBuilder();
        message.append("The query contains conditions on fields which either did not follow immediately on ");
        message.append(
                "the previous equals condition or followed after a range condition on a field. The fields are: ");

        for (int i = startIndex; i < definition.getFields().size(); i++) {
            final IndexFieldDefinition fieldDef = definition.getFields().get(i);
            final boolean hasCondition = query.getCondition(fieldDef.getName()) != null
                    || (rangeCond != null && rangeCond.getName().equals(fieldDef.getName()));
            if (hasCondition) {
                message.append(fieldDef.getName());
            }
            message.append(" ");
        }

        return message.toString();
    }

    /**
     * Creates the scan which starts at {@code fromKey} and stops at the first row which no
     * longer satisfies the upper bound expressed by {@code toKey}.
     *
     * @param query        the query being executed, consulted for its optional index filter
     * @param rangeCond    the range condition of the query, may be null
     * @param rangeCondSet whether the range condition was actually applied to the keys
     * @param fromKey      the start row of the scan
     * @param toKey        the key prefix acting as upper bound
     * @throws IOException propagated from the scan/filter construction calls, should any of them raise it
     */
    private Scan createScan(Query query, Query.RangeCondition rangeCond, boolean rangeCondSet,
            byte[] fromKey, byte[] toKey) throws IOException {
        final Scan scan = new Scan(fromKey);

        // Query.MAX_VALUE is a value which should be larger than anything, so cannot be an inclusive upper bound
        // The importance of this is because for Query.MAX_VALUE, we do a prefix scan so the operator should be
        // CompareOp.LESS_OR_EQUAL
        final boolean upperBoundInclusive =
                rangeCond != null && (rangeCond.isUpperBoundInclusive() || rangeCond.getToValue() == Query.MAX_VALUE);
        final CompareOp op = rangeCondSet && !upperBoundInclusive ? CompareOp.LESS : CompareOp.LESS_OR_EQUAL;
        final Filter toFilter = new RowFilter(op, new BinaryPrefixComparator(toKey));

        final FilterList filters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        if (query.getIndexFilter() != null) {
            filters.addFilter(new IndexFilterHbaseImpl(query.getIndexFilter(), definition));
        }

        if (rangeCondSet && !rangeCond.isLowerBoundInclusive()) {
            // TODO: optimize the performance hit caused by the extra filter
            //    Once the greater filter on the fromKey returns true, it will remain true because
            //    row keys are sorted. The RowFilter will however keep doing the check again and again
            //    on each new row key. We need a new filter in HBase, something like the opposite of the
            //    WhileMatchFilter.
            filters.addFilter(new RowFilter(CompareOp.GREATER, new BinaryPrefixComparator(fromKey)));
        }
        // The upper bound is always added last, wrapped so the scan ends once it stops matching
        filters.addFilter(new WhileMatchFilter(toFilter));

        scan.setFilter(filters);
        scan.setCaching(SCANNER_CACHING);
        return scan;
    }

    /**
     * Validates that all fields used in the query actually exist in the index definition.
     *
     * TODO: shouldn't we also validate that the requested sort order corresponds with the indexed order etc?
     *
     * @param query query to validate
     * @throws MalformedQueryException if a condition refers to a field unknown to this index
     */
    private void validateQuery(Query query) {
        for (Query.EqualsCondition eqCond : query.getEqConditions()) {
            checkFieldExists(eqCond.getName());
        }

        final Query.RangeCondition rangeCond = query.getRangeCondition();
        if (rangeCond != null) {
            checkFieldExists(rangeCond.getName());
        }
    }

    /**
     * Throws a {@link MalformedQueryException} if the index definition has no field with the given name.
     */
    private void checkFieldExists(String fieldName) {
        if (definition.getField(fieldName) == null) {
            throw new MalformedQueryException(
                    String.format("The query refers to a field which does not exist in this index: %1$s",
                            fieldName));
        }
    }

    /**
     * Verifies that a (non-null) query value is an instance of the class which the row key
     * of the given field deserializes to. Null values are accepted without further checks.
     *
     * @throws MalformedQueryException if the value is of an incompatible type
     */
    private void checkQueryValueType(IndexFieldDefinition fieldDef, Object value) {
        if (value == null) {
            return;
        }

        final Class<?> expectedClass = fieldDef.asRowKey().getDeserializedClass();
        if (!expectedClass.isAssignableFrom(value.getClass())) {
            throw new MalformedQueryException("query for field " + fieldDef.getName() + " contains" +
                    " a value of an incorrect type. Expected: " + expectedClass +
                    ", found: " + value.getClass().getName());
        }
    }
}