// Source Code of org.lilyproject.hbaseindex.Index

/*
 * Copyright 2010 Outerthought bvba
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.lilyproject.hbaseindex;


import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;


import com.gotometrics.orderly.RowKey;
import com.gotometrics.orderly.StructBuilder;
import com.gotometrics.orderly.StructRowKey;
import com.gotometrics.orderly.Termination;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryPrefixComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.WhileMatchFilter;
import org.lilyproject.hbaseindex.filter.IndexFilterHbaseImpl;
import org.lilyproject.util.ArgumentValidator;
import org.lilyproject.util.ByteArrayKey;


/**
 * Allows to query an index, and add entries to it or remove entries from it.
 *
 * <p>An Index instance can be obtained from {@link IndexManager#getIndex(String)}.
 */
public class Index {
    private HTableInterface htable;
    private IndexDefinition definition;


    private static final byte[] DUMMY_QUALIFIER = new byte[]{0};
    private static final byte[] DUMMY_VALUE = new byte[]{0};


    protected Index(HTableInterface htable, IndexDefinition definition) {
        this.htable = htable;
        this.definition = definition;
    }


    public IndexDefinition getDefinition() {
        return definition;
    }


    /**
     * Adds an entry to this index. See {@link IndexEntry} for more information.
     *
     * @param entry the values to be part of the index key, should correspond to the fields
     *              defined in the {@link IndexDefinition}
     */
    public void addEntry(IndexEntry entry) throws IOException {
        ArgumentValidator.notNull(entry, "entry");
        entry.validate();


        Put put = createAddEntryPut(entry);
        htable.put(put);
    }


    /**
     * Adds multiple entries to the index. Uses one HBase put so is more efficient
     * than adding each entry individually.
     */
    public void addEntries(List<IndexEntry> entries) throws IOException {
        List<Put> puts = new ArrayList<Put>();


        for (IndexEntry entry : entries) {
            entry.validate();
            Put put = createAddEntryPut(entry);
            puts.add(put);
        }


        htable.put(puts);
    }


    private Put createAddEntryPut(IndexEntry entry) throws IOException {
        byte[] indexKey = buildRowKey(entry);
        Put put = new Put(indexKey);


        Map<ByteArrayKey, byte[]> data = entry.getData();
        if (data.size() > 0) {
            for (Map.Entry<ByteArrayKey, byte[]> item : data.entrySet()) {
                put.add(IndexDefinition.DATA_FAMILY, item.getKey().getKey(), item.getValue());
            }
        } else {
            // HBase does not allow to create a row without columns, so add a dummy column
            put.add(IndexDefinition.DATA_FAMILY, DUMMY_QUALIFIER, DUMMY_VALUE);
        }
        return put;
    }


    /**
     * Removes an entry from the index. The contents of the supplied
     * entry and the identifier should exactly match those supplied
     * when creating the index entry.
     */
    public void removeEntry(IndexEntry entry) throws IOException {
        ArgumentValidator.notNull(entry, "entry");
        entry.validate();


        byte[] indexKey = buildRowKey(entry);
        Delete delete = new Delete(indexKey);
        htable.delete(delete);
    }


    public void removeEntries(List<IndexEntry> entries) throws IOException {
        ArgumentValidator.notNull(entries, "entries");


        List<Delete> deletes = new ArrayList<Delete>();
        for (IndexEntry entry : entries) {
            entry.validate();


            byte[] indexKey = buildRowKey(entry);
            Delete delete = new Delete(indexKey);
            deletes.add(delete);
        }


        htable.delete(deletes);
    }


    /**
     * Build the index row key.
     *
     * <p>The format is as follows:
     *
     * <pre>
     * ([encoded value][terminator for variable length fields])*[identifier]
     * </pre>
     */
    private byte[] buildRowKey(IndexEntry entry) throws IOException {
        final StructRowKey indexEntryRowKeySerializer = definition.asStructRowKey();


        return indexEntryRowKeySerializer.serialize(entry.getFieldValuesInSerializationOrder());
    }


    public QueryResult performQuery(Query query) throws IOException {
        validateQuery(query);


        final StructBuilder fromKeyStructBuilder = new StructBuilder();
        final StructBuilder toKeyStructBuilder = new StructBuilder();


        // Construct from and to keys


        final List<Object> fromKeyComponents = new ArrayList<Object>(definition.getFields().size());
        byte[] fromKey = null;
        byte[] toKey = null;


        final Query.RangeCondition rangeCond = query.getRangeCondition();
        boolean rangeCondSet = false;
        int usedConditionsCount = 0;
        int definedFieldsIndex = 0;


        // loop through all defined index fields, and see if they occur in the query
        for (; definedFieldsIndex < definition.getFields().size(); definedFieldsIndex++) {
            final IndexFieldDefinition fieldDef = definition.getFields().get(definedFieldsIndex);


            final Query.EqualsCondition eqCond = query.getCondition(fieldDef.getName());
            if (eqCond != null) {
                // there is an equality condition for this field
                checkQueryValueType(fieldDef, eqCond.getValue());
                final RowKey key = fieldDef.asRowKey();
                key.setTermination(Termination.MUST);
                fromKeyStructBuilder.add(key);
                fromKeyComponents.add(eqCond.getValue());
                usedConditionsCount++;
            } else if (rangeCond != null) {
                // no equality condition for this field, but there is a range condition
                if (!rangeCond.getName().equals(fieldDef.getName())) {
                    throw new MalformedQueryException("Query defines range condition on field " + rangeCond.getName() +
                            " but has no equals condition on field " + fieldDef.getName() +
                            " which comes earlier in the index definition.");
                }


                final List<Object> toKeyComponents = new ArrayList<Object>(fromKeyComponents.size() + 1);
                toKeyComponents.addAll(fromKeyComponents);
                for (RowKey rowKey : fromKeyStructBuilder.getFields()) {
                    toKeyStructBuilder.add(rowKey);
                }


                final Object fromValue = query.getRangeCondition().getFromValue();
                final Object toValue = query.getRangeCondition().getToValue();


                if (fromValue == Query.MIN_VALUE) {
                    // just leave of the value, a shorter key is smaller than anything else
                } else {
                    checkQueryValueType(fieldDef, fromValue);
                    fromKeyComponents.add(fromValue);
                    fromKeyStructBuilder.add(fieldDef.asRowKeyWithoutTermination());
                }


                if (toValue == Query.MAX_VALUE) {
                    // Searching to max value is equal to a prefix search (assumes always exclusive interval,
                    // since max value is bigger than anything else)
                    // So, append nothing to the search key.
                } else {
                    checkQueryValueType(fieldDef, toValue);
                    toKeyComponents.add(toValue);
                    toKeyStructBuilder.add(fieldDef.asRowKeyWithoutTermination());
                }


                final StructRowKey frk = fromKeyStructBuilder.toRowKey();
                fromKey = frk.serialize(fromKeyComponents.toArray());
                final StructRowKey trk = toKeyStructBuilder.toRowKey();
                toKey = trk.serialize(toKeyComponents.toArray());


                rangeCondSet = true;
                usedConditionsCount++;


                break;
            } else {
                // we're done
                break;
            }
        }


        // Check if we have used all conditions defined in the query
        if (definedFieldsIndex < definition.getFields().size() &&
                usedConditionsCount < query.getEqConditions().size() + (rangeCond != null ? 1 : 0)) {
            StringBuilder message = new StringBuilder();
            message.append("The query contains conditions on fields which either did not follow immediately on ");
            message.append(
                    "the previous equals condition or followed after a range condition on a field. The fields are: ");
            for (; definedFieldsIndex < definition.getFields().size(); definedFieldsIndex++) {
                IndexFieldDefinition fieldDef = definition.getFields().get(definedFieldsIndex);
                if (query.getCondition(fieldDef.getName()) != null) {
                    message.append(fieldDef.getName());
                } else if (rangeCond != null && rangeCond.getName().equals(fieldDef.getName())) {
                    message.append(fieldDef.getName());
                }
                message.append(" ");
            }
            throw new MalformedQueryException(message.toString());
        }


        if (!rangeCondSet) {
            // Construct fromKey/toKey for the case there were only equals conditions
            final StructRowKey rk = fromKeyStructBuilder.toRowKey();
            rk.setTermination(Termination.MUST);
            fromKey = rk.serialize(fromKeyComponents.toArray());
            toKey = fromKey;
        }


        Scan scan = new Scan(fromKey);


        // Query.MAX_VALUE is a value which should be larger than anything, so cannot be an inclusive upper bound
        // The importance of this is because for Query.MAX_VALUE, we do a prefix scan so the operator should be
        // CompareOp.LESS_OR_EQUAL
        boolean upperBoundInclusive =
                rangeCond != null && (rangeCond.isUpperBoundInclusive() || rangeCond.getToValue() == Query.MAX_VALUE);
        CompareOp op = rangeCondSet && !upperBoundInclusive ? CompareOp.LESS : CompareOp.LESS_OR_EQUAL;
        Filter toFilter = new RowFilter(op, new BinaryPrefixComparator(toKey));


        FilterList filters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        if (query.getIndexFilter() != null) {
            filters.addFilter(new IndexFilterHbaseImpl(query.getIndexFilter(), definition));
        }


        if (rangeCondSet && !rangeCond.isLowerBoundInclusive()) {
            // TODO: optimize the performance hit caused by the extra filter
            //  Once the greater filter on the fromKey returns true, it will remain true because
            //  row keys are sorted. The RowFilter will however keep doing the check again and again
            //  on each new row key. We need a new filter in HBase, something like the opposite of the
            //  WhileMatchFilter.
            filters.addFilter(new RowFilter(CompareOp.GREATER, new BinaryPrefixComparator(fromKey)));
            filters.addFilter(new WhileMatchFilter(toFilter));
        } else {
            filters.addFilter(new WhileMatchFilter(toFilter));
        }


        scan.setFilter(filters);
        scan.setCaching(30);


        return new ScannerQueryResult(htable.getScanner(scan), definition);
    }


    /**
     * Validates that all fields used in the query actually exist in the index definition.
     *
     * TODO: shouldn't we also validate that the requested sort order corresponds with the indexed order etc?
     *
     * @param query query to validate
     */
    private void validateQuery(Query query) {
        for (Query.EqualsCondition eqCond : query.getEqConditions()) {
            if (definition.getField(eqCond.getName()) == null) {
                throw new MalformedQueryException(
                        String.format("The query refers to a field which does not exist in this index: %1$s",
                                eqCond.getName()));
            }
        }
        if (query.getRangeCondition() != null && definition.getField(query.getRangeCondition().getName()) == null) {
            throw new MalformedQueryException(
                    String.format("The query refers to a field which does not exist in this index: %1$s",
                            query.getRangeCondition().getName()));
        }
    }


    private void checkQueryValueType(IndexFieldDefinition fieldDef, Object value) {
        if (value != null) {
            final RowKey rowKey = fieldDef.asRowKey();
            if (!rowKey.getDeserializedClass().isAssignableFrom(value.getClass())) {
                throw new MalformedQueryException("query for field " + fieldDef.getName() + " contains" +
                        " a value of an incorrect type. Expected: " + rowKey.getDeserializedClass() +
                        ", found: " + value.getClass().getName());
            }
        }
    }


}
// Source Code of org.lilyproject.hbaseindex.Index

// Related Classes of org.lilyproject.hbaseindex.Index