// Package org.lilyproject.indexer.derefmap

// Source Code of org.lilyproject.indexer.derefmap.DerefMapHbaseImpl

/*
* Copyright 2012 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.indexer.derefmap;

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import com.google.common.collect.Sets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.util.Bytes;
import org.lilyproject.hbaseindex.Index;
import org.lilyproject.hbaseindex.IndexDefinition;
import org.lilyproject.hbaseindex.IndexEntry;
import org.lilyproject.hbaseindex.IndexManager;
import org.lilyproject.hbaseindex.IndexNotFoundException;
import org.lilyproject.hbaseindex.Query;
import org.lilyproject.hbaseindex.QueryResult;
import org.lilyproject.repository.api.AbsoluteRecordId;
import org.lilyproject.repository.api.IdGenerator;
import org.lilyproject.repository.api.RecordId;
import org.lilyproject.repository.api.SchemaId;
import org.lilyproject.util.hbase.HBaseTableFactory;
import org.lilyproject.util.io.Closer;

/**
*
*/
public class DerefMapHbaseImpl implements DerefMap {

    private static final byte[] DEPENDENCIES_KEY = Bytes.toBytes("dependencies");

    private static final byte[] FIELDS_KEY = Bytes.toBytes("fields");

    private static final byte[] DUMMY_IDENTIFIER = new byte[]{0};

    private Index forwardDerefIndex;

    private Index backwardDerefIndex;

    private DerefMapSerializationUtil serializationUtil;

    /**
     * Private constructor. Clients should use static factory methods {@link #delete(String,
     * org.apache.hadoop.conf.Configuration)} and {@link #create(String, Configuration, HBaseTableFactory,
     * IdGenerator)}
     */
    private DerefMapHbaseImpl(final String owningRepoName, final String indexName, final Configuration hbaseConfiguration,
                              final HBaseTableFactory tableFactory, final IdGenerator idGenerator)
            throws IndexNotFoundException, IOException, InterruptedException {

        this.serializationUtil = new DerefMapSerializationUtil(idGenerator);

        final IndexManager indexManager = new IndexManager(hbaseConfiguration, tableFactory);

        IndexDefinition forwardIndexDef = new IndexDefinition(forwardIndexName(indexName));
        // For the record ID we use a variable length byte array field of which the first two bytes are fixed length
        // The first byte is actually the record identifier byte.
        // The second byte really is the first byte of the record id. We put this in the fixed length part
        // (safely because a record id should at least be a single byte long) because this prevents BCD encoding
        // on the first byte, thus making it easier to configure table splitting based on the original input.
        forwardIndexDef.addVariableLengthByteField("dependant_recordid", 2);
        forwardIndexDef.addByteField("dependant_vtag", DerefMapSerializationUtil.SCHEMA_ID_BYTE_LENGTH);
        forwardDerefIndex = indexManager.getIndex(owningRepoName, forwardIndexDef);

        IndexDefinition backwardIndexDef = new IndexDefinition(backwardIndexName(indexName));
        // Same remark as in the forward index.
        backwardIndexDef.addVariableLengthByteField("dependency_masterrecordid", 2);
        backwardIndexDef.addByteField("dependant_vtag", DerefMapSerializationUtil.SCHEMA_ID_BYTE_LENGTH);
        backwardIndexDef.addVariableLengthByteField("variant_properties_pattern");
        backwardDerefIndex = indexManager.getIndex(owningRepoName, backwardIndexDef);
    }

    /**
     * Create a DerefMap for a given index. If this is the first time the DerefMap is constructed for this index,
     * the forward and backward index tables will be created.
     *
     * @param indexName          name of the index
     * @param hbaseConfiguration hbase configuration
     * @param idGenerator        id generator
     * @throws IndexNotFoundException
     * @throws IOException
     * @throws InterruptedException
     */
    public static DerefMap create(final String owningRepoName, final String indexName, final Configuration hbaseConfiguration,
                                  final HBaseTableFactory tableFactory, final IdGenerator idGenerator)
            throws IndexNotFoundException, IOException, InterruptedException {
        return new DerefMapHbaseImpl(owningRepoName, indexName, hbaseConfiguration, tableFactory, idGenerator);
    }

    /**
     * Delete a DerefMap. This will delete the corresponding hbase tables.
     *
     * @param indexName          name of the index to delete
     * @param hbaseConfiguration hbase configuration
     * @throws IOException
     * @throws IndexNotFoundException if the index doesn't exist (maybe it was already deleted?)
     */
    public static void delete(final String indexName, final Configuration hbaseConfiguration)
            throws IOException, IndexNotFoundException {
        final IndexManager manager = new IndexManager(hbaseConfiguration);
        manager.deleteIndex(forwardIndexName(indexName));
        manager.deleteIndex(backwardIndexName(indexName));
    }

    public static String forwardIndexName(String indexName) {
        return "deref-forward-" + indexName;
    }

    public static String backwardIndexName(String indexName) {
        return "deref-backward-" + indexName;
    }

    @Override
    public void updateDependants(AbsoluteRecordId parentRecordId, SchemaId parentVtagId,
                                   Map<DependencyEntry, Set<SchemaId>> newDependantEntries)
            throws IOException {

        final Set<DependencyEntry> existingEntries = findDependencies(parentRecordId, parentVtagId);

        // Figure out what changed
        final Set<DependencyEntry> removedDependencies =
                figureOutRemovedDependencies(newDependantEntries.keySet(), existingEntries);
        final Collection<DependencyEntry> addedDependencies =
                figureOutAddedDependencies(newDependantEntries.keySet(), existingEntries);

        // IMPORTANT implementation note: the order in which changes are applied is not arbitrary. It is such that if
        // the process would fail in between, there will never be left any state in the backward index which would not
        // be found via the forward index.

        // delete removed from bwd index
        for (DependencyEntry removed : removedDependencies) {
            final IndexEntry backwardEntry =
                    createBackwardEntry(removed.getDependency(), parentRecordId, parentVtagId, null,
                            removed.getMoreDimensionedVariants());
            backwardDerefIndex.removeEntry(backwardEntry);
        }

        // update fwd index (added and removed at the same time, it is a single row)
        final IndexEntry fwdEntry =
                createForwardEntry(parentRecordId, parentVtagId, newDependantEntries.keySet());
        forwardDerefIndex.addEntry(fwdEntry);

        // add added to bwd idx
        for (DependencyEntry added : addedDependencies) {
            final Set<SchemaId> fields = newDependantEntries.get(added);
            final IndexEntry backwardEntry =
                    createBackwardEntry(added.getDependency(), parentRecordId, parentVtagId, fields,
                            added.getMoreDimensionedVariants());
            backwardDerefIndex.addEntry(backwardEntry);
        }
    }

    private Set<DependencyEntry> figureOutRemovedDependencies(Collection<DependencyEntry> newDependencies,
                                                              Set<DependencyEntry> existingDependencies) {
        final Set<DependencyEntry> removed = new HashSet<DependencyEntry>();

        // add all existing
        removed.addAll(existingDependencies);

        // remove all new
        removed.removeAll(newDependencies);

        return removed;
    }

    private Collection<DependencyEntry> figureOutAddedDependencies(Set<DependencyEntry> newDependencyEntries,
                                                                   Set<DependencyEntry> existingDependencies) {
        final Set<DependencyEntry> added = new HashSet<DependencyEntry>();

        // add all new
        added.addAll(newDependencyEntries);

        // remove all existing
        added.removeAll(existingDependencies);

        return added;
    }

    private IndexEntry createForwardEntry(AbsoluteRecordId parentRecordId, SchemaId parentVtagId,
                                          Collection<DependencyEntry> newDependencies) throws IOException {
        final IndexEntry fwdEntry = new IndexEntry(forwardDerefIndex.getDefinition());
        fwdEntry.addField("dependant_recordid", parentRecordId.toBytes());
        fwdEntry.addField("dependant_vtag", parentVtagId.getBytes());

        // we do not really use the identifier... all we are interested in is in the data of the entry
        fwdEntry.setIdentifier(DUMMY_IDENTIFIER);

        // the data contains the dependencies of the dependant (master record ids and vtags)
        fwdEntry.addData(DEPENDENCIES_KEY, this.serializationUtil.serializeDependenciesForward(newDependencies));

        return fwdEntry;
    }

    private IndexEntry createBackwardEntry(AbsoluteRecordId parentRecordId, AbsoluteRecordId dependantRecordId, SchemaId dependantVtagId,
                                           Set<SchemaId> fields, Set<String> moreDimensionedVariantProperties)
            throws IOException {
        final byte[] serializedVariantPropertiesPattern = this.serializationUtil.serializeVariantPropertiesPattern(
                this.serializationUtil.createVariantPropertiesPattern(parentRecordId.getRecordId().getVariantProperties(),
                        moreDimensionedVariantProperties));


        final IndexEntry bwdEntry = new IndexEntry(backwardDerefIndex.getDefinition());
        bwdEntry.addField("dependency_masterrecordid", parentRecordId.getRecordId().getMaster().toBytes());
        bwdEntry.addField("dependant_vtag", dependantVtagId.getBytes());
        bwdEntry.addField("variant_properties_pattern", serializedVariantPropertiesPattern);

        // the identifier is the dependant which depends on the dependency
        bwdEntry.setIdentifier(dependantRecordId.toBytes());

        // the fields which the dependant uses of the dependency (null if used for deleting the entry)
        if (fields != null) {
            bwdEntry.addData(FIELDS_KEY, this.serializationUtil.serializeFields(fields));
        }

        return bwdEntry;
    }

    /**
     * Find the set of record ids (and corresponding version tags) on which a given record (in a given version tag)
     * depends.
     *
     * @param parentRecordId record id of the record to find dependencies for
     * @param vtag     vtag of the record to find dependencies for
     * @return the record ids and vtags on which the given record depends
     */
    Set<DependencyEntry> findDependencies(AbsoluteRecordId parentRecordId, SchemaId vtag) throws IOException {
        final Query query = new Query();
        query.addEqualsCondition("dependant_recordid", parentRecordId.toBytes());
        query.addEqualsCondition("dependant_vtag", vtag.getBytes());

        final Set<DependencyEntry> result;

        final QueryResult queryResult = forwardDerefIndex.performQuery(query);
        if (queryResult.next() != null) {
            final byte[] serializedEntries = queryResult.getData(DEPENDENCIES_KEY);
            result = this.serializationUtil.deserializeDependenciesForward(serializedEntries);

            if (queryResult.next() != null) {
                throw new IllegalStateException(
                        "Expected only a single matching entry in " + forwardDerefIndex.getDefinition().getName());
            }

        } else {
            result = new HashSet<DependencyEntry>();
        }

        // Not closed in finally block: avoid HBase contact when there could be connection problems.
        Closer.close(queryResult);

        return result;
    }

    @Override
    public DependantRecordIdsIterator findDependantsOf(AbsoluteRecordId parentRecordId, Set<SchemaId> fields,
                                                       SchemaId vtag) throws IOException {

        final RecordId master = parentRecordId.getRecordId().getMaster();

        final Query query = new Query();
        query.addEqualsCondition("dependency_masterrecordid", master.toBytes());
        if (vtag != null) {
            query.addEqualsCondition("dependant_vtag", vtag.getBytes());
        }

        query.setIndexFilter(new DerefMapIndexFilter(parentRecordId.getRecordId().getVariantProperties(), fields));

        return new DependantRecordIdsIteratorImpl(backwardDerefIndex.performQuery(query), this.serializationUtil);
    }

    @Override
    public DependantRecordIdsIterator findDependantsOf(AbsoluteRecordId parentRecordId, SchemaId field,
                                                       SchemaId vtag) throws IOException {
        return findDependantsOf(parentRecordId, field == null ? null : Sets.newHashSet(field), vtag);
    }

    @Override
    public DependantRecordIdsIterator findDependantsOf(AbsoluteRecordId parentRecordId) throws IOException {
        return findDependantsOf(parentRecordId, (Set<SchemaId>) null, null);
    }

}
// TOP

// Related Classes of org.lilyproject.indexer.derefmap.DerefMapHbaseImpl

// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.