Package org.locationtech.geogig.storage

Source Code of org.locationtech.geogig.storage.NodePathStorageOrder

/* Copyright (c) 2012-2014 Boundless and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/org/documents/edl-v10.html
*
* Contributors:
* Victor Olaya (Boundless) - initial implementation
*/
package org.locationtech.geogig.storage;

import java.io.Serializable;

import org.locationtech.geogig.api.Node;
import org.locationtech.geogig.api.ObjectId;
import org.locationtech.geogig.api.RevTree;

import com.google.common.base.Preconditions;
import com.google.common.collect.Ordering;
import com.google.common.primitives.UnsignedLong;

/**
* Implements storage order of {@link Node} based on the non cryptographic 64-bit <a
* href="http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function">FNV-1a</a>
* variation of the "Fowler/Noll/Vo" hash algorithm.
* <p>
* This class mandates in which order {@link Node nodes} are stored inside {@link RevTree trees},
* hence defining the prescribed order in which tree elements are traversed, regardless of in how
* many subtrees a given tree is split into.
* <p>
* The resulting structure where a given node (identified by its name) always falls on the same
* bucket (subtree) for a given subtree depth makes it possible to compute diffs between two trees
* very quickly, by traversing both trees in parallel and finding both bucket and node differences
* and skipping equal bucket trees, as two buckets at the same depth with the same contents will
* always hash out to the same {@link ObjectId}.
* <p>
* The FNV-1a hash for a node name is computed as in the following pseudo-code:
*
* <pre>
* <code>
* hash = FNV_offset_basis
* for each octet_of_data to be hashed
*      hash = hash XOR octet_of_data
*      hash = hash × FNV_prime
* return hash
* </code>
* </pre>
*
* Where {@code FNV_offset_basis} is the 64-bit literal {@code 0xcbf29ce484222325}, and
* {@code FNV_prime} is the 64-bit literal {@code 0x100000001b3}.
* <p>
* To compute the node name hash, each two-byte char in the node name produces two
* {@code octet_of_data}, in big-endian order.
* <p>
* This hash function proved to be extremely fast while maintaining a good distribution and low
* collision rate, and is widely used by the computer science industry as explained <a
* href="http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-param">here</a> when speed is
* needed in contrast to cryptographic security.
*
* @since 0.6
*/
public final class NodePathStorageOrder extends Ordering<String> implements Serializable {

    private static final long serialVersionUID = -685759544293388523L;

    private static final FNV1a64bitHash hashOrder = new FNV1a64bitHash();

    @Override
    public int compare(String p1, String p2) {
        return hashOrder.compare(p1, p2);
    }

    /**
     * Computes the bucket index that corresponds to the given node name at the given depth.
     *
     * @return and Integer between zero and {@link RevTree#MAX_BUCKETS} minus one
     */
    public Integer bucket(final String nodeName, final int depth) {

        final int byteN = hashOrder.byteN(nodeName, depth);
        Preconditions.checkState(byteN >= 0);
        Preconditions.checkState(byteN < 256);

        final int maxBuckets = RevTree.MAX_BUCKETS;

        final int bucket = (byteN * maxBuckets) / 256;
        return Integer.valueOf(bucket);
    }

    public UnsignedLong hashCodeLong(String name) {
        UnsignedLong fnv = FNV1a64bitHash.fnv(name);
        return fnv;
    }

    /**
     * The FNV-1a hash function used as {@link Node} storage order.
     */
    private static class FNV1a64bitHash implements Serializable {

        private static final long serialVersionUID = -1931193743208260766L;

        private static final UnsignedLong FNV64_OFFSET_BASIS = UnsignedLong
                .valueOf("14695981039346656037");

        private static final UnsignedLong FNV64_PRIME = UnsignedLong.valueOf("1099511628211");

        public int compare(final String p1, final String p2) {
            UnsignedLong hash1 = fnv(p1);
            UnsignedLong hash2 = fnv(p2);
            return hash1.compareTo(hash2);
        }

        private static UnsignedLong fnv(CharSequence chars) {
            final int length = chars.length();

            UnsignedLong hash = FNV64_OFFSET_BASIS;

            for (int i = 0; i < length; i++) {
                char c = chars.charAt(i);
                byte b1 = (byte) (c >> 8);
                byte b2 = (byte) c;
                hash = update(hash, b1);
                hash = update(hash, b2);
            }
            return hash;
        }

        private static UnsignedLong update(UnsignedLong hash, final byte octet) {
            // it's ok to use the signed long value here, its a bitwise operation anyways, and its
            // on the lower byte of the long value
            final long longValue = hash.longValue();
            final long bits = longValue ^ octet;

            // System.err.println("hash : " + Long.toBinaryString(longValue));
            // System.err.println("xor  : " + Long.toBinaryString(bits));
            // System.err.println("octet: " + Integer.toBinaryString(octet));

            // convert back to unsigned long
            hash = UnsignedLong.fromLongBits(bits);
            // multiply by prime
            hash = hash.times(FNV64_PRIME);
            return hash;
        }

        /**
         * Returns the Nth unsigned byte in the hash of {@code nodeName} where N is given by
         * {@code depth}
         */
        public int byteN(final String nodeName, final int depth) {
            Preconditions.checkArgument(depth < 8, "depth too deep: %s", Integer.valueOf(depth));

            final long longBits = fnv(nodeName).longValue();

            final int displaceBits = 8 * (7 - depth);// how many bits to right shift longBits to get
                                                     // the byte N

            final int byteN = ((byte) (longBits >> displaceBits)) & 0xFF;
            return byteN;
        }
    }
}
TOP

Related Classes of org.locationtech.geogig.storage.NodePathStorageOrder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.