/* Copyright (c) 2012-2014 Boundless and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/org/documents/edl-v10.html
*
* Contributors:
* Victor Olaya (Boundless) - initial implementation
*/
package org.locationtech.geogig.storage;
import java.io.Serializable;
import org.locationtech.geogig.api.Node;
import org.locationtech.geogig.api.ObjectId;
import org.locationtech.geogig.api.RevTree;
import com.google.common.base.Preconditions;
import com.google.common.collect.Ordering;
import com.google.common.primitives.UnsignedLong;
/**
* Implements storage order of {@link Node} based on the non cryptographic 64-bit <a
* href="http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function">FNV-1a</a>
* variation of the "Fowler/Noll/Vo" hash algorithm.
* <p>
* This class mandates in which order {@link Node nodes} are stored inside {@link RevTree trees},
* hence defining the prescribed order in which tree elements are traversed, regardless of in how
* many subtrees a given tree is split into.
* <p>
* The resulting structure where a given node (identified by its name) always falls on the same
* bucket (subtree) for a given subtree depth makes it possible to compute diffs between two trees
* very quickly, by traversing both trees in parallel and finding both bucket and node differences
* and skipping equal bucket trees, as two buckets at the same depth with the same contents will
* always hash out to the same {@link ObjectId}.
* <p>
* The FNV-1a hash for a node name is computed as in the following pseudo-code:
*
* <pre>
* <code>
* hash = FNV_offset_basis
* for each octet_of_data to be hashed
*     hash = hash XOR octet_of_data
*     hash = hash × FNV_prime
* return hash
* </code>
* </pre>
*
* Where {@code FNV_offset_basis} is the 64-bit literal {@code 0xcbf29ce484222325}, and
* {@code FNV_prime} is the 64-bit literal {@code 0x100000001b3}.
* <p>
* To compute the node name hash, each two-byte char in the node name produces two
* {@code octet_of_data}, in big-endian order.
* <p>
* This hash function proved to be extremely fast while maintaining a good distribution and low
* collision rate, and is widely used by the computer science industry as explained <a
* href="http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-param">here</a> when speed is
* needed in contrast to cryptographic security.
*
* @since 0.6
*/
public final class NodePathStorageOrder extends Ordering<String> implements Serializable {

    private static final long serialVersionUID = -685759544293388523L;

    private static final FNV1a64bitHash hashOrder = new FNV1a64bitHash();

    /**
     * Compares two node names by the unsigned value of their 64-bit hash.
     * <p>
     * If both names hash to the same value, the tie is broken by the natural {@link String}
     * order, so that two different names never compare as equal.
     *
     * @param p1 the first node name, must not be {@code null}
     * @param p2 the second node name, must not be {@code null}
     * @return a negative integer, zero, or a positive integer as {@code p1} sorts before, equal
     *         to, or after {@code p2}
     */
    @Override
    public int compare(String p1, String p2) {
        return hashOrder.compare(p1, p2);
    }

    /**
     * Computes the bucket index that corresponds to the given node name at the given depth.
     *
     * @param nodeName the node name to compute the bucket for
     * @param depth the subtree depth, from zero to seven
     * @return an Integer between zero and {@link RevTree#MAX_BUCKETS} minus one
     * @throws IllegalArgumentException if {@code depth} is negative or greater than seven
     */
    public Integer bucket(final String nodeName, final int depth) {
        // byteN is an unsigned byte value, hence always in the range [0, 255]
        final int byteN = hashOrder.byteN(nodeName, depth);
        final int maxBuckets = RevTree.MAX_BUCKETS;
        // scale the [0, 255] byte value down to the [0, maxBuckets - 1] range
        final int bucket = (byteN * maxBuckets) / 256;
        return Integer.valueOf(bucket);
    }

    /**
     * Computes the 64-bit hash of the given node name.
     *
     * @param name the node name to hash, must not be {@code null}
     * @return the hash of {@code name} as an unsigned 64-bit value
     */
    public UnsignedLong hashCodeLong(String name) {
        return UnsignedLong.fromLongBits(FNV1a64bitHash.fnv(name));
    }

    /**
     * The FNV-1a hash function used as {@link Node} storage order.
     * <p>
     * The hash is computed with primitive {@code long} arithmetic; the bits of the {@code long}
     * are to be interpreted as an unsigned 64-bit integer.
     */
    private static class FNV1a64bitHash implements Serializable {

        private static final long serialVersionUID = -1931193743208260766L;

        /** Bit pattern of the unsigned decimal value {@code 14695981039346656037}. */
        private static final long FNV64_OFFSET_BASIS = 0xcbf29ce484222325L;

        /** The decimal value {@code 1099511628211}. */
        private static final long FNV64_PRIME = 0x100000001b3L;

        /**
         * Orders two names by the unsigned value of their hashes, falling back to the natural
         * string order when both hashes are equal.
         */
        public int compare(final String p1, final String p2) {
            final long hash1 = fnv(p1);
            final long hash2 = fnv(p2);
            if (hash1 == hash2) {
                // either the same name or a hash collision: different names must not compare
                // as equal, or sorted collections would treat them as the same element
                return p1.compareTo(p2);
            }
            return UnsignedLong.fromLongBits(hash1).compareTo(UnsignedLong.fromLongBits(hash2));
        }

        /**
         * Computes the hash of {@code chars}, feeding the two bytes of each char to the hash in
         * big-endian order (high byte first).
         *
         * @return the bits of the unsigned 64-bit hash
         */
        private static long fnv(CharSequence chars) {
            final int length = chars.length();
            long hash = FNV64_OFFSET_BASIS;
            for (int i = 0; i < length; i++) {
                final char c = chars.charAt(i);
                hash = update(hash, (byte) (c >> 8));
                hash = update(hash, (byte) c);
            }
            return hash;
        }

        /**
         * Mixes one octet into the running hash: XOR first, then multiply by the FNV prime.
         */
        private static long update(final long hash, final byte octet) {
            // NOTE: the octet is sign-extended when widened to long, so an octet with its high
            // bit set also flips the upper 56 bits of the hash. This differs from the textbook
            // FNV-1a, but it is kept as is because the resulting hash defines the node storage
            // order.
            final long bits = hash ^ octet;
            // signed and unsigned 64-bit multiplication yield the same low 64 bits
            return bits * FNV64_PRIME;
        }

        /**
         * Returns the Nth unsigned byte in the hash of {@code nodeName} where N is given by
         * {@code depth}, zero being the most significant byte.
         *
         * @throws IllegalArgumentException if {@code depth} is negative or greater than seven
         */
        public int byteN(final String nodeName, final int depth) {
            Preconditions.checkArgument(depth >= 0, "depth must be non-negative: %s",
                    Integer.valueOf(depth));
            Preconditions.checkArgument(depth < 8, "depth too deep: %s", Integer.valueOf(depth));
            final long longBits = fnv(nodeName);
            // how many bits to right shift longBits to get the byte N
            final int displaceBits = 8 * (7 - depth);
            return (int) ((longBits >>> displaceBits) & 0xFF);
        }
    }
}