/*
* Copyright (C) 2012 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.stats.cardinality;
import com.facebook.stats.cardinality.Model.SymbolInfo;
import com.google.common.base.Preconditions;
import java.io.IOException;
import java.io.OutputStream;
class ArithmeticEncoder {
private final Model model;
private final OutputStream out;
private long low;
private long high = 0xFFFFFFFFFFFFL;
private int underflowHighValue;
private int underflowBytes;
public ArithmeticEncoder(Model model, OutputStream out) {
Preconditions.checkNotNull(model, "model is null");
Preconditions.checkNotNull(out, "out is null");
this.model = model;
this.out = out;
}
public void encode(int symbol) throws IOException {
// lookup symbol data
SymbolInfo symbolInfo = model.getSymbolInfo(symbol);
// adjust low and high counts
long range = (high - low + 1) >> model.log2MaxCount();
high = low + (range * symbolInfo.highCount()) - 1;
low = low + range * symbolInfo.lowCount();
// write high byte if they are equal
while ((high & 0xFF0000000000L) == (low & 0xFF0000000000L)) {
int value = (int) (high >>> 40);
out.write(value);
// write underflow bytes
int underflowValue = (value == underflowHighValue) ? 0x00 : 0xFF;
while (underflowBytes > 0) {
out.write(underflowValue);
underflowBytes--;
}
// remove high byte
low <<= 8;
high = (high << 8) | 0xFF;
}
low &= 0xFFFFFFFFFFFFL;
high &= 0xFFFFFFFFFFFFL;
// handle possible underflow
// if top two bytes differ by only one digit
if ((high >> 32) - (low >> 32) == 1) {
// if second highest bytes are 0x00 on the high and 0xFF
// on the low, we need to deal with underflow
while ((high & 0x00FF00000000L) == 0 && (low & 0x00FF00000000L) == 0x00FF00000000L) {
// if this is the first underflow byte remember the high value
// so when we output later we know if we need to output 0xFF or 0x00
if (underflowBytes == 0) {
underflowHighValue = (int) (high >>> 40);
}
underflowBytes++;
// remove second chunk of low and high (shifting over lower bits)
low = removeUnderflowByte(low, 0x00);
high = removeUnderflowByte(high, 0xFF);
}
}
}
public void close() throws IOException {
// Write out the shortest value between the high and low values
// if there are no underflow bytes...
if (underflowBytes == 0) {
// the high byte will be separated by more then one, so the
// high byte plus one will be between the high and low values
out.write((int) (low >>> 40) + 1);
}
// we have underflow, but if the second byte is 0xFF...
else if ((low & 0x00FF00000000L) == 0x00FF00000000L) {
// This is a complex case, that almost never happens
//
// In this case the high bytes are separated by only one, and
// the subsequent underflow bytes on the high are 0x00 and low 0xFF.
// The the final byte on the low is 0xFF and the high will be
// anything other than 0x00 (since this would have been considered
// an underflow byte). So in decimal we have something like this:
// low: 3 99999 9
// high: 4 00000 1
//
// so if we simply out put the high byte of the high value, it will
// be between the low and high. In the example above, that would be
// the equivalent of:
// value: 4
//
out.write((int) (high >>> 40));
} else {
// Slightly simpler case
//
// As above high bytes are separated by one, and underflow bytes
// are 0xFF and 0x00 for the low and high respectively. The final
// byte on the low is anything but 0xFF and the high can be anything.
// In decimal we have something like this:
// low: 3 99999 7
// high: 4 00000 0
//
// So we will need to output the high byte of the low value, the
// underflow bytes (0xFF), and finally the second byte of the low
// plus one, which will put the value between the low and the high.
// In the example above, that would be the equivalent of:
// value: 3 99999 8
// write the high byte of the low value
out.write((int) (low >>> 40));
// write the underflow bytes for the low (0xFF)
while (underflowBytes > 0) {
out.write(0xFF);
underflowBytes--;
}
// write the second byte of the low value plus one to put it
// between the low and high
int secondByte = (int) ((low >>> 32) & 0xFF);
out.write(secondByte + 1);
}
}
public static long removeUnderflowByte(long value, int backFillValue) {
long highBits = (value & 0xFF0000000000L);
long lowBits = (value & 0x0000FFFFFFFFL) << 8;
long newValue = highBits | lowBits | backFillValue;
return newValue;
}
}