Package com.facebook.stats.cardinality

Source Code of com.facebook.stats.cardinality.ArithmeticEncoder

/*
* Copyright (C) 2012 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.stats.cardinality;

import com.facebook.stats.cardinality.Model.SymbolInfo;
import com.google.common.base.Preconditions;

import java.io.IOException;
import java.io.OutputStream;

class ArithmeticEncoder {
  private final Model model;
  private final OutputStream out;

  private long low;
  private long high = 0xFFFFFFFFFFFFL;

  private int underflowHighValue;
  private int underflowBytes;

  public ArithmeticEncoder(Model model, OutputStream out) {
    Preconditions.checkNotNull(model, "model is null");
    Preconditions.checkNotNull(out, "out is null");

    this.model = model;
    this.out = out;
  }

  public void encode(int symbol) throws IOException {
    // lookup symbol data
    SymbolInfo symbolInfo = model.getSymbolInfo(symbol);

    // adjust low and high counts
    long range = (high - low + 1) >> model.log2MaxCount();
    high = low + (range * symbolInfo.highCount()) - 1;
    low = low + range * symbolInfo.lowCount();

    // write high byte if they are equal
    while ((high & 0xFF0000000000L) == (low & 0xFF0000000000L)) {
      int value = (int) (high >>> 40);
      out.write(value);

      // write underflow bytes
      int underflowValue = (value == underflowHighValue) ? 0x00 : 0xFF;
      while (underflowBytes > 0) {
        out.write(underflowValue);
        underflowBytes--;
      }

      // remove high byte
      low <<= 8;
      high = (high << 8) | 0xFF;
    }
    low &= 0xFFFFFFFFFFFFL;
    high &= 0xFFFFFFFFFFFFL;

    // handle possible underflow
    // if top two bytes differ by only one digit
    if ((high >> 32) - (low >> 32) == 1) {
      // if second highest bytes are 0x00 on the high and 0xFF
      // on the low, we need to deal with underflow
      while ((high & 0x00FF00000000L) == 0 && (low & 0x00FF00000000L) == 0x00FF00000000L) {
        // if this is the first underflow byte remember the high value
        // so when we output later we know if we need to output 0xFF or 0x00
        if (underflowBytes == 0) {
          underflowHighValue = (int) (high >>> 40);
        }

        underflowBytes++;

        // remove second chunk of low and high (shifting over lower bits)
        low = removeUnderflowByte(low, 0x00);
        high = removeUnderflowByte(high, 0xFF);
      }
    }
  }

  public void close() throws IOException {
    // Write out the shortest value between the high and low values


    // if there are no underflow bytes...
    if (underflowBytes == 0) {
      // the high byte will be separated by more then one, so the
      // high byte plus one will be between the high and low values
      out.write((int) (low >>> 40) + 1);
    }
    // we have underflow, but if the second byte is 0xFF...
    else if ((low & 0x00FF00000000L) == 0x00FF00000000L) {
      // This is a complex case, that almost never happens
      //
      // In this case the high bytes are separated by only one, and
      // the subsequent underflow bytes on the high are 0x00 and low 0xFF.
      // The the final byte on the low is 0xFF and the high will be
      // anything other than 0x00 (since this would have been considered
      // an underflow byte).  So in decimal we have something like this:
      //   low: 3 99999 9
      //  high: 4 00000 1
      //
      // so if we simply out put the high byte of the high value, it will
      // be between the low and high.  In the example above, that would be
      // the equivalent of:
      //  value: 4
      //
      out.write((int) (high >>> 40));
    } else {
      // Slightly simpler case
      //
      // As above high bytes are separated by one, and underflow bytes
      // are 0xFF and 0x00 for the low and high respectively.  The final
      // byte on the low is anything but 0xFF and the high can be anything.
      // In decimal we have something like this:
      //    low: 3 99999 7
      //   high: 4 00000 0
      //
      // So we will need to output the high byte of the low value, the
      // underflow bytes (0xFF), and finally the second byte of the low
      // plus one, which will put the value between the low and the high.
      // In the example above, that would be the equivalent of:
      //  value: 3 99999 8

      // write the high byte of the low value
      out.write((int) (low >>> 40));

      // write the underflow bytes for the low (0xFF)
      while (underflowBytes > 0) {
        out.write(0xFF);
        underflowBytes--;
      }

      // write the second byte of the low value plus one to put it
      // between the low and high
      int secondByte = (int) ((low >>> 32) & 0xFF);
      out.write(secondByte + 1);
    }
  }

  public static long removeUnderflowByte(long value, int backFillValue) {
    long highBits = (value & 0xFF0000000000L);
    long lowBits = (value & 0x0000FFFFFFFFL) << 8;
    long newValue = highBits | lowBits | backFillValue;
    return newValue;
  }
}
TOP

Related Classes of com.facebook.stats.cardinality.ArithmeticEncoder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.