Package xbird.util.compress

Source Code of xbird.util.compress.PFOR$CompressedSegment

/*
* @(#)$Id: codetemplate_xbird.xml 943 2006-09-13 07:03:37Z yui $
*
* Copyright 2006-2008 Makoto YUI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributors:
*     Makoto YUI - initial implementation
*/
package xbird.util.compress;

import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

import xbird.util.codec.BitwiseCodec;
import xbird.util.collections.CharArrayList;
import xbird.util.io.BitInputStream;
import xbird.util.io.FastByteArrayInputStream;
import xbird.util.io.FastByteArrayOutputStream;
import xbird.util.lang.ArrayUtils;

/**
*
* <DIV lang="en"></DIV>
* <DIV lang="ja"></DIV>
*
* @author Makoto YUI (yuin405+xbird@gmail.com)
* @deprecated
*/
public final class PFOR {

    private PFOR() {}

    public static final class CompressedSegment {

        // header
        int totalEntries_ = 0;
        final int bitwidth_; // at most 128
        final transient int maxcode_;
        //int codeSectionPtr, exceptionSectionPtr;

        /*
         // entry points
         byte[] entryStartList = new byte[12];
         int[] entryStartException = new int[12];        
         */

        // code section
        final transient BitwiseCodec codesBuf;
        final int[] codes;

        // exception section
        final CharArrayList exceptionList;
        int firstException_ = -1;
        transient int nextException_ = 0;

        public CompressedSegment(int bitwidth) {
            this.bitwidth_ = bitwidth;
            this.maxcode_ = 2 ^ bitwidth;
            this.codesBuf = new BitwiseCodec(4096);
            this.codes = null;
            this.exceptionList = new CharArrayList(64);
        }

        public CompressedSegment(int totalEntries, int bitwidth, int firstException, int[] codes, CharArrayList exceptionList) {
            this.totalEntries_ = totalEntries;
            this.bitwidth_ = bitwidth;
            this.maxcode_ = 2 ^ bitwidth;
            this.firstException_ = firstException;
            this.codesBuf = null;
            this.codes = codes;
            this.exceptionList = exceptionList;
        }

        public void writeTo(final DataOutputStream out) throws IOException {
            out.writeInt(totalEntries_);
            out.writeByte(bitwidth_);
            out.writeInt(firstException_);
            codesBuf.writeTo(out, true);
            int exceptions = exceptionList.size();
            out.writeShort(exceptions);
            for(int i = 0; i < exceptions; i++) {
                char c = exceptionList.get(i);
                out.writeChar(c);
            }
        }

        public static CompressedSegment readFrom(final byte[] in) throws IOException {
            FastByteArrayInputStream bis = new FastByteArrayInputStream(in);
            DataInput dis = new DataInputStream(bis);
            int totalEntries = dis.readInt();
            int bitwidth = dis.readByte();
            int firstException = dis.readInt();
            int len = dis.readInt();
            byte[] b = new byte[len];
            dis.readFully(b, 0, len);
            FastByteArrayInputStream codesIs = new FastByteArrayInputStream(b);
            BitInputStream codesBis = new BitInputStream(codesIs);
            int[] codes = new int[totalEntries];
            unpack(codesBis, bitwidth, codes, totalEntries);
            int exceptions = dis.readShort();
            CharArrayList exceptionList = new CharArrayList(exceptions);
            for(int i = 0; i < exceptions; i++) {
                char c = dis.readChar();
                exceptionList.add(c);
            }
            return new CompressedSegment(totalEntries, bitwidth, firstException, codes, exceptionList);
        }

    }

    public static byte[] compress(final char[] input) {
        int n = input.length;
        int bitwidth = estimateAdequateBitWidth(input, n);
        CompressedSegment segment = new CompressedSegment(bitwidth);
        compress(input, n, segment);
        FastByteArrayOutputStream bos = new FastByteArrayOutputStream(4096);
        DataOutputStream dos = new DataOutputStream(bos);
        try {
            segment.writeTo(dos);
            dos.flush();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        byte[] b = bos.toByteArray();
        return b;
    }

    public static byte[] compress(final char[] input, final int bitwidth) {
        CompressedSegment segment = new CompressedSegment(bitwidth);
        compress(input, input.length, segment);
        FastByteArrayOutputStream bos = new FastByteArrayOutputStream(4096);
        DataOutputStream dos = new DataOutputStream(bos);
        try {
            segment.writeTo(dos);
            dos.flush();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        byte[] b = bos.toByteArray();
        return b;
    }

    public static int compress(final char[] input, final int n, final CompressedSegment segment) {
        final int[] miss = new int[n];
        final int[] data = new int[n];
        final int maxcode = segment.maxcode_;
        int prev = segment.nextException_;

        // LOOP1: find exceptions
        int j = 0;
        for(int i = 0; i < n; i++) {
            int val = encode(input[i]);
            data[i] = val;
            miss[j] = i;
            j += (val > maxcode) ? 1 : 0; // TODO can't eliminate if-then-else control hazard
        }
        // LOOP2: create patch-list       
        final CharArrayList exceptionList = segment.exceptionList;
        if(j > 0) {
            segment.firstException_ = miss[0];
            prev = miss[j - 1]; // last-patch
            for(int i = 0; i < j; i++) {
                int cur = miss[i];
                exceptionList.add(input[cur]);
                data[prev] = cur - prev - 1; // difference of the two exception
                prev = cur;
            }
        }

        pack(segment, data, n); // bit-pack the values

        segment.totalEntries_ += n;
        segment.nextException_ = prev;
        return j; // # of exceptions
    }

    private static void pack(final CompressedSegment segment, final int[] data, final int n) {
        final BitwiseCodec codes = segment.codesBuf;
        final int nbits = segment.bitwidth_;
        for(int i = 0; i < n; i++) {
            codes.putBits(data[i], nbits);
        }
    }

    public static char[] decompressAsChars(final byte[] in) {
        final CompressedSegment segment;
        try {
            segment = CompressedSegment.readFrom(in);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        return decompressAsChars(segment.totalEntries_, segment);
    }

    public static char[] decompressAsChars(final int n, final CompressedSegment segment) {
        final int[] code = segment.codes;

        // LOOP1: decode regardless
        final char[] output = new char[n];
        for(int i = 0; i < n; i++) {
            output[i] = decodeChar(code[i]);
        }

        // LOOP2: patch it up
        int firstException = segment.firstException_;
        if(firstException != -1) {
            final CharArrayList exceptionList = segment.exceptionList;
            int cur = code[firstException]; // REVIEWME  exceptionList was empty
            for(int i = 0, next; cur < n; i++, cur = next) {
                output[cur] = exceptionList.get(i);
                next = cur + code[cur] + 1;
            }
            segment.nextException_ = cur - n;
        }

        return output;
    }

    //    public static char finegrainedDecompressAsChars(final int x, final CompressedSegment segment) {
    //        final int[] code = segment.codes;
    //        int entryIdx = x >> 7;
    //        int i = segment.entryStartList[entryIdx] + (x & ~127);
    //        int j = segment.entryStartException[entryIdx];
    //        while(i > x) {
    //            i += code[i];
    //            j--;
    //        }
    //        if(i == x) {
    //            return segment.exceptionList[j];
    //        } else {
    //            return decodeChar(code[x]);
    //        }
    //    }

    private static void unpack(final BitInputStream codes, final int bitwidth, final int[] code, final int n)
            throws IOException {
        for(int i = 0; i < n; i++) {
            code[i] = codes.readBits(bitwidth);
        }
    }

    private static int encode(char c) {
        return c + 32768;
    }

    private static char decodeChar(int i) {
        return (char) (i - 32768);
    }

    public final static int estimateAdequateBitWidth(final char[] v, final int s) {
        final char[] sorted = ArrayUtils.copyOf(v, s);
        Arrays.sort(sorted, 0, s);

        float minEst = Float.MAX_VALUE;
        int bitwidth = 1;
        for(int b = 1; b < 16; b++) {
            float erate = ecceptionRate(sorted, s, b);
            float nrate = 1f - erate;
            float est = (nrate * b) + (erate * 16);
            if(est < minEst) {
                minEst = est;
                bitwidth = b;
            }
        }
        return bitwidth;
    }

    private static float ecceptionRate(final char[] v, final int s, final int b) {
        int lenb = pforAnalyzeBits(v, s, b);
        return (float) (s - lenb) / s;
    }

    /**
     * calculate the longest stretch of value starts, such that
     * the difference between first and last is representable in "b" bits.
     */
    private static int pforAnalyzeBits(final char[] v, final int n, final int b) {
        assert (v != null);
        assert (n >= 0) : n;
        assert (b > 0) : b;

        int len = 0, range = 1 << b;
        for(int lo = 0, hi = 0; hi < n; hi++) {
            if(v[hi] - v[lo] >= range) {
                if(hi - lo >= len) {
                    len = hi - lo;
                }
                while(v[hi] - v[++lo] >= range)
                    ;
            }
        }
        return len + 1;
    }

}
TOP

Related Classes of xbird.util.compress.PFOR$CompressedSegment

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.