Package org.toubassi.femtozip

Source Code of org.toubassi.femtozip.Tool

/**
*   Copyright 2011 Garrick Toubassi
*
*   Licensed under the Apache License, Version 2.0 (the "License");
*   you may not use this file except in compliance with the License.
*   You may obtain a copy of the License at
*
*       http://www.apache.org/licenses/LICENSE-2.0
*
*   Unless required by applicable law or agreed to in writing, software
*   distributed under the License is distributed on an "AS IS" BASIS,
*   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*   See the License for the specific language governing permissions and
*   limitations under the License.
*/
package org.toubassi.femtozip;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Random;

import org.toubassi.femtozip.dictionary.DictionaryOptimizer;
import org.toubassi.femtozip.models.NativeCompressionModel;
import org.toubassi.femtozip.util.FileUtil;

public class Tool  {
   
    protected enum Operation {
        BuildModel, Benchmark, Compress, Decompress
    }

    protected DecimalFormat format = new DecimalFormat("#.##");

    protected Operation operation;
   
    protected String path;
    protected String modelPath;
    protected String[] models;
    protected CompressionModel model;
    protected boolean preload;
    protected boolean verify;
    protected boolean dumpArgs;
    protected boolean useNativeModel;
    protected boolean dictOnly;
   
    protected int numSamples = Integer.MAX_VALUE;
    protected int maxDictionarySize = 0;

    protected CompressionModel buildModel(DocumentList documents) throws IOException {
        return buildModel(documents, new ArrayList<CompressionModel.ModelOptimizationResult>());
    }
   
    protected CompressionModel buildModel(DocumentList documents, ArrayList<CompressionModel.ModelOptimizationResult> results) throws IOException {
       
        long start = System.currentTimeMillis();
       
        System.out.print("Building model...");

        CompressionModel[] competingModels = null;
        if (models != null && models.length != 0) {
            competingModels = new CompressionModel[models.length];
            int i = 0;
            for (String modelName : models) {
                competingModels[i++] = CompressionModel.instantiateCompressionModel(modelName);
            }
        }
       
        model = CompressionModel.buildOptimalModel(documents, results, competingModels, true);
       
        long duration = Math.round((System.currentTimeMillis() - start)/1000d);
        System.out.println(" (" + duration + "s)");
       
        for (CompressionModel.ModelOptimizationResult result : results) {
            if (result.totalDataSize > 0) {
                System.out.println(result);
            }
        }
       
        System.out.println();
        return model;
    }
   
    protected void buildModel() throws IOException {
        File dir = new File(path);
        List<String> files = Arrays.asList(dir.list());
        Collections.shuffle(files, new Random(1234567890)); // Avoid any bias in ordering of the files
        numSamples = Math.min(numSamples, files.size());
        buildModel(new FileDocumentList(path, files));
    }

    protected void benchmarkModel(CompressionModel model, DocumentList docs, long totalDataSize[], long totalCompressedSize[]) throws IOException {
        System.out.print("Benchmarking " + model.getClass().getSimpleName() + " ");

        long compressTime = 0;
        long decompressTime = 0;
        int dataSize = 0;
        int compressedSize = 0;
        for (int i = 0, count = docs.size(); i < count; i++) {
            byte[] bytes = docs.get(i);
           
            long startCompress = System.nanoTime();
            byte[] compressed = model.compress(bytes);
            compressTime += System.nanoTime() - startCompress;

            dataSize += bytes.length;
            compressedSize += compressed.length;
           
            if (verify) {
                long startDecompress = System.nanoTime();
                byte[] decompressed = model.decompress(compressed);
                decompressTime += System.nanoTime() - startDecompress;
                if (!Arrays.equals(bytes, decompressed)) {
                    throw new RuntimeException("Compress/Decompress round trip failed for " + model.getClass().getSimpleName());
                }
            }
        }

        totalDataSize[0] += dataSize;
        totalCompressedSize[0] += compressedSize;

        decompressTime /= 1000000;
        compressTime /= 1000000;
        String ratio = format.format(100f * compressedSize / dataSize);
        System.out.println(ratio  + "% (" + compressedSize + "/" + dataSize + "  compressed: " + compressTime + "ms" + (verify ? (" decompress:" + decompressTime + "ms") : "") + ")\n");
    }
   
    protected void benchmarkModel() throws IOException {
        File dir = new File(path);
        List<String> files = Arrays.asList(dir.list());
        Collections.shuffle(files, new Random(1234567890)); // Avoid any bias in ordering of the files
        numSamples = Math.min(numSamples, files.size());
        FileDocumentList docs = new FileDocumentList(path, files.subList(0, numSamples), preload);

        long start = System.currentTimeMillis();
        long[] totalDataSizeRef = new long[1];
        long[] totalCompressedSizeRef = new long[1];
        benchmarkModel(model, docs, totalDataSizeRef, totalCompressedSizeRef);
        long totalCompressedSize = totalCompressedSizeRef[0];
        long totalDataSize = totalDataSizeRef[0];
        long duration = System.currentTimeMillis() - start;
       
        System.out.println("Summary:");
        System.out.println("Aggregate Stored Data Compression Rate: " + format.format(totalCompressedSize * 100d / totalDataSize) + "% (" + totalCompressedSize + " bytes)");
        System.out.println("Compression took " + format.format(duration / 1000f) + "s");
    }

    protected void compress(File file) throws IOException {
        System.out.println("Compressing " + file.getName());
        byte[] data = FileUtil.readFile(file);
        byte[] compressed = model.compress(data);
       
        File outputFile = new File(file.getPath() + ".fz");
        FileOutputStream out = new FileOutputStream(outputFile);
        out.write(compressed);
        out.close();
        file.delete();
    }
   
    protected void compress() throws IOException {
        File file = new File(path);
        if (file.isDirectory()) {
            File[] files = file.listFiles();
            for (File f : files) {
                compress(f);
            }
        }
        else {
            compress(file);
        }
    }

    protected void decompress(File file) throws IOException {
        System.out.println("Decompressing " + file.getName());
        byte[] compressed = FileUtil.readFile(file);
        byte[] data = model.decompress(compressed);
       
        File outputFile = new File(file.getPath().substring(0, file.getPath().length() - 3));
        FileOutputStream out = new FileOutputStream(outputFile);
        out.write(data);
        out.close();
        file.delete();
    }
   
    protected void decompress() throws IOException {
        File file = new File(path);
        if (file.isDirectory()) {
            File[] files = file.listFiles();
            for (File f : files) {
                if (f.getName().endsWith(".fz")) {
                    decompress(f);
                }
            }
        }
        else {
            decompress(file);
        }
    }
   
    protected void buildDictionary() throws IOException {
        File dir = new File(path);
        List<String> files = Arrays.asList(dir.list());
        DocumentList documents = new FileDocumentList(path, files);
        DictionaryOptimizer optimizer = new DictionaryOptimizer(documents);
        byte[] dictionary = optimizer.optimize(maxDictionarySize  > 0 ? maxDictionarySize : 64*1024);
       
        FileOutputStream fileOut = new FileOutputStream(modelPath);
        fileOut.write(dictionary);
        fileOut.close();
    }
   
    protected void loadBenchmarkModel() throws IOException {
        if (useNativeModel) {
            NativeCompressionModel nativeModel = new NativeCompressionModel();
            nativeModel.load(modelPath);
            model = nativeModel;
        }
        else {
            model = CompressionModel.loadModel(modelPath);
        }
    }
   
    protected void saveBenchmarkModel() throws IOException {
        File modelDir = new File(modelPath);
        modelDir.getParentFile().mkdirs();
       
        model.save(modelPath);
    }
   
    protected void usage() {
        System.out.println("Usage: [--build|--benchmark|--compress|--decompress] [--dictonly] [--maxdict num] --model path path");
        System.exit(1);
    }
   
    public void run(String[] args) throws IOException {
       
        for (int i = 0, count = args.length; i < count; i++) {
            String arg = args[i];
           
            if (arg.equals("--benchmark")) {
                operation = Operation.Benchmark;
            }
            else if (arg.equals("--build")) {
                operation = Operation.BuildModel;
            }
            else if (arg.equals("--compress")) {
                operation = Operation.Compress;
            }
            else if (arg.equals("--decompress")) {
                operation = Operation.Decompress;
            }
            else if (arg.equals("--dictonly")) {
                dictOnly = true;
            }
            else if (arg.equals("--numsamples")) {
                numSamples = Integer.parseInt(args[++i]);
            }
            else if (arg.equals("--model")) {
                modelPath = args[++i];
            }
            else if (arg.equals("--models")) {
                models = args[++i].split(",");
            }
            else if (arg.equals("--preload")) {
                preload = true;
            }
            else if (arg.equals("--verify")) {
                verify = true;
            }
            else if (arg.equals("--maxdict")) {
                maxDictionarySize = Integer.parseInt(args[++i]);
            }
            else if (arg.equals("--native")) {
                useNativeModel = true;
            }
            else if (arg.equals("--dumpargs")) {
                dumpArgs = true;
            }
            else {
                path = arg;
            }
        }

        if (operation == null || path == null || modelPath == null) {
            usage();
        }
       
        if (dumpArgs) {
            System.out.println("Command line arguments:");
            for (String arg : args) {
                System.out.println(arg);
            }
            System.out.println();
        }
       
        long start = System.currentTimeMillis();
       
        if (operation == Operation.BuildModel) {
            if (dictOnly) {
                buildDictionary();
            }
            else {
                buildModel();
                saveBenchmarkModel();
            }
        }       
        else if (operation == Operation.Benchmark) {
            loadBenchmarkModel();
            benchmarkModel();
        }
        else if (operation == Operation.Compress) {
            loadBenchmarkModel();
            compress();
        }
        else if (operation == Operation.Decompress) {
            loadBenchmarkModel();
            decompress();
        }
       
        long duration = System.currentTimeMillis() - start;
       
        System.out.println("Took " + format.format(duration / 1000f) + "s");
    }
   
    public static void main(String[] args) throws IOException {
        Tool tool = new Tool();
        tool.run(args);
    }
   
}
TOP

Related Classes of org.toubassi.femtozip.Tool

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.