package com.atolsystems.atolutilities;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.codec.binary.Base64OutputStream;
/**
 * Class to store data in multiple files. When reading back, the class is able to
 * determine the order of the files and which files belong to the same data stream.
 *
 * File format:
 * header, data chunk
 *
 * Header format:
 * encoding (1 byte): either "B" or "T". If "T", the remaining part of the header is base64 encoded
 * fileHash (32 bytes): SHA-256 hash of the original file
 * fileSize (4 bytes)
 * dataOffset (4 bytes): offset of this data chunk within the original file
 * dataSize (4 bytes): size of this data chunk
 * dataHash (32 bytes): SHA-256 hash of the data chunk stored in this file
 *
 * Multi-byte integers are written high byte first (big-endian). Hashes are written low byte first.
*
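 * A minimal round-trip sketch (file names here are illustrative, not from the project):
 * <pre>{@code
 * // split big.bin into chunk files big.part0, big.part1, ... of at most 4096 bytes each
 * int nFiles = MultiPartStore.write(new File("big.part"), new File("big.bin"), 4096, false);
 * // later, scan a folder for chunk files and rebuild every original found there
 * Set<File> rebuilt = MultiPartStore.read(new File("chunks"), new File("out"), "_rebuilt");
 * }</pre>
 *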
* @author seb
*/
public class MultiPartStore {
final static byte BASE64_ENCODING = 'T';//written in base64
final static byte BINARY_ENCODING = 'B';
final static int FILEHASH_OFFSET = 1;
final static int FILEHASH_SIZE = 32;
final static int FILESIZE_OFFSET = FILEHASH_OFFSET + FILEHASH_SIZE;
final static int FILESIZE_SIZE = 4;
final static int DATAOFFSET_OFFSET = FILESIZE_OFFSET + FILESIZE_SIZE;
final static int DATAOFFSET_SIZE = 4;
final static int DATASIZE_OFFSET = DATAOFFSET_OFFSET + DATAOFFSET_SIZE;
final static int DATASIZE_SIZE = 4;
final static int DATAHASH_OFFSET = DATASIZE_OFFSET + DATASIZE_SIZE;
final static int DATAHASH_SIZE = 32;
final static int HEADER_SIZE = DATAHASH_OFFSET + DATAHASH_SIZE;//size of the header when binary encoding is used
    final static int HEADER_ENCODED_DATA_CNT = (HEADER_SIZE-1);//number of header bytes after the encoding byte (stored in binary OR base64)
final static int HEADER64_B64PAD_CNT = (3-(HEADER_ENCODED_DATA_CNT%3))%3;
final static int HEADER64_SIZE = 1+((HEADER_ENCODED_DATA_CNT+HEADER64_B64PAD_CNT)/3)*4;//size of the header when base64 encoding is used
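    // Worked example of the arithmetic above:
    //   HEADER_SIZE = 1 + 32 + 4 + 4 + 4 + 32 = 77 bytes (binary encoding)
    //   HEADER_ENCODED_DATA_CNT = 76, hence HEADER64_B64PAD_CNT = (3 - 76 % 3) % 3 = 2
    //   HEADER64_SIZE = 1 + ((76 + 2) / 3) * 4 = 105 bytes (base64 encoding)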
static int getHeaderSize(boolean useBase64){
return useBase64 ? HEADER64_SIZE : HEADER_SIZE;
}
static int getHeaderDataCnt(){
return HEADER_ENCODED_DATA_CNT;
}
static int getHeaderPadCnt(boolean useBase64){
return useBase64 ? HEADER64_B64PAD_CNT : 0;
}
static int getHeaderDataAndPadCnt(boolean useBase64){
return getHeaderDataCnt()+getHeaderPadCnt(useBase64);
}
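    /**
     * Immutable description of one chunk file: the hash and size of the original
     * file, the position and size of this chunk within it, the chunk's own hash,
     * and the File the header was read from. Headers of the same original file
     * order themselves by dataOffset, which is what drives reassembly.
     */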
    static public class Header implements Comparable<Header>{
final boolean base64;
final byte []fileHash;
final int fileSize;
final int dataOffset;
final int dataSize;
final byte [] dataHash;
final File file;
public Header(boolean base64, byte[] fileHash, int fileSize, int dataOffset, int dataSize, byte[] dataHash, File file) {
this.base64 = base64;
this.fileHash = fileHash;
this.fileSize = fileSize;
this.dataOffset = dataOffset;
this.dataSize = dataSize;
this.dataHash = dataHash;
this.file = file;
}
public boolean isBase64() {
return base64;
}
public byte[] getDataHash() {
return dataHash;
}
public int getDataOffset() {
return dataOffset;
}
public int getDataSize() {
return dataSize;
}
public File getFile() {
return file;
}
public byte[] getFileHash() {
return fileHash;
}
public int getFileSize() {
return fileSize;
}
        public int compareTo(Header other) {
            if(!Arrays.equals(fileHash,other.fileHash)){
                String comparison = "\n"+AStringUtilities.bytesToHex(fileHash)+"\n"+AStringUtilities.bytesToHex(other.fileHash);
                throw new RuntimeException("Illegal comparison: the headers are not related to the same file (fileHash mismatch):"+comparison);
            }
            if(dataOffset!=other.dataOffset) return dataOffset-other.dataOffset;
            if(this.equals(other)) return 0;
            throw new RuntimeException("Internal state corruption: two headers are not equal but have the same fileHash and the same dataOffset");
        }
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
final Header other = (Header) obj;
if (this.base64 != other.base64) {
return false;
}
if (!Arrays.equals(this.fileHash, other.fileHash)) {
return false;
}
if (this.fileSize != other.fileSize) {
return false;
}
if (this.dataOffset != other.dataOffset) {
return false;
}
if (this.dataSize != other.dataSize) {
return false;
}
if (!Arrays.equals(this.dataHash, other.dataHash)) {
return false;
}
if (this.file != other.file && (this.file == null || !this.file.equals(other.file))) {
return false;
}
return true;
}
@Override
public int hashCode() {
int hash = 7;
hash = 41 * hash + (this.base64 ? 1 : 0);
hash = 41 * hash + Arrays.hashCode(this.fileHash);
hash = 41 * hash + this.fileSize;
hash = 41 * hash + this.dataOffset;
hash = 41 * hash + this.dataSize;
hash = 41 * hash + Arrays.hashCode(this.dataHash);
hash = 41 * hash + (this.file != null ? this.file.hashCode() : 0);
return hash;
}
public long getHeaderSize() {
            return MultiPartStore.getHeaderSize(this.base64);
}
}
    /**
     * Split the content of the input file into several files.
     * @param baseTarget base name for the output files (the chunk index is appended to it)
     * @param input the file to store in multiple files
     * @param chunkSize the maximum size of each output file
     * @param useBase64 if true, everything after the leading encoding byte of each output file (header and data chunk) is base64 encoded
     * @return the number of output files
     */
    static public int write(File baseTarget, File input, int chunkSize, boolean useBase64) throws FileNotFoundException, IOException{
        FileInputStream fis = new FileInputStream(input);
        try{
            return write(baseTarget, fis, input.length(), chunkSize, useBase64);
        }finally{
            fis.close();//the InputStream variant does not close its stream, so close it here
        }
    }
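    /**
     * One chunk file being written: the encoding byte and a zeroed header
     * placeholder are emitted first, the data chunk is appended, and the real
     * header is patched over the placeholder by writeHeaderAndClose() once the
     * hashes are known.
     */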
static final class OutputFile{
final File file;
RandomAccessFile raf;
RandomAccessFileOutputStream rafOutputStream;
OutputStream outputStream;
final long headerPos;
final long dataPos;
final boolean base64;
long dataCnt=0;
        public OutputFile(File file, boolean useBase64) throws FileNotFoundException, IOException {
            this.file = file;
            this.base64=useBase64;
            raf = new RandomAccessFile(file, "rws");
            raf.setLength(0);
            rafOutputStream = new RandomAccessFileOutputStream(raf);
            if(useBase64){
                raf.write(BASE64_ENCODING);
                outputStream=new Base64OutputStream(rafOutputStream, true, 0, null);//encode, no line breaks
            } else {
                raf.write(BINARY_ENCODING);
                outputStream = rafOutputStream;
            }
            outputStream.flush();
            headerPos=getFilePointer();
            writeHeaderPlaceHolder();
            dataPos=getFilePointer();
        }
private void writeHeaderPlaceHolder() throws IOException{
byte []header = new byte[MultiPartStore.getHeaderDataAndPadCnt(base64)];
outputStream.write(header);//write header place holder (all zeroes)
outputStream.flush();
}
        public void writeHeaderAndClose(byte []header) throws IOException{
            if(header.length!=MultiPartStore.getHeaderDataAndPadCnt(base64)) throw new RuntimeException("Header length mismatch: expected "+MultiPartStore.getHeaderDataAndPadCnt(base64)+" bytes, got "+header.length);
            seek(headerPos);
            outputStream.write(header);//overwrite the placeholder with the real header
            close();
        }
public void write(byte[] b, int off, int len) throws IOException {
outputStream.write(b, off, len);
dataCnt+=len;
}
public void flush() throws IOException {
outputStream.flush();
}
public void close() throws IOException {
outputStream.close();
}
private void seek(long pos) throws IOException {
outputStream.flush();
raf.seek(pos);
}
private long getFilePointer() throws IOException {
outputStream.flush();
return raf.getFilePointer();
}
}
    /**
     * Split the content of the input stream into several files.
     * @param baseTarget base name for the output files (the chunk index is appended to it)
     * @param input the input stream to store
     * @param inputLength the size of the data to store, in bytes
     * @param chunkSize the maximum size of each output file
     * @param useBase64 if true, everything after the leading encoding byte of each output file (header and data chunk) is base64 encoded
     * @return the number of output files
     */
static public int write(File baseTarget, InputStream input, long inputLength, int chunkSize, boolean useBase64) throws IOException{
final int headerSize=getHeaderSize(useBase64);
final int dataChunkSize;
if(useBase64){
final int maxDataChunkSize=chunkSize-headerSize;
final int rejected=maxDataChunkSize%3;
dataChunkSize=maxDataChunkSize-rejected;
}else
dataChunkSize=chunkSize-headerSize;
//System.out.println("dataChunkSize="+dataChunkSize);
if(dataChunkSize<=0) throw new IllegalArgumentException("chunkSize is smaller or equal than the header size.");
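        //Example: inputLength = 10, dataChunkSize = 4 -> 10 % 4 != 0, so lastChunkIndex = 10/4 = 2,
        //i.e. three files holding 4, 4 and 2 data bytes; for inputLength = 8 it would be 8/4-1 = 1.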
final int lastChunkIndex=(inputLength % dataChunkSize == 0) ? (int) (inputLength / dataChunkSize)-1 : (int) (inputLength / dataChunkSize);
if(lastChunkIndex>=65536) throw new IllegalArgumentException("chunkSize is so small that more than 65536 files would be generated. This is not supported.");
MessageDigest inputDigest;
try {
inputDigest = MessageDigest.getInstance("SHA-256");
} catch (NoSuchAlgorithmException ex) {
throw new RuntimeException(ex);
}
OutputFile []outputFiles = new OutputFile [lastChunkIndex+1];
MessageDigest []chunkDigests = new MessageDigest[lastChunkIndex+1];
int []offsets = new int [lastChunkIndex+1];
byte[] dataBytes = new byte[4096];
int cumulatedSize=0;
//open and write data to chunk files
for(int chunkIndex=0;chunkIndex<=lastChunkIndex;chunkIndex++){
File outputFile=new File(baseTarget.getCanonicalPath()+chunkIndex);
outputFiles[chunkIndex] = new OutputFile(outputFile,useBase64);
try {
chunkDigests[chunkIndex] = MessageDigest.getInstance("SHA-256");
} catch (NoSuchAlgorithmException ex) {
throw new RuntimeException(ex);
}
offsets[chunkIndex]=cumulatedSize;
final int dataSize=(int) ((chunkIndex == lastChunkIndex) ? inputLength - cumulatedSize : dataChunkSize);
cumulatedSize+=dataSize;
int remaining=dataSize;
while(remaining!=0){
int toRead=Math.min(remaining, dataBytes.length);
int nRead = input.read(dataBytes, 0, toRead);
if(-1==nRead){
throw new EOFException("End of stream reached but at least "+remaining+" additional bytes were expected.");
}
outputFiles[chunkIndex].write(dataBytes, 0, nRead);
chunkDigests[chunkIndex].update(dataBytes, 0, nRead);
//System.out.println("digest.update:"+AStringUtilities.bytesToHex(dataBytes, 0, nRead));
inputDigest.update(dataBytes, 0, nRead);
remaining-=nRead;
}
            if(useBase64 && (chunkIndex == lastChunkIndex)){//in base64, the last file's data is padded to a multiple of 3 bytes
final int padCnt=(3-(dataSize%3))%3;
if(padCnt>0){
byte []padding=new byte[padCnt];
outputFiles[chunkIndex].write(padding,0,padCnt);
chunkDigests[chunkIndex].update(padding,0,padCnt);
//System.out.println("digest.update:"+AStringUtilities.bytesToHex(padding,0,padCnt));
inputDigest.update(padding,0,padCnt);
}
}
}
//get hash of original file
byte[] fileHash = inputDigest.digest();
//System.out.println("fileHash="+AStringUtilities.bytesToHex(fileHash));
        if(FILEHASH_SIZE!=fileHash.length) throw new RuntimeException("Unexpected digest length: expected "+FILEHASH_SIZE+", got "+fileHash.length);
if(inputLength!=cumulatedSize) throw new RuntimeException("inputLength!=cumulatedSize: inputLength="+inputLength+", cumulatedSize="+cumulatedSize);
cumulatedSize=0;
//write header of chunk files and close them
for(int chunkIndex=0;chunkIndex<=lastChunkIndex;chunkIndex++){
int thisChunkDataSize=(int) ((chunkIndex != lastChunkIndex) ? dataChunkSize : inputLength - cumulatedSize);
cumulatedSize+=thisChunkDataSize;
            byte []header = new byte[MultiPartStore.getHeaderDataAndPadCnt(useBase64)];//header payload without the leading encoding byte, hence the -1 offsets below
            System.arraycopy(fileHash, 0, header, 0, fileHash.length);
AArrayUtilities.int2Bytes((int)inputLength, header, FILESIZE_OFFSET-1);
AArrayUtilities.int2Bytes(offsets[chunkIndex], header, DATAOFFSET_OFFSET-1);
AArrayUtilities.int2Bytes(thisChunkDataSize, header, DATASIZE_OFFSET-1);
//compute data hash
byte[] dataHash = chunkDigests[chunkIndex].digest();
System.arraycopy(dataHash, 0, header, DATAHASH_OFFSET-1, DATAHASH_SIZE);
outputFiles[chunkIndex].writeHeaderAndClose(header);
}
return lastChunkIndex+1;
}
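    /**
     * The set of chunk headers that share one fileHash, i.e. the pieces of a
     * single original file, kept sorted by data offset.
     */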
static class ReconstructedFile{
SortedSet<Header> srcHeaders;
File file;
ReconstructedFile(Header firstHeader){
srcHeaders=new TreeSet<Header>();
srcHeaders.add(firstHeader);
}
void add(Header header){
srcHeaders.add(header);
}
}
static public Set<File> read(File inputFolder, File outputFolder, String postFix) throws FileNotFoundException, IOException{
return read(inputFolder.listFiles(), outputFolder, postFix);
}
static public Set<File> read(File []inputFiles, File outputFolder, String postFix) throws FileNotFoundException, IOException{
Set<Header> headers=new HashSet<Header>();
//find valid chunk files
        for(File f:inputFiles){
            Header header=getHeader(f);
            if(null!=header){
                headers.add(header);
            }
        }
        HashMap<String,ReconstructedFile> map=new HashMap<String,ReconstructedFile>();
        //identify the files to reconstruct; the map is keyed on the full fileHash (as hex)
        //rather than Arrays.hashCode(hash), which could collide for distinct files
        for(Header header:headers){
            String key=AStringUtilities.bytesToHex(header.getFileHash());
            ReconstructedFile file=map.get(key);
            if(null!=file){
                file.add(header);
            }else{
                file=new ReconstructedFile(header);
                map.put(key, file);
            }
        }
//reconstruct the files
for(ReconstructedFile file:map.values()){
file.file=reconstructFile(file.srcHeaders, outputFolder, postFix);
}
Set<File> out=new HashSet<File>();
for(ReconstructedFile file:map.values())
out.add(file.file);
return out;
}
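    /**
     * Rebuild one original file from its chunk headers (sorted by data offset).
     * The output file is named after the first chunk's file name plus postFix,
     * sized up front from the header's fileSize, then filled chunk by chunk.
     */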
static File reconstructFile(SortedSet<Header> headers, File outputFolder, String postFix) throws FileNotFoundException, IOException{
FileInputStream fis=null;
String fileName=headers.first().getFile().getName()+postFix;
File out=new File(outputFolder,fileName);
RandomAccessFile raf=new RandomAccessFile(out, "rw");
try{
raf.setLength(headers.first().getFileSize());
for(Header header:headers){
File in=header.getFile();
fis=new FileInputStream(in);
final int dataBytesBufferSize = 4096;
byte []dataBytes=new byte[dataBytesBufferSize];
try{
InputStream inputStream;
{
BufferedInputStream bis = new BufferedInputStream(fis);
inputStream = header.isBase64() ? new Base64InputStream(bis): bis;
}
try{
                    long nSkip;
                    long toSkip=header.getHeaderSize();
                    while(toSkip>0){//skip the raw header bytes; note that skip() returns 0 (never -1) at end of file
                        nSkip=fis.skip(toSkip);
                        if(nSkip<=0) throw new RuntimeException("Unexpected end of file in chunk file "+in.getCanonicalPath());
                        toSkip-=nSkip;
                    }
raf.seek(header.dataOffset);
int nRead;
int remaining=header.getDataSize();
while((remaining>0) && (nRead=inputStream.read(dataBytes))!=-1){
if(remaining<nRead) nRead=remaining;
remaining-=nRead;
raf.write(dataBytes, 0, nRead);
}
}
finally{
inputStream.close();
}
}finally{
fis.close();
}
}
}finally{
raf.close();
}
return out;
}
static public int getHeaderSize(byte firstByte){
if(firstByte==BASE64_ENCODING){
return HEADER64_SIZE;
}else if(firstByte==BINARY_ENCODING){
return HEADER_SIZE;
}else{
throw new RuntimeException(AStringUtilities.byteToHex(firstByte)+" is not a valid first byte");
}
}
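    /**
     * Parse a complete chunk file held in memory and return the size of its data chunk.
     * @param headerBytes the full content of a chunk file (header and data; the data is
     * needed because the header is only accepted once the data hash has been verified)
     * @return the dataSize field of the header
     */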
    static public int getChunkSize(byte headerBytes[]){
        InputStream in = new ByteArrayInputStream(headerBytes);
        Header header=null;
        try {
            header = getHeader(in, null);
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
        if(null==header) throw new RuntimeException("Invalid chunk data: the header could not be parsed or the data hash does not match");
        return header.getDataSize();
    }
    /**
     * Read a file and, if it is a valid chunk file, return its header.
     * @param f the file to read
     * @return the header of f, or null if f is not a chunk file
     */
static public Header getHeader(File f) throws FileNotFoundException, IOException{
if(f.isDirectory()) return null;
InputStream in=new FileInputStream(f);
return getHeader(in,f);
}
    /**
     * Read a stream and, if it holds a valid chunk file, return its header.
     * @param inputStream the stream to read
     * @param f the file backing the stream, recorded in the returned header (may be null)
     * @return the header, or null if the stream does not hold a valid chunk file
     */
static private Header getHeader(InputStream inputStream, File f) throws IOException{
InputStream in64=null;
try{
int firstByte=inputStream.read();
boolean base64;
if(firstByte==BASE64_ENCODING){
in64=new Base64InputStream(inputStream);//base64 decode
inputStream=in64;
base64=true;
            }else if(firstByte==BINARY_ENCODING){
                base64=false;
}else{
return null;
}
byte []fileHash=new byte[FILEHASH_SIZE];
for(int i=0;i<FILEHASH_SIZE;i++)
fileHash[i]=(byte) inputStream.read();
//System.out.println("fileHash="+AStringUtilities.bytesToHex(fileHash));
int fileSize=0;
for(int i=0;i<FILESIZE_SIZE;i++)
fileSize=(fileSize<<8)+ (0xFF & inputStream.read());
int dataOffset=0;
for(int i=0;i<DATAOFFSET_SIZE;i++)
dataOffset=(dataOffset<<8)+ (0xFF & inputStream.read());
int dataChunkSize=0;
for(int i=0;i<DATASIZE_SIZE;i++)
dataChunkSize=(dataChunkSize<<8)+ (0xFF & inputStream.read());
byte []dataHash=new byte[DATAHASH_SIZE];
for(int i=0;i<DATAHASH_SIZE;i++)
dataHash[i]=(byte) inputStream.read();
if(base64){
for(int i=0;i<HEADER64_B64PAD_CNT;i++)
inputStream.read();
}
//compute data hash
MessageDigest md;
try {
md = MessageDigest.getInstance("SHA-256");
} catch (NoSuchAlgorithmException ex) {
throw new RuntimeException(ex);
}
int nRead;
int remaining=dataChunkSize;
final int dataBytesBufferSize = 4096;
byte []dataBytes=new byte[dataBytesBufferSize];
while ((remaining>0) && (nRead = inputStream.read(dataBytes)) != -1) {
if(remaining < nRead){
nRead = remaining;
}
md.update(dataBytes, 0, nRead);
remaining-=nRead;
//System.out.println("digest.update:"+AStringUtilities.bytesToHex(dataBytes, 0, nRead));
}
byte[] actualDataHash = md.digest();
if(!Arrays.equals(dataHash, actualDataHash)) return null;
Header header=new Header(base64, fileHash, fileSize, dataOffset, dataChunkSize, dataHash, f);
return header;
}finally{
if(null!=in64) in64.close();
inputStream.close();
}
}
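    /** Ad-hoc manual test: the input paths below refer to local test data and will need adjusting. */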
public static void main(String[] args) throws FileNotFoundException, IOException{
File inputDir = new File("./test input");
File outputDir = new File("./test output");
AFileUtilities.removeAll(outputDir);
File input=new File(inputDir,"nimpsmartcardsrc.hex");
File output=new File(outputDir,"card1.bin");
aCat.main(new String[]{
"dst:"+output.getCanonicalPath(),
"inHex",
"add:"+input.getCanonicalPath()
});
MultiPartStore.read(outputDir, outputDir, "_reconstructed");
}
}