Package org.apache.jackrabbit.mk.blobs

Source Code of org.apache.jackrabbit.mk.blobs.AbstractBlobStore$Data

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.mk.blobs;

import org.apache.jackrabbit.mk.util.ExceptionFactory;
import org.apache.jackrabbit.mk.util.IOUtils;
import org.apache.jackrabbit.mk.util.Cache;
import org.apache.jackrabbit.mk.util.StringUtils;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.ref.WeakReference;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.WeakHashMap;

/**
* An abstract data store that splits the binaries in relatively small blocks,
* so that each block fits in memory.
* <p>
* Each data store id is a list of zero or more entries. Each entry is either
* <ul>
* <li>data (a number of bytes), or</li>
* <li>the hash code of the content of a number of bytes, or</li>
* <li>the hash code of the content of a data store id (indirect hash)</li>
* </ul>
* Thanks to the indirection, blocks can be kept relatively small, so that
* caching is simpler, and so that the storage backend doesn't need to support
* arbitrary size blobs (some storage backends buffer blobs in memory) and fast
* seeks (some storage backends re-read the whole blob when seeking).
* <p>
* The format of a 'data' entry is: type (one byte; 0 for data), length
* (variable size int), data (bytes).
* <p>
* The format of a 'hash of content' entry is: type (one byte; 1 for hash),
* level (variable size int, 0 meaning not nested), size (variable size long),
* hash code length (variable size int), hash code.
* <p>
* The format of a 'hash of data store id' entry is: type (one byte; 1 for
* hash), level (variable size int, nesting level), total size (variable size
* long), size of data store id (variable size long), hash code length (variable
* size int), hash code.
*/
public abstract class AbstractBlobStore implements Closeable, BlobStore, Cache.Backend<AbstractBlobStore.BlockId, AbstractBlobStore.Data> {

    protected static final String HASH_ALGORITHM = "SHA-1";

    protected static final int TYPE_DATA = 0;
    protected static final int TYPE_HASH = 1;
    protected static final int TYPE_HASH_COMPRESSED = 2;

    protected Map<String, WeakReference<String>> inUse =
        Collections.synchronizedMap(new WeakHashMap<String, WeakReference<String>>());

    /**
     * The minimum size of a block. Smaller blocks are inlined (the data store id
     * is the data itself).
     */
    private int blockSizeMin = 256;

    /**
     * The size of a block. 128 KB has been found to be as fast as larger
     * values, and faster than smaller values. 2 MB results in less files.
     */
    private int blockSize = 2 * 1024 * 1024;

    private Cache<AbstractBlobStore.BlockId, Data> cache = Cache.newInstance(this, 8 * 1024 * 1024);

    public void setBlockSizeMin(int x) {
        this.blockSizeMin = x;
    }

    public long getBlockSizeMin() {
        return blockSizeMin;
    }

    public void setBlockSize(int x) {
        this.blockSize = x;
    }

    public int getBlockSize() {
        return blockSize;
    }

    public String addBlob(String tempFilePath) {
        try {
            File file = new File(tempFilePath);
            InputStream in = null;
            try {
                in = new FileInputStream(file);
                return writeBlob(in);
            } finally {
                if (in != null) {
                    in.close();
                }
                file.delete();
            }
        } catch (Exception e) {
            throw ExceptionFactory.convert(e);
        }
    }

    public String writeBlob(InputStream in) {
        try {
            ByteArrayOutputStream idStream = new ByteArrayOutputStream();
            convertBlobToId(in, idStream, 0, 0);
            byte[] id = idStream.toByteArray();
            // System.out.println("    write blob " +  StringUtils.convertBytesToHex(id));
            String blobId = StringUtils.convertBytesToHex(id);
            usesBlobId(blobId);
            return blobId;
        } catch (Exception e) {
            try {
                in.close();
            } catch (IOException e1) {
                // ignore
            }
            throw ExceptionFactory.convert(e);
        }
    }

    protected void usesBlobId(String blobId) {
        inUse.put(blobId, new WeakReference<String>(blobId));
    }

    public void clearInUse() {
        inUse.clear();
    }

    public void clearCache() {
        cache.clear();
    }

    private void convertBlobToId(InputStream in, ByteArrayOutputStream idStream, int level, long totalLength) throws Exception {
        byte[] block = new byte[blockSize];
        int count = 0;
        while (true) {
            MessageDigest messageDigest = MessageDigest.getInstance(HASH_ALGORITHM);
            ByteArrayOutputStream buff = new ByteArrayOutputStream();
            DigestOutputStream dout = new DigestOutputStream(buff, messageDigest);
            int blockLen = IOUtils.readFully(in, block, 0, block.length);
            count++;
            if (blockLen == 0) {
                break;
            } else if (blockLen < blockSizeMin) {
                idStream.write(TYPE_DATA);
                IOUtils.writeVarInt(idStream, blockLen);
                idStream.write(block, 0, blockLen);
                totalLength += blockLen;
            } else {
                dout.write(block, 0, blockLen);
                byte[] digest = messageDigest.digest();
                idStream.write(TYPE_HASH);
                IOUtils.writeVarInt(idStream, level);
                if (level > 0) {
                    IOUtils.writeVarLong(idStream, totalLength);
                }
                IOUtils.writeVarLong(idStream, blockLen);
                totalLength += blockLen;
                IOUtils.writeVarInt(idStream, digest.length);
                idStream.write(digest);
                byte[] data = buff.toByteArray();
                storeBlock(digest, level, data);
            }
            if (idStream.size() > blockSize / 2) {
                // convert large ids to a block, but ensure it can be stored as
                // one block (otherwise the indirection no longer works)
                byte[] idBlock = idStream.toByteArray();
                idStream.reset();
                convertBlobToId(new ByteArrayInputStream(idBlock), idStream, level + 1, totalLength);
                count = 1;
            }
        }
        if (count > 0 && idStream.size() > blockSizeMin) {
            // at the very end, convert large ids to a block,
            // because large block ids are not handy
            // (specially if they are used to read data in small chunks)
            byte[] idBlock = idStream.toByteArray();
            idStream.reset();
            convertBlobToId(new ByteArrayInputStream(idBlock), idStream, level + 1, totalLength);
        }
        in.close();
    }

    protected abstract void storeBlock(byte[] digest, int level, byte[] data) throws Exception;

    public abstract void startMark() throws Exception;

    public abstract int sweep() throws Exception;

    protected abstract boolean isMarkEnabled();

    protected abstract void mark(BlockId id) throws Exception;

    protected void markInUse() throws Exception {
        for (String id : new ArrayList<String>(inUse.keySet())) {
            mark(id);
        }
    }

    public int readBlob(String blobId, long pos, byte[] buff, int off, int length) {
        try {
            if (isMarkEnabled()) {
                mark(blobId);
            }
            byte[] id = StringUtils.convertHexToBytes(blobId);
            ByteArrayInputStream idStream = new ByteArrayInputStream(id);
            while (true) {
                int type = idStream.read();
                if (type == -1) {
                    return -1;
                } else if (type == TYPE_DATA) {
                    int len = IOUtils.readVarInt(idStream);
                    if (pos < len) {
                        IOUtils.skipFully(idStream, (int) pos);
                        len -= pos;
                        if (length < len) {
                            len = length;
                        }
                        IOUtils.readFully(idStream, buff, off, len);
                        return len;
                    }
                    IOUtils.skipFully(idStream, len);
                    pos -= len;
                } else if (type == TYPE_HASH) {
                    int level = IOUtils.readVarInt(idStream);
                    long totalLength = IOUtils.readVarLong(idStream);
                    if (level > 0) {
                        // block length (ignored)
                        IOUtils.readVarLong(idStream);
                    }
                    byte[] digest = new byte[IOUtils.readVarInt(idStream)];
                    IOUtils.readFully(idStream, digest, 0, digest.length);
                    if (pos >= totalLength) {
                        pos -= totalLength;
                    } else {
                        if (level > 0) {
                            byte[] block = readBlock(digest, 0);
                            idStream = new ByteArrayInputStream(block);
                        } else {
                            long readPos = pos - pos % blockSize;
                            byte[] block = readBlock(digest, readPos);
                            ByteArrayInputStream in = new ByteArrayInputStream(block);
                            IOUtils.skipFully(in, pos - readPos);
                            return IOUtils.readFully(in, buff, off, length);
                        }
                    }
                } else {
                    throw new IOException("Unknown blobs id type " + type + " for blob " + blobId);
                }
            }
        } catch (Exception e) {
            throw ExceptionFactory.convert(e);
        }
    }

    private byte[] readBlock(byte[] digest, long pos) throws Exception {
        BlockId id = new BlockId(digest, pos);
        return cache.get(id).data;
    }

    public Data load(BlockId id) {
        try {
            return new Data(readBlockFromBackend(id));
        } catch (Exception e) {
            throw ExceptionFactory.convert(e);
        }
    }

    protected abstract byte[] readBlockFromBackend(BlockId id) throws Exception;

    public long getBlobLength(String blobId) {
        try {
            if (isMarkEnabled()) {
                mark(blobId);
            }
            byte[] id = StringUtils.convertHexToBytes(blobId);
            ByteArrayInputStream idStream = new ByteArrayInputStream(id);
            long totalLength = 0;
            while (true) {
                int type = idStream.read();
                if (type == -1) {
                    break;
                }
                if (type == TYPE_DATA) {
                    int len = IOUtils.readVarInt(idStream);
                    IOUtils.skipFully(idStream, len);
                    totalLength += len;
                } else if (type == TYPE_HASH) {
                    int level = IOUtils.readVarInt(idStream);
                    totalLength += IOUtils.readVarLong(idStream);
                    if (level > 0) {
                        // block length (ignored)
                        IOUtils.readVarLong(idStream);
                    }
                    int digestLength = IOUtils.readVarInt(idStream);
                    IOUtils.skipFully(idStream, digestLength);
                } else {
                    throw new IOException("Datastore id type " + type + " for blob " + blobId);
                }
            }
            return totalLength;
        } catch (IOException e) {
            throw ExceptionFactory.convert(e);
        }
    }

    protected void mark(String blobId) throws IOException {
        try {
            byte[] id = StringUtils.convertHexToBytes(blobId);
            ByteArrayInputStream idStream = new ByteArrayInputStream(id);
            mark(idStream);
        } catch (Exception e) {
            throw new IOException("Mark failed for blob " + blobId, e);
        }
    }

    private void mark(ByteArrayInputStream idStream) throws Exception {
        while (true) {
            int type = idStream.read();
            if (type == -1) {
                return;
            } else if (type == TYPE_DATA) {
                int len = IOUtils.readVarInt(idStream);
                IOUtils.skipFully(idStream, (int) len);
            } else if (type == TYPE_HASH) {
                int level = IOUtils.readVarInt(idStream);
                // totalLength
                IOUtils.readVarLong(idStream);
                if (level > 0) {
                    // block length (ignored)
                    IOUtils.readVarLong(idStream);
                }
                byte[] digest = new byte[IOUtils.readVarInt(idStream)];
                IOUtils.readFully(idStream, digest, 0, digest.length);
                if (level > 0) {
                    byte[] block = readBlock(digest, 0);
                    idStream = new ByteArrayInputStream(block);
                    mark(idStream);
                } else {
                    BlockId id = new BlockId(digest, 0);
                    mark(id);
                }
            } else {
                throw new IOException("Unknown blobs id type " + type);
            }
        }
    }

    public void close() {
        // ignore
    }

    /**
     * A block id. Blocks are small enough to fit in memory, so they can be
     * cached.
     */
    public static class BlockId {

        final byte[] digest;
        final long pos;

        BlockId(byte[] digest, long pos) {
            this.digest = digest;
            this.pos = pos;
        }

        public boolean equals(Object other) {
            if (this == other) {
                return true;
            }
            BlockId o = (BlockId) other;
            return Arrays.equals(digest, o.digest) &&
                    pos == o.pos;
        }

        public int hashCode() {
            return Arrays.hashCode(digest) ^
                    (int) (pos >> 32) ^ (int) pos;
        }

        public String toString() {
            return StringUtils.convertBytesToHex(digest) + "@" + pos;
        }

    }

    /**
     * The data for a block.
     */
    public static class Data implements Cache.Value {

        final byte[] data;

        Data(byte[] data) {
            this.data = data;
        }

        public String toString() {
            String s = StringUtils.convertBytesToHex(data);
            return s.length() > 100 ? s.substring(0, 100) + ".. (len=" + data.length + ")" : s;
        }

        public int getMemory() {
            return data.length;
        }

    }

}
TOP

Related Classes of org.apache.jackrabbit.mk.blobs.AbstractBlobStore$Data

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.