Package org.xtreemfs.babudb.index

Source Code of org.xtreemfs.babudb.index.LSMTree

/*
* Copyright (c) 2008, Jan Stender, Bjoern Kolbeck, Mikael Hoegqvist,
*                     Felix Hupfeld, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/

package org.xtreemfs.babudb.index;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.NoSuchElementException;

import org.xtreemfs.babudb.api.database.ResultSet;
import org.xtreemfs.babudb.api.index.ByteRangeComparator;
import org.xtreemfs.babudb.index.overlay.MultiOverlayBufferTree;
import org.xtreemfs.babudb.index.reader.DiskIndex;
import org.xtreemfs.babudb.index.reader.InternalBufferUtil;
import org.xtreemfs.babudb.index.reader.InternalDiskIndexIterator;
import org.xtreemfs.babudb.index.reader.InternalMergeIterator;
import org.xtreemfs.babudb.index.writer.DiskIndexWriter;
import org.xtreemfs.babudb.snapshots.SnapshotConfig;
import org.xtreemfs.foundation.logging.Logging;
import org.xtreemfs.foundation.util.OutputUtils;

public class LSMTree {
   
    private static long               totalOnDiskSize = 0;
   
    private static final byte[]       NULL_ELEMENT    = new byte[0];
   
    private MultiOverlayBufferTree    overlay;
   
    private DiskIndex                 index;
   
    private final ByteRangeComparator comp;
   
    private final Object              lock;
   
    private boolean                   compressed;
   
    private final int                 maxEntriesPerBlock;
   
    private final int                 maxBlockFileSize;
   
    private final boolean             useMMap;
   
    private final int                 mmapLimitBytes;
   
    /**
     * Creates a new LSM tree.
     *
     * @param indexFile
     *            the on-disk index file - may be <code>null</code>
     * @param comp
     *            a comparator for byte ranges
     * @param compressed
     *            Compression of disk-index
     * @throws IOException
     *             if an I/O error occurs when accessing the on-disk index file
     */
    public LSMTree(String indexFile, ByteRangeComparator comp, boolean compressed, int maxEntriesPerBlock,
        int maxBlockFileSize, boolean useMMap, int mmapLimit) throws IOException {
       
        this.comp = comp;
        this.compressed = compressed;
        this.maxEntriesPerBlock = maxEntriesPerBlock;
        this.maxBlockFileSize = maxBlockFileSize;
        this.useMMap = useMMap;
        this.mmapLimitBytes = mmapLimit * 1024 * 1024;
       
        overlay = new MultiOverlayBufferTree(NULL_ELEMENT, comp);
        totalOnDiskSize += indexFile == null ? 0 : getTotalDirSize(new File(indexFile));
        index = indexFile == null ? null : new DiskIndex(indexFile, comp, compressed, useMmap());
        lock = new Object();
    }
   
    /**
     * Performs a lookup.
     *
     * @param key
     *            the key to look up
     * @return the value associated with the key
     */
    public byte[] lookup(byte[] key) {
       
        byte[] result = overlay.lookup(key);
       
        if (result == NULL_ELEMENT)
            return null;
       
        if (result != null)
            return result;
       
        return index == null ? null : index.lookup(key);
    }
   
    /**
     * Performs a lookup in a given snapshot.
     *
     * @param key
     *            the key to look up
     * @param snapId
     *            the snapshot ID
     * @return the value associated with the key in the snapshot
     */
    public byte[] lookup(byte[] key, int snapId) {
       
        byte[] result = overlay.lookup(key, snapId);
       
        if (result == NULL_ELEMENT)
            return null;
       
        if (result != null)
            return result;
       
        return index == null ? null : index.lookup(key);
    }
   
    /**
     * Returns the first entry.
     *
     * @return the first entry
     */
    public Entry<byte[], byte[]> firstEntry() {
        ResultSet<byte[], byte[]> it = prefixLookup(new byte[0]);
        Entry<byte[], byte[]> result = it.hasNext() ? it.next() : null;
        it.free();
        return result;
    }
   
    /**
     * Returns the first entry in the given snapshot.
     *
     * @param snapId
     *            the snapshot ID
     * @return the first entry
     */
    public Entry<byte[], byte[]> firstEntry(int snapId) {
        ResultSet<byte[], byte[]> it = prefixLookup(new byte[0], snapId, true);
        Entry<byte[], byte[]> result = it.hasNext() ? it.next() : null;
        it.free();
        return result;
    }
   
    /**
     * Returns the last entry.
     *
     * @return the last entry
     */
    public Entry<byte[], byte[]> lastEntry() {
        ResultSet<byte[], byte[]> it = prefixLookup(new byte[0], false);
        Entry<byte[], byte[]> result = it.hasNext() ? it.next() : null;
        it.free();
        return result;
    }
   
    /**
     * Returns the last entry in the given snapshot.
     *
     * @param snapId
     *            the snapshot ID
     * @return the last entry
     */
    public Entry<byte[], byte[]> lastEntry(int snapId) {
        ResultSet<byte[], byte[]> it = prefixLookup(new byte[0], snapId, false);
        Entry<byte[], byte[]> result = it.hasNext() ? it.next() : null;
        it.free();
        return result;
    }
   
    /**
     * Performs a prefix lookup. Key-value paris are returned in an iterator in
     * ascending key order, where only such keys are returned with a matching
     * prefix according to the comparator.
     *
     * @param prefix
     *            the prefix
     * @return an iterator with key-value pairs
     */
    public ResultSet<byte[], byte[]> prefixLookup(byte[] prefix) {
        return prefixLookup(prefix, true);
    }
   
    /**
     * Performs a prefix lookup. Key-value paris are returned in an iterator in
     * the given key order, where only such keys are returned with a matching
     * prefix according to the comparator.
     *
     * @param prefix
     *            the prefix
     * @param ascending
     *            if <code>true</code>, entries will be returned in ascending
     *            order; otherwise, they will be returned in descending order
     * @return an iterator with key-value pairs
     */
    public ResultSet<byte[], byte[]> prefixLookup(byte[] prefix, boolean ascending) {
       
        if (prefix != null && prefix.length == 0)
            prefix = null;
       
        List<Iterator<Entry<byte[], byte[]>>> list = new ArrayList<Iterator<Entry<byte[], byte[]>>>(2);
        list.add(overlay.prefixLookup(prefix, true, ascending));
        if (index != null) {
            byte[][] rng = comp.prefixToRange(prefix, ascending);
            list.add(index.rangeLookup(rng[0], rng[1], ascending));
        }
       
        return new OverlayMergeIterator<byte[], byte[]>(list, comp, NULL_ELEMENT, ascending);
    }
   
    /**
     * Performs a prefix lookup in a given snapshot. Key-value paris are
     * returned in an iterator in ascending key order, where only such keys are
     * returned with a matching prefix according to the comparator.
     *
     * @param prefix
     *            the prefix
     * @param snapId
     *            the snapshot ID
     * @return an iterator with key-value pairs
     */
    public ResultSet<byte[], byte[]> prefixLookup(byte[] prefix, int snapId) {
        return prefixLookup(prefix, snapId, true);
    }
   
    /**
     * Performs a prefix lookup in a given snapshot. Key-value pairs are
     * returned in an iterator in the given key order, where only such keys are
     * returned with a matching prefix according to the comparator.
     *
     * @param prefix
     *            the prefix
     * @param snapId
     *            the snapshot ID
     * @param ascending
     *            if <code>true</code>, entries will be returned in ascending
     *            order; otherwise, they will be returned in descending order
     * @return an iterator with key-value pairs
     */
    public ResultSet<byte[], byte[]> prefixLookup(byte[] prefix, int snapId, boolean ascending) {
       
        if (prefix != null && prefix.length == 0)
            prefix = null;
       
        List<Iterator<Entry<byte[], byte[]>>> list = new ArrayList<Iterator<Entry<byte[], byte[]>>>(2);
        list.add(overlay.prefixLookup(prefix, snapId, true, ascending));
        if (index != null) {
            byte[][] rng = comp.prefixToRange(prefix, ascending);
            list.add(index.rangeLookup(rng[0], rng[1], ascending));
        }
       
        return new OverlayMergeIterator<byte[], byte[]>(list, comp, NULL_ELEMENT, ascending);
    }
   
    /**
     * Performs a range lookup. Key-value paris are returned in an iterator in
     * ascending key order, where only such keys are returned between
     * <code>from</code> (inclusively) and <code>to</code> (inclusively),
     * according to the comparator.
     *
     * @param from
     *            the first key (inclusively)
     * @param to
     *            the last key (exclusively)
     * @return an iterator with key-value pairs
     */
    public ResultSet<byte[], byte[]> rangeLookup(byte[] from, byte[] to) {
        return rangeLookup(from, to, true);
    }
   
    /**
     * Performs a range lookup. Key-value paris are returned in an iterator in
     * the given key order, where only such keys are returned between
     * <code>from</code> (inclusively) and <code>to</code> (inclusively),
     * according to the comparator.
     *
     * @param from
     *            the first key (inclusively)
     * @param to
     *            the last key (exclusively)
     * @param ascending
     *            if <code>true</code>, entries will be returned in ascending
     *            order; otherwise, they will be returned in descending order
     * @return an iterator with key-value pairs
     */
    public ResultSet<byte[], byte[]> rangeLookup(byte[] from, byte[] to, boolean ascending) {
       
        if (from.length == 0)
            from = null;
       
        if (to.length == 0)
            to = null;
       
        List<Iterator<Entry<byte[], byte[]>>> list = new ArrayList<Iterator<Entry<byte[], byte[]>>>(2);
        list.add(overlay.rangeLookup(from, to, true, ascending));
        if (index != null)
            list.add(index.rangeLookup(from, to, ascending));
       
        return new OverlayMergeIterator<byte[], byte[]>(list, comp, NULL_ELEMENT, ascending);
    }
   
    /**
     * Performs a range lookup in a given snapshot. Key-value paris are returned
     * in an iterator in ascending key order, where only such keys are returned
     * between <code>from</code> (inclusively) and <code>to</code>
     * (inclusively), according to the comparator.
     *
     * @param from
     *            the first key (inclusively)
     * @param to
     *            the last key (exclusively)
     * @param snapId
     *            the snapshot ID
     * @return an iterator with key-value pairs
     */
    public ResultSet<byte[], byte[]> rangeLookup(byte[] from, byte[] to, int snapId) {
        return rangeLookup(from, to, snapId, true);
    }
   
    /**
     * Performs a range lookup in a given snapshot. Key-value pairs are returned
     * in an iterator in the given key order, where only such keys are returned
     * between <code>from</code> (inclusively) and <code>to</code>
     * (inclusively), according to the comparator.
     *
     * @param from
     *            the first key (inclusively)
     * @param to
     *            the last key (exclusively)
     * @param snapId
     *            the snapshot ID
     * @param ascending
     *            if <code>true</code>, entries will be returned in ascending
     *            order; otherwise, they will be returned in descending order
     * @return an iterator with key-value pairs
     */
    public ResultSet<byte[], byte[]> rangeLookup(byte[] from, byte[] to, int snapId, boolean ascending) {
       
        if (from.length == 0)
            from = null;
       
        if (to.length == 0)
            to = null;
       
        List<Iterator<Entry<byte[], byte[]>>> list = new ArrayList<Iterator<Entry<byte[], byte[]>>>(2);
        list.add(overlay.rangeLookup(from, to, snapId, true, ascending));
        if (index != null)
            list.add(index.rangeLookup(from, to, ascending));
       
        return new OverlayMergeIterator<byte[], byte[]>(list, comp, NULL_ELEMENT, ascending);
    }
   
    /**
     * Inserts a key-value pair.
     *
     * @param key
     *            the key
     * @param value
     *            the value
     */
    public void insert(byte[] key, byte[] value) {
        synchronized (lock) {
            overlay.insert(key, value);
        }
    }
   
    /**
     * Deletes a key-value pair. This method is equivalent to
     * <code>insert(key, null)</code>.
     *
     * @param key
     *            the key
     */
    public void delete(byte[] key) {
        synchronized (lock) {
            overlay.insert(key, null);
        }
    }
   
    /**
     * Creates a new in-memory snapshot.
     *
     * @return the snapshot ID
     */
    public int createSnapshot() {
        return overlay.newOverlay();
    }
   
    /**
     * Writes an in-memory snapshot to a file on disk.
     *
     * @param targetFile
     *            the file to which to write the snapshot
     * @param snapId
     *            the snapshot ID
     * @throws IOException
     *             if an I/O error occurs while writing the snapshot
     */
    public void materializeSnapshot(String targetFile, int snapId) throws IOException {
       
        DiskIndexWriter writer = new DiskIndexWriter(targetFile, maxEntriesPerBlock, compressed,
            maxBlockFileSize);
       
        InternalMergeIterator it = internalPrefixLookup(null, snapId, true);
        writer.writeIndex(it);
        it.free();
    }
   
    /**
     * Writes a certain part of an in-memory snapshot to a file on disk.
     *
     * @param targetFile
     *            the file to which to write the snapshot
     * @param snapId
     *            the snapshot ID
     * @param indexId
     *            the id used by the database to identify this index
     * @param snap
     *            the snapshot configuration
     * @throws IOException
     *             if an I/O error occurs while writing the snapshot
     */
    public void materializeSnapshot(String targetFile, final int snapId, final int indexId,
        final SnapshotConfig snap) throws IOException {
       
        DiskIndexWriter writer = new DiskIndexWriter(targetFile, maxEntriesPerBlock, compressed,
            maxBlockFileSize);
        writer.writeIndex(new ResultSet<Object, Object>() {
           
            private ResultSet<Object, Object>[] iterators;
           
            private Entry<Object, Object>             next;
           
            private int                               currentIt;
           
            {
                byte[][] prefixes = snap.getPrefixes(indexId);
               
                currentIt = 0;
               
                if (prefixes != null) {
                    iterators = new ResultSet[prefixes.length];
                    for (int i = 0; i < prefixes.length; i++)
                        iterators[i] = internalPrefixLookup(prefixes[i], snapId, true);
                } else {
                    iterators = new ResultSet[] { prefixLookup(null, snapId, true) };
                }
               
                getNextElement();
               
            }
           
            @Override
            public boolean hasNext() {
                return (next != null);
            }
           
            @Override
            public Entry<Object, Object> next() {
               
                if (next == null)
                    throw new NoSuchElementException();
               
                Entry<Object, Object> tmp = next;
                getNextElement();
               
                return tmp;
            }
           
            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
           
            @Override
            public void free() {
                for (ResultSet<Object, Object> it : iterators)
                    it.free();
            }
           
            private void getNextElement() {
               
                for (;;) {
                   
                    // get the next iterator w/ elements
                    while (currentIt < iterators.length && !iterators[currentIt].hasNext())
                        currentIt++;
                   
                    // if there is no such iterator, set next to null and return
                    if (currentIt >= iterators.length) {
                        next = null;
                        return;
                    }
                   
                    // otherwise, next is the next element from the current
                    // iterator
                    next = iterators[currentIt].next();
                   
                    // if this element is explicitly excluded, skip it
                    byte[] tmp = InternalBufferUtil.toBuffer(next.getKey());
                    if (snap.containsKey(indexId, tmp))
                        break;
                }
            }
           
        });
    }
   
    /**
     * Links the LSM tree to a new snapshot file. The on-disk index is replaced
     * with the index stored in the given snapshot file, and all in-memory
     * snapshots are discarded.
     *
     * @param snapshotFile
     *            the snapshot file
     * @throws IOException
     *             if an I/O error occurred while reading the snapshot file
     */
    public void linkToSnapshot(String snapshotFile) throws IOException {
        final DiskIndex oldIndex = index;
        synchronized (lock) {
            totalOnDiskSize -= index == null ? 0 : index.getSize();
            index = new DiskIndex(snapshotFile, comp, this.compressed, useMmap());
            totalOnDiskSize += index.getSize();
            if (oldIndex != null)
                oldIndex.destroy();
            overlay.cleanup();
        }
    }
   
    /**
     * Checks if files containing index data are memory-mapped.
     *
     * @return <code>true</code> if index files are memory-mapped, <code>false</code>, otherwise
     */
    public boolean isMMapEnabled() {
      return useMMap;
    }
   
    /**
     * Checks if files containing index data are compressed.
     *
     * @return <code>true</code> if index files are compressed, <code>false</code>, otherwise
     */
    public boolean isCompressed() {
      return compressed;
    }
   
    /**
     * Destroys the LSM tree. Frees all in-memory indices plus the on-disk tree.
     */
    public void destroy() throws IOException {
       
        synchronized (lock) {
            if (index != null) {
                totalOnDiskSize -= index.getSize();
                index.destroy();
            }
            overlay.cleanup();
        }
    }
   
    private boolean useMmap() {
        Logging.logMessage(Logging.LEVEL_DEBUG, this, "DB size: " + OutputUtils.formatBytes(totalOnDiskSize));
        return useMMap && (mmapLimitBytes < 0 || totalOnDiskSize < mmapLimitBytes);
    }
   
    /**
     * Performs a prefix lookup. Key-value paris are returned in an iterator in
     * the given key order, where only such keys are returned with a matching
     * prefix according to the comparator. <br/>
     *
     * <b>WARNING:</b> This method should only be accessed internally, as it
     * provides access to internal index buffers that have to remain immutable.
     *
     * @param prefix
     *            the prefix
     * @param snapId
     *            the snapshot ID
     * @param ascending
     *            if <code>true</code>, entries will be returned in ascending
     *            order; otherwise, they will be returned in descending order
     * @return an iterator with references to internally used buffers
     */
    protected InternalMergeIterator internalPrefixLookup(byte[] prefix, int snapId, boolean ascending) {
       
        if (prefix != null && prefix.length == 0)
            prefix = null;
       
        Iterator<Entry<byte[], byte[]>> overlayIterator = overlay.prefixLookup(prefix, snapId, true,
            ascending);
        InternalDiskIndexIterator diskIndexIterator = null;
        if (index != null) {
            byte[][] rng = comp.prefixToRange(prefix, ascending);
            diskIndexIterator = index.internalRangeLookup(rng[0], rng[1], ascending);
        }
       
        return new InternalMergeIterator(overlayIterator, diskIndexIterator, comp, NULL_ELEMENT, ascending);
    }
   
    private static long getTotalDirSize(File dir) {
       
        if (!dir.exists())
            return 0;
       
        long size = 0;
        if (dir.isDirectory()) {
            for (File child : dir.listFiles())
                size += getTotalDirSize(child);
        } else
            size += dir.length();
       
        return size;
    }
   
}
TOP

Related Classes of org.xtreemfs.babudb.index.LSMTree

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.