/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.io;
import java.io.*;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.utils.BloomFilter;
import org.apache.cassandra.utils.LogUtil;
import org.apache.log4j.Logger;
/**
* This class writes key/value pairs sequentially to disk. It is
* also used to read sequentially from disk. However, one can
* jump to random positions to read data from the file. This class
* also has many implementations of the IFileWriter and IFileReader
* interfaces which are exposed through factory methods.
* <p/>
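* A minimal usage sketch (the path is hypothetical, and this assumes the
* append/close methods implemented below are exposed through the
* IFileWriter/IFileReader interfaces):
* <pre>
* IFileWriter writer = SequenceFile.bufferedWriter("/tmp/example.db", 64 * 1024);
* writer.append("key1", new byte[] { 1, 2, 3 });
* writer.close();
* IFileReader reader = SequenceFile.bufferedReader("/tmp/example.db", 64 * 1024);
* </pre>
* <p/>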
* Author : Avinash Lakshman ( alakshman@facebook.com) & Prashant Malik ( pmalik@facebook.com ) & Karthik Ranganathan ( kranganathan@facebook.com )
*/
public class SequenceFile
{
public static abstract class AbstractWriter implements IFileWriter
{
protected String filename_;
AbstractWriter(String filename)
{
filename_ = filename;
}
public String getFileName()
{
return filename_;
}
public long lastModified()
{
File file = new File(filename_);
return file.lastModified();
}
}
public static class Writer extends AbstractWriter
{
protected RandomAccessFile file_;
Writer(String filename) throws IOException
{
super(filename);
init(filename);
}
Writer(String filename, int size) throws IOException
{
super(filename);
init(filename, size);
}
protected void init(String filename) throws IOException
{
File file = new File(filename);
if (!file.exists())
{
file.createNewFile();
}
file_ = new RandomAccessFile(file, "rw");
}
protected void init(String filename, int size) throws IOException
{
init(filename);
}
public long getCurrentPosition() throws IOException
{
return file_.getFilePointer();
}
public void seek(long position) throws IOException
{
file_.seek(position);
}
public void append(DataOutputBuffer buffer) throws IOException
{
file_.write(buffer.getData(), 0, buffer.getLength());
}
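/**
* Appends one length-prefixed record laid out as
* [int keyLength][key bytes][int dataLength][data bytes].
*/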
public void append(DataOutputBuffer keyBuffer, DataOutputBuffer buffer) throws IOException
{
if (keyBuffer == null || keyBuffer.getLength() == 0)
throw new IllegalArgumentException("Key cannot be NULL or of zero length.");
int keyBufLength = keyBuffer.getLength();
file_.writeInt(keyBufLength);
file_.write(keyBuffer.getData(), 0, keyBufLength);
int length = buffer.getLength();
file_.writeInt(length);
file_.write(buffer.getData(), 0, length);
}
public void append(String key, DataOutputBuffer buffer) throws IOException
{
if (key == null)
throw new IllegalArgumentException("Key cannot be NULL.");
file_.writeUTF(key);
int length = buffer.getLength();
file_.writeInt(length);
file_.write(buffer.getData(), 0, length);
}
public void append(String key, byte[] value) throws IOException
{
if (key == null)
throw new IllegalArgumentException("Key cannot be NULL.");
file_.writeUTF(key);
file_.writeInt(value.length);
file_.write(value);
}
public void append(String key, long value) throws IOException
{
if (key == null)
throw new IllegalArgumentException("Key cannot be NULL.");
file_.writeUTF(key);
file_.writeLong(value);
}
/**
* Be extremely careful while using this API. It is currently
* used to write the commit log header in the commit logs.
* If not used carefully it could completely corrupt reads
* of other key/value pairs written after it.
*
* @param bytes the bytes to write
*/
public long writeDirect(byte[] bytes) throws IOException
{
file_.write(bytes);
return file_.getFilePointer();
}
public void writeLong(long value) throws IOException
{
file_.writeLong(value);
}
public void close() throws IOException
{
file_.close();
}
public void close(byte[] footer, int size) throws IOException
{
file_.writeUTF(SequenceFile.marker_);
file_.writeInt(size);
file_.write(footer, 0, size);
}
public String getFileName()
{
return filename_;
}
public long getFileSize() throws IOException
{
return file_.length();
}
}
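/**
* Writer variant backed by BufferedRandomAccessFile, so appends are
* buffered in memory before being flushed to disk.
*/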
public static class BufferWriter extends Writer
{
BufferWriter(String filename, int size) throws IOException
{
super(filename, size);
}
@Override
protected void init(String filename) throws IOException
{
init(filename, 0);
}
@Override
protected void init(String filename, int size) throws IOException
{
File file = new File(filename);
if (!file.exists())
{
file.createNewFile();
}
file_ = new BufferedRandomAccessFile(file, "rw", size);
}
}
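/**
* Writer variant backed by ChecksumRandomAccessFile; close() additionally
* releases the checksum state that ChecksumManager tracks for this file.
*/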
public static class ChecksumWriter extends Writer
{
ChecksumWriter(String filename, int size) throws IOException
{
super(filename, size);
}
@Override
protected void init(String filename) throws IOException
{
init(filename, 0);
}
@Override
protected void init(String filename, int size) throws IOException
{
File file = new File(filename);
file_ = new ChecksumRandomAccessFile(file, "rw", size);
}
@Override
public void close() throws IOException
{
super.close();
ChecksumManager.close(filename_);
}
}
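/**
* Writer that stages each record in a freshly allocated direct ByteBuffer
* and submits it through a single FileChannel.write() call.
*/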
public static class ConcurrentWriter extends AbstractWriter
{
private FileChannel fc_;
public ConcurrentWriter(String filename) throws IOException
{
super(filename);
RandomAccessFile raf = new RandomAccessFile(filename, "rw");
fc_ = raf.getChannel();
}
public long getCurrentPosition() throws IOException
{
return fc_.position();
}
public void seek(long position) throws IOException
{
fc_.position(position);
}
public void append(DataOutputBuffer buffer) throws IOException
{
int length = buffer.getLength();
ByteBuffer byteBuffer = ByteBuffer.allocateDirect(length);
byteBuffer.put(buffer.getData(), 0, length);
byteBuffer.flip();
fc_.write(byteBuffer);
}
public void append(DataOutputBuffer keyBuffer, DataOutputBuffer buffer) throws IOException
{
if (keyBuffer == null || keyBuffer.getLength() == 0)
throw new IllegalArgumentException("Key cannot be NULL or of zero length.");
int keyBufLength = keyBuffer.getLength();
/* Size allocated "int" for key length + key + "int" for data length + data */
int length = buffer.getLength();
ByteBuffer byteBuffer = ByteBuffer.allocateDirect(4 + keyBufLength + 4 + length);
byteBuffer.putInt(keyBufLength);
byteBuffer.put(keyBuffer.getData(), 0, keyBufLength);
byteBuffer.putInt(length);
byteBuffer.put(buffer.getData(), 0, length);
byteBuffer.flip();
fc_.write(byteBuffer);
}
public void append(String key, DataOutputBuffer buffer) throws IOException
{
if (key == null)
throw new IllegalArgumentException("Key cannot be NULL.");
int length = buffer.getLength();
/* Size allocated : utfPrefix_ + key length + "int" for data size + data */
ByteBuffer byteBuffer = ByteBuffer.allocateDirect(SequenceFile.utfPrefix_ + key.length() + 4 + length);
SequenceFile.writeUTF(byteBuffer, key);
byteBuffer.putInt(length);
byteBuffer.put(buffer.getData(), 0, length);
byteBuffer.flip();
fc_.write(byteBuffer);
}
public void append(String key, byte[] value) throws IOException
{
if (key == null)
throw new IllegalArgumentException("Key cannot be NULL.");
/* Size allocated : utfPrefix_ + key length + "int" for data size + data */
ByteBuffer byteBuffer = ByteBuffer.allocateDirect(utfPrefix_ + key.length() + 4 + value.length);
SequenceFile.writeUTF(byteBuffer, key);
byteBuffer.putInt(value.length);
byteBuffer.put(value);
byteBuffer.flip();
fc_.write(byteBuffer);
}
public void append(String key, long value) throws IOException
{
if (key == null)
throw new IllegalArgumentException("Key cannot be NULL.");
/* Size allocated : utfPrefix_ + key length + a long */
ByteBuffer byteBuffer = ByteBuffer.allocateDirect(SequenceFile.utfPrefix_ + key.length() + 8);
SequenceFile.writeUTF(byteBuffer, key);
byteBuffer.putLong(value);
byteBuffer.flip();
fc_.write(byteBuffer);
}
/*
* Be extremely careful while using this API. It is currently
* used to write the commit log header in the commit logs.
* If not used carefully it could completely corrupt reads
* of other key/value pairs written after it.
*/
public long writeDirect(byte[] bytes) throws IOException
{
ByteBuffer byteBuffer = ByteBuffer.allocateDirect(bytes.length);
byteBuffer.put(bytes);
byteBuffer.flip();
fc_.write(byteBuffer);
return fc_.position();
}
public void writeLong(long value) throws IOException
{
ByteBuffer byteBuffer = ByteBuffer.allocateDirect(8);
byteBuffer.putLong(value);
byteBuffer.flip();
fc_.write(byteBuffer);
}
public void close() throws IOException
{
fc_.close();
}
public void close(byte[] footer, int size) throws IOException
{
/* Size is marker length + "int" for size + footer data */
ByteBuffer byteBuffer = ByteBuffer.allocateDirect(utfPrefix_ + SequenceFile.marker_.length() + 4 + size);
SequenceFile.writeUTF(byteBuffer, SequenceFile.marker_);
byteBuffer.putInt(size);
byteBuffer.put(footer, 0, size);
byteBuffer.flip();
fc_.write(byteBuffer);
}
public String getFileName()
{
return filename_;
}
public long getFileSize() throws IOException
{
return fc_.size();
}
}
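/**
* Writer that maps the file into memory up front and appends through the
* resulting MappedByteBuffer; close() forces dirty pages to disk, unmaps
* the buffer via the sun.misc.Cleaner hack, and truncates the file to the
* bytes actually written.
*/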
public static class FastConcurrentWriter extends AbstractWriter
{
private FileChannel fc_;
private MappedByteBuffer buffer_;
public FastConcurrentWriter(String filename, int size) throws IOException
{
super(filename);
fc_ = new RandomAccessFile(filename, "rw").getChannel();
buffer_ = fc_.map(FileChannel.MapMode.READ_WRITE, 0, size);
buffer_.load();
}
void unmap(final Object buffer)
{
AccessController.doPrivileged(new PrivilegedAction<MappedByteBuffer>()
{
public MappedByteBuffer run()
{
try
{
Method getCleanerMethod = buffer.getClass().getMethod("cleaner", new Class[0]);
getCleanerMethod.setAccessible(true);
sun.misc.Cleaner cleaner = (sun.misc.Cleaner) getCleanerMethod.invoke(buffer);
cleaner.clean();
}
catch (Throwable e)
{
logger_.warn(LogUtil.throwableToString(e));
}
return null;
}
});
}
public long getCurrentPosition() throws IOException
{
return buffer_.position();
}
public void seek(long position) throws IOException
{
buffer_.position((int) position);
}
public void append(DataOutputBuffer buffer) throws IOException
{
buffer_.put(buffer.getData(), 0, buffer.getLength());
}
public void append(DataOutputBuffer keyBuffer, DataOutputBuffer buffer) throws IOException
{
if (keyBuffer == null || keyBuffer.getLength() == 0)
throw new IllegalArgumentException("Key cannot be NULL or of zero length.");
int keyBufLength = keyBuffer.getLength();
int length = buffer.getLength();
buffer_.putInt(keyBufLength);
buffer_.put(keyBuffer.getData(), 0, keyBufLength);
buffer_.putInt(length);
buffer_.put(buffer.getData(), 0, length);
}
public void append(String key, DataOutputBuffer buffer) throws IOException
{
if (key == null)
throw new IllegalArgumentException("Key cannot be NULL.");
int length = buffer.getLength();
SequenceFile.writeUTF(buffer_, key);
buffer_.putInt(length);
buffer_.put(buffer.getData(), 0, length);
}
public void append(String key, byte[] value) throws IOException
{
if (key == null)
throw new IllegalArgumentException("Key cannot be NULL.");
SequenceFile.writeUTF(buffer_, key);
buffer_.putInt(value.length);
buffer_.put(value);
}
public void append(String key, long value) throws IOException
{
if (key == null)
throw new IllegalArgumentException("Key cannot be NULL.");
SequenceFile.writeUTF(buffer_, key);
buffer_.putLong(value);
}
/*
* Be extremely careful while using this API. It is currently
* used to write the commit log header in the commit logs.
* If not used carefully it could completely corrupt reads
* of other key/value pairs written after it.
*/
public long writeDirect(byte[] bytes) throws IOException
{
buffer_.put(bytes);
return buffer_.position();
}
public void writeLong(long value) throws IOException
{
buffer_.putLong(value);
}
public void close() throws IOException
{
buffer_.flip();
buffer_.force();
unmap(buffer_);
fc_.truncate(buffer_.limit());
}
public void close(byte[] footer, int size) throws IOException
{
SequenceFile.writeUTF(buffer_, SequenceFile.marker_);
buffer_.putInt(size);
buffer_.put(footer, 0, size);
close();
}
public String getFileName()
{
return filename_;
}
public long getFileSize() throws IOException
{
return buffer_.position();
}
}
public static abstract class AbstractReader implements IFileReader
{
private static final short utfPrefix_ = 2;
protected RandomAccessFile file_;
protected String filename_;
AbstractReader(String filename)
{
filename_ = filename;
}
public String getFileName()
{
return filename_;
}
/**
* Return the position of the given key from the block index.
*
* @param key the key whose offset is to be extracted from the current block index
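* <p/>
* Block index layout as consumed below: [UTF blockIndexKey][int size],
* where the following size bytes hold [int keyCount] and then, per key,
* [UTF key][long position][long dataSize]; the stored position is relative
* to the start of the block index.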
*/
public long getPositionFromBlockIndex(String key) throws IOException
{
long position = -1L;
/* note the beginning of the block index */
long blockIndexPosition = file_.getFilePointer();
/* read the block key. */
String blockIndexKey = file_.readUTF();
if (!blockIndexKey.equals(SSTable.blockIndexKey_))
throw new IOException("Unexpected position to be reading the block index from.");
/* read the size of the block index */
int size = file_.readInt();
/* Read the entire block index. */
byte[] bytes = new byte[size];
file_.readFully(bytes);
DataInputBuffer bufIn = new DataInputBuffer();
bufIn.reset(bytes, bytes.length);
/* Number of keys in the block. */
int keys = bufIn.readInt();
for (int i = 0; i < keys; ++i)
{
String keyInBlock = bufIn.readUTF();
if (keyInBlock.equals(key))
{
position = bufIn.readLong();
break;
}
else
{
/*
* This is not the key we are looking for. So read its position
* and the size of the data associated with it. This was stored
* as the BlockMetadata.
*/
bufIn.readLong();
bufIn.readLong();
}
}
/* we do this because relative position of the key within a block is stored. */
if (position != -1L)
position = blockIndexPosition - position;
else
throw new IOException("This key " + key + " does not exist in this file.");
return position;
}
/**
* Return the block index metadata for a given key.
*/
public SSTable.BlockMetadata getBlockMetadata(String key) throws IOException
{
SSTable.BlockMetadata blockMetadata = SSTable.BlockMetadata.NULL;
/* read the block key. */
String blockIndexKey = file_.readUTF();
if (!blockIndexKey.equals(SSTable.blockIndexKey_))
throw new IOException("Unexpected position to be reading the block index from.");
/* read the size of the block index */
int size = file_.readInt();
/* Read the entire block index. */
byte[] bytes = new byte[size];
file_.readFully(bytes);
DataInputBuffer bufIn = new DataInputBuffer();
bufIn.reset(bytes, bytes.length);
/* Number of keys in the block. */
int keys = bufIn.readInt();
for (int i = 0; i < keys; ++i)
{
if (bufIn.readUTF().equals(key))
{
long position = bufIn.readLong();
long dataSize = bufIn.readLong();
blockMetadata = new SSTable.BlockMetadata(position, dataSize);
break;
}
else
{
/*
* This is not the key we are looking for. So read its position
* and the size of the data associated with it. This was stored
* as the BlockMetadata.
*/
bufIn.readLong();
bufIn.readLong();
}
}
return blockMetadata;
}
/**
* This function seeks to the position where the key data is present in the file
* in order to get the buffer cache populated with the key-data. This is done as
* a hint before the user actually queries the data.
*
* @param key the key whose data is being touched
* @param fData unused by this implementation
*/
public long touch(String key, boolean fData) throws IOException
{
long bytesRead = -1L;
if (isEOF())
return bytesRead;
long startPosition = file_.getFilePointer();
String keyInDisk = file_.readUTF();
if (keyInDisk != null)
{
/*
* If key on disk is greater than requested key
* we can bail out since we exploit the property
* of the SSTable format.
*/
if (keyInDisk.compareTo(key) > 0)
return bytesRead;
/*
* If we found the key then we populate the buffer that
* is passed in. If not then we skip over this key and
* position ourselves to read the next one.
*/
int dataSize = file_.readInt();
if (keyInDisk.equals(key))
{
/* return 0L to signal the key has been touched. */
bytesRead = 0L;
return bytesRead;
}
else
{
/* skip over data portion */
file_.seek(dataSize + file_.getFilePointer());
}
long endPosition = file_.getFilePointer();
bytesRead = endPosition - startPosition;
}
return bytesRead;
}
/**
* This method seeks to the block index, finds the offset
* of the key within the block, and seeks to that offset.
*
* @param key we are interested in.
* @param section indicates the location of the block index.
* @throws IOException
*/
protected void seekTo(String key, Coordinate section) throws IOException
{
/* Go to the block index */
seek(section.end_);
long position = getPositionFromBlockIndex(key);
seek(position);
}
/**
* Defreeze the bloom filter.
*
* @return bloom filter summarizing the column information
* @throws IOException
*/
private BloomFilter defreezeBloomFilter() throws IOException
{
int size = file_.readInt();
byte[] bytes = new byte[size];
file_.readFully(bytes);
DataInputBuffer bufIn = new DataInputBuffer();
bufIn.reset(bytes, bytes.length);
BloomFilter bf = BloomFilter.serializer().deserialize(bufIn);
return bf;
}
/**
* Reads the column name indexes if present. If the
* indexes are based on time, skips over them.
*
* @param cfName name of the column family
* @return total number of bytes read
*/
private int handleColumnNameIndexes(String cfName, List<IndexHelper.ColumnIndexInfo> columnIndexList) throws IOException
{
/* check if we have an index */
boolean hasColumnIndexes = file_.readBoolean();
int totalBytesRead = 1;
/* if we do then deserialize the index */
if (hasColumnIndexes)
{
if (DatabaseDescriptor.isNameSortingEnabled(cfName) || DatabaseDescriptor.getColumnFamilyType(cfName).equals("Super"))
{
/* read the index */
totalBytesRead += IndexHelper.deserializeIndex(cfName, file_, columnIndexList);
}
else
{
totalBytesRead += IndexHelper.skipIndex(file_);
}
}
return totalBytesRead;
}
/**
* Reads the column time indexes if present. If the
* indexes are based on name, skips over them.
*
* @param cfName name of the column family
* @return total number of bytes read
*/
private int handleColumnTimeIndexes(String cfName, List<IndexHelper.ColumnIndexInfo> columnIndexList) throws IOException
{
/* check if we have an index */
boolean hasColumnIndexes = file_.readBoolean();
int totalBytesRead = 1;
/* if we do then deserialize the index */
if (hasColumnIndexes)
{
if (DatabaseDescriptor.isTimeSortingEnabled(cfName))
{
/* read the index */
totalBytesRead += IndexHelper.deserializeIndex(cfName, file_, columnIndexList);
}
else
{
totalBytesRead += IndexHelper.skipIndex(file_);
}
}
return totalBytesRead;
}
/**
* This method dumps the next key/value into the DataOutputBuffer
* passed in. Always use this method to query for application
* specific data as it will make use of the indexes.
*
* @param key key we are interested in.
* @param bufOut DataOutputBuffer that needs to be filled.
* @param columnFamilyName name of the columnFamily
* @param columnNames columnNames we are interested in
* OR
* @param timeRange time range we are interested in
* @param section region of the file that needs to be read
* @return number of bytes that were read.
* @throws IOException
*/
public long next(String key, DataOutputBuffer bufOut, String columnFamilyName, List<String> columnNames, IndexHelper.TimeRange timeRange, Coordinate section) throws IOException
{
assert !columnFamilyName.contains(":");
assert timeRange == null || columnNames == null; // at most one may be non-null
long bytesRead = -1L;
if (isEOF())
return bytesRead;
seekTo(key, section);
/* note the position where the key starts */
long startPosition = file_.getFilePointer();
String keyInDisk = file_.readUTF();
if (keyInDisk != null)
{
/*
* If key on disk is greater than requested key
* we can bail out since we exploit the property
* of the SSTable format.
*/
if (keyInDisk.compareTo(key) > 0)
return bytesRead;
/*
* If we found the key then we populate the buffer that
* is passed in. If not then we skip over this key and
* position ourselves to read the next one.
*/
if (keyInDisk.equals(key))
{
if (timeRange == null) {
readColumns(key, bufOut, columnFamilyName, columnNames);
} else {
readTimeRange(key, bufOut, columnFamilyName, timeRange);
}
}
else
{
/* skip over data portion */
int dataSize = file_.readInt();
file_.seek(dataSize + file_.getFilePointer());
}
long endPosition = file_.getFilePointer();
bytesRead = endPosition - startPosition;
}
return bytesRead;
}
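/*
* Serialized row layout consumed by readTimeRange/readColumns below
* (inferred from the reads they perform): [int dataSize][bloom filter]
* [optional column index][UTF cfName][int localDeletionTime]
* [long markedForDeleteAt][int columnCount][column data].
*/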
private void readTimeRange(String key, DataOutputBuffer bufOut, String columnFamilyName, IndexHelper.TimeRange timeRange)
throws IOException
{
int dataSize = file_.readInt();
/* write the key into buffer */
bufOut.writeUTF(key);
int bytesSkipped = IndexHelper.skipBloomFilter(file_);
/*
* read the correct number of bytes for the column family and
* write data into buffer. Subtract the bloom filter
* size from dataSize.
*/
dataSize -= bytesSkipped;
List<IndexHelper.ColumnIndexInfo> columnIndexList = new ArrayList<IndexHelper.ColumnIndexInfo>();
/* Read the times indexes if present */
int totalBytesRead = handleColumnTimeIndexes(columnFamilyName, columnIndexList);
dataSize -= totalBytesRead;
/* read the column family name */
String cfName = file_.readUTF();
dataSize -= (utfPrefix_ + cfName.length());
/* read local deletion time */
int localDeletionTime = file_.readInt();
dataSize -= 4;
/* read if this cf is marked for delete */
long markedForDeleteAt = file_.readLong();
dataSize -= 8;
/* read the total number of columns */
int totalNumCols = file_.readInt();
dataSize -= 4;
/* get the column range we have to read */
IndexHelper.ColumnRange columnRange = IndexHelper.getColumnRangeFromTimeIndex(timeRange, columnIndexList, dataSize, totalNumCols);
Coordinate coordinate = columnRange.coordinate();
/* seek to the correct offset to the data, and calculate the data size */
file_.skipBytes((int) coordinate.start_);
dataSize = (int) (coordinate.end_ - coordinate.start_);
/*
* write the size of the data we are returning:
* dataSize that we are reading +
* length of the column family name +
* one int for the local deletion time +
* one long for markedForDeleteAt +
* one int for the number of columns
*/
bufOut.writeInt(dataSize + utfPrefix_ + cfName.length() + 4 + 8 + 4);
/* write the column family name */
bufOut.writeUTF(cfName);
/* write local deletion time */
bufOut.writeInt(localDeletionTime);
/* write if this cf is marked for delete */
bufOut.writeLong(markedForDeleteAt);
/* write number of columns */
bufOut.writeInt(columnRange.count());
/* now write the columns */
bufOut.write(file_, dataSize);
}
private void readColumns(String key, DataOutputBuffer bufOut, String columnFamilyName, List<String> cNames)
throws IOException
{
int dataSize = file_.readInt();
/* write the key into buffer */
bufOut.writeUTF(key);
/* if we need to read all the columns, do not read the column indexes */
if (cNames == null || cNames.size() == 0)
{
int bytesSkipped = IndexHelper.skipBloomFilterAndIndex(file_);
/*
* read the correct number of bytes for the column family and
* write data into buffer
*/
dataSize -= bytesSkipped;
/* write the data size */
bufOut.writeInt(dataSize);
/* write the data into buffer, except the boolean we have read */
bufOut.write(file_, dataSize);
}
else
{
/* Read the bloom filter summarizing the columns */
long preBfPos = file_.getFilePointer();
BloomFilter bf = defreezeBloomFilter();
long postBfPos = file_.getFilePointer();
dataSize -= (postBfPos - preBfPos);
List<IndexHelper.ColumnIndexInfo> columnIndexList = new ArrayList<IndexHelper.ColumnIndexInfo>();
/* read the column name indexes if present */
int totalBytesRead = handleColumnNameIndexes(columnFamilyName, columnIndexList);
dataSize -= totalBytesRead;
/* read the column family name */
String cfName = file_.readUTF();
dataSize -= (utfPrefix_ + cfName.length());
/* read local deletion time */
int localDeletionTime = file_.readInt();
dataSize -= 4;
/* read if this cf is marked for delete */
long markedForDeleteAt = file_.readLong();
dataSize -= 8;
/* read the total number of columns */
int totalNumCols = file_.readInt();
dataSize -= 4;
// TODO: this is name sorted - but eventually this should be sorted by the same criteria as the col index
/* sort the required list of columns */
cNames = new ArrayList<String>(cNames);
Collections.sort(cNames);
/* get the various column ranges we have to read */
List<IndexHelper.ColumnRange> columnRanges = IndexHelper.getMultiColumnRangesFromNameIndex(cNames, columnIndexList, dataSize, totalNumCols);
/* calculate the data size */
int numColsReturned = 0;
int dataSizeReturned = 0;
for (IndexHelper.ColumnRange columnRange : columnRanges)
{
numColsReturned += columnRange.count();
Coordinate coordinate = columnRange.coordinate();
dataSizeReturned += coordinate.end_ - coordinate.start_;
}
/*
* write the size of the data we are returning:
* dataSizeReturned that we are reading +
* length of the column family name +
* one int for the local deletion time +
* one long for markedForDeleteAt +
* one int for the number of columns
*/
bufOut.writeInt(dataSizeReturned + utfPrefix_ + cfName.length() + 4 + 8 + 4);
/* write the column family name */
bufOut.writeUTF(cfName);
/* write local deletion time */
bufOut.writeInt(localDeletionTime);
/* write if this cf is marked for delete */
bufOut.writeLong(markedForDeleteAt);
/* write number of columns */
bufOut.writeInt(numColsReturned);
int prevPosition = 0;
/* now write all the columns we are required to write */
for (IndexHelper.ColumnRange columnRange : columnRanges)
{
/* seek to the correct offset to the data */
Coordinate coordinate = columnRange.coordinate();
file_.skipBytes((int) (coordinate.start_ - prevPosition));
bufOut.write(file_, (int) (coordinate.end_ - coordinate.start_));
prevPosition = (int) coordinate.end_;
}
}
}
/**
* This method dumps the next key/value into the DataOutputBuffer
* passed in.
*
* @param bufOut DataOutputBuffer that needs to be filled.
* @return total number of bytes read/considered
*/
public long next(DataOutputBuffer bufOut) throws IOException
{
long bytesRead = -1L;
if (isEOF())
return bytesRead;
long startPosition = file_.getFilePointer();
String key = file_.readUTF();
if (key != null)
{
/* write the key into buffer */
bufOut.writeUTF(key);
int dataSize = file_.readInt();
/* write data size into buffer */
bufOut.writeInt(dataSize);
/* write the data into buffer */
bufOut.write(file_, dataSize);
long endPosition = file_.getFilePointer();
bytesRead = endPosition - startPosition;
}
/*
* If we have read the bloom filter in the data
* file we know we are at the end of the file
* and no further key processing is required. So
* we return -1 indicating we are at the end of
* the file.
*/
if (key != null && key.equals(SequenceFile.marker_))
bytesRead = -1L;
return bytesRead;
}
/**
* This method dumps the next key/value into the DataOutputBuffer
* passed in.
*
* @param key - key we are interested in.
* @param bufOut DataOutputBuffer that needs to be filled.
* @param section region of the file that needs to be read
* @return total number of bytes read/considered
*/
public long next(String key, DataOutputBuffer bufOut, Coordinate section) throws IOException
{
long bytesRead = -1L;
if (isEOF())
return bytesRead;
seekTo(key, section);
/* note the position where the key starts */
long startPosition = file_.getFilePointer();
String keyInDisk = file_.readUTF();
if (keyInDisk != null)
{
/*
* If key on disk is greater than requested key
* we can bail out since we exploit the property
* of the SSTable format.
*/
if (keyInDisk.compareTo(key) > 0)
return bytesRead;
/*
* If we found the key then we populate the buffer that
* is passed in. If not then we skip over this key and
* position ourselves to read the next one.
*/
int dataSize = file_.readInt();
if (keyInDisk.equals(key))
{
/* write the key into buffer */
bufOut.writeUTF(keyInDisk);
/* write data size into buffer */
bufOut.writeInt(dataSize);
/* write the data into buffer */
bufOut.write(file_, dataSize);
}
else
{
/* skip over data portion */
file_.seek(dataSize + file_.getFilePointer());
}
long endPosition = file_.getFilePointer();
bytesRead = endPosition - startPosition;
}
return bytesRead;
}
}
public static class Reader extends AbstractReader
{
Reader(String filename) throws IOException
{
super(filename);
init(filename);
}
protected void init(String filename) throws IOException
{
file_ = new RandomAccessFile(filename, "r");
}
public long getEOF() throws IOException
{
return file_.length();
}
public long getCurrentPosition() throws IOException
{
return file_.getFilePointer();
}
public boolean isHealthyFileDescriptor() throws IOException
{
return file_.getFD().valid();
}
public void seek(long position) throws IOException
{
file_.seek(position);
}
public boolean isEOF() throws IOException
{
return (getCurrentPosition() == getEOF());
}
/**
* Be extremely careful while using this API. It is currently
* used to read the commit log header from the commit logs.
* Treat this as an internal API.
*
* @param bytes array into which the bytes are read
*/
public void readDirect(byte[] bytes) throws IOException
{
file_.readFully(bytes);
}
public long readLong() throws IOException
{
return file_.readLong();
}
public void close() throws IOException
{
file_.close();
}
}
public static class BufferReader extends Reader
{
private int size_;
BufferReader(String filename, int size) throws IOException
{
super(filename);
size_ = size;
/* the superclass constructor invoked the overridden init() before size_
was assigned (so with a zero buffer size); reopen with the requested size. */
file_.close();
init(filename);
}
protected void init(String filename) throws IOException
{
file_ = new BufferedRandomAccessFile(filename, "r", size_);
}
}
public static class ChecksumReader extends Reader
{
private int size_;
ChecksumReader(String filename, int size) throws IOException
{
super(filename);
size_ = size;
/* same constructor-ordering issue as BufferReader: init() ran before
size_ was assigned, so reopen with the requested buffer size. */
file_.close();
init(filename);
}
protected void init(String filename) throws IOException
{
file_ = new ChecksumRandomAccessFile(filename, "r", size_);
}
}
private static Logger logger_ = Logger.getLogger(SequenceFile.class);
public static final short utfPrefix_ = 2;
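/* Written by close(footer, size) just ahead of the footer (the bloom filter);
next(DataOutputBuffer) treats a key equal to this marker as end-of-data. */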
public static final String marker_ = "Bloom-Filter";
public static IFileWriter writer(String filename) throws IOException
{
return new Writer(filename);
}
public static IFileWriter bufferedWriter(String filename, int size) throws IOException
{
return new BufferWriter(filename, size);
}
public static IFileWriter fastWriter(String filename, int size) throws IOException
{
return new FastConcurrentWriter(filename, size);
}
public static IFileReader reader(String filename) throws IOException
{
return new Reader(filename);
}
public static IFileReader bufferedReader(String filename, int size) throws IOException
{
return new BufferReader(filename, size);
}
/**
* Efficiently writes a UTF-8 string to the buffer.
* Assumes the encoded length of every string passed in can be
* represented as an unsigned short, i.e. is <= 65535.
*
* @param buffer buffer to write the serialized version into
* @param str string to serialize
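* <p/>
* The encoding matches java.io.DataOutputStream#writeUTF: modified UTF-8
* preceded by an unsigned two-byte big-endian length prefix. For example:
* <pre>
* ByteBuffer buf = ByteBuffer.allocate(8);
* SequenceFile.writeUTF(buf, "key");   // puts 0x00 0x03 'k' 'e' 'y'
* </pre>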
*/
protected static void writeUTF(ByteBuffer buffer, String str)
{
int strlen = str.length();
int utflen = 0;
int c, count = 0;
/* use charAt instead of copying String to char array */
for (int i = 0; i < strlen; i++)
{
c = str.charAt(i);
if ((c >= 0x0001) && (c <= 0x007F))
{
utflen++;
}
else if (c > 0x07FF)
{
utflen += 3;
}
else
{
utflen += 2;
}
}
byte[] bytearr = new byte[utflen + 2];
bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF);
bytearr[count++] = (byte) ((utflen >>> 0) & 0xFF);
int i = 0;
for (i = 0; i < strlen; i++)
{
c = str.charAt(i);
if (!((c >= 0x0001) && (c <= 0x007F)))
break;
bytearr[count++] = (byte) c;
}
for (; i < strlen; i++)
{
c = str.charAt(i);
if ((c >= 0x0001) && (c <= 0x007F))
{
bytearr[count++] = (byte) c;
}
else if (c > 0x07FF)
{
bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
}
else
{
bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
}
}
buffer.put(bytearr, 0, utflen + 2);
}
}