Package org.commoncrawl.util.shared

Source Code of org.commoncrawl.util.shared.MMapUtils$MMapFile$MMapFileInputStream

package org.commoncrawl.util.shared;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.io.File;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.BufferUnderflowException;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;

import java.security.AccessController;
import java.security.PrivilegedExceptionAction;
import java.security.PrivilegedActionException;
import java.lang.reflect.Method;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.Seekable;

/**
* Utility classes for memory-mapped I/O in Java.
*
* @author rana
*
*/
public class MMapUtils {

 
  static final Log LOG = LogFactory.getLog(MMapUtils.class);
 
  public static final int DEFAULT_MAX_BUFF = 256 * 1024;

  /**
   * <code>true</code>, if this platform supports unmapping mmapped files.
   */
  public static final boolean UNMAP_SUPPORTED;
 
  static {
    boolean v;
    try {
      Class.forName("sun.misc.Cleaner");
      Class.forName("java.nio.DirectByteBuffer")
        .getMethod("cleaner");
      v = true;
    } catch (Exception e) {
      v = false;
    }
    UNMAP_SUPPORTED = v;
  }
 
  /**
   * Returns <code>true</code>, if the unmap workaround is enabled.
   * @see #setUseUnmap
   */
  public static boolean getUseUnmap() {
    return UNMAP_SUPPORTED;
  }
 
  /**
   * Try to unmap the buffer, this method silently fails if no support
   * for that in the JVM. On Windows, this leads to the fact,
   * that mmapped files cannot be modified or deleted.
   */
  final static void cleanMapping(final ByteBuffer buffer) throws IOException {
    if (getUseUnmap()) {
      try {
        AccessController.doPrivileged(new PrivilegedExceptionAction<Object>() {
          public Object run() throws Exception {
            final Method getCleanerMethod = buffer.getClass()
              .getMethod("cleaner");
            getCleanerMethod.setAccessible(true);
            final Object cleaner = getCleanerMethod.invoke(buffer);
            if (cleaner != null) {
              cleaner.getClass().getMethod("clean")
                .invoke(cleaner);
            }
            return null;
          }
        });
      } catch (PrivilegedActionException e) {
        final IOException ioe = new IOException("unable to unmap the mapped buffer");
        ioe.initCause(e.getCause());
        throw ioe;
      }
    }
  }
   

  /**
   * Returns the current mmap chunk size.
   * @see #setMaxChunkSize
   */
  public static int getMaxChunkSize() {
    return DEFAULT_MAX_BUFF;
  }
 
  public static class MMapFile {
   
    long length = -1;
    ByteBuffer buffers[]=null;
    int        bufSizes[] = null;
    private int refCount =0;
    private boolean closePending = false;
   
    public MMapFile(File input) throws IOException {
      RandomAccessFile raf = new RandomAccessFile(input, "r");
      try {
        this.length = raf.length();
       
       
        if ((length / getMaxChunkSize()) > Integer.MAX_VALUE)
          throw new IllegalArgumentException
            ("RandomAccessFile too big for maximum buffer size: "
             + raf.toString());
       
        int nrBuffers = (int) (length / getMaxChunkSize());
        if (((long) nrBuffers * getMaxChunkSize()) <= length) nrBuffers++;
       
        this.buffers = new ByteBuffer[nrBuffers];
        this.bufSizes = new int[nrBuffers];
       
        long bufferStart = 0;
        FileChannel rafc = raf.getChannel();
        for (int bufNr = 0; bufNr < nrBuffers; bufNr++) {
          int bufSize = (length > (bufferStart + getMaxChunkSize()))
            ? getMaxChunkSize()
            : (int) (length - bufferStart);
          this.buffers[bufNr] = rafc.map(MapMode.READ_ONLY,bufferStart,bufSize);
          this.bufSizes[bufNr] = bufSize;
          bufferStart += bufSize;
        }
        LOG.info("Initialized MapFile from file:" + input.getAbsolutePath() " NumBuffers:" + buffers.length + " TotalLength:" + this.length);
      } finally {
        raf.close();
      }
    }
   
    public FSDataInputStream newInputStream() throws IOException {
      FSInputStream stream = new MMapFileInputStream();
      FSDataInputStream dataStream = new FSDataInputStream(stream);
      return dataStream;
    }
   
    private synchronized void addRef() {
      refCount++;
    }
   
    private synchronized void release() {
      if (--refCount == 0 && closePending) {
        try {
          close();
        } catch (IOException e) {
          LOG.error(CCStringUtils.stringifyException(e));
        }
      }
    }
   
    public synchronized void close()throws IOException {
      if (refCount == 0) {
        if (buffers != null) {
          for (ByteBuffer buffer : buffers) {
            cleanMapping(buffer);
          }
        }
        buffers = null;
      }
      else {
        closePending = true;
      }
    }
   
    public long getLength() {
      return length;
    }
   
   
    // Because Java's ByteBuffer uses an int to address the
    // values, it's necessary to access a file >
    // Integer.MAX_VALUE in size using multiple byte buffers.
    public class MMapFileInputStream extends FSInputStream {
   
      private int curBufIndex = 0;
      private final int maxBufSize = getMaxChunkSize();
      private ByteBuffer curBuf; // redundant for speed: buffers[curBufIndex]
     
      public MMapFileInputStream() throws IOException {
        addRef();
        seek(0L);
      }
   
      @Override
      public int read() throws IOException {
        try {
          return curBuf.get() & 0xff;
        } catch (BufferUnderflowException e) {
          curBufIndex++;
          if (curBufIndex >= buffers.length)
            throw new IOException("read past EOF");
          curBuf = buffers[curBufIndex].slice();
          curBuf.position(0);
          return curBuf.get() & 0xff;
        }
      }
   
      @Override
      public int read(byte[] bytes, int offset, int len) throws IOException {
        try {
          curBuf.get(bytes, offset, len);
          return len;
        } catch (BufferUnderflowException e) {
          int bytesRead = 0;
          int curAvail = curBuf.remaining();
          while (len > curAvail) {
            curBuf.get(bytes, offset, curAvail);
            bytesRead += curAvail;
            len -= curAvail;
            offset += curAvail;
            curBufIndex++;
            if (curBufIndex >= buffers.length) {
              return bytesRead;
            }
            curBuf = buffers[curBufIndex].slice();
            curBuf.position(0);
            curAvail = curBuf.remaining();
          }
          curBuf.get(bytes, offset, len);
          return bytesRead + len;
        }
      }
     
      @Override
      public void close() throws IOException {
        release();
      }
     
      @Override
      public int available() throws IOException {
        long amtAvailable = (length() - getPos());
        return (amtAvailable <= Integer.MAX_VALUE) ? (int)amtAvailable : Integer.MAX_VALUE;
      };
   
      @Override
      public void seek(long pos) throws IOException {
        int bufferIndex = (int) (pos / maxBufSize);
        if (curBuf == null || bufferIndex != curBufIndex) {
          curBufIndex = bufferIndex;
          curBuf = buffers[curBufIndex].slice();
        }
        int bufOffset = (int) (pos - ((long) curBufIndex * maxBufSize));
        curBuf.position(bufOffset);
      }
   
      public long length() {
        return length;
      }

      @Override
      public long getPos() throws IOException {
        return ((long) curBufIndex * maxBufSize) + curBuf.position();
      }

      @Override
      public boolean seekToNewSource(long targetPos) throws IOException {
        seek(targetPos);
        return false;
      }

     
      public short readShort() throws IOException {
        try {
          return curBuf.getShort();
        } catch (BufferUnderflowException e) {
          return (short) (((read() & 0xFF) <<  8) (read() & 0xFF));
        }
      }

      public int readInt() throws IOException {
        try {
          return curBuf.getInt();
        } catch (BufferUnderflowException e) {
          return ((read() & 0xFF) << 24) | ((read() & 0xFF) << 16)
          | ((read() & 0xFF) <<  8) (read() & 0xFF);
        }
      }

      public long readLong() throws IOException {
        try {
          return curBuf.getLong();
        } catch (BufferUnderflowException e) {
          return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL);
        }
      }
     
      /** Reads an int stored in variable-length format.  Reads between one and
       * five bytes.  Smaller values take fewer bytes.  Negative numbers are not
       * supported.
       * @see DataOutput#writeVInt(int)
       */
      public int readVInt() throws IOException {
        int b = read();
        int i = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7) {
          b = read();
          i |= (b & 0x7F) << shift;
        }
        return i;
      }
     
      /** Reads a long stored in variable-length format.  Reads between one and
       * nine bytes.  Smaller values take fewer bytes.  Negative numbers are not
       * supported. */
      public long readVLong() throws IOException {
        int b = read();
        long i = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7) {
          b = read();
          i |= (b & 0x7FL) << shift;
        }
        return i;
      }

   
    }   
  }
 
}
TOP

Related Classes of org.commoncrawl.util.shared.MMapUtils$MMapFile$MMapFileInputStream

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.