// Source: org.apache.hadoop.hdfs.server.datanode.BlockSender
// (Non-Java scraper header lines converted to a comment so the file compiles.)

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.SocketException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

import org.apache.commons.logging.Log;
import org.apache.hadoop.fs.FSDataNodeReadProfilingData;

import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DataTransferProtocol;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.SocketOutputStream;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.DataTransferThrottler;
import org.apache.hadoop.util.Progressable;

/**
* Reads a block from the disk and sends it to a recipient.
*/
/**
 * Reads a block from the disk and sends it to a recipient (a DFS client,
 * or another datanode in the replication case). Packets are framed in the
 * exact wire format consumed by both DFSClient and BlockReceiver; see
 * {@link #sendChunks} for the per-packet layout.
 *
 * <p>An instance serves a single transfer and is closed by
 * {@link #sendBlock} in its finally clause; it is not designed for
 * concurrent use.
 */
public class BlockSender implements java.io.Closeable, FSConstants {
  public static final Log LOG = DataNode.LOG;
  static final Log ClientTraceLog = DataNode.ClientTraceLog;
 
  private DataChecksum checksum; // checksum algorithm/params written as the stream header
  private long offset; // starting position to read (aligned down to a chunk boundary)
  private long endOffset; // ending position (aligned up to a chunk boundary, capped at blockLength)
  private long blockLength; // visible length of the block being served
  private int bytesPerChecksum; // chunk size: data bytes covered by one checksum value
  private int checksumSize; // size in bytes of one checksum value
  private boolean chunkOffsetOK; // if need to send chunk offset before the data
  private long seqno; // sequence number of packet

  private boolean transferToAllowed = true; // may use the zero-copy transferTo() send path
  private boolean pktIncludeVersion = false; // whether each packet header carries a version field
  private int packetVersion; // packet wire version actually used for this transfer
  // set once entire requested byte range has been sent to the client
  private boolean sentEntireByteRange;
  private boolean verifyChecksum; //if true, checksum is verified while reading
  private DataTransferThrottler throttler; // optional bandwidth limiter (e.g. during rebalancing)
  private String clientTraceFmt; // format of client trace log message
  private DatanodeBlockReader blockReader; // performs the actual disk reads / chunk sends

  // Recomputes the whole-block CRC as a side effect of a full sequential
  // read; only created when all preconditions in initialize() hold,
  // otherwise stays null.
  private BlockCrcUpdater crcUpdater = null;
 
  // Replica being served. Null when constructed through the public
  // constructor that supplies its own stream factory.
  final ReplicaToRead replicaToRead;

  /**
   * Minimum buffer used while sending data to clients. Used only if
   * transferTo() is enabled. 64KB is not that large. It could be larger, but
   * not sure if there will be much more improvement.
   */
  private static final int MIN_BUFFER_WITH_TRANSFERTO = 64*1024;

 
  /**
   * Convenience constructor: checksums are not ignored, the old packet
   * version is forced, and no client-trace logging is done.
   */
  BlockSender(int namespaceId, Block block, long startOffset, long length,
              boolean corruptChecksumOk, boolean chunkOffsetOK,
              boolean verifyChecksum, DataNode datanode) throws IOException {
    this(namespaceId, block, startOffset, length, false, corruptChecksumOk,
        chunkOffsetOK, verifyChecksum, false, true, datanode, null);
  }

  /**
   * Builds a sender for a block held by this datanode: looks up the replica
   * in the datanode's volume map, obtains a block reader from the stream
   * factory, then delegates the rest of setup to initialize().
   *
   * @throws IOException if the block is not found in the volume map, or if
   *         initialize() fails (streams opened so far are closed first).
   */
  BlockSender(int namespaceId, Block block, long startOffset, long length,
      boolean ignoreChecksum, boolean corruptChecksumOk, boolean chunkOffsetOK,
      boolean verifyChecksum, boolean pktIncludeVersion, boolean forceOldPktVersion, DataNode datanode,
      String clientTraceFmt) throws IOException {
   
    replicaToRead = datanode.data.getReplicaToRead(namespaceId, block);
    if (replicaToRead == null) {
      throw new IOException("Can't find block " + block + " in volumeMap");
    }

    long blockLength = replicaToRead.getBytesVisible();
    boolean transferToAllowed = datanode.transferToAllowed;
   
    DatanodeBlockReader.BlockInputStreamFactory streamFactory =
      new DatanodeBlockReader.BlockInputStreamFactory(
        namespaceId, block, replicaToRead, datanode, datanode.data, ignoreChecksum,
        verifyChecksum, corruptChecksumOk);
    blockReader = streamFactory.getBlockReader();

    initialize(namespaceId, block, blockLength, startOffset, length,
        corruptChecksumOk, chunkOffsetOK, verifyChecksum, transferToAllowed,
        datanode.updateBlockCrcWhenRead, pktIncludeVersion, forceOldPktVersion,
        streamFactory, clientTraceFmt);
  }
 
  /**
   * Constructor for callers that supply their own checksum-file stream
   * factory (no datanode volume-map lookup is performed). Note that
   * replicaToRead stays null on this path, which also disables the
   * block-CRC recalculation in initialize().
   */
  public BlockSender(int namespaceId, Block block, long blockLength, long startOffset, long length,
              boolean corruptChecksumOk, boolean chunkOffsetOK,
              boolean verifyChecksum, boolean transferToAllowed, boolean pktIncludeVersion,
              BlockWithChecksumFileReader.InputStreamWithChecksumFactory streamFactory
              ) throws IOException {

    replicaToRead = null;
    blockReader = new BlockWithChecksumFileReader(namespaceId, block, true,
        false, verifyChecksum, corruptChecksumOk, streamFactory);

    initialize(namespaceId, block, blockLength, startOffset, length,
        corruptChecksumOk, chunkOffsetOK, verifyChecksum, transferToAllowed,
        false, pktIncludeVersion, true, streamFactory, null);
  }

  /**
   * Shared setup for all constructors: records transfer options, reads
   * checksum parameters from the block reader, validates and chunk-aligns
   * the requested [startOffset, startOffset+length) range, decides whether
   * to recompute the block CRC, and positions the reader.
   *
   * @param blockLength visible length of the block
   * @param startOffset first byte requested; must lie within the block
   * @param length number of bytes requested; negative means "to the end"
   * @param forceOldPktVersion if true, always use the legacy
   *        checksum-first packet version regardless of reader preference
   * @param allowUpdateBlocrCrc if true, permit block-CRC recalculation
   *        (subject to the further conditions checked below)
   * @throws IOException if the requested range is invalid; this instance
   *         is closed before the exception propagates
   */
  private void initialize(int namespaceId, Block block, long blockLength,
      long startOffset, long length, boolean corruptChecksumOk,
      boolean chunkOffsetOK, boolean verifyChecksum, boolean transferToAllowed,
      boolean allowUpdateBlocrCrc, boolean pktIncludeVersion, boolean forceOldPktVersion,
      BlockWithChecksumFileReader.InputStreamWithChecksumFactory streamFactory,
      String clientTraceFmt) throws IOException {
    try {
      this.chunkOffsetOK = chunkOffsetOK;
      this.verifyChecksum = verifyChecksum;
      this.blockLength = blockLength;
      this.transferToAllowed = transferToAllowed;
      this.clientTraceFmt = clientTraceFmt;
      this.pktIncludeVersion = pktIncludeVersion;
     
      // Newer packet versions are only usable when the header carries a
      // version field and the caller did not force the legacy format.
      if (this.pktIncludeVersion && ! forceOldPktVersion) {
        this.packetVersion = blockReader.getPreferredPacketVersion();
      } else {
        this.packetVersion = DataTransferProtocol.PACKET_VERSION_CHECKSUM_FIRST;
      }
     
      checksum = blockReader.getChecksumToSend(blockLength);
     
      bytesPerChecksum = blockReader.getBytesPerChecksum();
      checksumSize = blockReader.getChecksumSize();
     
      // Negative length means "send everything from startOffset on".
      if (length < 0) {
        length = blockLength;
      }

      endOffset = blockLength;
      if (startOffset < 0 || startOffset > endOffset
          || (length + startOffset) > endOffset) {
        String msg = " Offset " + startOffset + " and length " + length
        + " don't match block " + block + " ( blockLen " + endOffset + " )";
        LOG.warn("sendBlock() : " + msg);
        throw new IOException(msg);
      }

     
      // Align the starting offset down to a checksum-chunk boundary so
      // that every sent chunk can be verified against a stored checksum.
      offset = (startOffset - (startOffset % bytesPerChecksum));
      if (length >= 0) {
        // Make sure endOffset points to end of a checksumed chunk.
        long tmpLen = startOffset + length;
        if (tmpLen % bytesPerChecksum != 0) {
          tmpLen += (bytesPerChecksum - tmpLen % bytesPerChecksum);
        }
        if (tmpLen < endOffset) {
          endOffset = tmpLen;
        }
      }
     
      // Recalculate block CRC if:
      // 1. it is configured to be allowed;
      // 2. the block is finalized
      // 3. the full block is to be read
      // 4. there is no Block CRC already cached
      // 5. the block format is CRC32 and checksum size is 4
      // (the !transferToAllowed || verifyChecksum condition presumably
      // restricts this to paths where checksums actually pass through
      // this class — TODO confirm against DatanodeBlockReader.sendChunks)
      if (allowUpdateBlocrCrc &&
          (!transferToAllowed || verifyChecksum)
          && startOffset == 0
          && length >= blockLength
          && replicaToRead != null
          && !replicaToRead.hasBlockCrcInfo()
          && replicaToRead.isFinalized()
          && replicaToRead instanceof DatanodeBlockInfo
          && checksumSize == DataChecksum.DEFAULT_CHECKSUM_SIZE
          && checksum != null
          && (checksum.getChecksumType() == DataChecksum.CHECKSUM_CRC32 || checksum
              .getChecksumType() == DataChecksum.CHECKSUM_CRC32C)) {
        // Needs to recalculate block CRC
        crcUpdater = new BlockCrcUpdater(bytesPerChecksum, true);
      }
     
      seqno = 0;
     
      blockReader.initialize(offset, blockLength);
    } catch (IOException ioe) {
      // Release any streams already opened before rethrowing.
      IOUtils.closeStream(this);
      throw ioe;
    }
  }

  /**
   * Forwards a posix_fadvise-style hint for the underlying block stream
   * to the block reader.
   */
  public void fadviseStream(int advise, long offset, long len)
      throws IOException {
    blockReader.fadviseStream(advise, offset, len);
  }

  /** @return the replica being served, or null (see field comment). */
  public ReplicaToRead getReplicaToRead() {
    return replicaToRead;
  }

  /**
   * close opened files.
   */
  public void close() throws IOException {
    if (blockReader != null) {
      blockReader.close();
    }
  }

  /**
   * Converts an IOExcpetion (not subclasses) to SocketException.
   * This is typically done to indicate to upper layers that the error
   * was a socket error rather than often more serious exceptions like
   * disk errors.
   */
  static IOException ioeToSocketException(IOException ioe) {
    if (ioe.getClass().equals(IOException.class)) {
      // "se" could be a new class in stead of SocketException.
      IOException se = new SocketException("Original Exception : " + ioe);
      se.initCause(ioe);
      /* Change the stacktrace so that original trace is not truncated
       * when printed.*/
      se.setStackTrace(ioe.getStackTrace());
      return se;
    }
    // otherwise just return the same exception.
    return ioe;
  }
 
  /** Enables per-read profiling, delegating collection to the block reader. */
  public void enableReadProfiling(FSDataNodeReadProfilingData dnData) {
    blockReader.enableReadProfiling(dnData);
  }

  /**
   * Sends upto maxChunks chunks of data.
   *
   * When blockInPosition is >= 0, assumes 'out' is a
   * {@link SocketOutputStream} and tries
   * {@link SocketOutputStream#transferToFully(FileChannel, long, int)} to
   * send data (and updates blockInPosition).
   *
   * Packet header layout written here (must match DFSClient/BlockReceiver):
   * packetLen, [packetVersion if pktIncludeVersion], offset, seqno,
   * lastPacket flag byte, data length — followed by checksums + data
   * written by the block reader.
   *
   * @return number of data bytes sent in this packet (0 when nothing left)
   */
  private int sendChunks(ByteBuffer pkt, int maxChunks, OutputStream out)
                         throws IOException {
    // Sends multiple chunks in one packet with a single write().

    int len = (int) Math.min(endOffset - offset,
                            (((long) bytesPerChecksum) * ((long) maxChunks)));

    // truncate len so that any partial chunks will be sent as a final packet.
    // this is not necessary for correctness, but partial chunks are
    // ones that may be recomputed and sent via buffer copy, so try to minimize
    // those bytes
    if (len > bytesPerChecksum && len % bytesPerChecksum != 0) {
      len -= len % bytesPerChecksum;
    }

    if (len == 0) {
      return 0;
    }

    // Per-packet totals: number of chunks (last one may be partial) and
    // the advertised packet length (data + checksums + 4-byte data-len field).
    int numChunks = (len + bytesPerChecksum - 1)/bytesPerChecksum;
    int packetLen = len + numChunks*checksumSize + 4;
    pkt.clear();

    // The packet format is documented in DFSOuputStream.Packet.getBuffer().
    // Here we need to use the exact packet format since it can be received
    // by both of DFSClient, or BlockReceiver in the case of replication, which
    // uses the same piece of codes as receiving data from DFSOutputStream.
    //

    // write packet header
    pkt.putInt(packetLen);
    if (pktIncludeVersion) {
      pkt.putInt(packetVersion);
    }
    pkt.putLong(offset);
    pkt.putLong(seqno);
    pkt.put((byte)((offset + len >= endOffset) ? 1 : 0));
               //why no ByteBuf.putBoolean()?
    pkt.putInt(len);
   
    // The reader fills in checksums (and data, on the non-transferTo path)
    // starting at checksumOff, then writes the packet to 'out'.
    int checksumOff = pkt.position();
    byte[] buf = pkt.array();
   
    blockReader.sendChunks(out, buf, offset, checksumOff,
        numChunks, len, crcUpdater, packetVersion);
   
    if (throttler != null) { // rebalancing so throttle
      throttler.throttle(packetLen);
    }
   
    return len;
  }
 
  /** Header length varies with whether the version field is included. */
  private int getPacketHeaderLen() {
    return DataNode.getPacketHeaderLen(pktIncludeVersion);
  }

  /**
   * sendBlock() is used to read block and its metadata and stream the data to
   * either a client or to another datanode.
   *
   * @param out  stream to which the block is written to
   * @param baseStream optional. if non-null, <code>out</code> is assumed to
   *        be a wrapper over this stream. This enables optimizations for
   *        sending the data, e.g.
   *        {@link SocketOutputStream#transferToFully(FileChannel,
   *        long, int)}.
   * @param throttler for sending data.
   * @return total bytes reads, including crc.
   */
  public long sendBlock(DataOutputStream out, OutputStream baseStream,
                 DataTransferThrottler throttler) throws IOException {
    return sendBlock(out, baseStream, throttler, null);
  }

  /**
   * sendBlock() is used to read block and its metadata and stream the data to
   * either a client or to another datanode.
   *
   * @param out  stream to which the block is written to
   * @param baseStream optional. if non-null, <code>out</code> is assumed to
   *        be a wrapper over this stream. This enables optimizations for
   *        sending the data, e.g.
   *        {@link SocketOutputStream#transferToFully(FileChannel,
   *        long, int)}.
   * @param throttler for sending data.
   * @param progress for signalling progress.
   * @return total bytes reads, including crc.
   */
  public long sendBlock(DataOutputStream out, OutputStream baseStream,
       DataTransferThrottler throttler, Progressable progress) throws IOException {
    if( out == null ) {
      throw new IOException( "out stream is null" );
    }
    this.throttler = throttler;

    long initialOffset = offset;
    long totalRead = 0;
    OutputStream streamForSendChunks = out;
   
    // NOTE(review): startTime stays 0 when info logging is disabled, so a
    // non-null clientTraceFmt with info disabled would log a bogus elapsed
    // time in the finally clause — presumably those settings always go
    // together; confirm with callers.
    final long startTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime() : 0;
    try {
      try {
        // Stream preamble: checksum header, then the aligned start offset
        // if the peer asked for it.
        checksum.writeHeader(out);
        if ( chunkOffsetOK ) {
          out.writeLong( offset );
        }
        out.flush();
      } catch (IOException e) { //socket error
        throw ioeToSocketException(e);
      }
     
      int maxChunksPerPacket;
      int pktSize = SIZE_OF_INTEGER + getPacketHeaderLen();
     
      // Zero-copy path: only when allowed, checksums are not being
      // verified here, the base stream is a socket, and the reader can
      // serve transferTo(). Otherwise fall back to buffered copies.
      if (transferToAllowed && !verifyChecksum &&
          baseStream instanceof SocketOutputStream &&
          blockReader.prepareTransferTo()) {
        streamForSendChunks = baseStream;
       
        // assure a mininum buffer size.
        maxChunksPerPacket = (Math.max(BUFFER_SIZE,
                                       MIN_BUFFER_WITH_TRANSFERTO)
                              + bytesPerChecksum - 1)/bytesPerChecksum;
       
        // packet buffer has to be able to do a normal transfer in the case
        // of recomputing checksum
        pktSize += (bytesPerChecksum + checksumSize) * maxChunksPerPacket;
      } else {
        maxChunksPerPacket = Math.max(1,
                 (BUFFER_SIZE + bytesPerChecksum - 1)/bytesPerChecksum);
        pktSize += (bytesPerChecksum + checksumSize) * maxChunksPerPacket;
      }

      ByteBuffer pktBuf = ByteBuffer.allocate(pktSize);

      while (endOffset > offset) {
        long len = sendChunks(pktBuf, maxChunksPerPacket,
                              streamForSendChunks);
        if (progress != null) {
          progress.progress();
        }
        offset += len;
        // Account for both data bytes and the checksum bytes that
        // accompanied them (one checksumSize per full-or-partial chunk).
        totalRead += len + ((len + bytesPerChecksum - 1)/bytesPerChecksum*
                            checksumSize);
        seqno++;
      }
      try {
        out.writeInt(0); // mark the end of block       
        out.flush();
      } catch (IOException e) { //socket error
        throw ioeToSocketException(e);
      }
     
      sentEntireByteRange = true;
    }
    catch (RuntimeException e) {
      LOG.error("unexpected exception sending block", e);
     
      throw new IOException("unexpected runtime exception", e);
    }
    finally {
      if (clientTraceFmt != null) {
        final long endTime = System.nanoTime();
        ClientTraceLog.info(String.format(clientTraceFmt, totalRead, initialOffset, endTime - startTime));
      }
      close();
    }
   
    // Persist the freshly computed block CRC. crcUpdater is only created
    // when replicaToRead is a non-null DatanodeBlockInfo (see initialize),
    // so the cast below is safe.
    if (crcUpdater != null && crcUpdater.isCrcValid(offset)
        && !replicaToRead.hasBlockCrcInfo()) {
      int blockCrcOffset = crcUpdater.getBlockCrcOffset();
      int blockCrc = crcUpdater.getBlockCrc();
      if (DataNode.LOG.isDebugEnabled()) {
        DataNode.LOG.debug("Setting block CRC " + replicaToRead + " offset "
            + blockCrcOffset + " CRC " + blockCrc);
      }
      ((DatanodeBlockInfo) replicaToRead).setBlockCrc(blockCrcOffset, blockCrc);
    }

    return totalRead;
  }
 
  /** @return true once the entire requested byte range has been sent. */
  boolean didSendEntireByteRange() {
    return sentEntireByteRange;
  }

  /** Factory abstraction for obtaining a reader over the block data file. */
  public static interface InputStreamFactory {
    public BlockDataFile.Reader getBlockDataFileReader() throws IOException;
  }
}
// (Scraper footer removed: trailing non-Java site navigation/copyright text
// after the closing brace made the file uncompilable.)