Source Code of org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyRaid

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyDefault;
import org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicy.NotEnoughReplicasException;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.net.DNSToSwitchMapping;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.raid.DirectoryStripeReader.BlockInfo;
import org.apache.hadoop.raid.RaidNode;
import org.apache.hadoop.raid.Codec;
import org.apache.hadoop.util.HostsFileReader;
import org.apache.hadoop.util.InjectionHandler;
import org.apache.hadoop.util.StringUtils;

/**
 * This BlockPlacementPolicy uses a simple heuristic: it places the replicas
 * of a newly-created block randomly, in order to spread out the group of
 * blocks that RAID uses to recover each other.
 * This is important for the availability of the blocks.
 *
 * Replication of an existing block continues to use the default placement
 * policy.
 *
 * This simple block placement policy does not guarantee that
 * blocks on the RAID stripe are on different nodes. However, the BlockMonitor
 * periodically scans the raided files and fixes the placement
 * if it detects a violation.
 *
 * This class can be used by multiple threads. It has to be thread safe.
 */
public class BlockPlacementPolicyRaid extends BlockPlacementPolicyDefault {
  public static final Log LOG =
    LogFactory.getLog(BlockPlacementPolicyRaid.class);
  Configuration conf;
  private FSNamesystem namesystem = null;

  private CachedLocatedBlocks cachedLocatedBlocks;
  private CachedFullPathNames cachedFullPathNames;
  private long minFileSize = RaidNode.MINIMUM_RAIDABLE_FILESIZE;

  /** {@inheritDoc} */
  @Override
  public void initialize(Configuration conf,  FSClusterStats stats,
                         NetworkTopology clusterMap, HostsFileReader hostsReader,
                         DNSToSwitchMapping dnsToSwitchMapping, FSNamesystem namesystem) {
    super.initialize(conf, stats, clusterMap,
                     hostsReader, dnsToSwitchMapping, namesystem);
    this.conf = conf;
    this.minFileSize = conf.getLong(RaidNode.MINIMUM_RAIDABLE_FILESIZE_KEY,
        RaidNode.MINIMUM_RAIDABLE_FILESIZE);
    this.namesystem = namesystem;
    this.cachedLocatedBlocks = new CachedLocatedBlocks(conf);
    this.cachedFullPathNames = new CachedFullPathNames(conf);
  }

  @Override
  public DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas,
      DatanodeDescriptor writer, List<DatanodeDescriptor> chosenNodes,
      long blocksize) {
    return chooseTarget(srcPath, numOfReplicas, writer, chosenNodes, null,
        blocksize);
  }

  @Override
  protected void place3rdReplicaForInClusterWriter(
      HashMap<Node, Node> excludedNodes, long blocksize,
      int maxNodesPerRack, List<DatanodeDescriptor> results
      ) throws NotEnoughReplicasException {
    if (results.size() > 2) {
      return;
    }
    HashSet<String> excludedRacks = new HashSet<String>();
    for (DatanodeDescriptor node : results) {
      String rack = node.getNetworkLocation();
      excludedRacks.add(rack);
    }
   
    do {
      String remoteRack = clusterMap.chooseRack(excludedRacks);
      if (remoteRack == null) { // no more remote rack available
        // choose a node on the rack where the first replica is located
        chooseLocalRack(
            results.get(0), excludedNodes, blocksize, maxNodesPerRack, results);
        return;
      }
      // a remote rack is chosen
      try {
        excludedRacks.add(remoteRack);
        chooseRandom(1, remoteRack, excludedNodes, blocksize,
            maxNodesPerRack, results);
        return;
      } catch (NotEnoughReplicasException ne) {
        // try again until all remote racks are exhausted
      }
    } while (true);
  }
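
  /*
   * Worked example of the fallback order above: with earlier replicas on
   * racks {r1, r2} (hypothetical names), the loop keeps trying a random node
   * on some other rack, and only when every remaining rack fails does it
   * fall back to the rack of the first replica.
   */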
   

  @Override
  public DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas,
      DatanodeDescriptor writer, List<DatanodeDescriptor> chosenNodes,
      List<Node> exclNodes, long blocksize) {
    try {
      FileInfo info = getFileInfo(null, srcPath);
      if (LOG.isDebugEnabled()) {
        LOG.debug("FileType:" + srcPath + " " + info.type.name());
      }
      if (info.type == FileType.NOT_RAID) {
        return super.chooseTarget(
            srcPath, numOfReplicas, writer, chosenNodes, exclNodes, blocksize);
      }
      ArrayList<DatanodeDescriptor> results = new ArrayList<DatanodeDescriptor>();
      HashMap<Node, Node> excludedNodes = new HashMap<Node, Node>();
      if (exclNodes != null) {
        for (Node node: exclNodes) {
          excludedNodes.put(node, node);
        }
      }
      for (Node node:chosenNodes) {
        excludedNodes.put(node, node);
      }
      chooseRandom(numOfReplicas, Path.SEPARATOR, excludedNodes, blocksize,
          1, results);
      return results.toArray(new DatanodeDescriptor[results.size()]);
    } catch (Exception e) {
      FSNamesystem.LOG.debug(
        "Error happend when choosing datanode to write:" +
        StringUtils.stringifyException(e));
      return super.chooseTarget(srcPath, numOfReplicas, writer,
                                chosenNodes, blocksize);
    }
  }

  /** {@inheritDoc} */
  @Override
  public DatanodeDescriptor chooseReplicaToDelete(FSInodeInfo inode,
      Block block, short replicationFactor,
      Collection<DatanodeDescriptor> first,
      Collection<DatanodeDescriptor> second) {

    DatanodeDescriptor chosenNode = null;
    try {
      String path = getFullPathName(inode);
      FileInfo info = getFileInfo(inode, path);
      if (info.type == FileType.NOT_RAID) {
        return super.chooseReplicaToDelete(
            inode, block, replicationFactor, first, second);
      }
      List<LocatedBlock> companionBlocks =
          getCompanionBlocks(path, info, block, inode);
      if (companionBlocks == null || companionBlocks.size() == 0) {
        // Use the default method if it is not a valid raided or parity file
        return super.chooseReplicaToDelete(
            inode, block, replicationFactor, first, second);
      }
      // Delete from the first collection first.
      // This ensures the number of unique racks holding this block is not reduced.
      Collection<DatanodeDescriptor> all = new HashSet<DatanodeDescriptor>();
      all.addAll(first);
      all.addAll(second);
      chosenNode = chooseReplicaToDelete(companionBlocks, all);
      if (chosenNode != null) {
        return chosenNode;
      }
      return super.chooseReplicaToDelete(
          inode, block, replicationFactor, first, second);
    } catch (Exception e) {
      LOG.debug("Failed to choose the correct replica to delete", e);
      return super.chooseReplicaToDelete(
          inode, block, replicationFactor, first, second);
    }
  }

  private DatanodeDescriptor chooseReplicaToDelete(
      Collection<LocatedBlock> companionBlocks,
      Collection<DatanodeDescriptor> dataNodes) throws IOException {

    if (dataNodes.isEmpty()) {
      return null;
    }
    // Count the number of replicas on each node and rack
    final Map<String, Integer>[] companionBlockCounts = countCompanionBlocks(companionBlocks);
    final Map<String, Integer> nodeCompanionBlockCount =
        companionBlockCounts[0];
    final Map<String, Integer> rackCompanionBlockCount =
        companionBlockCounts[1];

    NodeComparator comparator =
      new NodeComparator(nodeCompanionBlockCount, rackCompanionBlockCount);
    return Collections.max(dataNodes, comparator);
  }

  /**
   * Count how many companion blocks are on each datanode and on each rack
   * @param companionBlocks a collection of all the companion blocks
   * @return maps from node name to the number of companion blocks:
   *         [0] for datanodes, [1] for racks
   */
  @SuppressWarnings("unchecked")
  static Map<String, Integer>[] countCompanionBlocks(
      Collection<LocatedBlock> companionBlocks) {
    Map<String, Integer>[] result = new HashMap[2];
    result[0] = new HashMap<String, Integer>();
    result[1] = new HashMap<String, Integer>();
   
    for (LocatedBlock block : companionBlocks) {
      for (DatanodeInfo d : block.getLocations()) {
        // count the companion blocks on the datanodes
        String name = d.getName();
        Integer currentCount = result[0].get(name);
        result[0].put(name, currentCount == null ? 1 : currentCount + 1);
       
        // count the companion blocks on the racks of datanodes
        name = d.getParent().getName();
        currentCount = result[1].get(name);
        result[1].put(name, currentCount == null ? 1 : currentCount + 1);
      }
    }
    return result;
  }
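
  /*
   * Worked example for countCompanionBlocks (node and rack names are
   * hypothetical): given two companion blocks located on datanodes
   * {n1:/rackA, n2:/rackB} and {n1:/rackA}, the result is
   *   result[0] = {n1=2, n2=1}         (per-datanode counts)
   *   result[1] = {/rackA=2, /rackB=1} (per-rack counts)
   */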

  /**
   * Compares the datanodes based on the number of companion blocks on the same
   * node and rack. If those are equal, compares the remaining space on the
   * datanodes.
   */
  class NodeComparator implements Comparator<DatanodeDescriptor> {
    private Map<String, Integer> nodeBlockCount;
    private Map<String, Integer> rackBlockCount;
    private NodeComparator(Map<String, Integer> nodeBlockCount,
                           Map<String, Integer> rackBlockCount) {
      this.nodeBlockCount = nodeBlockCount;
      this.rackBlockCount = rackBlockCount;
    }
    @Override
    public int compare(DatanodeDescriptor d1, DatanodeDescriptor d2) {
      int res = compareBlockCount(d1, d2, nodeBlockCount);
      if (res != 0) {
        return res;
      }
      res = compareBlockCount(d1.getParent(), d2.getParent(), rackBlockCount);
      if (res != 0) {
        return res;
      }
      if (d1.getRemaining() > d2.getRemaining()) {
        return -1;
      }
      if (d1.getRemaining() < d2.getRemaining()) {
        return 1;
      }
      return 0;
    }
    private int compareBlockCount(Node node1, Node node2,
                                  Map<String, Integer> blockCount) {
      Integer count1 = blockCount.get(node1.getName());
      Integer count2 = blockCount.get(node2.getName());
      count1 = count1 == null ? 0 : count1;
      count2 = count2 == null ? 0 : count2;
      if (count1 > count2) {
        return 1;
      }
      if (count1 < count2) {
        return -1;
      }
      return 0;
    }
  }
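
  /*
   * How chooseReplicaToDelete uses this comparator: Collections.max picks
   * the datanode carrying the most companion blocks, breaking ties first by
   * the rack-level companion count and then by the least remaining space (a
   * node with less free space compares as "greater" above, so its replica is
   * the one deleted).
   */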

  /**
   * Obtain the companion blocks of the given block.
   * Companion blocks are defined as the blocks that can help recover each
   * other by using the raid decoder.
   * @param path The path of the file that contains the block
   * @param info The info of this file
   * @param block The given block,
   *              or null if it is the block which is currently being written
   * @param inode the inode of the path file
   * @return the block locations of companion blocks
   */
  List<LocatedBlock> getCompanionBlocks(String path, FileInfo info, Block block, FSInodeInfo inode)
      throws IOException {
    Codec codec = info.codec;
    switch (info.type) {
      case NOT_RAID:
        return Collections.emptyList();
      case HAR_TEMP_PARITY:
        return getCompanionBlocksForHarParityBlock(
            path, codec.parityLength, block, inode);
      case TEMP_PARITY:
        NameWithINode ni = getSourceFile(path, codec.tmpParityDirectory);
        return getCompanionBlocksForParityBlock(
            ni.name,
            path, codec.parityLength, codec.stripeLength, block,
            codec.isDirRaid, ni.inode, inode);
      case PARITY:
        ni = getSourceFile(path, codec.parityDirectory);
        return getCompanionBlocksForParityBlock(
            ni.name,
            path, codec.parityLength, codec.stripeLength, block,
            codec.isDirRaid, ni.inode, inode);
      case SOURCE:
        return getCompanionBlocksForSourceBlock(
            path,
            info.parityName,
            codec.parityLength, codec.stripeLength, block,
            codec.isDirRaid, inode, info.parityInode);
    }
    return Collections.emptyList();
  }
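
  /*
   * Worked example of the stripe arithmetic used by the helpers below,
   * assuming stripeLength = 10 and parityLength = 4 (hypothetical codec
   * values): source block #13 lies in stripe 13 / 10 = 1, so its companions
   * are source blocks [10, 20) of that stripe plus parity blocks [4, 8).
   */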

  private List<LocatedBlock> getCompanionBlocksForHarParityBlock(
      String parity, int parityLength, Block block, FSInodeInfo inode)
      throws IOException {
    int blockIndex = getBlockIndex(parity, block, inode, true);
    List<LocatedBlock> parityBlocks = getLocatedBlocks(parity, inode);
    // consider only the parity file in this case because the source file's
    // block locations are not easy to obtain
    List<LocatedBlock> result = new ArrayList<LocatedBlock>();
    int start = Math.max(0, blockIndex - parityLength + 1);
    int end = Math.min(parityBlocks.size(), blockIndex + parityLength);
    result.addAll(parityBlocks.subList(start, end));
    return result;
  }
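
  /*
   * Example of the sliding window above: with parityLength = 3 and
   * blockIndex = 5, start = max(0, 5 - 3 + 1) = 3 and end = min(size, 5 + 3)
   * = 8, so parity blocks [3, 8) are companions. The window is wider than a
   * single stripe, presumably because block-to-stripe alignment is unknown
   * inside a HAR archive.
   */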

  private void addCompanionParityBlocks(String parity, INodeFile pinode,
      int stripeIndex, int parityLength, List<LocatedBlock> blocks)
          throws IOException {
    if (pinode == null)
      return;
    long parityStartOffset = (long) stripeIndex * parityLength *
       pinode.getPreferredBlockSize();
    long parityFileSize = namesystem.dir.getFileSize(pinode);
    // for parity, always consider the neighbor blocks as companion blocks
    if (parityStartOffset < parityFileSize) {
      blocks.addAll(getLocatedBlocks(pinode, parityStartOffset,
          parityLength * pinode.getPreferredBlockSize()));
    }
  }

  String getFullPathName(FSInodeInfo inode) throws IOException {
    String path = cachedFullPathNames.get(inode);
    if (path != null) {
      InjectionHandler
          .processEvent(InjectionEvent.BLOCKPLACEMENTPOLICYRAID_CACHED_PATH);
      return path;
    }
    byte[][] names = null;
    namesystem.readLock();
    try {
      names = FSDirectory.getINodeByteArray((INode)inode);
    } finally {
      namesystem.readUnlock();
    }
    path = FSDirectory.getFullPathName(names);
    cachedFullPathNames.put(inode, path);
    return path;  
  }
 
  List<LocatedBlock> getLocatedBlocks(String file, FSInodeInfo f)
      throws IOException {
    List<LocatedBlock> blocks = cachedLocatedBlocks.get(file);
    if (blocks != null) {
      InjectionHandler
          .processEvent(InjectionEvent.BLOCKPLACEMENTPOLICYRAID_CACHED_BLOCKS);
      return blocks;
    }
    // otherwise populate cache
    INodeFile inode = (INodeFile) f;
    // Note that the list is generated. It is not the internal data of inode.
    List<LocatedBlock> result = inode == null ? new ArrayList<LocatedBlock>()
        : namesystem.getBlockLocationsInternal(inode, 0, Long.MAX_VALUE,
            Integer.MAX_VALUE).getLocatedBlocks();
    if (result == null) {
      result = Collections.emptyList();
    } else {
      result = Collections.unmodifiableList(result);
    }
    cachedLocatedBlocks.put(file, result);
    return result;
  }
 
  public List<LocatedBlock> getLocatedBlocks(INodeFile inode, long offset,
      long length)
      throws IOException {
    // Note that the list is generated. It is not the internal data of inode.
    List<LocatedBlock> result = inode == null ?
        new ArrayList<LocatedBlock>() :
        namesystem.getBlockLocationsInternal(inode, offset, length,
                                     Integer.MAX_VALUE).getLocatedBlocks();
    if (result == null) {
      return Collections.emptyList();
    }
    return Collections.unmodifiableList(result);
  }

  private List<LocatedBlock> getCompanionBlocksForParityBlock(
      String src, String parity, int parityLength, int stripeLength,
      Block block, boolean isDirRaid, FSInodeInfo srcinode, FSInodeInfo pinode)
      throws IOException {
    int blockIndex = getBlockIndex(parity, block, pinode, false);
    int stripeIndex = blockIndex / parityLength;

    List<LocatedBlock> result = new ArrayList<LocatedBlock>();
    addCompanionParityBlocks(parity, (INodeFile)pinode, stripeIndex,
        parityLength, result);
    if (src == null) {
      return result;
    }

    // get the source blocks.
    List<LocatedBlock> sourceBlocks;
    int sourceStart = stripeIndex * stripeLength;
    int sourceEnd = sourceStart + stripeLength;

    if (!isDirRaid) {
      sourceBlocks = getLocatedBlocks(src, srcinode);
    } else {
      sourceBlocks = new ArrayList<LocatedBlock>();
      INode inode = (INode) srcinode;
      INodeDirectory srcNode;
      if (inode.isDirectory()) {
        srcNode = (INodeDirectory) inode;
      } else {
        throw new IOException(
            "The source should be a directory in Dir-Raiding: " + src);
      }

      boolean found = false;
      String srcPath = src + Path.SEPARATOR;
      // look for the stripe
      namesystem.readLock();
      namesystem.dir.readLock();
      try {
        for (INode child : srcNode.getChildren()) {
          if (child.isDirectory()) {
            throw new IOException("The source is not a leaf directory: " + src
                + ", contains a subdirectory: " + child.getLocalName());
          }
          INodeFile childInode = (INodeFile)child;
          long fileSize = namesystem.dir.getFileSize(childInode);
          // check if we will do dir-raid on this file
          if (fileSize < minFileSize) {
            continue;
          }
          int numBlocks = childInode.getBlocks().length;

          if (numBlocks < sourceStart && !found) {
            sourceStart -= numBlocks;
            sourceEnd -= numBlocks;
            continue;
          } else {
            String childName = srcPath + child.getLocalName();
            List<LocatedBlock> childBlocks = getLocatedBlocks(childName, child);
            found = true;
            sourceBlocks.addAll(childBlocks);
            if (sourceEnd <= sourceBlocks.size()) {
              break;
            }
          }
        }
      } finally {
        namesystem.dir.readUnlock();
        namesystem.readUnlock();
      }
    }

    sourceEnd = Math.min(sourceEnd,
        sourceBlocks.size());
    if (sourceStart < sourceBlocks.size()) {
      result.addAll(sourceBlocks.subList(sourceStart, sourceEnd));
    }
    return result;
  }
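
  /*
   * Worked example of the directory scan above (child block counts are
   * hypothetical): with stripeLength = 3 and children f0, f1, f2 holding
   * 2, 2 and 4 blocks, stripe 1 covers the directory's global blocks [3, 6).
   * The loop skips f0 (rebasing sourceStart/sourceEnd from 3/6 to 1/4),
   * collects the blocks of f1 and f2, and subList(1, 4) yields
   * f1[1], f2[0] and f2[1].
   */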

  private List<LocatedBlock> getCompanionBlocksForSourceBlock(
      String src, String parity, int parityLength, int stripeLength,
      Block block, boolean isDirRaid, FSInodeInfo inode, FSInodeInfo parityInode)
      throws IOException {
    List<LocatedBlock> result = new ArrayList<LocatedBlock>();
    List<LocatedBlock> sourceBlocks = null;
    int blockIndex = getBlockIndex(src, block, inode, true);
    int stripeIndex = 0;
    int sourceStart = 0;
    int sourceEnd = 0;

    if (!isDirRaid) {
      sourceBlocks = getLocatedBlocks(src, inode);
      stripeIndex = blockIndex / stripeLength;
      sourceStart = stripeIndex * stripeLength;
      sourceEnd = Math.min(sourceStart + stripeLength, sourceBlocks.size());
    } else {
      // cache the candidate blocks.
      BlockInfo[] tmpStripe = new BlockInfo[stripeLength];
      for (int i = 0; i < stripeLength; i++) {
        tmpStripe[i] = new BlockInfo(0, 0);
      }
      int curIdx = 0;
      boolean found = false;

      sourceBlocks = new ArrayList<LocatedBlock>();
      byte[][] components = INodeDirectory.getPathComponents(src);
      INodeDirectory srcNode = namesystem.dir.getINode(components).getParent();
      String parentPath = getParentPath(src);
      if (!parentPath.endsWith(Path.SEPARATOR)) {
        parentPath += Path.SEPARATOR;
      }
     
      namesystem.readLock();
      namesystem.dir.readLock();
      try {
        List<INode> children = srcNode.getChildren();
        // look for the stripe
        for (int fid = 0; fid < children.size(); fid++) {
          INode child = children.get(fid);
          if (child.isDirectory()) {
            throw new IOException("The raided-directory is not a leaf directory: "
                + parentPath +
                ", contains a subdirectory: " + child.getLocalName());
          }
          INodeFile childInode = (INodeFile)child;

          long fileSize = namesystem.dir.getFileSize(childInode);
          // check if we will do dir-raid on this file
          if (fileSize < minFileSize) {
            continue;
          }

          String childName = parentPath + child.getLocalName();
          if (found) {
            if (sourceEnd <= sourceBlocks.size()) {
              break;
            }
            List<LocatedBlock> childBlocks = getLocatedBlocks(childName, childInode);
            sourceBlocks.addAll(childBlocks);
          } else {
            int childBlockSize = childInode.getBlocks().length;

            /**
             * We found the target file: add the cached stripe blocks and
             * the child's blocks to sourceBlocks, and update stripeIndex,
             * sourceStart and sourceEnd accordingly.
             */
            if (childName.equals(src)) {
              found = true;
              List<LocatedBlock> prevChildBlocks = null;
              for (int i=0; i<curIdx; i++) {
                if (i == 0 || tmpStripe[i].fileIdx != tmpStripe[i - 1].fileIdx) {
                  INode prevChildInode = children.get(tmpStripe[i].fileIdx);
                  String prevChildName = parentPath + prevChildInode.getLocalName();
                  prevChildBlocks = getLocatedBlocks(prevChildName, prevChildInode);
                }
                sourceBlocks.add(prevChildBlocks.get(tmpStripe[i].blockId));
              }
              List<LocatedBlock> childBlocks = getLocatedBlocks(childName, childInode);
              sourceBlocks.addAll(childBlocks);
              blockIndex += curIdx;

              stripeIndex += blockIndex / stripeLength;
              sourceStart = (blockIndex / stripeLength) * stripeLength;
              sourceEnd = sourceStart + stripeLength;
            } else {
              /**
               * We have not found the target file yet, so keep the current
               * (incomplete) stripe in the temp stripe cache.
               */
              /**
               * The child's blocks are few enough to fit into the current
               * temp stripe cache without completing a stripe.
               */
              if (curIdx + childBlockSize < stripeLength) {
                for (int i=0; i<childBlockSize; i++, curIdx++) {
                  tmpStripe[curIdx].fileIdx = fid;
                  tmpStripe[curIdx].blockId = i;
                }
              } else {
                /**
                 * The child's blocks complete at least one stripe. Advance
                 * stripeIndex past the completed stripes, then copy the
                 * child's trailing blocks (the start of the next, incomplete
                 * stripe) into the temp stripe cache.
                 */
                stripeIndex += (curIdx + childBlockSize) / stripeLength;
                int childStart = ((curIdx + childBlockSize) / stripeLength)
                    * stripeLength - curIdx;
                curIdx = 0;
                for (; childStart<childBlockSize; childStart++,curIdx++) {
                  tmpStripe[curIdx].fileIdx = fid;
                  tmpStripe[curIdx].blockId = childStart;
                }
                curIdx %= stripeLength;
              }
            }
          }
        }
      } finally {
        namesystem.dir.readUnlock();
        namesystem.readUnlock();
      }
      sourceEnd = Math.min(sourceEnd, sourceBlocks.size());
    }

    if (sourceStart < sourceBlocks.size()) {
      for (int i = sourceStart; i < sourceEnd; i++) {
        result.add(sourceBlocks.get(i));
      }
    }
    if (parity == null) {
      return result;
    }
    // add the parity blocks.
    addCompanionParityBlocks(parity, (INodeFile)parityInode,
        stripeIndex, parityLength, result);
    return result;
  }

  private int getBlockIndex(String file, Block block, FSInodeInfo inode,
      boolean cacheResult)
      throws IOException {
    if (cacheResult) {
      List<LocatedBlock> blocks = getLocatedBlocks(file, inode);
      // A null block denotes the block currently being added; return size()
      // as its index in this case
      if (block == null) {
        return blocks.size();
      }
      for (int i = 0; i < blocks.size(); i++) {
        if (blocks.get(i).getBlock().equals(block)) {
          return i;
        }
      }
      throw new IOException("Cannot locate " + block + " in file " + file);
    } else {
      return namesystem.dir.getBlockIndex((INodeFile)inode, block, file);
    }
  }
 
  /**
   * Cache results for FSInodeInfo.getFullPathName()
   */
  static class CachedFullPathNames {
    private Cache<INodeWithHashCode, String> cacheInternal;

    CachedFullPathNames(final Configuration conf) {
      this.cacheInternal = new Cache<INodeWithHashCode, String>(conf);
    }
     
    private static class INodeWithHashCode {
      FSInodeInfo inode;
      INodeWithHashCode(FSInodeInfo inode) {
        this.inode = inode;
      }
      @Override
      public boolean equals(Object obj) {
        if (!(obj instanceof INodeWithHashCode))
          return false;
        return inode == ((INodeWithHashCode)obj).inode;
      }
      @Override
      public int hashCode() {
        return System.identityHashCode(inode);
      }
    }

    public String get(FSInodeInfo inode) throws IOException {
      return cacheInternal.get(new INodeWithHashCode(inode));
    }
   
    public void put(FSInodeInfo inode, String path) {
      cacheInternal.put(new INodeWithHashCode(inode), path);
    }
  }

  /**
   * Cache results for FSNamesystem.getBlockLocations()
   */
  static class CachedLocatedBlocks extends Cache<String, List<LocatedBlock>> {
    CachedLocatedBlocks(Configuration conf) {
      super(conf);
    }
  }

  /**
   * Generic caching class
   */
  private static class Cache<K, V> {
    private Map<K, ValueWithTime> cache;
    final private long cacheTimeout;
    final private int maxEntries;
    // The timeout is long, but the consequence of a stale value is not serious
    Cache(Configuration conf) {
      this.cacheTimeout =
        conf.getLong("raid.blockplacement.cache.timeout", 5000L); // 5 seconds
      this.maxEntries =
        conf.getInt("raid.blockplacement.cache.size", 1000); // 1000 entries
      Map<K, ValueWithTime> map = new LinkedHashMap<K, ValueWithTime>(
          2 * maxEntries, 0.75f, true) {
        private static final long serialVersionUID = 1L;
          @Override
          protected boolean removeEldestEntry(
            Map.Entry<K, ValueWithTime> eldest) {
            return size() > maxEntries;
          }
        };
      this.cache = Collections.synchronizedMap(map);
    }

    public V get(K key) throws IOException {
      // The method is not synchronized so we may get some stale value here but
      // it's OK.
      ValueWithTime result = cache.get(key);
      long now = System.currentTimeMillis();
      if (result != null &&
          now - result.cachedTime < cacheTimeout) {
        return result.value;
      }
      return null;
    }
   
    public void put(K key, V value) {
      ValueWithTime v = new ValueWithTime();
      v.value = value;
      v.cachedTime = System.currentTimeMillis();
      cache.put(key,  v);
    }
   
    private class ValueWithTime {
      V value = null;
      long cachedTime = 0L;
    }
  }
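
  /*
   * Usage sketch for the Cache class (key/value types hypothetical):
   *
   *   Cache<String, Long> c = new Cache<String, Long>(conf);
   *   c.put("/a/b", 42L);
   *   Long v = c.get("/a/b"); // 42L within the 5s timeout; null once the
   *                           // entry expires or is LRU-evicted past 1000
   *                           // entries (both limits are configurable)
   */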

  /**
   * Get the corresponding source file for a valid parity
   * file. Returns null if the parity file is inside a HAR archive.
   * @param parity the toUri path of the parity file
   * @param prefix the parity directory prefix to strip from the path
   * @return the name and inode of the source file
   */
  NameWithINode getSourceFile(String parity, String prefix) throws IOException {
    if (isHarFile(parity)) {
      return null;
    }
    // remove the prefix
    String src = parity.substring(prefix.length());
    byte[][] components = INodeDirectory.getPathComponents(src);
    INode inode = namesystem.dir.getINode(components);
    return new NameWithINode(src, inode);
  }
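
  /*
   * Example (paths hypothetical): with prefix "/tmp-raid" and parity
   * "/tmp-raid/user/foo/part-0", the returned source name is the suffix
   * "/user/foo/part-0"; the accompanying inode may be null if the source
   * file has since been deleted.
   */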
 
  class NameWithINode {
    String name;
    INode inode;
   
    public NameWithINode(String name, INode inode) {
      this.name = name;
      this.inode = inode;
    }
  }

  /**
   * Get the parity file for a source path. Returns null if it does not exist.
   * @param codec the codec of the parity file.
   * @param src the toUri path of the source file
   * @return the name and inode of the parity file
   */
  private NameWithINode getParityFile(Codec codec, String src)
      throws IOException {
    String parity;
    if (codec.isDirRaid) {
      String parent = getParentPath(src);     
      parity = codec.parityDirectory + parent;
    } else {
      parity = codec.parityDirectory + src;
    }
    byte[][] components = INodeDirectory.getPathComponents(parity);
    INode parityInode = namesystem.dir.getINode(components);
    if (parityInode == null)
      return null;
    return new NameWithINode(parity, parityInode);
  }
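
  /*
   * Example (paths hypothetical): for a dir-raid codec with parityDirectory
   * "/raid", source "/user/foo/part-0" maps to parity "/raid/user/foo"
   * (the parent directory is raided as a unit); for a file-level codec it
   * maps to "/raid/user/foo/part-0".
   */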
 
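  // Examples: getParentPath("/a/b") == "/a", getParentPath("/a/b/") == "/a",
  // and getParentPath("/a") == "/" (the empty prefix collapses to the root).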
  static String getParentPath(String src) {
    int precision = 1;
    if (src.length() > 1 && src.endsWith(Path.SEPARATOR)) {
      precision = 2;
    }
    src = src.substring(0, src.lastIndexOf(Path.SEPARATOR, src.length() - precision));
    if (src.isEmpty())
      src = Path.SEPARATOR;
    return src;
  }

  private boolean isHarFile(String path) {
    return path.lastIndexOf(RaidNode.HAR_SUFFIX) != -1;
  }

  class FileInfo {
    FileInfo(FileType type, Codec codec) {
      this.type = type;
      this.codec = codec;
    }
   
    FileInfo(FileType type, Codec codec, String parityName, INode parityInode)
        throws IOException {
      if (type != FileType.SOURCE) {
        throw new IOException("FileType must be source");
      }
      this.type = type;
      this.codec = codec;
      this.parityInode = parityInode;
      this.parityName = parityName;
    }
   
    final FileType type;
    final Codec codec;
    INode parityInode = null;
    String parityName = null;
  }

  enum FileType {
    NOT_RAID,
    HAR_TEMP_PARITY,
    TEMP_PARITY,
    PARITY,
    SOURCE,
  }

  /**
   * Return raid information about a file, i.e. whether this file is
   * a source file, a parity file, or not raided at all.
   *
   * @param srcINode the inode of the file (may be null)
   * @param path file name
   * @return raid information
   * @throws IOException
   */
  protected FileInfo getFileInfo(FSInodeInfo srcINode, String path) throws IOException {
    for (Codec c : Codec.getCodecs()) {
      if (path.startsWith(c.tmpHarDirectoryPS)) {
        return new FileInfo(FileType.HAR_TEMP_PARITY, c);
      }
      if (path.startsWith(c.tmpParityDirectoryPS)) {
        return new FileInfo(FileType.TEMP_PARITY, c);
      }
      if (path.startsWith(c.parityDirectoryPS)) {
        return new FileInfo(FileType.PARITY, c);
      }
      NameWithINode ni = getParityFile(c, path);
      if (ni != null) {
        if (c.isDirRaid && srcINode != null && srcINode instanceof INodeFile) {
          INodeFile inf = (INodeFile)srcINode;
          if (inf.getFileSize() < this.minFileSize) {
            // It's too small to be raided
            return new FileInfo(FileType.NOT_RAID, null);
          }
        }
        return new FileInfo(FileType.SOURCE, c, ni.name, ni.inode);
      }
    }
    return new FileInfo(FileType.NOT_RAID, null);
  }
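
  /*
   * Classification example (paths hypothetical): for a codec whose
   * parityDirectoryPS is "/raid/", getFileInfo returns PARITY for
   * "/raid/user/foo", SOURCE for "/user/foo" when the parity file
   * "/raid/user/foo" exists, and NOT_RAID otherwise (or when a dir-raided
   * source file is smaller than minFileSize).
   */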
}