Package org.apache.hadoop.hdfs.server.namenode

Source Code of org.apache.hadoop.hdfs.server.namenode.TestProcessCorruptBlocks

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas;
import org.junit.Test;

public class TestProcessCorruptBlocks {
  /**
   * The corrupt block has to be removed when the number of valid replicas
   * matches replication factor for the file. In this the above condition is
   * tested by reducing the replication factor
   * The test strategy :
   *   Bring up Cluster with 3 DataNodes
   *   Create a file of replication factor 3
   *   Corrupt one replica of a block of the file
   *   Verify that there are still 2 good replicas and 1 corrupt replica
   *    (corrupt replica should not be removed since number of good
   *     replicas (2) is less than replication factor (3))
   *   Set the replication factor to 2
   *   Verify that the corrupt replica is removed.
   *     (corrupt replica  should not be removed since number of good
   *      replicas (2) is equal to replication factor (2))
   */
  @Test
  public void testWhenDecreasingReplication() throws IOException {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
    conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    FileSystem fs = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();

    try {
      final Path fileName = new Path("/foo1");
      DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
      DFSTestUtil.waitReplication(fs, fileName, (short) 3);

      ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
      corruptBlock(cluster, fs, fileName, 0, block);

      DFSTestUtil.waitReplication(fs, fileName, (short) 2);

      assertEquals(2, countReplicas(namesystem, block).liveReplicas());
      assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

      namesystem.setReplication(fileName.toString(), (short) 2);

      // wait for 3 seconds so that all block reports are processed.
      try {
        Thread.sleep(3000);
      } catch (InterruptedException ignored) {
      }

      assertEquals(2, countReplicas(namesystem, block).liveReplicas());
      assertEquals(0, countReplicas(namesystem, block).corruptReplicas());

    } finally {
      cluster.shutdown();
    }
  }

  /**
   * The corrupt block has to be removed when the number of valid replicas
   * matches replication factor for the file. In this test, the above
   * condition is achieved by increasing the number of good replicas by
   * replicating on a new Datanode.
   * The test strategy :
   *   Bring up Cluster with 3 DataNodes
   *   Create a file  of replication factor 3
   *   Corrupt one replica of a block of the file
   *   Verify that there are still 2 good replicas and 1 corrupt replica
   *     (corrupt replica should not be removed since number of good replicas
   *      (2) is less  than replication factor (3))
   *   Start a new data node
   *   Verify that the a new replica is created and corrupt replica is
   *   removed.
   *
   */
  @Test
  public void testByAddingAnExtraDataNode() throws IOException {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
    conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
    FileSystem fs = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();
    DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);

    try {
      final Path fileName = new Path("/foo1");
      DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
      DFSTestUtil.waitReplication(fs, fileName, (short) 3);

      ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
      corruptBlock(cluster, fs, fileName, 0, block);

      DFSTestUtil.waitReplication(fs, fileName, (short) 2);

      assertEquals(2, countReplicas(namesystem, block).liveReplicas());
      assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

      cluster.restartDataNode(dnPropsFourth);

      DFSTestUtil.waitReplication(fs, fileName, (short) 3);

      assertEquals(3, countReplicas(namesystem, block).liveReplicas());
      assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
    } finally {
      cluster.shutdown();
    }
  }

  /**
   * The corrupt block has to be removed when the number of valid replicas
   * matches replication factor for the file. The above condition should hold
   * true as long as there is one good replica. This test verifies that.
   *
   * The test strategy :
   *   Bring up Cluster with 2 DataNodes
   *   Create a file of replication factor 2
   *   Corrupt one replica of a block of the file
   *   Verify that there is  one good replicas and 1 corrupt replica
   *     (corrupt replica should not be removed since number of good
   *     replicas (1) is less than replication factor (2)).
   *   Set the replication factor to 1
   *   Verify that the corrupt replica is removed.
   *     (corrupt replica should  be removed since number of good
   *      replicas (1) is equal to replication factor (1))
   */
  @Test
  public void testWithReplicationFactorAsOne() throws IOException {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
    conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    FileSystem fs = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();

    try {
      final Path fileName = new Path("/foo1");
      DFSTestUtil.createFile(fs, fileName, 2, (short) 2, 0L);
      DFSTestUtil.waitReplication(fs, fileName, (short) 2);

      ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
      corruptBlock(cluster, fs, fileName, 0, block);

      DFSTestUtil.waitReplication(fs, fileName, (short) 1);

      assertEquals(1, countReplicas(namesystem, block).liveReplicas());
      assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

      namesystem.setReplication(fileName.toString(), (short) 1);

      // wait for 3 seconds so that all block reports are processed.
      try {
        Thread.sleep(3000);
      } catch (InterruptedException ignored) {
      }

      assertEquals(1, countReplicas(namesystem, block).liveReplicas());
      assertEquals(0, countReplicas(namesystem, block).corruptReplicas());

    } finally {
      cluster.shutdown();
    }
  }

  /**
   * None of the blocks can be removed if all blocks are corrupt.
   *
   * The test strategy :
   *    Bring up Cluster with 3 DataNodes
   *    Create a file of replication factor 3
   *    Corrupt all three replicas
   *    Verify that all replicas are corrupt and 3 replicas are present.
   *    Set the replication factor to 1
   *    Verify that all replicas are corrupt and 3 replicas are present.
   */
  @Test
  public void testWithAllCorruptReplicas() throws IOException {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
    conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    FileSystem fs = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();

    try {
      final Path fileName = new Path("/foo1");
      DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
      DFSTestUtil.waitReplication(fs, fileName, (short) 3);

      ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
      corruptBlock(cluster, fs, fileName, 0, block);

      corruptBlock(cluster, fs, fileName, 1, block);

      corruptBlock(cluster, fs, fileName, 2, block);

      // wait for 3 seconds so that all block reports are processed.
      try {
        Thread.sleep(3000);
      } catch (InterruptedException ignored) {
      }

      assertEquals(0, countReplicas(namesystem, block).liveReplicas());
      assertEquals(3, countReplicas(namesystem, block).corruptReplicas());

      namesystem.setReplication(fileName.toString(), (short) 1);

      // wait for 3 seconds so that all block reports are processed.
      try {
        Thread.sleep(3000);
      } catch (InterruptedException ignored) {
      }

      assertEquals(0, countReplicas(namesystem, block).liveReplicas());
      assertEquals(3, countReplicas(namesystem, block).corruptReplicas());

    } finally {
      cluster.shutdown();
    }
  }

  private static NumberReplicas countReplicas(final FSNamesystem namesystem, ExtendedBlock block) {
    return namesystem.getBlockManager().countNodes(block.getLocalBlock());
  }

  private void corruptBlock(MiniDFSCluster cluster, FileSystem fs, final Path fileName,
      int dnIndex, ExtendedBlock block) throws IOException {
    // corrupt the block on datanode dnIndex
    // the indexes change once the nodes are restarted.
    // But the datadirectory will not change
    assertTrue(MiniDFSCluster.corruptReplica(dnIndex, block));

    DataNodeProperties dnProps = cluster.stopDataNode(0);

    // Each datanode has multiple data dirs, check each
    for (int dirIndex = 0; dirIndex < 2; dirIndex++) {
      final String bpid = cluster.getNamesystem().getBlockPoolId();
      File storageDir = MiniDFSCluster.getStorageDir(dnIndex, dirIndex);
      File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
      File scanLogFile = new File(dataDir, "dncp_block_verification.log.curr");
      if (scanLogFile.exists()) {
        // wait for one minute for deletion to succeed;
        for (int i = 0; !scanLogFile.delete(); i++) {
          assertTrue("Could not delete log file in one minute", i < 60);
          try {
            Thread.sleep(1000);
          } catch (InterruptedException ignored) {
          }
        }
      }
    }

    // restart the detained so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
  }
}
TOP

Related Classes of org.apache.hadoop.hdfs.server.namenode.TestProcessCorruptBlocks

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.