/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.datanode.BlockDataFile;
import org.apache.hadoop.hdfs.server.datanode.BlockInlineChecksumWriter;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
import org.apache.hadoop.hdfs.server.namenode.INode;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.protocol.RaidTask;
import org.apache.hadoop.hdfs.server.protocol.RaidTaskCommand;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.raid.RaidCodec;
import org.apache.hadoop.raid.RaidCodecBuilder;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.util.InjectionEventI;
import org.apache.hadoop.util.InjectionHandler;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
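
/**
 * Tests DistributedFileSystem#raidFile: converting a replicated HDFS file
 * into raid format with XOR or Reed-Solomon parity blocks.
 */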
public class TestRaidFile {
public static final Log LOG = LogFactory.getLog(TestRaidFile.class);
private MiniDFSCluster cluster;
private NameNode nn;
private DistributedFileSystem dfs;
private DistributedFileSystem userdfs;
private static long blockSize = 512;
private static int numDataBlocks = 6;
private static int numRSParityBlocks = 3;
private static Configuration conf;
private static UnixUserGroupInformation USER1;
private static int id = 0;
private static Random rand = new Random();
private static byte[] bytes = new byte[(int)blockSize];
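  // Shared configuration: a small block size keeps multi-stripe test files
  // cheap, and permissions stay enabled for the permission-denied cases.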
static {
conf = new Configuration();
conf.setLong("dfs.block.size", blockSize);
conf.setBoolean("dfs.permissions", true);
}
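  /**
   * Intercepts the DATANODE_PROCESS_RAID_TASK injection event so that each
   * datanode writes a locally generated fake parity block and notifies the
   * namenode, instead of performing the real parity computation.
   */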
class FakeBlockGeneratorInjectionHandler extends InjectionHandler {
@Override
public void _processEventIO(InjectionEventI event, Object... args)
throws IOException {
if (event == InjectionEvent.DATANODE_PROCESS_RAID_TASK) {
int namespaceId = nn.getNamespaceID();
DataNode dn = (DataNode)args[0];
RaidTaskCommand rtc = (RaidTaskCommand)args[1];
RaidTask[] tasks = rtc.tasks;
for (RaidTask rw: tasks) {
          // Generate all parity blocks locally instead of sending them remotely
try {
for (int idx = 0; idx < rw.toRaidIdxs.length; idx++) {
Block blk = rw.stripeBlocks[rw.toRaidIdxs[idx]];
blk.setNumBytes(blockSize);
BlockDataFile.Writer dataOut =
((BlockInlineChecksumWriter)dn.getFSDataset().writeToBlock(
namespaceId, blk, blk, false, false, 1, 512)).getBlockDataFile()
.getWriter(0);
dataOut.write(bytes);
dataOut.close();
dn.finalizeAndNotifyNamenode(namespaceId, blk);
}
} catch (IOException ioe) {
LOG.warn(ioe);
}
}
}
}
}
@Before
public void startUpCluster() throws IOException {
RaidCodecBuilder.loadDefaultFullBlocksCodecs(conf, numRSParityBlocks,
numDataBlocks);
cluster = new MiniDFSCluster(conf, 4, true, null);
assertNotNull("Failed Cluster Creation", cluster);
cluster.waitClusterUp();
dfs = (DistributedFileSystem) cluster.getFileSystem();
assertNotNull("Failed to get FileSystem", dfs);
nn = cluster.getNameNode();
assertNotNull("Failed to get NameNode", nn);
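    // Create a second client logged in as non-superuser "foo" (group "bar")
    // to exercise the permission checks in testRaidFileIllegalCases.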
Configuration newConf = new Configuration(conf);
    USER1 = new UnixUserGroupInformation("foo", new String[] {"bar"});
UnixUserGroupInformation.saveToConf(newConf,
UnixUserGroupInformation.UGI_PROPERTY_NAME, USER1);
userdfs = (DistributedFileSystem)FileSystem.get(newConf); // login as ugi
InjectionHandler h = new FakeBlockGeneratorInjectionHandler();
InjectionHandler.set(h);
rand.nextBytes(bytes);
}
@After
public void shutDownCluster() throws IOException {
    if (dfs != null) {
dfs.close();
}
if (userdfs != null) {
userdfs.close();
}
    if (cluster != null) {
cluster.shutdownDataNodes();
cluster.shutdown();
}
InjectionHandler.clear();
}
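  /**
   * Calls raidFile and verifies the outcome: when exceptionMessage is
   * non-null, the call must throw an exception whose message contains it;
   * unexpected exceptions are rethrown to fail the test.
   */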
public static void raidFile(DistributedFileSystem fs, Path source,
String codecId, short expectedSourceRepl, String exceptionMessage)
throws Exception {
    try {
      fs.raidFile(source, codecId, expectedSourceRepl);
      if (exceptionMessage != null) {
        fail("Expected an exception containing: " + exceptionMessage);
      }
    } catch (Exception e) {
      if (exceptionMessage == null) {
        // This exception is not expected; rethrow to fail the test
        throw e;
      }
      assertTrue("Exception " + e.getMessage() + " doesn't match " +
          exceptionMessage, e.getMessage().contains(exceptionMessage));
    }
}
  /**
   * Tests that we can XOR-raid files with different numbers of blocks:
   * 1. two full stripes
   * 2. one and a half stripes
   * 3. less than one stripe
   * @throws Exception
   */
@Test(timeout=60000)
public void testRaidXORFile() throws Exception {
raidFile(12, (short)2, "xor");
raidFile(9, (short)3, "xor");
raidFile(3, (short)2, "xor");
}
  /**
   * Tests that we can RS-raid files with different numbers of blocks:
   * 1. two full stripes
   * 2. one and a half stripes
   * 3. less than one stripe
   * @throws Exception
   */
@Test(timeout=60000)
public void testRaidRSFile() throws Exception {
raidFile(12, (short)1, "rs");
raidFile(9, (short)2, "rs");
raidFile(3, (short)3, "rs");
}
private static INodeFile getINodeFile(NameNode nn, Path source) {
INode[] inodes = nn.getNamesystem().dir.getExistingPathINodes(
source.toUri().getPath());
return (INodeFile)inodes[inodes.length - 1];
}
private static FileStatus verifyRaidFiles(NameNode nn,
DistributedFileSystem fileSys, FileStatus statBefore,
LocatedBlocks lbsBefore, Path source, long fileLen, long crc,
short expectedSourceRepl, String codecId, boolean checkParityBlocks)
throws Exception {
FileStatus statAfter = fileSys.getFileStatus(source);
LocatedBlocks lbsAfter = fileSys.getLocatedBlocks(source, 0, fileLen);
// Verify file stat
assertEquals(statBefore.getBlockSize(), statAfter.getBlockSize());
assertEquals(statBefore.getLen(), statAfter.getLen());
assertEquals(expectedSourceRepl, statAfter.getReplication());
// Verify getLocatedBlocks
assertEquals(lbsBefore.getLocatedBlocks().size(),
lbsAfter.getLocatedBlocks().size());
for (int i = 0; i < lbsBefore.getLocatedBlocks().size(); i++) {
assertEquals(lbsBefore.get(i).getBlock(), lbsAfter.get(i).getBlock());
}
// Verify file content
    assertTrue("File content doesn't match", DFSTestUtil.validateFile(fileSys,
        statBefore.getPath(), statBefore.getLen(), crc));
return statAfter;
}
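  /**
   * Stamps a fake checksum on every block of the source file so that
   * raidFile's "all source blocks have checksums" check passes.
   */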
private void fillChecksums(Path source) {
INodeFile file = getINodeFile(nn, source);
BlockInfo[] bis = file.getBlocks();
for (int i = 0; i < bis.length; i++) {
bis[i].setChecksum(1);
}
}
  /**
   * 1. Create a file.
   * 2. Fill fake checksums into its blocks.
   * 3. Call raidFile to convert the file into raid format; the call returns
   * false and the namenode starts scheduling the raiding.
   * 4. Verify that we can still read the file.
   * 5. Each datanode receives a RaidTaskCommand from the namenode and jumps
   * into FakeBlockGeneratorInjectionHandler, which creates a fake parity
   * block on the datanode and notifies the namenode.
   * 6. Keep calling raidFile until all parity blocks are generated; raidFile
   * then succeeds in reducing the replication and returns true.
   */
private void raidFile(int numBlocks, short expectedSourceRepl, String codecId)
throws Exception {
    LOG.info("RUNNING raidFile numBlocks=" + numBlocks +
        " sourceRepl=" + expectedSourceRepl + " codecId=" + codecId);
id++;
long fileLen = blockSize * numBlocks;
    Path dir = new Path("/user/facebook" + id);
assertTrue(dfs.mkdirs(dir));
Path source = new Path(dir, "1");
long crc = DFSTestUtil.createFile(dfs, source, fileLen, (short)3, 1);
LOG.info("Fill fake checksums to the file");
fillChecksums(source);
ContentSummary cBefore = dfs.getContentSummary(dir);
FileStatus statBefore = dfs.getFileStatus(source);
LocatedBlocks lbsBefore = dfs.getLocatedBlocks(source, 0, fileLen);
// now raid the file
boolean result = dfs.raidFile(source, codecId, expectedSourceRepl);
    assertFalse("raidFile should return false", result);
ContentSummary cAfter = dfs.getContentSummary(dir);
// verify directory stat
    assertEquals("File count shouldn't change", cBefore.getFileCount(),
        cAfter.getFileCount());
verifyRaidFiles(nn, dfs, statBefore, lbsBefore, source, fileLen, crc,
statBefore.getReplication(), codecId, false);
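    // Fetching blocks from offset blockSize should return the remaining
    // numBlocks - 1 source blocks and must not expose any parity blocks.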
LocatedBlocks lbsAfter = dfs.getLocatedBlocks(source, blockSize, fileLen);
assertEquals(numBlocks - 1, lbsAfter.getLocatedBlocks().size());
for (int i = 0; i < numBlocks - 1; i++) {
assertEquals(lbsBefore.get(i + 1).getBlock(), lbsAfter.get(i).getBlock());
}
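    // Raiding an already raided file with a different codec must be rejected.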
String otherCodec = codecId.equals("xor") ? "rs" : "xor";
raidFile(dfs, source, otherCodec, (short)2,
"raidFile: couldn't raid a raided file");
RaidCodec codec = RaidCodec.getCodec(codecId);
long startTime = System.currentTimeMillis();
result = false;
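    // Poll until the datanodes (via the injection handler) have generated all
    // parity blocks and raidFile succeeds in reducing the source replication.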
while (System.currentTimeMillis() - startTime < 70000 && !result) {
DFSTestUtil.waitNSecond(3);
result = dfs.raidFile(source, codecId, expectedSourceRepl);
}
    assertTrue("Raiding should finish within the timeout", result);
verifyRaidFiles(nn, dfs, statBefore, lbsBefore, source, fileLen, crc,
expectedSourceRepl, codecId, true);
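    // Once raided, the replication of the source file cannot be reduced
    // below the codec's minimum source replication.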
if (codec.minSourceReplication >= 2) {
      try {
        dfs.setReplication(source, (short)(codec.minSourceReplication - 1));
        fail("setReplication should fail");
      } catch (IOException ioe) {
        assertTrue("setReplication should fail with the expected message",
            ioe.getMessage().contains("Couldn't set replication smaller than "));
      }
}
}
  /**
   * Tests that raidFile fails with the expected exception in various illegal
   * cases: an empty file, a directory, hardlinked files, files without
   * checksums, files without permission, and so on.
   * @throws Exception
   */
@Test(timeout=60000)
public void testRaidFileIllegalCases() throws Exception {
LOG.info("Running testRaidFileIllegalCases");
int numBlocks = 6;
long fileLen = blockSize * numBlocks;
    Path dir = new Path("/user/facebook");
assertTrue(dfs.mkdirs(dir));
Path source = new Path(dir, "1");
Path dest = new Path(dir, "2");
DFSTestUtil.createFile(dfs, source, fileLen, (short)3, 1);
Path emptyFile = new Path("/empty");
DFSTestUtil.createFile(dfs, emptyFile, 0L, (short)3, 1);
raidFile(dfs, source, "nonexist", (short)2,
"raidFile: codec nonexist doesn't exist");
raidFile(dfs, source, "xor", (short)1,
"raidFile: expectedSourceRepl is smaller than ");
dfs.setOwner(source, "foo", "bar");
LOG.info("Disallow write on " + source);
dfs.setPermission(source, new FsPermission((short)0577));
raidFile(userdfs, source, "xor", (short)2, "Permission denied");
LOG.info("Enable write on " + source);
dfs.setPermission(source, new FsPermission((short)0777));
LOG.info("Test different types of files");
raidFile(dfs, new Path("/nonexist"), "rs", (short)1,
"raidFile: source file doesn't exist");
raidFile(dfs, dir, "rs", (short)1, "raidFile: source file is a directory");
raidFile(dfs, emptyFile, "rs", (short)1, "raidFile: source file is empty");
raidFile(dfs, source, "rs", (short)1,
"raidFile: not all source blocks have checksums");
LOG.info("Hardlink the file to " + dest);
dfs.hardLink(source, dest);
raidFile(dfs, dest, "rs", (short)1, "raidFile: cannot raid a hardlinked file");
raidFile(dfs, source, "rs", (short)1, "raidFile: cannot raid a hardlinked file");
}
}