/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import junit.framework.TestCase;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.metrics.DFSQuorumReadMetrics;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.util.InjectionEventI;
import org.apache.hadoop.util.InjectionHandler;
/**
* This class tests the DFS positional read functionality in a single node
* mini-cluster.
*/
public class TestPread extends TestCase {
static final long seed = 0xDEADBEEFL;
static final int blockSize = 4096;
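  // when true, datanodes run with SimulatedFSDataset and serve a fixed
  // byte pattern instead of real on-disk block data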
boolean simulatedStorage = false;
private void writeFile(FileSystem fileSys, Path name) throws IOException {
    int replication = 3; // we need > 1 replica to test out the quorum reads
    // create and write a file that contains twelve blocks of data
DataOutputStream stm = fileSys.create(name, true, 4096, (short)replication,
(long)blockSize);
// test empty file open and read
stm.close();
FSDataInputStream in = fileSys.open(name);
byte[] buffer = new byte[(int)(12*blockSize)];
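    // a zero-length pread of an empty file must succeed without an exception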
in.readFully(0, buffer, 0, 0);
IOException res = null;
try { // read beyond the end of the file
in.readFully(0, buffer, 0, 1);
} catch (IOException e) {
// should throw an exception
res = e;
}
assertTrue("Error reading beyond file boundary.", res != null);
in.close();
    assertTrue("Cannot delete file", fileSys.delete(name, true));
// now create the real file
stm = fileSys.create(name, true, 4096, (short)replication, (long)blockSize);
Random rand = new Random(seed);
rand.nextBytes(buffer);
stm.write(buffer);
stm.close();
}
private void checkAndEraseData(byte[] actual, int from, byte[] expected, String message) {
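    // verify each byte, then zero it so a later, shorter read into a reused
    // buffer cannot pass by accident on stale data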
for (int idx = 0; idx < actual.length; idx++) {
      assertEquals(message + " byte " + (from + idx) + " differs, expected " +
          expected[from + idx] + " actual " + actual[idx],
          expected[from + idx], actual[idx]);
actual[idx] = 0;
}
}
private void doPread(FSDataInputStream stm, long position, byte[] buffer,
int offset, int length) throws IOException {
int nread = 0;
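    // a positional read may return fewer bytes than requested, so loop until
    // the full range has been read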
while (nread < length) {
int nbytes = stm.read(position+nread, buffer, offset+nread, length-nread);
assertTrue("Error in pread", nbytes > 0);
nread += nbytes;
}
}
private void pReadFile(FileSystem fileSys, Path name) throws IOException {
FSDataInputStream stm = fileSys.open(name);
byte[] expected = new byte[(int)(12*blockSize)];
if (simulatedStorage) {
for (int i= 0; i < expected.length; i++) {
expected[i] = SimulatedFSDataset.DEFAULT_DATABYTE;
}
} else {
Random rand = new Random(seed);
rand.nextBytes(expected);
}
// do a sanity check. Read first 4K bytes
byte[] actual = new byte[4096];
stm.readFully(actual);
checkAndEraseData(actual, 0, expected, "Read Sanity Test");
// now do a pread for the first 8K bytes
actual = new byte[8192];
doPread(stm, 0L, actual, 0, 8192);
checkAndEraseData(actual, 0, expected, "Pread Test 1");
// Now check to see if the normal read returns 4K-8K byte range
actual = new byte[4096];
stm.readFully(actual);
checkAndEraseData(actual, 4096, expected, "Pread Test 2");
// Now see if we can cross a single block boundary successfully
// read 4K bytes from blockSize - 2K offset
stm.readFully(blockSize - 2048, actual, 0, 4096);
checkAndEraseData(actual, (int)(blockSize-2048), expected, "Pread Test 3");
// now see if we can cross two block boundaries successfully
// read blockSize + 4K bytes from blockSize - 2K offset
actual = new byte[(int)(blockSize+4096)];
stm.readFully(blockSize - 2048, actual);
checkAndEraseData(actual, (int)(blockSize-2048), expected, "Pread Test 4");
// now see if we can cross two block boundaries that are not cached
// read blockSize + 4K bytes from 10*blockSize - 2K offset
actual = new byte[(int)(blockSize+4096)];
stm.readFully(10*blockSize - 2048, actual);
checkAndEraseData(actual, (int)(10*blockSize-2048), expected, "Pread Test 5");
// now check that even after all these preads, we can still read
// bytes 8K-12K
actual = new byte[4096];
stm.readFully(actual);
checkAndEraseData(actual, 8192, expected, "Pread Test 6");
// done
stm.close();
// check block location caching
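    // the three widely spaced preads below populate the client's cached
    // block locations at blocks 0, 4 and 7; the larger reads that follow
    // then span both cached and uncached blocks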
stm = fileSys.open(name);
stm.readFully(1, actual, 0, 4096);
stm.readFully(4*blockSize, actual, 0, 4096);
stm.readFully(7*blockSize, actual, 0, 4096);
actual = new byte[3*4096];
stm.readFully(0*blockSize, actual, 0, 3*4096);
checkAndEraseData(actual, 0, expected, "Pread Test 7");
actual = new byte[8*4096];
stm.readFully(3*blockSize, actual, 0, 8*4096);
checkAndEraseData(actual, 3*blockSize, expected, "Pread Test 8");
// read the tail
stm.readFully(11*blockSize+blockSize/2, actual, 0, blockSize/2);
IOException res = null;
try { // read beyond the end of the file
stm.readFully(11*blockSize+blockSize/2, actual, 0, blockSize);
} catch (IOException e) {
// should throw an exception
res = e;
}
assertTrue("Error reading beyond file boundary.", res != null);
stm.close();
}
// test pread can survive datanode restarts
private void datanodeRestartTest(MiniDFSCluster cluster, FileSystem fileSys,
Path name) throws IOException {
// skip this test if using simulated storage since simulated blocks
// don't survive datanode restarts.
if (simulatedStorage) {
return;
}
int numBlocks = 1;
assertTrue(numBlocks <= DFSClient.MAX_BLOCK_ACQUIRE_FAILURES);
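    // the client gives up after MAX_BLOCK_ACQUIRE_FAILURES failed attempts
    // to locate a block, so the test must not span more blocks than that
    // after the restart invalidates the cached locations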
byte[] expected = new byte[numBlocks * blockSize];
Random rand = new Random(seed);
rand.nextBytes(expected);
byte[] actual = new byte[numBlocks * blockSize];
FSDataInputStream stm = fileSys.open(name);
// read a block and get block locations cached as a result
stm.readFully(0, actual);
checkAndEraseData(actual, 0, expected, "Pread Datanode Restart Setup");
// restart all datanodes. it is expected that they will
// restart on different ports, hence, cached block locations
// will no longer work.
assertTrue(cluster.restartDataNodes());
cluster.waitActive();
// verify the block can be read again using the same InputStream
// (via re-fetching of block locations from namenode). there is a
// 3 sec sleep in chooseDataNode(), which can be shortened for
// this test if configurable.
stm.readFully(0, actual);
checkAndEraseData(actual, 0, expected, "Pread Datanode Restart Test");
}
private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
assertTrue(fileSys.exists(name));
assertTrue(fileSys.delete(name, true));
assertTrue(!fileSys.exists(name));
}
/**
* Tests positional read in DFS.
*/
public void testPreadDFS() throws IOException {
Configuration conf = new Configuration();
dfsPreadTest(conf, false); //normal pread
dfsPreadTest(conf, true); //trigger read code path without transferTo.
}
/**
* Tests positional read in DFS, with quorum reads enabled.
*/
public void testQuorumPreadDFSBasic() throws IOException {
Configuration conf = new Configuration();
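    // enable quorum (parallel) reads: a 5-thread pool per client, with a
    // parallel read to another replica issued once a read has been
    // outstanding for 100ms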
conf.setInt(HdfsConstants.DFS_DFSCLIENT_QUORUM_READ_THREADPOOL_SIZE, 5);
conf.setLong(HdfsConstants.DFS_DFSCLIENT_QUORUM_READ_THRESHOLD_MILLIS, 100);
dfsPreadTest(conf, false); //normal pread
dfsPreadTest(conf, true); //trigger read code path without transferTo.
}
public void testMaxOutQuorumPool() throws IOException, InterruptedException, ExecutionException {
Configuration conf = new Configuration();
int numQuorumPoolThreads = 5;
conf.setBoolean("dfs.client.metrics.enable", true);
conf.setInt(HdfsConstants.DFS_DFSCLIENT_QUORUM_READ_THREADPOOL_SIZE, numQuorumPoolThreads);
conf.setLong(HdfsConstants.DFS_DFSCLIENT_QUORUM_READ_THRESHOLD_MILLIS, 100);
// Set up the InjectionHandler
// make preads sleep for 60ms
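    // 60ms is below the 100ms quorum read threshold used first, but above
    // the 50ms timeout set later, so one injected delay exercises both the
    // non-quorum and the quorum code paths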
InjectionHandler.set(new InjectionHandler() {
public void _processEvent(InjectionEventI event, Object... args) {
if(event == InjectionEvent.DFSCLIENT_START_FETCH_FROM_DATANODE) {
try {
Thread.sleep(60);
          } catch (InterruptedException e) {
            // restore the interrupt status; the injected delay is best-effort
            Thread.currentThread().interrupt();
          }
}
}
});
MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
DistributedFileSystem fileSys = (DistributedFileSystem)cluster.getFileSystem();
DFSClient dfsClient = fileSys.getClient();
DFSQuorumReadMetrics metrics = dfsClient.quorumReadMetrics;
try {
Path file1 = new Path("quorumReadMaxOut.dat");
writeFile(fileSys, file1);
      /*
       * Basic test. Each injected 60ms read completes within the 100ms
       * quorum read threshold, so no quorum reads should be issued.
       */
      pReadFile(fileSys, file1);
      // no quorum reads: 60ms + delta < 100ms
      assertTrue(metrics.getParallelReadOps() == 0);
      assertTrue(metrics.getParallelReadOpsInCurThread() == 0);
      /*
       * Lower the timeout to 50ms so that every injected 60ms read exceeds
       * it, while only one thread is reading.
       *
       * Assert that quorum reads were issued, but that none of them had to
       * run in the current thread.
       */
      dfsClient.setQuorumReadTimeout(50); // 50ms
      pReadFile(fileSys, file1);
      // quorum reads were issued: 60ms > 50ms timeout
      assertTrue(metrics.getParallelReadOps() > 0);
      // a single reader never exhausts the 5-thread pool
      assertTrue(metrics.getParallelReadOpsInCurThread() == 0);
      /*
       * Multiple threads reading, and reads take longer than the timeout.
       *
       * Assert that quorum reads were issued, and that some of them had to
       * run in the current thread because the pool was saturated.
       */
int factor = 10;
int numParallelReads = numQuorumPoolThreads * factor;
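      // 50 concurrent readers against the 5-thread quorum pool keep the pool
      // exhausted, so some parallel reads must fall back to the caller's
      // own thread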
long initialReadOpsValue = metrics.getParallelReadOps();
ExecutorService executor = Executors.newFixedThreadPool(numParallelReads);
ArrayList<Future<Void>> futures = new ArrayList<Future<Void>>();
for (int i = 0; i < numParallelReads; i++) {
futures.add(executor.submit(getPReadFileCallable(fileSys, file1)));
}
      for (int i = 0; i < numParallelReads; i++) {
        futures.get(i).get();
      }
      executor.shutdown();
assertTrue(metrics.getParallelReadOps() > initialReadOpsValue);
assertTrue(metrics.getParallelReadOpsInCurThread() > 0);
cleanupFile(fileSys, file1);
} finally {
fileSys.close();
cluster.shutdown();
}
}
private Callable<Void> getPReadFileCallable(final FileSystem fileSys, final Path file) {
return new Callable<Void>() {
public Void call() throws IOException {
pReadFile(fileSys, file);
return null;
}
};
}
private void dfsPreadTest(Configuration conf, boolean disableTransferTo) throws IOException {
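    // a 4K block size and prefetch size keep the 12-block test file small
    // while still forcing multi-block reads and repeated location fetches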
conf.setLong("dfs.block.size", 4096);
conf.setLong("dfs.read.prefetch.size", 4096);
if (simulatedStorage) {
conf.setBoolean("dfs.datanode.simulateddatastorage", true);
}
if (disableTransferTo) {
conf.setBoolean("dfs.datanode.transferTo.allowed", false);
}
MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
FileSystem fileSys = cluster.getFileSystem();
try {
Path file1 = new Path("preadtest.dat");
writeFile(fileSys, file1);
pReadFile(fileSys, file1);
datanodeRestartTest(cluster, fileSys, file1);
cleanupFile(fileSys, file1);
} finally {
fileSys.close();
cluster.shutdown();
}
}
  public void testPreadDFSSimulated() throws IOException {
    simulatedStorage = true;
    try {
      testPreadDFS();
    } finally {
      // reset even if the test fails, so later tests see real storage
      simulatedStorage = false;
    }
  }
/**
* Tests positional read in LocalFS.
*/
public void testPreadLocalFS() throws IOException {
Configuration conf = new Configuration();
FileSystem fileSys = FileSystem.getLocal(conf);
try {
Path file1 = new Path("build/test/data", "preadtest.dat");
writeFile(fileSys, file1);
pReadFile(fileSys, file1);
cleanupFile(fileSys, file1);
} finally {
fileSys.close();
}
}
public static void main(String[] args) throws Exception {
new TestPread().testPreadDFS();
}
}