/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockMissingException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.raid.DistRaid.DistRaidMapper;
import org.apache.hadoop.raid.DistRaid.EncodingCandidate;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.util.InjectionEventI;
import org.apache.hadoop.util.InjectionHandler;
public class TestRetryTaskEncoding extends TestMultiTasksEncoding {
final static Log LOG =
LogFactory.getLog("org.apache.hadoop.raid.TestRetryTaskEncoding");
class TestEncodingHandler extends InjectionHandler {
Map<InjectionEventI, AtomicInteger> events =
new HashMap<InjectionEventI, AtomicInteger>();
Random rand = new Random();
HashMap<InjectionEventI, Double> specialFailProbs;
HashMap<OutputStream, Integer> blockMissingMap;
double defaultFailProb;
public Path lockFile;
public Path parityFile;
public Path finalTmpPath;
public Long numStripes;
public boolean throwBlockMissingException = false;
private AtomicInteger blockMissingCount = new AtomicInteger(0);
public TestEncodingHandler(double defaultProb,
HashMap<InjectionEventI, Double> specialProbs,
boolean newThrowBlockMissingException) {
defaultFailProb = defaultProb;
specialFailProbs = specialProbs;
throwBlockMissingException = newThrowBlockMissingException;
for (InjectionEventI event: new InjectionEventI[]{
InjectionEvent.RAID_ENCODING_PARTIAL_STRIPE_ENCODED,
InjectionEvent.RAID_ENCODING_FAILURE_PARTIAL_PARITY_SIZE_MISMATCH,
InjectionEvent.RAID_ENCODING_FAILURE_BLOCK_MISSING,
InjectionEvent.RAID_ENCODING_FAILURE_PUT_CHECKSUM,
InjectionEvent.RAID_ENCODING_FAILURE_RENAME_FILE,
InjectionEvent.RAID_ENCODING_FAILURE_CONCAT_FILE,
InjectionEvent.RAID_ENCODING_FAILURE_GET_SRC_STRIPES,
InjectionEvent.RAID_ENCODING_FAILURE_PUT_STRIPE,
}) {
events.put(event, new AtomicInteger(0));
}
blockMissingMap = new HashMap<OutputStream, Integer>();
}
@Override
public void _processEventIO(InjectionEventI event, Object... args)
throws IOException {
if (!events.containsKey(event)) {
return;
}
int count = events.get(event).incrementAndGet();
if (event == InjectionEvent.RAID_ENCODING_FAILURE_PUT_STRIPE) {
parityFile = (Path)args[0];
finalTmpPath = (Path)args[1];
assertTrue("parity file should exist", fileSys.exists(parityFile));
assertFalse("finalTmpPath should not exist", fileSys.exists(finalTmpPath));
}
if (event ==
InjectionEvent.RAID_ENCODING_FAILURE_PARTIAL_PARITY_SIZE_MISMATCH ||
event == InjectionEvent.RAID_ENCODING_PARTIAL_STRIPE_ENCODED) {
return;
}
Double failProb = null;
// Only use the specialFailProb for the first time
if (specialFailProbs != null && count == 1) {
failProb = specialFailProbs.get(event);
}
if (failProb == null) {
failProb = defaultFailProb;
}
if (rand.nextDouble() < failProb) {
if (event == InjectionEvent.RAID_ENCODING_FAILURE_BLOCK_MISSING) {
if (throwBlockMissingException && events.get(
InjectionEvent.RAID_ENCODING_PARTIAL_STRIPE_ENCODED).get() == 0 &&
args[0] instanceof FSDataOutputStream) {
FSDataOutputStream stream = (FSDataOutputStream)args[0];
// Skip the first stripe
if (stream.getPos() == 0) {
return;
}
if (this.blockMissingCount.incrementAndGet() > 2) {
return;
}
Integer errorCount = 0;
synchronized(blockMissingMap) {
errorCount = blockMissingMap.get(stream);
if (errorCount == null) {
errorCount = 1;
} else {
errorCount++;
}
blockMissingMap.put(stream, errorCount);
}
if (errorCount <= 2) {
throw new BlockMissingException(stream.toString(),
event.toString(), stream.getPos());
}
}
} else {
throw new IOException(event.toString());
}
}
}
}
public class EncodingThread extends Thread {
public PolicyInfo policyInfo = null;
public boolean succeed = false;
public int retryNum = 0;
public EncodingCandidate encodingCandidate = null;
public IOException exp = null;
public int threadNum = 0;
public Configuration encodingConf = null;
public EncodingThread(PolicyInfo pi, EncodingCandidate newEC,
int newRetryNum, int newThreadNum, Configuration newConf) {
policyInfo = pi;
encodingCandidate = newEC;
retryNum = newRetryNum;
threadNum = newThreadNum;
encodingConf = newConf;
}
public void run() {
try {
succeed = DistRaidMapper.doRaid(retryNum, encodingCandidate.toString(),
encodingConf, policyInfo, new RaidNode.Statistics(), Reporter.NULL);
LOG.info("Finished Thread " + threadNum + " " + succeed);
} catch (IOException ioe) {
exp = ioe;
}
}
}
// Run one round of encoding job
// Each task could have multiple instances to simulate race conditions
public boolean runEncodingTasks(Configuration conf, Codec codec,
FileStatus stat, PolicyInfo info, int retryNum) throws Exception {
String jobId = RaidNode.getJobID(conf);
LOG.info("Set local raid job id: " + jobId);
List<EncodingCandidate> lec =
RaidNode.splitPaths(conf, codec, stat);
EncodingThread[] threads = new EncodingThread[lec.size()];
boolean succeed = false;
for (int i = 0; i < lec.size(); i++) {
threads[i] = new EncodingThread(info, lec.get(i), retryNum, i, conf);
}
for (EncodingThread et: threads) {
et.start();
}
for (EncodingThread et: threads) {
et.join();
succeed |= et.succeed;
if (et.exp != null) {
LOG.warn("Exception from Thread " + et.threadNum +
et.encodingCandidate + " :" + et.exp.getMessage());
}
}
return succeed;
}
public void testBlockMissingExceptionDuringEncoding() throws Exception {
LOG.info("Test testBlockMissingExceptionDuringEncoding started.");
createClusters(false);
Configuration newConf = new Configuration(conf);
// Make sure only one thread is running
newConf.setLong(RaidNode.RAID_ENCODING_STRIPES_KEY, 1000);
newConf.setLong("raid.encoder.bufsize", 4096);
RaidNode.createChecksumStore(newConf, true);
Path raidDir = new Path("/raidtest/1");
HashMap<Codec, Long[]> fileCRCs = new HashMap<Codec, Long[]>();
HashMap<Codec, Path> filePaths = new HashMap<Codec, Path>();
PolicyInfo info = new PolicyInfo();
info.setProperty("targetReplication", Integer.toString(targetReplication));
info.setProperty("metaReplication", Integer.toString(metaReplication));
try {
createTestFiles(raidDir, filePaths, fileCRCs, null);
LOG.info("Test testBlockMissingExceptionDuringEncoding created test files");
// create the InjectionHandler
for (Codec codec: Codec.getCodecs()) {
Path filePath = filePaths.get(codec);
FileStatus stat = fileSys.getFileStatus(filePath);
info.setCodecId(codec.id);
boolean succeed = false;
TestEncodingHandler h = new TestEncodingHandler(0.5,
null, true);
InjectionHandler.set(h);
succeed = runEncodingTasks(newConf, codec, stat, info, 100);
assertTrue("Block missing exceptions should be more than 0",
h.events.get(
InjectionEvent.RAID_ENCODING_FAILURE_BLOCK_MISSING).get() > 0);
assertEquals("No Encoding failure of partial parity size mismatch", 0,
h.events.get(
InjectionEvent.RAID_ENCODING_FAILURE_PARTIAL_PARITY_SIZE_MISMATCH
).get());
assertTrue("We should succeed", succeed);
if (!codec.isDirRaid) {
TestRaidDfs.waitForFileRaided(LOG, fileSys, filePath,
new Path(codec.parityDirectory,
RaidNode.makeRelative(filePath.getParent())),
targetReplication);
} else {
TestRaidDfs.waitForDirRaided(LOG, fileSys, filePath,
new Path(codec.parityDirectory,
RaidNode.makeRelative(raidDir)),
targetReplication);
}
TestRaidDfs.waitForReplicasReduction(fileSys, filePath,
targetReplication);
}
verifyCorrectness(raidDir, fileCRCs, null);
LOG.info("Test testBlockMissingExceptionDuringEncoding successful.");
} catch (Exception e) {
LOG.info("testBlockMissingExceptionDuringEncoding Exception ", e);
throw e;
} finally {
stopClusters();
}
LOG.info("Test testBlockMissingExceptionDuringEncoding completed.");
}
public void testRetryTask() throws Exception {
LOG.info("Test testRetryTask started.");
createClusters(false);
Configuration newConf = new Configuration(conf);
RaidNode.createChecksumStore(newConf, true);
Path raidDir = new Path("/raidtest/1");
HashMap<Codec, Long[]> fileCRCs = new HashMap<Codec, Long[]>();
HashMap<Codec, Path> filePaths = new HashMap<Codec, Path>();
HashMap<InjectionEventI, Double> specialFailProbs =
new HashMap<InjectionEventI, Double>();
PolicyInfo info = new PolicyInfo();
info.setProperty("targetReplication", Integer.toString(targetReplication));
info.setProperty("metaReplication", Integer.toString(metaReplication));
try {
createTestFiles(raidDir, filePaths, fileCRCs, null);
LOG.info("Test testRetryTask created test files");
for (Codec codec: Codec.getCodecs()) {
Path filePath = filePaths.get(codec);
FileStatus stat = fileSys.getFileStatus(filePath);
info.setCodecId(codec.id);
LOG.info("Codec: " + codec + ", Path: " + filePath +
" Sync every task to the finalize stage, " +
"all partial parity files are generated");
specialFailProbs.clear();
specialFailProbs.put(
InjectionEvent.RAID_ENCODING_FAILURE_CONCAT_FILE, 1.0);
specialFailProbs.put(
InjectionEvent.RAID_ENCODING_FAILURE_GET_SRC_STRIPES, 1.0);
specialFailProbs.put(
InjectionEvent.RAID_ENCODING_FAILURE_PUT_STRIPE, 1.0);
TestEncodingHandler h = new TestEncodingHandler(0.0,
specialFailProbs, false);
InjectionHandler.set(h);
assertEquals("Should succeed", true,
runEncodingTasks(newConf, codec, stat, info, 1000));
assertEquals("Only did two concats, one failed, one succeeded ", 2,
h.events.get(
InjectionEvent.RAID_ENCODING_FAILURE_CONCAT_FILE).get());
if (codec.isDirRaid) {
assertEquals("Only did two getSrcStripes, one failed, two succeeded",
2, h.events.get(
InjectionEvent.RAID_ENCODING_FAILURE_GET_SRC_STRIPES).get());
assertEquals("Only did two putStripes, one failed, one succeeded", 2,
h.events.get(InjectionEvent.RAID_ENCODING_FAILURE_PUT_STRIPE).get());
}
if (!codec.isDirRaid) {
TestRaidDfs.waitForFileRaided(LOG, fileSys, filePath,
new Path(codec.parityDirectory,
RaidNode.makeRelative(filePath.getParent())),
targetReplication);
} else {
TestRaidDfs.waitForDirRaided(LOG, fileSys, filePath,
new Path(codec.parityDirectory,
RaidNode.makeRelative(raidDir)),
targetReplication);
}
TestRaidDfs.waitForReplicasReduction(fileSys, filePath,
targetReplication);
}
verifyCorrectness(raidDir, fileCRCs, null);
LOG.info("Test testRetryTask successful.");
} catch (Exception e) {
LOG.info("testRetryTask Exception ", e);
throw e;
} finally {
stopClusters();
}
LOG.info("Test testRetryTask completed.");
}
public void testLargeFailureRateEncoding() throws Exception {
LOG.info("Test testLargeFailureRateEncoding started.");
createClusters(false);
Configuration newConf = new Configuration(conf);
RaidNode.createChecksumStore(newConf, true);
Path raidDir = new Path("/raidtest/1");
HashMap<Codec, Long[]> fileCRCs = new HashMap<Codec, Long[]>();
HashMap<Codec, Path> filePaths = new HashMap<Codec, Path>();
PolicyInfo info = new PolicyInfo();
info.setProperty("targetReplication", Integer.toString(targetReplication));
info.setProperty("metaReplication", Integer.toString(metaReplication));
try {
createTestFiles(raidDir, filePaths, fileCRCs, null);
LOG.info("Test testLargeFailureRateEncoding created test files");
// create the InjectionHandler
for (Codec codec: Codec.getCodecs()) {
Path filePath = filePaths.get(codec);
FileStatus stat = fileSys.getFileStatus(filePath);
info.setCodecId(codec.id);
boolean succeed = false;
TestEncodingHandler h = new TestEncodingHandler(0.5, null, false);
InjectionHandler.set(h);
succeed = runEncodingTasks(newConf, codec, stat, info, 100);
assertTrue("We should succeed", succeed);
if (!codec.isDirRaid) {
TestRaidDfs.waitForFileRaided(LOG, fileSys, filePath,
new Path(codec.parityDirectory,
RaidNode.makeRelative(filePath.getParent())),
targetReplication);
} else {
TestRaidDfs.waitForDirRaided(LOG, fileSys, filePath,
new Path(codec.parityDirectory,
RaidNode.makeRelative(raidDir)),
targetReplication);
}
TestRaidDfs.waitForReplicasReduction(fileSys, filePath,
targetReplication);
}
verifyCorrectness(raidDir, fileCRCs, null);
LOG.info("Test testLargeFailureRateEncoding successful.");
} catch (Exception e) {
LOG.info("testLargeFailureRateEncoding Exception ", e);
throw e;
} finally {
stopClusters();
}
LOG.info("Test testLargeFailureRateEncoding completed.");
}
@Override
public void testFileListPolicy() throws Exception {
}
}