/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.io.InterruptedIOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.Executors;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.net.InetSocketAddress;
import java.net.SocketException;
import javax.security.auth.login.LoginException;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.hadoop.ipc.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryProxy;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.HarFileSystem;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.DistributedRaidFileSystem;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.raid.StripeReader.LocationPair;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.raid.protocol.RaidProtocol;
import org.xml.sax.SAXException;
import java.util.concurrent.atomic.*;
/**
* A {@link RaidShell} that allows browsing configured raid policies.
*/
public class RaidShell extends Configured implements Tool {
// Load HDFS and RAID configuration resources before any Configuration
// object built by this class is used.
static {
Configuration.addDefaultResource("hdfs-default.xml");
Configuration.addDefaultResource("hdfs-site.xml");
Configuration.addDefaultResource("raid-default.xml");
Configuration.addDefaultResource("raid-site.xml");
}
public static final Log LOG = LogFactory.getLog( "org.apache.hadoop.RaidShell");
// Retrying proxy for RaidNode RPC calls (wraps rpcRaidnode; see createRaidnode).
public RaidProtocol raidnode;
// Raw RPC proxy to the RaidNode; shut down via RPC.stopProxy() in close().
RaidProtocol rpcRaidnode;
private UnixUserGroupInformation ugi;
// Flipped to false by close(); checkOpen() throws once this is false.
volatile boolean clientRunning = true;
private Configuration conf;
// Count of files found corrupt beyond repair during fsck (see incrCorruptCount).
AtomicInteger corruptCounter = new AtomicInteger();
// Histogram used by fsck -retNumStrpsMissingBlksRS: index i counts stripes
// seen with (i + 1) missing blocks (incrStrpMissingBlks is called with
// corruptBlocksInStripe - 1). Sized from the "rs" codec's stripe + parity
// length. NOTE(review): assumes the "rs" codec is always configured —
// Codec.getCodec("rs") returning null here would NPE; confirm.
AtomicLongArray numStrpMissingBlks =
new AtomicLongArray(Codec.getCodec("rs").stripeLength+Codec.getCodec("rs").parityLength);
// Destination for user-visible command output (System.out unless injected).
private final PrintStream out;
final static private String DistRaidCommand = "-distRaid";
/**
 * Creates a RaidShell that prints its command output to {@code System.out}.
 * <p>
 * The shell connects to a RaidNode on demand (see {@link #initializeRpc})
 * and performs basic configuration operations.
 *
 * @param conf configuration used for all filesystem and RPC operations
 * @throws IOException declared for API compatibility with callers
 */
public RaidShell(Configuration conf) throws IOException {
  super(conf);
  this.out = System.out;
  this.conf = conf;
}
/**
 * Creates a RaidShell that prints its command output to the given stream.
 *
 * @param conf configuration used for all filesystem and RPC operations
 * @param out  destination for user-visible command output
 * @throws IOException declared for API compatibility with callers
 */
public RaidShell(Configuration conf, PrintStream out) throws IOException {
  super(conf);
  this.out = out;
  this.conf = conf;
}
/**
 * Logs in the current Unix user and opens the RPC connection to the
 * RaidNode at the given address, populating {@link #rpcRaidnode} and the
 * retrying {@link #raidnode} proxy.
 *
 * @param conf    configuration used for login and RPC setup
 * @param address RaidNode RPC address
 * @throws IOException if login or proxy creation fails
 */
void initializeRpc(Configuration conf, InetSocketAddress address) throws IOException {
  try {
    this.ugi = UnixUserGroupInformation.login(conf, true);
  } catch (LoginException e) {
    // Fix: the old "new IOException().initCause(e)" idiom dropped the
    // login failure message; keep both message and cause.
    throw new IOException(e.getMessage(), e);
  }
  this.rpcRaidnode = createRPCRaidnode(address, conf, ugi);
  this.raidnode = createRaidnode(rpcRaidnode);
}
/**
 * Logs in the current Unix user for commands that run locally and do not
 * need an RPC connection to the RaidNode.
 *
 * @param conf configuration used for login
 * @throws IOException if login fails
 */
void initializeLocal(Configuration conf) throws IOException {
  try {
    this.ugi = UnixUserGroupInformation.login(conf, true);
  } catch (LoginException e) {
    // Fix: preserve the login failure message along with the cause
    // (previously "new IOException().initCause(e)" lost the message).
    throw new IOException(e.getMessage(), e);
  }
}
/**
 * Creates a retrying {@link RaidProtocol} client for the RaidNode address
 * configured in {@code conf}.
 *
 * @param conf configuration holding the RaidNode address
 * @throws IOException if login or proxy creation fails
 */
public static RaidProtocol createRaidnode(Configuration conf) throws IOException {
  InetSocketAddress raidNodeAddr = RaidNode.getAddress(conf);
  return createRaidnode(raidNodeAddr, conf);
}
/**
 * Creates a retrying {@link RaidProtocol} client for the given RaidNode
 * address, logging in the current Unix user first.
 *
 * @param raidNodeAddr RaidNode RPC address
 * @param conf         configuration used for login and RPC setup
 * @throws IOException if login or proxy creation fails
 */
public static RaidProtocol createRaidnode(InetSocketAddress raidNodeAddr,
    Configuration conf) throws IOException {
  try {
    return createRaidnode(createRPCRaidnode(raidNodeAddr, conf,
        UnixUserGroupInformation.login(conf, true)));
  } catch (LoginException e) {
    // Fix: keep the login failure message along with the cause
    // (previously "new IOException().initCause(e)" lost the message).
    throw new IOException(e.getMessage(), e);
  }
}
/**
 * Opens a raw (non-retrying) RPC proxy to the RaidNode at the given
 * address for the supplied user.
 *
 * @param raidNodeAddr RaidNode RPC address
 * @param conf         configuration used for socket-factory lookup
 * @param ugi          user to authenticate the connection as
 * @throws IOException if the proxy cannot be created
 */
private static RaidProtocol createRPCRaidnode(InetSocketAddress raidNodeAddr,
    Configuration conf, UnixUserGroupInformation ugi)
    throws IOException {
  LOG.info("RaidShell connecting to " + raidNodeAddr);
  RaidProtocol proxy = (RaidProtocol) RPC.getProxy(RaidProtocol.class,
      RaidProtocol.versionID, raidNodeAddr, ugi, conf,
      NetUtils.getSocketFactory(conf, RaidProtocol.class));
  return proxy;
}
/**
 * Wraps the raw RPC proxy in a retry proxy. Only the "create" method gets
 * an explicit retry policy; every other method fails after one attempt.
 *
 * @param rpcRaidnode raw RPC proxy to wrap
 * @throws IOException declared for API compatibility with callers
 */
private static RaidProtocol createRaidnode(RaidProtocol rpcRaidnode)
    throws IOException {
  // Fix: removed the unused local "createPolicy" (a
  // retryUpToMaximumCountWithFixedSleep policy that was built but never
  // registered anywhere, i.e. dead code).
  // No per-remote-exception policies are registered, so remote exceptions
  // are rethrown after the single attempt.
  Map<Class<? extends Exception>,RetryPolicy> remoteExceptionToPolicyMap =
      new HashMap<Class<? extends Exception>, RetryPolicy>();
  Map<Class<? extends Exception>,RetryPolicy> exceptionToPolicyMap =
      new HashMap<Class<? extends Exception>, RetryPolicy>();
  exceptionToPolicyMap.put(RemoteException.class,
      RetryPolicies.retryByRemoteException(
          RetryPolicies.TRY_ONCE_THEN_FAIL, remoteExceptionToPolicyMap));
  RetryPolicy methodPolicy = RetryPolicies.retryByException(
      RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap);
  Map<String,RetryPolicy> methodNameToPolicyMap = new HashMap<String,RetryPolicy>();
  methodNameToPolicyMap.put("create", methodPolicy);
  return (RaidProtocol) RetryProxy.create(RaidProtocol.class,
      rpcRaidnode, methodNameToPolicyMap);
}
/**
 * Verifies this shell has not been closed.
 *
 * @throws IOException if {@link #close()} has already been called
 */
private void checkOpen() throws IOException {
  if (clientRunning) {
    return;
  }
  throw new IOException("RaidNode closed");
}
/**
 * Closes the connection to the RaidNode. Idempotent: only the first call
 * stops the RPC proxy; later calls are no-ops.
 *
 * @throws IOException declared for API compatibility with callers
 */
public synchronized void close() throws IOException {
  if (!clientRunning) {
    return;
  }
  clientRunning = false;
  RPC.stopProxy(rpcRaidnode);
}
/**
 * Displays the usage line for the given command on stderr, or the full
 * command summary when {@code cmd} is not a recognized command.
 *
 * @param cmd command name including its leading dash (e.g. "-fsck"), or
 *            any other string to print the generic usage
 */
private static void printUsage(String cmd) {
  // Fix: removed the unused local "prefix" that was built and never used.
  if ("-showConfig".equals(cmd)) {
    System.err.println("Usage: java RaidShell" +
                       " [-showConfig]");
  } else if ("-recover".equals(cmd)) {
    System.err.println("Usage: java RaidShell" +
                       " [-recover srcPath1 corruptOffset]");
  } else if ("-recoverBlocks".equals(cmd)) {
    System.err.println("Usage: java RaidShell" +
                       " [-recoverBlocks path1 path2...]");
  } else if ("-raidFile".equals(cmd)) {
    System.err.println(
      "Usage: java RaidShell -raidFile <path-to-file> <path-to-raidDir> <XOR|RS>");
  } else if (DistRaidCommand.equals(cmd)) {
    System.err.println("Usage: java RaidShell " + DistRaidCommand
                       + " <raid_policy_name> <path1> ... <pathn>");
  } else if ("-fsck".equals(cmd)) {
    System.err.println("Usage: java RaidShell [-fsck [path [-threads numthreads] [-count]] [-retNumStrpsMissingBlksRS]]]");
  } else if ("-usefulHar".equals(cmd)) {
    System.err.println("Usage: java RaidShell [-usefulHar <XOR|RS> [path-to-raid-har]]");
  } else if ("-checkFile".equals(cmd)) {
    System.err.println("Usage: java RaidShell [-checkFile path]");
  } else if ("-purgeParity".equals(cmd)) {
    System.err.println("Usage: java RaidShell -purgeParity path <XOR|RS>");
  } else if ("-checkParity".equals(cmd)) {
    System.err.println("Usage: java RaidShell [-checkParity path]");
  } else if ("-findMissingParityFiles".equals(cmd)) {
    System.err.println("Usage: java RaidShell -findMissingParityFiles [-r] rootPath");
  } else {
    System.err.println("Usage: java RaidShell");
    System.err.println("           [-showConfig ]");
    System.err.println("           [-help [cmd]]");
    System.err.println("           [-recover srcPath1 corruptOffset]");
    System.err.println("           [-recoverBlocks path1 path2...]");
    // Fix: this line was missing its closing "]".
    System.err.println("           [-raidFile <path-to-file> <path-to-raidDir> <XOR|RS>]");
    System.err.println("           [" + DistRaidCommand
                       + " <raid_policy_name> <path1> ... <pathn>]");
    System.err.println("           [-fsck [path [-threads numthreads] [-count]] [-retNumStrpsMissingBlksRS]]");
    System.err.println("           [-usefulHar <XOR|RS> [path-to-raid-har]]");
    System.err.println("           [-checkFile path]");
    System.err.println("           [-purgeParity path <XOR|RS>]");
    // Fix: "RrootPath" typo — the per-command usage above says "rootPath".
    System.err.println("           [-findMissingParityFiles [-r] rootPath]");
    System.err.println("           [-checkParity path]");
    System.err.println();
    ToolRunner.printGenericCommandUsage(System.err);
  }
}
/**
 * Entry point for all RaidShell commands: validates argument counts,
 * dispatches to the handler for argv[0], and maps failures to a -1
 * exit code. RPC to the RaidNode is only initialized for the commands
 * that need it (-showConfig, -recover); the rest run locally.
 */
public int run(String argv[]) throws Exception {
if (argv.length < 1) {
printUsage("");
return -1;
}
int exitCode = -1;
int i = 0;
String cmd = argv[i++];
//
// verify that we have enough command line parameters
//
if ("-showConfig".equals(cmd)) {
// NOTE(review): argv.length < 1 can never be true here (checked above),
// so -showConfig effectively has no argument-count validation.
if (argv.length < 1) {
printUsage(cmd);
return exitCode;
}
} else if ("-recover".equals(cmd)) {
// needs at least one (path, corruptOffset) pair after the command
if (argv.length < 3) {
printUsage(cmd);
return exitCode;
}
} else if ("-fsck".equals(cmd)) {
if ((argv.length < 1) || (argv.length > 5)) {
printUsage(cmd);
return exitCode;
}
} else if (DistRaidCommand.equals(cmd)) {
// needs a policy name plus at least one path
if (argv.length < 3) {
printUsage(cmd);
return exitCode;
}
}
try {
if ("-showConfig".equals(cmd)) {
initializeRpc(conf, RaidNode.getAddress(conf));
exitCode = showConfig(cmd, argv, i);
} else if ("-recover".equals(cmd)) {
initializeRpc(conf, RaidNode.getAddress(conf));
exitCode = recoverAndPrint(cmd, argv, i);
} else if ("-recoverBlocks".equals(cmd)) {
initializeLocal(conf);
recoverBlocks(argv, i);
exitCode = 0;
} else if ("-raidFile".equals(cmd)) {
initializeLocal(conf);
raidFile(argv, i);
exitCode = 0;
} else if (DistRaidCommand.equals(cmd)) {
initializeLocal(conf);
distRaid(argv, i);
exitCode = 0;
} else if ("-fsck".equals(cmd)) {
fsck(cmd, argv, i);
exitCode = 0;
} else if ("-usefulHar".equals(cmd)) {
usefulHar(argv, i);
exitCode = 0;
} else if ("-checkFile".equals(cmd)) {
checkFile(cmd, argv, i);
exitCode = 0;
} else if ("-purgeParity".equals(cmd)) {
purgeParity(cmd, argv, i);
exitCode = 0;
} else if ("-checkParity".equals(cmd)) {
checkParity(cmd, argv, i);
exitCode = 0;
} else if ("-findMissingParityFiles".equals(cmd)) {
findMissingParityFiles(argv, i);
exitCode = 0;
} else {
exitCode = -1;
System.err.println(cmd.substring(1) + ": Unknown command");
printUsage("");
}
} catch (IllegalArgumentException arge) {
// handlers throw IllegalArgumentException for bad arguments; show the
// command-specific usage text
exitCode = -1;
System.err.println(cmd.substring(1) + ": " + arge);
printUsage(cmd);
} catch (RemoteException e) {
//
// This is a error returned by raidnode server. Print
// out the first line of the error mesage, ignore the stack trace.
exitCode = -1;
try {
String[] content;
content = e.getLocalizedMessage().split("\n");
System.err.println(cmd.substring(1) + ": " +
content[0]);
} catch (Exception ex) {
System.err.println(cmd.substring(1) + ": " +
ex.getLocalizedMessage());
}
} catch (Exception e) {
exitCode = -1;
LOG.error(cmd.substring(1) + ": ", e);
}
return exitCode;
}
/**
 * Finds the files under a root path that lack a corresponding parity file
 * and have a replication factor less than 3.
 * <p>
 * Arguments after {@code startIndex}: an optional "-r" flag (restore the
 * replication factor of such files) and the root path to scan; the last
 * non-flag argument wins.
 *
 * @param args      command-line arguments
 * @param startIndex index of the first argument to this command
 * @throws IllegalArgumentException if no root path was supplied
 */
private void findMissingParityFiles(String[] args, int startIndex) {
  Path root = null;
  boolean restoreReplication = false;
  for (int idx = startIndex; idx < args.length; idx++) {
    if ("-r".equals(args[idx])) {
      restoreReplication = true;
    } else {
      root = new Path(args[idx]);
    }
  }
  if (root == null) {
    throw new IllegalArgumentException("Too few arguments");
  }
  try {
    FileSystem fs = root.getFileSystem(conf);
    // Make sure default uri is the same as root
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
    MissingParityFiles mParFiles = new MissingParityFiles(conf, restoreReplication);
    mParFiles.findMissingParityFiles(root, System.out);
  } catch (IOException ex) {
    System.err.println("findMissingParityFiles: " + ex);
  }
}
/**
 * Prints every raid policy configured on the RaidNode to {@link #out},
 * one per line.
 *
 * @param cmd        command name (unused; kept for handler-signature
 *                   consistency with the other commands)
 * @param argv       command-line arguments (unused)
 * @param startindex index of the first command argument (unused)
 * @return 0 always
 * @throws IOException if the RPC to the RaidNode fails
 */
private int showConfig(String cmd, String argv[], int startindex) throws IOException {
  // Fix: removed the unused locals "exitCode" and "int i = startindex"
  // (neither was ever read).
  PolicyInfo[] all = raidnode.getAllPolicies();
  for (PolicyInfo p: all) {
    out.println(p);
  }
  return 0;
}
/**
 * Recovers the specified path from the parity file
 */
public Path[] recover(String cmd, String argv[], int startindex)
throws IOException {
// arguments come in (path, corruptOffset) pairs, hence the /2 and i += 2
Path[] paths = new Path[(argv.length - startindex) / 2];
int j = 0;
for (int i = startindex; i < argv.length; i = i + 2) {
String path = argv[i];
// NOTE(review): corruptOffset is parsed and logged but not otherwise
// used below — the whole file is copied, not just the corrupt region.
long corruptOffset = Long.parseLong(argv[i+1]);
LOG.info("RaidShell recoverFile for " + path + " corruptOffset " + corruptOffset);
// destination for the recovered copy, unique per invocation time
Path recovered = new Path("/tmp/recovered." + System.currentTimeMillis());
FileSystem fs = recovered.getFileSystem(conf);
DistributedFileSystem dfs = (DistributedFileSystem)fs;
// Build a conf whose hdfs:// scheme resolves to DistributedRaidFileSystem
// layered over DistributedFileSystem; presumably reading through the raid
// filesystem reconstructs corrupt blocks during the copy — TODO confirm
// against DistributedRaidFileSystem behavior.
Configuration raidConf = new Configuration(conf);
raidConf.set("fs.hdfs.impl",
"org.apache.hadoop.hdfs.DistributedRaidFileSystem");
raidConf.set("fs.raid.underlyingfs.impl",
"org.apache.hadoop.hdfs.DistributedFileSystem");
// disable the FS cache so the raid-enabled conf is actually honored
raidConf.setBoolean("fs.hdfs.impl.disable.cache", true);
java.net.URI dfsUri = dfs.getUri();
FileSystem raidFs = FileSystem.get(dfsUri, raidConf);
// read via the raid fs, write the recovered copy via the plain fs
FileUtil.copy(raidFs, new Path(path), fs, recovered, false, conf);
paths[j] = recovered;
LOG.info("Raidshell created recovery file " + paths[j]);
j++;
}
return paths;
}
/**
 * Recovers the requested paths (see {@link #recover}) and prints each
 * recovered file's location to {@link #out}.
 *
 * @return 0 always
 * @throws IOException if recovery fails
 */
public int recoverAndPrint(String cmd, String argv[], int startindex)
    throws IOException {
  Path[] recovered = recover(cmd, argv, startindex);
  for (int idx = 0; idx < recovered.length; idx++) {
    out.println(recovered[idx]);
  }
  return 0;
}
/**
 * Reconstructs the corrupt blocks of each file named in the arguments,
 * using the corrupt-block reconstructor.
 *
 * @param args       command-line arguments; files start at startIndex
 * @param startIndex index of the first file path
 * @throws IOException          if reconstruction fails
 * @throws InterruptedException if reconstruction is interrupted
 */
public void recoverBlocks(String[] args, int startIndex)
    throws IOException, InterruptedException {
  int fileCount = args.length - startIndex;
  LOG.info("Recovering blocks for " + fileCount + " files");
  BlockReconstructor.CorruptBlockReconstructor fixer =
      new BlockReconstructor.CorruptBlockReconstructor(conf);
  for (int idx = startIndex; idx < args.length; idx++) {
    fixer.reconstructFile(new Path(args[idx]), null);
  }
}
/**
 * Submit a map/reduce job to raid the input paths.
 *
 * @param args all input parameters
 * @param startIndex starting index of arguments: policy_name path1, ..., pathn
 * @return 0 if successful (or nothing qualified for raiding), -1 on error
 * @throws IOException if any error occurs
 * @throws ParserConfigurationException
 * @throws ClassNotFoundException
 * @throws RaidConfigurationException
 * @throws SAXException
 */
private int distRaid(String[] args, int startIndex) throws IOException,
    SAXException, RaidConfigurationException,
    ClassNotFoundException, ParserConfigurationException {
  // find the matched raid policy
  String policyName = args[startIndex++];
  ConfigManager configManager = new ConfigManager(conf);
  PolicyInfo policy = configManager.getPolicy(policyName);
  if (policy == null) {
    System.err.println ("Invalid policy: " + policyName);
    return -1;
  }
  Codec codec = Codec.getCodec(policy.getCodecId());
  if (codec == null) {
    System.err.println("Policy " + policyName
        + " with invalid codec " + policy.getCodecId());
    // Fix: previously fell through and dereferenced the null codec below
    // (codec.isDirRaid), causing a NullPointerException.
    return -1;
  }
  // find the matched paths to raid
  FileSystem fs = FileSystem.get(conf);
  List<FileStatus> pathsToRaid = new ArrayList<FileStatus>();
  List<Path> policySrcPaths = policy.getSrcPathExpanded();
  for (int i = startIndex; i < args.length; i++) {
    boolean invalidPathToRaid = true;
    Path pathToRaid = new Path(args[i]).makeQualified(fs);
    String pathToRaidStr = pathToRaid.toString();
    if (!pathToRaidStr.endsWith(Path.SEPARATOR)) {
      pathToRaidStr = pathToRaidStr.concat(Path.SEPARATOR);
    }
    // a path is valid only if it falls under one of the policy's source paths
    for (Path srcPath : policySrcPaths) {
      String srcStr = srcPath.toString();
      if (!srcStr.endsWith(Path.SEPARATOR)) {
        srcStr = srcStr.concat(Path.SEPARATOR);
      }
      if (pathToRaidStr.startsWith(srcStr)) {
        if (codec.isDirRaid) {
          FileUtil.listStatusForLeafDir(
              fs, fs.getFileStatus(pathToRaid), pathsToRaid);
        } else {
          FileUtil.listStatusHelper(fs, pathToRaid,
              Integer.MAX_VALUE, pathsToRaid);
        }
        invalidPathToRaid = false;
        break;
      }
    }
    if (invalidPathToRaid) {
      // Fix: grammar of the error message ("does not support by" ->
      // "is not supported by").
      System.err.println("Path " + pathToRaidStr +
          " is not supported by the given policy " + policyName);
    }
  }
  // Check if files are valid
  List<FileStatus> validPaths = new ArrayList<FileStatus>();
  List<PolicyInfo> policyInfos = new ArrayList<PolicyInfo>(1);
  policyInfos.add(policy);
  RaidState.Checker checker = new RaidState.Checker(
      policyInfos, conf);
  long now = System.currentTimeMillis();
  for (FileStatus fileStatus : pathsToRaid) {
    FileStatus[] dirStats = null;
    if (codec.isDirRaid) {
      dirStats = fs.listStatus(fileStatus.getPath());
    }
    RaidState stat = checker.check(
        policy, fileStatus, now, false,
        dirStats == null ? null : Arrays.asList(dirStats));
    if (stat == RaidState.NOT_RAIDED_BUT_SHOULD) {
      validPaths.add(fileStatus);
    } else {
      System.err.println("Path " + fileStatus.getPath() +
          " is not qualified for raiding: " + stat);
    }
  }
  if (validPaths.isEmpty()) {
    System.err.println("No file can be raided");
    return 0;
  }
  DistRaid dr = new DistRaid(conf);
  // add paths for distributed raiding
  dr.addRaidPaths(policy, validPaths);
  if (dr.startDistRaid()) {
    System.out.println("Job started: " + dr.getJobTrackingURL());
    System.out.print("Job in progress ");
    while (!dr.checkComplete()) {
      try {
        System.out.print(".");
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        // Fix: restore the interrupt flag before converting to an
        // InterruptedIOException, per the InterruptedException contract.
        Thread.currentThread().interrupt();
        throw new InterruptedIOException("Got interrupted.");
      }
    }
    if (dr.successful()) {
      // Fix: "/n" -> "\n" (the literal slash-n never printed a newline).
      System.out.println("\nFiles are successfully raided.");
      return 0;
    } else {
      // Fix: "/n" -> "\n" here as well.
      System.err.println("\nRaid job failed.");
      return -1;
    }
  }
  return -1;
}
/**
 * Raids a single file: generates parity for {@code args[startIndex]} into
 * the destination directory {@code args[startIndex + 1]} using the codec
 * named by {@code args[startIndex + 2]}.
 * <p>
 * Target and meta replication come from "raidshell.raidfile.targetrepl"
 * (default: the file's current replication) and
 * "raidshell.raidfile.metarepl" (default: 2).
 *
 * @throws IOException if the raid operation fails
 */
public void raidFile(String[] args, int startIndex) throws IOException {
  Path file = new Path(args[startIndex]);
  Path destPath = new Path(args[startIndex + 1]);
  Codec codec = Codec.getCodec(args[startIndex + 2]);
  LOG.info("Raiding file " + file + " to " + destPath + " using " + codec);
  FileSystem fs = destPath.getFileSystem(conf);
  FileStatus stat = fs.getFileStatus(file);
  int targetRepl = conf.getInt("raidshell.raidfile.targetrepl",
      stat.getReplication());
  int metaRepl = conf.getInt("raidshell.raidfile.metarepl", 2);
  boolean doSimulate = false;
  RaidNode.doRaid(conf, stat, destPath, codec, new RaidNode.Statistics(),
      RaidUtils.NULL_PROGRESSABLE, doSimulate, targetRepl, metaRepl);
}
/**
 * For a file-raided file, counts the corrupt data blocks per stripe into
 * corruptBlocksPerStripe, then folds in the corrupt parity blocks via
 * checkParityBlocks().
 *
 * @param dfs filesystem holding the file
 * @param raidInfo codec / parity info for the file
 * @param filePath file whose blocks are examined
 * @param corruptBlocksPerStripe out-param: stripe index -> corrupt block
 *        count (data + parity)
 * @throws IOException if block locations cannot be read
 */
void collectFileCorruptBlocksInStripe(final DistributedFileSystem dfs,
final RaidInfo raidInfo, final Path filePath,
final HashMap<Integer, Integer> corruptBlocksPerStripe)
throws IOException {
// read conf
final int stripeBlocks = raidInfo.codec.stripeLength;
// figure out which blocks are missing/corrupted
final FileStatus fileStatus = dfs.getFileStatus(filePath);
final long blockSize = fileStatus.getBlockSize();
final long fileLength = fileStatus.getLen();
final long fileLengthInBlocks = RaidNode.numBlocks(fileStatus);
final long fileStripes = RaidNode.numStripes(fileLengthInBlocks,
stripeBlocks);
final BlockLocation[] fileBlocks =
dfs.getFileBlockLocations(fileStatus, 0, fileLength);
// figure out which stripes these corrupted blocks belong to
for (BlockLocation fileBlock: fileBlocks) {
// block index within the file, derived from its byte offset
int blockNo = (int) (fileBlock.getOffset() / blockSize);
final int stripe = blockNo / stripeBlocks;
if (this.isBlockCorrupt(fileBlock)) {
this.incCorruptBlocksPerStripe(corruptBlocksPerStripe, stripe);
if (LOG.isDebugEnabled()) {
LOG.debug("file " + filePath.toString() + " corrupt in block " +
blockNo + "/" + fileLengthInBlocks + ", stripe " + stripe +
"/" + fileStripes);
}
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("file " + filePath.toString() + " OK in block " + blockNo +
"/" + fileLengthInBlocks + ", stripe " + stripe + "/" +
fileStripes);
}
}
}
// add corrupt parity blocks to the same per-stripe counts; a file-raided
// file's parity covers stripes [0, fileStripes)
checkParityBlocks(filePath, corruptBlocksPerStripe, blockSize, 0, fileStripes,
fileStripes, raidInfo);
}
/**
 * For a directory-raided file, counts corrupt blocks per stripe. The
 * file's blocks occupy a sub-range of the directory's stripes, so block
 * indices are offset by the file's position within the directory stripe
 * layout (from StripeReader.getBlockLocation).
 *
 * @param dfs filesystem holding the file
 * @param raidInfo codec / parity info for the file
 * @param filePath file whose blocks are examined
 * @param corruptBlocksPerStripe out-param: stripe index -> corrupt block
 *        count for stripes touching this file
 * @throws IOException if block or stripe information cannot be read
 */
void collectDirectoryCorruptBlocksInStripe(final DistributedFileSystem dfs,
final RaidInfo raidInfo, final Path filePath,
HashMap<Integer, Integer> corruptBlocksPerStripe)
throws IOException {
final int stripeSize = raidInfo.codec.stripeLength;
final FileStatus fileStatus = dfs.getFileStatus(filePath);
final BlockLocation[] fileBlocks =
dfs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
// locate this file's first block within the directory's stripe layout
LocationPair lp = StripeReader.getBlockLocation(raidInfo.codec, dfs,
filePath, 0, conf);
int startBlockIdx = lp.getStripeIdx() * stripeSize +
lp.getBlockIdxInStripe();
int startStripeIdx = lp.getStripeIdx();
int endStripeIdx = (startBlockIdx + fileBlocks.length) / stripeSize;
long blockSize = DirectoryStripeReader.getParityBlockSize(conf,
lp.getListFileStatus());
long numBlocks = DirectoryStripeReader.getBlockNum(lp.getListFileStatus());
// counts over ALL blocks in the affected stripes (including blocks of
// sibling files in the directory), parity blocks first
HashMap<Integer, Integer> allCorruptBlocksPerStripe =
new HashMap<Integer, Integer>();
checkParityBlocks(filePath, allCorruptBlocksPerStripe, blockSize,
startStripeIdx, endStripeIdx,
RaidNode.numStripes(numBlocks, stripeSize), raidInfo);
DirectoryStripeReader sReader = (DirectoryStripeReader)
StripeReader.getStripeReader(raidInfo.codec,
conf, blockSize, dfs, lp.getStripeIdx(), fileStatus);
// Get the corrupt block information for all stripes related to the file
while (sReader.getCurStripeIdx() <= endStripeIdx) {
int stripe = (int)sReader.getCurStripeIdx();
BlockLocation[] bls = sReader.getNextStripeBlockLocations();
for (BlockLocation bl : bls) {
if (this.isBlockCorrupt(bl)) {
this.incCorruptBlocksPerStripe(allCorruptBlocksPerStripe,
stripe);
}
}
}
// figure out which stripes these corrupted blocks belong to
for (BlockLocation fileBlock: fileBlocks) {
int blockNo = startBlockIdx + (int) (fileBlock.getOffset() /
fileStatus.getBlockSize());
final int stripe = blockNo / stripeSize;
if (this.isBlockCorrupt(fileBlock)) {
// NOTE(review): allCorruptBlocksPerStripe.get(stripe) should be
// non-null here because this stripe contains at least this corrupt
// block and was counted in the reader loop above — but if the reader
// loop missed the stripe this would store null; verify.
corruptBlocksPerStripe.put(stripe, allCorruptBlocksPerStripe.get(stripe));
if (LOG.isDebugEnabled()) {
LOG.debug("file " + filePath.toString() + " corrupt in block " +
blockNo + ", stripe " + stripe);
}
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("file " + filePath.toString() + " OK in block " +
blockNo + ", stripe " + stripe);
}
}
}
}
/**
 * Checks whether a file has more than the allowable number of
 * corrupt blocks in some stripe and must therefore be considered corrupt.
 * Convenience overload of {@link #isFileCorrupt(DistributedFileSystem,
 * Path, boolean)} that skips the per-stripe missing-block counting.
 *
 * @param dfs filesystem holding the file
 * @param filePath file to check
 * @return true if some stripe has more corrupt blocks than parity can fix
 * @throws IOException if a network error occurs while checking
 */
protected boolean isFileCorrupt(final DistributedFileSystem dfs,
final Path filePath)
throws IOException {
return isFileCorrupt(dfs, filePath, false);
}
/**
 * Checks whether any stripe of the file has more corrupt blocks than the
 * codec's parity can repair.
 *
 * @param dfs filesystem holding the file
 * @param filePath file to check
 * @param CntMissingBlksPerStrp if true and the file uses the "rs" codec,
 *        also record each stripe's missing-block count in
 *        {@link #numStrpMissingBlks}
 * @return true if the file is corrupt beyond repair, has no parity file,
 *         or its block data cannot be read; false otherwise
 * @throws IOException only for network-related failures (SocketException);
 *         other IOExceptions are logged and reported as corrupt
 */
protected boolean isFileCorrupt(final DistributedFileSystem dfs,
                                final Path filePath,
                                final boolean CntMissingBlksPerStrp)
  throws IOException {
  try {
    // corruptBlocksPerStripe:
    // map stripe # -> # of corrupt blocks in that stripe (data + parity)
    HashMap<Integer, Integer> corruptBlocksPerStripe =
      new LinkedHashMap<Integer, Integer>();
    boolean fileCorrupt = false;
    RaidInfo raidInfo = getFileRaidInfo(filePath);
    if (raidInfo.codec == null) {
      // Couldn't find out the parity file, so the file is corrupt
      return true;
    }
    if (raidInfo.codec.isDirRaid) {
      collectDirectoryCorruptBlocksInStripe(dfs, raidInfo, filePath,
                                            corruptBlocksPerStripe);
    } else {
      collectFileCorruptBlocksInStripe(dfs, raidInfo, filePath,
                                       corruptBlocksPerStripe);
    }
    final int maxCorruptBlocksPerStripe = raidInfo.parityBlocksPerStripe;
    for (int corruptBlocksInStripe: corruptBlocksPerStripe.values()) {
      // detect if the file has any stripes which cannot be fixed by Raid
      LOG.debug("file " + filePath.toString() +
                " has corrupt blocks per Stripe value " +
                corruptBlocksInStripe);
      if (!fileCorrupt) {
        if (corruptBlocksInStripe > maxCorruptBlocksPerStripe) {
          fileCorrupt = true;
        }
      }
      // Fix: was the non-short-circuit '&' operator on booleans; use '&&'
      // (same result here, but short-circuiting and idiomatic). Operands
      // reordered so the cheap flag check runs first; both are
      // side-effect-free.
      if (CntMissingBlksPerStrp && raidInfo.codec.id.equals("rs")) {
        // counts are >= 1 for every stripe present in the map, so the
        // histogram index (count - 1) is never negative
        incrStrpMissingBlks(corruptBlocksInStripe-1);
      }
    }
    return fileCorrupt;
  } catch (SocketException e) {
    // Re-throw network-related exceptions.
    throw e;
  } catch (IOException e) {
    LOG.error("While trying to check isFileCorrupt " + filePath +
              " got exception ", e);
    return true;
  }
}
/**
* holds raid type and parity file pair
*/
private class RaidInfo {
public RaidInfo(final Codec codec,
final ParityFilePair parityPair,
final int parityBlocksPerStripe) {
this.codec = codec;
this.parityPair = parityPair;
this.parityBlocksPerStripe = parityBlocksPerStripe;
}
public final Codec codec;
public final ParityFilePair parityPair;
public final int parityBlocksPerStripe;
}
/**
 * Looks up the raid information for a file by probing every configured
 * codec for a matching parity file.
 *
 * @param filePath file to look up
 * @return the first codec with a parity file for this path, or a
 *         RaidInfo with a null codec when none matches
 * @throws IOException if the parity lookup fails
 */
private RaidInfo getFileRaidInfo(final Path filePath)
    throws IOException {
  // now look for the parity file
  for (Codec candidate : Codec.getCodecs()) {
    ParityFilePair pair = ParityFilePair.getParityFile(candidate, filePath, conf);
    if (pair == null) {
      continue;
    }
    return new RaidInfo(candidate, pair, candidate.parityLength);
  }
  return new RaidInfo(null, null, 0);
}
/**
 * Gets the parity blocks corresponding to a file: the parity blocks
 * themselves on DFS, or the part blocks containing them on a HAR
 * filesystem.
 *
 * @param filePath   data file the parity belongs to (for error messages)
 * @param blockSize  data file block size; parity must match on DFS
 * @param numStripes number of stripes the parity file must cover
 * @param raidInfo   codec / parity-pair info for the file
 * @return block locations of the parity (or container) blocks
 * @throws IOException if the parity file's length or block size does not
 *         match expectations, or metadata cannot be read
 */
private BlockLocation[] getParityBlocks(final Path filePath,
                                        final long blockSize,
                                        final long numStripes,
                                        final RaidInfo raidInfo)
  throws IOException {
  final String parityPathStr = raidInfo.parityPair.getPath().toUri().
    getPath();
  FileSystem parityFS = raidInfo.parityPair.getFileSystem();
  // get parity file metadata
  FileStatus parityFileStatus = parityFS.
    getFileStatus(new Path(parityPathStr));
  long parityFileLength = parityFileStatus.getLen();
  if (parityFileLength != numStripes * raidInfo.parityBlocksPerStripe *
      blockSize) {
    // Fix: added the missing space after "length" — the message used to
    // read "expected parity file of length<number>".
    throw new IOException("expected parity file of length " +
                          (numStripes * raidInfo.parityBlocksPerStripe *
                           blockSize) +
                          " but got parity file of length " +
                          parityFileLength);
  }
  BlockLocation[] parityBlocks =
    parityFS.getFileBlockLocations(parityFileStatus, 0L, parityFileLength);
  if (parityFS instanceof DistributedFileSystem ||
      parityFS instanceof DistributedRaidFileSystem) {
    long parityBlockSize = parityFileStatus.getBlockSize();
    if (parityBlockSize != blockSize) {
      throw new IOException("file block size is " + blockSize +
                            " but parity file block size is " +
                            parityBlockSize);
    }
  } else if (parityFS instanceof HarFileSystem) {
    LOG.debug("HAR FS found");
  } else {
    LOG.warn("parity file system is not of a supported type");
  }
  return parityBlocks;
}
/**
 * Checks the parity blocks for a given file and increments
 * corruptBlocksPerStripe for each corrupt parity block found in
 * [startStripeIdx, endStripeIdx]. Also verifies that the expected total
 * number of parity blocks exists.
 *
 * @param filePath data file the parity belongs to (for messages)
 * @param corruptBlocksPerStripe out-param: stripe index -> corrupt count
 * @param blockSize parity block size in bytes
 * @param startStripeIdx first stripe of interest (inclusive)
 * @param endStripeIdx last stripe of interest (inclusive)
 * @param numStripes total stripes covered by the parity file
 * @param raidInfo codec / parity-pair info
 * @throws IOException on block-count or container-size mismatches
 */
private void checkParityBlocks(final Path filePath,
final HashMap<Integer, Integer>
corruptBlocksPerStripe,
final long blockSize,
final long startStripeIdx,
final long endStripeIdx,
final long numStripes,
final RaidInfo raidInfo)
throws IOException {
// get the blocks of the parity file
// because of har, multiple blocks may be returned as one container block
BlockLocation[] containerBlocks = getParityBlocks(filePath, blockSize,
numStripes, raidInfo);
long parityStripeLength = blockSize *
((long) raidInfo.parityBlocksPerStripe);
long parityBlocksFound = 0L;
for (BlockLocation cb: containerBlocks) {
if (cb.getLength() % blockSize != 0) {
throw new IOException("container block size is not " +
"multiple of parity block size");
}
LOG.debug("found container with offset " + cb.getOffset() +
", length " + cb.getLength());
// walk the container in parity-block-size steps; each step is one
// logical parity block
for (long offset = cb.getOffset();
offset < cb.getOffset() + cb.getLength();
offset += blockSize) {
long block = offset / blockSize;
int stripe = (int) (offset / parityStripeLength);
if (stripe < 0) {
// before the beginning of the parity file
continue;
}
if (stripe >= numStripes) {
// past the end of the parity file
break;
}
parityBlocksFound++;
// only stripes in [startStripeIdx, endStripeIdx] are of interest,
// but blocks outside that window still count toward the total above
if (stripe < startStripeIdx || stripe > endStripeIdx) {
continue;
}
// a corrupt container block marks every parity block it holds corrupt
if (this.isBlockCorrupt(cb)) {
LOG.info("parity file for " + filePath.toString() +
" corrupt in block " + block +
", stripe " + stripe + "/" + numStripes);
this.incCorruptBlocksPerStripe(corruptBlocksPerStripe, stripe);
} else {
LOG.debug("parity file for " + filePath.toString() +
" OK in block " + block +
", stripe " + stripe + "/" + numStripes);
}
}
}
long parityBlocksExpected = raidInfo.parityBlocksPerStripe * numStripes;
if (parityBlocksFound != parityBlocksExpected ) {
throw new IOException("expected " + parityBlocksExpected +
" parity blocks but got " + parityBlocksFound);
}
}
/**
 * Checks the raided file system, prints a list of corrupt files to
 * this.out and counts the corrupt files.
 * Also prints out the total number of files with at least one missing
 * block. When called with '-retNumStrpsMissingBlksRS', also prints out
 * number of stripes with certain number of blocks missing for files
 * using the 'RS' codec.
 *
 * @param cmd        command name, for usage messages
 * @param args       command-line arguments: optional path (default "/"),
 *                   then optional -threads N, -count,
 *                   -retNumStrpsMissingBlksRS flags
 * @param startIndex index of the first fsck argument
 * @throws IOException if the filesystem is not a DistributedFileSystem or
 *                     the corrupt-file query fails
 */
public void fsck(String cmd, String[] args, int startIndex) throws IOException {
  final int numFsckArgs = args.length - startIndex;
  int numThreads = 16;
  String path = "/";
  boolean argsOk = false;
  boolean countOnly = false;
  boolean MissingBlksPerStrpCnt = false;
  if (numFsckArgs >= 1) {
    argsOk = true;
    path = args[startIndex];
  }
  for (int i = startIndex + 1; i < args.length; i++) {
    if (args[i].equals("-threads")) {
      numThreads = Integer.parseInt(args[++i]);
    } else if (args[i].equals("-count")) {
      countOnly = true;
    } else if (args[i].equals("-retNumStrpsMissingBlksRS")) {
      MissingBlksPerStrpCnt = true;
    }
  }
  if (!argsOk) {
    printUsage(cmd);
    return;
  }
  System.err.println("Running RAID FSCK with " + numThreads +
    " threads on " + path);
  FileSystem fs = (new Path(path)).getFileSystem(conf);
  // if we got a raid fs, get the underlying fs
  if (fs instanceof DistributedRaidFileSystem) {
    fs = ((DistributedRaidFileSystem) fs).getFileSystem();
  }
  // check that we have a distributed fs
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IOException("expected DistributedFileSystem but got " +
              fs.getClass().getName());
  }
  final DistributedFileSystem dfs = (DistributedFileSystem) fs;
  // get a list of corrupted files (not considering parity blocks just yet)
  // from the name node
  // these are the only files we need to consider:
  // if a file has no corrupted data blocks, it is OK even if some
  // of its parity blocks are corrupted, so no further checking is
  // necessary
  System.err.println("Querying NameNode for list of corrupt files under " + path);
  final String[] files = DFSUtil.getCorruptFiles(dfs, path);
  final List<String> corruptFileCandidates = new LinkedList<String>();
  for (final String f: files) {
    // if this file is a parity file
    // or if it does not start with the specified path,
    // ignore it
    boolean matched = false;
    for (Codec c : Codec.getCodecs()) {
      if (f.startsWith(c.getParityPrefix())) {
        matched = true;
      }
    }
    if (!matched) {
      corruptFileCandidates.add(f);
    }
  }
  // filter files marked for deletion
  RaidUtils.filterTrash(conf, corruptFileCandidates);
  // clear numStrpMissingBlks if missing blocks per stripe is to be counted
  if (MissingBlksPerStrpCnt) {
    for (int i = 0; i < numStrpMissingBlks.length(); i++) {
      numStrpMissingBlks.set(i, 0);
    }
  }
  System.err.println(
    "Processing " + corruptFileCandidates.size() + " possibly corrupt files using " +
    numThreads + " threads");
  ExecutorService executor = null;
  if (numThreads > 1) {
    executor = Executors.newFixedThreadPool(numThreads);
  }
  final boolean finalCountOnly = countOnly;
  final boolean finalMissingBlksPerStrpCnt = MissingBlksPerStrpCnt;
  for (final String corruptFileCandidate: corruptFileCandidates) {
    Runnable work = new Runnable() {
      public void run() {
        boolean corrupt = false;
        try {
          corrupt = isFileCorrupt(dfs, new Path(corruptFileCandidate),finalMissingBlksPerStrpCnt);
          if (corrupt) {
            incrCorruptCount();
            if (!finalCountOnly) {
              out.println(corruptFileCandidate);
            }
          }
        } catch (IOException e) {
          LOG.error("Error in processing " + corruptFileCandidate, e);
        }
      }
    };
    if (executor != null) {
      executor.execute(work);
    } else {
      work.run();
    }
  }
  if (executor != null) {
    executor.shutdown(); // Waits for submitted tasks to finish.
    try {
      executor.awaitTermination(3600, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
      // Fix: was silently swallowed; restore the interrupt flag so
      // callers can observe the interruption.
      Thread.currentThread().interrupt();
    }
  }
  if (countOnly) {
    // Number of corrupt files (which cannot be fixed by Raid)
    out.println(getCorruptCount());
    // Fix: "Nubmer" typo in the log message.
    LOG.info("Number of corrupt files:" + getCorruptCount());
    // Number of files with at least one missing block
    out.println(corruptFileCandidates.size());
    LOG.info("Number of files with at least one block missing/corrupt: "+corruptFileCandidates.size());
  }
  /* Number of stripes with missing blocks array:
   * index 0: Number of stripes found with one block missing in this fsck
   * index 1: Number of stripes found with two block missing in this fsck
   * and so on
   */
  if (MissingBlksPerStrpCnt)
    for (int j = 0; j < numStrpMissingBlks.length() ; j++) {
      long temp = numStrpMissingBlks.get(j);
      out.println(temp);
      LOG.info("Number of stripes with missing blocks at index "+ j + " is " + temp);
    }
}
// For testing.
// Bumps the running count of files found to be irreparably corrupt.
private void incrCorruptCount() {
  corruptCounter.addAndGet(1);
}
// For testing.
// Number of files counted as corrupt (i.e. not fixable by Raid) so far.
int getCorruptCount() {
return corruptCounter.get();
}
// Bumps the stripe tally at the given slot; per the reporting code above,
// slot i presumably counts stripes with i+1 missing blocks.
private void incrStrpMissingBlks(int index){
  numStrpMissingBlks.addAndGet(index, 1);
}
// Test hook: number of stripes recorded at the given missing-block slot.
long getStrpMissingBlks(int index){
return numStrpMissingBlks.get(index);
}
/**
 * Prints, for each har archive path given on the command line, the
 * "useful percent" computed by {@link PurgeMonitor#usefulHar}. Paths
 * outside the codec's parity directory are reported and skipped.
 *
 * @param args       full argument vector; args[startIndex] is the codec id,
 *                   the remaining entries are har paths
 * @param startIndex index of the codec id inside {@code args}
 * @throws IOException on filesystem errors
 */
void usefulHar(String[] args, int startIndex) throws IOException {
  // Need at least the codec id plus one har path.
  if (args.length - startIndex < 2) {
    printUsage("usefulHar");
    throw new IllegalArgumentException("Too few arguments");
  }
  Codec codec = Codec.getCodec(args[startIndex]);
  String prefix = new Path(codec.parityDirectory).toUri().getPath();
  FileSystem fs = new Path("/").getFileSystem(conf);
  for (int idx = startIndex + 1; idx < args.length; idx++) {
    String harPath = args[idx];
    // Only archives under the codec's parity directory make sense here.
    if (!harPath.startsWith(prefix)) {
      System.err.println("Har " + harPath + " is not located in " +
        prefix + ", ignoring");
      continue;
    }
    float usefulPercent = PurgeMonitor.usefulHar(
        codec, fs, fs, new Path(harPath), prefix, conf, null);
    out.println("Useful percent of " + harPath + " " + usefulPercent);
  }
}
/**
 * For each path argument, lists the files with corrupt blocks under it and
 * reports whether each such file looks recoverable via its parity file.
 *
 * @param cmd        command name, used only in the usage message
 * @param args       full argument vector
 * @param startIndex index of the first path argument inside {@code args}
 * @throws IOException on filesystem errors
 */
public void checkFile(String cmd, String[] args, int startIndex)
  throws IOException {
  if (startIndex >= args.length) {
    printUsage(cmd);
    throw new IllegalArgumentException("Insufficient arguments");
  }
  for (int i = startIndex; i < args.length; i++) {
    Path p = new Path(args[i]);
    FileSystem fs = p.getFileSystem(conf);
    // if we got a raid fs, get the underlying fs
    if (fs instanceof DistributedRaidFileSystem) {
      fs = ((DistributedRaidFileSystem) fs).getFileSystem();
    }
    // We should be able to cast at this point.
    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    RemoteIterator<Path> corruptIt = dfs.listCorruptFileBlocks(p);
    int count = 0;
    while (corruptIt.hasNext()) {
      count++;
      Path corruptFile = corruptIt.next();
      // Result of checking.
      String result = null;
      // FIX: stat the corrupt file itself, not the query path. The old
      // code read p's replication, which is wrong whenever p is a
      // directory or otherwise differs from the file being examined.
      FileStatus stat = fs.getFileStatus(corruptFile);
      if (stat.getReplication() < fs.getDefaultReplication()) {
        // Below-default replication suggests the file may be raided,
        // so look for a parity file.
        RaidInfo raidInfo = getFileRaidInfo(corruptFile);
        if (raidInfo.codec == null) {
          result = "Below default replication but no parity file found";
        } else {
          boolean notRecoverable = isFileCorrupt(dfs, corruptFile);
          if (notRecoverable) {
            result = "Missing too many blocks to be recovered " +
              "using parity file " + raidInfo.parityPair.getPath();
          } else {
            result = "Has missing blocks but can be read using parity file " +
              raidInfo.parityPair.getPath();
          }
        }
      } else {
        result = "At default replication, not raided";
      }
      out.println("Result of checking " + corruptFile + " : " +
        result);
    }
    out.println("Found " + count + " files with missing blocks");
  }
}
/**
 * Recursively traverses a parity directory and deletes every parity file
 * that the purge filter flags as obsolete.
 *
 * @param cmd        command name, used only in the usage message
 * @param args       args[startIndex] is the parity path, args[startIndex+1]
 *                   the codec id
 * @param startIndex index of the first command argument
 * @throws IOException on filesystem errors
 */
public void purgeParity(String cmd, String[] args, int startIndex)
  throws IOException {
  // Requires both a parity path and a codec id.
  if (startIndex + 1 >= args.length) {
    printUsage(cmd);
    throw new IllegalArgumentException("Insufficient arguments");
  }
  Path parityPath = new Path(args[startIndex]);
  AtomicLong entriesProcessed = new AtomicLong(0);
  System.err.println("Starting recursive purge of " + parityPath);
  Codec codec = Codec.getCodec(args[startIndex + 1]);
  // Unwrap a raid filesystem to reach the underlying filesystem.
  FileSystem srcFs = parityPath.getFileSystem(conf);
  if (srcFs instanceof DistributedRaidFileSystem) {
    srcFs = ((DistributedRaidFileSystem) srcFs).getFileSystem();
  }
  FileSystem parityFs = srcFs;
  String parityPrefix = codec.parityDirectory;
  // Traversal yields only the files that the purge filter selects;
  // see DirectoryTraversal for the meaning of the trailing numeric args.
  DirectoryTraversal obsoleteParityFileRetriever =
    new DirectoryTraversal(
      "Purge File ",
      java.util.Collections.singletonList(parityPath),
      parityFs,
      new PurgeMonitor.PurgeParityFileFilter(conf, codec, srcFs, parityFs,
        parityPrefix, null, entriesProcessed),
      1,
      false);
  // Drain the traversal, deleting each obsolete parity file as it appears.
  for (FileStatus obsolete = obsoleteParityFileRetriever.next();
       obsolete != DirectoryTraversal.FINISH_TOKEN;
       obsolete = obsoleteParityFileRetriever.next()) {
    PurgeMonitor.performDelete(parityFs, obsolete.getPath(), false);
  }
}
/**
 * For each path argument, probes every configured codec for a parity file
 * and prints what was found. Warns when no parity, or more than one,
 * exists for a path.
 *
 * @param cmd        command name, used only in the usage message
 * @param args       full argument vector
 * @param startIndex index of the first path argument inside {@code args}
 * @throws IOException on filesystem errors
 */
public void checkParity(String cmd, String[] args, int startIndex)
  throws IOException {
  if (startIndex >= args.length) {
    printUsage(cmd);
    throw new IllegalArgumentException("Insufficient arguments");
  }
  for (int argIdx = startIndex; argIdx < args.length; argIdx++) {
    Path checkPath = new Path(args[argIdx]);
    int paritiesFound = 0;
    for (Codec codec : Codec.getCodecs()) {
      ParityFilePair pair = null;
      try {
        pair = ParityFilePair.getParityFile(codec, checkPath, conf);
      } catch (FileNotFoundException ignored) {
        // No parity under this codec; keep probing the others.
      }
      if (pair != null) {
        System.out.println(codec.id + " parity: " + pair.getPath());
        paritiesFound++;
      }
    }
    if (paritiesFound == 0) {
      System.out.println("No parity file found");
    }
    if (paritiesFound > 1) {
      System.out.println("Warning: multiple parity files found");
    }
  }
}
/**
 * Decides whether a block is corrupt: either flagged corrupt by the
 * location info, or non-empty yet left with no replica locations.
 *
 * @param fileBlock block location info; null represents an empty block
 * @return true if the block is corrupt or has lost all its replicas
 */
private boolean isBlockCorrupt(BlockLocation fileBlock)
  throws IOException {
  if (fileBlock == null) {
    // An empty block cannot be corrupt.
    return false;
  }
  boolean lostAllReplicas =
    fileBlock.getNames().length == 0 && fileBlock.getLength() > 0;
  return fileBlock.isCorrupt() || lostAllReplicas;
}
/**
 * Increments the corrupt-block count for the given stripe, treating a
 * missing entry as zero.
 *
 * @param corruptBlocksPerStripe map from stripe index to corrupt count
 * @param stripe                 stripe index to bump
 */
private void incCorruptBlocksPerStripe(HashMap<Integer, Integer>
  corruptBlocksPerStripe, int stripe) {
  Integer current = corruptBlocksPerStripe.get(stripe);
  corruptBlocksPerStripe.put(stripe, current == null ? 1 : current + 1);
}
/**
* main() has some simple utility methods
*/
/**
 * Command-line entry point: runs a RaidShell via ToolRunner and exits
 * with the tool's return code, or -1 on RPC/IO failures.
 */
public static void main(String argv[]) throws Exception {
  RaidShell shell = null;
  try {
    shell = new RaidShell(new Configuration());
    int res = ToolRunner.run(shell, argv);
    System.exit(res);
  } catch (RPC.VersionMismatch v) {
    System.err.println("Version Mismatch between client and server" +
                       "... command aborted.");
    System.exit(-1);
  } catch (IOException e) {
    System.err.
      println("Bad connection to RaidNode or NameNode. command aborted.");
    System.err.println(e.getMessage());
    System.exit(-1);
  } finally {
    // FIX: if the RaidShell constructor threw, shell is still null and the
    // unconditional close() raised an NPE that masked the real error.
    if (shell != null) {
      shell.close();
    }
  }
}
}