/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.operation.tools.hadoop.fs;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Method;
import java.net.URI;
import java.sql.Date;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* CLI for Hadoop FS cleaning tool.
* @since 0.4.0
*/
public class Clean extends Configured implements Tool {
static final Logger LOG = LoggerFactory.getLogger(Clean.class);
static final Option OPT_RECURSIVE;
static final Option OPT_DRY_RUN;
static final Option OPT_KEEP_DAYS;
private static final Options OPTIONS;
static {
OPT_RECURSIVE = new Option("r", "recursive", false, "remove recursively");
OPT_DRY_RUN = new Option("s", "dry-run", false, "do not delete actually");
OPT_KEEP_DAYS = new Option("k", "keep-days", true, "keep files lecent days");
OPTIONS = new Options();
OPTIONS.addOption(OPT_RECURSIVE);
OPTIONS.addOption(OPT_DRY_RUN);
OPTIONS.addOption(OPT_KEEP_DAYS);
}
/** Reference time in epoch milliseconds; the keep threshold is computed relative to it. */
private final long currentTime;

/**
 * Creates a new instance whose reference time is the system clock at construction.
 */
public Clean() {
    this.currentTime = System.currentTimeMillis();
}

/**
 * Creates a new instance with an explicit reference time.
 * @param currentTime the reference time in epoch milliseconds
 */
Clean(long currentTime) {
    this.currentTime = currentTime;
}
/**
 * Program entry.
 * Runs the tool with a new {@link Configuration}, logs a start and a finish record,
 * and terminates the JVM with the tool's exit code when that code is non-zero.
 * @param args arguments
 * @throws Exception if failed to execute command
 */
public static void main(String... args) throws Exception {
    LOG.info("[OT-CLEAN-I00000] Start Hadoop FS cleaning tool");
    long start = System.currentTimeMillis();
    Tool tool = new Clean();
    tool.setConf(new Configuration());
    int exit = tool.run(args); // no generic options
    long elapsed = System.currentTimeMillis() - start;
    // Pass the numbers as strings: MessageFormat would otherwise run them through a
    // locale-specific NumberFormat and emit grouping separators (e.g. "elapsed=12,345ms").
    LOG.info(MessageFormat.format(
            "[OT-CLEAN-I00999] Finish Hadoop FS cleaning tool (exit-code={0}, elapsed={1}ms)",
            String.valueOf(exit),
            String.valueOf(elapsed)));
    if (exit != 0) {
        System.exit(exit);
    }
}
/**
 * Parses the command line and cleans every target path given on it.
 * @param args the command line arguments
 * @return {@code 0} if no error was recorded, {@code 1} if any error was recorded while cleaning,
 *     or {@code 2} if the arguments could not be parsed or were rejected
 * @throws IllegalArgumentException if {@code args} is {@code null}
 */
@Override
public int run(String[] args) {
    if (args == null) {
        throw new IllegalArgumentException("args must not be null"); //$NON-NLS-1$
    }
    Opts opts;
    try {
        opts = parseOptions(args);
    } catch (Exception e) {
        LOG.error(MessageFormat.format(
                "[OT-CLEAN-E00001] Invalid options: {0}",
                Arrays.toString(args)), e);
        return 2;
    }
    if (opts == null) {
        // parseOptions has already logged the reason
        return 2;
    }
    // files last modified before this instant are removal candidates
    long period = currentTime - (long) (opts.keepDays * TimeUnit.DAYS.toMillis(1));
    if (LOG.isDebugEnabled()) {
        // java.util.Date on purpose: the java.sql.Date imported by this file prints only
        // "yyyy-mm-dd" from toString(), which hides the time of day of the threshold.
        LOG.debug("Keep switching-time: {}", new java.util.Date(period));
    }
    Context context = new Context(opts.recursive, period, opts.dryRun);
    for (Path path : opts.paths) {
        remove(path, context);
    }
    return context.hasError() ? 1 : 0;
}
/**
 * Parses the command line arguments into an {@link Opts}.
 * Every rejection handled here is logged with code {@code OT-CLEAN-E00001} before returning.
 * @param args the raw command line arguments
 * @return the parsed options, or {@code null} if the keep-days option is missing or is not a number,
 *     a target path is invalid, or no target path was given
 * @throws ParseException if the command line parser rejects the arguments
 */
private Opts parseOptions(String[] args) throws ParseException {
    assert args != null;
    if (LOG.isDebugEnabled()) {
        LOG.debug("Parsing options: {}", Arrays.toString(args));
    }
    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(OPTIONS, args);
    boolean recursive = cmd.hasOption(OPT_RECURSIVE.getOpt());
    String keepString = cmd.getOptionValue(OPT_KEEP_DAYS.getOpt());
    boolean dryRun = cmd.hasOption(OPT_DRY_RUN.getOpt());
    String[] rest = cmd.getArgs();
    if (keepString == null) {
        LOG.error(MessageFormat.format(
                "[OT-CLEAN-E00001] Missing option: -{0}",
                OPT_KEEP_DAYS.getLongOpt()));
        return null;
    }
    if (rest == null) {
        rest = new String[0];
    }
    LOG.debug("Option {}: {}", OPT_RECURSIVE.getLongOpt(), recursive);

    double keepDays;
    try {
        keepDays = Double.parseDouble(keepString);
    } catch (NumberFormatException e) {
        keepDays = Double.NaN;
    }
    // Double.parseDouble also accepts the literal "NaN"; casting NaN to long yields 0, which the
    // caller would read as "keep nothing" and remove every file older than now. Reject it here.
    if (Double.isNaN(keepDays)) {
        LOG.error(MessageFormat.format(
                "[OT-CLEAN-E00001] -{0} must be a number: {1}",
                OPT_KEEP_DAYS.getLongOpt(),
                keepString));
        return null;
    }
    LOG.debug("Option {}: {}", OPT_KEEP_DAYS.getLongOpt(), keepDays);
    LOG.debug("Option {}: {}", OPT_DRY_RUN.getLongOpt(), dryRun);

    List<Path> paths = new ArrayList<Path>();
    for (String pathString : rest) {
        if (pathString.trim().isEmpty()) {
            // blank arguments are silently ignored
            continue;
        }
        try {
            Path path = new Path(pathString);
            paths.add(path);
            LOG.debug("Option --: {}", path);
        } catch (RuntimeException e) {
            LOG.error(MessageFormat.format(
                    "[OT-CLEAN-E00001] Invalid target path: {0}",
                    pathString), e);
            return null;
        }
    }
    if (paths.isEmpty()) {
        LOG.error("[OT-CLEAN-E00001] Missing target paths");
        return null;
    }
    return new Opts(recursive, keepDays, dryRun, paths);
}
/**
 * Cleans everything matched by a single target path pattern.
 * Failing to obtain the file system, failing to glob the pattern, and matching nothing are each
 * logged, recorded as an error on the context, and reported as {@code false}.
 * @param path the target path (glob pattern)
 * @param context the current cleaning context
 * @return {@code true} only if every matched entry was removed
 */
boolean remove(Path path, Context context) {
    LOG.info(MessageFormat.format(
            "[OT-CLEAN-I01000] Start cleaning: {0}",
            path));
    FileSystem fs;
    try {
        fs = FileSystem.get(path.toUri(), getConf());
    } catch (Exception e) {
        LOG.error(MessageFormat.format(
                "[OT-CLEAN-E01001] Failed to connect to filesystem: {0}",
                path), e);
        context.setError();
        return false;
    }
    List<FileStatus> files;
    try {
        files = asList(fs.globStatus(path));
    } catch (Exception e) {
        LOG.error(MessageFormat.format(
                "[OT-CLEAN-E01002] Failed to glob path pattern: {0}",
                path), e);
        context.setError();
        return false;
    }
    if (files.isEmpty()) {
        LOG.warn(MessageFormat.format(
                "[OT-CLEAN-W01001] Target file is not found: {0}",
                path));
        context.setError();
        return false;
    }
    boolean removed = true;
    long start = System.currentTimeMillis();
    for (FileStatus file : files) {
        // non-short-circuit on purpose: every entry is attempted even after a failure
        removed &= remove(fs, file, context);
    }
    long elapsed = System.currentTimeMillis() - start;
    // The elapsed time is passed as a string: MessageFormat would otherwise apply locale-specific
    // grouping separators to it (e.g. "elapsed=12,345ms").
    LOG.info(MessageFormat.format(
            "[OT-CLEAN-I01999] Finish cleaning: {0} (all-removed={1}, elapsed={2}ms)",
            path,
            removed,
            String.valueOf(elapsed)));
    return removed;
}
/**
 * Removes a single file or directory if the context allows it.
 * Symlinks are rejected as an error. A directory is only processed in recursive mode, and only
 * removed itself once all of its children were removed. In dry-run mode nothing is deleted but the
 * entry is still logged and reported as removed.
 * @param fs the file system that owns {@code file}
 * @param file the entry to remove
 * @param context the current cleaning context
 * @return {@code true} if the entry was removed (or would have been, in dry-run mode)
 */
private boolean remove(FileSystem fs, FileStatus file, Context context) {
    Path target = file.getPath();
    LOG.debug("Attempt to remove {}", target);
    if (context.isSymlink(fs, file)) {
        LOG.error(MessageFormat.format(
                "[OT-CLEAN-W01001] Symlink is currently not supported: {0}",
                target));
        context.setError();
        return false;
    }
    if (file.isDir()) {
        if (context.isRecursive() == false) {
            LOG.info(MessageFormat.format(
                    "[OT-CLEAN-I01003] Skipped: {0} (is directory)",
                    target));
            return false;
        }
        List<FileStatus> children;
        try {
            children = asList(fs.listStatus(target));
        } catch (IOException e) {
            LOG.error(MessageFormat.format(
                    "[OT-CLEAN-E01003] Failed to list directory: {0}",
                    target), e);
            context.setError();
            return false;
        }
        boolean deleteChildren = true;
        for (FileStatus child : children) {
            // non-short-circuit on purpose: every child is attempted even after a failure
            deleteChildren &= remove(fs, child, context);
        }
        if (deleteChildren == false) {
            LOG.info(MessageFormat.format(
                    "[OT-CLEAN-I01004] Skipped: {0} (is non-empty directory)",
                    target));
            return false;
        }
    }
    if (context.canDelete(file) == false) {
        LOG.info(MessageFormat.format(
                "[OT-CLEAN-I01002] Kept: {0} (timestamp={1})",
                target,
                new Date(file.getModificationTime())));
        return false;
    }
    LOG.debug("Removing {}", target);
    if (context.isDryRun() == false) {
        try {
            // non-recursive delete: children (if any) have already been handled above
            if (fs.delete(target, false) == false) {
                LOG.error(MessageFormat.format(
                        "[OT-CLEAN-E01004] Failed to remove: {0}",
                        target));
                context.setError();
                return false;
            }
        } catch (IOException e) {
            // logged at ERROR like the non-exceptional failure above (same E01004 code)
            LOG.error(MessageFormat.format(
                    "[OT-CLEAN-E01004] Failed to remove: {0}",
                    target), e);
            context.setError();
            return false;
        }
    }
    LOG.info(MessageFormat.format(
            "[OT-CLEAN-I01001] Removed: {0} (timestamp={1})",
            target,
            new Date(file.getModificationTime())));
    return true;
}
/**
 * Views a possibly-{@code null} status array as a list.
 * @param files the array, or {@code null}
 * @return an empty list for {@code null}, otherwise a list view of the array
 */
private List<FileStatus> asList(FileStatus[] files) {
    return files == null
            ? Collections.<FileStatus>emptyList()
            : Arrays.asList(files);
}
/**
 * Plain holder for the values extracted from the command line.
 */
private static final class Opts {

    /** Whether the recursive flag was given. */
    final boolean recursive;

    /** The parsed value of the keep-days option. */
    final double keepDays;

    /** Whether the dry-run flag was given. */
    final boolean dryRun;

    /** The target paths given as remaining arguments. */
    final List<Path> paths;

    /**
     * Creates a new instance.
     * @param recursive whether the recursive flag was given
     * @param keepDays the parsed value of the keep-days option
     * @param dryRun whether the dry-run flag was given
     * @param paths the target paths
     */
    public Opts(boolean recursive, double keepDays, boolean dryRun, List<Path> paths) {
        this.paths = paths;
        this.dryRun = dryRun;
        this.keepDays = keepDays;
        this.recursive = recursive;
    }
}
/**
 * State of one cleaning run: the requested flags, the modification-time threshold,
 * and whether any error has been recorded so far.
 */
private static final class Context {

    /** {@code FileStatus.isSymlink()} looked up reflectively, or {@code null} if the lookup failed. */
    private static final Method FILE_STATUS_IS_SYMLINK;
    static {
        Method m;
        try {
            m = FileStatus.class.getMethod("isSymlink");
        } catch (Exception e) {
            m = null;
            LOG.debug("FileStatus.isSymlink is not supported");
        }
        FILE_STATUS_IS_SYMLINK = m;
    }

    private final boolean recursive;

    /** Entries last modified strictly before this instant (epoch milliseconds) may be deleted. */
    private final long keepPeriod;

    private final boolean dryRun;

    private boolean sawError;

    /**
     * Creates a new instance with no error recorded.
     * @param recursive whether directories should be processed recursively
     * @param keepPeriod the modification-time threshold in epoch milliseconds
     * @param dryRun whether deletion should be skipped
     */
    public Context(boolean recursive, long keepPeriod, boolean dryRun) {
        this.recursive = recursive;
        this.keepPeriod = keepPeriod;
        this.dryRun = dryRun;
        this.sawError = false;
    }

    /**
     * Returns whether the entry is detected as a symlink.
     * Any failure during detection is logged at debug level and treated as "not a symlink".
     * @param fs the file system that owns the entry
     * @param file the entry to inspect
     * @return {@code true} if the entry was detected as a symlink, otherwise {@code false}
     */
    public boolean isSymlink(FileSystem fs, FileStatus file) {
        try {
            return isSymlink0(fs, file);
        } catch (Exception e) {
            if (LOG.isDebugEnabled()) {
                // the pattern needs the {0} placeholder, otherwise the path argument is dropped
                LOG.debug(MessageFormat.format(
                        "Failed to resolve symlink: {0}",
                        file.getPath()), e);
            }
            return false;
        }
    }

    /**
     * Symlink detection without the failure guard of {@link #isSymlink(FileSystem, FileStatus)}.
     * For {@code file:} URIs the path is compared with its canonical form; for other schemes
     * the reflective {@code FileStatus.isSymlink()} is used when it is available.
     */
    private boolean isSymlink0(FileSystem fs, FileStatus file) throws IOException {
        assert fs != null;
        assert file != null;
        URI uri = file.getPath().toUri();
        if (uri.getScheme() == null) {
            uri = fs.makeQualified(file.getPath()).toUri();
        }
        String scheme = uri.getScheme();
        if (scheme == null) {
            // Still no scheme after qualification: report "not a symlink" directly instead of
            // reaching the same result through a swallowed NullPointerException.
            return false;
        }
        // NOTE: It seems that Hadoop 2.0 LocalFileSystem still does not support symlink.
        if (scheme.equals("file")) {
            File f = new File(uri);
            File c = f.getCanonicalFile();
            if (f.equals(c)) {
                return false;
            }
            // the last path segment resolves to a differently named target
            if (f.getName().equals(c.getName()) == false) {
                return true;
            }
            File parent = f.getParentFile();
            if (parent == null) {
                return false;
            }
            // same name, but the target lives outside the (resolved) parent directory
            if (parent.getCanonicalFile().equals(c.getParentFile()) == false) {
                return true;
            }
            // otherwise the difference comes from the parent path only, not from this entry
            return false;
        }
        if (FILE_STATUS_IS_SYMLINK != null) {
            try {
                return Boolean.TRUE.equals(FILE_STATUS_IS_SYMLINK.invoke(file));
            } catch (Exception e) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format(
                            "Failed to invoke {0}({1})",
                            FILE_STATUS_IS_SYMLINK.getName(),
                            file.getPath()), e);
                }
                return false;
            }
        }
        return false;
    }

    /**
     * Returns whether directories should be processed recursively.
     * @return {@code true} if recursive
     */
    public boolean isRecursive() {
        return recursive;
    }

    /**
     * Returns whether deletion should be skipped.
     * @return {@code true} if this is a dry run
     */
    public boolean isDryRun() {
        return dryRun;
    }

    /**
     * Returns whether the entry is old enough to be deleted.
     * @param file the entry to check
     * @return {@code true} if its modification time is strictly before the threshold
     */
    public boolean canDelete(FileStatus file) {
        return file.getModificationTime() < keepPeriod;
    }

    /**
     * Records that an error has occurred.
     */
    public void setError() {
        this.sawError = true;
    }

    /**
     * Returns whether any error has been recorded.
     * @return {@code true} if {@link #setError()} has been called
     */
    public boolean hasError() {
        return this.sawError;
    }
}
}