@Test
public void execute() throws Exception {
writeFiles(true, true, true);
Crush crush = new Crush();
ToolRunner.run(job, crush, new String [] {
"--threshold=0.015",
"--max-file-blocks=1",
"--verbose",
"--regex=.+/other",
"--replacement=${crush.timestamp}-${crush.task.num}-middle-${crush.file.num}-tail",
"--input-format=" + SequenceFileInputFormat.class.getName(),
"--output-format=" + TextOutputFormat.class.getName(),
"--regex=.+/dir",
"--replacement=secondregex-${crush.timestamp}-${crush.task.num}-${crush.file.num}",
"--input-format=" + TextInputFormat.class.getName(),
"--output-format=" + SequenceFileOutputFormat.class.getName(),
"--regex=.+/dir/([^/]+/)*(.+)",
"--replacement=thirdregex-$2-${crush.timestamp}-${crush.task.num}-${crush.file.num}",
"--input-format=" + SequenceFileInputFormat.class.getName(),
"--output-format=" + SequenceFileOutputFormat.class.getName(),
"--regex=.+/text",
"--replacement=fourthregex-${crush.task.num}-${crush.timestamp}-${crush.file.num}",
"--input-format=" + TextInputFormat.class.getName(),
"--output-format=" + TextOutputFormat.class.getName(),
/*
* This is the default regex and replacement, which we add last so we can exercise the default logic.
*/
"--regex=.+",
"--replacement=crushed_file-${crush.timestamp}-${crush.task.num}-${crush.file.num}",
"--input-format=" + SequenceFileInputFormat.class.getName(),
"--output-format=" + TextOutputFormat.class.getName(),
"--compress=" + CustomCompressionCodec.class.getName(),
"in", "out", "20101116153015"
});
/*
* Crushed files.
*/
verifyOutput(homeDir + "/out/dir", "secondregex-20101116153015-*-*", Format.TEXT, Format.SEQUENCE, customCodec, "file10", "file11", "file12", "file13");
verifyOutput(homeDir + "/out/dir/subdir", "thirdregex-subdir-20101116153015-*-*", Format.SEQUENCE, Format.SEQUENCE, customCodec, "file20", "file21", "file22", "file23", "file24");
verifyOutput(homeDir + "/out/dir/subdir/subsubdir", "thirdregex-subsubdir-20101116153015-*-*", Format.SEQUENCE, Format.SEQUENCE, customCodec, "file30", "file31", "file32", "file33", "file34");
verifyOutput(homeDir + "/out/dir/subdir/other", "20101116153015-*-middle-*-tail", Format.SEQUENCE, Format.TEXT, customCodec, "file40", "file41", "file42", "file43");
verifyOutput(homeDir + "/out/dir/other", "20101116153015-*-middle-*-tail", Format.SEQUENCE, Format.TEXT, customCodec, "file50", "file51", "file52", "file53", "file54", "file55");
verifyOutput(homeDir + "/out/text", "fourthregex-*-20101116153015-*", Format.TEXT, Format.TEXT, customCodec, "file60", "file61", "file62", "file63");
verifyOutput(homeDir + "/out", "crushed_file-20101116153015-*-*", Format.SEQUENCE, Format.TEXT, customCodec, "file70", "file71", "file72");
/*
* Skipped files should have been moved to the output dir.
*/
verifyOutput(homeDir + "/out/dir/skipped", "file80", Format.SEQUENCE, Format.SEQUENCE, defaultCodec, "file80");
verifyHugeFile(homeDir + "/out/huge", (long) (((float) 0.015) * 1024 * 1024 * 64) + 1);
/*
* Crush input files should remain in the input dir.
*/
for (String file : new String[] { "file10", "file11", "file12", "file13" }) {
verifyOutput(homeDir + "/in/dir", file, Format.TEXT, Format.TEXT, null, file);
}
for (String file : new String[] { "file20", "file21", "file22", "file23", "file24" }) {
verifyOutput(homeDir + "/in/dir/subdir", file, Format.SEQUENCE, Format.SEQUENCE, defaultCodec, file);
}
for (String file : new String[] { "file30", "file31", "file32", "file33", "file34" }) {
verifyOutput(homeDir + "/in/dir/subdir/subsubdir", file, Format.SEQUENCE, Format.SEQUENCE, defaultCodec, file);
}
for (String file : new String[] { "file40", "file41", "file42", "file43" }) {
verifyOutput(homeDir + "/in/dir/subdir/other", file, Format.SEQUENCE, Format.SEQUENCE, defaultCodec, file);
}
for (String file : new String[] { "file50", "file51", "file52", "file53", "file54", "file55" }) {
verifyOutput(homeDir + "/in/dir/other", file, Format.SEQUENCE, Format.SEQUENCE, defaultCodec, file);
}
for (String file : new String[] { "file60", "file61", "file62", "file63" }) {
verifyOutput(homeDir + "/in/text", file, Format.TEXT, Format.TEXT, null, file);
}
for (String file : new String[] { "file70", "file71", "file72" }) {
verifyOutput(homeDir + "/in", file, Format.SEQUENCE, Format.SEQUENCE, defaultCodec, file);
}
Counters jobCounters = crush.getJobCounters();
assertThat(jobCounters.getCounter(MapperCounter.DIRS_FOUND), equalTo( 8L));
assertThat(jobCounters.getCounter(MapperCounter.DIRS_ELIGIBLE), equalTo( 7L));
assertThat(jobCounters.getCounter(MapperCounter.DIRS_SKIPPED), equalTo( 1L));
assertThat(jobCounters.getCounter(MapperCounter.FILES_FOUND), equalTo(33L));