package org.apache.hadoop.mapred.pipes;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TestMiniMRWithDFS;
import org.apache.hadoop.mapred.Utils;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;
public class TestPipes extends TestCase {
// Logger for this test class.
// NOTE(review): the initializer expression for LOG is not visible in this view.
private static final Log LOG =
// Root directory of the built C++ examples, taken from the
// "install.c++.examples" system property.
// NOTE(review): System.getProperty returns null when the property is unset;
// confirm how Path behaves in that case, since this runs at class-load time.
private static Path cppExamples =
new Path(System.getProperty("install.c++.examples"));
// Word-count executable used by testPipes for the 2-reduce and 0-reduce runs.
static Path wordCountSimple =
new Path(cppExamples, "bin/wordcount-simple");
// Word-count executable used by testPipes with the fixedPartitionOutput expectations.
static Path wordCountPart =
new Path(cppExamples, "bin/wordcount-part");
// Word-count executable handed to runNonPipedProgram.
static Path wordCountNoPipes =
new Path(cppExamples,"bin/wordcount-nopipe");
// Output directory of the non-piped run; assigned inside runNonPipedProgram.
static Path nonPipedOutDir;
/**
 * Deletes a path and asserts that it no longer exists afterwards.
 * @param fs the file system that holds the path
 * @param p the path to remove
 * @throws IOException declared by the method; presumably raised by the
 *         file system calls (confirm against the FileSystem API)
 */
static void cleanup(FileSystem fs, Path p) throws IOException {
// The boolean is the "recursive" argument of FileSystem.delete(Path, boolean).
fs.delete(p, true);
// Fails the test with "output not cleaned up" if the path is still present.
assertFalse("output not cleaned up", fs.exists(p));
/**
 * Runs the pipes word-count examples against mini DFS and MR clusters.
 * It writes the input file, runs wordcount-simple with 2 reduces and with
 * 0 reduces, runs wordcount-part with 2 reduces, and finally runs the
 * non-piped program. The HDFS output directory is cleaned between the
 * first three runs.
 * @throws IOException declared by the method; propagated from the helpers it calls
 */
public void testPipes() throws IOException {
// When the "compile.c++" property is absent, log that the test is skipped.
// NOTE(review): the statement that actually leaves the method, and the
// closing brace of this if, are not visible in this view.
if (System.getProperty("compile.c++") == null) {
LOG.info("compile.c++ is not defined, so skipping TestPipes");
// Declared outside the try so they remain in scope for the finally block.
MiniDFSCluster dfs = null;
MiniMRCluster mr = null;
Path inputPath = new Path("/testing/in");
Path outputPath = new Path("/testing/out");
try {
final int numSlaves = 2;
Configuration conf = new Configuration();
dfs = new MiniDFSCluster(conf, numSlaves, true, null);
// The MR cluster is pointed at the mini DFS by file system name.
// NOTE(review): meaning of the trailing 1 is not shown here — confirm
// against the MiniMRCluster constructor.
mr = new MiniMRCluster(numSlaves, dfs.getFileSystem().getName(), 1);
writeInputFile(dfs.getFileSystem(), inputPath);
// wordcount-simple: numMaps=3, numReduces=2, default job conf (null).
runProgram(mr, dfs, wordCountSimple,
inputPath, outputPath, 3, 2, twoSplitOutput, null);
cleanup(dfs.getFileSystem(), outputPath);
// wordcount-simple again with numReduces=0, which takes the
// submit-and-poll branch in runProgram.
runProgram(mr, dfs, wordCountSimple,
inputPath, outputPath, 3, 0, noSortOutput, null);
cleanup(dfs.getFileSystem(), outputPath);
// wordcount-part: numMaps=3, numReduces=2.
runProgram(mr, dfs, wordCountPart,
inputPath, outputPath, 3, 2, fixedPartitionOutput, null);
// No cleanup here: runNonPipedProgram writes to its own local
// output directory (nonPipedOutDir), not to outputPath.
runNonPipedProgram(mr, dfs, wordCountNoPipes, null);
// NOTE(review): the body of this finally block is not visible in this view.
} finally {
// Expected job output, one array element per output file; runProgram
// compares these element-by-element with what it reads back. Each entry
// is a sequence of "word<TAB>count<NEWLINE>" records.
// NOTE(review): the literals below are incomplete in this view (dangling
// '+' operators, no closing "};"), so the full expected text is not shown.

// Expectation for wordcount-simple run with 2 reduces.
final static String[] twoSplitOutput = new String[] {
"`and\t1\na\t1\nand\t1\nbeginning\t1\nbook\t1\nbut\t1\nby\t1\n" +
"Alice\t2\n`without\t1\nbank,\t1\nbook,'\t1\nconversations\t1\nget\t1\n" +
"into\t1\nis\t1\nreading,\t1\nshe\t1\nsister\t2\nsitting\t1\ntired\t1\n" +
// Expectation for wordcount-simple run with 0 reduces.
final static String[] noSortOutput = new String[] {
"it,\t1\n`and\t1\nwhat\t1\nis\t1\nthe\t1\nuse\t1\nof\t1\na\t1\n" +
// Expectation for the wordcount-part run with 2 reduces.
final static String[] fixedPartitionOutput = new String[] {
"Alice\t2\n`and\t1\n`without\t1\na\t1\nand\t1\nbank,\t1\nbeginning\t1\n" +
"do:\t1\nget\t1\nhad\t2\nhaving\t1\nher\t2\nin\t1\ninto\t1\nis\t1\n" +
"it\t1\nit,\t1\nno\t1\nnothing\t1\nof\t3\non\t1\nonce\t1\nor\t3\n" +
"peeped\t1\npictures\t2\nreading,\t1\nshe\t1\nsister\t2\nsitting\t1\n" +
"the\t3\nthought\t1\ntired\t1\nto\t2\ntwice\t1\nuse\t1\n" +
/**
 * Creates the file "part0" under the given directory and writes six lines
 * of sample text into it as the word-count input.
 * @param fs the file system to write to
 * @param dir the directory that will contain the "part0" input file
 * @throws IOException declared by the method; presumably raised by the
 *         create or write calls
 */
static void writeInputFile(FileSystem fs, Path dir) throws IOException {
DataOutputStream out = fs.create(new Path(dir, "part0"));
// Each call writes one newline-terminated line of the sample text.
out.writeBytes("Alice was beginning to get very tired of sitting by her\n");
out.writeBytes("sister on the bank, and of having nothing to do: once\n");
out.writeBytes("or twice she had peeped into the book her sister was\n");
out.writeBytes("reading, but it had no pictures or conversations in\n");
out.writeBytes("it, `and what is the use of a book,' thought Alice\n");
out.writeBytes("`without pictures or conversation?'\n");
// NOTE(review): no close of 'out' is visible in this view — confirm the
// stream is closed before the job reads the file.
/**
 * Stages a pipes executable on the DFS, runs it as a job over the given
 * input, and checks the job's success, its "WORDCOUNT" counters and its
 * output files against the expected results.
 * @param mr the mini MR cluster; supplies the job conf when {@code conf} is null
 * @param dfs the mini DFS cluster whose file system holds the executable and output
 * @param program local path of the executable to copy to the DFS
 * @param inputPath job input path
 * @param outputPath job output path
 * @param numMaps requested number of maps
 *        (NOTE(review): not referenced in the lines visible here)
 * @param numReduces number of reduces; 0 selects the submit-and-poll branch
 * @param expectedResults expected content, one element per output file
 * @param conf optional base configuration; when null one is created from {@code mr}
 * @throws IOException declared by the method; propagated from file system and job calls
 */
static void runProgram(MiniMRCluster mr, MiniDFSCluster dfs,
Path program, Path inputPath, Path outputPath,
int numMaps, int numReduces, String[] expectedResults,
JobConf conf
) throws IOException {
// DFS location the executable is copied to.
Path wordExec = new Path("/testing/bin/application");
JobConf job = null;
// Use the cluster's job conf unless the caller supplied one to copy.
if(conf == null) {
job = mr.createJobConf();
}else {
job = new JobConf(conf);
FileSystem fs = dfs.getFileSystem();
// Clear /testing/bin before copying so a previous run's binary is gone.
fs.delete(wordExec.getParent(), true);
fs.copyFromLocalFile(program, wordExec);
// The job is given the fully qualified DFS path of the executable.
Submitter.setExecutable(job, fs.makeQualified(wordExec).toString());
// Record reading and writing are both flagged as Java-side.
Submitter.setIsJavaRecordReader(job, true);
Submitter.setIsJavaRecordWriter(job, true);
FileInputFormat.setInputPaths(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
RunningJob rJob = null;
// With zero reduces the job is submitted and then polled for completion;
// otherwise Submitter.runJob is used.
if (numReduces == 0) {
rJob = Submitter.jobSubmit(job);
while (!rJob.isComplete()) {
// NOTE(review): the body of this try is not visible in this view;
// the catch implies it performs an interruptible wait.
try {
} catch (InterruptedException ie) {
throw new RuntimeException(ie);
} else {
rJob = Submitter.runJob(job);
assertTrue("pipes job failed", rJob.isSuccessful());
// The job is expected to have published at least one counter in the
// "WORDCOUNT" group.
Counters counters = rJob.getCounters();
Counters.Group wordCountCounters = counters.getGroup("WORDCOUNT");
int numCounters = 0;
// NOTE(review): the loop body (where numCounters would be advanced) is
// not visible in this view.
for (Counter c : wordCountCounters) {
assertTrue("No counters found!", (numCounters > 0));
// Read back every output file selected by OutputFilesFilter, one string per file.
List<String> results = new ArrayList<String>();
for (Path p:FileUtil.stat2Paths(dfs.getFileSystem().listStatus(outputPath,
new Utils.OutputFileUtils.OutputFilesFilter()))) {
results.add(TestMiniMRWithDFS.readOutput(p, job));
// The number of output files must equal the number of expected entries,
// and each file's content must match the entry at the same index.
assertEquals("number of reduces is wrong",
expectedResults.length, results.size());
for(int i=0; i < results.size(); i++) {
assertEquals("pipes program " + program + " output " + i + " wrong",
expectedResults[i], results.get(i));
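/**
 * Run a map/reduce word count that does all of the map input and reduce
 * output directly rather than sending it back up to Java.
 * @param mr The mini mr cluster
 * @param dfs the dfs cluster
 * @param program the program to run
 * @param conf the job configuration to copy, or null to create one from
 *        the mini mr cluster
 * @throws IOException if one of the file system calls used to stage the
 *         job fails
 */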
static void runNonPipedProgram(MiniMRCluster mr, MiniDFSCluster dfs,
Path program, JobConf conf) throws IOException {
JobConf job;
if(conf == null) {
job = mr.createJobConf();
}else {
job = new JobConf(conf);
FileSystem local = FileSystem.getLocal(job);
Path testDir = new Path("file:" + System.getProperty("test.build.data"),
Path inDir = new Path(testDir, "input");
nonPipedOutDir = new Path(testDir, "output");
Path wordExec = new Path("/testing/bin/application");
Path jobXml = new Path(testDir, "job.xml");
FileSystem fs = dfs.getFileSystem();
fs.delete(wordExec.getParent(), true);
fs.copyFromLocalFile(program, wordExec);
DataOutputStream out = local.create(new Path(inDir, "part0"));
out.writeBytes("i am a silly test\n");
out.writeBytes("you are silly\n");
out.writeBytes("i am a cat test\n");
out.writeBytes("you is silly\n");
out.writeBytes("i am a billy test\n");
out.writeBytes("hello are silly\n");
out = local.create(new Path(inDir, "part1"));
out.writeBytes("mall world things drink java\n");
out.writeBytes("hall silly cats drink java\n");
out.writeBytes("all dogs bow wow\n");
out.writeBytes("hello drink java\n");
local.delete(nonPipedOutDir, true);
local.mkdirs(nonPipedOutDir, new FsPermission(FsAction.ALL, FsAction.ALL,
out = local.create(jobXml);
System.err.println("About to run: Submitter -conf " + jobXml + " -input "
+ inDir + " -output " + nonPipedOutDir + " -program "
+ dfs.getFileSystem().makeQualified(wordExec));
try {
int ret = ToolRunner.run(new Submitter(),
new String[]{"-conf", jobXml.toString(),
"-input", inDir.toString(),
"-output", nonPipedOutDir.toString(),
"-reduces", "2"});
assertEquals(0, ret);
} catch (Exception e) {
assertTrue("got exception: " + StringUtils.stringifyException(e), false);