package org.xadoop.driver;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.xadoop.xquerymr.XQueryMRZorbaConfFile;
import org.xadoop.zorba.ZorbaMapper;
import org.xadoop.zorba.ZorbaReducer;
//import org.apache.log4j.Logger;
/**
 * Driver that runs a pipeline of Zorba XQuery MapReduce jobs.
 *
 * <p>The pipeline is read from the configuration property
 * {@code XQueryMRZorbaConfFile.PROPNAME_PIPELINE} as a comma+space
 * separated list of job names. Each job reads the previous job's output
 * directory (the first job reads the given input directory) and writes to
 * {@code <output dir>/<job name>}.
 *
 * <p>Exit codes: 2 for a usage error, -1 (255) for missing configuration
 * properties, 1 when a job in the pipeline fails.
 */
public class ZorbaDriver {
    //private static Logger log = Logger.getLogger(XadoopDriver.class);

    /**
     * Entry point. Expects two remaining arguments after generic Hadoop
     * options: the input directory and the output directory.
     *
     * @param args command-line arguments; parsed by {@link GenericOptionsParser}
     * @throws IOException if job submission or HDFS access fails
     */
    public static void main(String[] args) throws IOException {
        Configuration configuration = new Configuration();
        GenericOptionsParser parser = new GenericOptionsParser(configuration, args);
        args = parser.getRemainingArgs();
        if (args.length < 2) {
            System.err.println("Usage: <input dir> <output dir>");
            //log.error("Usage: <input dir> <output dir>");
            System.exit(2);
        }
        final String inputDirName = args[0];
        final String outputDirName = args[1];
        // FileSystem.get(jobMaster).delete(new Path(outputDirName), true);

        // The pipeline property lists the job names to run, in order,
        // separated by ", " (comma + space).
        final String pipeline = configuration.get(XQueryMRZorbaConfFile.PROPNAME_PIPELINE);
        if (pipeline == null) {
            System.err.println("ERROR: " + XQueryMRZorbaConfFile.PROPNAME_PIPELINE + " not set");
            //log.error(XQueryMRZorbaConfFile.PROPNAME_PIPELINE + " not set");
            System.exit(-1);
        }
        final String[] pipelineSplitted = pipeline.split(", ");

        final String prolog = configuration.get(XQueryMRZorbaConfFile.PROPNAME_QUERYFILE);
        if (prolog == null) {
            System.err.println("ERROR: " + XQueryMRZorbaConfFile.PROPNAME_QUERYFILE + " not set");
            //log.error(XQueryMRZorbaConfFile.PROPNAME_QUERYFILE + " not set");
            System.exit(-1);
        }

        // Ship the query file to every task node so the mappers/reducers
        // can read it from the local distributed cache.
        DistributedCache.addCacheFile(new Path(prolog).toUri(), configuration);

        // Run each job in the pipeline; each consumes the previous job's
        // output directory (the first one consumes the user-supplied input).
        String previousJobDirName = inputDirName;
        for (String jobName : pipelineSplitted) {
            Job job = new Job(configuration);
            job.setJobName(jobName);
            job.setJarByClass(ZorbaDriver.class);
            job.setMapperClass(ZorbaMapper.class);
            job.setReducerClass(ZorbaReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setInputFormatClass(TextInputFormat.class);

            // use input from previous job (or given input)
            // write output to this job dir
            String currentJobDirName = outputDirName + "/" + jobName;
            FileInputFormat.setInputPaths(job, new Path(previousJobDirName));
            FileOutputFormat.setOutputPath(job, new Path(currentJobDirName));

            // iterate
            previousJobDirName = currentJobDirName;

            // run job; abort the pipeline if it fails, otherwise later jobs
            // would read missing or partial output from this one
            try {
                if (!job.waitForCompletion(true)) {
                    System.err.println("ERROR: job '" + jobName + "' failed; aborting pipeline");
                    System.exit(1);
                }
            } catch (InterruptedException e) {
                // restore the interrupt status before propagating
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
    }
}