package co.nubetech.hiho.uuid;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
import co.nubetech.hiho.common.HIHOConf;
import co.nubetech.hiho.dedup.DelimitedTextInputFormat;
import co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat;
/**
 * Driver for a map-only Hadoop job that runs {@link UuidMapper} over a
 * delimited text input and writes the mapper's {@link Text} values through
 * {@link NoKeyOnlyValueOutputFormat}.
 *
 * <p>Usage: {@code UuidJob <inputPath> [outputPath]}. When no output path is
 * given, the relative path {@code "output"} is used.
 *
 * <p>NOTE(review): judging by the class names, the mapper presumably adds a
 * UUID to every record and the output format presumably writes values
 * without keys -- confirm against those classes.
 */
public class UuidJob extends Configured implements Tool {

    private final static Logger logger = Logger
            .getLogger(co.nubetech.hiho.uuid.UuidJob.class);

    /** Output location used when the caller does not supply one. */
    private static final String DEFAULT_OUTPUT_PATH = "output";

    /** Exit code returned when the job completed successfully. */
    private static final int EXIT_SUCCESS = 0;

    /** Exit code returned when the job failed or could not be run. */
    private static final int EXIT_FAILURE = 1;

    /** Exit code returned when the command line arguments are unusable. */
    private static final int EXIT_USAGE = 2;

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args {@code args[0]} is the input path (required);
     *             {@code args[1]} is the output path (optional, defaults to
     *             {@code "output"})
     * @return {@code 0} if the job succeeded, {@code 1} if it failed or an
     *         error occurred while running it, {@code 2} if the input path
     *         argument is missing
     * @throws Exception if the job cannot be set up
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 1) {
            // Fail with a usable message instead of an
            // ArrayIndexOutOfBoundsException further down.
            System.err.println("Usage: UuidJob <inputPath> [outputPath]");
            return EXIT_USAGE;
        }
        String inputPath = args[0];
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = getConf();
        Job job = new Job(conf);
        job.setJobName("UUID_Job");
        job.setMapperClass(UuidMapper.class);
        job.setJarByClass(UuidJob.class);

        // Dumping the whole configuration is only worth the iteration and
        // string building when debug output is actually enabled.
        if (logger.isDebugEnabled()) {
            for (Entry<String, String> entry : conf) {
                logger.debug("key, value " + entry.getKey() + "="
                        + entry.getValue());
            }
        }

        job.getConfiguration().setInt(HIHOConf.NUMBER_MAPPERS, 5);
        // Map-only job: the mapper output is the final output.
        job.setNumReduceTasks(0);

        job.setInputFormatClass(DelimitedTextInputFormat.class);
        DelimitedTextInputFormat.addInputPath(job, new Path(inputPath));
        // NOTE(review): "," is presumably the field delimiter and 1 a column
        // selector -- confirm against DelimitedTextInputFormat.setProperties.
        DelimitedTextInputFormat.setProperties(job, ",", 1);

        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);
        NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(outputPath));

        try {
            return job.waitForCompletion(true) ? EXIT_SUCCESS : EXIT_FAILURE;
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can see it.
            Thread.currentThread().interrupt();
            logger.error("Interrupted while waiting for UUID_Job to complete",
                    e);
            return EXIT_FAILURE;
        } catch (Exception e) {
            // A job that blew up must not be reported as a success.
            logger.error("UUID_Job failed", e);
            return EXIT_FAILURE;
        }
    }

    /**
     * Command line entry point; runs the job through {@link ToolRunner} and
     * exits the JVM with the code returned by {@link #run(String[])}.
     *
     * @param args command line arguments, see {@link #run(String[])}
     * @throws Exception if {@link ToolRunner} fails to run the tool
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new UuidJob(), args);
        System.exit(res);
    }
}