package linear;
import java.io.IOException;
import java.io.PrintStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import comparison.AllPairsMapper;
import comparison.AllPairsReducer;
import comparison.ExhaustiveUniqueGrouper;
import comparison.PairSplit;
import comparison.PrimeRot;
/**
 * This class is strictly for generating data points and likely has no practical applications.
 *
 * Hadoop program which takes a set of input sequences and computes all pairs comparisons. Given
 * sequences of length L1 and L2, the comparison function takes O(max(L1, L2)) time to compute.
 *
 * <p>Command line: {@code LinearAligner INPUT OUTPUT NUMITEMS EUGTYPE [NUM_TASKS]}, where the first
 * character of {@code EUGTYPE} selects the grouping strategy ({@code 's'} for {@link PairSplit},
 * {@code 'r'} for {@link PrimeRot}).
 */
public class LinearAligner extends Configured implements Tool {

    /** Identifies the different implementations of {@link ExhaustiveUniqueGrouper} supported by
     * {@link LinearAligner}. */
    private enum EugType {PRIME_ROT, PAIR_SPLIT}

    /** Default number of reduce tasks when NUM_TASKS is not given. */
    private static final int MAX_REDUCES = 200;
    /** Default map task count reported in the log line when NUM_TASKS is not given. */
    private static final int MAX_MAPS = 200;

    /* Configuration attribute names used to hand the grouping parameters to the map tasks. */
    private static final String NUM_ITEMS_ATTR = "ni";
    private static final String EUG_ATTR = "eug";
    private static final String PRIME_ROT_P_ATTR = "n";

    private static final String USAGE = "LinearAligner INPUT OUTPUT NUMITEMS EUGTYPE [NUM_TASKS]";

    /** Separator placed between the fields of the timing line written after the job finishes. */
    public static final String LOG_DELIM = ",";

    /** Destination of the timing line. */
    private static final PrintStream log = System.out;

    /**
     * Command line entry point. Runs the tool through {@link ToolRunner} and exits the JVM with the
     * tool's return code, or with 1 if the tool threw an exception.
     *
     * @param args command line arguments, see the usage string
     */
    public static void main(String[] args) {
        int result = 1;
        try {
            result = ToolRunner.run(new LinearAligner(), args);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("Job failed.");
        }
        System.exit(result);
    }

    /**
     * Configures and runs the all pairs job, then writes one timing line of the form
     * {@code numItems,elapsedMillis,mapTasks,reduceTasks} to the log stream.
     *
     * @param args {@code INPUT OUTPUT NUMITEMS EUGTYPE [NUM_TASKS]}
     * @return -1 if fewer than four arguments were supplied, 0 if the job completed successfully,
     *         1 otherwise
     * @throws IOException if EUGTYPE is empty or does not start with {@code 's'} or {@code 'r'},
     *         or if job setup fails
     * @throws ClassNotFoundException propagated from {@link Job#waitForCompletion(boolean)}
     * @throws InterruptedException propagated from {@link Job#waitForCompletion(boolean)}
     */
    @Override
    public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 4) {
            System.out.println(USAGE);
            return -1;
        }

        Job job = new Job(getConf());
        job.setJarByClass(LinearAligner.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setMapperClass(LinearMapper.class);
        job.setReducerClass(LinearReducer.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        long numItems = Long.parseLong(args[2]);

        /* An empty EUGTYPE has no first character; map it to a code that no case matches so it is
         * rejected the same way as any other unrecognized type. */
        char eugCode = args[3].isEmpty() ? '\0' : args[3].charAt(0);

        EugType eugType = null;
        ExhaustiveUniqueGrouper eug = null;

        /* Determine the EUG type */
        switch (eugCode) {
            /* Pair split */
            case 's':
                eugType = EugType.PAIR_SPLIT;
                eug = new PairSplit(numItems);
                break;
            /* PrimeRot */
            case 'r':
                eugType = EugType.PRIME_ROT;
                eug = PrimeRot.generatePrimeRot(numItems);
                break;
            default:
                throw new IOException("Unrecognized EUG type.");
        }

        int mapTasks = MAX_MAPS;
        int reduceTasks = MAX_REDUCES;
        if (args.length > 4) {
            int numTasks = Integer.parseInt(args[4]);
            mapTasks = numTasks;
            reduceTasks = numTasks;
        }
        /* Only the reduce count is applied to the job; mapTasks is merely reported in the log. */
        job.setNumReduceTasks(reduceTasks);

        /* Setup the key value pairs read back by LinearMapper.setup. The item count is stored as
         * the parsed number so the mapper sees exactly the value used here. */
        Configuration jobConf = job.getConfiguration();
        jobConf.setLong(NUM_ITEMS_ATTR, numItems);
        jobConf.setInt(EUG_ATTR, eugType.ordinal());
        if (eugType == EugType.PRIME_ROT) {
            jobConf.setInt(PRIME_ROT_P_ATTR, ((PrimeRot) eug).getP());
        }

        job.setJobName(eugCode + "-linearAligner:" + numItems);

        long startTime = System.currentTimeMillis();
        boolean result = job.waitForCompletion(true);
        long elapsedMillis = System.currentTimeMillis() - startTime;

        log.println(numItems + LOG_DELIM + elapsedMillis
                + LOG_DELIM + mapTasks + LOG_DELIM + reduceTasks);

        return result ? 0 : 1;
    }

    /**
     * Mapper which rebuilds the {@link ExhaustiveUniqueGrouper} selected by the driver from the job
     * configuration and uses it to answer {@link #getGroups(long)}.
     */
    public static class LinearMapper extends AllPairsMapper<LongWritable, Text> {

        /** Grouper built in {@link #setup}; null if it could not be built. */
        private ExhaustiveUniqueGrouper eug;

        /** Reason {@link #eug} is null, kept so {@link #getGroups(long)} can report it. */
        private IOException setupFailure;

        /**
         * Reads the item count, grouper type and (for PrimeRot) the stored P value from the
         * configuration and builds the grouper. An unknown grouper type falls back to a PrimeRot
         * generated from the item count alone. Failures are recorded rather than thrown; they are
         * reported by {@link #getGroups(long)}.
         *
         * @param context task context providing the job configuration
         */
        @Override
        public void setup(Context context) {
            Configuration conf = context.getConfiguration();
            long numItems = conf.getLong(NUM_ITEMS_ATTR, 0);
            int eugID = conf.getInt(EUG_ATTR, -1);

            eug = null;
            setupFailure = null;

            if (numItems < 0) {
                setupFailure = new IOException(
                        "Negative item count in job configuration: " + numItems);
                return;
            }

            if (eugID == EugType.PRIME_ROT.ordinal()) {
                int p = conf.getInt(PRIME_ROT_P_ATTR, -1);
                if (p != -1) {
                    eug = PrimeRot.generatePrimeRot(numItems, p);
                } else {
                    eug = PrimeRot.generatePrimeRot(numItems);
                }
            } else if (eugID == EugType.PAIR_SPLIT.ordinal()) {
                try {
                    eug = new PairSplit(numItems);
                } catch (IOException e) {
                    /* Keep the cause instead of discarding it. */
                    setupFailure = e;
                }
            } else {
                eug = PrimeRot.generatePrimeRot(numItems);
            }
        }

        /**
         * Returns the groups for the given item id as computed by the configured grouper.
         *
         * @param id item identifier
         * @return the grouper's result for {@code id}
         * @throws IOException if no grouper could be built during setup, or if the grouper fails
         */
        @Override
        protected long[] getGroups(long id) throws IOException {
            if (eug == null) {
                throw new IOException(
                        "No ExhaustiveUniqueGrouper could be built from the job configuration.",
                        setupFailure);
            }
            return eug.getGroups(id);
        }

        /**
         * Extracts the item id from an input value via {@code LinearUtils.parseTextId}.
         */
        @Override
        protected long parseId(Text value) throws IOException {
            return LinearUtils.parseTextId(value);
        }
    }

    /**
     * Reducer which delegates comparison, copying and id parsing of items to {@code LinearUtils}.
     */
    public static class LinearReducer extends AllPairsReducer<Text, LongWritable> {

        /** Compares two items via {@code LinearUtils.linearCompare}. */
        @Override
        protected LongWritable compareItems(Text a, Text b) {
            return LinearUtils.linearCompare(a, b);
        }

        /** Copies a value via {@code LinearUtils.copyText}. */
        @Override
        protected Text copyValue(Text original) {
            return LinearUtils.copyText(original);
        }

        /** Extracts the item id from a value via {@code LinearUtils.parseTextId}. */
        @Override
        protected long parseId(Text value) throws IOException {
            return LinearUtils.parseTextId(value);
        }
    }
}