package com.ontology2.bakemono.joins;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.VIntWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.Map;
/**
 * Map side of a set join: every incoming value is emitted under a key that
 * carries a numeric tag (the "bucket") identifying which group of input
 * paths the value was read from.
 *
 * <p>The organization of the join is passed in through the job configuration:
 *
 * <pre>
 * com.ontology2.bakemono.joins.inputs.1=path1,path2,path3
 * com.ontology2.bakemono.joins.inputs.2=path4
 * </pre>
 *
 * where the paths are path prefixes: any input file whose path starts with
 * {@code path1}, {@code path2} or {@code path3} goes into bucket 1 for the
 * reducer, and any input file whose path starts with {@code path4} goes into
 * bucket 2. Files that match no configured prefix are tagged with bucket 0.
 *
 * @param <T> type of the values read from the input, which become part of the output key
 */
public abstract class SetJoinMapper<T extends WritableComparable>
        extends Mapper<LongWritable,T,TaggedItem<T>,VIntWritable> {
    static Logger log= Logger.getLogger(SetJoinMapper.class);

    /** Namespace of every configuration key read by this mapper. */
    public static final String JOINS="com.ontology2.bakemono.joins";
    /** Key prefix under which the per-bucket path lists are documented to live. */
    public static final String INPUTS=JOINS+".inputs";

    static final Splitter dotSplitter= Splitter.on(".");
    static final Splitter commaSplitter= Splitter.on(",");

    // Splits a configured path list while tolerating "a, b", "a,,b" and "a,".
    // An empty entry must never become a prefix: every path starts with "",
    // so it would silently claim all input files for that bucket.
    private static final Splitter pathListSplitter=
            commaSplitter.trimResults().omitEmptyStrings();

    /** Path prefix to bucket tag; populated in {@link #setup}. */
    Map<String,VIntWritable> mapping;

    /**
     * Reads the path-prefix-to-bucket mapping from the job configuration.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration that=context.getConfiguration();
        mapping=getPathMapping(that);
    }

    /**
     * Builds the path-prefix-to-bucket mapping from a configuration.
     *
     * <p>Every key in the {@link #JOINS} namespace whose last dot-separated
     * segment is an integer contributes its comma-separated value as a list of
     * path prefixes for that bucket number. Keys whose last segment is not an
     * integer are skipped with a warning instead of failing the whole task.
     *
     * @param that configuration to read the join layout from
     * @return a new map from path prefix to bucket tag; empty if nothing is configured
     */
    static Map<String,VIntWritable> getPathMapping(Configuration that) {
        String prefixRegex=
                "^"+JOINS.replace(".","[.]")+".*$";

        Map<String,VIntWritable> mapping= Maps.newHashMap();
        Map<String,String> targets=that.getValByRegex(prefixRegex);
        for(Map.Entry<String,String> target:targets.entrySet()) {
            String key=target.getKey();
            VIntWritable tag;
            try {
                tag=new VIntWritable(Integer.parseInt(lastSegment(key)));
            } catch(NumberFormatException e) {
                log.warn("Ignoring join setting '"+key+"': last segment is not a bucket number");
                continue;
            }

            for(String path:pathListSplitter.split(target.getValue())) {
                VIntWritable previous=mapping.put(path,tag);
                if(previous!=null && !previous.equals(tag)) {
                    log.warn("Path prefix '"+path+"' is assigned to both bucket "
                            +previous+" and bucket "+tag+"; using bucket "+tag);
                }
            }
        }
        return mapping;
    }

    /**
     * Returns the text after the last {@code '.'} in {@code input}, or the
     * whole input when it contains no dot.
     */
    static String lastSegment(String input) {
        return Iterables.getLast(dotSplitter.split(input));
    }

    /**
     * Emits the value, wrapped together with the bucket tag of the file it
     * came from, as the output key and the tag itself as the output value.
     *
     * <p>The input split is cast to {@link FileSplit}, so the job must use an
     * input format that produces file splits.
     */
    @Override
    protected void map(LongWritable key, T value, Context context) throws IOException, InterruptedException {
        FileSplit split=(FileSplit) context.getInputSplit();
        String thePath=split.getPath().toString();
        VIntWritable currentTag = determineTag(mapping,thePath);
        context.write(newTaggedKey(value,currentTag),currentTag);
    }

    /**
     * Picks the bucket tag for a file path.
     *
     * <p>When several configured prefixes match, the longest (most specific)
     * one wins. Two different prefixes of the same length cannot both match
     * the same path, so the result does not depend on map iteration order.
     *
     * @param mapping path prefix to bucket tag
     * @param thePath full path of the file being read
     * @return the tag of the longest matching prefix, or a tag of 0 if none matches
     */
    static VIntWritable determineTag(Map<String,VIntWritable> mapping,String thePath) {
        VIntWritable currentTag=new VIntWritable(0);
        int longestMatch=-1;
        for(Map.Entry<String,VIntWritable> candidate:mapping.entrySet()) {
            String aPrefix=candidate.getKey();
            if(aPrefix.length()>longestMatch && thePath.startsWith(aPrefix)) {
                longestMatch=aPrefix.length();
                currentTag=candidate.getValue();
            }
        }
        return currentTag;
    }

    /**
     * Wraps a value and its bucket tag into the key type emitted by this mapper.
     *
     * @param key the value read from the input
     * @param tag the bucket the value belongs to
     * @return the tagged key to emit
     */
    abstract TaggedItem<T> newTaggedKey(T key,VIntWritable tag);
}