package mia.clustering.ch12.lastfm;
import java.io.IOException;
import java.util.regex.Pattern;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that extracts one field from each delimited input line and emits it
 * as the output key with a constant value of {@code 0}.
 *
 * <p>Each input line is split on the literal token {@code <sep>}. Lines that
 * yield fewer than {@value #MIN_FIELDS} fields are counted under the
 * {@code Map / LinesWithErrors} counter and skipped. For every other line the
 * field at index {@value #ARTIST_FIELD} (treated as the artist) is written as
 * the key.
 *
 * <p>NOTE(review): the emitted value is always 0, so downstream consumers
 * presumably only care about the distinct keys - confirm against the reducer.
 */
public class DictionaryMapper extends
    Mapper<LongWritable,Text,Text,IntWritable> {

  /** Field delimiter. Immutable and thread-safe, so compiled once per JVM. */
  private static final Pattern SPLITTER = Pattern.compile("<sep>");

  /** Minimum number of fields a line must split into to be accepted. */
  private static final int MIN_FIELDS = 4;

  /** Zero-based index of the field emitted as the output key. */
  private static final int ARTIST_FIELD = 1;

  // Writable instances are reused across map() calls to avoid allocating two
  // objects per input record; context.write() is expected to serialise them
  // before map() is invoked again.
  private final Text artistKey = new Text();
  private final IntWritable zero = new IntWritable(0);

  /**
   * Splits one input line and emits its artist field with a value of 0.
   *
   * @param key     input key supplied by the framework; not used here
   * @param line    one line of delimited input text
   * @param context task context used for counters and output
   * @throws IOException          if writing the output record fails
   * @throws InterruptedException if the task is interrupted while writing
   */
  @Override
  protected void map(LongWritable key, Text line, Context context) throws IOException,
      InterruptedException {
    // Pattern.split(CharSequence) discards trailing empty strings, so a line
    // whose last fields are empty can produce fewer than MIN_FIELDS entries
    // and will be counted as an error below.
    String[] fields = SPLITTER.split(line.toString());
    if (fields.length < MIN_FIELDS) {
      context.getCounter("Map", "LinesWithErrors").increment(1);
      return;
    }
    artistKey.set(fields[ARTIST_FIELD]);
    context.write(artistKey, zero);
  }
}