package org.xadoop.saxon;
import java.io.IOException;
import java.io.StringReader;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XQueryCompiler;
import net.sf.saxon.s9api.XQueryEvaluator;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmValue;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.xadoop.util.XadoopUtil;
public class SaxonMapper extends Mapper<Object, Text, Text, Text> {
private String query;
Processor proc;
XQueryCompiler comp;
DocumentBuilder doc;
XQueryExecutable exec;
Text k, v;
//long time_stamp = System.currentTimeMillis();
public SaxonMapper() {
}
protected void setup(Context context) throws IOException {
try {
super.setup(context);
} catch (InterruptedException e) {
e.printStackTrace();
}
query = XadoopUtil.prepareSaxon(context.getConfiguration(), true, "saxon:map" + context.getJobName());
//System.out.println(context.getJobName());
proc = new Processor(true);
comp = proc.newXQueryCompiler();
doc = proc.newDocumentBuilder();
try {
exec = comp.compile(query);
} catch (SaxonApiException e) {
e.printStackTrace();
}
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
}
protected void map(Object key, Text value, Context context) {
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
//System.out.println("Receiving key: " + key);
//System.out.println("Receiving value: " + value);
XQueryEvaluator eval = exec.load();
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
XdmNode keyDoc;
XdmNode valDoc;
try {
keyDoc = doc.build(new StreamSource(new StringReader("<Key>"+key.toString()+"</Key>")));
valDoc = doc.build(new StreamSource(new StringReader(value.toString())));
eval.setExternalVariable(new QName("key"), keyDoc);
eval.setExternalVariable(new QName("value"), valDoc);
} catch (SaxonApiException e1) {
e1.printStackTrace();
}
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
k = new Text();
v = new Text();
String result_string;
try {
XdmValue res = eval.evaluate();
for (XdmItem i : res) {
result_string = i.toString();
System.out.println("Emitting: " + result_string);
if (result_string.startsWith("<Key>")) {
k.set(result_string.trim().replaceAll("\n", "").replaceAll("\t", ""));
} else {
v.set(result_string.trim().replaceAll("\n", "").replaceAll("\t", ""));
context.write(k, v);
}
}
} catch (SaxonApiException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
//System.out.println(System.currentTimeMillis()-time_stamp);
}
protected void cleanup(Context context) throws IOException {
}
}