package org.xadoop.saxon;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.util.Iterator;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XQueryCompiler;
import net.sf.saxon.s9api.XQueryEvaluator;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.xadoop.util.XadoopUtil;
import org.xadoop.xquerymr.XQueryMRSaxonConfFile;
public class SaxonReducer extends Reducer<Text, Text, Text, Text> {
String query;
Processor proc;
XQueryCompiler comp;
DocumentBuilder doc;
XQueryExecutable exec;
Text k,v;
//long time_stamp = System.currentTimeMillis();
public SaxonReducer() {
}
protected void setup(Context context) throws IOException {
try {
super.setup(context);
} catch (InterruptedException e) {
e.printStackTrace();
}
query = XadoopUtil.prepareSaxon(context.getConfiguration(), false, "saxon:reduce" + context.getJobName());
//System.out.println(context.getJobName());
proc = new Processor(true);
comp = proc.newXQueryCompiler();
doc = proc.newDocumentBuilder();
try {
exec = comp.compile(query);
} catch (SaxonApiException e) {
e.printStackTrace();
}
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
}
protected void reduce(Text key, Iterable<Text> inputValues, Context context) {
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
Configuration conf = context.getConfiguration();
//System.out.println(System.getProperty("user.dir"));
//System.out.println("Receiving key: " + key);
XQueryEvaluator eval = exec.load();
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
int counter = 0;
//if streaming is enabled we need to write the input to a file
//in order to allow the use of the doc tag, which is required by
//saxon:stream()
if (conf.get(XQueryMRSaxonConfFile.PROPNAME_STREAM_ENABLED).equals("true")) {
BufferedWriter valWriter;
try {
FileWriter fVal = new FileWriter("value.txt");
valWriter = new BufferedWriter(fVal);
valWriter.write("<root>\n");
for (Text inputValue : inputValues) {
valWriter.write(inputValue.toString());
counter++;
}
valWriter.write("</root>");
valWriter.flush();
fVal.close();
valWriter.close();
System.out.println("Length of value-input " + counter);
} catch (IOException e1) {
e1.printStackTrace();
}
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
XdmNode keyDoc;
try {
keyDoc = doc.build(new StreamSource(new StringReader(key.toString())));
eval.setExternalVariable(new QName("key"), keyDoc);
eval.setExternalVariable(new QName("uri"), new XdmAtomicValue("value.txt"));
} catch (SaxonApiException e) {
e.printStackTrace();
}
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
//if streaming is not enabled (no need to write input to file)
} else {
StringBuffer values = new StringBuffer();
values.append("<root>");
for (Text inputValue : inputValues) {
values.append(inputValue.toString().trim().replaceAll("\n", "").replaceAll("\t", ""));
//counter++;
}
values.append("</root>");
//System.out.println("Length of input: " + counter);
//System.out.println("###VALUES###:\n" + values);
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
XdmNode keyDoc;
XdmNode valDoc;
try {
keyDoc = doc.build(new StreamSource(new StringReader(key.toString())));
valDoc = doc.build(new StreamSource(new StringReader(values.toString())));
eval.setExternalVariable(new QName("key"), keyDoc);
eval.setExternalVariable(new QName("value"), valDoc);
} catch (SaxonApiException e) {
e.printStackTrace();
}
}
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
try {
Iterator<XdmItem> iter = eval.iterator();
while (iter.hasNext()) {
context.write(new Text(iter.next().toString()), new Text(iter.next().toString()));
}
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
k = new Text();
v = new Text();
String result_string;
/*
long mem0 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
long mem1 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
try {
XdmValue res = eval.evaluate();
long mem2 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
long mem3 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
//System.out.print(System.currentTimeMillis()-time_stamp+", ");
System.out.println("mem0: " + mem0 + " mem1: " + mem1 + " mem2: " + mem2 + " mem3: " + mem3);
context.write(new Text(res.itemAt(0).toString().trim()), new Text(res.itemAt(1).toString().trim()));
long mem4 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
long mem5 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
System.out.println(" mem4: " + mem4 + " mem5: " + mem5);
*/
/*
try {
XdmValue res = eval.evaluate();
for (XdmItem i : res) {
result_string = i.toString();
System.out.println("Length of result-string: " + result_string.length());
if (result_string.startsWith("<Key>")) {
//System.out.println("Emitting key: " + result_string);
k.set(result_string.trim().replaceAll("\n", "").replaceAll("\t", ""));
} else {
//System.out.println("Emitting value: " + result_string);
v.set(result_string.trim().replaceAll("\n", "").replaceAll("\t", ""));
context.write(k, v);
}
//System.out.println(System.currentTimeMillis()-time_stamp);
}
} catch (SaxonApiException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
*/
}
protected void cleanup(Context context) throws IOException {
}
}