Package org.xadoop.saxon

Source Code of org.xadoop.saxon.SaxonReducer

package org.xadoop.saxon;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.util.Iterator;

import javax.xml.transform.stream.StreamSource;

import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XQueryCompiler;
import net.sf.saxon.s9api.XQueryEvaluator;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmValue;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.xadoop.util.XadoopUtil;
import org.xadoop.xquerymr.XQueryMRSaxonConfFile;

public class SaxonReducer extends Reducer<Text, Text, Text, Text> {
 
  String query;
 
  Processor proc;
  XQueryCompiler comp;
  DocumentBuilder doc;
  XQueryExecutable exec;
 
  Text k,v;
 
  //long time_stamp = System.currentTimeMillis();
 
  public SaxonReducer() {
   
  }
 
  protected void setup(Context context) throws IOException {   
   
    try {
      super.setup(context);
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
    query = XadoopUtil.prepareSaxon(context.getConfiguration(), false, "saxon:reduce" + context.getJobName());
       
    //System.out.println(context.getJobName());
   
    proc = new Processor(true);
    comp = proc.newXQueryCompiler();
    doc = proc.newDocumentBuilder();
    try {
      exec = comp.compile(query);
    } catch (SaxonApiException e) {
      e.printStackTrace();
    }
   
    //System.out.print(System.currentTimeMillis()-time_stamp+", ");
  }
 
  protected void reduce(Text key, Iterable<Text> inputValues, Context context) {
   
    //System.out.print(System.currentTimeMillis()-time_stamp+", ");
       
    Configuration conf = context.getConfiguration();
   
    //System.out.println(System.getProperty("user.dir"));
   
    //System.out.println("Receiving key: " + key);
   
    XQueryEvaluator eval = exec.load();
   
    //System.out.print(System.currentTimeMillis()-time_stamp+", ");
   
   
    int counter = 0;
   
   
    //if streaming is enabled we need to write the input to a file
    //in order to allow the use of the doc tag, which is required by
    //saxon:stream()
    if (conf.get(XQueryMRSaxonConfFile.PROPNAME_STREAM_ENABLED).equals("true")) {
      BufferedWriter valWriter;
     
      try {
        FileWriter fVal = new FileWriter("value.txt");
        valWriter = new BufferedWriter(fVal);
       
        valWriter.write("<root>\n");
        for (Text inputValue : inputValues) {
          valWriter.write(inputValue.toString());
          counter++;
        }
        valWriter.write("</root>");
       
        valWriter.flush();
        fVal.close();
        valWriter.close();
        System.out.println("Length of value-input " + counter);
      } catch (IOException e1) {
        e1.printStackTrace();
      }
     
      //System.out.print(System.currentTimeMillis()-time_stamp+", ");
     
      XdmNode keyDoc;
     
      try {
        keyDoc = doc.build(new StreamSource(new StringReader(key.toString())));
               
        eval.setExternalVariable(new QName("key"), keyDoc);
        eval.setExternalVariable(new QName("uri"), new XdmAtomicValue("value.txt"));
      } catch (SaxonApiException e) {
        e.printStackTrace();
      }
   
      //System.out.print(System.currentTimeMillis()-time_stamp+", ");
     
    //if streaming is not enabled (no need to write input to file)
    } else {
     
      StringBuffer values = new StringBuffer();
      values.append("<root>");
      for (Text inputValue : inputValues) {
        values.append(inputValue.toString().trim().replaceAll("\n", "").replaceAll("\t", ""));
        //counter++;
      }
      values.append("</root>");
     
      //System.out.println("Length of input: " + counter);
     
      //System.out.println("###VALUES###:\n" + values);
     
      //System.out.print(System.currentTimeMillis()-time_stamp+", ");
     
      XdmNode keyDoc;
      XdmNode valDoc;
     
      try {
        keyDoc = doc.build(new StreamSource(new StringReader(key.toString())));
        valDoc = doc.build(new StreamSource(new StringReader(values.toString())));
       
        eval.setExternalVariable(new QName("key"), keyDoc);
        eval.setExternalVariable(new QName("value"), valDoc);
      } catch (SaxonApiException e) {
        e.printStackTrace();
      }
    }
   
    //System.out.print(System.currentTimeMillis()-time_stamp+", ");
   
   
   
    try {
      Iterator<XdmItem> iter = eval.iterator();
      while (iter.hasNext()) {
        context.write(new Text(iter.next().toString()), new Text(iter.next().toString()));
      }
    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
   
   
    k = new Text();
    v = new Text();
    String result_string;
   
    /*
    long mem0 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
   
    System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
    System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
   
    long mem1 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
   
    try {
      XdmValue res = eval.evaluate();
           
      long mem2 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
     
      System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
      System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
     
      long mem3 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
     
      //System.out.print(System.currentTimeMillis()-time_stamp+", ");
     
      System.out.println("mem0: " + mem0 + " mem1: " + mem1 + " mem2: " + mem2 + " mem3: " + mem3);
     
      context.write(new Text(res.itemAt(0).toString().trim()), new Text(res.itemAt(1).toString().trim()));
     
      long mem4 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
     
      System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
      System.gc();System.gc();System.gc();System.gc();System.gc();System.gc();
     
      long mem5 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
     
      System.out.println(" mem4: " + mem4 + " mem5: " + mem5);
   
    */
   
    /*
    try {
      XdmValue res = eval.evaluate();
     
      for (XdmItem i : res) {
        result_string = i.toString();
       
        System.out.println("Length of result-string: " + result_string.length());
       
        if (result_string.startsWith("<Key>")) {
          //System.out.println("Emitting key: " + result_string);
          k.set(result_string.trim().replaceAll("\n", "").replaceAll("\t", ""));
        } else {
          //System.out.println("Emitting value: " + result_string);
          v.set(result_string.trim().replaceAll("\n", "").replaceAll("\t", ""));
         
          context.write(k, v);
        }
     
        //System.out.println(System.currentTimeMillis()-time_stamp);
       
      }
     
    } catch (SaxonApiException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
    */
   
  }
 
  protected void cleanup(Context context) throws IOException {
   
  }
}
TOP

Related Classes of org.xadoop.saxon.SaxonReducer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.