Package org.xadoop.saxon

Source Code of org.xadoop.saxon.SaxonMapper

package org.xadoop.saxon;

import java.io.IOException;
import java.io.StringReader;

import javax.xml.transform.stream.StreamSource;

import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XQueryCompiler;
import net.sf.saxon.s9api.XQueryEvaluator;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmValue;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.xadoop.util.XadoopUtil;

public class SaxonMapper extends Mapper<Object, Text, Text, Text> {
 
  private String query;
 
  Processor proc;
  XQueryCompiler comp;
  DocumentBuilder doc;
  XQueryExecutable exec;
 
  Text k, v;
 
  //long time_stamp = System.currentTimeMillis();
 
  public SaxonMapper() {
   
  }
 
  protected void setup(Context context) throws IOException {
    try {
      super.setup(context);
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
   
    query = XadoopUtil.prepareSaxon(context.getConfiguration(), true, "saxon:map" + context.getJobName());
   
    //System.out.println(context.getJobName());
   
    proc = new Processor(true);
    comp = proc.newXQueryCompiler();
    doc = proc.newDocumentBuilder();
    try {
      exec = comp.compile(query);
    } catch (SaxonApiException e) {
      e.printStackTrace();
    }
   
    //System.out.print(System.currentTimeMillis()-time_stamp+", ");
   
  }
 
  protected void map(Object key, Text value, Context context) {
   
    //System.out.print(System.currentTimeMillis()-time_stamp+", ");
   
    //System.out.println("Receiving key: " + key);
    //System.out.println("Receiving value: " + value);
   
    XQueryEvaluator eval = exec.load();
   
    //System.out.print(System.currentTimeMillis()-time_stamp+", ");
   
    XdmNode keyDoc;
    XdmNode valDoc;

    try {
      keyDoc = doc.build(new StreamSource(new StringReader("<Key>"+key.toString()+"</Key>")));
      valDoc = doc.build(new StreamSource(new StringReader(value.toString())));
   
      eval.setExternalVariable(new QName("key"), keyDoc);
      eval.setExternalVariable(new QName("value"), valDoc);

    } catch (SaxonApiException e1) {
      e1.printStackTrace();
    }
   
    //System.out.print(System.currentTimeMillis()-time_stamp+", ");
   

   
    k = new Text();
    v = new Text();
    String result_string;
   
    try {
      XdmValue res = eval.evaluate();
      for (XdmItem i : res) {
        result_string = i.toString();
       
        System.out.println("Emitting: " + result_string);
       
        if (result_string.startsWith("<Key>")) {
          k.set(result_string.trim().replaceAll("\n", "").replaceAll("\t", ""));
        } else {
          v.set(result_string.trim().replaceAll("\n", "").replaceAll("\t", ""));
         
          context.write(k, v);
        }
       
      }
    } catch (SaxonApiException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
   
    //System.out.println(System.currentTimeMillis()-time_stamp);
       
  }
 
  protected void cleanup(Context context) throws IOException {
  }
}
TOP

Related Classes of org.xadoop.saxon.SaxonMapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.