// Source Code of org.xadoop.driver.ZorbaDriver

package org.xadoop.driver;


import java.io.IOException;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.xadoop.xquerymr.XQueryMRZorbaConfFile;
import org.xadoop.zorba.ZorbaMapper;
import org.xadoop.zorba.ZorbaReducer;


//import org.apache.log4j.Logger;


public class ZorbaDriver {
  
  //private static Logger log = Logger.getLogger(XadoopDriver.class);
  
  public static void main(String[] args) throws IOException {
    Configuration configuration = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(configuration, args);
    args = parser.getRemainingArgs();


    if (args.length < 2) {
      System.err.println("Usage: <input dir> <output dir>");
      //log.error("Usage: <input dir> <output dir>");
      System.exit(2);
    }


    final String inputDirName = args[0];
    final String outputDirName = args[1];


    // FileSystem.get(jobMaster).delete(new Path(outputDirName), true);


    final String pipeline = configuration.get(XQueryMRZorbaConfFile.PROPNAME_PIPELINE);
    if (pipeline == null) {
      System.err.println("ERROR: " + XQueryMRZorbaConfFile.PROPNAME_PIPELINE + " not set");
      //log.error(XQueryMRZorbaConfFile.PROPNAME_PIPELINE + " not set");
      System.exit(-1);
    }
    final String[] pipelineSplitted = pipeline.split(", ");


    final String prolog = configuration.get(XQueryMRZorbaConfFile.PROPNAME_QUERYFILE);
    if (prolog == null) {
      System.err.println("ERROR: " + XQueryMRZorbaConfFile.PROPNAME_QUERYFILE + " not set");
      //log.error(XQueryMRZorbaConfFile.PROPNAME_QUERYFILE + " not set");
      System.exit(-1);
    }


    // TODO find out what this does!
    DistributedCache.addCacheFile(new Path(prolog).toUri(), configuration);


    // look over all jobs
    String previousJobDirName = inputDirName;
    for (String jobName : pipelineSplitted) {
      Job job = new Job(configuration);
      job.setJobName(jobName);
      job.setJarByClass(ZorbaDriver.class);


      job.setMapperClass(ZorbaMapper.class);
      job.setReducerClass(ZorbaReducer.class);


      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(Text.class);
      
      job.setInputFormatClass(TextInputFormat.class);


      // use input from previous job (or given input)
      // write output to this job dir
      String currentJobDirName = outputDirName + "/" + jobName;
      FileInputFormat.setInputPaths(job, new Path(previousJobDirName));
      FileOutputFormat.setOutputPath(job, new Path(currentJobDirName));


      // iterate
      previousJobDirName = currentJobDirName;


      // run job
      try {
        job.waitForCompletion(true);
      } catch (InterruptedException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
      } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
      }
    }
  }


}
// Source Code of org.xadoop.driver.ZorbaDriver

// Related Classes of org.xadoop.driver.ZorbaDriver