Package org.xadoop.driver

Source Code of org.xadoop.driver.ZorbaDriver

package org.xadoop.driver;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.xadoop.xquerymr.XQueryMRZorbaConfFile;
import org.xadoop.zorba.ZorbaMapper;
import org.xadoop.zorba.ZorbaReducer;

//import org.apache.log4j.Logger;

public class ZorbaDriver {
 
  //private static Logger log = Logger.getLogger(XadoopDriver.class);
 
  public static void main(String[] args) throws IOException {
    Configuration configuration = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(configuration, args);
    args = parser.getRemainingArgs();

    if (args.length < 2) {
      System.err.println("Usage: <input dir> <output dir>");
      //log.error("Usage: <input dir> <output dir>");
      System.exit(2);
    }

    final String inputDirName = args[0];
    final String outputDirName = args[1];

    // FileSystem.get(jobMaster).delete(new Path(outputDirName), true);

    final String pipeline = configuration.get(XQueryMRZorbaConfFile.PROPNAME_PIPELINE);
    if (pipeline == null) {
      System.err.println("ERROR: " + XQueryMRZorbaConfFile.PROPNAME_PIPELINE + " not set");
      //log.error(XQueryMRZorbaConfFile.PROPNAME_PIPELINE + " not set");
      System.exit(-1);
    }
    final String[] pipelineSplitted = pipeline.split(", ");

    final String prolog = configuration.get(XQueryMRZorbaConfFile.PROPNAME_QUERYFILE);
    if (prolog == null) {
      System.err.println("ERROR: " + XQueryMRZorbaConfFile.PROPNAME_QUERYFILE + " not set");
      //log.error(XQueryMRZorbaConfFile.PROPNAME_QUERYFILE + " not set");
      System.exit(-1);
    }

    // TODO find out what this does!
    DistributedCache.addCacheFile(new Path(prolog).toUri(), configuration);

    // look over all jobs
    String previousJobDirName = inputDirName;
    for (String jobName : pipelineSplitted) {
      Job job = new Job(configuration);
      job.setJobName(jobName);
      job.setJarByClass(ZorbaDriver.class);

      job.setMapperClass(ZorbaMapper.class);
      job.setReducerClass(ZorbaReducer.class);

      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(Text.class);
     
      job.setInputFormatClass(TextInputFormat.class);

      // use input from previous job (or given input)
      // write output to this job dir
      String currentJobDirName = outputDirName + "/" + jobName;
      FileInputFormat.setInputPaths(job, new Path(previousJobDirName));
      FileOutputFormat.setOutputPath(job, new Path(currentJobDirName));

      // iterate
      previousJobDirName = currentJobDirName;

      // run job
      try {
        job.waitForCompletion(true);
      } catch (InterruptedException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
      } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
      }
    }
  }

}
TOP

Related Classes of org.xadoop.driver.ZorbaDriver

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.