Source Code of com.splunk.shuttl.integration.hadoop.hbase.JobRunner

// Copyright (C) 2011 Splunk Inc.
//
// Splunk Inc. licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.splunk.shuttl.integration.hadoop.hbase;


import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStreamReader;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;




public class JobRunner {
  private static HBaseAdmin admin;


  private static final String CONFIG_FILENAME = "csvmapper.filename";
  private static final String CONFIG_OUTPUTPATH = "csvmapper.outputPath";
  private static final String CONFIG_TABLENAME = "csvmapper.tableName";


  public JobRunner() {
  }


  public static void main(String[] args) throws Exception {
    
    Job job = CSVJobFactory.getConfiguredJob(args);
    
    Configuration jobConfiguration = job.getConfiguration();
    admin = new HBaseAdmin(jobConfiguration);
    
    JobRunner jobRunner = new JobRunner();
    jobRunner.run(job);
  }


  /**
   * @param job
   * @param jobConfiguration
   * @param configuration
   * @throws Exception
   */
  private boolean run(Job job) throws Exception {


    Configuration configuration = job.getConfiguration();


    Path inputPath = new Path(configuration.get(CONFIG_FILENAME));
    Path outputPath = new Path(configuration.get(CONFIG_OUTPUTPATH));


    FileSystem fSystem = FileSystem.get(configuration);
    
    CreateHBaseTableIfNotExists(configuration.get(CONFIG_TABLENAME));


    DeleteOutputPathIfExists(fSystem, outputPath);


    FileSystem fs = FileSystem.get(configuration);
    String headerString = readFirstLine(fs.open(new Path("")));
    configuration.set(JobConfigurationConstants.HEADER_STRING, headerString);


    HTable hTable = new HTable(job.getConfiguration(),
        configuration.get(CONFIG_TABLENAME));


    // Auto configure partitioner and reducer
    HFileOutputFormat.configureIncrementalLoad(job, hTable);


    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);


    boolean complete = job.waitForCompletion(true);


    if (complete) {
      LoadIncrementalHFiles loader = new LoadIncrementalHFiles(configuration);
      loader.doBulkLoad(outputPath, hTable);
    }


    fSystem.deleteOnExit(outputPath);


    return complete;
  }


  /**
   * @throws IOException
   * 
   */
  private void DeleteOutputPathIfExists(FileSystem fs, Path outputPath)
      throws IOException {
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
  }


  /**
   * @throws IOException
   * 
   */
  private void CreateHBaseTableIfNotExists(String tableName)
      throws IOException {
    if (!admin.tableExists(tableName))
      admin.createTable(new HTableDescriptor(tableName));
  }


  private String readFirstLine(FSDataInputStream fsDataInputStream)
      throws IOException {
    DataInputStream in = new DataInputStream(fsDataInputStream);
    return new BufferedReader(new InputStreamReader(in)).readLine();
  }


}
Source Code of com.splunk.shuttl.integration.hadoop.hbase.JobRunner

Related Classes of com.splunk.shuttl.integration.hadoop.hbase.JobRunner