// Package com.splunk.shuttl.integration.hadoop.hbase
//
// Source code of com.splunk.shuttl.integration.hadoop.hbase.JobRunner

// Copyright (C) 2011 Splunk Inc.
//
// Splunk Inc. licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.splunk.shuttl.integration.hadoop.hbase;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class JobRunner {
  private static HBaseAdmin admin;

  private static final String CONFIG_FILENAME = "csvmapper.filename";
  private static final String CONFIG_OUTPUTPATH = "csvmapper.outputPath";
  private static final String CONFIG_TABLENAME = "csvmapper.tableName";

  public JobRunner() {
  }

  public static void main(String[] args) throws Exception {
   
    Job job = CSVJobFactory.getConfiguredJob(args);
   
    Configuration jobConfiguration = job.getConfiguration();
    admin = new HBaseAdmin(jobConfiguration);
   
    JobRunner jobRunner = new JobRunner();
    jobRunner.run(job);
  }

  /**
   * @param job
   * @param jobConfiguration
   * @param configuration
   * @throws Exception
   */
  private boolean run(Job job) throws Exception {

    Configuration configuration = job.getConfiguration();

    Path inputPath = new Path(configuration.get(CONFIG_FILENAME));
    Path outputPath = new Path(configuration.get(CONFIG_OUTPUTPATH));

    FileSystem fSystem = FileSystem.get(configuration);
   
    CreateHBaseTableIfNotExists(configuration.get(CONFIG_TABLENAME));

    DeleteOutputPathIfExists(fSystem, outputPath);

    FileSystem fs = FileSystem.get(configuration);
    String headerString = readFirstLine(fs.open(new Path("")));
    configuration.set(JobConfigurationConstants.HEADER_STRING, headerString);

    HTable hTable = new HTable(job.getConfiguration(),
        configuration.get(CONFIG_TABLENAME));

    // Auto configure partitioner and reducer
    HFileOutputFormat.configureIncrementalLoad(job, hTable);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    boolean complete = job.waitForCompletion(true);

    if (complete) {
      LoadIncrementalHFiles loader = new LoadIncrementalHFiles(configuration);
      loader.doBulkLoad(outputPath, hTable);
    }

    fSystem.deleteOnExit(outputPath);

    return complete;
  }

  /**
   * @throws IOException
   *
   */
  private void DeleteOutputPathIfExists(FileSystem fs, Path outputPath)
      throws IOException {
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
  }

  /**
   * @throws IOException
   *
   */
  private void CreateHBaseTableIfNotExists(String tableName)
      throws IOException {
    if (!admin.tableExists(tableName))
      admin.createTable(new HTableDescriptor(tableName));
  }

  private String readFirstLine(FSDataInputStream fsDataInputStream)
      throws IOException {
    DataInputStream in = new DataInputStream(fsDataInputStream);
    return new BufferedReader(new InputStreamReader(in)).readLine();
  }

}
// TOP
//
// Related classes of com.splunk.shuttl.integration.hadoop.hbase.JobRunner
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.