Package eu.stratosphere.addons.hbase

Source Code of eu.stratosphere.addons.hbase.GenericTableOutputFormat

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.addons.hbase;

import java.io.IOException;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

import eu.stratosphere.api.common.io.OutputFormat;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.types.Record;

public abstract class GenericTableOutputFormat implements OutputFormat<Record> {

  private static final long serialVersionUID = 1L;

  public static final String JT_ID_KEY = "pact.hbase.jtkey";

  public static final String JOB_ID_KEY = "pact.job.id";

  private RecordWriter<ImmutableBytesWritable, KeyValue> writer;

  private Configuration config;

  private org.apache.hadoop.conf.Configuration hadoopConfig;

  private TaskAttemptContext context;

  private String jtID;

  private int jobId;


  @Override
  public void configure(Configuration parameters) {
    this.config = parameters;

    // get the ID parameters
    this.jtID = parameters.getString(JT_ID_KEY, null);
    if (this.jtID == null) {
      throw new RuntimeException("Missing JT_ID entry in hbase config.");
    }
    this.jobId = parameters.getInteger(JOB_ID_KEY, -1);
    if (this.jobId < 0) {
      throw new RuntimeException("Missing or invalid job id in input config.");
    }
  }

  @Override
  public void open(int taskNumber, int numTasks) throws IOException {
    this.hadoopConfig = getHadoopConfig(this.config);
   
    /**
     * PLASE NOTE:
     * If you are a Eclipse+Maven Integration user and you have two (or more) warnings here, please
     * close the pact-hbase project OR set the maven profile to hadoop_yarn
     *
     * pact-hbase requires hadoop_yarn, but Eclipse is not able to parse maven profiles properly. Therefore,
     * it imports the pact-hbase project even if it is not included in the standard profile (hadoop_v1)
     */
    final TaskAttemptID attemptId = new TaskAttemptID(this.jtID, this.jobId, TaskType.MAP, taskNumber - 1, 0);

    this.context = new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(this.hadoopConfig, attemptId);
    final HFileOutputFormat outFormat = new HFileOutputFormat();
    try {
      this.writer = outFormat.getRecordWriter(this.context);
    } catch (InterruptedException iex) {
      throw new IOException("Opening the writer was interrupted.", iex);
    }
  }

  @Override
  public void close() throws IOException {
    final RecordWriter<ImmutableBytesWritable, KeyValue> writer = this.writer;
    this.writer = null;
    if (writer != null) {
      try {
        writer.close(this.context);
      } catch (InterruptedException iex) {
        throw new IOException("Closing was interrupted.", iex);
      }
    }
  }

  public void collectKeyValue(KeyValue kv) throws IOException {
    try {
      this.writer.write(null, kv);
    } catch (InterruptedException iex) {
      throw new IOException("Write request was interrupted.", iex);
    }
  }

  public abstract org.apache.hadoop.conf.Configuration getHadoopConfig(Configuration config);
}
TOP

Related Classes of eu.stratosphere.addons.hbase.GenericTableOutputFormat

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.