/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.hive.hcatalog.hbase;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hive.cli.CliSessionState;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.hbase.HBaseSerDe;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hive.hcatalog.cli.HCatDriver;
import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
import org.junit.Test;

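/**
 * Tests reading HBase-backed HCatalog tables through
 * {@link HCatInputFormat} in map-only MapReduce jobs, both with the full
 * table schema and with a column projection.
 */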
public class TestHBaseInputFormat extends SkeletonHBaseTest {

  private static HiveConf hcatConf;
  private static HCatDriver hcatDriver;
  private final byte[] FAMILY = Bytes.toBytes("testFamily");
  private final byte[] QUALIFIER1 = Bytes.toBytes("testQualifier1");
  private final byte[] QUALIFIER2 = Bytes.toBytes("testQualifier2");

  public TestHBaseInputFormat() throws Exception {
    hcatConf = getHiveConf();
    hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
      HCatSemanticAnalyzer.class.getName());
    URI fsuri = getFileSystem().getUri();
    Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(),
      getTestDir());
    hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString());
    hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString());

    // Copy any hbase.* properties into the HCatalog configuration.
    for (Map.Entry<String, String> el : getHbaseConf()) {
      if (el.getKey().startsWith("hbase.")) {
        hcatConf.set(el.getKey(), el.getValue());
      }
    }

    SessionState.start(new CliSessionState(hcatConf));
    hcatDriver = new HCatDriver();

  }

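  /**
   * Builds {@code num} Puts against the single row key "testRow", writing
   * both test qualifiers at timestamps 1..num so that the newest cell
   * version carries the value "textValue-" + num. The tableName argument is
   * currently unused.
   */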
  private List<Put> generatePuts(int num, String tableName) throws IOException {
    List<Put> myPuts = new ArrayList<Put>();
    for (int i = 1; i <= num; i++) {
      Put put = new Put(Bytes.toBytes("testRow"));
      put.add(FAMILY, QUALIFIER1, i, Bytes.toBytes("textValue-" + i));
      put.add(FAMILY, QUALIFIER2, i, Bytes.toBytes("textValue-" + i));
      myPuts.add(put);
    }
    return myPuts;
  }

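  /**
   * Writes the generated puts into the named HBase table, producing a single
   * row with {@code revisions} versions of each cell.
   */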
  private void populateHBaseTable(String tName, int revisions) throws IOException {
    List<Put> myPuts = generatePuts(revisions, tName);
    HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName));
    try {
      table.put(myPuts);
    } finally {
      table.close();
    }
  }

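  /**
   * Creates an HBase-backed table through the HCatalog DDL path, loads five
   * revisions of one row, and runs a map-only job reading the table via
   * HCatInputFormat. The mapper should see exactly one record carrying the
   * newest cell versions. Finally drops the table and database and verifies
   * that the underlying HBase table is gone.
   */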
  @Test
  public void TestHBaseTableReadMR() throws Exception {
    String tableName = newTableName("MyTable");
    String databaseName = newTableName("MyDatabase");
    // The HBase table name defaults to the lower-cased "<db>.<table>" unless
    // overridden by the hbase.table.name property.
    String hbaseTableName = (databaseName + "." + tableName).toLowerCase();
    String dbDir = new Path(getTestDir(), "hbasedb").toString();

    String dbQuery = "CREATE DATABASE IF NOT EXISTS " + databaseName
      + " LOCATION '" + dbDir + "'";
    String tableQuery = "CREATE TABLE " + databaseName + "." + tableName
      + "(key string, testqualifier1 string, testqualifier2 string) STORED BY "
      + "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
      + " WITH SERDEPROPERTIES ('hbase.columns.mapping'="
      + "':key,testFamily:testQualifier1,testFamily:testQualifier2')";

    CommandProcessorResponse responseOne = hcatDriver.run(dbQuery);
    assertEquals(0, responseOne.getResponseCode());
    CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
    assertEquals(0, responseTwo.getResponseCode());

    HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
    boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
    assertTrue(doesTableExist);

    populateHBaseTable(hbaseTableName, 5);
    Configuration conf = new Configuration(hcatConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF,
      HCatUtil.serialize(getHiveConf().getAllProperties()));

    conf.set(HBaseSerDe.HBASE_TABLE_NAME, hbaseTableName);
    conf.set(TableInputFormat.INPUT_TABLE, hbaseTableName);
    // output settings
    Path outputDir = new Path(getTestDir(), "mapred/testHbaseTableMRRead");
    FileSystem fs = getFileSystem();
    if (fs.exists(outputDir)) {
      fs.delete(outputDir, true);
    }
    // create job
    Job job = new Job(conf, "hbase-mr-read-test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapReadHTable.class);
    MapReadHTable.resetCounters();

    job.setInputFormatClass(HCatInputFormat.class);
    InputJobInfo inputJobInfo = InputJobInfo.create(databaseName, tableName, null);
    HCatInputFormat.setInput(job, inputJobInfo);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    assertTrue(job.waitForCompletion(true));
    // Note: these asserts only work with LocalJobRunner, since the mappers
    // run in the same JVM as the test; with MiniMRCluster the static fields
    // would not be visible and the tests would have to be modified.
    assertFalse(MapReadHTable.error);
    assertEquals(1, MapReadHTable.count);

    String dropTableQuery = "DROP TABLE " + hbaseTableName;
    CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
    assertEquals(0, responseThree.getResponseCode());

    boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName);
    assertFalse(isHbaseTableThere);

    String dropDB = "DROP DATABASE " + databaseName;
    CommandProcessorResponse responseFour = hcatDriver.run(dropDB);
    assertEquals(0, responseFour.getResponseCode());
  }

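  /**
   * Same read path as TestHBaseTableReadMR, but with an explicit
   * hbase.table.name and an output schema that projects only the key and
   * testqualifier1 columns, so the mapper should see two-field records.
   */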
  @Test
  public void TestHBaseTableProjectionReadMR() throws Exception {

    String tableName = newTableName("MyTable");
    // Table name as specified by the hbase.table.name property.
    String hbaseTableName = "MyDB_" + tableName;
    String tableQuery = "CREATE TABLE " + tableName
      + "(key string, testqualifier1 string, testqualifier2 string) STORED BY "
      + "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
      + " WITH  SERDEPROPERTIES ('hbase.columns.mapping'="
      + "':key,testFamily:testQualifier1,testFamily:testQualifier2')"
      + " TBLPROPERTIES ('hbase.table.name'='" + hbaseTableName+ "')" ;

    CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
    assertEquals(0, responseTwo.getResponseCode());

    HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
    boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
    assertTrue(doesTableExist);

    populateHBaseTable(hbaseTableName, 5);

    Configuration conf = new Configuration(hcatConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF,
      HCatUtil.serialize(getHiveConf().getAllProperties()));

    // output settings
    Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR");
    FileSystem fs = getFileSystem();
    if (fs.exists(outputDir)) {
      fs.delete(outputDir, true);
    }
    // create job
    Job job = new Job(conf, "hbase-column-projection");
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapReadProjHTable.class);
    job.setInputFormatClass(HCatInputFormat.class);
    InputJobInfo inputJobInfo = InputJobInfo.create(
      MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, null);
    HCatInputFormat.setOutputSchema(job, getProjectionSchema());
    HCatInputFormat.setInput(job, inputJobInfo);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    assertTrue(job.waitForCompletion(true));
    assertFalse(MapReadProjHTable.error);
    assertEquals(1, MapReadProjHTable.count);

    String dropTableQuery = "DROP TABLE " + tableName;
    CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
    assertEquals(0, responseThree.getResponseCode());

    boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName);
    assertFalse(isHbaseTableThere);
  }


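  /**
   * Map-only mapper validating that each HCatRecord has three fields: the
   * row key plus the newest version of both qualifiers. Results are reported
   * through static fields, which only works when the job runs in-process
   * (LocalJobRunner).
   */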
  static class MapReadHTable
    extends
    Mapper<ImmutableBytesWritable, HCatRecord, WritableComparable<?>, Text> {

    static boolean error = false;
    static int count = 0;

    @Override
    public void map(ImmutableBytesWritable key, HCatRecord value,
            Context context) throws IOException, InterruptedException {
      boolean correctValues = (value.size() == 3)
        && (value.get(0).toString()).equalsIgnoreCase("testRow")
        && (value.get(1).toString()).equalsIgnoreCase("textValue-5")
        && (value.get(2).toString()).equalsIgnoreCase("textValue-5");

      if (!correctValues) {
        error = true;
      }
      count++;
    }

    public static void resetCounters() {
      error = false;
      count = 0;
    }
  }

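  /**
   * Variant of {@link MapReadHTable} for the projection test: expects
   * two-field records holding only the key and testqualifier1.
   */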
  static class MapReadProjHTable
    extends
    Mapper<ImmutableBytesWritable, HCatRecord, WritableComparable<?>, Text> {

    static boolean error = false;
    static int count = 0;
    @Override
    public void map(ImmutableBytesWritable key, HCatRecord value,
            Context context) throws IOException, InterruptedException {
      boolean correctValues = (value.size() == 2)
        && (value.get(0).toString()).equalsIgnoreCase("testRow")
        && (value.get(1).toString()).equalsIgnoreCase("textValue-5");

      if (!correctValues) {
        error = true;
      }
      count++;
    }
  }

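  /**
   * Builds the projection schema used by TestHBaseTableProjectionReadMR:
   * just the "key" and "testqualifier1" string columns.
   */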
  private HCatSchema getProjectionSchema() throws HCatException {

    HCatSchema schema = new HCatSchema(new ArrayList<HCatFieldSchema>());
    schema.append(new HCatFieldSchema("key", HCatFieldSchema.Type.STRING,
      ""));
    schema.append(new HCatFieldSchema("testqualifier1",
      HCatFieldSchema.Type.STRING, ""));
    return schema;
  }


}