Package org.kitesdk.data.spi.hive

Source Code of org.kitesdk.data.spi.hive.TestHiveExternalDatasetRepository

/*
* Copyright 2013 Cloudera.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.kitesdk.data.spi.hive;

import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.spi.DatasetRepository;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.spi.PartitionKey;
import org.kitesdk.data.PartitionStrategy;
import org.kitesdk.data.spi.PartitionedDataset;
import org.kitesdk.data.spi.filesystem.TestFileSystemDatasetRepository;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.thrift.TException;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.kitesdk.data.spi.Mergeable;
import org.kitesdk.data.spi.MetadataProvider;

public class TestHiveExternalDatasetRepository extends TestFileSystemDatasetRepository {

  public TestHiveExternalDatasetRepository(boolean distributed) {
    super(distributed);
  }

  @Override
  public DatasetRepository newRepo(MetadataProvider provider) {
    // use null URI because TestDatasetRepositories expects no URI
    return new HiveExternalDatasetRepository(conf, testDirectory, provider);
  }

  @Override
  public MetadataProvider newProvider(Configuration conf) {
    return new HiveExternalMetadataProvider(conf, testDirectory);
  }

  private HiveMetaStoreClient client;

  @Before
  public void connectToHive() throws MetaException {
    this.client = new HiveMetaStoreClient(new HiveConf(conf, HiveConf.class));
  }

  @Before
  @After
  public void cleanHive() {
    // ensures all tables are removed
    MetaStoreUtil metastore = new MetaStoreUtil(getConfiguration());
    for (String database : metastore.getAllDatabases()) {
      for (String table : metastore.getAllTables(database)) {
        metastore.dropTable(database, table);
      }
      if (!"default".equals(database)) {
        metastore.dropDatabase(database, true);
      }
    }
  }

  @Test
  public void testTableExists() throws MetaException, TException {
    ensureCreated();
    HiveTestUtils.assertTableExists(client, NAMESPACE, NAME);
    HiveTestUtils.assertTableIsExternal(client, NAMESPACE, NAME);
  }

  @SuppressWarnings("deprecation")
  @Test
  public void testNewPartitionIsVisibleToHive() throws Exception {
    final String NAME2 = "test2";

    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
        .hash("username", 2).build();

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(testSchema)
        .partitionStrategy(partitionStrategy)
        .build();

    Dataset<GenericRecord> dataset = repo.create(NAMESPACE, NAME2, descriptor);

    HiveTestUtils.assertTableExists(client, NAMESPACE, NAME2);
    HiveTestUtils.assertTableIsExternal(client, NAMESPACE, NAME2);
    Assert.assertTrue("No partitions yet",
        client.listPartitionNames(NAMESPACE, NAME2, (short) 10).isEmpty());

    writeRecord(dataset, 0);

    Assert.assertEquals("Should be one partition", 1,
        client.listPartitionNames(NAMESPACE, NAME2, (short) 10).size());

  }

  @SuppressWarnings({"deprecation", "unchecked"})
  @Test
  public void testMerge() throws Exception {
    final String NAME2 = "test2";
    final String NAME3 = "test3";

    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
        .hash("username", 2).build();

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(testSchema)
        .partitionStrategy(partitionStrategy)
        .build();

    Dataset<GenericRecord> dataset = repo.create(NAMESPACE, NAME2, descriptor);

    HiveTestUtils.assertTableExists(client, NAMESPACE, NAME2);
    HiveTestUtils.assertTableIsExternal(client, NAMESPACE, NAME2);
    Assert.assertTrue("No partitions yet",
        client.listPartitionNames(NAMESPACE, NAME2, (short) 10).isEmpty());

    writeRecord(dataset, 0);

    Assert.assertEquals("Should be one partition", 1,
        client.listPartitionNames(NAMESPACE, NAME2, (short) 10).size());

    Dataset<GenericRecord> dsUpdate = repo.create(NAMESPACE, NAME3, descriptor);

    writeRecord(dsUpdate, 1);

    Assert.assertTrue(dataset instanceof Mergeable);
    ((Mergeable<Dataset<GenericRecord>>) dataset).merge(dsUpdate);

    Assert.assertEquals("Should be two partitions", 2,
        client.listPartitionNames(NAMESPACE, NAME2, (short) 10).size());

  }

  private void writeRecord(Dataset<GenericRecord> dataset, int partition) {
    PartitionKey key = new PartitionKey(partition);
    DatasetWriter<GenericRecord> writer =
        ((PartitionedDataset<GenericRecord>) dataset).getPartition(key, true).newWriter();
    try {
      GenericRecordBuilder recordBuilder = new GenericRecordBuilder(
          dataset.getDescriptor().getSchema())
          .set("username", partition + "").set("email", partition + "@example.com");
      writer.write(recordBuilder.build());
    } finally {
      writer.close();
    }

  }
}
TOP

Related Classes of org.kitesdk.data.spi.hive.TestHiveExternalDatasetRepository

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.