Package org.kitesdk.data.crunch

Source Code of org.kitesdk.data.crunch.TestCrunchDatasetsHBase

/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.crunch;

import java.io.IOException;
import junit.framework.Assert;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.util.Utf8;
import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.Target;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetReader;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.View;
import org.kitesdk.data.hbase.HBaseDatasetRepository;
import org.kitesdk.data.hbase.HBaseDatasetRepositoryTest;
import org.kitesdk.data.hbase.avro.AvroUtils;
import org.kitesdk.data.hbase.testing.HBaseTestUtils;
import org.kitesdk.data.spi.DatasetRepository;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.kitesdk.data.spi.filesystem.DatasetTestUtilities.datasetSize;

public class TestCrunchDatasetsHBase {
  private static final String testGenericEntity;

  static {
    try {
      testGenericEntity = AvroUtils.inputStreamToString(TestCrunchDatasetsHBase.class
          .getResourceAsStream("/TestGenericEntity.avsc"));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  private DatasetRepository repo;

  private static final String tableName = "testtable";
  private static final String managedTableName = "managed_schemas";

  @BeforeClass
  public static void beforeClass() throws Exception {
    HBaseTestUtils.getMiniCluster();
    // managed table should be created by HBaseDatasetRepository
    HBaseTestUtils.util.deleteTable(Bytes.toBytes(managedTableName));
  }

  @AfterClass
  public static void afterClass() throws Exception {
    HBaseTestUtils.util.deleteTable(Bytes.toBytes(tableName));
    if (HBaseTestUtils.getMiniCluster() != null) {
      HBaseTestUtils.util.shutdownMiniHBaseCluster();
      HBaseTestUtils.util.shutdownMiniDFSCluster();
    }
  }

  @Before
  public void setUp() throws Exception {
    this.repo = new HBaseDatasetRepository.Builder()
        .configuration(HBaseTestUtils.getConf()).build();
  }

  @After
  public void after() throws Exception {
    HBaseTestUtils.util.truncateTable(Bytes.toBytes(tableName));
    HBaseTestUtils.util.truncateTable(Bytes.toBytes(managedTableName));
  }

  @Test
  public void testGeneric() throws IOException {
    String datasetName = tableName + ".TestGenericEntity";

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(testGenericEntity)
        .build();

    Dataset<GenericRecord> inputDataset = repo.create("default", "in", descriptor);
    Dataset<GenericRecord> outputDataset = repo.create("default", datasetName, descriptor);

    writeRecords(inputDataset, 10);

    Pipeline pipeline = new MRPipeline(TestCrunchDatasetsHBase.class, HBaseTestUtils.getConf());
    PCollection<GenericRecord> data = pipeline.read(
        CrunchDatasets.asSource(inputDataset));
    pipeline.write(data, CrunchDatasets.asTarget(outputDataset), Target.WriteMode.APPEND);
    pipeline.run();

    checkRecords(outputDataset, 10, 0);
  }

  @Test
  public void testSourceView() throws IOException {
    String datasetName = tableName + ".TestGenericEntity";

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(testGenericEntity)
        .build();

    Dataset<GenericRecord> inputDataset = repo.create("default", "in", descriptor);
    Dataset<GenericRecord> outputDataset = repo.create("default", datasetName, descriptor);

    writeRecords(inputDataset, 10);

    View<GenericRecord> inputView = inputDataset
        .from("part1", new Utf8("part1_2")).to("part1", new Utf8("part1_7"))
        .from("part2", new Utf8("part2_2")).to("part2", new Utf8("part2_7"));
    Assert.assertEquals(6, datasetSize(inputView));

    Pipeline pipeline = new MRPipeline(TestCrunchDatasetsHBase.class, HBaseTestUtils.getConf());
    PCollection<GenericRecord> data = pipeline.read(
        CrunchDatasets.asSource(inputView));
    pipeline.write(data, CrunchDatasets.asTarget(outputDataset), Target.WriteMode.APPEND);
    pipeline.run();

    checkRecords(outputDataset, 6, 2);
  }

  private void writeRecords(Dataset<GenericRecord> dataset, int count) {
    DatasetWriter<GenericRecord> writer = dataset.newWriter();
    try {
      for (int i = 0; i < count; ++i) {
        GenericRecord entity = HBaseDatasetRepositoryTest.createGenericEntity(i);
        writer.write(entity);
      }
    } finally {
      writer.close();
    }
  }

  private void checkRecords(Dataset<GenericRecord> dataset, int count, int start) {
    int cnt = start;
    DatasetReader<GenericRecord> reader = dataset.newReader();
    try {
      for (GenericRecord entity : reader) {
        HBaseDatasetRepositoryTest.compareEntitiesWithUtf8(cnt, entity);
        cnt++;
      }
      assertEquals(count, cnt - start);
    } finally {
      reader.close();
    }
  }

}
TOP

Related Classes of org.kitesdk.data.crunch.TestCrunchDatasetsHBase

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.