Package org.kitesdk.data

Examples of org.kitesdk.data.DatasetDescriptor$Builder


  @Test
  public void testRoundTripDescriptor() throws Exception {
    String namespace = "test_ns";
    String name = "test_table";
    DatasetDescriptor original = new DatasetDescriptor.Builder()
        .schemaUri("resource:schema/user.avsc")
        .location("file:/tmp/data/test_table")
        .build();
    boolean external = true;
    Table table = HiveUtils.tableForDescriptor(namespace, name, original, external);

    Configuration conf = new HiveConf();
    DatasetDescriptor result = HiveUtils.descriptorForTable(conf, table);
    assertEquals(original, result);
  }
View Full Code Here


  @Test
  public void testRoundTripDescriptorWithCompressionType() throws Exception {
    String namespace = "test_ns";
    String name = "test_table";
    DatasetDescriptor original = new DatasetDescriptor.Builder()
        .schemaUri("resource:schema/user.avsc")
        .location("file:/tmp/data/test_table")
        .compressionType(CompressionType.Deflate)
        .build();
    boolean external = true;
    Table table = HiveUtils.tableForDescriptor(namespace, name, original, external);

    Configuration conf = new HiveConf();
    DatasetDescriptor result = HiveUtils.descriptorForTable(conf, table);
    assertEquals(original, result);
  }
View Full Code Here

  @Test
  public void testRoundTripDescriptorNoCompressionProperty() throws Exception {
    String namespace = "test_ns";
    String name = "test_table";
    DatasetDescriptor original = new DatasetDescriptor.Builder()
        .schemaUri("resource:schema/user.avsc")
        .location("file:/tmp/data/test_table")
        .build();
    boolean external = true;
    Table table = HiveUtils.tableForDescriptor(namespace, name, original, external);
    assertEquals("snappy", table.getParameters().get("kite.compression.type"));
    table.getParameters().remove("kite.compression.type");

    Configuration conf = new HiveConf();
    DatasetDescriptor result = HiveUtils.descriptorForTable(conf, table);
    assertEquals(original, result);
  }
View Full Code Here

  @Test
  public void testGeneric() throws IOException {
    String datasetName = tableName + ".TestGenericEntity";

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(testGenericEntity)
        .build();

    Dataset<GenericRecord> inputDataset = repo.create("default", "in", descriptor);
    Dataset<GenericRecord> outputDataset = repo.create("default", datasetName, descriptor);
View Full Code Here

  @Test
  public void testSourceView() throws IOException {
    String datasetName = tableName + ".TestGenericEntity";

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(testGenericEntity)
        .build();

    Dataset<GenericRecord> inputDataset = repo.create("default", "in", descriptor);
    Dataset<GenericRecord> outputDataset = repo.create("default", datasetName, descriptor);
View Full Code Here

    Schema datasetSchema = target.getDataset().getDescriptor().getSchema();

    // TODO: replace this with a temporary Dataset from a FS repo
    // TODO: CDK-92: always use GenericRecord?

    DatasetDescriptor csvDescriptor = new DatasetDescriptor.Builder()
        .location(source.toUri())
        .schema(ColumnMappingParser.removeEmbeddedMapping(
            PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
        .format("csv")
        .build();
View Full Code Here

    final String NAME2 = "test2";

    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
        .hash("username", 2).build();

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(testSchema)
        .partitionStrategy(partitionStrategy)
        .build();

    Dataset<GenericRecord> dataset = repo.create(NAMESPACE, NAME2, descriptor);
View Full Code Here

    final String NAME3 = "test3";

    PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
        .hash("username", 2).build();

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(testSchema)
        .partitionStrategy(partitionStrategy)
        .build();

    Dataset<GenericRecord> dataset = repo.create(NAMESPACE, NAME2, descriptor);
View Full Code Here

        new DatasetDescriptor.Builder(descriptor)
            .property("added.property", "true")
            .build());
    Assert.assertNotNull("Update should succeed", updated);

    DatasetDescriptor stored =
        HiveUtils.descriptorForTable(conf, metastore.getTable("default", "test"));

    Assert.assertEquals("Should update default.test descriptor",
        stored, updated.getDescriptor());

    Assert.assertEquals("Added property should be present",
        stored.getProperty("added.property"), "true");
  }
View Full Code Here

        new DatasetDescriptor.Builder(descriptor)
            .property("added.property", "true")
            .build());
    Assert.assertNotNull("Update should succeed", updated);

    DatasetDescriptor stored =
        HiveUtils.descriptorForTable(conf, metastore.getTable("default", "test"));

    Assert.assertEquals("Should update default.test descriptor",
        stored, updated.getDescriptor());

    Assert.assertEquals("Added property should be present",
        stored.getProperty("added.property"), "true");
  }
View Full Code Here

TOP

Related Classes of org.kitesdk.data.DatasetDescriptor$Builder

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.