Package org.kitesdk.data

Examples of org.kitesdk.data.DatasetDescriptor


        throw new ValidationException("Unknown mapping: " + mapping);
      }
    }

    // building the descriptor validates the schema and strategy
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .columnMapping(mappingBuilder.build())
        .schema(open(avroSchemaFile))
        .partitionStrategy(open(partitionStrategyFile))
        .build();

    String mapping = descriptor.getColumnMapping().toString(!minimize);

    output(mapping, console, outputPath);

    return 0;
  }
View Full Code Here


    org.junit.Assert.assertTrue("Repo should be a HCatalogExternalDatasetRepository",
        repo instanceof HiveExternalDatasetRepository);
    Assert.assertEquals("Repository URI", repoUri, repo.getUri());

    // verify location
    DatasetDescriptor created = repo.create("ns", "test",
        new DatasetDescriptor.Builder()
            .schemaLiteral("\"string\"")
            .build()).getDescriptor();
    Assert.assertEquals("Location should be in local FS",
        "file", created.getLocation().getScheme());
    Assert.assertTrue("Location should be in the repo path",
        created.getLocation().getPath().startsWith("/tmp/hive-repo"));
  }
View Full Code Here

    org.junit.Assert.assertTrue("Repo should be a HCatalogExternalDatasetRepository",
        repo instanceof HiveExternalDatasetRepository);
    Assert.assertEquals("Repository URI", repoUri, repo.getUri());

    // verify location
    DatasetDescriptor created = repo.create("ns", "test",
        new DatasetDescriptor.Builder()
            .schemaLiteral("\"string\"")
            .build()).getDescriptor();
    Assert.assertEquals("Location should be in local FS",
        "file", created.getLocation().getScheme());
    Assert.assertTrue("Location should be in the repo path",
        created.getLocation().getPath().endsWith("tmp/hive-repo/ns/test"));
  }
View Full Code Here

    org.junit.Assert.assertTrue("Repo should be a HCatalogExternalDatasetRepository",
        repo instanceof HiveExternalDatasetRepository);
    Assert.assertEquals("Repository URI", repoUri, repo.getUri());

    // verify location
    DatasetDescriptor created = repo.create("tmp", "test",
        new DatasetDescriptor.Builder()
        .schemaLiteral("\"string\"")
        .build()).getDescriptor();
    Assert.assertEquals("Location should be in HDFS",
        "hdfs", created.getLocation().getScheme());
    Assert.assertEquals("Location should have the correct HDFS host",
        hdfsUri.getHost(), created.getLocation().getHost());
    Assert.assertEquals("Location should have the correct HDFS port",
        hdfsUri.getPort(), created.getLocation().getPort());
    Assert.assertTrue("Location should be in the repo path",
        created.getLocation().getPath().startsWith("/tmp/hive-repo"));
  }
View Full Code Here

                                    @Nullable URI location) {
    if (getMetaStoreUtil().exists(namespace, name)) {
      return namespace;
    }
    try {
      DatasetDescriptor descriptor = HiveUtils.descriptorForTable(
          conf, getMetaStoreUtil().getTable(URIBuilder.NAMESPACE_DEFAULT, name));
      URI expectedLocation = location;
      if (location == null) {
        expectedLocation = expectedLocation(namespace, name);
      }
      if ((expectedLocation == null) ||
          pathsEquivalent(expectedLocation, descriptor.getLocation())) {
        // table in the default db has the location that would have been used
        return URIBuilder.NAMESPACE_DEFAULT;
      }
      // fall through and return null
    } catch (DatasetNotFoundException e) {
View Full Code Here

    reader.hasNext();
  }

  @Test(expected = UnknownFormatException.class)
  public void testUnknownFormat() throws IOException {
    final DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(STRING_SCHEMA)
        .format(Accessor.getDefault().newFormat("explode!"))
        .build();

    MultiFileDatasetReader<Record> reader = new MultiFileDatasetReader<Record>(
View Full Code Here

  public void testBasicUse() throws Exception {
    command.avroSchemaFile = "resource:test-schemas/user.avsc";
    command.datasets = Lists.newArrayList("users");
    command.run();

    DatasetDescriptor expectedDescriptor = new DatasetDescriptor.Builder()
        .schemaUri("resource:test-schemas/user.avsc")
        .build();

    verify(repo).create("default", "users", expectedDescriptor);
    verify(console).debug(contains("Created"), eq("users"));
View Full Code Here

    command.avroSchemaFile = "resource:test-schemas/user.avsc";
    command.datasets = Lists.newArrayList("users");
    command.format = "parquet";
    command.run();

    DatasetDescriptor expectedDescriptor = new DatasetDescriptor.Builder()
        .schemaUri("resource:test-schemas/user.avsc")
        .format("parquet")
        .build();

    verify(repo).create("default", "users", expectedDescriptor);
View Full Code Here

    if (exists(namespace, name)) {
      throw new DatasetExistsException(
          "Dataset already exists for name:" + name);
    }

    DatasetDescriptor newDescriptor;
    if (descriptor.getLocation() == null) {
      newDescriptor = new DatasetDescriptor.Builder(descriptor)
          .location(fs.makeQualified(new Path(newLocation(name))))
          .build();
    } else {
View Full Code Here

    Dataset<Record> dataset = repo.create(NAMESPACE, NAME,
        new DatasetDescriptor.Builder(testDescriptor)
            .format(Formats.AVRO)
            .build());

    DatasetDescriptor changed =
        new DatasetDescriptor.Builder(dataset.getDescriptor())
        .format(Formats.PARQUET)
        .build();

    try {
View Full Code Here

TOP

Related Classes of org.kitesdk.data.DatasetDescriptor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.