Package org.kitesdk.data

Examples of org.kitesdk.data.DatasetDescriptor
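To frame the snippets that follow, here is a minimal, hypothetical sketch of assembling a descriptor with the Builder; the User record schema and its fields are invented for illustration, and only the schema is strictly required.

    import org.apache.avro.Schema;
    import org.apache.avro.SchemaBuilder;
    import org.kitesdk.data.DatasetDescriptor;
    import org.kitesdk.data.Formats;

    public class DescriptorSketch {
      public static void main(String[] args) {
        // Hypothetical record schema built with Avro's SchemaBuilder.
        Schema schema = SchemaBuilder.record("User").fields()
            .requiredString("username")
            .requiredLong("creationDate")
            .endRecord();

        // The schema is required; the format defaults to Avro if not set.
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schema(schema)
            .format(Formats.AVRO)
            .build();

        System.out.println(descriptor.getSchema().toString(true));
        System.out.println(descriptor.getFormat());
      }
    }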


    Dataset<Record> dataset = repo.create(NAMESPACE, NAME,
        new DatasetDescriptor.Builder(testDescriptor)
            .partitionStrategy(ps1)
            .build());

    DatasetDescriptor changed =
        new DatasetDescriptor.Builder(dataset.getDescriptor())
            .partitionStrategy(ps2)
            .build();

    try {


  public void testUpdateFailsWithLocationChange() {
    ensureCreated();
    Dataset<Record> dataset = repo.load(NAMESPACE, NAME);
    URI location = dataset.getDescriptor().getLocation();

    DatasetDescriptor changed =
        new DatasetDescriptor.Builder(dataset.getDescriptor())
            .location(new Path(testDirectory, "newDataLocation").toUri())
            .build();

    try {
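Both fragments above break off at the try block. A sketch of how such a check typically concludes, assuming the repository exposes update(namespace, name, descriptor) alongside the create and load calls used above (DatasetRepository's package varies across Kite versions) and that the rejection surfaces as an unchecked exception:

    import org.junit.Assert;
    import org.kitesdk.data.DatasetDescriptor;
    import org.kitesdk.data.DatasetRepository;

    public class UpdateRejectionSketch {
      // Hypothetical helper: assert that an incompatible descriptor change is rejected.
      static void assertUpdateRejected(DatasetRepository repo, String namespace,
                                       String name, DatasetDescriptor changed) {
        try {
          repo.update(namespace, name, changed);
          Assert.fail("Expected the repository to reject the descriptor change");
        } catch (RuntimeException expected) {
          // expected: the original partition strategy or location stays in effect
        }
      }
    }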

    HBaseDatasetRepository repo = new HBaseDatasetRepository.Builder()
        .configuration(conf).build();

    // TODO: change to use namespace (CDK-140)

    DatasetDescriptor userProfileDatasetDescriptor =
        new DatasetDescriptor.Builder().schema(UserProfileModel2.SCHEMA$).build();
    userProfileDataset = repo.create("default", "kite_example_user_profiles.UserProfileModel2",
        userProfileDatasetDescriptor);

    DatasetDescriptor userActionsDatasetDescriptor =
        new DatasetDescriptor.Builder().schema(UserActionsModel2.SCHEMA$).build();
    userActionsDataset = repo.create("default", "kite_example_user_profiles.UserActionsModel2",
        userActionsDatasetDescriptor);

    DatasetDescriptor userProfileActionsDatasetDescriptor =
        new DatasetDescriptor.Builder().schema(UserProfileActionsModel2.SCHEMA$).build();
    userProfileActionsDataset = repo.create("default", "kite_example_user_profiles.UserProfileActionsProtocol2",
        userProfileActionsDatasetDescriptor);

  }
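Once created as above, a dataset can be loaded back by namespace and name and its descriptor inspected. A small sketch (same DatasetRepository package caveat as in the earlier sketch; the expected schema is passed in so the generated UserProfileModel2 class need not be on the sketch's classpath):

    import org.apache.avro.Schema;
    import org.kitesdk.data.Dataset;
    import org.kitesdk.data.DatasetDescriptor;
    import org.kitesdk.data.DatasetRepository;

    public class LoadBackSketch {
      // Hypothetical check: the loaded descriptor still carries the schema the
      // dataset was created with (pass UserProfileModel2.SCHEMA$ as `expected`).
      static void verifySchemaRoundTrip(DatasetRepository repo, Schema expected) {
        Dataset<Object> profiles =
            repo.load("default", "kite_example_user_profiles.UserProfileModel2");
        DatasetDescriptor descriptor = profiles.getDescriptor();
        if (!descriptor.getSchema().equals(expected)) {
          throw new IllegalStateException(
              "Schema did not round-trip: " + descriptor.getSchema());
        }
      }
    }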

    Assert.assertTrue(v instanceof FileSystemView);
    Assert.assertEquals("Locations should match",
        URI.create("file:/tmp/data/ns/test"),
        v.getDataset().getDescriptor().getLocation());

    DatasetDescriptor loaded = repo.load("ns", "test").getDescriptor();
    Assert.assertEquals("Descriptors should match",
        loaded, v.getDataset().getDescriptor());
    Assert.assertEquals("Should report correct namespace",
        "ns", v.getDataset().getNamespace());
    Assert.assertEquals("Should report correct name",
        "test", v.getDataset().getName());

    Constraints withUser = new Constraints(loaded.getSchema())
        .with("username", new Utf8("user"));
    Assert.assertEquals("Constraints should be username=user",
        withUser, ((FileSystemView) v).getConstraints());

    repo.delete("ns", "test");
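The Constraints object asserted on above is an SPI detail; through the public API the equivalent restriction is usually built with RefinableView.with. A sketch reusing the username field and Utf8 value from the test, assuming the dataset holds GenericRecord entities:

    import org.apache.avro.generic.GenericRecord;
    import org.apache.avro.util.Utf8;
    import org.kitesdk.data.Dataset;
    import org.kitesdk.data.DatasetReader;
    import org.kitesdk.data.RefinableView;

    public class ViewSketch {
      // Restrict the dataset to records whose username field equals "user".
      static void readUserRecords(Dataset<GenericRecord> dataset) {
        RefinableView<GenericRecord> byUser = dataset.with("username", new Utf8("user"));
        DatasetReader<GenericRecord> reader = byUser.newReader();
        try {
          while (reader.hasNext()) {
            System.out.println(reader.next());
          }
        } finally {
          reader.close();
        }
      }
    }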

  @Test
  public void testCSVSyncDFS() throws Exception {
    String auth = getDFS().getUri().getAuthority();
    final FileSystem fs = getDFS();
    final Path path = new Path("hdfs://" + auth + "/tmp/test.csv");
    final DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(schema)
        .build();
    CSVAppender<GenericRecord> appender = new CSVAppender<GenericRecord>(fs, path, descriptor);
    GenericRecord record = new GenericData.Record(schema);

  @Test
  @Ignore(value="LocalFileSystem is broken!?")
  public void testCSVSyncLocalFS() throws Exception {
    final FileSystem fs = FileSystem.getLocal(getConfiguration());
    final Path path = new Path("file:/tmp/test.csv");
    final DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(schema)
        .build();
    CSVAppender<GenericRecord> appender = new CSVAppender<GenericRecord>(fs, path, descriptor);
    GenericRecord record = new GenericData.Record(schema);
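Both CSV fragments stop right after the record is constructed. The usual appender lifecycle is to open it, append records, flush, and close; a sketch of that sequence (CSVAppender is an SPI class, so treat the exact method set and package as version-dependent):

    import java.io.IOException;
    import org.apache.avro.generic.GenericRecord;
    import org.kitesdk.data.spi.filesystem.CSVAppender;

    public class AppenderSketch {
      // Hypothetical continuation of the tests above: open, append, flush, close.
      static void writeRecords(CSVAppender<GenericRecord> appender,
                               Iterable<GenericRecord> records) throws IOException {
        appender.open();
        for (GenericRecord record : records) {
          appender.append(record);
        }
        appender.flush();
        appender.close();
      }
    }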

  public static <E> View<E> viewForUri(Dataset<E> dataset, URI uri) {
    Preconditions.checkArgument(dataset instanceof FileSystemDataset,
        "Not a file system dataset: " + dataset);

    DatasetDescriptor descriptor = dataset.getDescriptor();

    String s1 = descriptor.getLocation().getScheme();
    String s2 = uri.getScheme();
    Preconditions.checkArgument((s1 == null || s2 == null) || s1.equals(s2),
        "%s is not contained in %s", uri, descriptor.getLocation());

    URI location = URI.create(descriptor.getLocation().getPath());
    URI relative = location.relativize(URI.create(uri.getPath()));
    if (relative.toString().isEmpty()) {
      // no partitions are selected
      return dataset;
    }

    Preconditions.checkArgument(!relative.getPath().startsWith("/"),
        "%s is not contained in %s", uri, location);
    Preconditions.checkArgument(descriptor.isPartitioned(),
        "Dataset is not partitioned");

    Schema schema = descriptor.getSchema();
    PartitionStrategy strategy = descriptor.getPartitionStrategy();

    RefinableView<E> view = dataset;
    Iterator<String> parts = PATH_SPLITTER.split(relative.toString()).iterator();
    for (FieldPartitioner fp : strategy.getFieldPartitioners()) {
      if (!parts.hasNext()) {
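The containment checks in viewForUri lean on java.net.URI.relativize, which yields an empty URI when the two paths are equal, strips the dataset root from a contained path, and returns the argument unchanged (still absolute) when it is not under the root. A standalone JDK-only sketch with made-up paths:

    import java.net.URI;

    public class RelativizeSketch {
      public static void main(String[] args) {
        URI root      = URI.create("/tmp/data/ns/test");                    // hypothetical dataset root
        URI same      = URI.create("/tmp/data/ns/test");
        URI partition = URI.create("/tmp/data/ns/test/year=2015/month=05"); // hypothetical partition dirs
        URI outside   = URI.create("/tmp/data/other");

        System.out.println(root.relativize(same));       // "" -> whole dataset selected
        System.out.println(root.relativize(partition));  // "year=2015/month=05" -> partition view
        System.out.println(root.relativize(outside));    // "/tmp/data/other" -> rejected (starts with "/")
      }
    }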

  private final Map<String, Object> provided;

  private ReaderWriterState state;

  public PartitionedDatasetWriter(FileSystemView<E> view) {
    final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
    Preconditions.checkArgument(descriptor.isPartitioned(),
        "Dataset " + view.getDataset() + " is not partitioned");

    this.view = view;
    this.partitionStrategy = descriptor.getPartitionStrategy();

    int maxWriters = DEFAULT_WRITER_CACHE_SIZE;
    if (descriptor.hasProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP)) {
      try {
        maxWriters = Integer.parseInt(
            descriptor.getProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP));
      } catch (NumberFormatException e) {
        LOG.warn("Not an integer: " + FileSystemProperties.WRITER_CACHE_SIZE_PROP + "=" +
            descriptor.getProperty(FileSystemProperties.WRITER_CACHE_SIZE_PROP));
      }
    } else if (partitionStrategy.getCardinality() != FieldPartitioner.UNKNOWN_CARDINALITY) {
      maxWriters = Math.min(maxWriters, partitionStrategy.getCardinality());
    }
    this.maxWriters = maxWriters;
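The writer-cache size consulted in the constructor above is an ordinary descriptor property, so it can be supplied when the descriptor is built. A sketch using the same constant (FileSystemProperties is an SPI class; the value 20 is an arbitrary example):

    import org.kitesdk.data.DatasetDescriptor;
    import org.kitesdk.data.spi.filesystem.FileSystemProperties;

    public class WriterCacheSketch {
      // Hypothetical: cap the partitioned writer's open-writer cache at 20 entries.
      static DatasetDescriptor withWriterCache(DatasetDescriptor base) {
        return new DatasetDescriptor.Builder(base)
            .property(FileSystemProperties.WRITER_CACHE_SIZE_PROP, "20")
            .build();
      }
    }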

    return writer;
  }

  @Override
  public boolean deleteAll() {
    DatasetDescriptor descriptor = getDataset().getDescriptor();
    if (!descriptor.isPartitioned()) {
      // at least one constraint, but not partitioning to satisfy it
      throw new UnsupportedOperationException(
          "Cannot cleanly delete view: " + this);
    }
    if (!constraints.alignedWithBoundaries()) {
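As the checks above suggest, deleteAll only works on views whose constraints line up with whole partitions. A sketch of the distinction, assuming a hypothetical dataset partitioned by a year field:

    import org.apache.avro.generic.GenericRecord;
    import org.kitesdk.data.Dataset;

    public class DeleteAllSketch {
      // Hypothetical: "year" is a partition field of this dataset; "username" is not.
      static void deleteYear2015(Dataset<GenericRecord> dataset) {
        // Aligned with partition boundaries: the year=2015 partition is dropped cleanly.
        dataset.with("year", 2015).deleteAll();

        // Not aligned: a view keyed on a non-partition field cannot be deleted
        // cleanly, so a call like the one below would be rejected rather than
        // rewrite files.
        // dataset.with("username", "user").deleteAll();
      }
    }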

      return Iterators.singletonIterator(root);
    }
  }

  private FileSystemPartitionIterator partitionIterator() {
    DatasetDescriptor descriptor = dataset.getDescriptor();
    try {
      return new FileSystemPartitionIterator(
          fs, root, descriptor.getPartitionStrategy(), descriptor.getSchema(),
          constraints);
    } catch (IOException ex) {
      throw new DatasetException("Cannot list partitions in view: " + this, ex);
    }
  }
