// Open the dataset repository addressed by the URI below
// (hdfs scheme, rooted at /tmp/data).
final String repositoryUri = "repo:hdfs:/tmp/data";
DatasetRepository repo = DatasetRepositories.open(repositoryUri);

// Build a partition strategy that hash-partitions on "username" using 10 buckets.
PartitionStrategy.Builder strategyBuilder = new PartitionStrategy.Builder();
PartitionStrategy partitionStrategy = strategyBuilder.hash("username", 10).build();
// Create a dataset of users with the Avro schema in the repository
DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
.schemaUri("resource:user.avsc")