// Partitioning: hash the "username" field into 10 buckets
PartitionStrategy.Builder hashOnUsername =
    new PartitionStrategy.Builder().hash("username", 10);
PartitionStrategy partitionStrategy = hashOnUsername.build();

// Descriptor: schema referenced by the "resource:user.avsc" URI, combined with
// the partition strategy built above
DatasetDescriptor.Builder usersDescriptorBuilder = new DatasetDescriptor.Builder()
    .schemaUri("resource:user.avsc")
    .partitionStrategy(partitionStrategy);
DatasetDescriptor descriptor = usersDescriptorBuilder.build();

// Create the "users" dataset in the repository from that descriptor
Dataset<GenericRecord> users = repo.create("users", descriptor);
// Obtain a writer for the users dataset and write 100 generated user records to it
DatasetWriter<GenericRecord> writer = users.newWriter();
try {
// Open the writer before any record is written
writer.open();
// Candidate values for each record's favoriteColor field
String[] colors = { "green", "blue", "pink", "brown", "yellow" };
// Unseeded Random, so the color chosen for each user varies between runs
Random rand = new Random();
// A single builder, created from the descriptor's schema, is reused for every record
GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
// Generate users named "user-0" through "user-99"
for (int i = 0; i < 100; i++) {
// creationDate is the current wall-clock time in milliseconds;
// favoriteColor is picked at random from the colors array
GenericRecord record = builder.set("username", "user-" + i)
.set("creationDate", System.currentTimeMillis())
.set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
writer.write(record);