/**
 * Replaces the stored descriptor of an existing dataset after verifying that
 * the new descriptor is compatible with the one currently stored.
 *
 * <p>The following properties must not change between the stored and the new
 * descriptor: the format, the location (when the stored descriptor has one),
 * whether the dataset is partitioned, and the partition strategy. In addition,
 * {@code SchemaValidationUtil.canRead} must accept the pair
 * (stored schema, new schema).
 *
 * @param name the name of the dataset to update; must not be null
 * @param descriptor the replacement descriptor; must not be null
 * @return a dataset built from the descriptor returned by the metadata
 *         provider's update call
 * @throws IllegalArgumentException if {@code name} or {@code descriptor} is null
 * @throws DatasetRepositoryException if the format, location, partitioning
 *         or partition strategy differs from the stored descriptor
 * @throws IncompatibleSchemaException if the new schema cannot read data
 *         written with the stored schema
 */
public <E> Dataset<E> update(String name, DatasetDescriptor descriptor) {
  Preconditions.checkArgument(name != null, "Dataset name cannot be null");
  Preconditions.checkArgument(descriptor != null,
      "DatasetDescriptor cannot be null");

  // oldDescriptor is valid if load didn't throw NoSuchDatasetException
  final DatasetDescriptor oldDescriptor = metadataProvider.load(name);

  if (!oldDescriptor.getFormat().equals(descriptor.getFormat())) {
    throw new DatasetRepositoryException("Cannot change dataset format from " +
        oldDescriptor.getFormat() + " to " + descriptor.getFormat());
  }

  // The location is only enforced when the stored descriptor has one.
  final URI oldLocation = oldDescriptor.getLocation();
  if ((oldLocation != null) && !(oldLocation.equals(descriptor.getLocation()))) {
    throw new DatasetRepositoryException(
        "Cannot change the dataset's location");
  }

  if (oldDescriptor.isPartitioned() != descriptor.isPartitioned()) {
    throw new DatasetRepositoryException("Cannot change an unpartitioned dataset to " +
        " partitioned or vice versa.");
  } else if (oldDescriptor.isPartitioned() &&
      !oldDescriptor.getPartitionStrategy().equals(descriptor.getPartitionStrategy())) {
    // Both descriptors are partitioned here (the flags were just shown equal),
    // so only the strategies themselves need comparing.
    throw new DatasetRepositoryException("Cannot change partition strategy from " +
        oldDescriptor.getPartitionStrategy() + " to " + descriptor.getPartitionStrategy());
  }

  // check can read records written with old schema using new schema
  final Schema oldSchema = oldDescriptor.getSchema();
  final Schema newSchema = descriptor.getSchema();
  if (!SchemaValidationUtil.canRead(oldSchema, newSchema)) {
    throw new IncompatibleSchemaException("New schema cannot read data " +
        "written using " +
        "old schema. New schema: " + newSchema.toString(true) + "\nOld schema: " +
        oldSchema.toString(true));
  }

  final DatasetDescriptor updatedDescriptor = metadataProvider
      .update(name, descriptor);

  // Pass the location object itself rather than calling toString() on it:
  // the logger formats it lazily, and a descriptor without a location must
  // not make a debug statement throw after the update has been applied.
  logger.debug("Updated dataset:{} schema:{} datasetPath:{}", new Object[] {
      name, updatedDescriptor.getSchema(),
      updatedDescriptor.getLocation() });

  return new FileSystemDataset.Builder()
      .name(name)
      .configuration(conf)
      .descriptor(updatedDescriptor)
      .partitionKey(updatedDescriptor.isPartitioned() ?
          com.cloudera.cdk.data.impl.Accessor.getDefault().newPartitionKey() :
          null)
      .build();
}