            PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
        .format("csv")
        .build();

    csvDescriptor = props.addToDescriptor(csvDescriptor);
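
    // expose the CSV files as a dataset through a temporary repository; the
    // repository is deleted in the finally block below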
    TemporaryFileSystemDatasetRepository repo =
        new TemporaryFileSystemDatasetRepository(getConf(),
            // ensure the same FS as the file source is used
            sourceFS.makeQualified(new Path("/tmp")),
            target.getDataset().getNamespace(),
            UUID.randomUUID().toString());
    try {
      FileSystemDataset<Record> csvDataset =
          (FileSystemDataset) repo.create("default", "csv", csvDescriptor);
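
      // fail fast if the CSV location has no data files, then infer the
      // record schema from the first file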
      Iterator<Path> iter = csvDataset.pathIterator().iterator();
      Preconditions.checkArgument(iter.hasNext(),
          "CSV path has no data files: " + source);

      Schema csvSchema = CSVUtil.inferSchema(
          datasetSchema.getFullName(), sourceFS.open(iter.next()), props);
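
      // unless checks are skipped, verify that data written with the inferred
      // CSV schema can be read with the dataset's schema and that the field
      // order matches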
      if (!skipSchemaChecks) {
        Preconditions.checkArgument(
            SchemaValidationUtil.canRead(csvSchema, datasetSchema),
            "Incompatible schemas\nCSV: %s\nDataset: %s",
            csvSchema.toString(true), datasetSchema.toString(true));

        // TODO: add support for orderByHeaders
        Preconditions.checkArgument(verifyFieldOrder(csvSchema, datasetSchema),
            "Incompatible schema field order\nCSV: %s\nDataset: %s",
            csvSchema.toString(true), datasetSchema.toString(true));
      }
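
      // add any extra jars (for example, the transform implementation) so they
      // are available when the task runs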
      TaskUtil.configure(getConf()).addJars(jars);
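
      // when a transform class is given, load it reflectively (it must have a
      // no-arg constructor) and run a TransformTask; otherwise run a plain copy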
      TransformTask task;
      if (transform != null) {
        DoFn<Record, Record> transformFn;
        try {
          DynConstructors.Ctor<DoFn<Record, Record>> ctor =
              new DynConstructors.Builder(DoFn.class)
                  .loader(loaderForJars(jars))
                  .impl(transform)
                  .buildChecked();
          transformFn = ctor.newInstance();
        } catch (NoSuchMethodException e) {
          throw new DatasetException(
              "Cannot find no-arg constructor for class: " + transform, e);
        }
        task = new TransformTask<Record, Record>(
            csvDataset, target, transformFn);
      } else {
        task = new CopyTask<Record>(csvDataset, target);
      }
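
      // pass the configuration to the task and apply the compaction and
      // writer-count options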
      task.setConf(getConf());

      if (noCompaction) {
        task.noCompaction();
      }

      if (numWriters >= 0) {
        task.setNumWriters(numWriters);
      }
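
      // run the pipeline; report the number of records added on success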
      PipelineResult result = task.run();

      if (result.succeeded()) {
        long count = task.getCount();
        if (count > 0) {
          console.info("Added {} records to \"{}\"", count, dataset);
        }
        return 0;
      } else {
        return 1;
      }
    } finally {
      // clean up the temporary repository
      repo.delete();
    }
  }