// The first target is the CSV input: resolve it to a path and obtain the
// FileSystem that owns it, using this command's configuration.
Path source = qualifiedPath(targets.get(0));
FileSystem sourceFS = source.getFileSystem(getConf());
// Reject the arguments up front if the CSV path is missing on that FS.
Preconditions.checkArgument(sourceFS.exists(source),
"CSV path does not exist: " + source);
// Collect the CSV parsing settings. The values (delimiter, escape, quote,
// noHeader, linesToSkip, charsetName) are defined outside this section --
// presumably command-line options; verify against the enclosing class.
// Note the inversion: the "noHeader" flag is passed as hasHeader(!noHeader).
CSVProperties props = new CSVProperties.Builder()
.delimiter(delimiter)
.escape(escape)
.quote(quote)
.hasHeader(!noHeader)
.linesToSkip(linesToSkip)
.charset(charsetName)
.build();
// The second target identifies the destination dataset; load a view of it
// and take the schema from the dataset's descriptor.
String dataset = targets.get(1);
View<Record> target = load(dataset, Record.class);
Schema datasetSchema = target.getDataset().getDescriptor().getSchema();
// TODO: replace this with a temporary Dataset from a FS repo
// TODO: CDK-92: always use GenericRecord?
// Describe the CSV file as a dataset in "csv" format located at the source
// URI. It reuses the destination schema after passing it through
// removeEmbeddedStrategy and then removeEmbeddedMapping -- judging by the
// helper names, these strip partition-strategy and column-mapping
// information embedded in the schema (confirm against the parsers).
DatasetDescriptor csvDescriptor = new DatasetDescriptor.Builder()
.location(source.toUri())
.schema(ColumnMappingParser.removeEmbeddedMapping(
PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
.format("csv")
.build();
// Replace the descriptor with the one returned by addToDescriptor, which
// presumably carries the CSV settings built above (confirm in CSVProperties).
csvDescriptor = props.addToDescriptor(csvDescriptor);
TemporaryFileSystemDatasetRepository repo =
new TemporaryFileSystemDatasetRepository(getConf(),
// ensure the same FS as the file source is used
sourceFS.makeQualified(new Path("/tmp")),