// Build one Avro generic record matching the Person schema and persist it
// (presumably to avroFile via populateGenericFile — TODO confirm the helper
// writes to the same file the pipeline reads below).
savedRecord.put("name", "John Doe");
savedRecord.put("age", 42);
savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);
// MapReduce-backed Crunch pipeline; read the Avro file back as specific Person records.
Pipeline pipeline = new MRPipeline(AvroParquetPipelineIT.class, tmpDir.getDefaultConfiguration());
PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
    Avros.records(Person.class)));
// Derive an Employee collection from each Person. The literal 0 and "Eng"
// are fixed test values for the remaining Employee fields (salary/department
// by position — NOTE(review): confirm against the Employee schema).
PCollection<Employee> employees = genericCollection.parallelDo(new DoFn<Person, Employee>() {
@Override
public void process(Person person, Emitter<Employee> emitter) {
emitter.emit(new Employee(person.getName(), 0, "Eng"));
}
}, Avros.records(Employee.class));
// Write the same data through two Parquet target flavors: plain target for
// Persons, source-target (readable back) for Employees.
File output1File = tmpDir.getFile("output1");
File output2File = tmpDir.getFile("output2");
pipeline.write(genericCollection, new AvroParquetFileTarget(output1File.getAbsolutePath()));
pipeline.write(employees, new AvroParquetFileSourceTarget(new Path(output2File.getAbsolutePath()),
    Avros.records(Employee.class)));
// Execute the pipeline, then materialize the first element of each collection
// for assertions (assertions appear after this chunk).
pipeline.run();
Person person = genericCollection.materialize().iterator().next();
Employee employee = employees.materialize().iterator().next();
// Path of the single map-output Parquet part file — assumes one mapper, so
// the part name is deterministic ("part-m-00000.parquet") — TODO confirm.
Path parquet1File = new Path(new File(output1File, "part-m-00000.parquet").getPath());