Package org.kitesdk.morphline.hadoop.parquet.avro

Source Code of org.kitesdk.morphline.hadoop.parquet.avro.AvroParquetMorphlineTest

/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.morphline.hadoop.parquet.avro;

import java.io.File;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericFixed;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
import org.kitesdk.morphline.api.AbstractMorphlineTest;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder;

import parquet.avro.AvroParquetWriter;

import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;

public class AvroParquetMorphlineTest extends AbstractMorphlineTest {

  @Test
  public void testMapWithUtf8Key() throws Exception {
    Schema schema = new Schema.Parser().parse(new File("src/test/resources/test-avro-schemas/map.avsc"));

    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
    tmp.deleteOnExit();
    tmp.delete();
    Path file = new Path(tmp.getPath());

    AvroParquetWriter<GenericRecord> writer =
        new AvroParquetWriter<GenericRecord>(file, schema);

    // Write a record with a map with Utf8 keys.
    GenericData.Record record = new GenericRecordBuilder(schema)
        .set("mymap", new HashMap(ImmutableMap.of(utf8("a"), 1, utf8("b"), 2)))
        .build();
    writer.write(record);
    writer.close();

    for (String configFile : Arrays.asList(
        "readAvroParquetFile",
        "readAvroParquetFileWithProjectionSchema",
        "readAvroParquetFileWithReaderSchema1",
        "readAvroParquetFileWithReaderSchemaExternal"
        )) {
      morphline = createMorphline("test-morphlines/" + configFile);
     
      Record morphlineRecord = new Record();
      morphlineRecord.put(ReadAvroParquetFileBuilder.FILE_UPLOAD_URL, file.toString());
      collector.reset();
     
      assertTrue(morphline.process(morphlineRecord));

      assertEquals(1, collector.getRecords().size());
      GenericData.Record actualRecord = (GenericData.Record) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
      assertEquals(record, actualRecord);     
    }
  }
 
  @Test
  public void testAll() throws Exception {
    Schema schema = new Schema.Parser().parse(new File("src/test/resources/test-avro-schemas/all.avsc"));

    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
    tmp.deleteOnExit();
    tmp.delete();
    Path file = new Path(tmp.getPath());
   
    AvroParquetWriter<GenericRecord> writer = new
        AvroParquetWriter<GenericRecord>(file, schema);

    GenericData.Record nestedRecord = new GenericRecordBuilder(
        schema.getField("mynestedrecord").schema())
            .set("mynestedint", 1).build();

    List<Integer> integerArray = Arrays.asList(1, 2, 3);
    GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(
        Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);

    GenericFixed genericFixed = new GenericData.Fixed(
        Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });

    List<Integer> emptyArray = new ArrayList<Integer>();
    ImmutableMap emptyMap = new ImmutableMap.Builder<String, Integer>().build();

    GenericData.Record record = new GenericRecordBuilder(schema)
        .set("mynull", null)
        .set("myboolean", true)
        .set("myint", 1)
        .set("mylong", 2L)
        .set("myfloat", 3.1f)
        .set("mydouble", 4.1)
        .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
        .set("mystring", "hello")
        .set("mynestedrecord", nestedRecord)
        .set("myenum", "a")
        .set("myarray", genericIntegerArray)
        .set("myemptyarray", emptyArray)
        .set("myoptionalarray", genericIntegerArray)
        .set("mymap", ImmutableMap.of("a", 1, "b", 2))
        .set("myemptymap", emptyMap)
        .set("myfixed", genericFixed)
        .build();

    writer.write(record);
    writer.close();

    morphline = createMorphline("test-morphlines/readAvroParquetFileWithProjectionSubSchema");
   
    Record morphlineRecord = new Record();
    morphlineRecord.put(ReadAvroParquetFileBuilder.FILE_UPLOAD_URL, file.toString());
    collector.reset();
   
    assertTrue(morphline.process(morphlineRecord));

    assertEquals(1, collector.getRecords().size());
    GenericData.Record actualRecord = (GenericData.Record) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
    assertNotNull(actualRecord);
    assertEquals(null, actualRecord.get("mynull"));
    assertEquals(true, actualRecord.get("myboolean"));
    assertEquals(1, actualRecord.get("myint"));
    assertEquals(2L, actualRecord.get("mylong"));
    assertEquals(null, actualRecord.get("myfloat"));
    assertEquals(4.1, actualRecord.get("mydouble"));
    assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), actualRecord.get("mybytes"));
    assertEquals("hello", actualRecord.get("mystring"));
    assertEquals("a", actualRecord.get("myenum"));
    assertEquals(nestedRecord, actualRecord.get("mynestedrecord"));
    assertEquals(integerArray, actualRecord.get("myarray"));
    assertEquals(emptyArray, actualRecord.get("myemptyarray"));
    assertEquals(integerArray, actualRecord.get("myoptionalarray"));
    assertEquals(ImmutableMap.of("a", 1, "b", 2), actualRecord.get("mymap"));
    assertEquals(emptyMap, actualRecord.get("myemptymap"));
    assertEquals(genericFixed, actualRecord.get("myfixed"));
  }

  private static String utf8(String str) {
    return str;
  }

//private static Utf8 utf8(String str) {
//return new Utf8(str);
//}

}
TOP

Related Classes of org.kitesdk.morphline.hadoop.parquet.avro.AvroParquetMorphlineTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.