Package com.asakusafw.dmdl.directio.csv.driver

Source Code of com.asakusafw.dmdl.directio.csv.driver.CsvFormatEmitterTest

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.dmdl.directio.csv.driver;

import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.Scanner;
import java.util.zip.GZIPInputStream;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.io.Text;
import org.junit.Before;
import org.junit.Test;

import com.asakusafw.dmdl.directio.common.driver.GeneratorTesterRoot;
import com.asakusafw.dmdl.java.emitter.driver.ObjectDriver;
import com.asakusafw.runtime.directio.BinaryStreamFormat;
import com.asakusafw.runtime.directio.util.DelimiterRangeInputStream;
import com.asakusafw.runtime.io.ModelInput;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.io.csv.CsvConfiguration;
import com.asakusafw.runtime.io.csv.CsvFormatException;
import com.asakusafw.runtime.io.csv.CsvParser;
import com.asakusafw.runtime.value.Date;
import com.asakusafw.runtime.value.DateTime;
import com.asakusafw.runtime.value.IntOption;
import com.asakusafw.runtime.value.LongOption;
import com.asakusafw.runtime.value.StringOption;
import com.asakusafw.utils.collections.Lists;

/**
* Test for {@link CsvFormatEmitter}.
*/
public class CsvFormatEmitterTest extends GeneratorTesterRoot {

    private static final Charset DEFAULT_ENCODING = Charset.forName("UTF-8");

    /**
     * Initializes the test.
     * @throws Exception if some errors were occurred
     */
    @Before
    public void setUp() throws Exception {
        emitDrivers.add(new CsvFormatEmitter());
        emitDrivers.add(new ObjectDriver());
    }

    /**
     * A simple case.
     * @throws Exception if failed
     */
    @Test
    public void simple() throws Exception {
        ModelLoader loaded = generateJava("simple");
        ModelWrapper model = loaded.newModel("Simple");
        BinaryStreamFormat<?> support = (BinaryStreamFormat<?>) loaded.newObject("csv", "SimpleCsvFormat");

        assertThat(support.getSupportedType(), is((Object) model.unwrap().getClass()));

        BinaryStreamFormat<Object> unsafe = unsafe(support);
        assertThat(unsafe, is(not(instanceOf(Configurable.class))));

        model.set("value", new Text("hello-world"));

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = unsafe.createOutput(model.unwrap().getClass(), "hello", output);
        writer.write(model.unwrap());
        writer.close();

        assertThat(scan(output.toByteArray()), is(Arrays.asList("hello-world")));

        Object buffer = loaded.newModel("Simple").unwrap();
        ModelInput<Object> reader = unsafe.createInput(model.unwrap().getClass(), "hello", in(output),
                0, size(output));
        assertThat(reader.readTo(buffer), is(true));
        assertThat(buffer, is(model.unwrap()));
        assertThat(reader.readTo(buffer), is(false));
    }

    /**
     * All types.
     * @throws Exception if failed
     */
    @Test
    public void types() throws Exception {
        ModelLoader loaded = generateJava("types");
        ModelWrapper model = loaded.newModel("Types");
        BinaryStreamFormat<?> support = (BinaryStreamFormat<?>) loaded.newObject("csv", "TypesCsvFormat");
        assertThat(support.getSupportedType(), is((Object) model.unwrap().getClass()));

        ModelWrapper empty = loaded.newModel("Types");

        ModelWrapper all = loaded.newModel("Types");
        all.set("c_int", 100);
        all.set("c_text", new Text("Hello, DMDL world!"));
        all.set("c_boolean", true);
        all.set("c_byte", (byte) 64);
        all.set("c_short", (short) 1023);
        all.set("c_long", 100000L);
        all.set("c_float", 1.5f);
        all.set("c_double", 2.5f);
        all.set("c_decimal", new BigDecimal("3.1415"));
        all.set("c_date", new Date(2011, 9, 1));
        all.set("c_datetime", new DateTime(2011, 12, 31, 23, 59, 59));

        BinaryStreamFormat<Object> unsafe = unsafe(support);

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = unsafe.createOutput(model.unwrap().getClass(), "hello", output);
        writer.write(empty.unwrap());
        writer.write(all.unwrap());
        writer.close();

        Object buffer = loaded.newModel("Types").unwrap();
        ModelInput<Object> reader = unsafe.createInput(model.unwrap().getClass(), "hello", in(output),
                0, size(output));
        assertThat(reader.readTo(buffer), is(true));
        assertThat(buffer, is(empty.unwrap()));
        assertThat(reader.readTo(buffer), is(true));
        assertThat(buffer, is(all.unwrap()));
        assertThat(reader.readTo(buffer), is(false));
    }

    /**
     * with attributes.
     * @throws Exception if failed
     */
    @Test
    public void attributes() throws Exception {
        ModelLoader loaded = generateJava("attributes");
        ModelWrapper model = loaded.newModel("Model");
        model.set("text_value", new Text("\u3042\u3044\u3046\u3048\u304a"));
        model.set("true_value", true);
        model.set("false_value", false);
        model.set("date_value", new Date(2011, 10, 11));
        model.set("date_time_value", new DateTime(2011, 1, 2, 13, 14, 15));
        BinaryStreamFormat<Object> support = unsafe(loaded.newObject("csv", "ModelCsvFormat"));

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = support.createOutput(model.unwrap().getClass(), "hello", output);
        writer.write(model.unwrap());
        writer.close();

        String[][] results = parse(5, new String(dump(new GZIPInputStream(new ByteArrayInputStream(output.toByteArray()))), "ISO-2022-jp"));
        assertThat(results, is(new String[][] {
                {"text_value", "true_value", "false_value", "date_value", "date_time_value"},
                {"\u3042\u3044\u3046\u3048\u304a", "T", "F", "2011/10/11", "2011/01/02+13:14:15"},
        }));
    }

    /**
     * With compression.
     * @throws Exception if failed
     */
    @Test
    public void compression() throws Exception {
        ModelLoader loaded = generateJava("compress");
        ModelWrapper model = loaded.newModel("Compress");
        BinaryStreamFormat<?> support = (BinaryStreamFormat<?>) loaded.newObject("csv", "CompressCsvFormat");

        assertThat(support.getSupportedType(), is((Object) model.unwrap().getClass()));

        BinaryStreamFormat<Object> unsafe = unsafe(support);
        assertThat(unsafe, is(instanceOf(Configurable.class)));

        model.set("value", new Text("hello"));

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = unsafe.createOutput(model.unwrap().getClass(), "hello", output);
        writer.write(model.unwrap());
        writer.close();

        assertThat(
                scan(dump(new GZIPInputStream(new ByteArrayInputStream(output.toByteArray())))),
                is(Arrays.asList("hello")));

        Object buffer = loaded.newModel("Compress").unwrap();
        ModelInput<Object> reader = unsafe.createInput(model.unwrap().getClass(), "hello", in(output),
                0, size(output));
        assertThat(reader.readTo(buffer), is(true));
        assertThat(buffer, is(model.unwrap()));
        assertThat(reader.readTo(buffer), is(false));
    }

    /**
     * with header.
     * @throws Exception if failed
     */
    @Test
    public void header() throws Exception {
        ModelLoader loaded = generateJava("field_name");
        ModelWrapper model = loaded.newModel("Model");
        BinaryStreamFormat<Object> support = unsafe(loaded.newObject("csv", "ModelCsvFormat"));

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = support.createOutput(model.unwrap().getClass(), "hello", output);
        model.set("value", new Text("Hello, world!"));
        writer.write(model.unwrap());
        writer.close();

        String[][] results = parse(1, new String(output.toByteArray(), "UTF-8"));
        assertThat(results, is(new String[][] {
                {"title"},
                {"Hello, world!"},
        }));
    }

    /**
     * with implicit field.
     * @throws Exception if failed
     */
    @Test
    public void implicit_field_name() throws Exception {
        ModelLoader loaded = generateJava("implicit_field_name");
        ModelWrapper model = loaded.newModel("Model");
        BinaryStreamFormat<Object> support = unsafe(loaded.newObject("csv", "ModelCsvFormat"));

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = support.createOutput(model.unwrap().getClass(), "hello", output);
        model.set("value", new Text("Hello, world!"));
        writer.write(model.unwrap());
        writer.close();

        String[][] results = parse(1, new String(output.toByteArray(), "UTF-8"));
        assertThat(results, is(new String[][] {
                {"value"},
                {"Hello, world!"},
        }));
    }

    /**
     * with file name.
     * @throws Exception if failed
     */
    @Test
    public void file_name() throws Exception {
        ModelLoader loaded = generateJava("file_name");
        ModelWrapper model = loaded.newModel("Model");
        ModelWrapper buffer = loaded.newModel("Model");
        BinaryStreamFormat<Object> support = unsafe(loaded.newObject("csv", "ModelCsvFormat"));
        assertThat(support.getMinimumFragmentSize(), is(greaterThan(0L)));

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = support.createOutput(model.unwrap().getClass(), "hello", output);
        model.set("value", new Text("Hello, world!"));
        writer.write(model.unwrap());
        writer.close();

        ModelInput<Object> reader = support.createInput(model.unwrap().getClass(), "testing", in(output),
                0, size(output));
        assertThat(reader.readTo(buffer.unwrap()), is(true));
        assertThat(buffer.getOption("value"), is((Object) new StringOption("Hello, world!")));
        assertThat(buffer.getOption("path"), is((Object) new StringOption("testing")));
        assertThat(reader.readTo(buffer.unwrap()), is(false));
    }

    /**
     * with line number.
     * @throws Exception if failed
     */
    @Test
    public void line_number() throws Exception {
        ModelLoader loaded = generateJava("line_number");
        ModelWrapper model = loaded.newModel("Model");
        model.set("value", new Text("Hello\nworld!"));
        ModelWrapper buffer = loaded.newModel("Model");
        BinaryStreamFormat<Object> support = unsafe(loaded.newObject("csv", "ModelCsvFormat"));

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = support.createOutput(model.unwrap().getClass(), "hello", output);
        writer.write(model.unwrap());
        writer.write(model.unwrap());
        writer.close();

        ModelInput<Object> reader = support.createInput(model.unwrap().getClass(), "testing", in(output),
                0, size(output));
        assertThat(reader.readTo(buffer.unwrap()), is(true));
        assertThat(buffer.getOption("value"), is((Object) new StringOption("Hello\nworld!")));
        assertThat(buffer.getOption("number"), is((Object) new IntOption(1)));
        assertThat(reader.readTo(buffer.unwrap()), is(true));
        assertThat(buffer.getOption("value"), is((Object) new StringOption("Hello\nworld!")));
        assertThat(buffer.getOption("number"), is((Object) new IntOption(3)));
        assertThat(reader.readTo(buffer.unwrap()), is(false));
    }

    /**
     * with record number.
     * @throws Exception if failed
     */
    @Test
    public void record_number() throws Exception {
        ModelLoader loaded = generateJava("record_number");
        ModelWrapper model = loaded.newModel("Model");
        model.set("value", new Text("Hello\nworld!"));
        ModelWrapper buffer = loaded.newModel("Model");
        BinaryStreamFormat<Object> support = unsafe(loaded.newObject("csv", "ModelCsvFormat"));

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = support.createOutput(model.unwrap().getClass(), "hello", output);
        writer.write(model.unwrap());
        writer.write(model.unwrap());
        writer.close();

        ModelInput<Object> reader = support.createInput(model.unwrap().getClass(), "testing", in(output),
                0, size(output));
        assertThat(reader.readTo(buffer.unwrap()), is(true));
        assertThat(buffer.getOption("value"), is((Object) new StringOption("Hello\nworld!")));
        assertThat(buffer.getOption("number"), is((Object) new LongOption(1)));
        assertThat(reader.readTo(buffer.unwrap()), is(true));
        assertThat(buffer.getOption("value"), is((Object) new StringOption("Hello\nworld!")));
        assertThat(buffer.getOption("number"), is((Object) new LongOption(2)));
        assertThat(reader.readTo(buffer.unwrap()), is(false));
    }

    /**
     * with ignored property.
     * @throws Exception if failed
     */
    @Test
    public void ignore() throws Exception {
        ModelLoader loaded = generateJava("ignore");
        ModelWrapper model = loaded.newModel("Model");
        model.set("value", new Text("Hello, world!"));
        model.set("ignored", new Text("ignored"));
        ModelWrapper buffer = loaded.newModel("Model");
        BinaryStreamFormat<Object> support = unsafe(loaded.newObject("csv", "ModelCsvFormat"));

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = support.createOutput(model.unwrap().getClass(), "hello", output);
        writer.write(model.unwrap());
        writer.close();

        ModelInput<Object> reader = support.createInput(model.unwrap().getClass(), "testing", in(output),
                0, size(output));
        assertThat(reader.readTo(buffer.unwrap()), is(true));
        assertThat(buffer.getOption("value"), is((Object) new StringOption("Hello, world!")));
        assertThat(buffer.getOption("ignored"), is((Object) new StringOption()));
        assertThat(reader.readTo(buffer.unwrap()), is(false));
    }

    /**
     * fragmentation support.
     * @throws Exception if failed
     */
    @Test
    public void fragmentation() throws Exception {
        ModelLoader loaded = generateJava("fragmentation");
        Random random = new Random(12345);
        for (int i = 0; i < 5; i++) {
            fragmentation_attempt(loaded, random);
        }
    }

    /**
     * fragmentation support with hedaer.
     * @throws Exception if failed
     */
    @Test
    public void fragmentation_header() throws Exception {
        ModelLoader loaded = generateJava("fragmentation_header");
        Random random = new Random(12345);
        for (int i = 0; i < 5; i++) {
            fragmentation_attempt(loaded, random);
        }
    }

    private void fragmentation_attempt(ModelLoader loaded, Random random) throws Exception {
        ModelWrapper model = loaded.newModel("Tuple");
        BinaryStreamFormat<?> support = (BinaryStreamFormat<?>) loaded.newObject("csv", "TupleCsvFormat");

        assertThat(support.getSupportedType(), is((Object) model.unwrap().getClass()));

        BinaryStreamFormat<Object> unsafe = unsafe(support);

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = unsafe.createOutput(model.unwrap().getClass(), "hello", output);
        List<Object> expected = Lists.create();
        for (int line = 0; line < 100; line++) {
            ModelWrapper buffer = loaded.newModel("Tuple");
            buffer.set("f1", new Text("f1:" + (line * 1)));
            buffer.set("f2", new Text("f2:" + random.nextInt()));
            buffer.set("f3", new Text("f3:" + random.nextInt()));
            writer.write(buffer.unwrap());
            expected.add(buffer.unwrap());
        }
        writer.close();
        byte[] bytes = output.toByteArray();

        for (int attempt = 0; attempt < 100; attempt++) {
            List<Object> actual = Lists.create();
            int[] fragment = new int[random.nextInt(100) + 2];
            fragment[0] = output.size();
            for (int i = 1; i < fragment.length; i++) {
                fragment[i] = random.nextInt(output.size() + 1);
            }
            Arrays.sort(fragment);
            int start = 0;
            for (int i = 0; i < fragment.length; i++) {
                int offset = start;
                int length = fragment[i] - offset;
                InputStream in = new ByteArrayInputStream(bytes, offset, bytes.length - offset);
                in.mark(bytes.length - offset);
                ModelInput<Object> reader = unsafe.createInput(model.unwrap().getClass(), "hello", in,
                        offset, length);
                try {
                    while (true) {
                        Object buffer = loaded.newModel("Tuple").unwrap();
                        if (reader.readTo(buffer) == false) {
                            break;
                        }
                        actual.add(buffer);
                    }
                } catch (CsvFormatException e) {
                    InputStream reIn = new ByteArrayInputStream(bytes, offset, bytes.length - offset);
                    InputStream copy = new DelimiterRangeInputStream(reIn, '\n', length, offset > 0);
                    System.out.println(copy.read());
                    copy.close();
                    throw new IOException(MessageFormat.format(
                            "attempt={0}, f-offset={1}, f-size={2}, total={3}: [[{4}]]",
                            attempt,
                            offset,
                            length,
                            bytes.length,
                            new String(bytes, offset, length, "UTF-8")), e);
                }
                start = fragment[i];
            }
            assertThat(actual, is(expected));
        }
    }

    /**
     * fragmentation is restricted.
     * @throws Exception if failed
     */
    @Test
    public void fragmentation_restricted() throws Exception {
        ModelLoader loaded = generateJava("fragmentation_restricted");
        ModelWrapper model = loaded.newModel("Tuple");
        BinaryStreamFormat<?> support = (BinaryStreamFormat<?>) loaded.newObject("csv", "TupleCsvFormat");
        BinaryStreamFormat<Object> unsafe = unsafe(support);

        model.set("f1", new Text("Hello1"));
        model.set("f2", new Text("Hello1"));
        model.set("f3", new Text("Hello1"));

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        ModelOutput<Object> writer = unsafe.createOutput(model.unwrap().getClass(), "hello", output);
        writer.write(model.unwrap());
        writer.close();

        try {
            unsafe.createInput(model.unwrap().getClass(), "hello", in(output), 1, size(output));
            fail();
        } catch (Exception e) {
            // ok.
        }
    }

    /**
     * Compile with no attributes.
     * @throws Exception if failed
     */
    @Test
    public void no_attributes() throws Exception {
        ModelLoader loaded = generateJava("no_attributes");
        assertThat(loaded.exists("csv", "NoAttributesCsvFormat"), is(false));
    }

    /**
     * with invalid field.
     * @throws Exception if failed
     */
    @Test
    public void invalid_file_name() throws Exception {
        shouldSemanticError("invalid_file_name");
    }

    /**
     * with invalid line number.
     * @throws Exception if failed
     */
    @Test
    public void invalid_line_number() throws Exception {
        shouldSemanticError("invalid_line_number");
    }

    /**
     * with invalid record number.
     * @throws Exception if failed
     */
    @Test
    public void invalid_record_number() throws Exception {
        shouldSemanticError("invalid_record_number");
    }

    private String[][] parse(int columns, String string) {
        CsvConfiguration conf = new CsvConfiguration(
                CsvConfiguration.DEFAULT_CHARSET,
                CsvConfiguration.DEFAULT_HEADER_CELLS,
                CsvConfiguration.DEFAULT_TRUE_FORMAT,
                CsvConfiguration.DEFAULT_FALSE_FORMAT,
                CsvConfiguration.DEFAULT_DATE_FORMAT,
                CsvConfiguration.DEFAULT_DATE_TIME_FORMAT);
        ByteArrayInputStream input = new ByteArrayInputStream(string.getBytes(conf.getCharset()));
        CsvParser parser = new CsvParser(input, string, conf);
        List<String[]> results = Lists.create();
        try {
            StringOption buffer = new StringOption();
            while (parser.next()) {
                String[] line = new String[columns];
                for (int i = 0; i < columns; i++) {
                    parser.fill(buffer);
                    line[i] = buffer.or((String) null);
                }
                parser.endRecord();
                results.add(line);
            }
            parser.close();
        } catch (Exception e) {
            throw new AssertionError(e);
        }
        return results.toArray(new String[results.size()][]);
    }

    /**
     * Compile with invalid attribute.
     * @throws Exception if failed
     */
    @Test
    public void invalid_attribute() throws Exception {
        shouldSemanticError("invalid_attribute");
    }

    @SuppressWarnings("unchecked")
    private BinaryStreamFormat<Object> unsafe(Object support) {
        return (BinaryStreamFormat<Object>) support;
    }

    private ByteArrayInputStream in(ByteArrayOutputStream output) {
        return new ByteArrayInputStream(output.toByteArray());
    }

    private long size(ByteArrayOutputStream output) {
        return output.size();
    }

    private byte[] dump(InputStream input) throws IOException {
        try {
            ByteArrayOutputStream output = new ByteArrayOutputStream();
            byte[] buf = new byte[1024];
            while (true) {
                int read = input.read(buf);
                if (read < 0) {
                    break;
                }
                output.write(buf, 0, read);
            }
            output.close();
            return output.toByteArray();
        } finally {
            input.close();
        }
    }

    private List<String> scan(byte[] bytes) {
        Scanner scanner = new Scanner(new ByteArrayInputStream(bytes), DEFAULT_ENCODING.name());
        try {
            List<String> results = Lists.create();
            while (scanner.hasNextLine()) {
                results.add(scanner.nextLine());
            }
            return results;
        } finally {
            scanner.close();
        }
    }
}
TOP

Related Classes of com.asakusafw.dmdl.directio.csv.driver.CsvFormatEmitterTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.