Package com.asakusafw.directio.hive.orc

Source Code of com.asakusafw.directio.hive.orc.OrcFileFormatTest

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.directio.hive.orc;

import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.junit.Assume;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import com.asakusafw.directio.hive.serde.DataModelDescriptorEditor;
import com.asakusafw.directio.hive.serde.DataModelMapping.ExceptionHandlingStrategy;
import com.asakusafw.directio.hive.serde.DataModelMapping.FieldMappingStrategy;
import com.asakusafw.directio.hive.serde.FieldPropertyDescriptor;
import com.asakusafw.directio.hive.serde.ValueSerde;
import com.asakusafw.directio.hive.serde.mock.MockSimple;
import com.asakusafw.runtime.directio.Counter;
import com.asakusafw.runtime.directio.DirectInputFragment;
import com.asakusafw.runtime.directio.hadoop.StripedDataFormat;
import com.asakusafw.runtime.io.ModelInput;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.value.IntOption;
import com.asakusafw.runtime.value.StringOption;

/**
* Test for {@link OrcFileFormat}.
*/
public class OrcFileFormatTest {

    /**
     * A temporary folder for testing.
     */
    @Rule
    public final TemporaryFolder folder = new TemporaryFolder();

    private <T> OrcFileFormat<T> format(Class<T> type, String... removes) {
        return format(type, Collections.<String, ValueSerde>emptyMap(), removes);
    }

    private <T> OrcFileFormat<T> format(
            Class<T> type,
            Map<String, ? extends ValueSerde> edits,
            String... removes) {
        OrcFileFormat<T> format = new OrcFileFormat<T>(
                "testing",
                new OrcFormatConfiguration(),
                new DataModelDescriptorEditor(FieldPropertyDescriptor.extract(type))
                    .editAll(edits)
                    .removeAll(Arrays.asList(removes))
                    .build());
        format.setConf(new org.apache.hadoop.conf.Configuration());
        return format;
    }

    /**
     * Test method for {@link AbstractOrcFileFormat#getFormatName()}.
     */
    @Test
    public void format_name() {
        assertThat(format(MockSimple.class).getFormatName(), equalTo("ORC"));
    }

    /**
     * Test method for {@link AbstractOrcFileFormat#getSupportedType()}.
     */
    @Test
    public void supported_type() {
        assertThat(format(MockSimple.class).getSupportedType(), equalTo((Object) MockSimple.class));
    }

    /**
     * {@code tblproperties} for default settings.
     */
    @Test
    public void table_properties_default() {
        Map<String, String> props = format(MockSimple.class).getTableProperties();
        assertThat(props.size(), is(2));
        assertThat(props, hasEntry("orc.compress", "SNAPPY"));
        assertThat(props, hasEntry("orc.stripe.size", String.valueOf(64L * 1024 * 1024)));
    }

    /**
     * {@code tblproperties} for custom settings.
     */
    @Test
    public void table_properties_custom() {
        long stripeSize = 99L * 1024 * 1024;
        OrcFileFormat<MockSimple> format = format(MockSimple.class);
        format.getFormatConfiguration()
            .withFormatVersion(OrcFile.Version.V_0_11)
            .withCompressionKind(CompressionKind.ZLIB)
            .withStripeSize(stripeSize);
        Map<String, String> props = format.getTableProperties();
        assertThat(props.size(), is(2));
        assertThat(props, hasEntry("orc.compress", "ZLIB"));
        assertThat(props, hasEntry("orc.stripe.size", String.valueOf(stripeSize)));
    }

    /**
     * simple I/O.
     * @throws Exception if failed
     */
    @Test
    public void io_simple() throws Exception {
        OrcFileFormat<MockSimple> format = format(MockSimple.class);
        MockSimple in = new MockSimple(100, "Hello, world!");
        MockSimple out = restore(format, in);
        assertThat(out.number, is(in.number));
        assertThat(out.string, is(in.string));
    }

    /**
     * I/O with projection.
     * @throws Exception if failed
     */
    @Test
    public void io_projection() throws Exception {
        OrcFileFormat<MockSimple> format1 = format(MockSimple.class);
        OrcFileFormat<MockSimple> format2 = format(MockSimple.class, "string");
        format2.getFormatConfiguration()
            .withFieldMappingStrategy(FieldMappingStrategy.NAME)
            .withOnMissingTarget(ExceptionHandlingStrategy.IGNORE);

        MockSimple in = new MockSimple(100, "Hello, world!");
        File file = save(format1, Arrays.asList(in));
        List<MockSimple> restored = load(format2, file);

        assertThat(restored, hasSize(1));
        MockSimple out = restored.get(0);
        assertThat(out.number, is(in.number));
        assertThat(out.string, is(new StringOption())); // null
    }

    /**
     * I/O with fragment.
     * @throws Exception if failed
     */
    @Test
    public void io_fragment() throws Exception {
        File file = folder.newFile();
        Assume.assumeThat(file.delete() || file.exists() == false, is(true));

        OrcFileFormat<MockSimple> format = format(MockSimple.class);
        LocalFileSystem fs = FileSystem.getLocal(format.getConf());
        ModelOutput<MockSimple> output = format.createOutput(
                MockSimple.class,
                fs, new Path(file.toURI()),
                new Counter());
        try {
            output.write(new MockSimple(100, "Hello, world!"));
        } finally {
            output.close();
        }
        assertThat(file.exists(), is(true));

        FileStatus stat = fs.getFileStatus(new Path(file.toURI()));
        List<DirectInputFragment> fragments = format.computeInputFragments(new StripedDataFormat.InputContext(
                MockSimple.class,
                Arrays.asList(stat), fs,
                -1L, -1L,
                false, false));

        assertThat(fragments, hasSize(1));
        DirectInputFragment first = fragments.get(0);

        ModelInput<MockSimple> input = format.createInput(
                MockSimple.class,
                fs, new Path(first.getPath()),
                first.getOffset(), first.getSize(),
                new Counter());
        try {
            MockSimple buf = new MockSimple();
            assertThat(input.readTo(buf), is(true));
            assertThat(buf.number, is(new IntOption(100)));
            assertThat(buf.string, is(new StringOption("Hello, world!")));

            assertThat(input.readTo(buf), is(false));
        } finally {
            input.close();
        }
    }


    /**
     * I/O with {@code 0.11}.
     * @throws Exception if failed
     */
    @Test
    public void io_v_0_11() throws Exception {
        OrcFileFormat<MockSimple> format = format(MockSimple.class);
        format.getFormatConfiguration().withFormatVersion(OrcFile.Version.V_0_11);
        MockSimple in = new MockSimple(100, "Hello, world!");
        MockSimple out = restore(format, in);
        assertThat(out.number, is(in.number));
        assertThat(out.string, is(in.string));
    }

    private <T> T restore(OrcFileFormat<T> format, T value) throws IOException, InterruptedException {
        List<T> in = new ArrayList<T>();
        in.add(value);
        return restore(format, in).get(0);
    }

    private <T> List<T> restore(OrcFileFormat<T> format, List<T> values) throws IOException, InterruptedException {
        File file = save(format, values);
        List<T> results = load(format, file);
        assertThat(values, hasSize(results.size()));
        return results;
    }

    private <T> File save(OrcFileFormat<T> format, List<T> values) throws IOException, InterruptedException {
        File file = folder.newFile();
        Assume.assumeThat(file.delete() || file.exists() == false, is(true));
        LocalFileSystem fs = FileSystem.getLocal(format.getConf());
        ModelOutput<T> output = format.createOutput(
                format.getSupportedType(),
                fs, new Path(file.toURI()),
                new Counter());
        try {
            for (T value : values) {
                output.write(value);
            }
        } finally {
            output.close();
        }
        assertThat(file.exists(), is(true));
        return file;
    }

    private <T> List<T> load(OrcFileFormat<T> format, File file) throws IOException, InterruptedException {
        LocalFileSystem fs = FileSystem.getLocal(format.getConf());
        ModelInput<T> input = format.createInput(
                format.getSupportedType(),
                fs, new Path(file.toURI()),
                0, file.length(),
                new Counter());
        try {
            List<T> results = new ArrayList<T>();
            while (true) {
                @SuppressWarnings("unchecked")
                T value = (T) format.getDataModelDescriptor().createDataModelObject();
                if (input.readTo(value) == false) {
                    break;
                }
                results.add(value);
            }
            return results;
        } finally {
            input.close();
        }
    }
}
TOP

Related Classes of com.asakusafw.directio.hive.orc.OrcFileFormatTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.