// Package com.asakusafw.compiler.fileio
//
// Source code of com.asakusafw.compiler.fileio.HadoopFileIoProcessorTest

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.compiler.fileio;

import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.jar.JarFile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.junit.Assume;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestWatcher;
import org.junit.runner.Description;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.asakusafw.compiler.fileio.external.Ex1MockExporterDescription;
import com.asakusafw.compiler.fileio.flow.IndependentOutExporterDesc;
import com.asakusafw.compiler.fileio.flow.IndependentOutputJob;
import com.asakusafw.compiler.fileio.flow.InvalidFileNameOutputJob;
import com.asakusafw.compiler.fileio.flow.MissingPathOutputJob;
import com.asakusafw.compiler.fileio.flow.MixedInputJob;
import com.asakusafw.compiler.fileio.flow.MultipleOutputJob;
import com.asakusafw.compiler.fileio.flow.NestedOutExporterDesc;
import com.asakusafw.compiler.fileio.flow.NestedOutputJob;
import com.asakusafw.compiler.fileio.flow.NormalInputJob;
import com.asakusafw.compiler.fileio.flow.Out1ExporterDesc;
import com.asakusafw.compiler.fileio.flow.Out2ExporterDesc;
import com.asakusafw.compiler.fileio.flow.Out3ExporterDesc;
import com.asakusafw.compiler.fileio.flow.Out4ExporterDesc;
import com.asakusafw.compiler.fileio.flow.RootOutputJob;
import com.asakusafw.compiler.fileio.flow.SingleOutputJob;
import com.asakusafw.compiler.fileio.flow.SingularOutputJob;
import com.asakusafw.compiler.fileio.flow.TinyInputJob;
import com.asakusafw.compiler.fileio.model.Ex1;
import com.asakusafw.compiler.fileio.model.Ex2;
import com.asakusafw.compiler.flow.FlowCompilerOptions.GenericOptionValue;
import com.asakusafw.compiler.flow.Location;
import com.asakusafw.compiler.testing.JobflowInfo;
import com.asakusafw.compiler.util.tester.CompilerTester;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.io.sequencefile.SequenceFileModelOutput;
import com.asakusafw.runtime.value.IntOption;
import com.asakusafw.vocabulary.external.FileExporterDescription;

/**
* Test for {@link HadoopFileIoProcessor}.
*/
public class HadoopFileIoProcessorTest {

    static final Logger LOG = LoggerFactory.getLogger(HadoopFileIoProcessorTest.class);

    static volatile boolean enableOutput = false;

    /**
     * Test helper.
     */
    @Rule
    public final CompilerTester tester = new CompilerTester();

    /**
     * Run only enable output.
     */
    @Rule
    public final TestWatcher check = new TestWatcher() {

        @Override
        protected void starting(Description description) {
            Assume.assumeTrue(description.getMethodName().startsWith("output_") == false || enableOutput);
        }
    };

    /**
     * Checks Hadoop environment.
     */
    @BeforeClass
    public static void checkHadoop() {
        enableOutput = hasMapreduce370();
    }

    private static boolean hasMapreduce370() {
        try {
            File corelib = findHadoopCoreLib();
            if (corelib != null) {
                String classFile = "org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.class";
                LOG.debug("Searching MultipleOutputs");
                JarFile jar = new JarFile(corelib);
                try {
                    boolean found = jar.getJarEntry(classFile) != null;
                    LOG.debug("Searched MultipleOutputs: found={}", found);
                    return found;
                } finally {
                    jar.close();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    private static File findHadoopCoreLib() {
        String hadoop = System.getenv("HADOOP_HOME");
        if (hadoop != null) {
            File home = new File(hadoop);
            for (File file : home.listFiles()) {
                String name = file.getName();
                if (name.startsWith("hadoop-core-") && name.endsWith(".jar")) {
                    LOG.debug("hadoop-core.jar is found on: {}", file.getAbsoluteFile());
                    return file;
                }
            }
        }
        LOG.debug("hadoop-core.jar is not found");
        return null;
    }

    /**
     * validated.
     * @throws Exception if failed
     */
    @Test
    public void validate() throws Exception {
        tester.options().putExtraAttribute(
                HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
                GenericOptionValue.ENABLED.getSymbol());
        tester.compileJobflow(SingleOutputJob.class);
    }

    /**
     * missing path.
     * @throws Exception expected
     */
    @Test(expected = IOException.class)
    public void validate_invalid_compiler_option() throws Exception {
        tester.options().putExtraAttribute(
                HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
                "__INVALID__");
        tester.compileJobflow(SingleOutputJob.class);
    }

    /**
     * missing path.
     * @throws Exception expected
     */
    @Test(expected = IOException.class)
    public void validate_missing_path() throws Exception {
        tester.options().putExtraAttribute(
                HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
                GenericOptionValue.ENABLED.getSymbol());
        tester.compileJobflow(MissingPathOutputJob.class);
    }

    /**
     * invalid file name.
     * @throws Exception expected
     */
    @Test(expected = IOException.class)
    public void validate_inavalid_file_name() throws Exception {
        tester.options().putExtraAttribute(
                HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
                GenericOptionValue.ENABLED.getSymbol());
        tester.compileJobflow(InvalidFileNameOutputJob.class);
    }

    /**
     * singular path.
     * @throws Exception expected
     */
    @Test(expected = IOException.class)
    public void validate_singular_file() throws Exception {
        tester.options().putExtraAttribute(
                HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
                GenericOptionValue.ENABLED.getSymbol());
        tester.compileJobflow(SingularOutputJob.class);
    }

    /**
     * root folder.
     * @throws Exception expected
     */
    @Test(expected = IOException.class)
    public void validate_root() throws Exception {
        tester.options().putExtraAttribute(
                HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
                GenericOptionValue.ENABLED.getSymbol());
        tester.compileJobflow(RootOutputJob.class);
    }

    /**
     * Input single dataset.
     * @throws Exception if failed
     */
    @Test
    public void input_single() throws Exception {
        JobflowInfo info = tester.compileJobflow(NormalInputJob.class);

        ModelOutput<Ex1> s10 = openOutput(Ex1.class, Location.fromPath("target/testing/in/normal1-0", '/'));
        writeEx1(s10, 1, 2);

        ModelOutput<Ex1> s20 = openOutput(Ex1.class, Location.fromPath("target/testing/in/normal2-0", '/'));
        writeEx1(s20, 3, 4, 5);

        ModelOutput<Ex1> s21 = openOutput(Ex1.class, Location.fromPath("target/testing/in/normal2-1", '/'));
        writeEx1(s21, 6, 7, 8, 9);

        assertThat(tester.run(info), is(true));
        checkResults(1, 2, 3, 4, 5, 6, 7, 8, 9);
    }

    /**
     * Input a tiny dataset.
     * @throws Exception if failed
     */
    @Test
    public void input_tiny() throws Exception {
        tester.options().setHashJoinForTiny(true);
        JobflowInfo info = tester.compileJobflow(TinyInputJob.class);

        ModelOutput<Ex2> s10 = openOutput(Ex2.class, Location.fromPath("target/testing/in/tiny1-0", '/'));
        writeEx2(s10, 1, 2);

        ModelOutput<Ex2> s20 = openOutput(Ex2.class, Location.fromPath("target/testing/in/tiny2-0", '/'));
        writeEx2(s20, 3, 4, 5);

        ModelOutput<Ex2> s21 = openOutput(Ex2.class, Location.fromPath("target/testing/in/tiny2-1", '/'));
        writeEx2(s21, 6, 7, 8, 9);

        assertThat(tester.run(info), is(true));
        checkResults(1, 2, 3, 4, 5, 6, 7, 8, 9);
    }

    /**
     * Input mixed datasets.
     * @throws Exception if failed
     */
    @Test
    public void input_mixed() throws Exception {
        tester.options().setHashJoinForTiny(true);
        JobflowInfo info = tester.compileJobflow(MixedInputJob.class);

        ModelOutput<Ex1> s10 = openOutput(Ex1.class, Location.fromPath("target/testing/in/normal1-0", '/'));
        writeEx1(s10, 1, 2);
        ModelOutput<Ex1> s20 = openOutput(Ex1.class, Location.fromPath("target/testing/in/normal2-0", '/'));
        writeEx1(s20, 3, 4);

        ModelOutput<Ex2> t10 = openOutput(Ex2.class, Location.fromPath("target/testing/in/tiny1-0", '/'));
        writeEx2(t10, 5, 6);
        ModelOutput<Ex2> t20 = openOutput(Ex2.class, Location.fromPath("target/testing/in/tiny2-0", '/'));
        writeEx2(t20, 7, 8);

        assertThat(tester.run(info), is(true));
        checkResults(1, 2, 3, 4, 5, 6, 7, 8);
    }

    private void writeEx1(ModelOutput<Ex1> output, int... sids) throws IOException {
        try {
            Ex1 value = new Ex1();
            for (int sid : sids) {
                value.setSid(sid);
                value.setValue(sid);
                value.setStringAsString(String.valueOf(sid));
                output.write(value);
            }
        } finally {
            output.close();
        }
    }

    private void writeEx2(ModelOutput<Ex2> output, int... sids) throws IOException {
        try {
            Ex2 value = new Ex2();
            for (int sid : sids) {
                value.setSid(sid);
                value.setValue(sid);
                value.setStringAsString(String.valueOf(sid));
                output.write(value);
            }
        } finally {
            output.close();
        }
    }

    private void checkResults(int... sids) {
        List<Ex1> list;
        try {
            Ex1MockExporterDescription instance = new Ex1MockExporterDescription();
            list = tester.getList(
                    Ex1.class,
                    Location.fromPath(instance.getPathPrefix(), '/'),
                    new Comparator<Ex1>() {
                        @Override
                        public int compare(Ex1 o1, Ex1 o2) {
                            return o1.getSidOption().compareTo(o2.getSidOption());
                        }
                    });
        } catch (Exception e) {
            throw new AssertionError(e);
        }
        assertThat(list.size(), is(sids.length));
        for (int i = 0; i < sids.length; i++) {
            assertThat(list.get(i).getSid(), is((long) sids[i]));
        }
    }

    /**
     * Fail for MAPREDUCE-370.
     * @throws Exception if failed
     */
    @Test(expected = IOException.class)
    public void mapreduce_370() throws Exception {
        tester.options().putExtraAttribute(
                HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
                GenericOptionValue.DISABLED.getSymbol());
        tester.compileJobflow(SingleOutputJob.class);
    }

    /**
     * Output single file set.
     * @throws Exception if failed
     */
    @Test
    public void output_single() throws Exception {
        tester.options().putExtraAttribute(
                HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
                GenericOptionValue.ENABLED.getSymbol());
        JobflowInfo info = tester.compileJobflow(SingleOutputJob.class);

        ModelOutput<Ex1> source = tester.openOutput(Ex1.class, tester.getImporter(info, "input"));
        writeTestData(source);
        source.close();

        assertThat(tester.run(info), is(true));

        List<Ex1> out1 = getList(Out1ExporterDesc.class);
        checkSids(out1);
        checlValues(out1, 100);
    }

    /**
     * Output multiple file sets.
     * @throws Exception if failed
     */
    @Test
    public void output_multiple() throws Exception {
        tester.options().putExtraAttribute(
                HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
                GenericOptionValue.ENABLED.getSymbol());
        JobflowInfo info = tester.compileJobflow(MultipleOutputJob.class);

        ModelOutput<Ex1> source = tester.openOutput(Ex1.class, tester.getImporter(info, "input"));
        writeTestData(source);
        source.close();

        assertThat(tester.run(info), is(true));

        List<Ex1> out1 = getList(Out1ExporterDesc.class);
        checkSids(out1);
        checlValues(out1, 100);

        List<Ex1> out2 = getList(Out2ExporterDesc.class);
        checkSids(out2);
        checlValues(out2, 200);

        List<Ex1> out3 = getList(Out3ExporterDesc.class);
        checkSids(out3);
        checlValues(out3, 300);

        List<Ex1> out4 = getList(Out4ExporterDesc.class);
        checkSids(out4);
        checlValues(out4, 400);
    }

    /**
     * Output files into nested directory.
     * @throws Exception if failed
     */
    @Test
    public void output_independent() throws Exception {
        tester.options().putExtraAttribute(
                HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
                GenericOptionValue.ENABLED.getSymbol());
        JobflowInfo info = tester.compileJobflow(IndependentOutputJob.class);

        ModelOutput<Ex1> source = tester.openOutput(Ex1.class, tester.getImporter(info, "input"));
        writeTestData(source);
        source.close();

        assertThat(tester.run(info), is(true));

        List<Ex1> out1 = getList(Out1ExporterDesc.class);
        checkSids(out1);
        checlValues(out1, 100);

        List<Ex1> out2 = getList(IndependentOutExporterDesc.class);
        checkSids(out2);
        checlValues(out2, 200);
    }

    /**
     * Output files into nested directory.
     * @throws Exception if failed
     */
    @Test
    public void output_nested() throws Exception {
        tester.options().putExtraAttribute(
                HadoopFileIoProcessor.OPTION_EXPORTER_ENABLED,
                GenericOptionValue.ENABLED.getSymbol());
        JobflowInfo info = tester.compileJobflow(NestedOutputJob.class);

        ModelOutput<Ex1> source = tester.openOutput(Ex1.class, tester.getImporter(info, "input"));
        writeTestData(source);
        source.close();

        assertThat(tester.run(info), is(true));

        List<Ex1> out1 = getList(Out1ExporterDesc.class);
        checkSids(out1);
        checlValues(out1, 100);

        List<Ex1> out2 = getList(NestedOutExporterDesc.class);
        checkSids(out2);
        checlValues(out2, 200);
    }

    private <T> ModelOutput<T> openOutput(Class<T> aClass, Location location) throws IOException {
        Configuration conf = tester.configuration();
        Path path = new Path(location.toPath('/'));
        FileSystem fs = path.getFileSystem(conf);
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, NullWritable.class, aClass);
        return new SequenceFileModelOutput<T>(writer);
    }

    private void checkSids(List<Ex1> results) {
        assertThat(results.size(), is(10));
        assertThat(results.get(0).getSidOption().isNull(), is(true));
        for (int i = 1; i < 10; i++) {
            assertThat(results.get(i).getSid(), is((long) i));
        }
    }

    private void checlValues(List<Ex1> results, int value) {
        for (Ex1 ex1 : results) {
            assertThat(ex1.getValueOption(), is(new IntOption(value)));
        }
    }

    private void writeTestData(ModelOutput<Ex1> source) throws IOException {
        Ex1 value = new Ex1();
        source.write(value);
        value.setSid(1);
        source.write(value);
        value.setSid(2);
        source.write(value);
        value.setSid(3);
        source.write(value);
        value.setSid(4);
        source.write(value);
        value.setSid(5);
        source.write(value);
        value.setSid(6);
        source.write(value);
        value.setSid(7);
        source.write(value);
        value.setSid(8);
        source.write(value);
        value.setSid(9);
        source.write(value);
    }

    private List<Ex1> getList(Class<? extends FileExporterDescription> exporter) {
        try {
            FileExporterDescription instance = exporter.newInstance();
            Path path = new Path(Location.fromPath(instance.getPathPrefix(), '/').toString());
            FileSystem fs = path.getFileSystem(tester.configuration());
            FileStatus[] statuses = fs.globStatus(path);
            List<Ex1> results = new ArrayList<Ex1>();
            for (FileStatus status : statuses) {
                SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), tester.configuration());
                try {
                    Ex1 model = new Ex1();
                    while (reader.next(NullWritable.get(), model)) {
                        Ex1 copy = new Ex1();
                        copy.copyFrom(model);
                        results.add(copy);
                    }
                } finally {
                    reader.close();
                }
            }
            Collections.sort(results, new Comparator<Ex1>() {
                @Override
                public int compare(Ex1 o1, Ex1 o2) {
                    return o1.getSidOption().compareTo(o2.getSidOption());
                }
            });
            return results;
        } catch (Exception e) {
            throw new AssertionError(e);
        }
    }
}
// TOP
//
// Related Classes of com.asakusafw.compiler.fileio.HadoopFileIoProcessorTest
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.