Package com.asakusafw.runtime.stage.output

Source Code of com.asakusafw.runtime.stage.output.StageOutputDriver

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.runtime.stage.output;

import java.io.IOException;
import java.lang.reflect.Method;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;

import com.asakusafw.runtime.compatibility.JobCompatibility;
import com.asakusafw.runtime.core.Result;
import com.asakusafw.runtime.flow.ResultOutput;
import com.asakusafw.runtime.stage.StageOutput;

/**
* ステージ出力を設定するためのドライバ。
* <p>
* 現在のところ、{@link FileOutputFormat}およびそのサブクラスに関するもののみを取り扱う。
* </p>
*/
public class StageOutputDriver {

    static final Log LOG = LogFactory.getLog(StageOutputDriver.class);

    private static final String K_NAMES = "com.asakusafw.stage.output.names";

    private static final String K_FORMAT_PREFIX = "com.asakusafw.stage.output.format.";

    private static final String K_KEY_PREFIX = "com.asakusafw.stage.output.key.";

    private static final String K_VALUE_PREFIX = "com.asakusafw.stage.output.value.";

    private static final String COUNTER_GROUP = "com.asakusafw.stage.output.RecordCounters";

    private final Map<String, ResultOutput<?>> resultSinks;

    private final TaskInputOutputContext<?, ?, ?, ?> context;

    /**
     * インスタンスを生成する。
     * @param context 現在のタスク試行コンテキスト
     * @throws IOException 出力の初期化に失敗した場合
     * @throws InterruptedException 出力の初期化に失敗した場合
     * @throws IllegalArgumentException 引数に{@code null}が含まれる場合
     */
    public StageOutputDriver(
            TaskInputOutputContext<?, ?, ?, ?> context) throws IOException, InterruptedException {
        if (context == null) {
            throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
        }
        this.context = context;
        this.resultSinks = prepareSinks(context);
    }

    private static Map<String, ResultOutput<?>> prepareSinks(TaskInputOutputContext<?, ?, ?, ?> context) {
        assert context != null;
        Map<String, ResultOutput<?>> results = new HashMap<String, ResultOutput<?>>();
        Configuration conf = context.getConfiguration();
        for (String name : conf.getStringCollection(K_NAMES)) {
            results.put(name, null);
        }
        return results;
    }

    private static final String METHOD_SET_OUTPUT_NAME = "setOutputName";

    private void setOutputFilePrefix(JobContext localContext, String name) throws IOException {
        assert localContext != null;
        assert name != null;
        try {
            Method method = FileOutputFormat.class.getDeclaredMethod(
                    METHOD_SET_OUTPUT_NAME, JobContext.class, String.class);
            method.setAccessible(true);
            method.invoke(null, localContext, name);
        } catch (Exception e) {
            throw new IOException(MessageFormat.format(
                    "Failed to configure output name of \"{0}\" ([MAPREDUCE-370] may be not applied)",
                    name), e);
        }
    }

    /**
     * 指定の名前を持つ出力のシンクオブジェクトを返す。
     * <p>
     * ここに指定する名前は、ジョブの起動時にあらかじめ
     * {@link #set(Job, String, Collection)}で登録しておく必要がある。
     * </p>
     * @param <T> 出力の型
     * @param name 出力の名前
     * @return 対応するシンクオブジェクト
     * @throws IOException 出力の作成に失敗した場合
     * @throws InterruptedException 出力の作成時に割り込みが発行された場合
     * @throws IllegalArgumentException 引数に{@code null}が含まれる場合
     */
    @SuppressWarnings("unchecked")
    public synchronized <T extends Writable> Result<T> getResultSink(
            String name) throws IOException, InterruptedException {
        if (name == null) {
            throw new IllegalArgumentException("name must not be null"); //$NON-NLS-1$
        }
        if (resultSinks.containsKey(name) == false) {
            throw new IllegalArgumentException(MessageFormat.format(
                    "Output \"{0}\" is not declared",
                    name));
        }
        ResultOutput<?> sink = resultSinks.get(name);
        if (sink == null) {
            sink = buildSink(name);
            resultSinks.put(name, sink);
        }
        return (Result<T>) sink;
    }

    private ResultOutput<?> buildSink(String name) throws IOException, InterruptedException {
        assert name != null;
        Configuration conf = context.getConfiguration();
        @SuppressWarnings("rawtypes")
        Class<? extends OutputFormat> formatClass = conf.getClass(
                getPropertyName(K_FORMAT_PREFIX, name),
                null,
                OutputFormat.class);
        Class<?> keyClass = conf.getClass(getPropertyName(K_KEY_PREFIX, name), null);
        Class<?> valueClass = conf.getClass(getPropertyName(K_VALUE_PREFIX, name), null);

        if (formatClass == null) {
            throw new IllegalStateException(MessageFormat.format(
                    "OutputFormat is not declared for output \"{0}\"",
                    name));
        }
        if (keyClass == null) {
            throw new IllegalStateException(MessageFormat.format(
                    "Output key type is not declared for output \"{0}\"",
                    name));
        }
        if (valueClass == null) {
            throw new IllegalStateException(MessageFormat.format(
                    "Output value type is not declared for output \"{0}\"",
                    name));
        }

        List<Counter> counters = getCounters(name);
        if (TemporaryOutputFormat.class.isAssignableFrom(formatClass)) {
            return buildTemporarySink(name, valueClass, counters);
        } else {
            return buildNormalSink(name, formatClass, keyClass, valueClass, counters);
        }
    }

    private List<Counter> getCounters(String name) {
        assert name != null;
        try {
            List<Counter> results = new ArrayList<Counter>();
            results.add(JobCompatibility.getTaskOutputRecordCounter(context));
            results.add(context.getCounter(COUNTER_GROUP, name));
            return results;
        } catch (RuntimeException e) {
            LOG.warn("Failed to create counters", e);
            return Collections.emptyList();
        }
    }

    private ResultOutput<?> buildTemporarySink(
            String name,
            Class<?> valueClass,
            List<Counter> counters) throws IOException, InterruptedException {
        assert context != null;
        assert name != null;
        assert valueClass != null;
        assert counters != null;
        TemporaryOutputFormat<?> format = new TemporaryOutputFormat<Object>();
        RecordWriter<?, ?> writer = format.createRecordWriter(context, name, valueClass);
        return new ResultOutput<Writable>(context, writer, counters);
    }

    private ResultOutput<?> buildNormalSink(
            String name,
            @SuppressWarnings("rawtypes") Class<? extends OutputFormat> formatClass,
            Class<?> keyClass,
            Class<?> valueClass,
            List<Counter> counters) throws IOException, InterruptedException {
        assert context != null;
        assert name != null;
        assert formatClass != null;
        assert keyClass != null;
        assert valueClass != null;
        assert counters != null;
        Job job = JobCompatibility.newJob(context.getConfiguration());
        job.setOutputFormatClass(formatClass);
        job.setOutputKeyClass(keyClass);
        job.setOutputValueClass(valueClass);
        TaskAttemptContext localContext = JobCompatibility.newTaskAttemptContext(
                job.getConfiguration(),
                context.getTaskAttemptID());
        if (FileOutputFormat.class.isAssignableFrom(formatClass)) {
            setOutputFilePrefix(localContext, name);
        }
        OutputFormat<?, ?> format = ReflectionUtils.newInstance(
                formatClass,
                localContext.getConfiguration());
        RecordWriter<?, ?> writer = format.getRecordWriter(localContext);
        return new ResultOutput<Writable>(localContext, writer);
    }

    /**
     * 現在の出力を破棄する。
     * @throws IOException 出力のフラッシュに失敗した場合
     * @throws InterruptedException 出力の破棄に割り込みが発行された場合
     * @throws IllegalArgumentException 引数に{@code null}が含まれる場合
     */
    public synchronized void close() throws IOException, InterruptedException {
        for (Map.Entry<String, ResultOutput<?>> entry : resultSinks.entrySet()) {
            ResultOutput<?> output = entry.getValue();
            if (output != null) {
                output.close();
                entry.setValue(null);
            }
        }
    }

    /**
     * Sets the output specification for this job.
     * @param job current job
     * @param outputPath base output path
     * @param outputList each output information
     * @throws IOException if failed to configure the output specification
     * @throws IllegalArgumentException if some parameters were {@code null}
     * @since 0.2.5
     */
    public static void set(Job job, String outputPath, Collection<StageOutput> outputList) throws IOException {
        if (job == null) {
            throw new IllegalArgumentException("job must not be null"); //$NON-NLS-1$
        }
        if (outputPath == null) {
            throw new IllegalArgumentException("outputPath must not be null"); //$NON-NLS-1$
        }
        if (outputList == null) {
            throw new IllegalArgumentException("outputList must not be null"); //$NON-NLS-1$
        }
        List<StageOutput> brigeOutputs = new ArrayList<StageOutput>();
        List<StageOutput> normalOutputs = new ArrayList<StageOutput>();
        boolean sawFileOutput = false;
        boolean sawTemporaryOutput = false;
        for (StageOutput output : outputList) {
            Class<? extends OutputFormat<?, ?>> formatClass = output.getFormatClass();
            if (BridgeOutputFormat.class.isAssignableFrom(formatClass)) {
                brigeOutputs.add(output);
            } else {
                normalOutputs.add(output);
            }
        }
        if (brigeOutputs.isEmpty() == false) {
            BridgeOutputFormat.set(job, brigeOutputs);
        }
        for (StageOutput output : normalOutputs) {
            String name = output.getName();
            Class<?> keyClass = output.getKeyClass();
            Class<?> valueClass = output.getValueClass();
            Class<? extends OutputFormat<?, ?>> formatClass = output.getFormatClass();
            sawFileOutput |= FileOutputFormat.class.isAssignableFrom(formatClass);
            sawTemporaryOutput |= TemporaryOutputFormat.class.isAssignableFrom(formatClass);
            addOutput(job, name, formatClass, keyClass, valueClass);
        }
        if (sawFileOutput) {
            FileOutputFormat.setOutputPath(job, new Path(outputPath));
        }
        if (sawTemporaryOutput) {
            TemporaryOutputFormat.setOutputPath(job, new Path(outputPath));
        }
    }

    private static void addOutput(
            Job job,
            String name,
            Class<?> formatClass,
            Class<?> keyClass,
            Class<?> valueClass) {
        assert job != null;
        assert name != null;
        assert formatClass != null;
        assert keyClass != null;
        assert valueClass != null;
        if (isValidName(name) == false) {
            throw new IllegalArgumentException(MessageFormat.format(
                    "Output name \"{0}\" is not valid",
                    name));
        }
        Configuration conf = job.getConfiguration();
        Set<String> names = new TreeSet<String>(conf.getStringCollection(K_NAMES));
        if (names.contains(name)) {
            throw new IllegalArgumentException(MessageFormat.format(
                    "Output name \"{0}\" is already declared",
                    name));
        }
        names.add(name);
        conf.setStrings(K_NAMES, names.toArray(new String[names.size()]));
        conf.setClass(getPropertyName(K_FORMAT_PREFIX, name), formatClass, OutputFormat.class);
        conf.setClass(getPropertyName(K_KEY_PREFIX, name), keyClass, Object.class);
        conf.setClass(getPropertyName(K_VALUE_PREFIX, name), valueClass, Object.class);
    }

    private static String getPropertyName(String prefix, String name) {
        assert prefix != null;
        assert name != null;
        return prefix + name;
    }

    private static boolean isValidName(String name) {
        assert name != null;
        for (char c : name.toCharArray()) {
            if (isValidNameChar(c) == false) {
                return false;
            }
        }
        return true;
    }

    private static boolean isValidNameChar(char c) {
        return ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
    }
}
TOP

Related Classes of com.asakusafw.runtime.stage.output.StageOutputDriver

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.