Package com.asakusafw.compiler.flow.jobflow

Source Code of com.asakusafw.compiler.flow.jobflow.JobflowAnalyzer

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.compiler.flow.jobflow;

import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.asakusafw.compiler.common.Precondition;
import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor;
import com.asakusafw.compiler.flow.FlowCompilingEnvironment;
import com.asakusafw.compiler.flow.Location;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Delivery;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Export;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Import;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Process;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Reduce;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.SideData;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Source;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Stage;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Target;
import com.asakusafw.compiler.flow.plan.FlowBlock;
import com.asakusafw.compiler.flow.plan.StageGraph;
import com.asakusafw.compiler.flow.stage.CompiledReduce;
import com.asakusafw.compiler.flow.stage.CompiledShuffle;
import com.asakusafw.compiler.flow.stage.StageModel;
import com.asakusafw.utils.collections.Lists;
import com.asakusafw.utils.collections.Maps;
import com.asakusafw.utils.collections.Sets;
import com.asakusafw.vocabulary.flow.graph.FlowElement;
import com.asakusafw.vocabulary.flow.graph.FlowElementDescription;
import com.asakusafw.vocabulary.flow.graph.FlowElementKind;
import com.asakusafw.vocabulary.flow.graph.InputDescription;
import com.asakusafw.vocabulary.flow.graph.OutputDescription;

/**
* ジョブフローの構造を解析する。
*/
public class JobflowAnalyzer {

    static final Logger LOG = LoggerFactory.getLogger(JobflowAnalyzer.class);

    private final FlowCompilingEnvironment environment;

    private boolean sawError;

    /**
     * インスタンスを生成する。
     * @param environment 環境オブジェクト
     * @throws IllegalArgumentException 引数に{@code null}が指定された場合
     */
    public JobflowAnalyzer(FlowCompilingEnvironment environment) {
        Precondition.checkMustNotBeNull(environment, "environment"); //$NON-NLS-1$
        this.environment = environment;
    }

    /**
     * 現在までにエラーが発生していた場合に{@code true}を返す。
     * @return 現在までにエラーが発生していた場合に{@code true}
     */
    public boolean hasError() {
        return sawError;
    }

    /**
     * 現在までに発生したエラーの情報をクリアする。
     * @see #hasError()
     */
    public void clearError() {
        sawError = false;
    }

    /**
     * 指定のステージグラフを解析し、ジョブフロー全体のステージ構造に関する情報を返す。
     * @param graph ステージグラフ
     * @param stageModels 各ステージの情報
     * @return ジョブフロー全体のステージ構造に関する情報、解析に失敗した場合は{@code null}
     * @throws IllegalArgumentException 引数に{@code null}が指定された場合
     */
    public JobflowModel analyze(StageGraph graph, Collection<StageModel> stageModels) {
        Precondition.checkMustNotBeNull(graph, "graph"); //$NON-NLS-1$
        Precondition.checkMustNotBeNull(stageModels, "stageModels"); //$NON-NLS-1$
        LOG.debug("{}のステージ構造を分析します",
                graph.getInput().getSource().getDescription().getName());

        List<Import> imports = analyzeImports(graph, stageModels);
        List<Export> exports = analyzeExports(graph, stageModels);
        List<Stage> stages = analyzeStages(stageModels);
        if (hasError()) {
            return null;
        }

        resolve(imports, exports, stages);
        if (hasError()) {
            return null;
        }

        return new JobflowModel(
                graph,
                environment.getBatchId(),
                environment.getFlowId(),
                imports,
                exports,
                stages);
    }

    private List<Import> analyzeImports(StageGraph graph, Collection<StageModel> stageModels) {
        assert graph != null;
        assert stageModels != null;
        LOG.debug("入力を解析しています({})", graph.getInput());

        Set<InputDescription> saw = Sets.create();
        List<Import> results = Lists.create();
        for (FlowBlock.Output source : graph.getInput().getBlockOutputs()) {
            FlowElement element = source.getElementPort().getOwner();
            FlowElementDescription desc = element.getDescription();
            if (desc.getKind() != FlowElementKind.INPUT) {
                error("{0}は入力を表現していません", desc);
                continue;
            }
            InputDescription description = (InputDescription) desc;
            saw.add(description);
            ExternalIoDescriptionProcessor proc = environment.getExternals().findProcessor(description);
            if (proc == null) {
                error("{0}は不明なインポーターを利用しています", desc);
                continue;
            }
            Import prologue = new Import(source, description, proc);
            LOG.debug("入力{}が追加されます", prologue);
            results.add(prologue);
        }
        Set<InputDescription> sideData = Sets.create();
        for (StageModel stage : stageModels) {
            sideData.addAll(stage.getSideDataInputs());
        }
        sideData.removeAll(saw);
        for (InputDescription input : sideData) {
            ExternalIoDescriptionProcessor proc = environment.getExternals().findProcessor(input);
            if (proc == null) {
                error("{0}は不明なインポーターを利用しています", input);
                continue;
            }
            Import prologue = new Import(input, proc);
            LOG.debug("サイドデータ入力{}が追加されます", prologue);
            results.add(prologue);
        }
        return results;
    }

    private List<Export> analyzeExports(StageGraph graph, Collection<StageModel> stageModels) {
        assert graph != null;
        assert stageModels != null;
        LOG.debug("出力を解析しています({})", graph.getOutput());

        List<Export> results = Lists.create();
        for (FlowBlock.Input target : graph.getOutput().getBlockInputs()) {
            FlowElement element = target.getElementPort().getOwner();
            FlowElementDescription desc = element.getDescription();
            if (desc.getKind() != FlowElementKind.OUTPUT) {
                error("{0}は出力を表現していません", desc);
                continue;
            }
            OutputDescription description = (OutputDescription) desc;
            ExternalIoDescriptionProcessor proc = environment.getExternals().findProcessor(description);
            if (proc == null) {
                error("{0}は不明なエクスポーターを利用しています", desc);
                continue;
            }
            Export epilogue = new Export(
                    Collections.singletonList(target),
                    description,
                    proc);
            results.add(epilogue);
            LOG.debug("出力{}が追加されます", epilogue);
        }
        return results;
    }

    private List<Stage> analyzeStages(Collection<StageModel> stageModels) {
        assert stageModels != null;
        List<Stage> results = Lists.create();
        for (StageModel model : sort(stageModels)) {
            results.add(analyzeStage(model));
        }
        return results;
    }

    private Stage analyzeStage(StageModel model) {
        assert model != null;
        LOG.debug("{}を解析しています", model);
        List<Process> processes = analyzeProcesses(model);
        List<Delivery> deliveries = analyzeDeliveries(model);
        Set<SideData> sideData = analyzeSideData(model);
        Reduce reduce = analyzeReduce(model);
        Stage stage = new Stage(
                model,
                processes,
                deliveries,
                reduce,
                sideData);
        LOG.debug("{}が追加されます", model);
        return stage;
    }

    private Reduce analyzeReduce(StageModel model) {
        if (model.getShuffleModel() == null) {
            assert model.getReduceUnits().isEmpty();
            return null;
        }
        assert model.getReduceUnits().isEmpty() == false;
        CompiledShuffle shuffle = model.getShuffleModel().getCompiled();
        CompiledReduce reducer = model.getReduceUnits().get(0).getCompiled();
        return new Reduce(
                reducer.getReducerType().getQualifiedName(),
                reducer.getCombinerTypeOrNull() == null
                    ? null
                    : reducer.getCombinerTypeOrNull().getQualifiedName(),
                shuffle.getKeyTypeName(),
                shuffle.getValueTypeName(),
                shuffle.getGroupComparatorTypeName(),
                shuffle.getSortComparatorTypeName(),
                shuffle.getPartitionerTypeName());
    }

    private List<Delivery> analyzeDeliveries(StageModel model) {
        assert model != null;
        Location base = environment.getStageLocation(model.getStageBlock().getStageNumber());
        List<Delivery> deliveries = Lists.create();
        for (StageModel.Sink sink : model.getStageResults()) {
            Location location = base.append(sink.getName()).asPrefix();
            deliveries.add(new Delivery(sink.getOutputs(), Collections.singleton(location)));
        }
        return deliveries;
    }

    private List<Process> analyzeProcesses(StageModel model) {
        List<Process> processes = Lists.create();
        for (StageModel.MapUnit unit : model.getMapUnits()) {
            processes.add(new Process(
                    unit.getInputs(),
                    unit.getCompiled().getQualifiedName()));
        }
        return processes;
    }

    private Set<SideData> analyzeSideData(StageModel model) {
        assert model != null;
        Set<SideData> results = Sets.create();
        for (InputDescription input : model.getSideDataInputs()) {
            ExternalIoDescriptionProcessor proc = environment.getExternals().findProcessor(input);
            if (proc == null) {
                error("{0}は不明なインポーターを利用しています", input);
                continue;
            }
            Set<Location> locations = proc.getInputInfo(input).getLocations();
            results.add(new SideData(locations, input.getName()));
        }
        return results;
    }

    private List<StageModel> sort(Collection<StageModel> stageModels) {
        List<StageModel> models = Lists.from(stageModels);
        Collections.sort(models, new Comparator<StageModel>() {
            @Override
            public int compare(StageModel o1, StageModel o2) {
                int s1 = o1.getStageBlock().getStageNumber();
                int s2 = o2.getStageBlock().getStageNumber();
                if (s1 == s2) {
                    return 0;
                }
                if (s1 < s2) {
                    return -1;
                }
                return +1;
            }
        });
        return models;
    }

    private void resolve(List<Import> imports, List<Export> exports, List<Stage> stages) {
        assert imports != null;
        assert exports != null;
        assert stages != null;
        Map<FlowBlock.Output, Source> sources = createOutputMap(imports, stages);
        for (Target target : exports) {
            resolveTarget(target, sources);
        }
        for (Stage stage : stages) {
            for (Target target : stage.getProcesses()) {
                resolveTarget(target, sources);
            }
        }
    }

    private void resolveTarget(Target target, Map<FlowBlock.Output, Source> sources) {
        assert target != null;
        assert sources != null;
        Set<Source> opposites = Sets.create();
        for (FlowBlock.Input input : target.getInputs()) {
            for (FlowBlock.Connection conn : input.getConnections()) {
                FlowBlock.Output upstream = conn.getUpstream();
                Source source = sources.get(upstream);
                assert source != null;
                opposites.add(source);
            }
        }
        target.resolveSources(opposites);
    }

    private Map<FlowBlock.Output, Source> createOutputMap(
            List<Import> imports,
            List<Stage> stages) {
        assert imports != null;
        assert stages != null;
        Map<FlowBlock.Output, Source> sources = Maps.create();
        for (Source source : imports) {
            for (FlowBlock.Output output : source.getOutputs()) {
                sources.put(output, source);
            }
        }
        for (Stage stage : stages) {
            for (Source source : stage.getDeliveries()) {
                for (FlowBlock.Output output : source.getOutputs()) {
                    sources.put(output, source);
                }
            }
        }
        return sources;
    }

    private void error(String format, Object...args) {
        environment.error(format, args);
        sawError = true;
    }
}
TOP

Related Classes of com.asakusafw.compiler.flow.jobflow.JobflowAnalyzer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.