Package com.asakusafw.compiler.windgate

Source Code of com.asakusafw.compiler.windgate.WindGateIoProcessor

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.compiler.windgate;

import java.io.IOException;
import java.io.OutputStream;
import java.text.MessageFormat;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeMap;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.asakusafw.compiler.common.JavaName;
import com.asakusafw.compiler.flow.ExternalIoCommandProvider;
import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor;
import com.asakusafw.compiler.flow.Location;
import com.asakusafw.compiler.flow.jobflow.CompiledStage;
import com.asakusafw.compiler.flow.jobflow.ExternalIoStage;
import com.asakusafw.compiler.flow.mapreduce.parallel.ParallelSortClientEmitter;
import com.asakusafw.compiler.flow.mapreduce.parallel.ResolvedSlot;
import com.asakusafw.compiler.flow.mapreduce.parallel.Slot;
import com.asakusafw.compiler.flow.mapreduce.parallel.SlotResolver;
import com.asakusafw.runtime.stage.input.TemporaryInputFormat;
import com.asakusafw.runtime.stage.output.TemporaryOutputFormat;
import com.asakusafw.utils.collections.Lists;
import com.asakusafw.utils.collections.Maps;
import com.asakusafw.vocabulary.external.ExporterDescription;
import com.asakusafw.vocabulary.external.ImporterDescription;
import com.asakusafw.vocabulary.flow.graph.InputDescription;
import com.asakusafw.vocabulary.flow.graph.OutputDescription;
import com.asakusafw.vocabulary.windgate.Constants;
import com.asakusafw.vocabulary.windgate.WindGateExporterDescription;
import com.asakusafw.vocabulary.windgate.WindGateImporterDescription;
import com.asakusafw.vocabulary.windgate.WindGateProcessDescription;
import com.asakusafw.windgate.core.DriverScript;
import com.asakusafw.windgate.core.GateScript;
import com.asakusafw.windgate.core.ProcessScript;
import com.asakusafw.windgate.core.vocabulary.FileProcess;

/**
* Processes WindGate vocabularies.
* @since 0.2.2
*/
public class WindGateIoProcessor extends ExternalIoDescriptionProcessor {

    /**
     * The logger shared by this processor.
     */
    static final Logger LOG = LoggerFactory.getLogger(WindGateIoProcessor.class);

    /**
     * The module name of WindGate.
     * In this class it also serves as the processor ID, the command provider name,
     * the prefix of generated command IDs, and the module ID of the prologue/epilogue locations.
     */
    public static final String MODULE_NAME = "windgate";

    /**
     * The process script path, appended to the home path prefix when building
     * import and export command lines.
     */
    private static final String CMD_PROCESS = "windgate/bin/process.sh";

    /**
     * The finalize script path, appended to the home path prefix when building
     * finalize command lines.
     */
    private static final String CMD_FINALIZE = "windgate/bin/finalize.sh";

    /**
     * The value of {@code {0}} in {@link #PATTERN_SCRIPT_LOCATION} for importer scripts.
     */
    private static final String OPT_IMPORT = "import";

    /**
     * The value of {@code {0}} in {@link #PATTERN_SCRIPT_LOCATION} for exporter scripts.
     */
    private static final String OPT_EXPORT = "export";

    /**
     * The {@link MessageFormat} pattern of gate script resource paths:
     * {@code {0}} is {@link #OPT_IMPORT} or {@link #OPT_EXPORT}, {@code {1}} is the profile name.
     */
    private static final String PATTERN_SCRIPT_LOCATION = "META-INF/windgate/{0}-{1}.properties";

    /**
     * The mode argument of an import command whose profile is also used by an exporter.
     */
    static final String OPT_BEGIN = "begin";

    /**
     * The mode argument of an export command whose profile is also used by an importer.
     */
    static final String OPT_END = "end";

    /**
     * The mode argument of an import/export command whose profile is not used
     * on the opposite side.
     */
    static final String OPT_ONESHOT = "oneshot";

    /**
     * Returns the ID of this processor, which is always {@link #MODULE_NAME}.
     * @return the processor ID
     */
    @Override
    public String getId() {
        return MODULE_NAME;
    }

    /**
     * Returns {@link WindGateImporterDescription} as the importer description type of this processor.
     * @return the importer description class
     */
    @Override
    public Class<? extends ImporterDescription> getImporterDescriptionType() {
        return WindGateImporterDescription.class;
    }

    /**
     * Returns {@link WindGateExporterDescription} as the exporter description type of this processor.
     * @return the exporter description class
     */
    @Override
    public Class<? extends ExporterDescription> getExporterDescriptionType() {
        return WindGateExporterDescription.class;
    }

    @Override
    public boolean validate(List<InputDescription> inputs, List<OutputDescription> outputs) {
        LOG.debug("Validating WindGate Vocabularies (batch={}, flow={})",
                getEnvironment().getBatchId(),
                getEnvironment().getFlowId());
        boolean valid = true;
        for (InputDescription input : inputs) {
            WindGateImporterDescription desc = extract(input);
            try {
                validateCommonProperties(desc);
                if (desc.getDriverScript() == null) {
                    throw new IllegalStateException(MessageFormat.format(
                            "Driver script is not defined: {0}",
                            desc.getClass().getName()));
                }
            } catch (IllegalStateException e) {
                getEnvironment().error(
                        "Importer description \"{0}\" is invalid (batch={1}, flow={2}): {3}",
                        input.getName(),
                        getEnvironment().getBatchId(),
                        getEnvironment().getFlowId(),
                        e.getMessage());
                valid = false;
            }
        }
        for (OutputDescription output : outputs) {
            WindGateExporterDescription desc = extract(output);
            try {
                validateCommonProperties(desc);
                if (desc.getDriverScript() == null) {
                    throw new IllegalStateException(MessageFormat.format(
                            "Driver script is not defined: {0}",
                            desc.getClass().getName()));
                }
            } catch (IllegalStateException e) {
                getEnvironment().error(
                        "Exporter description \"{0}\" is invalid (batch={1}, flow={2}): {3}",
                        output.getName(),
                        getEnvironment().getBatchId(),
                        getEnvironment().getFlowId(),
                        e.getMessage());
                valid = false;
            }
        }
        return valid;
    }

    private void validateCommonProperties(WindGateProcessDescription desc) {
        String profileName = desc.getProfileName();
        if (profileName == null) {
            throw new IllegalStateException(MessageFormat.format(
                    "{1} must not be null: {0}",
                    desc.getClass().getName(),
                    "getProfileName()"));
        }
        if (profileName.isEmpty()) {
            throw new IllegalStateException(MessageFormat.format(
                    "{1} must not be empty string: {0}",
                    desc.getClass().getName(),
                    "getProfileName()"));
        }
    }

    @Override
    public SourceInfo getInputInfo(InputDescription description) {
        Set<Location> locations = Collections.singleton(getInputLocation(description));
        return new SourceInfo(locations, TemporaryInputFormat.class);
    }

    @Override
    public List<ExternalIoStage> emitEpilogue(IoContext context) throws IOException {
        if (context.getOutputs().isEmpty()) {
            return Collections.emptyList();
        }
        LOG.debug("Emitting epilogue stages for WindGate (batch={}, flow={})",
                getEnvironment().getBatchId(),
                getEnvironment().getFlowId());

        List<Slot> slots = Lists.create();
        for (Output output : context.getOutputs()) {
            Slot slot = toSlot(output);
            slots.add(slot);
        }
        List<ResolvedSlot> resolved = new SlotResolver(getEnvironment()).resolve(slots);
        if (getEnvironment().hasError()) {
            return Collections.emptyList();
        }

        ParallelSortClientEmitter emitter = new ParallelSortClientEmitter(getEnvironment());
        CompiledStage stage = emitter.emit(
                MODULE_NAME,
                resolved,
                getEnvironment().getEpilogueLocation(MODULE_NAME));

        return Collections.singletonList(new ExternalIoStage(getId(), stage, context.getOutputContext()));
    }

    private Slot toSlot(Output output) {
        assert output != null;
        String name = normalize(output.getDescription().getName());
        return new Slot(
                name,
                output.getDescription().getDataType(),
                Collections.<String>emptyList(),
                output.getSources(),
                TemporaryOutputFormat.class);
    }

    private Location getInputLocation(InputDescription description) {
        assert description != null;
        String name = normalize(description.getName());
        return getEnvironment()
            .getPrologueLocation(MODULE_NAME)
            .append(name);
    }

    private Location getOutputLocation(OutputDescription description) {
        assert description != null;
        String name = normalize(description.getName());
        return getEnvironment()
            .getEpilogueLocation(MODULE_NAME)
            .append(name)
            .asPrefix();
    }

    private String normalize(String name) {
        assert name != null;
        assert name.trim().isEmpty() == false;
        String memberName = JavaName.of(name).toMemberName();
        StringBuilder buf = new StringBuilder();
        for (char c : memberName.toCharArray()) {
            if (('A' <= c && c <= 'Z')
                    || ('a' <= c && c <= 'z')
                    || ('0' <= c && c <= '9')) {
                buf.append(c);
            }
        }
        if (buf.length() == 0) {
            buf.append("0");
        }
        return buf.toString();
    }

    @Override
    public void emitPackage(IoContext context) throws IOException {
        LOG.debug("Emitting process scripts for WindGate (batch={}, flow={})",
                getEnvironment().getBatchId(),
                getEnvironment().getFlowId());
        Map<String, GateScript> importers = toImporterScripts(context.getInputs());
        Map<String, GateScript> exporters = toExporterScripts(context.getOutputs());

        for (Map.Entry<String, GateScript> entry : importers.entrySet()) {
            String script = getScriptLocation(true, entry.getKey());
            LOG.debug("Emitting importer script {} (batch={}, flow={})", new Object[] {
                    script,
                    getEnvironment().getBatchId(),
                    getEnvironment().getFlowId(),
            });
            emitScript(script, entry.getValue());
        }
        for (Map.Entry<String, GateScript> entry : exporters.entrySet()) {
            String script = getScriptLocation(false, entry.getKey());
            LOG.debug("Emitting importer script {} (batch={}, flow={})", new Object[] {
                    script,
                    getEnvironment().getBatchId(),
                    getEnvironment().getFlowId(),
            });
            emitScript(script, entry.getValue());
        }
    }

    static String getScriptLocation(boolean importer, String profileName) {
        assert profileName != null;
        return MessageFormat.format(
                PATTERN_SCRIPT_LOCATION,
                importer ? OPT_IMPORT : OPT_EXPORT,
                profileName);
    }

    private Map<String, GateScript> toImporterScripts(Collection<Input> inputs) {
        assert inputs != null;
        Map<String, List<ProcessScript<?>>> processes = Maps.create();
        for (Input input : inputs) {
            String profileName = extract(input.getDescription()).getProfileName();
            ProcessScript<?> process = toProcessScript(input);
            Maps.addToList(processes, profileName, process);
        }
        return toGateScripts(processes);
    }

    private Map<String, GateScript> toExporterScripts(Collection<Output> outputs) {
        assert outputs != null;
        Map<String, List<ProcessScript<?>>> processes = Maps.create();
        for (Output output : outputs) {
            String profileName = extract(output.getDescription()).getProfileName();
            ProcessScript<?> process = toProcessScript(output);
            Maps.addToList(processes, profileName, process);
        }
        return toGateScripts(processes);
    }

    private ProcessScript<?> toProcessScript(Input input) {
        assert input != null;
        WindGateImporterDescription desc = extract(input.getDescription());
        String location = getInputLocation(input.getDescription()).toPath('/');
        DriverScript drain = new DriverScript(
                Constants.HADOOP_FILE_RESOURCE_NAME,
                Collections.singletonMap(FileProcess.FILE.key(), location));
        return createProcessScript(
                input.getDescription().getName(),
                desc.getModelType(),
                desc.getDriverScript(),
                drain);
    }

    private ProcessScript<?> toProcessScript(Output output) {
        assert output != null;
        WindGateExporterDescription desc = extract(output.getDescription());
        String location = getOutputLocation(output.getDescription()).toPath('/');
        DriverScript source = new DriverScript(
                Constants.HADOOP_FILE_RESOURCE_NAME,
                Collections.singletonMap(FileProcess.FILE.key(), location));
        return createProcessScript(
                output.getDescription().getName(),
                desc.getModelType(),
                source,
                desc.getDriverScript());
    }

    private <T> ProcessScript<T> createProcessScript(
            String profileName,
            Class<T> modelType,
            DriverScript source,
            DriverScript drain) {
        assert profileName != null;
        assert modelType != null;
        assert source != null;
        assert drain != null;
        return new ProcessScript<T>(
                profileName,
                Constants.DEFAULT_PROCESS_NAME,
                modelType,
                source,
                drain);
    }

    private Map<String, GateScript> toGateScripts(Map<String, List<ProcessScript<?>>> processes) {
        assert processes != null;
        Map<String, GateScript> results = new TreeMap<String, GateScript>();
        for (Map.Entry<String, List<ProcessScript<?>>> entry : processes.entrySet()) {
            results.put(entry.getKey(), new GateScript(entry.getKey(), entry.getValue()));
        }
        return results;
    }

    private void emitScript(String path, GateScript script) throws IOException {
        assert path != null;
        assert script != null;
        Properties properties = new Properties();
        script.storeTo(properties);

        OutputStream output = getEnvironment().openResource(null, path);
        try {
            properties.store(output, getEnvironment().getTargetId());
        } finally {
            output.close();
        }
    }

    private WindGateImporterDescription extract(InputDescription description) {
        assert description != null;
        ImporterDescription importer = description.getImporterDescription();
        assert importer != null;
        assert importer instanceof WindGateImporterDescription;
        return (WindGateImporterDescription) importer;
    }

    private WindGateExporterDescription extract(OutputDescription description) {
        assert description != null;
        ExporterDescription exporter = description.getExporterDescription();
        assert exporter != null;
        assert exporter instanceof WindGateExporterDescription;
        return (WindGateExporterDescription) exporter;
    }

    @Override
    public ExternalIoCommandProvider createCommandProvider(IoContext context) {
        Map<String, IoContextBuilder> importers = Maps.create();
        for (Input input : context.getInputs()) {
            WindGateImporterDescription desc = extract(input.getDescription());
            add(importers, desc.getProfileName(), input);
        }
        Map<String, IoContextBuilder> exporters = Maps.create();
        for (Output output : context.getOutputs()) {
            WindGateExporterDescription desc = extract(output.getDescription());
            add(exporters, desc.getProfileName(), output);
        }
        return new CommandProvider(
                getEnvironment().getBatchId(),
                getEnvironment().getFlowId(),
                build(importers),
                build(exporters));
    }

    private void add(Map<String, IoContextBuilder> targets, String target, Input input) {
        IoContextBuilder builder = targets.get(target);
        if (builder == null) {
            builder = new IoContextBuilder();
            targets.put(target, builder);
        }
        builder.addInput(input);
    }

    private void add(Map<String, IoContextBuilder> targets, String target, Output output) {
        IoContextBuilder builder = targets.get(target);
        if (builder == null) {
            builder = new IoContextBuilder();
            targets.put(target, builder);
        }
        builder.addOutput(output);
    }

    private Map<String, IoContext> build(Map<String, IoContextBuilder> builders) {
        Map<String, IoContext> results = new TreeMap<String, IoContext>();
        for (Map.Entry<String, IoContextBuilder> entry : builders.entrySet()) {
            results.put(entry.getKey(), entry.getValue().build());
        }
        return results;
    }

    static ExternalIoCommandProvider findRelated(List<ExternalIoCommandProvider> commands) {
        for (ExternalIoCommandProvider provider : commands) {
            if (provider instanceof CommandProvider) {
                return provider;
            }
        }
        return null;
    }

    /**
     * Returns the profile name to be attached to commands.
     * Currently this returns the given name unchanged.
     * @param profileName the WindGate profile name
     * @return the same profile name
     */
    static String resolveProfileName(String profileName) {
        assert profileName != null;
        return profileName;
    }

    /**
     * Returns the module name to be attached to commands of the given profile,
     * in the form of <code>{@link #MODULE_NAME}.&lt;profile-name&gt;</code>.
     * @param profileName the WindGate profile name
     * @return the qualified module name
     */
    static String resolveModuleName(String profileName) {
        assert profileName != null;
        return MessageFormat.format("{0}.{1}", MODULE_NAME, profileName); //$NON-NLS-1$
    }

    /**
     * Provides lifecycle commands for WindGate.
     */
    public static class CommandProvider extends ExternalIoCommandProvider {

        private static final long serialVersionUID = -3916072136449360144L;

        private final String batchId;

        private final String flowId;

        private final Map<String, IoContext> importers;

        private final Map<String, IoContext> exporters;

        /**
         * Creates a new instance.
         * Both maps are copied into sorted maps, so later changes to the arguments
         * do not affect this provider.
         * @param batchId the target batch ID
         * @param flowId the target flow ID
         * @param importers the I/O contexts of importers, keyed by profile name
         * @param exporters the I/O contexts of exporters, keyed by profile name
         */
        CommandProvider(
                String batchId, String flowId, Map<String, IoContext> importers, Map<String, IoContext> exporters) {
            assert batchId != null;
            assert flowId != null;
            assert importers != null;
            assert exporters != null;
            this.batchId = batchId;
            this.flowId = flowId;
            this.importers = new TreeMap<String, IoContext>(importers);
            this.exporters = new TreeMap<String, IoContext>(exporters);
        }

        /**
         * Returns the name of this provider, which is always {@link #MODULE_NAME}.
         * @return the provider name
         */
        @Override
        public String getName() {
            return MODULE_NAME;
        }

        @Override
        public List<Command> getImportCommand(CommandContext context) {
            List<Command> results = Lists.create();
            for (Map.Entry<String, IoContext> entry : importers.entrySet()) {
                String profile = entry.getKey();
                List<String> commands = Lists.create();
                commands.add(context.getHomePathPrefix() + CMD_PROCESS);
                commands.add(profile);
                if (exporters.containsKey(profile)) {
                    commands.add(OPT_BEGIN);
                } else {
                    commands.add(OPT_ONESHOT);
                }
                commands.add("classpath:" + getScriptLocation(true, profile));
                commands.add(batchId);
                commands.add(flowId);
                commands.add(context.getExecutionId());
                commands.add(context.getVariableList());
                results.add(new Command(
                        String.format("%s%s%04d", MODULE_NAME, '.', results.size()),
                        commands,
                        resolveModuleName(profile),
                        resolveProfileName(profile),
                        getEnvironment(context),
                        entry.getValue().getInputContext()));
            }
            return results;
        }

        @Override
        public List<Command> getExportCommand(CommandContext context) {
            List<Command> results = Lists.create();
            for (Map.Entry<String, IoContext> entry : exporters.entrySet()) {
                String profile = entry.getKey();
                List<String> commands = Lists.create();
                commands.add(context.getHomePathPrefix() + CMD_PROCESS);
                commands.add(profile);
                if (importers.containsKey(profile)) {
                    commands.add(OPT_END);
                } else {
                    commands.add(OPT_ONESHOT);
                }
                commands.add("classpath:" + getScriptLocation(false, profile));
                commands.add(batchId);
                commands.add(flowId);
                commands.add(context.getExecutionId());
                commands.add(context.getVariableList());
                results.add(new Command(
                        String.format("%s%s%04d", MODULE_NAME, '.', results.size()),
                        commands,
                        resolveModuleName(profile),
                        resolveProfileName(profile),
                        getEnvironment(context),
                        entry.getValue().getOutputContext()));
            }
            return results;
        }

        @Override
        public List<Command> getFinalizeCommand(CommandContext context) {
            Map<String, IoContextBuilder> union = new TreeMap<String, IoContextBuilder>();
            for (Map.Entry<String, IoContext> entry : importers.entrySet()) {
                add(union, entry.getKey(), entry.getValue());
            }
            for (Map.Entry<String, IoContext> entry : exporters.entrySet()) {
                add(union, entry.getKey(), entry.getValue());
            }
            List<Command> results = Lists.create();
            for (Map.Entry<String, IoContextBuilder> entry : union.entrySet()) {
                String profile = entry.getKey();
                List<String> commands = Lists.create();
                commands.add(context.getHomePathPrefix() + CMD_FINALIZE);
                commands.add(profile);
                commands.add(batchId);
                commands.add(flowId);
                commands.add(context.getExecutionId());
                results.add(new Command(
                        String.format("%s%s%04d", MODULE_NAME, '.', results.size()),
                        commands,
                        resolveModuleName(profile),
                        resolveProfileName(profile),
                        getEnvironment(context),
                        entry.getValue().build()));
            }
            return results;
        }

        private void add(Map<String, IoContextBuilder> union, String key, IoContext value) {
            IoContextBuilder builder = union.get(key);
            if (builder == null) {
                builder = new IoContextBuilder();
                union.put(key, builder);
            }
            for (Input input : value.getInputs()) {
                builder.addInput(input);
            }
            for (Output output : value.getOutputs()) {
                builder.addOutput(output);
            }
        }

        /**
         * Returns the map passed to each generated command (presumably its environment
         * variables - confirm against {@code Command}).
         * Currently this is always an empty map, and the context is not consulted.
         * @param context the command context (unused)
         * @return an empty map
         */
        private Map<String, String> getEnvironment(CommandContext context) {
            return Collections.emptyMap();
        }
    }
}
TOP

Related Classes of com.asakusafw.compiler.windgate.WindGateIoProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.