Package com.asakusafw.compiler.directio

Source Code of com.asakusafw.compiler.directio.DirectFileIoProcessor

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.compiler.directio;

import java.io.IOException;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.hadoop.mapreduce.InputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.asakusafw.compiler.directio.OutputPattern.CompiledOrder;
import com.asakusafw.compiler.directio.OutputPattern.CompiledResourcePattern;
import com.asakusafw.compiler.directio.emitter.NamingClassEmitter;
import com.asakusafw.compiler.directio.emitter.OrderingClassEmitter;
import com.asakusafw.compiler.directio.emitter.Slot;
import com.asakusafw.compiler.directio.emitter.StageEmitter;
import com.asakusafw.compiler.flow.DataClass;
import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor;
import com.asakusafw.compiler.flow.Location;
import com.asakusafw.compiler.flow.jobflow.CompiledStage;
import com.asakusafw.compiler.flow.jobflow.ExternalIoStage;
import com.asakusafw.compiler.flow.mapreduce.copy.CopierClientEmitter;
import com.asakusafw.compiler.flow.mapreduce.copy.CopyDescription;
import com.asakusafw.runtime.directio.DataFormat;
import com.asakusafw.runtime.directio.DirectDataSourceConstants;
import com.asakusafw.runtime.directio.FilePattern;
import com.asakusafw.runtime.directio.FilePattern.PatternElementKind;
import com.asakusafw.runtime.stage.input.BridgeInputFormat;
import com.asakusafw.runtime.stage.input.TemporaryInputFormat;
import com.asakusafw.runtime.stage.output.TemporaryOutputFormat;
import com.asakusafw.utils.collections.Lists;
import com.asakusafw.utils.collections.Maps;
import com.asakusafw.utils.java.model.syntax.ModelFactory;
import com.asakusafw.utils.java.model.syntax.Name;
import com.asakusafw.utils.java.model.util.Models;
import com.asakusafw.vocabulary.directio.DirectFileInputDescription;
import com.asakusafw.vocabulary.directio.DirectFileOutputDescription;
import com.asakusafw.vocabulary.external.ExporterDescription;
import com.asakusafw.vocabulary.external.ImporterDescription;
import com.asakusafw.vocabulary.flow.graph.InputDescription;
import com.asakusafw.vocabulary.flow.graph.OutputDescription;

/**
* Processes {@link DirectFileInputDescription} and {@link DirectFileOutputDescription}.
* @since 0.2.5
* @version 0.6.1
*/
public class DirectFileIoProcessor extends ExternalIoDescriptionProcessor {

    /** Logger of this processor (package-private visibility). */
    static final Logger LOG = LoggerFactory.getLogger(DirectFileIoProcessor.class);

    /** Pattern element kinds that are reported as errors when they appear in a base path. */
    private static final Set<PatternElementKind> INVALID_BASE_PATH_KIND =
            EnumSet.of(PatternElementKind.WILDCARD, PatternElementKind.SELECTION);

    /** Method name embedded in diagnostics about the output resource pattern. */
    private static final String METHOD_RESOURCE_PATTERN = "getResourcePattern";

    /** Method name embedded in diagnostics about the output order. */
    private static final String METHOD_ORDER = "getOrder";

    /** Module name: returned as the processor ID and passed to emitters and stage locations. */
    private static final String MODULE_NAME = "directio";

    /** Input format attached to the source information of inputs that are read in place. */
    private static final Class<? extends InputFormat<?, ?>> INPUT_FORMAT = BridgeInputFormat.class;

    /**
     * Returns the ID of this processor.
     * @return the module name ({@code "directio"})
     */
    @Override
    public String getId() {
        return MODULE_NAME;
    }

    /**
     * Returns the importer description type handled by this processor.
     * @return {@link DirectFileInputDescription}
     */
    @Override
    public Class<? extends ImporterDescription> getImporterDescriptionType() {
        return DirectFileInputDescription.class;
    }

    /**
     * Returns the exporter description type handled by this processor.
     * @return {@link DirectFileOutputDescription}
     */
    @Override
    public Class<? extends ExporterDescription> getExporterDescriptionType() {
        return DirectFileOutputDescription.class;
    }

    @Override
    public boolean validate(List<InputDescription> inputs, List<OutputDescription> outputs) {
        LOG.debug("Checking Direct I/O vocabularies: batch={}, flow={}",
                getEnvironment().getBatchId(),
                getEnvironment().getFlowId());
        boolean valid = true;
        for (InputDescription input : inputs) {
            LOG.debug("Checking Direct I/O input: {}",
                    input.getName());
            valid &= validateInput(input);
        }
        for (OutputDescription output : outputs) {
            LOG.debug("Checking Direct I/O output: {}",
                    output.getName());
            valid &= validateOutput(output);
        }
        LOG.debug("Checking Direct I/O paths");
        valid &= validatePaths(inputs, outputs);
        return valid;
    }

    private boolean validateInput(InputDescription input) {
        boolean valid = true;
        DirectFileInputDescription desc = extract(input);
        valid &= checkBasePath(desc.getClass(), desc.getBasePath(), "入力ベースパス");
        String pattern = desc.getResourcePattern();
        try {
            FilePattern.compile(pattern);
        } catch (IllegalArgumentException e) {
            getEnvironment().error(
                    "入力リソース名のパターンが不正です ({1}): {0}",
                    e.getMessage(),
                    desc.getClass().getName());
            valid = false;
        }
        valid &= validateFormat(desc.getClass(), desc.getModelType(), desc.getFormat());
        return valid;
    }

    private boolean validateOutput(OutputDescription output) {
        boolean valid = true;
        DirectFileOutputDescription desc = extract(output);
        valid &= checkBasePath(desc.getClass(), desc.getBasePath(), "出力ベースパス");
        DataClass dataType = getEnvironment().getDataClasses().load(desc.getModelType());
        String pattern = desc.getResourcePattern();
        List<CompiledResourcePattern> compiledPattern;
        try {
            compiledPattern = OutputPattern.compileResourcePattern(pattern, dataType);
        } catch (IllegalArgumentException e) {
            getEnvironment().error(
                    "出力リソース名のパターンが不正です ({1}) [{0}]",
                    e.getMessage(),
                    desc.getClass().getName());
            valid = false;
            compiledPattern = Collections.emptyList();
        }

        for (String patternString : desc.getDeletePatterns()) {
            try {
                FilePattern.compile(patternString);
            } catch (IllegalArgumentException e) {
                getEnvironment().error(
                        "削除するリソース名のパターン(\"{2}\")が不正です ({1}) [{0}]",
                        e.getMessage(),
                        desc.getClass().getName(),
                        patternString);
                valid = false;
            }
        }

        List<String> orders = desc.getOrder();
        try {
            OutputPattern.compileOrder(orders, dataType);
        } catch (IllegalArgumentException e) {
            getEnvironment().error(
                    "出力順序の指定が不正です ({1}) [{0}]",
                    e.getMessage(),
                    desc.getClass().getName());
            valid = false;
        }

        Set<OutputPattern.SourceKind> kinds = pickSourceKinds(compiledPattern);
        if (kinds.contains(OutputPattern.SourceKind.ENVIRONMENT)) {
            if (kinds.contains(OutputPattern.SourceKind.PROPERTY)) {
                getEnvironment().error(
                        "出力リソース名にワイルドカードを含む場合、プロパティ ('{'name'}') は指定できません"
                        + " ({1}.{2}()): {0}",
                        pattern,
                        desc.getClass().getName(),
                        METHOD_RESOURCE_PATTERN);
                valid = false;
            }
            if (kinds.contains(OutputPattern.SourceKind.RANDOM)) {
                getEnvironment().error(
                        "出力リソース名にワイルドカードを含む場合、ランダム ([m..n]) は指定できません"
                        + " ({1}.{2}()): {0}",
                        pattern,
                        desc.getClass().getName(),
                        METHOD_RESOURCE_PATTERN);
                valid = false;
            }
            if (orders.isEmpty() == false) {
                getEnvironment().error(
                        "出力リソース名にワイルドカードを含む場合、出力順序は指定できません"
                        + " ({1}.{2}()): {0}",
                        pattern,
                        desc.getClass().getName(),
                        METHOD_ORDER);
                valid = false;
            }
        }

        valid &= validateFormat(desc.getClass(), desc.getModelType(), desc.getFormat());
        return valid;
    }

    private boolean checkBasePath(Class<?> theClass, String basePath, String name) {
        boolean valid = true;
        try {
            FilePattern pattern = FilePattern.compile(basePath);
            if (pattern.containsTraverse()) {
                getEnvironment().error(
                        "{0}にワイルドカード (**) を利用できません: {1}",
                        name,
                        theClass.getName());
                valid = false;
            }
            Set<PatternElementKind> kinds = pattern.getPatternElementKinds();
            for (PatternElementKind kind : kinds) {
                if (INVALID_BASE_PATH_KIND.contains(kind)) {
                    getEnvironment().error(
                            "{0}に \"{1}\" を利用できません: {2}",
                            name,
                            kind.getSymbol(),
                            theClass.getName());
                    valid = false;
                }
            }
        } catch (IllegalArgumentException e) {
            getEnvironment().error(
                    "{0}が不正です ({2}): {1}",
                    name,
                    e.getMessage(),
                    theClass.getName());
            valid = false;
        }
        return valid;
    }

    private boolean validatePaths(List<InputDescription> inputs, List<OutputDescription> outputs) {
        assert inputs != null;
        assert outputs != null;
        boolean valid = true;
        TreeMap<String, InputDescription> inputPaths = new TreeMap<String, InputDescription>();
        for (InputDescription input : inputs) {
            DirectFileInputDescription desc = extract(input);
            String path = normalizePath(desc.getBasePath());
            inputPaths.put(path, input);
        }
        TreeMap<String, OutputDescription> outputPaths = new TreeMap<String, OutputDescription>();
        for (OutputDescription output : outputs) {
            DirectFileOutputDescription desc = extract(output);
            String path = normalizePath(desc.getBasePath());
            for (Map.Entry<String, InputDescription> entry : inputPaths.tailMap(path, true).entrySet()) {
                if (entry.getKey().startsWith(path) == false) {
                    break;
                }
                DirectFileInputDescription other = extract(entry.getValue());
                getEnvironment().error(
                        "入出力のベースパスが衝突しています: {0}[{1}] -> {2}[{3}]",
                        desc.getClass().getName(),
                        desc.getBasePath(),
                        other.getClass().getName(),
                        other.getBasePath());
                valid = false;
            }
            if (outputPaths.containsKey(path)) {
                DirectFileOutputDescription other = extract(outputPaths.get(path));
                getEnvironment().error(
                        "2つの出力のベースパスが重複しています: {0}[{1}] <-> {2}[{3}]",
                        desc.getClass().getName(),
                        desc.getBasePath(),
                        other.getClass().getName(),
                        other.getBasePath());
                valid = false;
            } else {
                outputPaths.put(path, output);
            }
        }
        for (Map.Entry<String, OutputDescription> base : outputPaths.entrySet()) {
            String path = base.getKey();
            DirectFileOutputDescription desc = extract(base.getValue());
            for (Map.Entry<String, OutputDescription> entry : outputPaths.tailMap(path, false).entrySet()) {
                if (entry.getKey().startsWith(path) == false) {
                    break;
                }
                DirectFileOutputDescription other = extract(entry.getValue());
                getEnvironment().error(
                        "2つの出力のベースパスが衝突しています: {0}[{1}] -> {2}[{3}]",
                        desc.getClass().getName(),
                        desc.getBasePath(),
                        other.getClass().getName(),
                        other.getBasePath());
                valid = false;
            }
        }
        return valid;
    }

    private String normalizePath(String path) {
        assert path != null;
        boolean sawSeparator = false;
        StringBuilder buf = new StringBuilder();
        for (int i = 0, n = path.length(); i < n; i++) {
            char c = path.charAt(i);
            if (c == '/') {
                sawSeparator = true;
            } else {
                if (sawSeparator && buf.length() > 0) {
                    buf.append('/');
                }
                sawSeparator = false;
                buf.append(c);
            }
        }
        if (sawSeparator == false) {
            buf.append('/');
        }
        return buf.toString();
    }

    private boolean validateFormat(Class<?> desc, Class<?> model, Class<? extends DataFormat<?>> format) {
        assert desc != null;
        if (format == null) {
            getEnvironment().error(
                    "データフォーマットが指定されていません: {0}",
                    desc.getName());
            return false;
        }
        DataFormat<?> formatObject;
        try {
            formatObject = format.getConstructor().newInstance();
        } catch (Exception e) {
            getEnvironment().error(
                    "データフォーマット\"{1}\"の生成に失敗しました: {0}",
                    desc.getName(),
                    format.getName());
            return false;
        }
        if (formatObject.getSupportedType().isAssignableFrom(model) == false) {
            getEnvironment().error(
                    "データフォーマット\"{2}\"はデータモデル\"{1}\"をサポートしていません: {0}",
                    desc.getName(),
                    model.getName(),
                    format.getName());
            return false;
        }
        return true;
    }

    @Override
    public SourceInfo getInputInfo(InputDescription description) {
        DirectFileInputDescription desc = extract(description);
        if (isCacheTarget(desc)) {
            String outputName = getProcessedInputName(description);
            Location location = getEnvironment().getPrologueLocation(MODULE_NAME).append(outputName).asPrefix();
            return new SourceInfo(Collections.singleton(location), TemporaryInputFormat.class);
        } else {
            return getOriginalInputInfo(description);
        }
    }

    private SourceInfo getOriginalInputInfo(InputDescription description) {
        DirectFileInputDescription desc = extract(description);
        Set<Location> locations = Collections.singleton(
                Location.fromPath("__DIRECTIO__", '/')
                .append(description.getName())
                .append(Location.fromPath(desc.getBasePath(), '/')));
        return new SourceInfo(locations, INPUT_FORMAT, getAttributes(desc));
    }

    private Map<String, String> getAttributes(DirectFileInputDescription desc) {
        Map<String, String> attributes = Maps.create();
        attributes.put(DirectDataSourceConstants.KEY_DATA_CLASS, desc.getModelType().getName());
        attributes.put(DirectDataSourceConstants.KEY_FORMAT_CLASS, desc.getFormat().getName());
        attributes.put(DirectDataSourceConstants.KEY_BASE_PATH, desc.getBasePath());
        attributes.put(DirectDataSourceConstants.KEY_RESOURCE_PATH, desc.getResourcePattern());
        attributes.put(DirectDataSourceConstants.KEY_OPTIONAL, String.valueOf(desc.isOptional()));
        return attributes;
    }

    private String getProcessedInputName(InputDescription description) {
        assert description != null;
        StringBuilder buf = new StringBuilder();
        for (char c : description.getName().toCharArray()) {
            // 0 as escape character
            if ('1' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z') {
                buf.append(c);
            } else if (c <= 0xff) {
                buf.append('0');
                buf.append(String.format("%02x", (int) c));
            } else {
                buf.append("0u");
                buf.append(String.format("%04x", (int) c));
            }
        }
        return buf.toString();
    }

    @Override
    public List<ExternalIoStage> emitPrologue(IoContext context) throws IOException {
        IoContextBuilder builder = new IoContextBuilder();
        List<CopyDescription> targets = Lists.create();
        for (Input input : context.getInputs()) {
            InputDescription description = input.getDescription();
            DirectFileInputDescription desc = extract(description);
            if (isCacheTarget(desc)) {
                LOG.debug("Input will be copied in prologue: {}", description.getName());
                targets.add(new CopyDescription(
                        getProcessedInputName(description),
                        getEnvironment().getDataClasses().load(description.getDataType()),
                        getOriginalInputInfo(description),
                        TemporaryOutputFormat.class));
                builder.addInput(input);
            }
        }
        if (targets.isEmpty()) {
            return Collections.emptyList();
        }
        CopierClientEmitter emitter = new CopierClientEmitter(getEnvironment());
        CompiledStage stage = emitter.emitPrologue(
                MODULE_NAME,
                targets,
                getEnvironment().getPrologueLocation(MODULE_NAME));
        return Collections.singletonList(new ExternalIoStage(getId(), stage, builder.build()));
    }

    @Override
    public List<ExternalIoStage> emitEpilogue(IoContext context) throws IOException {
        ModelFactory f = getEnvironment().getModelFactory();
        NamingClassEmitter namingEmitter = new NamingClassEmitter(getEnvironment(), MODULE_NAME);
        OrderingClassEmitter orderingEmitter = new OrderingClassEmitter(getEnvironment(), MODULE_NAME);
        List<Slot> slots = Lists.create();
        for (Output output : context.getOutputs()) {
            DirectFileOutputDescription desc = extract(output.getDescription());
            DataClass dataType = getEnvironment().getDataClasses().load(desc.getModelType());
            List<CompiledResourcePattern> namingInfo =
                OutputPattern.compileResourcePattern(desc.getResourcePattern(), dataType);
            Set<OutputPattern.SourceKind> kinds = pickSourceKinds(namingInfo);
            if (kinds.contains(OutputPattern.SourceKind.ENVIRONMENT)) {
                assert kinds.contains(OutputPattern.SourceKind.PROPERTY) == false;
                assert kinds.contains(OutputPattern.SourceKind.RANDOM) == false;
                assert desc.getOrder().isEmpty();
                String outputName = output.getDescription().getName();
                Slot slot = new Slot(
                        outputName,
                        output.getSources(),
                        Models.toName(f, desc.getModelType().getName()),
                        desc.getBasePath(),
                        desc.getResourcePattern(),
                        Models.toName(f, desc.getFormat().getName()),
                        null,
                        null,
                        desc.getDeletePatterns());
                slots.add(slot);
            } else {
                List<CompiledOrder> orderingInfo = OutputPattern.compileOrder(desc.getOrder(), dataType);
                String outputName = output.getDescription().getName();
                Name naming = namingEmitter.emit(outputName, slots.size() + 1, dataType, namingInfo);
                Name ordering = orderingEmitter.emit(outputName, slots.size() + 1, dataType, orderingInfo);
                Slot slot = new Slot(
                        outputName,
                        output.getSources(),
                        Models.toName(f, desc.getModelType().getName()),
                        desc.getBasePath(),
                        desc.getResourcePattern(),
                        Models.toName(f, desc.getFormat().getName()),
                        naming,
                        ordering,
                        desc.getDeletePatterns());
                slots.add(slot);
            }
        }
        if (slots.isEmpty()) {
            return Collections.emptyList();
        }
        StageEmitter stageEmitter = new StageEmitter(getEnvironment(), MODULE_NAME);
        CompiledStage result = stageEmitter.emit(slots, getEnvironment().getEpilogueLocation(MODULE_NAME));
        return Collections.singletonList(new ExternalIoStage(getId(), result, context.getOutputContext()));
    }

    private boolean isCacheTarget(ImporterDescription desc) {
        assert desc != null;
        switch (desc.getDataSize()) {
        case TINY:
            return getEnvironment().getOptions().isHashJoinForTiny();
        case SMALL:
            return getEnvironment().getOptions().isHashJoinForSmall();
        default:
            return false;
        }
    }

    private Set<OutputPattern.SourceKind> pickSourceKinds(List<CompiledResourcePattern> fragments) {
        assert fragments != null;
        Set<OutputPattern.SourceKind> results = EnumSet.noneOf(OutputPattern.SourceKind.class);
        for (CompiledResourcePattern fragment : fragments) {
            results.add(fragment.getKind());
        }
        return results;
    }

    private DirectFileInputDescription extract(InputDescription description) {
        assert description != null;
        ImporterDescription importer = description.getImporterDescription();
        assert importer != null;
        assert importer instanceof DirectFileInputDescription;
        return (DirectFileInputDescription) importer;
    }

    private DirectFileOutputDescription extract(OutputDescription description) {
        assert description != null;
        ExporterDescription exporter = description.getExporterDescription();
        assert exporter != null;
        assert exporter instanceof DirectFileOutputDescription;
        return (DirectFileOutputDescription) exporter;
    }
}
TOP

Related Classes of com.asakusafw.compiler.directio.DirectFileIoProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.