Package io.crate.operation.scalar

Source Code of io.crate.operation.scalar.SubstrFunction

/*
* Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
* license agreements.  See the NOTICE file distributed with this work for
* additional information regarding copyright ownership.  Crate licenses
* this file to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.  You may
* obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
* License for the specific language governing permissions and limitations
* under the License.
*
* However, if you have executed another commercial license agreement
* with Crate these terms will supersede the license and you may use the
* software solely pursuant to the terms of the relevant commercial agreement.
*/

package io.crate.operation.scalar;

import com.google.common.base.Preconditions;
import io.crate.metadata.*;
import io.crate.operation.Input;
import io.crate.planner.symbol.Function;
import io.crate.planner.symbol.Literal;
import io.crate.planner.symbol.Symbol;
import io.crate.types.DataType;
import io.crate.types.DataTypes;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.BytesRefs;

import javax.annotation.Nonnull;
import java.util.List;

public class SubstrFunction extends Scalar<BytesRef, Object> implements DynamicFunctionResolver {

    public static final String NAME = "substr";
    private FunctionInfo info;
    private static final BytesRef EMPTY_BYTES_REF = new BytesRef("");

    public SubstrFunction() {}
    public SubstrFunction(FunctionInfo info) {
        this.info = info;
    }

    public static void register(ScalarFunctionModule module) {
        module.register(NAME, new SubstrFunction());
    }

    private static FunctionInfo createInfo(List<DataType> types) {
        return new FunctionInfo(new FunctionIdent(NAME, types), DataTypes.STRING);
    }

    @Override
    public FunctionInfo info() {
        return info;
    }

    @Override
    public BytesRef evaluate(Input[] args) {
        assert (args.length >= 2 && args.length <= 3);
        final Object val = args[0].value();
        if (val == null) {
            return null;
        }
        if (args.length == 3) {
            return evaluate(BytesRefs.toBytesRef(val),
                    ((Number) args[1].value()).intValue(),
                    ((Number) args[2].value()).intValue());

        }
        return evaluate(BytesRefs.toBytesRef(val), ((Number) args[1].value()).intValue());
    }

    private static BytesRef evaluate(@Nonnull BytesRef inputStr, int beginIdx) {
        final int startPos = Math.max(0, beginIdx - 1);
        if (startPos > inputStr.length - 1) {
            return EMPTY_BYTES_REF;
        }
        int endPos = inputStr.length;
        return substring(inputStr, startPos, endPos);
    }

    private static BytesRef evaluate(@Nonnull BytesRef inputStr, int beginIdx, int len) {
        final int startPos = Math.max(0, beginIdx - 1);
        if (startPos > inputStr.length - 1) {
            return EMPTY_BYTES_REF;
        }
        int endPos = inputStr.length;
        if (startPos + len < endPos) {
            endPos = startPos + len;
        }
        return substring(inputStr, startPos, endPos);
    }

    public static BytesRef substring(BytesRef utf8, int begin, int end) {
        int pos = utf8.offset;
        final int limit = pos + utf8.length;
        final byte[] bytes = utf8.bytes;
        int posBegin = pos;

        int codePointCount = 0;
        for (; pos < limit; codePointCount++) {
            if (codePointCount == begin) {
                posBegin = pos;
            }
            if (codePointCount == end) {
                break;
            }

            int v = bytes[pos] & 0xFF;
            if (v <   /* 0xxx xxxx */ 0x80) {
                pos += 1;
                continue;
            }
            if (v >=  /* 110x xxxx */ 0xc0) {
                if (v < /* 111x xxxx */ 0xe0) {
                    pos += 2;
                    continue;
                }
                if (v < /* 1111 xxxx */ 0xf0) {
                    pos += 3;
                    continue;
                }
                if (v < /* 1111 1xxx */ 0xf8) {
                    pos += 4;
                    continue;
                }
                // fallthrough, consider 5 and 6 byte sequences invalid.
            }

            // Anything not covered above is invalid UTF8.
            throw new IllegalArgumentException("substr: invalid UTF8 string found.");
        }

        // Check if we didn't go over the limit on the last character.
        if (pos > limit) throw new IllegalArgumentException();
        return new BytesRef(bytes, posBegin, pos - posBegin);
    }

    @Override
    public Symbol normalizeSymbol(Function symbol) {
        final int size = symbol.arguments().size();
        assert (size >= 2 && size <= 3);

        final Symbol input = symbol.arguments().get(0);
        final Symbol beginIdx = symbol.arguments().get(1);

        if (anyNonLiterals(input, beginIdx, symbol.arguments())) {
            return symbol;
        }

        final Object inputValue = ((Input) input).value();
        final Object beginIdxValue = ((Input) beginIdx).value();
        if (inputValue == null || beginIdxValue == null) {
            return Literal.NULL;
        }
        if (size == 3) {
            if (((Input)symbol.arguments().get(2)).value() == null) {
                return Literal.NULL;
            }
            return Literal.newLiteral(
                    evaluate(BytesRefs.toBytesRef(inputValue),
                    ((Number) ((Input) beginIdx).value()).intValue(),
                    ((Number) ((Input) symbol.arguments().get(2)).value()).intValue()));
        }
        return Literal.newLiteral(evaluate(
                BytesRefs.toBytesRef(inputValue),
                ((Number) ((Input) beginIdx).value()).intValue()));
    }

    private static boolean anyNonLiterals(Symbol input, Symbol beginIdx, List<Symbol> arguments) {
        return !input.symbolType().isValueSymbol() ||
                !beginIdx.symbolType().isValueSymbol() ||
                (arguments.size() == 3 && !arguments.get(2).symbolType().isValueSymbol());
    }

    @Override
    public FunctionImplementation<Function> getForTypes(List<DataType> dataTypes) throws IllegalArgumentException {
        Preconditions.checkArgument(dataTypes.size() > 1 && dataTypes.size() < 4 && dataTypes.get(0) == DataTypes.STRING);
        return new SubstrFunction(createInfo(dataTypes));
    }
}
TOP

Related Classes of io.crate.operation.scalar.SubstrFunction

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.