Package org.joni

Source Code of org.joni.ArrayCompiler

/*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package org.joni;

import static org.joni.BitStatus.bsAt;
import static org.joni.Option.isDynamic;
import static org.joni.Option.isIgnoreCase;
import static org.joni.Option.isMultiline;
import static org.joni.ast.QuantifierNode.isRepeatInfinite;

import org.jcodings.constants.CharacterType;
import org.joni.ast.AnchorNode;
import org.joni.ast.BackRefNode;
import org.joni.ast.CClassNode;
import org.joni.ast.CTypeNode;
import org.joni.ast.CallNode;
import org.joni.ast.ConsAltNode;
import org.joni.ast.EncloseNode;
import org.joni.ast.Node;
import org.joni.ast.QuantifierNode;
import org.joni.ast.StringNode;
import org.joni.constants.AnchorType;
import org.joni.constants.EncloseType;
import org.joni.constants.NodeType;
import org.joni.constants.OPCode;
import org.joni.constants.OPSize;
import org.joni.constants.TargetInfo;

final class ArrayCompiler extends Compiler {
    private int[]code;
    private int codeLength;

    private byte[][]templates;
    private int templateNum;

    ArrayCompiler(Analyser analyser) {
        super(analyser);
    }

    @Override
    protected final void prepare() {
        int codeSize = Config.USE_STRING_TEMPLATES ? 8 : ((analyser.getEnd() - analyser.getBegin()) * 2 + 2);
        code = new int[codeSize];
        codeLength = 0;
    }

    @Override
    protected final void finish() {
        addOpcode(OPCode.END);
        addOpcode(OPCode.FINISH); // for stack bottom

        regex.code = code;
        regex.codeLength = codeLength;
        regex.templates = templates;
        regex.templateNum = templateNum;
        regex.factory = MatcherFactory.DEFAULT;

        if (Config.USE_SUBEXP_CALL && analyser.env.unsetAddrList != null) {
            analyser.env.unsetAddrList.fix(regex);
            analyser.env.unsetAddrList = null;
        }
    }

    @Override
    protected void compileAltNode(ConsAltNode node) {
        ConsAltNode aln = node;
        int len = 0;

        do {
            len += compileLengthTree(aln.car);
            if (aln.cdr != null) {
                len += OPSize.PUSH + OPSize.JUMP;
            }
        } while ((aln = aln.cdr) != null);

        int pos = codeLength + len;  /* goal position */

        aln = node;
        do {
            len = compileLengthTree(aln.car);
            if (aln.cdr != null) {
                addOpcodeRelAddr(OPCode.PUSH, len + OPSize.JUMP);
            }
            compileTree(aln.car);
            if (aln.cdr != null) {
                len = pos - (codeLength + OPSize.JUMP);
                addOpcodeRelAddr(OPCode.JUMP, len);
            }
        } while ((aln = aln.cdr) != null);
    }

    private boolean isNeedStrLenOpExact(int op) {
        return  op == OPCode.EXACTN         ||
                op == OPCode.EXACTMB2N      ||
                op == OPCode.EXACTMB3N      ||
                op == OPCode.EXACTMBN       ||
                op == OPCode.EXACTN_IC      ||
                op == OPCode.EXACTN_IC_SB;
    }

    private boolean opTemplated(int op) {
        return isNeedStrLenOpExact(op);
    }

    private int selectStrOpcode(int mbLength, int strLength, boolean ignoreCase) {
        int op;

        if (ignoreCase) {
            switch(strLength) {
            case 1: op = enc.toLowerCaseTable() != null ? OPCode.EXACT1_IC_SB : OPCode.EXACT1_IC; break;
            default:op = enc.toLowerCaseTable() != null ? OPCode.EXACTN_IC_SB : OPCode.EXACTN_IC; break;
            } // switch
        } else {
            switch (mbLength) {
            case 1:
                switch (strLength) {
                case 1: op = OPCode.EXACT1; break;
                case 2: op = OPCode.EXACT2; break;
                case 3: op = OPCode.EXACT3; break;
                case 4: op = OPCode.EXACT4; break;
                case 5: op = OPCode.EXACT5; break;
                default:op = OPCode.EXACTN; break;
                } // inner switch
                break;
            case 2:
                switch (strLength) {
                case 1: op = OPCode.EXACTMB2N1; break;
                case 2: op = OPCode.EXACTMB2N2; break;
                case 3: op = OPCode.EXACTMB2N3; break;
                default:op = OPCode.EXACTMB2N;  break;
                } // inner switch
                break;
            case 3:
                op = OPCode.EXACTMB3N;
                break;
            default:
                op = OPCode.EXACTMBN;
            } // switch
        }
        return op;
    }

    private void compileTreeEmptyCheck(Node node, int emptyInfo) {
        int savedNumNullCheck = regex.numNullCheck;

        if (emptyInfo != 0) {
            addOpcode(OPCode.NULL_CHECK_START);
            addMemNum(regex.numNullCheck); /* NULL CHECK ID */
            regex.numNullCheck++;
        }

        compileTree(node);

        if (emptyInfo != 0) {
            switch(emptyInfo) {
            case TargetInfo.IS_EMPTY:
                addOpcode(OPCode.NULL_CHECK_END);
                break;
            case TargetInfo.IS_EMPTY_MEM:
                addOpcode(OPCode.NULL_CHECK_END_MEMST);
                break;
            case TargetInfo.IS_EMPTY_REC:
                addOpcode(OPCode.NULL_CHECK_END_MEMST_PUSH);
                break;
            } // switch

            addMemNum(savedNumNullCheck); /* NULL CHECK ID */
        }
    }

    private int addCompileStringlength(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase) {
        int op = selectStrOpcode(mbLength, strLength, ignoreCase);
        int len = OPSize.OPCODE;

        if (Config.USE_STRING_TEMPLATES && opTemplated(op)) {
            // string length, template index, template string pointer
            len += OPSize.LENGTH + OPSize.INDEX + OPSize.INDEX;
        } else {
            if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH;
            len += mbLength * strLength;
        }
        if (op == OPCode.EXACTMBN) len += OPSize.LENGTH;
        return len;
    }

    @Override
    protected final void addCompileString(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase) {
        int op = selectStrOpcode(mbLength, strLength, ignoreCase);
        addOpcode(op);

        if (op == OPCode.EXACTMBN) addLength(mbLength);

        if (isNeedStrLenOpExact(op)) {
            if (op == OPCode.EXACTN_IC || op == OPCode.EXACTN_IC_SB) {
                addLength(mbLength * strLength);
            } else {
                addLength(strLength);
            }
        }

        if (Config.USE_STRING_TEMPLATES && opTemplated(op)) {
            addInt(templateNum);
            addInt(p);
            addTemplate(bytes);
        } else {
            addBytes(bytes, p, mbLength * strLength);
        }
    }

    private int compileLengthStringNode(Node node) {
        StringNode sn = (StringNode)node;
        if (sn.length() <= 0) return 0;
        boolean ambig = sn.isAmbig();

        int p, prev;
        p = prev = sn.p;
        int end = sn.end;
        byte[]bytes = sn.bytes;
        int prevLen = enc.length(bytes, p, end);
        p += prevLen;

        int slen = 1;
        int rlen = 0;

        while (p < end) {
            int len = enc.length(bytes, p, end);
            if (len == prevLen) {
                slen++;
            } else {
                int r = addCompileStringlength(bytes, prev, prevLen, slen, ambig);
                rlen += r;
                prev = p;
                slen = 1;
                prevLen = len;
            }
            p += len;
        }
        int r = addCompileStringlength(bytes, prev, prevLen, slen, ambig);
        rlen += r;
        return rlen;
    }

    private int compileLengthStringRawNode(StringNode sn) {
        if (sn.length() <= 0) return 0;
        return addCompileStringlength(sn.bytes, sn.p, 1 /*sb*/, sn.length(), false);
    }

    private void addMultiByteCClass(CodeRangeBuffer mbuf) {
        addLength(mbuf.used);
        addInts(mbuf.p, mbuf.used);
    }

    private int compileLengthCClassNode(CClassNode cc) {
        if (cc.isShare()) return OPSize.OPCODE + OPSize.POINTER;

        int len;
        if (cc.mbuf == null) {
            len = OPSize.OPCODE + BitSet.BITSET_SIZE;
        } else {
            if (enc.minLength() > 1 || cc.bs.isEmpty()) {
                len = OPSize.OPCODE;
            } else {
                len = OPSize.OPCODE + BitSet.BITSET_SIZE;
            }

            len += OPSize.LENGTH + cc.mbuf.used;
        }
        return len;
    }

    @Override
    protected void compileCClassNode(CClassNode cc) {
        if (cc.isShare()) { // shared char class
            addOpcode(OPCode.CCLASS_NODE);
            addPointer(cc);
            return;
        }

        if (cc.mbuf == null) {
            if (cc.isNot()) {
                addOpcode(enc.isSingleByte() ? OPCode.CCLASS_NOT_SB : OPCode.CCLASS_NOT);
            } else {
                addOpcode(enc.isSingleByte() ? OPCode.CCLASS_SB : OPCode.CCLASS);
            }
            addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
        } else {
            if (enc.minLength() > 1 || cc.bs.isEmpty()) {
                if (cc.isNot()) {
                    addOpcode(OPCode.CCLASS_MB_NOT);
                } else {
                    addOpcode(OPCode.CCLASS_MB);
                }
                addMultiByteCClass(cc.mbuf);
            } else {
                if (cc.isNot()) {
                    addOpcode(OPCode.CCLASS_MIX_NOT);
                } else {
                    addOpcode(OPCode.CCLASS_MIX);
                }
                // store the bit set and mbuf themself!
                addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
                addMultiByteCClass(cc.mbuf);
            }
        }
    }

    @Override
    protected void compileCTypeNode(CTypeNode node) {
        CTypeNode cn = node;
        int op;
        switch (cn.ctype) {
        case CharacterType.WORD:
            if (cn.not) {
                op = enc.isSingleByte() ? OPCode.NOT_WORD_SB : OPCode.NOT_WORD;
            } else {
                op = enc.isSingleByte() ? OPCode.WORD_SB : OPCode.WORD;
            }
            break;

        default:
            newInternalException(ERR_PARSER_BUG);
            return; // not reached
        } // inner switch
        addOpcode(op);
    }

    @Override
    protected void compileAnyCharNode() {
        if (isMultiline(regex.options)) {
            addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_SB : OPCode.ANYCHAR_ML);
        } else {
            addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_SB : OPCode.ANYCHAR);
        }
    }

    @Override
    protected void compileCallNode(CallNode node) {
        addOpcode(OPCode.CALL);
        node.unsetAddrList.add(codeLength, node.target);
        addAbsAddr(0); /*dummy addr.*/
    }

    @Override
    protected void compileBackrefNode(BackRefNode node) {
        BackRefNode br = node;
        if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) {
            addOpcode(OPCode.BACKREF_WITH_LEVEL);
            addOption(regex.options & Option.IGNORECASE);
            addLength(br.nestLevel);
            // !goto add_bacref_mems;!
            addLength(br.backNum);
            for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]);
            return;
        } else { // USE_BACKREF_AT_LEVEL
            if (br.backNum == 1) {
                if (isIgnoreCase(regex.options)) {
                    addOpcode(OPCode.BACKREFN_IC);
                    addMemNum(br.back[0]);
                } else {
                    switch (br.back[0]) {
                    case 1:
                        addOpcode(OPCode.BACKREF1);
                        break;
                    case 2:
                        addOpcode(OPCode.BACKREF2);
                        break;
                    default:
                        addOpcode(OPCode.BACKREFN);
                        addOpcode(br.back[0]);
                        break;
                    } // switch
                }
            } else {
                if (isIgnoreCase(regex.options)) {
                    addOpcode(OPCode.BACKREF_MULTI_IC);
                } else {
                    addOpcode(OPCode.BACKREF_MULTI);
                }
                // !add_bacref_mems:!
                addLength(br.backNum);
                for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]);
            }
        }
    }

    private static final int REPEAT_RANGE_ALLOC = 8;
    private void entryRepeatRange(int id, int lower, int upper) {
        if (regex.repeatRangeLo == null) {
            regex.repeatRangeLo = new int[REPEAT_RANGE_ALLOC];
            regex.repeatRangeHi = new int[REPEAT_RANGE_ALLOC];
        } else if (id >= regex.repeatRangeLo.length){
            int[]tmp = new int[regex.repeatRangeLo.length + REPEAT_RANGE_ALLOC];
            System.arraycopy(regex.repeatRangeLo, 0, tmp, 0, regex.repeatRangeLo.length);
            regex.repeatRangeLo = tmp;
            tmp = new int[regex.repeatRangeHi.length + REPEAT_RANGE_ALLOC];
            System.arraycopy(regex.repeatRangeHi, 0, tmp, 0, regex.repeatRangeHi.length);
            regex.repeatRangeHi = tmp;
        }

        regex.repeatRangeLo[id] = lower;
        regex.repeatRangeHi[id] = isRepeatInfinite(upper) ? 0x7fffffff : upper;
    }

    private void compileRangeRepeatNode(QuantifierNode qn, int targetLen, int emptyInfo) {
        int numRepeat = regex.numRepeat;
        addOpcode(qn.greedy ? OPCode.REPEAT : OPCode.REPEAT_NG);
        addMemNum(numRepeat); /* OP_REPEAT ID */
        regex.numRepeat++;
        addRelAddr(targetLen + OPSize.REPEAT_INC);

        entryRepeatRange(numRepeat, qn.lower, qn.upper);

        compileTreeEmptyCheck(qn.target, emptyInfo);

        if ((Config.USE_SUBEXP_CALL && regex.numCall > 0) || qn.isInRepeat()) {
            addOpcode(qn.greedy ? OPCode.REPEAT_INC_SG : OPCode.REPEAT_INC_NG_SG);
        } else {
            addOpcode(qn.greedy ? OPCode.REPEAT_INC : OPCode.REPEAT_INC_NG);
        }

        addMemNum(numRepeat); /* OP_REPEAT ID */
    }

    private static final int QUANTIFIER_EXPAND_LIMIT_SIZE   = 50; // was 50

    private static boolean cknOn(int ckn) {
        return ckn > 0;
    }

    private int compileCECLengthQuantifierNode(QuantifierNode qn) {
        boolean infinite = isRepeatInfinite(qn.upper);
        int emptyInfo = qn.targetEmptyInfo;

        int tlen = compileLengthTree(qn.target);
        int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0;
        int cklen = cknOn(ckn) ? OPSize.STATE_CHECK_NUM : 0;

        /* anychar repeat */
        if (qn.target.getType() == NodeType.CANY) {
            if (qn.greedy && infinite) {
                if (qn.nextHeadExact != null && !cknOn(ckn)) {
                    return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower + cklen;
                } else {
                    return OPSize.ANYCHAR_STAR + tlen * qn.lower + cklen;
                }
            }
        }

        int modTLen;
        if (emptyInfo != 0) {
            modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
        } else {
            modTLen = tlen;
        }

        int len;
        if (infinite && qn.lower <= 1) {
            if (qn.greedy) {
                if (qn.lower == 1) {
                    len = OPSize.JUMP;
                } else {
                    len = 0;
                }
                len += OPSize.PUSH + cklen + modTLen + OPSize.JUMP;
            } else {
                if (qn.lower == 0) {
                    len = OPSize.JUMP;
                } else {
                    len = 0;
                }
                len += modTLen + OPSize.PUSH + cklen;
            }
        } else if (qn.upper == 0) {
            if (qn.isRefered) { /* /(?<n>..){0}/ */
                len = OPSize.JUMP + tlen;
            } else {
                len = 0;
            }
        } else if (qn.upper == 1 && qn.greedy) {
            if (qn.lower == 0) {
                if (cknOn(ckn)) {
                    len = OPSize.STATE_CHECK_PUSH + tlen;
                } else {
                    len = OPSize.PUSH + tlen;
                }
            } else {
                len = tlen;
            }
        } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
            len = OPSize.PUSH + cklen + OPSize.JUMP + tlen;
        } else {
            len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;

            if (cknOn(ckn)) {
                len += OPSize.STATE_CHECK;
            }
        }
        return len;
    }

    @Override
    protected void compileCECQuantifierNode(QuantifierNode qn) {
        boolean infinite = isRepeatInfinite(qn.upper);
        int emptyInfo = qn.targetEmptyInfo;

        int tlen = compileLengthTree(qn.target);

        int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0;

        if (qn.isAnyCharStar()) {
            compileTreeNTimes(qn.target, qn.lower);
            if (qn.nextHeadExact != null && !cknOn(ckn)) {
                if (isMultiline(regex.options)) {
                    addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
                } else {
                    addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_STAR_PEEK_NEXT);
                }
                if (cknOn(ckn)) {
                    addStateCheckNum(ckn);
                }
                StringNode sn = (StringNode)qn.nextHeadExact;
                addBytes(sn.bytes, sn.p, 1);
                return;
            } else {
                if (isMultiline(regex.options)) {
                    if (cknOn(ckn)) {
                        addOpcode(enc.isSingleByte() ? OPCode.STATE_CHECK_ANYCHAR_ML_STAR_SB : OPCode.STATE_CHECK_ANYCHAR_ML_STAR);
                    } else {
                        addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_SB : OPCode.ANYCHAR_ML_STAR);
                    }
                } else {
                    if (cknOn(ckn)) {
                        addOpcode(enc.isSingleByte() ? OPCode.STATE_CHECK_ANYCHAR_STAR_SB : OPCode.STATE_CHECK_ANYCHAR_STAR);
                    } else {
                        addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_SB : OPCode.ANYCHAR_STAR);
                    }
                }
                if (cknOn(ckn)) {
                    addStateCheckNum(ckn);
                }
                return;
            }
        }

        int modTLen;
        if (emptyInfo != 0) {
            modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
        } else {
            modTLen = tlen;
        }
        if (infinite && qn.lower <= 1) {
            if (qn.greedy) {
                if (qn.lower == 1) {
                    addOpcodeRelAddr(OPCode.JUMP, cknOn(ckn) ? OPSize.STATE_CHECK_PUSH :
                                                                     OPSize.PUSH);
                }
                if (cknOn(ckn)) {
                    addOpcode(OPCode.STATE_CHECK_PUSH);
                    addStateCheckNum(ckn);
                    addRelAddr(modTLen + OPSize.JUMP);
                } else {
                    addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP);
                }
                compileTreeEmptyCheck(qn.target, emptyInfo);
                addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + (cknOn(ckn) ?
                                                                               OPSize.STATE_CHECK_PUSH :
                                                                               OPSize.PUSH)));
            } else {
                if (qn.lower == 0) {
                    addOpcodeRelAddr(OPCode.JUMP, modTLen);
                }
                compileTreeEmptyCheck(qn.target, emptyInfo);
                if (cknOn(ckn)) {
                    addOpcode(OPCode.STATE_CHECK_PUSH_OR_JUMP);
                    addStateCheckNum(ckn);
                    addRelAddr(-(modTLen + OPSize.STATE_CHECK_PUSH_OR_JUMP));
                } else {
                    addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH));
                }
            }
        } else if (qn.upper == 0) {
            if (qn.isRefered) { /* /(?<n>..){0}/ */
                addOpcodeRelAddr(OPCode.JUMP, tlen);
                compileTree(qn.target);
            } // else r=0 ???
        } else if (qn.upper == 1 && qn.greedy) {
            if (qn.lower == 0) {
                if (cknOn(ckn)) {
                    addOpcode(OPCode.STATE_CHECK_PUSH);
                    addStateCheckNum(ckn);
                    addRelAddr(tlen);
                } else {
                    addOpcodeRelAddr(OPCode.PUSH, tlen);
                }
            }
            compileTree(qn.target);
        } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0){ /* '??' */
            if (cknOn(ckn)) {
                addOpcode(OPCode.STATE_CHECK_PUSH);
                addStateCheckNum(ckn);
                addRelAddr(OPSize.JUMP);
            } else {
                addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP);
            }

            addOpcodeRelAddr(OPCode.JUMP, tlen);
            compileTree(qn.target);
        } else {
            compileRangeRepeatNode(qn, modTLen, emptyInfo);
            if (cknOn(ckn)) {
                addOpcode(OPCode.STATE_CHECK);
                addStateCheckNum(ckn);
            }
        }
    }

    private int compileNonCECLengthQuantifierNode(QuantifierNode qn) {
        boolean infinite = isRepeatInfinite(qn.upper);
        int emptyInfo = qn.targetEmptyInfo;

        int tlen = compileLengthTree(qn.target);

        /* anychar repeat */
        if (qn.target.getType() == NodeType.CANY) {
            if (qn.greedy && infinite) {
                if (qn.nextHeadExact != null) {
                    return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower;
                } else {
                    return OPSize.ANYCHAR_STAR + tlen * qn.lower;
                }
            }
        }

        int modTLen = 0;
        if (emptyInfo != 0) {
            modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
        } else {
            modTLen = tlen;
        }

        int len;
        if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
            if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
                len = OPSize.JUMP;
            } else {
                len = tlen * qn.lower;
            }

            if (qn.greedy) {
                if (qn.headExact != null) {
                    len += OPSize.PUSH_OR_JUMP_EXACT1 + modTLen + OPSize.JUMP;
                } else if (qn.nextHeadExact != null) {
                    len += OPSize.PUSH_IF_PEEK_NEXT + modTLen + OPSize.JUMP;
                } else {
                    len += OPSize.PUSH + modTLen + OPSize.JUMP;
                }
            } else {
                len += OPSize.JUMP + modTLen + OPSize.PUSH;
            }

        } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */
            len = OPSize.JUMP + tlen;
        } else if (!infinite && qn.greedy &&
                  (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE )) {
            len = tlen * qn.lower;
            len += (OPSize.PUSH + tlen) * (qn.upper - qn.lower);
        } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
            len = OPSize.PUSH + OPSize.JUMP + tlen;
        } else {
            len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;
        }
        return len;
    }

    @Override
    protected void compileNonCECQuantifierNode(QuantifierNode qn) {
        boolean infinite = isRepeatInfinite(qn.upper);
        int emptyInfo = qn.targetEmptyInfo;

        int tlen = compileLengthTree(qn.target);

        if (qn.isAnyCharStar()) {
            compileTreeNTimes(qn.target, qn.lower);
            if (qn.nextHeadExact != null) {
                if (isMultiline(regex.options)) {
                    addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
                } else {
                    addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_STAR_PEEK_NEXT);
                }
                StringNode sn = (StringNode)qn.nextHeadExact;
                addBytes(sn.bytes, sn.p, 1);
                return;
            } else {
                if (isMultiline(regex.options)) {
                    addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_SB : OPCode.ANYCHAR_ML_STAR);
                } else {
                    addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_SB : OPCode.ANYCHAR_STAR);
                }
                return;
            }
        }

        int modTLen;
        if (emptyInfo != 0) {
            modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
        } else {
            modTLen = tlen;
        }
        if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
            if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
                if (qn.greedy) {
                    if (qn.headExact != null) {
                        addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_OR_JUMP_EXACT1);
                    } else if (qn.nextHeadExact != null) {
                        addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_IF_PEEK_NEXT);
                    } else {
                        addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH);
                    }
                } else {
                    addOpcodeRelAddr(OPCode.JUMP, OPSize.JUMP);
                }
            } else {
                compileTreeNTimes(qn.target, qn.lower);
            }

            if (qn.greedy) {
                if (qn.headExact != null) {
                    addOpcodeRelAddr(OPCode.PUSH_OR_JUMP_EXACT1, modTLen + OPSize.JUMP);
                    StringNode sn = (StringNode)qn.headExact;
                    addBytes(sn.bytes, sn.p, 1);
                    compileTreeEmptyCheck(qn.target, emptyInfo);
                    addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_OR_JUMP_EXACT1));
                } else if (qn.nextHeadExact != null) {
                    addOpcodeRelAddr(OPCode.PUSH_IF_PEEK_NEXT, modTLen + OPSize.JUMP);
                    StringNode sn = (StringNode)qn.nextHeadExact;
                    addBytes(sn.bytes, sn.p, 1);
                    compileTreeEmptyCheck(qn.target, emptyInfo);
                    addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_IF_PEEK_NEXT));
                } else {
                    addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP);
                    compileTreeEmptyCheck(qn.target, emptyInfo);
                    addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH));
                }
            } else {
                addOpcodeRelAddr(OPCode.JUMP, modTLen);
                compileTreeEmptyCheck(qn.target, emptyInfo);
                addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH));
            }
        } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */
            addOpcodeRelAddr(OPCode.JUMP, tlen);
            compileTree(qn.target);
        } else if (!infinite && qn.greedy &&
                  (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
            int n = qn.upper - qn.lower;
            compileTreeNTimes(qn.target, qn.lower);

            for (int i=0; i<n; i++) {
                addOpcodeRelAddr(OPCode.PUSH, (n - i) * tlen + (n - i - 1) * OPSize.PUSH);
                compileTree(qn.target);
            }
        } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
            addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP);
            addOpcodeRelAddr(OPCode.JUMP, tlen);
            compileTree(qn.target);
        } else {
            compileRangeRepeatNode(qn, modTLen, emptyInfo);
        }
    }

    private int compileLengthOptionNode(EncloseNode node) {
        int prev = regex.options;
        regex.options = node.option;
        int tlen = compileLengthTree(node.target);
        regex.options = prev;

        if (isDynamic(prev ^ node.option)) {
            return OPSize.SET_OPTION_PUSH + OPSize.SET_OPTION + OPSize.FAIL + tlen + OPSize.SET_OPTION;
        } else {
            return tlen;
        }
    }

    @Override
    protected void compileOptionNode(EncloseNode node) {
        int prev = regex.options;

        if (isDynamic(prev ^ node.option)) {
            addOpcodeOption(OPCode.SET_OPTION_PUSH, node.option);
            addOpcodeOption(OPCode.SET_OPTION, prev);
            addOpcode(OPCode.FAIL);
        }

        regex.options = node.option;
        compileTree(node.target);
        regex.options = prev;

        if (isDynamic(prev ^ node.option)) {
            addOpcodeOption(OPCode.SET_OPTION, prev);
        }
    }

    private int compileLengthEncloseNode(EncloseNode node) {
        if (node.isOption()) {
            return compileLengthOptionNode(node);
        }

        int tlen;
        if (node.target != null) {
            tlen = compileLengthTree(node.target);
        } else {
            tlen = 0;
        }

        int len;
        switch (node.type) {
        case EncloseType.MEMORY:
            if (Config.USE_SUBEXP_CALL && node.isCalled()) {
                len = OPSize.MEMORY_START_PUSH + tlen + OPSize.CALL + OPSize.JUMP + OPSize.RETURN;
                if (bsAt(regex.btMemEnd, node.regNum)) {
                    len += node.isRecursion() ? OPSize.MEMORY_END_PUSH_REC : OPSize.MEMORY_END_PUSH;
                } else {
                    len += node.isRecursion() ? OPSize.MEMORY_END_REC : OPSize.MEMORY_END;
                }
            } else { // USE_SUBEXP_CALL
                if (bsAt(regex.btMemStart, node.regNum)) {
                    len = OPSize.MEMORY_START_PUSH;
                } else {
                    len = OPSize.MEMORY_START;
                }
                len += tlen + (bsAt(regex.btMemEnd, node.regNum) ? OPSize.MEMORY_END_PUSH : OPSize.MEMORY_END);
            }
            break;

        case EncloseType.STOP_BACKTRACK:
            if (node.isStopBtSimpleRepeat()) {
                QuantifierNode qn = (QuantifierNode)node.target;
                tlen = compileLengthTree(qn.target);
                len = tlen * qn.lower + OPSize.PUSH + tlen + OPSize.POP + OPSize.JUMP;
            } else {
                len = OPSize.PUSH_STOP_BT + tlen + OPSize.POP_STOP_BT;
            }
            break;

        default:
            newInternalException(ERR_PARSER_BUG);
            return 0; // not reached
        } // switch
        return len;
    }

    @Override
    protected void compileEncloseNode(EncloseNode node) {
        int len;
        switch (node.type) {
        case EncloseType.MEMORY:
            if (Config.USE_SUBEXP_CALL) {
                if (node.isCalled()) {
                    addOpcode(OPCode.CALL);
                    node.callAddr = codeLength + OPSize.ABSADDR + OPSize.JUMP;
                    node.setAddrFixed();
                    addAbsAddr(node.callAddr);
                    len = compileLengthTree(node.target);
                    len += OPSize.MEMORY_START_PUSH + OPSize.RETURN;
                    if (bsAt(regex.btMemEnd, node.regNum)) {
                        len += node.isRecursion() ? OPSize.MEMORY_END_PUSH_REC : OPSize.MEMORY_END_PUSH;
                    } else {
                        len += node.isRecursion() ? OPSize.MEMORY_END_REC : OPSize.MEMORY_END;
                    }
                    addOpcodeRelAddr(OPCode.JUMP, len);
                }
            } // USE_SUBEXP_CALL

            if (bsAt(regex.btMemStart, node.regNum)) {
                addOpcode(OPCode.MEMORY_START_PUSH);
            } else {
                addOpcode(OPCode.MEMORY_START);
            }

            addMemNum(node.regNum);
            compileTree(node.target);

            if (Config.USE_SUBEXP_CALL && node.isCalled()) {
                if (bsAt(regex.btMemEnd, node.regNum)) {
                    addOpcode(node.isRecursion() ? OPCode.MEMORY_END_PUSH_REC : OPCode.MEMORY_END_PUSH);
                } else {
                    addOpcode(node.isRecursion() ? OPCode.MEMORY_END_REC : OPCode.MEMORY_END);
                }
                addMemNum(node.regNum);
                addOpcode(OPCode.RETURN);
            } else { // USE_SUBEXP_CALL
                if (bsAt(regex.btMemEnd, node.regNum)) {
                    addOpcode(OPCode.MEMORY_END_PUSH);
                } else {
                    addOpcode(OPCode.MEMORY_END);
                }
                addMemNum(node.regNum);
            }
            break;

        case EncloseType.STOP_BACKTRACK:
            if (node.isStopBtSimpleRepeat()) {
                QuantifierNode qn = (QuantifierNode)node.target;

                compileTreeNTimes(qn.target, qn.lower);

                len = compileLengthTree(qn.target);
                addOpcodeRelAddr(OPCode.PUSH, len + OPSize.POP + OPSize.JUMP);
                compileTree(qn.target);
                addOpcode(OPCode.POP);
                addOpcodeRelAddr(OPCode.JUMP, -(OPSize.PUSH + len + OPSize.POP + OPSize.JUMP));
            } else {
                addOpcode(OPCode.PUSH_STOP_BT);
                compileTree(node.target);
                addOpcode(OPCode.POP_STOP_BT);
            }
            break;

        default:
            newInternalException(ERR_PARSER_BUG);
            break;
        } // switch
    }

    private int compileLengthAnchorNode(AnchorNode node) {
        int tlen;
        if (node.target != null) {
            tlen = compileLengthTree(node.target);
        } else {
            tlen = 0;
        }

        int len;
        switch (node.type) {
        case AnchorType.PREC_READ:
            len = OPSize.PUSH_POS + tlen + OPSize.POP_POS;
            break;

        case AnchorType.PREC_READ_NOT:
            len = OPSize.PUSH_POS_NOT + tlen + OPSize.FAIL_POS;
            break;

        case AnchorType.LOOK_BEHIND:
            len = OPSize.LOOK_BEHIND + tlen;
            break;

        case AnchorType.LOOK_BEHIND_NOT:
            len = OPSize.PUSH_LOOK_BEHIND_NOT + tlen + OPSize.FAIL_LOOK_BEHIND_NOT;
            break;

        default:
            len = OPSize.OPCODE;
            break;
        } // switch
        return len;
    }

    @Override
    protected void compileAnchorNode(AnchorNode node) {
        int len;
        int n;

        switch (node.type) {
        case AnchorType.BEGIN_BUF:          addOpcode(OPCode.BEGIN_BUF);            break;
        case AnchorType.END_BUF:            addOpcode(OPCode.END_BUF);              break;
        case AnchorType.BEGIN_LINE:         addOpcode(OPCode.BEGIN_LINE);           break;
        case AnchorType.END_LINE:           addOpcode(OPCode.END_LINE);             break;
        case AnchorType.SEMI_END_BUF:       addOpcode(OPCode.SEMI_END_BUF);         break;
        case AnchorType.BEGIN_POSITION:     addOpcode(OPCode.BEGIN_POSITION);       break;

        case AnchorType.WORD_BOUND:
            addOpcode(enc.isSingleByte() ? OPCode.WORD_BOUND_SB : OPCode.WORD_BOUND);
            break;

        case AnchorType.NOT_WORD_BOUND:
            addOpcode(enc.isSingleByte() ? OPCode.NOT_WORD_BOUND_SB : OPCode.NOT_WORD_BOUND);
            break;

        case AnchorType.WORD_BEGIN:
            if (Config.USE_WORD_BEGIN_END)
                addOpcode(enc.isSingleByte() ? OPCode.WORD_BEGIN_SB : OPCode.WORD_BEGIN);
            break;

        case AnchorType.WORD_END:
            if (Config.USE_WORD_BEGIN_END)
                addOpcode(enc.isSingleByte() ? OPCode.WORD_END_SB : OPCode.WORD_END);
            break;

        case AnchorType.PREC_READ:
            addOpcode(OPCode.PUSH_POS);
            compileTree(node.target);
            addOpcode(OPCode.POP_POS);
            break;

        case AnchorType.PREC_READ_NOT:
            len = compileLengthTree(node.target);
            addOpcodeRelAddr(OPCode.PUSH_POS_NOT, len + OPSize.FAIL_POS);
            compileTree(node.target);
            addOpcode(OPCode.FAIL_POS);
            break;

        case AnchorType.LOOK_BEHIND:
            addOpcode(enc.isSingleByte() ? OPCode.LOOK_BEHIND_SB : OPCode.LOOK_BEHIND);
            if (node.charLength < 0) {
                n = analyser.getCharLengthTree(node.target);
                if (analyser.returnCode != 0) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
            } else {
                n = node.charLength;
            }
            addLength(n);
            compileTree(node.target);
            break;

        case AnchorType.LOOK_BEHIND_NOT:
            len = compileLengthTree(node.target);
            addOpcodeRelAddr(OPCode.PUSH_LOOK_BEHIND_NOT, len + OPSize.FAIL_LOOK_BEHIND_NOT);
            if (node.charLength < 0) {
                n = analyser.getCharLengthTree(node.target);
                if (analyser.returnCode != 0) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
            } else {
                n = node.charLength;
            }
            addLength(n);
            compileTree(node.target);
            addOpcode(OPCode.FAIL_LOOK_BEHIND_NOT);
            break;

        default:
            newInternalException(ERR_PARSER_BUG);
        } // switch
    }

    private int compileLengthTree(Node node) {
        int len = 0;

        switch (node.getType()) {
        case NodeType.LIST:
            ConsAltNode lin = (ConsAltNode)node;
            do {
                len += compileLengthTree(lin.car);
            } while ((lin = lin.cdr) != null);
            break;

        case NodeType.ALT:
            ConsAltNode aln = (ConsAltNode)node;
            int n = 0;
            do {
                len += compileLengthTree(aln.car);
                n++;
            } while ((aln = aln.cdr) != null);
            len += (OPSize.PUSH + OPSize.JUMP) * (n - 1);
            break;

        case NodeType.STR:
            StringNode sn = (StringNode)node;
            if (sn.isRaw()) {
                len = compileLengthStringRawNode(sn);
            } else {
                len = compileLengthStringNode(sn);
            }
            break;

        case NodeType.CCLASS:
            len = compileLengthCClassNode((CClassNode)node);
            break;

        case NodeType.CTYPE:
        case NodeType.CANY:
            len = OPSize.OPCODE;
            break;

        case NodeType.BREF:
            BackRefNode br = (BackRefNode)node;

            if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) {
                len = OPSize.OPCODE + OPSize.OPTION + OPSize.LENGTH +
                      OPSize.LENGTH + (OPSize.MEMNUM * br.backNum);
            } else { // USE_BACKREF_AT_LEVEL
                if (br.backNum == 1) {
                    len = ((!isIgnoreCase(regex.options) && br.back[0] <= 2)
                            ? OPSize.OPCODE : (OPSize.OPCODE + OPSize.MEMNUM));
                } else {
                    len = OPSize.OPCODE + OPSize.LENGTH + (OPSize.MEMNUM * br.backNum);
                }
            }
            break;

        case NodeType.CALL:
            if (Config.USE_SUBEXP_CALL) {
                len = OPSize.CALL;
                break;
            } // USE_SUBEXP_CALL
            break;

        case NodeType.QTFR:
            if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
                len = compileCECLengthQuantifierNode((QuantifierNode)node);
            } else {
                len = compileNonCECLengthQuantifierNode((QuantifierNode)node);
            }
            break;

        case NodeType.ENCLOSE:
            len = compileLengthEncloseNode((EncloseNode)node);
            break;

        case NodeType.ANCHOR:
            len = compileLengthAnchorNode((AnchorNode)node);
            break;

        default:
            newInternalException(ERR_PARSER_BUG);

        } //switch
        return len;
    }

    private void ensure(int size) {
        if (size >= code.length) {
            int length = code.length << 1;
            while (length <= size) length <<= 1;
            int[]tmp = new int[length];
            System.arraycopy(code, 0, tmp, 0, code.length);
            code = tmp;
        }
    }

    private void addInt(int i) {
        if (codeLength >= code.length) {
            int[]tmp = new int[code.length << 1];
            System.arraycopy(code, 0, tmp, 0, code.length);
            code = tmp;
        }
        code[codeLength++] = i;
    }

    void setInt(int i, int offset) {
        ensure(offset);
        regex.code[offset] = i;
    }

    private void addObject(Object o) {
        if (regex.operands == null) {
            regex.operands = new Object[4];
        } else if (regex.operandLength >= regex.operands.length) {
            Object[]tmp = new Object[regex.operands.length << 1];
            System.arraycopy(regex.operands, 0, tmp, 0, regex.operands.length);
            regex.operands = tmp;
        }
        addInt(regex.operandLength);
        regex.operands[regex.operandLength++] = o;
    }

    private void addBytes(byte[]bytes, int p ,int length) {
        ensure(codeLength + length);
        int end = p + length;

        while (p < end) code[codeLength++] = bytes[p++];
    }

    private void addInts(int[]ints, int length) {
        ensure(codeLength + length);
        System.arraycopy(ints, 0, code, codeLength, length);
        codeLength += length;
    }

    private void addOpcode(int opcode) {
        addInt(opcode);

        switch(opcode) {
        case OPCode.ANYCHAR_STAR:
        case OPCode.ANYCHAR_STAR_SB:
        case OPCode.ANYCHAR_ML_STAR:
        case OPCode.ANYCHAR_ML_STAR_SB:
        case OPCode.ANYCHAR_STAR_PEEK_NEXT:
        case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
        case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
        case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
        case OPCode.STATE_CHECK_ANYCHAR_STAR:
        case OPCode.STATE_CHECK_ANYCHAR_STAR_SB:
        case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:
        case OPCode.MEMORY_START_PUSH:
        case OPCode.MEMORY_END_PUSH:
        case OPCode.MEMORY_END_PUSH_REC:
        case OPCode.MEMORY_END_REC:
        case OPCode.NULL_CHECK_START:
        case OPCode.NULL_CHECK_END_MEMST_PUSH:
        case OPCode.PUSH:
        case OPCode.STATE_CHECK_PUSH:
        case OPCode.STATE_CHECK_PUSH_OR_JUMP:
        case OPCode.STATE_CHECK:
        case OPCode.PUSH_OR_JUMP_EXACT1:
        case OPCode.PUSH_IF_PEEK_NEXT:
        case OPCode.REPEAT:
        case OPCode.REPEAT_NG:
        case OPCode.REPEAT_INC_SG:
        case OPCode.REPEAT_INC_NG:
        case OPCode.REPEAT_INC_NG_SG:
        case OPCode.PUSH_POS:
        case OPCode.PUSH_POS_NOT:
        case OPCode.PUSH_STOP_BT:
        case OPCode.PUSH_LOOK_BEHIND_NOT:
        case OPCode.CALL:
        case OPCode.RETURN: // it will appear only with CALL though
            regex.stackNeeded = true;
        }
    }

    private void addStateCheckNum(int num) {
        addInt(num);
    }

    private void addRelAddr(int addr) {
        addInt(addr);
    }

    private void addAbsAddr(int addr) {
        addInt(addr);
    }

    private void addLength(int length) {
        addInt(length);
    }

    private void addMemNum(int num) {
        addInt(num);
    }

    private void addPointer(Object o) {
        addObject(o);
    }

    private void addOption(int option) {
        addInt(option);
    }

    private void addOpcodeRelAddr(int opcode, int addr) {
        addOpcode(opcode);
        addRelAddr(addr);
    }

    private void addOpcodeOption(int opcode, int option) {
        addOpcode(opcode);
        addOption(option);
    }

    private void addTemplate(byte[]bytes) {
        if (templateNum == 0) {
            templates = new byte[2][];
        } else if (templateNum == templates.length) {
            byte[][]tmp = new byte[templateNum * 2][];
            System.arraycopy(templates, 0, tmp, 0, templateNum);
            templates = tmp;
        }
        templates[templateNum++] = bytes;
    }
}
TOP

Related Classes of org.joni.ArrayCompiler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.