Package org.joni.ast

Examples of org.joni.ast.CClassNode$CCStateArg


        sprev = sbegin; // break;
    }

    private void opCClassNode() {
        if (s >= range) {opFail(); return;}
        CClassNode cc = (CClassNode)regex.operands[code[ip++]];
        int mbLen = enc.length(bytes, s, end);
        int ss = s;
        s += mbLen;
        if (s > range) {opFail(); return;}
        int c = enc.mbcToCode(bytes, ss, s);
        if (!cc.isCodeInCCLength(mbLen, c)) {opFail(); return;}
        sprev = sbegin; // break;
    }
View Full Code Here


    }

    public int compiledByteCodeToString(StringBuilder sb, int bp) {
        int len, n, mem, addr, scn, cod;
        BitSet bs;
        CClassNode cc;
        int tm, idx;

        sb.append("[" + OPCode.OpCodeNames[code[bp]]);
        int argType = OPCode.OpCodeArgTypes[code[bp]];
        int ip = bp;
View Full Code Here

    public void apply(int from, int[]to, int length, Object o) {
        ApplyCaseFoldArg arg = (ApplyCaseFoldArg)o;

        ScanEnvironment env = arg.env;
        Encoding enc = env.enc;
        CClassNode cc = arg.cc;
        BitSet bs = cc.bs;

        if (length == 1) {
            boolean inCC = cc.isCodeInCC(enc, from);

            if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) {
                if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) {
                    if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) {
                        cc.addCodeRange(env, to[0], to[0]);
                    } else {
                        /* /(?i:[^A-C])/.match("a") ==> fail. */
                        bs.set(to[0]);
                    }
                }
            } else {
                if (inCC) {
                    if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) {
                        if (cc.isNot()) cc.clearNotFlag(enc);
                        cc.addCodeRange(env, to[0], to[0]);
                    } else {
                        if (cc.isNot()) {
                            bs.clear(to[0]);
                        } else {
                            bs.set(to[0]);
                        }
                    }
                }
            } // CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS

        } else {
            if (cc.isCodeInCC(enc, from) && (!Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS || !cc.isNot())) {
                StringNode node = null;
                for (int i=0; i<length; i++) {
                    if (i == 0) {
                        node = new StringNode();
                        /* char-class expanded multi-char only
View Full Code Here

                break;
            } // inner switch
            break;

        case NodeType.CCLASS:
            CClassNode xc = (CClassNode)x;

            switch(yType) {
            case NodeType.CTYPE:
                switch(((CTypeNode)y).ctype) {
                case CharacterType.WORD:
                    if (!((CTypeNode)y).not) {
                        if (xc.mbuf == null && !xc.isNot()) {
                            for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
                                if (xc.bs.at(i)) {
                                    if (enc.isSbWord(i)) return false;
                                }
                            }
                            return true;
                        }
                        return false;
                    } else {
                        for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
                            if (!enc.isSbWord(i)) {
                                if (!xc.isNot()) {
                                    if (xc.bs.at(i)) return false;
                                } else {
                                    if (!xc.bs.at(i)) return false;
                                }
                            }
                        }
                        return true;
                    }
                    // break; not reached

                default:
                    break;
                } // inner switch
                break;

            case NodeType.CCLASS:
                CClassNode yc = (CClassNode)y;

                for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
                    boolean v = xc.bs.at(i);
                    if ((v && !xc.isNot()) || (!v && xc.isNot())) {
                        v = yc.bs.at(i);
                        if ((v && !yc.isNot()) || (!v && yc.isNot())) return false;
                    }
                }
                if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) return true;
                return false;
                // break; not reached

            case NodeType.STR:
                // !goto swap;!
                tmp = x;
                x = y;
                y = tmp;
                continue retry;

            default:
                break;

            } // inner switch
            break; // case NodeType.CCLASS

        case NodeType.STR:
            StringNode xs = (StringNode)x;
            if (xs.length() == 0) break;

            switch (yType) {
            case NodeType.CTYPE:
                CTypeNode cy = ((CTypeNode)y);
                switch (cy.ctype) {
                case CharacterType.WORD:
                    if (enc.isMbcWord(xs.bytes, xs.p, xs.end)) {
                        return cy.not;
                    } else {
                        return !cy.not;
                    }

                default:
                    break;

                } // inner switch
                break;

            case NodeType.CCLASS:
                CClassNode cc = (CClassNode)y;
                int code = enc.mbcToCode(xs.bytes, xs.p, xs.p + enc.maxLength());
                return !cc.isCodeInCC(enc, code);

            case NodeType.STR:
                StringNode ys = (StringNode)y;
                int len = xs.length();
                if (len > ys.length()) len = ys.length();
View Full Code Here

            }
            break;
        }

        case NodeType.CCLASS: {
            CClassNode cc = (CClassNode)node;
            /* no need to check ignore case. (set in setup_tree()) */
            if (cc.mbuf != null || cc.isNot()) {
                int min = enc.minLength();
                int max = enc.maxLengthDistance();
                opt.length.set(min, max);
            } else {
                for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
                    boolean z = cc.bs.at(i);
                    if ((z && !cc.isNot()) || (!z && cc.isNot())) {
                        opt.map.addChar((byte)i, enc);
                    }
                }
                opt.length.set(1, 1);
            }
View Full Code Here

        return true; /* 1: is not POSIX bracket, but no error. */
    }

    private CClassNode parseCharProperty() {
        int ctype = fetchCharPropertyToCType();
        CClassNode n = new CClassNode();
        n.addCType(ctype, false, env, this);
        if (token.getPropNot()) n.setNot();
        return n;
    }
View Full Code Here

            if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS);
            env.ccEscWarn("]");
            token.type = TokenType.CHAR; /* allow []...] */
        }

        CClassNode cc = new CClassNode();
        CClassNode prevCC = null;
        CClassNode workCC = null;

        CCStateArg arg = new CCStateArg();

        boolean andStart = false;
        arg.state = CCSTATE.START;

        while (token.type != TokenType.CC_CLOSE) {
            boolean fetched = false;

            switch (token.type) {

            case CHAR:
                final int len;
                if (Config.VANILLA) {
                    len = enc.codeToMbcLength(token.getC());
                    if (len > 1) {
                        arg.inType = CCVALTYPE.CODE_POINT;
                    } else {
                        arg.inType = CCVALTYPE.SB; // sb_char:
                    }
                } else {
                    if (token.getCode() >= BitSet.SINGLE_BYTE_SIZE || (len = enc.codeToMbcLength(token.getC())) > 1) {
                        arg.inType = CCVALTYPE.CODE_POINT;
                    } else {
                        arg.inType = CCVALTYPE.SB; // sb_char:
                    }
                }
                arg.v = token.getC();
                arg.vIsRaw = false;
                parseCharClassValEntry2(cc, arg); // goto val_entry2
                break;

            case RAW_BYTE:
                if (!enc.isSingleByte() && token.base != 0) { /* tok->base != 0 : octal or hexadec. */
                    byte[]buf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN];
                    int psave = p;
                    int base = token.base;
                    buf[0] = (byte)token.getC();
                    int i;
                    for (i=1; i<enc.maxLength(); i++) {
                        fetchTokenInCC();
                        if (token.type != TokenType.RAW_BYTE || token.base != base) {
                            fetched = true;
                            break;
                        }
                        buf[i] = (byte)token.getC();
                    }
                    if (i < enc.minLength()) newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);

                    len = enc.length(buf, 0, i);
                    if (i < len) {
                        newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
                    } else if (i > len) { /* fetch back */
                        p = psave;
                        for (i=1; i<len; i++) fetchTokenInCC();
                        fetched = false;
                    }
                    if (i == 1) {
                        arg.v = buf[0] & 0xff;
                        arg.inType = CCVALTYPE.SB; // goto raw_single
                    } else {
                        arg.v = enc.mbcToCode(buf, 0, buf.length);
                        arg.inType = CCVALTYPE.CODE_POINT;
                    }
                } else {
                    arg.v = token.getC();
                    arg.inType = CCVALTYPE.SB; // raw_single:
                }
                arg.vIsRaw = true;
                parseCharClassValEntry2(cc, arg); // goto val_entry2
                break;

            case CODE_POINT:
                arg.v = token.getCode();
                arg.vIsRaw = true;
                parseCharClassValEntry(cc, arg); // val_entry:, val_entry2
                break;

            case POSIX_BRACKET_OPEN:
                if (parsePosixBracket(cc)) { /* true: is not POSIX bracket */
                    env.ccEscWarn("[");
                    p = token.backP;
                    arg.v = token.getC();
                    arg.vIsRaw = false;
                    parseCharClassValEntry(cc, arg); // goto val_entry
                    break;
                }
                cc.nextStateClass(arg, env); // goto next_class
                break;

            case CHAR_TYPE:
                cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
                cc.nextStateClass(arg, env); // next_class:
                break;

            case CHAR_PROPERTY:
                int ctype = fetchCharPropertyToCType();
                cc.addCType(ctype, token.getPropNot(), env, this);
                cc.nextStateClass(arg, env); // goto next_class
                break;

            case CC_RANGE:
                if (arg.state == CCSTATE.VALUE) {
                    fetchTokenInCC();
                    fetched = true;
                    if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */
                        parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry;
                        break;
                    } else if (token.type == TokenType.CC_AND) {
                        env.ccEscWarn("-");
                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
                        break;
                    }
                    arg.state = CCSTATE.RANGE;
                } else if (arg.state == CCSTATE.START) {
                    arg.v = token.getC(); /* [-xa] is allowed */
                    arg.vIsRaw = false;
                    fetchTokenInCC();
                    fetched = true;
                    if (token.type == TokenType.CC_RANGE || andStart) env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */
                    parseCharClassValEntry(cc, arg); // goto val_entry
                    break;
                } else if (arg.state == CCSTATE.RANGE) {
                    env.ccEscWarn("-");
                    parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */
                    break;
                } else { /* CCS_COMPLETE */
                    fetchTokenInCC();
                    fetched = true;
                    if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */
                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
                        break;
                    } else if (token.type == TokenType.CC_AND) {
                        env.ccEscWarn("-");
                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
                        break;
                    }

                    if (syntax.allowDoubleRangeOpInCC()) {
                        env.ccEscWarn("-");
                        parseCharClassSbChar(cc, arg); // goto sb_char /* [0-9-a] is allowed as [0-9\-a] */
                        break;
                    }
                    newSyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
                }
                break;

            case CC_CC_OPEN: /* [ */
                CClassNode acc = parseCharClass();
                cc.or(acc, enc);
                break;

            case CC_AND:     /* && */
                if (arg.state == CCSTATE.VALUE) {
                    arg.v = 0; // ??? safe v ?
                    arg.vIsRaw = false;
                    cc.nextStateValue(arg, env);
                }
                /* initialize local variables */
                andStart = true;
                arg.state = CCSTATE.START;
                if (prevCC != null) {
                    prevCC.and(cc, enc);
                } else {
                    prevCC = cc;
                    if (workCC == null) workCC = new CClassNode();
                    cc = workCC;
                }
                cc.clear();
                break;

View Full Code Here

            switch(token.getPropCType()) {
            case CharacterType.D:
            case CharacterType.S:
            case CharacterType.W:
                if (Config.NON_UNICODE_SDW) {
                    CClassNode cc = new CClassNode();
                    cc.addCType(token.getPropCType(), false, env, this);
                    if (token.getPropNot()) cc.setNot();
                    node = cc;
                }
                break;

            case CharacterType.WORD:
                node = new CTypeNode(token.getPropCType(), token.getPropNot());
                break;

            case CharacterType.SPACE:
            case CharacterType.DIGIT:
            case CharacterType.XDIGIT:
                // #ifdef USE_SHARED_CCLASS_TABLE ... #endif
                CClassNode ccn = new CClassNode();
                ccn.addCType(token.getPropCType(), false, env, this);
                if (token.getPropNot()) ccn.setNot();
                node = ccn;
                break;

            default:
                newInternalException(ERR_PARSER_BUG);

            } // inner switch
            break;

        case CHAR_PROPERTY:
            node = parseCharProperty();
            break;

        case CC_CC_OPEN:
            CClassNode cc = parseCharClass();
            node = cc;
            if (isIgnoreCase(env.option)) {
                ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
                enc.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
View Full Code Here

TOP

Related Classes of org.joni.ast.CClassNode$CCStateArg

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.