Package br.com.starcode.parccser

Source Code of br.com.starcode.parccser.Parser

package br.com.starcode.parccser;

import java.util.ArrayList;
import java.util.List;

import br.com.starcode.parccser.model.AttributeOperator;
import br.com.starcode.parccser.model.AttributeSelector;
import br.com.starcode.parccser.model.ClassSelector;
import br.com.starcode.parccser.model.Combinator;
import br.com.starcode.parccser.model.Context;
import br.com.starcode.parccser.model.HashSelector;
import br.com.starcode.parccser.model.NegationSelector;
import br.com.starcode.parccser.model.PseudoExpression;
import br.com.starcode.parccser.model.PseudoSelector;
import br.com.starcode.parccser.model.Selector;
import br.com.starcode.parccser.model.SimpleSelector;
import br.com.starcode.parccser.model.SimpleSelectorSequence;
import br.com.starcode.parccser.model.TypeSelector;
import br.com.starcode.parccser.model.PseudoExpression.Item;
import br.com.starcode.parccser.model.PseudoExpression.Type;
import br.com.starcode.parccser.model.PseudoSelector.PseudoType;

/**
* That's the main class for parsing CSS selectors.
*
* Selector list:
* http://www.w3.org/TR/css3-selectors/#selectors
*/
public class Parser {

    protected String content;
    protected int pos;
    protected int len;
    protected Character current;
    protected ParserListener parserListener;
   
    public Parser(String selector, ParserListener parserListener) {
        if (selector == null || selector.trim().isEmpty()) {
            throw new IllegalArgumentException("Selector cannot be null or empty!");
        }
        if (parserListener == null) {
            throw new IllegalArgumentException("ParserListener cannot be null!");
        }
        this.parserListener = parserListener;
        this.content = selector;
        this.pos = -1;
        this.len = selector.length();
    }
   
    /**
     * Starts parsing the selector
     * @throws ParserException
     */
    public void interpret() throws ParserException {
        next();
        groups();
    }
   
    protected Character next() {
        if (pos < len) {
            pos++;
        }
        if (pos < len) {
            return current = content.charAt(pos);
        }
        return current = null;
    }
   
    protected boolean end() {
        return pos >= len;
    }
   
    /**
     * selectors_group
     *  : selector [ COMMA S* selector ]*
     */
    protected List<Selector> groups() throws ParserException {

        List<Selector> groups = new ArrayList<Selector>();
        int groupNumber = 0;
        while (!end()) {
            ignoreWhitespaces();
            parserListener.beginGroup(groupNumber, pos);
            Selector selector = selector();
            groups.add(selector);
            parserListener.endGroup(selector);
            groupNumber++;
            ignoreWhitespaces();
            if (!end() && current != ',') {
                throw new ParserException("There is something left in the selector at position " + pos);
            }
            next();
        }
        return groups;
       
    }
   
    /**
     * selector
     *  : simple_selector_sequence [ combinator simple_selector_sequence ]*
     *  ;
     * 
     *  combinator
     *  : PLUS S* | GREATER S* | TILDE S* | S+
     */
    protected Selector selector() throws ParserException {
        int initialPosition = pos;
        StringBuilder sb = new StringBuilder();
        List<SimpleSelectorSequence> simpleSelectors = new ArrayList<SimpleSelectorSequence>();
        List<Combinator> combinators = new ArrayList<Combinator>();
        while (!end()) {
          //finds combinator, but not in the first iteration
          Combinator combinator = null;
          if (!simpleSelectors.isEmpty()) {
            //stores if it has spaces until the next token
                boolean hasWhitespace = false;
                if (!end() && Character.isWhitespace(current)) {
                    hasWhitespace = true;
                    ignoreWhitespaces();
                }
                if (!end()) {
                    //implements "combinator" rule
                    if (current == '+') combinator = Combinator.ADJASCENT_SIBLING;
                    else if (current == '>') combinator = Combinator.CHILD;
                    else if (current == '~') combinator = Combinator.GENERAL_SIBLING;
                    //if hasn't any but spaces
                    else if (hasWhitespace) combinator = Combinator.DESCENDANT;
                    //is it the end?
                    if (combinator == null || current == ',') {
                        break;
                    }
                    sb.append(current);
                    //don't advance because spaces were just advanced
                    if (combinator != Combinator.DESCENDANT) {
                        next();
                    }
                    ignoreWhitespaces();
                    if (end()) {
                        throw new ParserException("Unexpected end of selector at position " + pos);
                    }
                }
            combinators.add(combinator);
          }
          //get next sequence
            SimpleSelectorSequence simpleSelectorSequence = simpleSelectorSequence();
            sb.append(simpleSelectorSequence);
            //sends combinator here (the first case it's null)
            simpleSelectors.add(simpleSelectorSequence);
            parserListener.selectorSequence(simpleSelectorSequence, combinator);
        }
        return new Selector(simpleSelectors, combinators, new Context(content, sb.toString(), initialPosition, pos));
    }
   
    /**
     * simple_selector_sequence
     *  : [ type_selector | universal ]
     *    [ HASH | class | attrib | pseudo | negation ]*
     *  | [ HASH | class | attrib | pseudo | negation ]+
     * 
     *  type_selector
     *  : [ namespace_prefix ]? element_name
     *
     * namespace_prefix ** not implemented
     *  : [ IDENT | '*' ]? '|'
     *
     * element_name
     *  : IDENT
     *
     * universal
     *  : [ namespace_prefix ]? '*'
     *
     * class
     *  : '.' IDENT
     */
    protected SimpleSelectorSequence simpleSelectorSequence() throws ParserException {
       
        List<SimpleSelector> simpleSelectorList = new ArrayList<SimpleSelector>();
        StringBuilder sb = new StringBuilder();
        boolean hasSimpleSelector = false;
        int initialPos = pos;
        if (current == '*') {
            //universal selector
            sb.append(current);
            hasSimpleSelector = true;
            TypeSelector ts = new TypeSelector("*", new Context(content, "*", initialPos, pos));
            parserListener.typeSelector(ts);
            simpleSelectorList.add(ts);
            next();
        } else if (Character.isLetter(current)) {
            //type selector
            String type = identifier();
            sb.append(type);
            hasSimpleSelector = true;
            TypeSelector ts = new TypeSelector(type, new Context(content, type, initialPos, pos));
            parserListener.typeSelector(ts);
            simpleSelectorList.add(ts);
        }
       
        int selectorCount = 0;
        boolean hasPseudoElement = false;
        while (!end()) {
          int initialLoopPos = pos;
            if (current == '.') {
                //class selector
                sb.append(current);
                next();
                String className = !end() ? identifier() : null;
                if (className == null || className.isEmpty()) {
                    throw new ParserException("Expected className at position " + pos);
                }
                sb.append(className);
                ClassSelector cs = new ClassSelector(className, new Context(content, "." + className, initialLoopPos, pos));
                simpleSelectorList.add(cs);
                parserListener.classSelector(cs);
            } else if (current == '#') {
                //hash selector
                sb.append(current);
                next();
                String id = !end() ? identifier() : null;
                if (id == null || id.isEmpty()) {
                    throw new ParserException("Expected id at position " + pos);
                }
                HashSelector hs = new HashSelector(id, new Context(content, "#" + id, initialLoopPos, pos));
                simpleSelectorList.add(hs);
                parserListener.idSelector(hs);
                sb.append(id);
            } else if (current == '[') {
                //attribute selectors
                sb.append(current);
                next();
                AttributeSelector as = attribute();
                simpleSelectorList.add(as);
                parserListener.attributeSelector(as);
                sb.append(as);
                sb.append(']');
            } else if (current == ':') {
                //pseudo-element, pseudo-class or negation
                sb.append(current);
                next();
                if (end()) {
                    throw new ParserException("Expected pseudo-element or pseudo-class at " + pos);
                }
                boolean doubleColon = false;
                if (current == ':') {
                    doubleColon = true;
                    sb.append(current);
                    next();
                }
                String ident = !end() ? identifier() : null;
                if (ident == null || ident.isEmpty()) {
                    throw new ParserException("Expected identifier at position " + pos);
                }
                boolean special = isPseudoSpecialCase(ident);
                if (special || doubleColon) {
                    //pseudo-element (double colon or special cases)
                    //allow just one
                    if (hasPseudoElement) {
                        throw new ParserException("Only one pseudo-element is allowed for each simple selector and a second one was found at position " + pos);
                    }
                    PseudoSelector ps = pseudo(ident, PseudoType.PseudoElement, doubleColon);
                    simpleSelectorList.add(ps);
                    parserListener.pseudoSelector(ps);
                    sb.append(ps);
                    hasPseudoElement = true;
                } else if ("not".equalsIgnoreCase(ident)) {
                    //negation
                    NegationSelector n = negation(selectorCount);
                    simpleSelectorList.add(n);
                    sb.append(ident);
                    sb.append(n);
                } else {
                    //pseudo-class
                    PseudoSelector ps = pseudo(ident, PseudoType.PseudoClass, false);
                    simpleSelectorList.add(ps);
                    parserListener.pseudoSelector(ps);
                    sb.append(ps);
                }
            } else {
                break;
            }
            selectorCount++;
        }
        if (!hasSimpleSelector && selectorCount == 0) {
            throw new ParserException("No real selector found at position " + pos);
        }
           
        return new SimpleSelectorSequence(simpleSelectorList, new Context(content, sb.toString(), initialPos, pos));
       
    }
   
    /**
     * attrib
     *  : '[' S* [ namespace_prefix ]? IDENT S*
     *        [ [ PREFIXMATCH |
     *            SUFFIXMATCH |
     *            SUBSTRINGMATCH |
     *            '=' |
     *            INCLUDES |
     *            DASHMATCH ] S* [ IDENT | STRING ] S*
     *        ]? ']'
     */
    protected AttributeSelector attribute() throws ParserException {
       
      int initialPos = pos;
        StringBuilder sb = new StringBuilder();
        ignoreWhitespaces();
        String name = !end() ? identifier() : null;
        if (name == null || name.isEmpty()) {
            throw new ParserException("Expected attribute name at position " + pos);
        }
        sb.append(name);
        ignoreWhitespaces();
        if (end()) {
            throw new ParserException("Unexpected end of selector selector at position " + pos);
        }
        AttributeOperator operator = null;
        if (current == '=') {
            operator = AttributeOperator.EQUALS;
            next();
        } else if (current != ']') {
            if (current == '~') operator = AttributeOperator.INCLUDES;
            else if (current == '|') operator = AttributeOperator.DASH_MATCH;
            else if (current == '^') operator = AttributeOperator.PREFIX_MATCH;
            else if (current == '$') operator = AttributeOperator.SUFFIX_MATCH;
            else if (current == '*') operator = AttributeOperator.SUBSTRING_MATCH;
            else throw new ParserException("Invalid operator ('" + current + "') at position " + pos);
            next();
            if (end() || current != '=') {
                throw new ParserException("Expected '=' sign at position " + pos);
            }
            next();
        }
       
        ignoreWhitespaces();
        if (end()) {
            throw new ParserException("Unexpected end of attribute selector at position " + pos);
        }
       
        //value
        String value = null;
        if (operator != null) {
            sb.append(operator.getSign());
            if (current == '\'' || current == '"') {
                char quote = current;
                value = string();
                sb.append(quote);
                sb.append(value);
                sb.append(quote);
            } else {
                value = identifier();
                sb.append(value);
            }
        }
       
        ignoreWhitespaces();
       
        //end of attribute
        if (end() || current != ']') {
            throw new ParserException("Token ']' expected at position " + pos);
        }
        int endPos = pos;
        next();
       
        return new AttributeSelector(name, operator, value, new Context(content, sb.toString(), initialPos, endPos));
       
    }

    /**
     * string    {string1}|{string2}
     * string1   \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*\"
     * string2   \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*\'
     * nonascii  [^\0-\177]
     * escape    {unicode}|\\[^\n\r\f0-9a-f]
     * unicode   \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
     * nl        \n|\r\n|\r|\f
     */
    protected String string() throws ParserException {
        Character openQuote = current;
        boolean escape = false;
        next();
        StringBuilder sb = new StringBuilder();
        while (!end()) {
            if (escape) {
                //TODO implement UNICODE
                if (current == openQuote || current == '\\') {
                    sb.append(current);
                    escape = false;
                } else {
                    throw new ParserException("Invalid escape character at position " + pos);
                }
            } else {
                if (current == '\\') {
                    escape = true;
                } else {
                    sb.append(current);
                }
            }
            next();
            if (!escape && current == openQuote) break;
        }
       
        if (current != openQuote) {
            throw new ParserException("String not closed!");
        }
        next();
        return sb.toString();
       
    }
   
    /**
     * num       [0-9]+|[0-9]*\.[0-9]+
     */
    protected String number() throws ParserException {
        StringBuilder sb = new StringBuilder();
        sb.append(current);
        next();
        boolean hasDot = false;
        while (!end() && (Character.isDigit(current) || current == '.')) {
            if (current == '.') {
                if (hasDot) {
                    throw new ParserException("Unexpected '.' at position " + pos);
                }
                hasDot = true;
            }
            sb.append(current);
            next();
        }
        return sb.toString();
    }

    /**
     * negation
     *  : NOT S* negation_arg S* ')'
     *
     * negation_arg
     *  : type_selector | universal | HASH | class | attrib | pseudo
     * 
     *  TODO include "not" or not? It would be necessary to use parameters
     */
    protected NegationSelector negation(int number) throws ParserException {
       
        StringBuilder sb = new StringBuilder();
        ignoreWhitespaces();
        if (end() || current != '(') {
            throw new ParserException("Expected '(' at position " + pos);
        }
        sb.append(current);
        int parenthesisPos = pos;
        next();
        ignoreWhitespaces();
        if (end()) {
            throw new ParserException("Selector expected at position " + pos);
        }
        int initialPos = pos;
        SimpleSelector simpleSelector = null;
        if (current == '*') {
            //universal selector
            sb.append(current);
            TypeSelector ts = new TypeSelector("*", new Context(content, "*", initialPos, pos + 1));
            parserListener.negationTypeSelector(ts);
            simpleSelector = ts;
            next();
        } else if (Character.isLetter(current)) {
            //type selector
            String type = identifier();
            sb.append(type);
            TypeSelector ts = new TypeSelector(type, new Context(content, type, initialPos, pos));
            parserListener.negationTypeSelector(ts);
            simpleSelector = ts;
        } else if (current == '.') {
            //class selector
            sb.append(current);
            next();
            String className = !end() ? identifier() : null;
            if (className == null || className.isEmpty()) {
                throw new ParserException("Expected className at position " + pos);
            }
            ClassSelector cs = new ClassSelector(className, new Context(content, "." + className, initialPos, pos));
            simpleSelector = cs;
            parserListener.negationClassSelector(cs);
            sb.append(className);
        } else if (current == '#') {
            //hash selector
            sb.append(current);
            next();
            String id = !end() ? identifier() : null;
            if (id == null || id.isEmpty()) {
                throw new ParserException("Expected id at position " + pos);
            }
            HashSelector hs = new HashSelector(id, new Context(content, "#" + id, initialPos, pos));
            simpleSelector = hs;
            parserListener.negationIdSelector(hs);
            sb.append(id);
        } else if (current == '[') {
            //attribute selectors
            sb.append(current);
            next();
            AttributeSelector as = attribute();
            simpleSelector = as;
            parserListener.negationAttributeSelector(as);
            sb.append(as.getContext());
            sb.append(']');
        } else if (current == ':') {
            //pseudo-class only
            sb.append(current);
            next();
            if (end()) {
                throw new ParserException("Expected pseudo-element or pseudo-class at " + pos);
            }
            boolean doubleColon = false;
            if (current == ':') {
                doubleColon = true;
                sb.append(current);
                next();
            }
            String ident = !end() ? identifier() : null;
            if (ident == null || ident.isEmpty()) {
                throw new ParserException("Expected identifier at position " + pos);
            }
            if ("not".equalsIgnoreCase(ident) || isPseudoSpecialCase(ident) || doubleColon) {
                throw new ParserException("Expected pseudo-class (starting with ':', except ':not', ':first-line', ':first-letter', ':before' and ':after') at position " + pos);
            }
            PseudoSelector ps = pseudo(ident, PseudoType.PseudoClass, doubleColon);
            simpleSelector = ps;
            parserListener.negationPseudoSelector(ps);
            sb.append(ps.getContext());
        } else {
            throw new ParserException("Selector expected at position " + pos);
        }
       
        ignoreWhitespaces();
        if (end() || current != ')') {
            throw new ParserException("Expected ')' at position " + pos);
        }
        sb.append(current);
        next();
           
        return new NegationSelector(simpleSelector, new Context(content, sb.toString(), parenthesisPos, pos));
       
    }

    /**
     * pseudo
     *  // '::' starts a pseudo-element, ':' a pseudo-class
     *  // Exceptions: :first-line, :first-letter, :before and :after.
     *  // Note that pseudo-elements are restricted to one per selector and
     *  // occur only in the last simple_selector_sequence.
     *  : ':' ':'? [ IDENT | functional_pseudo ]
     *  ;
     * 
     * functional_pseudo
     *  : FUNCTION S* expression ')'
     */
    protected PseudoSelector pseudo(String ident, PseudoType type, boolean doubleColon) throws ParserException {
       
      int initialPost = pos;
        StringBuilder sb = new StringBuilder();
        sb.append(ident);
        PseudoExpression expression = null;
        if (!end() && current == '(') {
            sb.append(current);
            next();
            ignoreWhitespaces();
            if (end()) {
                throw new ParserException("Expected expression at position " + pos);
            }
           
            //expression
            expression = expression();
            sb.append(expression.getContext());
           
            //close parenthesis
            ignoreWhitespaces();
            if (end() || current != ')') {
                throw new ParserException("Expected ')' at position " + pos);
            }
            sb.append(current);
            next();
        }
        return new PseudoSelector(ident, type, doubleColon, expression, new Context(content, sb.toString(), initialPost, pos));
       
    }
   
    /**
     * expression
     *  // In CSS3, the expressions are identifiers, strings,
     *  // or of the form "an+b"
     *  : [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+
     * 
     *  {num}{ident}     return DIMENSION;
     *  {num}            return NUMBER;
     */
    protected PseudoExpression expression() throws ParserException {
       
        List<Item> items = new ArrayList<Item>();
        StringBuilder sb = new StringBuilder();
        while (!end()) {
           
            if (current == '+') {
                items.add(new Item(Type.Signal, "+"));
                sb.append(current);
                next();
            } else if (current == '-') {
                items.add(new Item(Type.Signal, "-"));
                sb.append(current);
                next();
            } else if (Character.isLetter(current)) {
                //identifier
                String ident = identifier();
                sb.append(ident);
                items.add(new Item(Type.Identifier, ident));
            } else if (Character.isDigit(current)) {
                //number or dimension
                String number = number();
                sb.append(number);
                if (!end() && Character.isLetter(current)) {
                    String ident = identifier();
                    sb.append(ident);
                    items.add(new Item(Type.Dimension, number + ident));
                } else {
                    items.add(new Item(Type.Number, number));
                }
            } else if (current == '\'' || current == '"') {
              Character quote = current;
                String s = string();
                sb.append(quote);
                sb.append(s);
                sb.append(quote);
                items.add(new Item(Type.StringType, s));
            } else {
                break;
            }
            ignoreWhitespaces();
           
        }
        return new PseudoExpression(items, sb.toString());
       
    }
   
    /**
     * ident     [-]?{nmstart}{nmchar}*
     * name      {nmchar}+
     * nmstart   [_a-z]|{nonascii}|{escape}
     * nonascii  [^\0-\177]
     * unicode   \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
     * escape    {unicode}|\\[^\n\r\f0-9a-f]
     * nmchar    [_a-z0-9-]|{nonascii}|{escape}
     * num       [0-9]+|[0-9]*\.[0-9]+
     */
    protected String identifier() throws ParserException {
        //TODO implement unicode, escape, [-], nonascii
        StringBuilder sb = new StringBuilder();
       
        //validates first character
        if (!end() && !Character.isLetter(current) && current != '_') {
            return "";
        }
        while (!end() && (Character.isLetterOrDigit(current) || current == '-' || current == '_')) {
            sb.append(current);
            next();
        }
        return sb.toString();
    }
   
    /**
     * pseudo (Special cases)
     *  // '::' starts a pseudo-element, ':' a pseudo-class
     *  // Exceptions: :first-line, :first-letter, :before and :after.
     */
    protected boolean isPseudoSpecialCase(String identifier) {
        return "first-line".equalsIgnoreCase(identifier) ||
                "first-letter".equalsIgnoreCase(identifier) ||
                "before".equalsIgnoreCase(identifier) ||
                "after".equalsIgnoreCase(identifier);
    }

    protected void ignoreWhitespaces() throws ParserException {
        if (!end() && Character.isWhitespace(current)) {
            while (!end() && Character.isWhitespace(current)) {
                next();
            }
        }
    }
   
}
TOP

Related Classes of br.com.starcode.parccser.Parser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.