Package org.exist.xquery.functions.fn

Source Code of org.exist.xquery.functions.fn.FunAnalyzeString$GroupPosition

package org.exist.xquery.functions.fn;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.exist.dom.QName;
import org.exist.memtree.MemTreeBuilder;
import org.exist.xquery.BasicFunction;
import org.exist.xquery.Cardinality;
import org.exist.xquery.Function;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.NodeValue;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.Type;
import org.xml.sax.helpers.AttributesImpl;
/**
* XPath and XQuery 3.0 F+O fn:analyze-string()
*
* @author Adam Retter <adam@exist-db.org>
* @serial 201211101626
*
* Corrections were made by to the previous buggy version
* by taking inspiration from the BaseX 7.3 version.
*/

public class FunAnalyzeString extends BasicFunction {

    private final static QName fnAnalyzeString = new QName("analyze-string", Function.BUILTIN_FUNCTION_NS);

    private final static QName QN_MATCH = new QName("match", Function.BUILTIN_FUNCTION_NS);
    private final static QName QN_GROUP = new QName("group", Function.BUILTIN_FUNCTION_NS);
    private final static QName QN_NR = new QName("nr");
    private final static QName QN_NON_MATCH = new QName("non-match", Function.BUILTIN_FUNCTION_NS);
   
    public final static FunctionSignature signatures[] = {
        new FunctionSignature(
            fnAnalyzeString,
            "Analyzes a string using a regular expression, returning an XML " +
            "structure that identifies which parts of the input string matched " +
            "or failed to match the regular expression, and in the case of " +
            "matched substrings, which substrings matched each " +
            "capturing group in the regular expression.",
            new SequenceType[] {
                new FunctionParameterSequenceType("input", Type.STRING,
                    Cardinality.ZERO_OR_ONE, "The input string"),
                new FunctionParameterSequenceType("pattern", Type.STRING,
                    Cardinality.EXACTLY_ONE, "The pattern")
            },
            new FunctionReturnSequenceType(Type.ELEMENT,
                Cardinality.EXACTLY_ONE, "The result of the analysis")
        ),
        new FunctionSignature(
            fnAnalyzeString,
            "Analyzes a string using a regular expression, returning an XML " +
            "structure that identifies which parts of the input string matched " +
            "or failed to match the regular expression, and in the case of " +
            "matched substrings, which substrings matched each " +
            "capturing group in the regular expression.",
            new SequenceType[] {
                new FunctionParameterSequenceType("input", Type.STRING,
                    Cardinality.ZERO_OR_ONE, "The input string"),
                new FunctionParameterSequenceType("pattern", Type.STRING,
                    Cardinality.EXACTLY_ONE, "The pattern"),
                new FunctionParameterSequenceType("flags", Type.STRING,
                    Cardinality.EXACTLY_ONE, "Flags"),
            },
            new FunctionReturnSequenceType(Type.ELEMENT,
                Cardinality.EXACTLY_ONE, "The result of the analysis")
        )
    };

    public FunAnalyzeString(final XQueryContext context, final FunctionSignature signature) {
        super(context, signature);
    }

    @Override
    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
        final MemTreeBuilder builder = new MemTreeBuilder(context);
        builder.startDocument();
        builder.startElement(new QName("analyze-string-result", Function.BUILTIN_FUNCTION_NS), null);
        String input = "";
        if (!args[0].isEmpty()) {
            input = args[0].itemAt(0).getStringValue();
        }
        if (!"".equals(input)) {
            final String pattern = args[1].itemAt(0).getStringValue();
            String flags = null;
            if(args.length == 3) {
                flags = args[2].itemAt(0).getStringValue();
            }
            analyzeString(builder, input, pattern, flags);
        }
        builder.endElement();
        builder.endDocument();
        return (NodeValue)builder.getDocument().getDocumentElement();
    }

    private void analyzeString(final MemTreeBuilder builder, final String input, final String pattern, final String flags) throws XPathException {
        final Pattern ptn;
        if (flags != null) {
            final int iFlags = parseStringFlags(flags);
            ptn = Pattern.compile(pattern, iFlags);
        } else {
            ptn = Pattern.compile(pattern);
        }
       
        final Matcher matcher = ptn.matcher(input);
       
        int offset = 0;
        while(matcher.find()) {
            if(matcher.start() != offset) {
                nonMatch(builder, input.substring(offset, matcher.start()));
            }
            match(builder, matcher, input, 0);
           
            offset = matcher.end();
        }
       
        if(offset != input.length()) {
            nonMatch(builder, input.substring(offset));
        }
    }
   
    private class GroupPosition {

        public GroupPosition(final int groupNumber, final int position) {
            this.groupNumber = groupNumber;
            this.position = position;
        }
       
        public int groupNumber;
        public int position;
    }
   
    private GroupPosition match(final MemTreeBuilder builder, final Matcher matcher, final String input, final int group) {
        if(group == 0) {
            builder.startElement(QN_MATCH, null);
        } else {
            final AttributesImpl attributes = new AttributesImpl();
            attributes.addAttribute("", QN_NR.getLocalName(), QN_NR.getLocalName(), "int", Integer.toString(group));
            builder.startElement(QN_GROUP, attributes);
        }
       
        final int groupStart = matcher.start(group);
        final int groupEnd = matcher.end(group);
        final int groupCount = matcher.groupCount();
       
        GroupPosition groupAndPosition = new GroupPosition(group + 1, groupStart);
        while(groupAndPosition.groupNumber <= groupCount && matcher.end(groupAndPosition.groupNumber) <= groupEnd) {
            final int start = matcher.start(groupAndPosition.groupNumber);
            if(start >= 0) { //group matched
                if(groupAndPosition.position < start) {
                    builder.characters(input.substring(groupAndPosition.position, start));
                }
                groupAndPosition = match(builder, matcher, input, groupAndPosition.groupNumber);
            } else {
                groupAndPosition.groupNumber++; //skip to next group
            }
        }
       
        if(groupAndPosition.position < groupEnd) {
            builder.characters(input.substring(groupAndPosition.position, groupEnd));
            groupAndPosition.position = groupEnd;
        }
       
        builder.endElement();
       
        return groupAndPosition;
    }

    private void nonMatch(final MemTreeBuilder builder, final String nonMatch) {
        builder.startElement(QN_NON_MATCH, null);
        builder.characters(nonMatch);
        builder.endElement();
    }

    private int parseStringFlags(final String flags) {
        int iFlags = 0;
        for (final char c : flags.toCharArray()) {
            switch(c) {
            case 's':
                iFlags |= Pattern.DOTALL;
                break;
           
            case 'm':
                iFlags |= Pattern.MULTILINE;
                break;
               
            case 'i':
                iFlags |= Pattern.CASE_INSENSITIVE;
                break;
               
            case 'x' :
                iFlags |= Pattern.CANON_EQ;
                break;
               
            case 'q' :
                iFlags |= Pattern.LITERAL;
                break;
            }
        }
        return iFlags;
    }
}
TOP

Related Classes of org.exist.xquery.functions.fn.FunAnalyzeString$GroupPosition

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.