Package client.net.sf.saxon.ce.regex

Source Code of client.net.sf.saxon.ce.regex.UnicodeBlocks

package client.net.sf.saxon.ce.regex;

import client.net.sf.saxon.ce.Configuration;
import client.net.sf.saxon.ce.expr.z.IntBlockSet;
import client.net.sf.saxon.ce.expr.z.IntSet;
import client.net.sf.saxon.ce.om.Axis;
import client.net.sf.saxon.ce.om.DocumentInfo;
import client.net.sf.saxon.ce.om.NodeInfo;
import client.net.sf.saxon.ce.pattern.NameTest;
import client.net.sf.saxon.ce.pattern.NodeKindTest;
import client.net.sf.saxon.ce.trans.XPathException;
import client.net.sf.saxon.ce.tree.iter.AxisIterator;
import client.net.sf.saxon.ce.tree.util.FastStringBuffer;
import client.net.sf.saxon.ce.tree.util.Navigator;
import client.net.sf.saxon.ce.type.Type;

import java.util.HashMap;
import java.util.Map;


/**
* This class provides knowledge of the names and contents of Unicode character blocks,
* as referenced using the \p{IsXXXXX} construct in a regular expression. The underlying
* data is in an XML resource file UnicodeBlocks.xml, which is accessed from the server
* the first time it is needed.
*/
public class UnicodeBlocks {

    private static Map<String, IntSet> blocks = null;

    public static IntSet getBlock(String name) throws RESyntaxException {
        if (blocks == null) {
            readBlocks(new Configuration());
        }
        IntSet cc = blocks.get(name);
        if (cc != null) {
            return cc;
        }
        cc = blocks.get(normalizeBlockName(name));
        return cc;
    }

    private static String normalizeBlockName(String name) {
        FastStringBuffer fsb = new FastStringBuffer(name.length());
        for (int i=0; i<name.length(); i++) {
            final char c = name.charAt(i);
            switch (c) {
                case ' ': case '\t': case '\r': case '\n': case '_':
                    // no action
                    break;
                default:
                    fsb.append(c);
            }
        }
        return fsb.toString();
    }

    private synchronized static void readBlocks(Configuration config) throws RESyntaxException {
        blocks = new HashMap<String, IntSet>(250);
        DocumentInfo doc;
        try {
            doc = config.buildDocument("unicodeBlocks.xml");
        } catch (XPathException err) {
            throw new RESyntaxException("Unable to read unicodeBlocks.xml file");
        }


        AxisIterator iter = doc.iterateAxis(Axis.DESCENDANT, new NameTest(Type.ELEMENT, "", "block", config.getNamePool()));
        while (true) {
            NodeInfo item = (NodeInfo)iter.next();
            if (item == null) {
                break;
            }
            String blockName = normalizeBlockName(Navigator.getAttributeValue(item, "", "name"));
            IntSet range = null;
            AxisIterator ranges = item.iterateAxis(Axis.CHILD, NodeKindTest.ELEMENT);
            while (true) {
                NodeInfo rangeElement = (NodeInfo)ranges.next();
                if (rangeElement == null) {
                    break;
                }
                int from = Integer.parseInt(Navigator.getAttributeValue(rangeElement, "", "from").substring(2), 16);
                int to = Integer.parseInt(Navigator.getAttributeValue(rangeElement, "", "to").substring(2), 16);
                IntSet cr = new IntBlockSet(from, to);
                if (range == null) {
                    range = cr;
                } else if (range instanceof IntBlockSet) {
                    range = range.mutableCopy().union(cr);
                } else {
                    range = range.union(cr);
                }
            }
            blocks.put(blockName, range);
        }

    }
}

// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is “Incompatible With Secondary Licenses”, as defined by the Mozilla Public License, v. 2.0.
TOP

Related Classes of client.net.sf.saxon.ce.regex.UnicodeBlocks

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.