Package org.pdf4j.saxon.event

Source Code of org.pdf4j.saxon.event.CharacterMapExpander

package org.pdf4j.saxon.event;
import org.pdf4j.saxon.charcode.UTF16;
import org.pdf4j.saxon.sort.IntHashMap;
import org.pdf4j.saxon.sort.IntIterator;
import org.pdf4j.saxon.tinytree.CompressedWhitespace;
import org.pdf4j.saxon.trans.XPathException;
import org.pdf4j.saxon.value.Whitespace;

import java.util.List;

* CharacterMapExpander: This ProxyReceiver expands characters occurring in a character map,
* as specified by the XSLT 2.0 xsl:character-map declaration
* @author Michael Kay

public class CharacterMapExpander extends ProxyReceiver {

    private IntHashMap charMap;
    private int min = Integer.MAX_VALUE;    // the lowest mapped character
    private int max = 0;                    // the highest mapped character
    private boolean mapsWhitespace = false;
    private boolean useNullMarkers = true;

     * Set the character maps to be used by this CharacterMapExpander.
     * They are merged into a single character map if there is more than one.

    public void setCharacterMaps(List maps) {
            // merge the character maps, allowing definitions in a later map
            // to overwrite definitions in an earlier map. (Note, we don't really
            // need to do this if there is only one map, but we want to scan the keys
            // anyway to extract the mimimum and maximum mapped characters.)

        charMap = new IntHashMap(64);
        for (int i = 0; i < maps.size(); i++) {
            IntHashMap hashMap = (IntHashMap)maps.get(i);
            IntIterator keys = hashMap.keyIterator();
            while (keys.hasNext()) {
                int next =;
                if (next < min) {
                    min = next;
                if (next > max) {
                    max = next;
                if (!mapsWhitespace && Whitespace.isWhitespace(next)) {
                    mapsWhitespace = true;
                charMap.put(next, hashMap.get(next));
        if (min > 0xD800) {
            // if all the mapped characters are above the BMP, we need to check
            // surrogates
            min = 0xD800;

     * Indicate whether the result of character mapping should be marked using NUL
     * characters to prevent subsequent XML or HTML character escaping

    public void setUseNullMarkers(boolean use) {
        useNullMarkers = use;

     * Output an attribute

    public void attribute(int nameCode, int typeCode, CharSequence value, int locationId, int properties)
            throws XPathException {
        if ((properties & ReceiverOptions.DISABLE_CHARACTER_MAPS) == 0) {
            CharSequence mapped = map(value, useNullMarkers);
            if (mapped == value) {
                // no mapping was done
                nextReceiver.attribute(nameCode, typeCode, value, locationId, properties);
            } else {
                nextReceiver.attribute(nameCode, typeCode, mapped,
                        (properties | ReceiverOptions.USE_NULL_MARKERS) & ~ReceiverOptions.NO_SPECIAL_CHARS);
        } else {
            nextReceiver.attribute(nameCode, typeCode, value, locationId, properties);

    * Output character data

    public void characters(CharSequence chars, int locationId, int properties) throws XPathException {

        if ((properties & ReceiverOptions.DISABLE_ESCAPING) == 0) {
            CharSequence mapped = map(chars, useNullMarkers);
            if (mapped != chars) {
                properties = (properties | ReceiverOptions.USE_NULL_MARKERS) & ~ReceiverOptions.NO_SPECIAL_CHARS;
            nextReceiver.characters(mapped, locationId, properties);
        } else {
            // if the user requests disable-output-escaping, this overrides the character
            // mapping
            nextReceiver.characters(chars, locationId, properties);


     * Perform the character mappping
     * @param in the input string to be mapped
     * @param insertNulls true if null (0) characters are to be inserted before
     * and after replacement characters. This is done to signal
     * that output escaping of these characters is disabled. The flag is set to true when writing
     * XML or HTML, but to false when writing TEXT.

    private CharSequence map(CharSequence in, boolean insertNulls) {

        if ((!mapsWhitespace) && in instanceof CompressedWhitespace) {
            return in;

        // First scan the string to see if there are any possible mapped
        // characters; if not, don't bother creating the new buffer

        boolean move = false;
        for (int i=0; i<in.length();) {
            char c = in.charAt(i++);
            if (c >= min && c <= max) {
                move = true;
        if (!move) {
            return in;

        FastStringBuffer buffer = new FastStringBuffer(in.length()*2);
        int i = 0;
        while(i < in.length()) {
            char c = in.charAt(i++);
            if (c >= min && c <= max) {
                if (UTF16.isHighSurrogate(c)) {
                    // assume the string is properly formed
                    char d = in.charAt(i++);
                    int s = UTF16.combinePair(c, d);
                    String rep = (String)charMap.get(s);
                    if (rep == null) {
                    } else {
                        if (insertNulls) {
                        } else {
                } else {
                    String rep = (String)charMap.get(c);
                    if (rep == null) {
                    } else {
                        if (insertNulls) {
                        } else {
            } else {
        return buffer;


// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
// The Original Code is: all this file.
// The Initial Developer of the Original Code is Michael H. Kay
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
// Contributor(s): none.

Related Classes of org.pdf4j.saxon.event.CharacterMapExpander

Copyright © 2018 All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact