Package org.apache.pdfbox.pdmodel.font

Source Code of org.apache.pdfbox.pdmodel.font.PDSimpleFont

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.font;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
import org.apache.pdfbox.pdmodel.font.encoding.GlyphList;
import org.apache.pdfbox.pdmodel.font.encoding.MacRomanEncoding;
import org.apache.pdfbox.pdmodel.font.encoding.StandardEncoding;
import org.apache.pdfbox.pdmodel.font.encoding.WinAnsiEncoding;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

/**
* A simple font. Simple fonts use a PostScript encoding vector.
*
* @author John Hewson
*/
public abstract class PDSimpleFont extends PDFont
{
    private static final Log LOG = LogFactory.getLog(PDSimpleFont.class);

    protected Encoding encoding;
    protected GlyphList glyphList;
    private final Set<Integer> noUnicode = new HashSet<Integer>(); // for logging

    /**
     * Constructor for embedding.
     */
    PDSimpleFont()
    {
        super();
    }

    /**
     * Constructor for Standard 14.
     */
    PDSimpleFont(String baseFont)
    {
        super(baseFont);
    }

    /**
     * Constructor.
     *
     * @param fontDictionary Font dictionary.
     */
    PDSimpleFont(COSDictionary fontDictionary) throws IOException
    {
        super(fontDictionary);
    }

    /**
     * Reads the Encoding from the Font dictionary or the embedded or substituted font file.
     * Must be called at the end of any subclass constructors.
     *
     * @throws IOException if the font file could not be read
     */
    protected final void readEncoding() throws IOException
    {
        COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
        if (encoding != null)
        {
            if (encoding instanceof COSName)
            {
                COSName encodingName = (COSName)encoding;
                this.encoding = Encoding.getInstance(encodingName);
                if (this.encoding == null)
                {
                    LOG.warn("Unknown encoding: " + encodingName.getName());
                    this.encoding = readEncodingFromFont(); // fallback
                }
            }
            else if (encoding instanceof COSDictionary)
            {
                COSDictionary encodingDict = (COSDictionary)encoding;
                Encoding builtIn = null;
                if (!encodingDict.containsKey(COSName.BASE_ENCODING) && isSymbolic())
                {
                    builtIn = readEncodingFromFont();
                }
                Boolean symbolic = getSymbolicFlag();
                if (symbolic == null)
                {
                    symbolic = builtIn != null;
                }

                if (builtIn == null && !encodingDict.containsKey(COSName.BASE_ENCODING) && symbolic)
                {
                    // TTF built-in encoding is handled by PDTrueTypeFont#codeToGID
                    this.encoding = null;
                }
                else
                {
                    this.encoding = new DictionaryEncoding(encodingDict, !symbolic, builtIn);
                }
            }
        }
        else
        {
            this.encoding = readEncodingFromFont();
        }

        // TTFs may have null encoding, but if it's non-symbolic then we have Standard Encoding
        if (this.encoding == null && getSymbolicFlag() != null && !getSymbolicFlag())
        {
            this.encoding = StandardEncoding.INSTANCE;
        }

        // TTFs may have null encoding, but if it's standard 14 then we know it's Standard Encoding
        if (this.encoding == null && isStandard14() &&
                !getName().equals("Symbol") &&
                !getName().equals("ZapfDingbats"))
        {
            this.encoding = StandardEncoding.INSTANCE;
        }
        // todo: what about Symbol and ZapfDingbats?

        // assign the glyph list based on the font
        if ("ZapfDingbats".equals(getName()))
        {
            glyphList = GlyphList.getZapfDingbats();
        }
        else
        {
            glyphList = GlyphList.getAdobeGlyphList();
        }
    }

    /**
     * Called by readEncoding() if the encoding needs to be extracted from the font file.
     *
     * @throws IOException if the font file could not be read
     */
    protected abstract Encoding readEncodingFromFont() throws IOException;

    /**
     * Returns the Encoding vector.
     */
    public Encoding getEncoding()
    {
        return encoding;
    }

    /**
     * Returns the Encoding vector.
     */
    public GlyphList getGlyphList()
    {
        return glyphList;
    }

    @Override
    protected Boolean isFontSymbolic()
    {
        Boolean result = getSymbolicFlag();
        if (result != null)
        {
            return result;
        }
        else if (isStandard14())
        {
            return getName().equals("Symbol") || getName().equals("ZapfDingbats");
        }
        else
        {
            if (encoding == null)
            {
                // sanity check, should never happen
                if (!(this instanceof PDTrueTypeFont))
                {
                    throw new IllegalStateException("PDFBox bug: encoding should not be null!");
                }

                // TTF without its non-symbolic flag set must be symbolic
                return true;
            }
            else if (encoding instanceof WinAnsiEncoding ||
                     encoding instanceof MacRomanEncoding ||
                     encoding instanceof StandardEncoding)
            {
                return false;
            }
            else if (encoding instanceof DictionaryEncoding)
            {
                // each name in Differences array must also be in the latin character set
                for (String name : ((DictionaryEncoding)encoding).getDifferences().values())
                {
                    if (name.equals(".notdef"))
                    {
                        // skip
                    }
                    else if (!(WinAnsiEncoding.INSTANCE.contains(name) &&
                               MacRomanEncoding.INSTANCE.contains(name) &&
                               StandardEncoding.INSTANCE.contains(name)))
                    {
                        return true;
                    }

                }
                return false;
            }
            else
            {
                // we don't know
                return null;
            }
        }
    }

    @Override
    public String toUnicode(int code) throws IOException
    {
        return toUnicode(code, GlyphList.getAdobeGlyphList());
    }

    @Override
    public String toUnicode(int code, GlyphList customGlyphList) throws IOException
    {
        // allow the glyph list to be overridden for the purpose of extracting Unicode
        // we only do this when the font's glyph list is the AGL, to avoid breaking Zapf Dingbats
        GlyphList unicodeGlyphList;
        if (this.glyphList == GlyphList.getAdobeGlyphList())
        {
            unicodeGlyphList = customGlyphList;
        }
        else
        {
            unicodeGlyphList = this.glyphList;
        }

        // first try to use a ToUnicode CMap
        String unicode = super.toUnicode(code);
        if (unicode != null)
        {
            return unicode;
        }

        // if the font is a "simple font" and uses MacRoman/MacExpert/WinAnsi[Encoding]
        // or has Differences with names from only Adobe Standard and/or Symbol, then:
        //
        //    a) Map the character codes to names
        //    b) Look up the name in the Adobe Glyph List to obtain the Unicode value

        String name = null;
        if (encoding != null)
        {
            name = encoding.getName(code);
            unicode = unicodeGlyphList.toUnicode(name);
            if (unicode != null)
            {
                return unicode;
            }
        }

        // if no value has been produced, there is no way to obtain Unicode for the character.
        if (LOG.isWarnEnabled() && !noUnicode.contains(code))
        {
            // we keep track of which warnings have been issued, so we don't log multiple times
            noUnicode.add(code);
            if (name != null)
            {
                LOG.warn("No Unicode mapping for " + name + " (" + code + ") in font " +
                        getName());
            }
            else
            {
                LOG.warn("No Unicode mapping for character code " + code + " in font " +
                        getName());
            }
        }

        return null;
    }

    @Override
    public boolean isVertical()
    {
        return false;
    }

    /**
     * Returns the glyph width from the AFM if this is a Standard 14 font.
     * @param code character code
     * @return width in 1/1000 text space
     */
    protected final float getStandard14Width(int code)
    {
        if (getStandard14AFM() != null)
        {
            String nameInAFM = getEncoding().getName(code);

            // the Adobe AFMs don't include .notdef, but Acrobat uses 250, test with PDFBOX-2334
            if (nameInAFM.equals(".notdef"))
            {
                return 250f;
            }

            return getStandard14AFM().getCharacterWidth(nameInAFM);
        }
        throw new IllegalStateException("No AFM");
    }

    public boolean isStandard14()
    {
        // this logic is based on Acrobat's behaviour, see see PDFBOX-2372
        // the Encoding entry cannot have Differences if we want "standard 14" font handling
        if (getEncoding() != null && getEncoding() instanceof DictionaryEncoding)
        {
            DictionaryEncoding dictionary = (DictionaryEncoding)getEncoding();
            if (dictionary.getDifferences().size() > 0)
            {
                // todo: do we need to check if entries actually differ from the base encoding?
                return false;
            }
        }
        return super.isStandard14();
    }
}
TOP

Related Classes of org.apache.pdfbox.pdmodel.font.PDSimpleFont

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.