Source Code of org.apache.pdfbox.pdmodel.font.PDType0Font

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel.font;


import java.io.IOException;
import java.io.InputStream;


import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.cmap.CMap;
import org.apache.fontbox.util.BoundingBox;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.font.encoding.GlyphList;
import org.apache.pdfbox.pdmodel.font.encoding.StandardEncoding;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector;


/**
 * A Composite (Type 0) font.
 *
 * @author Ben Litchfield
 */
public class PDType0Font extends PDFont
{
    private static final Log LOG = LogFactory.getLog(PDType0Font.class);


    private final PDCIDFont descendantFont;
    private CMap cMap, cMapUCS2;
    private boolean isCMapPredefined;


    /**
     * Constructor.
     * 
     * @param fontDictionary The font dictionary according to the PDF specification.
     */
    public PDType0Font(COSDictionary fontDictionary) throws IOException
    {
        super(fontDictionary);
        COSArray descendantFonts = (COSArray)dict.getDictionaryObject(COSName.DESCENDANT_FONTS);
        COSDictionary descendantFontDictionary = (COSDictionary) descendantFonts.getObject(0);


        if (descendantFontDictionary == null)
        {
            throw new IOException("Missing descendant font dictionary");
        }


        readEncoding();
        fetchCMapUCS2();
        descendantFont = PDFontFactory.createDescendantFont(descendantFontDictionary, this);


        // warn if there may be text extraction issues
        if (!isSymbolic())
        {
            LOG.warn("Nonsymbolic Type 0 font: " + getName());
        }
    }


    /**
     * Reads the font's Encoding entry, which should be a CMap name/stream.
     */
    private void readEncoding() throws IOException
    {
        COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
        if (encoding != null)
        {
            if (encoding instanceof COSName)
            {
                // predefined CMap
                COSName encodingName = (COSName)encoding;
                cMap = CMapManager.getPredefinedCMap(encodingName.getName());
                if (cMap != null)
                {
                    isCMapPredefined = true;
                    return;
                }
                else
                {
                    throw new IOException("Missing required CMap");
                }
            }
            else
            {
                cMap = readCMap(encoding);
                if (cMap == null)
                {
                    throw new IOException("Missing required CMap");
                }
                else if (!cMap.hasCIDMappings())
                {
                    LOG.warn("Invalid Encoding CMap in font " + getName());
                }
            }
        }
    }


    /**
     * Fetches the corresponding UCS2 CMap if the font's CMap is predefined.
     */
    private void fetchCMapUCS2() throws IOException
    {
        // if the font is composite and uses a predefined cmap (excluding Identity-H/V) then
        // or if its decendant font uses Adobe-GB1/CNS1/Japan1/Korea1
        if (isCMapPredefined)
        {
            // a) Map the character code to a CID using the font's CMap
            // b) Obtain the ROS from the font's CIDSystemInfo
            // c) Construct a second CMap name by concatenating the ROS in the format "R-O-UCS2"
            // d) Obtain the CMap with the constructed name
            // e) Map the CID according to the CMap from step d), producing a Unicode value


            String cMapName = null;


            // get the encoding CMap
            COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
            if (encoding != null && encoding instanceof COSName)
            {
                cMapName = ((COSName)encoding).getName();
            }


            // try to find the corresponding Unicode (UC2) CMap
            if (cMapName != null && !cMapName.equals("Identity-H") &&
                                    !cMapName.equals("Identity-V"))
            {
                CMap cMap = CMapManager.getPredefinedCMap(cMapName);
                if (cMap != null)
                {
                    String ucs2Name = cMap.getRegistry() + "-" + cMap.getOrdering() + "-UCS2";
                    CMap ucs2CMap = CMapManager.getPredefinedCMap(ucs2Name);
                    if (ucs2CMap != null)
                    {
                        cMapUCS2 = ucs2CMap;
                    }
                }
            }
        }
    }


    /**
     * Returns the PostScript name of the font.
     */
    public String getBaseFont()
    {
        return dict.getNameAsString(COSName.BASE_FONT);
    }


    /**
     * Returns the descendant font.
     */
    public PDCIDFont getDescendantFont()
    {
        return descendantFont;
    }


    /**
     * Returns the font's CMap.
     */
    public CMap getCMap()
    {
        return cMap;
    }


    @Override
    public PDFontDescriptor getFontDescriptor()
    {
        return descendantFont.getFontDescriptor();
    }


    @Override
    public Matrix getFontMatrix()
    {
        return descendantFont.getFontMatrix();
    }


    @Override
    public boolean isVertical()
    {
        return cMap.getWMode() == 1;
    }


    @Override
    public float getHeight(int code) throws IOException
    {
        return descendantFont.getHeight(code);
    }


    @Override
    public float getAverageFontWidth()
    {
        return descendantFont.getAverageFontWidth();
    }


    @Override
    public Vector getPositionVector(int code)
    {
        // units are always 1/1000 text space, font matrix is not used, see FOP-2252
        return descendantFont.getPositionVector(code).scale(-1 / 1000f);
    }


    @Override
    public Vector getDisplacement(int code) throws IOException
    {
        if (isVertical())
        {
            return new Vector(0, descendantFont.getVerticalDisplacementVectorY(code) / 1000f);
        }
        else
        {
            return super.getDisplacement(code);
        }
    }


    @Override
    public float getWidth(int code) throws IOException
    {
        return descendantFont.getWidth(code);
    }


    @Override
    public float getWidthFromFont(int code) throws IOException
    {
        return descendantFont.getWidthFromFont(code);
    }


    @Override
    public boolean isEmbedded()
    {
        return descendantFont.isEmbedded();
    }


    @Override
    public String toUnicode(int code) throws IOException
    {
        // try to use a ToUnicode CMap
        String unicode = super.toUnicode(code);
        if (unicode != null)
        {
            return unicode;
        }


        if (!isSymbolic())
        {
            // this nonsymbolic behaviour isn't well documented, test with PDFBOX-1422,
            // also see PDCIDFontType2#cidToGID()
            String name = StandardEncoding.INSTANCE.getName(code);
            return GlyphList.getAdobeGlyphList().toUnicode(name);
        }
        else if (isCMapPredefined && cMapUCS2 != null)
        {
            // if the font is composite and uses a predefined cmap (excluding Identity-H/V) then
            // or if its decendant font uses Adobe-GB1/CNS1/Japan1/Korea1


            // a) Map the character code to a character identifier (CID) according to the font?s CMap
            int cid = codeToCID(code);


            // e) Map the CID according to the CMap from step d), producing a Unicode value
            return cMapUCS2.toUnicode(cid);
        }
        else
        {
            // if no value has been produced, there is no way to obtain Unicode for the character.
            return null;
        }
    }


    @Override
    public String getName()
    {
        return getBaseFont();
    }


    @Override
    public BoundingBox getBoundingBox() throws IOException
    {
        return descendantFont.getBoundingBox();
    }


    @Override
    public int readCode(InputStream in) throws IOException
    {
        return cMap.readCode(in);
    }


    /**
     * Returns the CID for the given character code. If not found then CID 0 is returned.
     *
     * @param code character code
     * @return CID
     */
    public int codeToCID(int code)
    {
        return descendantFont.codeToCID(code);
    }


    /**
     * Returns the GID for the given character code.
     *
     * @param code character code
     * @return GID
     */
    public int codeToGID(int code) throws IOException
    {
        return descendantFont.codeToGID(code);
    }


    @Override
    public boolean isStandard14()
    {
        return false;
    }


    @Override
    public boolean isDamaged()
    {
        return descendantFont.isDamaged();
    }


    @Override
    public String toString()
    {
        String descendant = null;
        if (getDescendantFont() != null)
        {
            descendant = getDescendantFont().getClass().getSimpleName();
        }
        return getClass().getSimpleName() + "/" + descendant + " " + getBaseFont();
    }
}
Source Code of org.apache.pdfbox.pdmodel.font.PDType0Font

Related Classes of org.apache.pdfbox.pdmodel.font.PDType0Font