Contribution to pdfbox

klaus.graaf Wed, 08 Oct 2014 08:56:49 -0700

Dear  developer-Team,
I'm evaluating PDFbox 1.8.6 for the creation of a pdf file for an editor.


At one point I got stuck because some characters from the PostScript font 
were missing in the output. The first characters I missed were the German 
umlauts. From studying the code of the relevant Java class "PDType1AfmPfbFont" 
I found out that the encoding from the font file was not transferred to the 
pdf-file.

I made a change to the class so that the encoding from the afm-file is 
transferred to the pdf-file. I rebuilt the project and now I get the correct 
characters. I wonder whether you would like to incorporate the changes into 
your project.


The changed class is below. I removed some special handling for the German 
umlauts and added an encoding dictionary (red lines).

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.font;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.fontbox.afm.AFMParser;
import org.apache.fontbox.afm.CharMetric;
import org.apache.fontbox.afm.FontMetric;
import org.apache.fontbox.pfb.PfbParser;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.encoding.AFMEncoding;
import org.apache.pdfbox.encoding.DictionaryEncoding;
import org.apache.pdfbox.encoding.Encoding;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;

/**
* This is implementation of the Type1 Font with a afm and a pfb file.
*
 * @author <a href="mailto:[email protected]";>Michael Niedermair</a>
* @version $Revision: 1.5 $
*/
public class PDType1AfmPfbFont extends PDType1Font
{
    /**
     * the buffersize.
     */
    private static final int BUFFERSIZE = 0xffff;

    /**
     * The font metric.
     */
    private FontMetric metric;

    /**
     * The font encoding dictionary.
     */
    protected COSDictionary fontEncodingDic;

    /**
     * Create a new object.
     *
     * @param doc The PDF document that will hold the embedded font.
     * @param afmname The font filename.
     * @throws IOException If there is an error loading the data.
     */
    public PDType1AfmPfbFont(final PDDocument doc, final String afmname) throws 
IOException
    {

        super();
        InputStream afmin = new BufferedInputStream(new 
FileInputStream(afmname), BUFFERSIZE);
        String pfbname = afmname.replaceAll(".AFM", "").replaceAll(".afm", "") 
+ ".pfb";
        InputStream pfbin = new BufferedInputStream(new 
FileInputStream(pfbname), BUFFERSIZE);
        load(doc, afmin, pfbin);
    }

    /**
     * Create a new object.
     *
     * @param doc The PDF document that will hold the embedded font.
     * @param afm The afm input.
     * @param pfb The pfb input.
     * @throws IOException If there is an error loading the data.
     */
    public PDType1AfmPfbFont(final PDDocument doc, final InputStream afm, final 
InputStream pfb) throws IOException
    {
        super();
        load(doc, afm, pfb);
    }

    /**
     * This will load a afm and pfb to be embedding into a document.
     *
     * @param doc The PDF document that will hold the embedded font.
     * @param afm The afm input.
     * @param pfb The pfb input.
     * @throws IOException If there is an error loading the data.
     */
    private void load(final PDDocument doc, final InputStream afm, final 
InputStream pfb) throws IOException
    {
        fontEncodingDic = null;
        PDFontDescriptorDictionary fd = new PDFontDescriptorDictionary();
        setFontDescriptor(fd);

        // read the pfb
        PfbParser pfbparser = new PfbParser(pfb);
        pfb.close();

        PDStream fontStream = new PDStream(doc, pfbparser.getInputStream(), 
false);
        fontStream.getStream().setInt("Length", pfbparser.size());
        for (int i = 0; i < pfbparser.getLengths().length; i++)
        {
            fontStream.getStream().setInt("Length" + (i + 1), 
pfbparser.getLengths()[i]);
        }
        fontStream.addCompression();
        fd.setFontFile(fontStream);

        // read the afm
        AFMParser parser = new AFMParser(afm);
        parser.parse();
        metric = parser.getResult();
        setFontEncoding(afmToDictionary(new AFMEncoding(metric)));

        // set the values
        setBaseFont(metric.getFontName());
        fd.setFontName(metric.getFontName());
        fd.setFontFamily(metric.getFamilyName());
        fd.setNonSymbolic(true);
        fd.setFontBoundingBox(new PDRectangle(metric.getFontBBox()));
        fd.setItalicAngle(metric.getItalicAngle());
        fd.setAscent(metric.getAscender());
        fd.setDescent(metric.getDescender());
        fd.setCapHeight(metric.getCapHeight());
        fd.setXHeight(metric.getXHeight());
        fd.setAverageWidth(metric.getAverageCharacterWidth());
        fd.setCharacterSet(metric.getCharacterSet());

        // get firstchar, lastchar
        int firstchar = 255;
        int lastchar = 0;

        // widths
        List<CharMetric> listmetric = metric.getCharMetrics();
        Encoding encoding = getFontEncoding();
        int maxWidths = 256;
        List<Float> widths = new ArrayList<Float>(maxWidths);
        int zero = 250;
        Iterator<CharMetric> iter = listmetric.iterator();
        for (int i = 0; i < maxWidths; i++)
        {
            widths.add((float)zero);
        }
        while (iter.hasNext())
        {
            CharMetric m = iter.next();
            int n = m.getCharacterCode();
            if (n > 0)
            {
                firstchar = Math.min(firstchar, n);
                lastchar = Math.max(lastchar, n);
                if (m.getWx() > 0)
                {
                    int width = Math.round(m.getWx());
                    widths.set(n, (float)width);
                    // germandbls has 2 character codes !! Don't ask me why 
.....
                    // StandardEncoding = 0373 = 251
                    // WinANSIEncoding = 0337 = 223
                    if (m.getName().equals("germandbls") && n != 223)
                    {
                        widths.set(0337, (float)width);
                    }
                }
            }
        }
        setFirstChar(0);
        setLastChar(255);
        setWidths(widths);
    }

    /*
     * This will generate a Encoding from the AFM-Encoding, because the 
AFM-Enconding isn't exported to the pdf and
     * consequently the StandardEncoding is used so that any special character 
is missing I've copied the code from the
     * pdfbox-forum posted by V0JT4 and made some additions concerning german 
umlauts see also
     * https://sourceforge.net/forum/message.php?msg_id=4705274
     */
    private DictionaryEncoding afmToDictionary(AFMEncoding encoding) throws 
java.io.IOException
    {
        COSArray array = new COSArray();
        COSArray differEncoding = new COSArray();
        array.add(COSInteger.ZERO);
        for (int i = 0; i < 256; i++)
        {
                String  name = encoding.getName(i);
                COSName cosName = COSName.getPDFName(name);
                if ( name!=null )
                {
                  COSNumber cosDifferCode = COSNumber.get(new 
Integer(i).toString());
                  COSName cosDifferName = COSName.getPDFName(name);
                  differEncoding.add( cosDifferCode );
                  differEncoding.add( cosDifferName );
                }
            array.add(cosName);
        }

        COSDictionary dictionary = new COSDictionary();
        dictionary.setItem(COSName.NAME, COSName.ENCODING);
        dictionary.setItem(COSName.DIFFERENCES, array);
        dictionary.setItem(COSName.BASE_ENCODING, COSName.STANDARD_ENCODING);

        if ( differEncoding.size()> 0 )
        {
                        fontEncodingDic = new COSDictionary();
                        fontEncodingDic.setItem(COSName.TYPE, COSName.ENCODING);
                        fontEncodingDic.setItem(COSName.DIFFERENCES, 
differEncoding);

                        COSDictionary afont = (COSDictionary) getCOSObject();
                        afont.setItem(COSName.ENCODING, fontEncodingDic);
        }

        return new DictionaryEncoding(dictionary);
    }
    /**
     * The encoding dirctionary
     *
     * @return The dictionary of encoding
     *
     */
    public COSDictionary getFontEncodingDic()
    {
        return fontEncodingDic;
    }
    @Override
    public void clear()
    {
        super.clear();
        metric = null;
    }
}



Best regards
Klaus Graaf

Lufthansa Systems
Dr. Klaus Graaf
Schützenwall 1
D-22844 Norderstedt

Büro:       +49-40-5070-6849
Fax:        +49-40-5070-7880
Handy:    +49-151-58920261
Internet:  http://www.lhsystems.com<http://www.lhsystems.com/>
Email:     mailto:[email protected]


 
Sitz der Gesellschaft / Corporate Headquarters: Lufthansa Systems AS GmbH, 
Norderstedt, Registereintragung / Registration: Amtsgericht Norderstedt 3688NO
Geschaeftsfuehrung / Management Board: Bernd Appel

Contribution to pdfbox

Reply via email to