Dear developer team, I'm evaluating PDFBox 1.8.6 for creating PDF files from an editor.
At one point I got stuck because characters from the PostScript font were missing in the output. The first characters I missed were the German umlauts. From studying the code of the relevant Java class "PDType1AfmPfbFont", I found out that the encoding from the font file was not transferred to the PDF file. I changed the class so that the encoding from the AFM file is transferred to the PDF file. I rebuilt the project and now I get the correct characters. I wonder whether you would like to include the changes in your project. The changed class is below. I removed some special handling for the German umlauts and added an encoding dictionary (red lines). /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/
package org.apache.pdfbox.pdmodel.font;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.fontbox.afm.AFMParser;
import org.apache.fontbox.afm.CharMetric;
import org.apache.fontbox.afm.FontMetric;
import org.apache.fontbox.pfb.PfbParser;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.encoding.AFMEncoding;
import org.apache.pdfbox.encoding.DictionaryEncoding;
import org.apache.pdfbox.encoding.Encoding;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;

/**
 * This is implementation of the Type1 Font with a afm and a pfb file.
 * <p>
 * The glyph names found in the AFM file are written into the font dictionary as an
 * <code>/Encoding</code> dictionary with a <code>/Differences</code> array, see
 * {@link #getFontEncodingDic()}.
 *
 * @author <a href="mailto:[email protected]">Michael Niedermair</a>
 * @version $Revision: 1.5 $
 */
public class PDType1AfmPfbFont extends PDType1Font
{
    /**
     * the buffersize.
     */
    private static final int BUFFERSIZE = 0xffff;

    /**
     * Number of entries in the widths list (one per single-byte character code).
     */
    private static final int MAX_WIDTHS = 256;

    /**
     * Width stored for every code that gets no positive width from the AFM file.
     */
    private static final float DEFAULT_WIDTH = 250f;

    /**
     * Code of "germandbls" in WinAnsiEncoding (octal 0337); StandardEncoding uses 251 (octal 0373).
     */
    private static final int GERMANDBLS_WINANSI_CODE = 223;

    /**
     * File name suffix of the metrics file.
     */
    private static final String AFM_SUFFIX = ".afm";

    /**
     * File name suffix of the font program file.
     */
    private static final String PFB_SUFFIX = ".pfb";

    /**
     * The font metric.
     */
    private FontMetric metric;

    /**
     * The font encoding dictionary.
     */
    protected COSDictionary fontEncodingDic;

    /**
     * Create a new object.
     * <p>
     * The pfb file name is derived from <code>afmname</code> by replacing a trailing
     * ".afm" (any letter case) with ".pfb"; if there is no such suffix, ".pfb" is appended.
     * Both files are closed before this constructor returns, also when loading fails.
     *
     * @param doc The PDF document that will hold the embedded font.
     * @param afmname The font filename.
     * @throws IOException If there is an error loading the data.
     */
    public PDType1AfmPfbFont(final PDDocument doc, final String afmname) throws IOException
    {
        super();
        InputStream afmin = null;
        InputStream pfbin = null;
        try
        {
            afmin = new BufferedInputStream(new FileInputStream(afmname), BUFFERSIZE);
            pfbin = new BufferedInputStream(new FileInputStream(toPfbName(afmname)), BUFFERSIZE);
            load(doc, afmin, pfbin);
        }
        finally
        {
            // these streams were opened here, so they are released here
            closeQuietly(pfbin);
            closeQuietly(afmin);
        }
    }

    /**
     * Create a new object.
     * <p>
     * The pfb stream is closed by this constructor; the afm stream is left open.
     *
     * @param doc The PDF document that will hold the embedded font.
     * @param afm The afm input.
     * @param pfb The pfb input.
     * @throws IOException If there is an error loading the data.
     */
    public PDType1AfmPfbFont(final PDDocument doc, final InputStream afm, final InputStream pfb) throws IOException
    {
        super();
        load(doc, afm, pfb);
    }

    /**
     * Derives the pfb file name that belongs to an afm file name.
     *
     * @param afmname The afm file name.
     * @return The name with a trailing ".afm" (case-insensitive) replaced by ".pfb", or with
     *         ".pfb" appended when the name does not end in ".afm".
     */
    private static String toPfbName(final String afmname)
    {
        final int stemLength = afmname.length() - AFM_SUFFIX.length();
        // Only the suffix is touched; a plain String comparison avoids treating '.' as a regex wildcard.
        if (stemLength >= 0 && afmname.regionMatches(true, stemLength, AFM_SUFFIX, 0, AFM_SUFFIX.length()))
        {
            return afmname.substring(0, stemLength) + PFB_SUFFIX;
        }
        return afmname + PFB_SUFFIX;
    }

    /**
     * Closes a stream, ignoring a failure of the close itself.
     *
     * @param in The stream to close, may be null.
     */
    private static void closeQuietly(final InputStream in)
    {
        if (in == null)
        {
            return;
        }
        try
        {
            in.close();
        }
        catch (IOException ignored)
        {
            // A failed close of an input stream must not hide the result (or exception) of loading.
        }
    }

    /**
     * This will load a afm and pfb to be embedding into a document.
     * <p>
     * The pfb stream is always closed by this method; the afm stream is not closed here.
     *
     * @param doc The PDF document that will hold the embedded font.
     * @param afm The afm input.
     * @param pfb The pfb input.
     * @throws IOException If there is an error loading the data.
     */
    private void load(final PDDocument doc, final InputStream afm, final InputStream pfb) throws IOException
    {
        fontEncodingDic = null;
        PDFontDescriptorDictionary fd = new PDFontDescriptorDictionary();
        setFontDescriptor(fd);

        // read the pfb
        PfbParser pfbparser;
        try
        {
            pfbparser = new PfbParser(pfb);
        }
        finally
        {
            // release the pfb input even if parsing fails
            pfb.close();
        }

        PDStream fontStream = new PDStream(doc, pfbparser.getInputStream(), false);
        fontStream.getStream().setInt("Length", pfbparser.size());
        int[] lengths = pfbparser.getLengths();
        for (int i = 0; i < lengths.length; i++)
        {
            // entries are named Length1, Length2, ...
            fontStream.getStream().setInt("Length" + (i + 1), lengths[i]);
        }
        fontStream.addCompression();
        fd.setFontFile(fontStream);

        // read the afm
        AFMParser parser = new AFMParser(afm);
        parser.parse();
        metric = parser.getResult();
        // afmToDictionary also stores the /Encoding dictionary in the font dictionary
        setFontEncoding(afmToDictionary(new AFMEncoding(metric)));

        // set the values
        setBaseFont(metric.getFontName());
        fd.setFontName(metric.getFontName());
        fd.setFontFamily(metric.getFamilyName());
        fd.setNonSymbolic(true);
        fd.setFontBoundingBox(new PDRectangle(metric.getFontBBox()));
        fd.setItalicAngle(metric.getItalicAngle());
        fd.setAscent(metric.getAscender());
        fd.setDescent(metric.getDescender());
        fd.setCapHeight(metric.getCapHeight());
        fd.setXHeight(metric.getXHeight());
        fd.setAverageWidth(metric.getAverageCharacterWidth());
        fd.setCharacterSet(metric.getCharacterSet());

        // widths: one entry per code 0..255, preset with the default width
        List<Float> widths = new ArrayList<Float>(MAX_WIDTHS);
        for (int i = 0; i < MAX_WIDTHS; i++)
        {
            widths.add(DEFAULT_WIDTH);
        }
        for (CharMetric m : metric.getCharMetrics())
        {
            int n = m.getCharacterCode();
            // skip code 0 and negative codes, codes that do not fit into the
            // widths list, and glyphs without a positive width
            if (n <= 0 || n >= MAX_WIDTHS || m.getWx() <= 0)
            {
                continue;
            }
            float width = Math.round(m.getWx());
            widths.set(n, width);
            // germandbls has 2 character codes:
            // StandardEncoding = 0373 = 251
            // WinANSIEncoding = 0337 = 223
            // so its width is stored at the WinAnsi position as well
            if ("germandbls".equals(m.getName()) && n != GERMANDBLS_WINANSI_CODE)
            {
                widths.set(GERMANDBLS_WINANSI_CODE, width);
            }
        }
        // the widths list always covers the complete range 0..255
        setFirstChar(0);
        setLastChar(MAX_WIDTHS - 1);
        setWidths(widths);
    }

    /**
     * This will generate a Encoding from the AFM-Encoding, because the AFM-Encoding isn't exported
     * to the pdf otherwise and consequently the StandardEncoding would be used, so that any special
     * character is missing. Based on code from the pdfbox-forum posted by V0JT4, see also
     * https://sourceforge.net/forum/message.php?msg_id=4705274
     * <p>
     * Two dictionaries are built: the one wrapped by the returned {@link DictionaryEncoding}, and
     * {@link #fontEncodingDic}, which lists every named code as an explicit code/name pair and is
     * stored as <code>/Encoding</code> in the font dictionary (only if at least one code is named).
     *
     * @param encoding The encoding read from the afm file.
     * @return The encoding to be used by this font.
     * @throws java.io.IOException If the glyph names cannot be read or the encoding cannot be created.
     */
    private DictionaryEncoding afmToDictionary(AFMEncoding encoding) throws java.io.IOException
    {
        COSArray array = new COSArray();
        COSArray differEncoding = new COSArray();
        array.add(COSInteger.ZERO);
        for (int i = 0; i < 256; i++)
        {
            String name = encoding.getName(i);
            COSName cosName = COSName.getPDFName(name);
            if (name != null)
            {
                // explicit "code /name" pair for the dictionary written to the file
                differEncoding.add(COSInteger.get(i));
                differEncoding.add(cosName);
            }
            // NOTE(review): for a code without a name this adds whatever getPDFName(null)
            // returns (presumably null) -- unchanged behaviour, confirm DictionaryEncoding copes with it
            array.add(cosName);
        }
        COSDictionary dictionary = new COSDictionary();
        dictionary.setItem(COSName.NAME, COSName.ENCODING);
        dictionary.setItem(COSName.DIFFERENCES, array);
        dictionary.setItem(COSName.BASE_ENCODING, COSName.STANDARD_ENCODING);

        if (differEncoding.size() > 0)
        {
            fontEncodingDic = new COSDictionary();
            fontEncodingDic.setItem(COSName.TYPE, COSName.ENCODING);
            fontEncodingDic.setItem(COSName.DIFFERENCES, differEncoding);
            COSDictionary afont = (COSDictionary) getCOSObject();
            afont.setItem(COSName.ENCODING, fontEncodingDic);
        }
        return new DictionaryEncoding(dictionary);
    }

    /**
     * The encoding dictionary that was stored in the font dictionary.
     *
     * @return The dictionary of encoding, or null if the afm file named no character code.
     */
    public COSDictionary getFontEncodingDic()
    {
        return fontEncodingDic;
    }

    /**
     * Clears the inherited state and drops the reference to the font metric.
     */
    @Override
    public void clear()
    {
        super.clear();
        metric = null;
    }
}
Best regard Klaus Graaf Lufthansa Systems Dr. Klaus Graaf Schützenwall 1 D-22844 Norderstedt Büro: +49-40-5070-6849 Fax: +49-40-5070-7880 Handy: +49-151-58920261 Internet: http://www.lhsystems.com<http://www.lhsystems.com/> Email: mailto:[email protected] Sitz der Gesellschaft / Corporate Headquarters: Lufthansa Systems AS GmbH, Norderstedt, Registereintragung / Registration: Amtsgericht Norderstedt 3688NO Geschaeftsfuehrung / Management Board: Bernd Appel
