Hi Klaus, Thanks! Can you open an issue on JIRA at https://issues.apache.org/jira/browse/pdfbox and, if possible, attach the file as an SVN patch (see https://ariejan.net/2007/07/03/how-to-create-and-apply-a-patch-with-subversion/) via More > Attach Files?
We might want to add some similar code to the trunk as well as 1.8, but I’ll figure that out later. Thanks -- John On 8 Oct 2014, at 08:51, [email protected] wrote: > Dear developer-Team, > I'm evaluating PDFbox 1.8.6 for the creation of a pdf file for an editor. > > At one point I got stuck, due to the missing of the correct characters from > the PostScript font. The first characters I was missing were the German > Umlaute. From studying the coding of the relevant Java class > "PDType1AfmPfbFont" I found out, that the encoding from the font file, was > not transfer to the pdf-file. > > I made a change to the class, so the encoding from the afm-file will be > transferred to the pdf-file. I rebuild the project and now I get the correct > characters. I wonder, if you would like to put the changes to your project. > > > The changed class is here. I erased some special handling for the German > Umlaut and added an encoding dictionary (red lines). > > /* > * Licensed to the Apache Software Foundation (ASF) under one or more > * contributor license agreements. See the NOTICE file distributed with > * this work for additional information regarding copyright ownership. > * The ASF licenses this file to You under the Apache License, Version 2.0 > * (the "License"); you may not use this file except in compliance with > * the License. You may obtain a copy of the License at > * > * http://www.apache.org/licenses/LICENSE-2.0 > * > * Unless required by applicable law or agreed to in writing, software > * distributed under the License is distributed on an "AS IS" BASIS, > * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > * See the License for the specific language governing permissions and > * limitations under the License. 
> */ > package org.apache.pdfbox.pdmodel.font; > > import java.io.BufferedInputStream; > import java.io.FileInputStream; > import java.io.IOException; > import java.io.InputStream; > import java.util.ArrayList; > import java.util.Iterator; > import java.util.List; > > import org.apache.fontbox.afm.AFMParser; > import org.apache.fontbox.afm.CharMetric; > import org.apache.fontbox.afm.FontMetric; > import org.apache.fontbox.pfb.PfbParser; > import org.apache.pdfbox.cos.COSArray; > import org.apache.pdfbox.cos.COSDictionary; > import org.apache.pdfbox.cos.COSInteger; > import org.apache.pdfbox.cos.COSName; > import org.apache.pdfbox.cos.COSNumber; > import org.apache.pdfbox.encoding.AFMEncoding; > import org.apache.pdfbox.encoding.DictionaryEncoding; > import org.apache.pdfbox.encoding.Encoding; > import org.apache.pdfbox.pdmodel.PDDocument; > import org.apache.pdfbox.pdmodel.common.PDRectangle; > import org.apache.pdfbox.pdmodel.common.PDStream; > > /** > * This is implementation of the Type1 Font with a afm and a pfb file. > * > * @author <a href="mailto:[email protected]">Michael Niedermair</a> > * @version $Revision: 1.5 $ > */ > public class PDType1AfmPfbFont extends PDType1Font > { > /** > * the buffersize. > */ > private static final int BUFFERSIZE = 0xffff; > > /** > * The font metric. > */ > private FontMetric metric; > > /** > * The font encoding dictionary. > */ > protected COSDictionary fontEncodingDic; > > /** > * Create a new object. > * > * @param doc The PDF document that will hold the embedded font. > * @param afmname The font filename. > * @throws IOException If there is an error loading the data. 
> */ > public PDType1AfmPfbFont(final PDDocument doc, final String afmname) > throws IOException > { > > super(); > InputStream afmin = new BufferedInputStream(new > FileInputStream(afmname), BUFFERSIZE); > String pfbname = afmname.replaceAll(".AFM", "").replaceAll(".afm", "") > + ".pfb"; > InputStream pfbin = new BufferedInputStream(new > FileInputStream(pfbname), BUFFERSIZE); > load(doc, afmin, pfbin); > } > > /** > * Create a new object. > * > * @param doc The PDF document that will hold the embedded font. > * @param afm The afm input. > * @param pfb The pfb input. > * @throws IOException If there is an error loading the data. > */ > public PDType1AfmPfbFont(final PDDocument doc, final InputStream afm, > final InputStream pfb) throws IOException > { > super(); > load(doc, afm, pfb); > } > > /** > * This will load a afm and pfb to be embedding into a document. > * > * @param doc The PDF document that will hold the embedded font. > * @param afm The afm input. > * @param pfb The pfb input. > * @throws IOException If there is an error loading the data. 
> */ > private void load(final PDDocument doc, final InputStream afm, final > InputStream pfb) throws IOException > { > fontEncodingDic = null; > PDFontDescriptorDictionary fd = new PDFontDescriptorDictionary(); > setFontDescriptor(fd); > > // read the pfb > PfbParser pfbparser = new PfbParser(pfb); > pfb.close(); > > PDStream fontStream = new PDStream(doc, pfbparser.getInputStream(), > false); > fontStream.getStream().setInt("Length", pfbparser.size()); > for (int i = 0; i < pfbparser.getLengths().length; i++) > { > fontStream.getStream().setInt("Length" + (i + 1), > pfbparser.getLengths()[i]); > } > fontStream.addCompression(); > fd.setFontFile(fontStream); > > // read the afm > AFMParser parser = new AFMParser(afm); > parser.parse(); > metric = parser.getResult(); > setFontEncoding(afmToDictionary(new AFMEncoding(metric))); > > // set the values > setBaseFont(metric.getFontName()); > fd.setFontName(metric.getFontName()); > fd.setFontFamily(metric.getFamilyName()); > fd.setNonSymbolic(true); > fd.setFontBoundingBox(new PDRectangle(metric.getFontBBox())); > fd.setItalicAngle(metric.getItalicAngle()); > fd.setAscent(metric.getAscender()); > fd.setDescent(metric.getDescender()); > fd.setCapHeight(metric.getCapHeight()); > fd.setXHeight(metric.getXHeight()); > fd.setAverageWidth(metric.getAverageCharacterWidth()); > fd.setCharacterSet(metric.getCharacterSet()); > > // get firstchar, lastchar > int firstchar = 255; > int lastchar = 0; > > // widths > List<CharMetric> listmetric = metric.getCharMetrics(); > Encoding encoding = getFontEncoding(); > int maxWidths = 256; > List<Float> widths = new ArrayList<Float>(maxWidths); > int zero = 250; > Iterator<CharMetric> iter = listmetric.iterator(); > for (int i = 0; i < maxWidths; i++) > { > widths.add((float)zero); > } > while (iter.hasNext()) > { > CharMetric m = iter.next(); > int n = m.getCharacterCode(); > if (n > 0) > { > firstchar = Math.min(firstchar, n); > lastchar = Math.max(lastchar, n); > if (m.getWx() > 0) > { > 
int width = Math.round(m.getWx()); > widths.set(n, (float)width); > // germandbls has 2 character codes !! Don't ask me why > ..... > // StandardEncoding = 0373 = 251 > // WinANSIEncoding = 0337 = 223 > if (m.getName().equals("germandbls") && n != 223) > { > widths.set(0337, (float)width); > } > } > } > } > setFirstChar(0); > setLastChar(255); > setWidths(widths); > } > > /* > * This will generate a Encoding from the AFM-Encoding, because the > AFM-Enconding isn't exported to the pdf and > * consequently the StandardEncoding is used so that any special character > is missing I've copied the code from the > * pdfbox-forum posted by V0JT4 and made some additions concerning german > umlauts see also > * https://sourceforge.net/forum/message.php?msg_id=4705274 > */ > private DictionaryEncoding afmToDictionary(AFMEncoding encoding) throws > java.io.IOException > { > COSArray array = new COSArray(); > COSArray differEncoding = new COSArray(); > array.add(COSInteger.ZERO); > for (int i = 0; i < 256; i++) > { > String name = encoding.getName(i); > COSName cosName = COSName.getPDFName(name); > if ( name!=null ) > { > COSNumber cosDifferCode = COSNumber.get(new > Integer(i).toString()); > COSName cosDifferName = COSName.getPDFName(name); > differEncoding.add( cosDifferCode ); > differEncoding.add( cosDifferName ); > } > array.add(cosName); > } > > COSDictionary dictionary = new COSDictionary(); > dictionary.setItem(COSName.NAME, COSName.ENCODING); > dictionary.setItem(COSName.DIFFERENCES, array); > dictionary.setItem(COSName.BASE_ENCODING, COSName.STANDARD_ENCODING); > > if ( differEncoding.size()> 0 ) > { > fontEncodingDic = new COSDictionary(); > fontEncodingDic.setItem(COSName.TYPE, > COSName.ENCODING); > fontEncodingDic.setItem(COSName.DIFFERENCES, > differEncoding); > > COSDictionary afont = (COSDictionary) getCOSObject(); > afont.setItem(COSName.ENCODING, fontEncodingDic); > } > > return new DictionaryEncoding(dictionary); > } > /** > * The encoding dirctionary > * > * 
@return The dictionary of encoding > * > */ > public COSDictionary getFontEncodingDic() > { > return fontEncodingDic; > } > @Override > public void clear() > { > super.clear(); > metric = null; > } > } > > > > Best regard > Klaus Graaf > > Lufthansa Systems > Dr. Klaus Graaf > Schützenwall 1 > D-22844 Norderstedt > > Büro: +49-40-5070-6849 > Fax: +49-40-5070-7880 > Handy: +49-151-58920261 > Internet: http://www.lhsystems.com<http://www.lhsystems.com/> > Email: mailto:[email protected] > > > > Sitz der Gesellschaft / Corporate Headquarters: Lufthansa Systems AS GmbH, > Norderstedt, Registereintragung / Registration: Amtsgericht Norderstedt 3688NO > Geschaeftsfuehrung / Management Board: Bernd Appel > >
