Author: jahewson
Date: Fri Nov 6 00:01:55 2015
New Revision: 1712879

URL: http://svn.apache.org/viewvc?rev=1712879&view=rev
Log:
PDFBOX-2950: follow PDF spec rules for glyph mapping via CMap -> cmap
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java?rev=1712879&r1=1712878&r2=1712879&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java Fri Nov 6 00:01:55 2015 @@ -275,6 +275,7 @@ public final class COSName extends COSBa public static final COSName ID_TREE = new COSName("IDTree"); public static final COSName IDENTITY = new COSName("Identity"); public static final COSName IDENTITY_H = new COSName("Identity-H"); + public static final COSName IDENTITY_V = new COSName("Identity-V"); public static final COSName IF = new COSName("IF"); public static final COSName IM = new COSName("IM"); public static final COSName IMAGE = new COSName("Image"); Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java?rev=1712879&r1=1712878&r2=1712879&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java Fri Nov 6 00:01:55 2015 @@ -283,21 +283,13 @@ public class PDCIDFontType2 extends PDCI // font's 'cmap' table. The means by which this is accomplished are implementation- // dependent. 
- boolean hasUnicodeMap = parent.getCMapUCS2() != null; - if (cid2gid != null) { // Acrobat allows non-embedded GIDs - todo: can we find a test PDF for this? + LOG.warn("Using non-embedded GIDs in font " + getName()); int cid = codeToCID(code); return cid2gid[cid]; } - else if (hasIdentityCid2Gid || !hasUnicodeMap) - { - // same as above, but for the default Identity CID2GIDMap or when there is no - // ToUnicode CMap to fallback to, see PDFBOX-2599 and PDFBOX-2560 - // todo: can we find a test PDF for the Identity case? - return codeToCID(code); - } else { // fallback to the ToUnicode CMap, test with PDFBOX-1422 and PDFBOX-2560 Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1712879&r1=1712878&r2=1712879&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Fri Nov 6 00:01:55 2015 @@ -215,42 +215,35 @@ public class PDType0Font extends PDFont */ private void fetchCMapUCS2() throws IOException { - // if the font is composite and uses a predefined cmap (excluding Identity-H/V) then - // or if its decendant font uses Adobe-GB1/CNS1/Japan1/Korea1 - if (isCMapPredefined) + // if the font is composite and uses a predefined cmap (excluding Identity-H/V) + // or whose descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or + // Adobe-Korea1 character collection: + COSName name = dict.getCOSName(COSName.ENCODING); + if (isCMapPredefined && !(name == COSName.IDENTITY_H || name == COSName.IDENTITY_V) || + isDescendantCJK) { // a) Map the character code to a CID using the font's CMap // b) Obtain the ROS from the font's CIDSystemInfo // c) Construct a second CMap name by concatenating the 
ROS in the format "R-O-UCS2" // d) Obtain the CMap with the constructed name // e) Map the CID according to the CMap from step d), producing a Unicode value - - String cMapName = null; - - // get the encoding CMap - COSBase encoding = dict.getDictionaryObject(COSName.ENCODING); - if (encoding instanceof COSName) + + String strName = null; + if (name == null && isDescendantCJK) { - cMapName = ((COSName)encoding).getName(); + strName = cMap.getRegistry() + "-" + + cMap.getOrdering() + "-" + + cMap.getSupplement(); } - - if ("Identity-H".equals(cMapName) || "Identity-V".equals(cMapName)) + else if (name != null) { - if (isDescendantCJK) - { - cMapName = getCJKCMap(descendantFont.getCIDSystemInfo()); - } - else - { - // we can't map Identity-H or Identity-V to Unicode - return; - } + strName = name.getName(); } // try to find the corresponding Unicode (UC2) CMap - if (cMapName != null) + if (strName != null) { - CMap cMap = CMapManager.getPredefinedCMap(cMapName); + CMap cMap = CMapManager.getPredefinedCMap(strName); if (cMap != null) { String ucs2Name = cMap.getRegistry() + "-" + cMap.getOrdering() + "-UCS2"; @@ -414,7 +407,7 @@ public class PDType0Font extends PDFont return unicode; } - if (isCMapPredefined && cMapUCS2 != null) + if ((isCMapPredefined || isDescendantCJK) && cMapUCS2 != null) { // if the font is composite and uses a predefined cmap (excluding Identity-H/V) then // or if its decendant font uses Adobe-GB1/CNS1/Japan1/Korea1