Author: tilman
Date: Sun Feb 23 05:40:10 2025
New Revision: 1923993
URL: http://svn.apache.org/viewvc?rev=1923993&view=rev
Log:
PDFBOX-5961: support 3 / 4 byte input values
Modified:
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
Modified:
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
URL:
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java?rev=1923993&r1=1923992&r2=1923993&view=diff
==============================================================================
--- pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java (original)
+++ pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java Sun Feb 23 05:40:10 2025
@@ -58,6 +58,8 @@ public class CMap
private final Map<Integer, String> charToUnicodeOneByte = new HashMap<>();
// two byte input values
private final Map<Integer, String> charToUnicodeTwoBytes = new HashMap<>();
+ // 3 / 4 byte input values
+ private final Map<Integer, String> charToUnicodeMoreBytes = new HashMap<>();
// CID mappings
// map with all code to cid mappings organized by the origin byte length of the input value
@@ -94,7 +96,7 @@ public class CMap
*/
public boolean hasUnicodeMappings()
{
- return !charToUnicodeOneByte.isEmpty() || !charToUnicodeTwoBytes.isEmpty();
+ return !charToUnicodeOneByte.isEmpty() || !charToUnicodeTwoBytes.isEmpty() || !charToUnicodeMoreBytes.isEmpty();
}
/**
@@ -114,7 +116,15 @@ public class CMap
String unicode = code < 256 ? toUnicode(code, 1) : null;
if (unicode == null)
{
- unicode = toUnicode(code, 2);
+ if (code <= 0xFFFF)
+ {
+ return toUnicode(code, 2);
+ }
+ if (code <= 0xFFFFFF)
+ {
+ return toUnicode(code, 3);
+ }
+ return toUnicode(code, 4);
}
return unicode;
}
@@ -136,8 +146,7 @@ public class CMap
{
return charToUnicodeTwoBytes.get(code);
}
- LOG.warn("Mappings with more than 2 bytes aren't supported");
- return null;
+ return charToUnicodeMoreBytes.get(code);
}
/**
@@ -351,9 +360,14 @@ public class CMap
charToUnicodeTwoBytes.put(CMapStrings.getIndexValue(codes),
unicode);
unicodeToByteCodes.put(unicode, CMapStrings.getByteValue(codes));
// clone needed, bytes is modified later
}
+ else if (codes.length == 3 || codes.length == 4)
+ {
+ charToUnicodeMoreBytes.put(toInt(codes), unicode);
+ unicodeToByteCodes.put(unicode, codes.clone());
+ }
else
{
- LOG.warn("Mappings with more than 2 bytes aren't supported yet");
+ LOG.warn("Mappings with more than 4 bytes aren't supported yet");
}
// fixme: ugly little hack
if (SPACE.equals(unicode))
@@ -443,10 +457,28 @@ public class CMap
cmap.codespaceRanges.forEach(this::addCodespaceRange);
charToUnicodeOneByte.putAll(cmap.charToUnicodeOneByte);
charToUnicodeTwoBytes.putAll(cmap.charToUnicodeTwoBytes);
+ charToUnicodeMoreBytes.putAll(cmap.charToUnicodeMoreBytes);
cmap.charToUnicodeOneByte.forEach((k, v) -> unicodeToByteCodes.put(v,
new byte[]{(byte) (k % 0xFF)}));
cmap.charToUnicodeTwoBytes.forEach((k, v) -> unicodeToByteCodes.put(v,
new byte[]{(byte) ((k >>> 8) & 0xFF), (byte) (k & 0xFF)})
);
+ cmap.charToUnicodeMoreBytes.forEach((k, v) ->
+ {
+ byte[] bar;
+ if (k <= 0xFFFFFF)
+ {
+ // 3 bytes
+ bar = new byte[]{(byte) ((k >>> 16) & 0xFF), (byte) ((k >>> 8) & 0xFF),
+ (byte) (k & 0xFF)};
+ }
+ else
+ {
+ // 4 bytes
+ bar = new byte[]{(byte) ((k >>> 24) & 0xFF), (byte) ((k >>> 16) & 0xFF),
+ (byte) ((k >>> 8) & 0xFF), (byte) (k & 0xFF)};
+ }
+ unicodeToByteCodes.put(v, bar);
+ });
cmap.codeToCid.forEach((key, value) ->
{
Map<Integer, Integer> existingMapping = codeToCid.putIfAbsent(key,
value);