Author: lehmi Date: Tue Jan 19 18:48:41 2016 New Revision: 1725590 URL: http://svn.apache.org/viewvc?rev=1725590&view=rev Log: PDFBOX-3120: fixed codespace range matching algorithm
Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java?rev=1725590&r1=1725589&r2=1725590&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java (original) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java Tue Jan 19 18:48:41 2016 @@ -103,10 +103,6 @@ public class CMap */ public int readCode(InputStream in) throws IOException { - // save the position in the string - in.mark(maxCodeLength); - - // mapping algorithm byte[] bytes = new byte[maxCodeLength]; in.read(bytes,0,minCodeLength); for (int i = minCodeLength-1; i < maxCodeLength; i++) @@ -124,58 +120,11 @@ public class CMap bytes[byteCount] = (byte)in.read(); } } - - // reset to the original position in the string - in.reset(); - - // modified mapping algorithm - for (int i = 0; i < maxCodeLength; i++) - { - final byte curByte = (byte)in.read(); - bytes[i] = curByte; - final int byteCount = i + 1; - CodespaceRange match = null; - CodespaceRange shortest = null; - for (CodespaceRange range : codespaceRanges) - { - if (range.isPartialMatch(curByte, i)) - { - if (match == null) - { - match = range; - } - else if (range.getStart().length < match.getStart().length) - { - // for multiple matches, choose the codespace with the shortest codes - match = range; - } - } - - // find shortest range - if (shortest == null || range.getStart().length < shortest.getStart().length) - { - shortest = range; - } - } - - // if there are no matches, the range with the shortest codes is chosen - if (match == null) - { - match = shortest; - } - - // we're done when we have enough bytes for the matched range - if (match != null && match.getStart().length == byteCount) - { - return toInt(bytes, byteCount); - } - } - throw new IOException("CMap is invalid"); } /** - * Returns an int for the given a byte array + * Returns an int for the given byte array */ private int toInt(byte[] data, int dataLen) { Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java?rev=1725590&r1=1725589&r2=1725590&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java (original) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java Tue Jan 19 18:48:41 2016 @@ -26,6 +26,8 @@ public class CodespaceRange { private byte[] start; private byte[] end; + private int startInt; + private int endInt; private int codeLength = 0; /** @@ -61,6 +63,7 @@ public class CodespaceRange void setEnd(byte[] endBytes) { end = endBytes; + endInt = toInt(endBytes, endBytes.length); } /** Getter for property start. @@ -80,6 +83,7 @@ public class CodespaceRange { start = startBytes; codeLength = start.length; + startInt = toInt(startBytes, startBytes.length); } /** @@ -91,42 +95,31 @@ public class CodespaceRange } /** - * Returns true if the given code bytes match this codespace range. + * Returns an int for the given byte array */ - public boolean isFullMatch(byte[] code, int codeLen) + private int toInt(byte[] data, int dataLen) { - // code must be the same length as the bounding codes - if (codeLen == codeLength) + int code = 0; + for (int i = 0; i < dataLen; ++i) { - // each of it bytes must lie between the corresponding bytes of the upper & lower bounds - for (int i = 0; i < codeLen; i++) - { - int startNum = start[i] & 0xff; - int endNum = end[i] & 0xff; - int codeNum = code[i] & 0xff; - - if (codeNum > endNum || codeNum < startNum) - { - return false; - } - } - return true; + code <<= 8; + code |= (data[i] + 256) % 256; } - return false; + return code; } - /** - * Returns true if the given byte matches the byte at the given index of this codespace range. + * Returns true if the given code bytes match this codespace range. */ - public boolean isPartialMatch(byte b, int index) + public boolean isFullMatch(byte[] code, int codeLen) { - if (index == codeLength) + // code must be the same length as the bounding codes + if (codeLen == codeLength) { - return false; + int value = toInt(code, codeLen); + if (value >= startInt || value <=endInt) + return true; } - int startNum = start[index] & 0xff; - int endNum = end[index] & 0xff; - int codeNum = b & 0xff; - return !(codeNum > endNum || codeNum < startNum); + return false; } + }