Author: ssteiner Date: Fri Mar 9 11:08:02 2018 New Revision: 1826330 URL: http://svn.apache.org/viewvc?rev=1826330&view=rev Log: FOP-1969: Support for unicode Surrogate pairs
Added: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/fonts/CIDSubsetTestCase.java (with props) xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/fonts/FontSelectorTestCase.java (with props) xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/render/java2d/Java2DUtilTestCase.java (with props) xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/util/CharUtilitiesTestCase.java (with props) xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/layoutengine/hyphenation-testcases/block_hyphenation_kerning_non_bmp.xml (with props) xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/resources/fonts/ttf/Aegean600.LICENSE (with props) xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/resources/fonts/ttf/Aegean600.ttf (with props) xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/resources/fonts/ttf/AndroidEmoji.LICENSE (with props) xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/resources/fonts/ttf/AndroidEmoji.ttf (with props) xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/xml/pdf-encoding/test-custom-non-bmp-font.fo (with props) Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/pom.xml xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFont.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFull.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSet.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSubset.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/Font.java 
xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/FontSelector.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/GlyphMapping.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OFMtxEntry.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OpenFont.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFText.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFTextUtil.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFToUnicodeCMap.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/CustomFontMetricsMapper.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DPainter.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DRenderer.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DUtil.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pcl/fonts/truetype/PCLTTFFontReader.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pdf/PDFPainter.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/ps/PSPainter.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/CharUtilities.java 
xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/HexEncoder.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/bidi/BidiTestData.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/ArabicWordFormsTestCase.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/GenerateArabicTestData.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/fonts/CIDFullTestCase.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/fonts/truetype/TTFFileTestCase.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/render/pdf/PDFEncodingTestCase.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/render/pdf/PDFPainterTestCase.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/render/ps/PSPainterTestCase.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/util/HexEncoderTestCase.java xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/build.xml xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/xml/pdf-encoding/pdf-encoding-test.xconf xmlgraphics/fop/branches/Temp_SurrogatePairs/fop/test/xml/pdf-encoding/test-custom-font.fo Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/pom.xml URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/pom.xml?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/pom.xml (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/pom.xml Fri Mar 9 11:08:02 2018 @@ -137,6 +137,12 @@ <version>${xmlunit.version}</version> <scope>test</scope> </dependency> + 
<dependency> + <groupId>org.apache.pdfbox</groupId> + <artifactId>pdfbox</artifactId> + <version>2.0.3</version> + <scope>test</scope> + </dependency> </dependencies> <build> @@ -308,6 +314,7 @@ <headerLocation>${project.baseUri}src/tools/resources/checkstyle/LICENSE.txt</headerLocation> <includeResources>false</includeResources> <includeTestResources>false</includeTestResources> + <includeTestSourceDirectory>true</includeTestSourceDirectory> <linkXRef>false</linkXRef> <logViolationsToConsole>true</logViolationsToConsole> <suppressionsLocation>${project.baseUri}src/tools/resources/checkstyle/suppressions.xml</suppressionsLocation> Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/complexscripts/util/GlyphSequence.java Fri Mar 9 11:08:02 2018 @@ -147,6 +147,8 @@ public class GlyphSequence implements Cl /** * Obtain the number of characters in character array, where * each character constitutes a unicode scalar value. + * NB: Supplementary characters (non-BMP code points) count as 1 + * character, not as two UTF-16 code units. * @return number of characters available in character array */ public int getCharacterCount() { @@ -154,6 +156,21 @@ public class GlyphSequence implements Cl } /** + * Obtain the number of characters in character array, where + * each character constitutes a UTF-16 character. This means + * that every non-BMP character is counted as 2 characters. 
+ * @return number of chars (UTF-16 code units) available in + * character array + */ + public int getUTF16CharacterCount() { + int count = 0; + for (int ch : characters.array()) { + count += Character.charCount(ch); + } + return count; + } + + /** * Obtain glyph id at specified index. * @param index to obtain glyph * @return the glyph identifier of glyph at specified index Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFont.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFont.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFont.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFont.java Fri Mar 9 11:08:02 2018 @@ -71,6 +71,20 @@ public abstract class CIDFont extends Cu */ public abstract CIDSet getCIDSet(); + /** + * Determines whether this font contains a particular code point/glyph. + * @param cp character to check + * @return True if the character is supported, False otherwise + */ + public abstract boolean hasCodePoint(int cp); + + /** + * Map a Unicode code point to a code point in the font. + * @param cp code point to map + * @return the mapped code point + */ + public abstract int mapCodePoint(int cp); + // ---- Optional ---- /** * Returns the default width for this font. 
Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFull.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFull.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFull.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDFull.java Fri Mar 9 11:08:02 2018 @@ -69,10 +69,10 @@ public class CIDFull implements CIDSet { } /** {@inheritDoc} */ - public char getUnicode(int index) { + public int getUnicode(int index) { initGlyphIndices(); if (glyphIndices.get(index)) { - return (char) index; + return index; } else { return CharUtilities.NOT_A_CHARACTER; } @@ -80,7 +80,12 @@ public class CIDFull implements CIDSet { /** {@inheritDoc} */ public int mapChar(int glyphIndex, char unicode) { - return (char) glyphIndex; + return glyphIndex; + } + + /** {@inheritDoc} */ + public int mapCodePoint(int glyphIndex, int codePoint) { + return glyphIndex; } /** {@inheritDoc} */ Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSet.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSet.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSet.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSet.java Fri Mar 9 11:08:02 2018 @@ -41,7 +41,7 @@ public interface CIDSet { * @param index the subset index (character selector) * @return the Unicode value or "NOT A CHARACTER" (0xFFFF) 
*/ - char getUnicode(int index); + int getUnicode(int index); /** * Gets the unicode character from the original font glyph index @@ -68,6 +68,16 @@ public interface CIDSet { int mapChar(int glyphIndex, char unicode); /** + * Maps a character to a character selector for a font subset. If the character isn't in the + * subset yet, it is added and a new character selector returned. Otherwise, the already + * allocated character selector is returned from the existing map/subset. + * @param glyphIndex the glyph index of the character + * @param codePoint the Unicode index of the character + * @return the subset index + */ + int mapCodePoint(int glyphIndex, int codePoint); + + /** * Returns an unmodifiable Map of the font subset. It maps from glyph index to * character selector (i.e. the subset index in this case). * @return Map Map<Integer, Integer> of the font subset Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSubset.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSubset.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSubset.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/CIDSubset.java Fri Mar 9 11:08:02 2018 @@ -52,12 +52,12 @@ public class CIDSubset implements CIDSet /** * usedCharsIndex contains new glyph, original char (char selector -> Unicode) */ - private Map<Integer, Character> usedCharsIndex = new HashMap<Integer, Character>(); + private Map<Integer, Integer> usedCharsIndex = new HashMap<Integer, Integer>(); /** * A map between the original character and it's GID in the original font. 
*/ - private Map<Character, Integer> charToGIDs = new HashMap<Character, Integer>(); + private Map<Integer, Integer> charToGIDs = new HashMap<Integer, Integer>(); private final MultiByteFont font; @@ -81,8 +81,8 @@ public class CIDSubset implements CIDSet } /** {@inheritDoc} */ - public char getUnicode(int index) { - Character mapValue = usedCharsIndex.get(index); + public int getUnicode(int index) { + Integer mapValue = usedCharsIndex.get(index); if (mapValue != null) { return mapValue; } else { @@ -92,6 +92,11 @@ public class CIDSubset implements CIDSet /** {@inheritDoc} */ public int mapChar(int glyphIndex, char unicode) { + return mapCodePoint(glyphIndex, unicode); + } + + /** {@inheritDoc} */ + public int mapCodePoint(int glyphIndex, int codePoint) { // Reencode to a new subset font or get the reencoded value // IOW, accumulate the accessed characters and build a character map for them Integer subsetCharSelector = usedGlyphs.get(glyphIndex); @@ -99,8 +104,8 @@ public class CIDSubset implements CIDSet int selector = usedGlyphsCount; usedGlyphs.put(glyphIndex, selector); usedGlyphsIndex.put(selector, glyphIndex); - usedCharsIndex.put(selector, unicode); - charToGIDs.put(unicode, glyphIndex); + usedCharsIndex.put(selector, codePoint); + charToGIDs.put(codePoint, glyphIndex); usedGlyphsCount++; return selector; } else { @@ -115,22 +120,28 @@ public class CIDSubset implements CIDSet /** {@inheritDoc} */ public char getUnicodeFromGID(int glyphIndex) { + // TODO this method is never called in the MultiByte font path. + // This is why we can safely cast the value of usedCharsIndex.get(selector) + // to char. It is an open question whether the return type should be changed to int, + // as was done for getUnicode, or left like this. 
int selector = usedGlyphs.get(glyphIndex); - return usedCharsIndex.get(selector); + return (char) usedCharsIndex.get(selector).intValue(); } /** {@inheritDoc} */ public int getGIDFromChar(char ch) { - return charToGIDs.get(ch); + return charToGIDs.get((int) ch); } /** {@inheritDoc} */ public char[] getChars() { - char[] charArray = new char[usedGlyphsCount]; + StringBuilder buf = new StringBuilder(); + for (int i = 0; i < usedGlyphsCount; i++) { - charArray[i] = getUnicode(i); + buf.appendCodePoint(getUnicode(i)); } - return charArray; + + return buf.toString().toCharArray(); } /** {@inheritDoc} */ Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/Font.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/Font.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/Font.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/Font.java Fri Mar 9 11:08:02 2018 @@ -28,6 +28,8 @@ import org.apache.commons.logging.LogFac import org.apache.fop.complexscripts.fonts.Positionable; import org.apache.fop.complexscripts.fonts.Substitutable; +import org.apache.fop.render.java2d.CustomFontMetricsMapper; +import org.apache.fop.util.CharUtilities; /** * This class holds font state information and provides access to the font @@ -194,10 +196,17 @@ public class Font implements Substitutab * @param ch2 second character * @return the distance to adjust for kerning, 0 if there's no kerning */ - public int getKernValue(char ch1, char ch2) { - Map<Integer, Integer> kernPair = getKerning().get((int) ch1); + public int getKernValue(int ch1, int ch2) { + // Isolate surrogate pair + if ((ch1 >= 0xD800) && (ch1 <= 0xE000)) { + return 0; + } else if ((ch2 >= 
0xD800) && (ch2 <= 0xE000)) { + return 0; + } + + Map<Integer, Integer> kernPair = getKerning().get(ch1); if (kernPair != null) { - Integer width = kernPair.get((int) ch2); + Integer width = kernPair.get(ch2); if (width != null) { return width * getFontSize() / 1000; } @@ -206,30 +215,6 @@ public class Font implements Substitutab } /** - * Returns the amount of kerning between two characters. - * - * The value returned measures in pt. So it is already adjusted for font size. - * - * @param ch1 first character - * @param ch2 second character - * @return the distance to adjust for kerning, 0 if there's no kerning - */ - public int getKernValue(int ch1, int ch2) { - // TODO !BMP - if (ch1 > 0x10000) { - return 0; - } else if ((ch1 >= 0xD800) && (ch1 <= 0xE000)) { - return 0; - } else if (ch2 > 0x10000) { - return 0; - } else if ((ch2 >= 0xD800) && (ch2 <= 0xE000)) { - return 0; - } else { - return getKernValue((char) ch1, (char) ch2); - } - } - - /** * Returns the width of a character * @param charnum character to look up * @return width of the character @@ -264,9 +249,29 @@ public class Font implements Substitutab } /** + * Map a unicode code point to a font character. + * Default uses CodePointMapping. + * @param cp code point to map + * @return the mapped character + */ + public int mapCodePoint(int cp) { + FontMetrics fontMetrics = getRealFontMetrics(); + + if (fontMetrics instanceof CIDFont) { + return ((CIDFont) fontMetrics).mapCodePoint(cp); + } + + if (CharUtilities.isBmpCodePoint(cp)) { + return mapChar((char) cp); + } + + return Typeface.NOT_FOUND; + } + + /** * Determines whether this font contains a particular character/glyph. 
* @param c character to check - * @return True if the character is supported, Falso otherwise + * @return True if the character is supported, False otherwise */ public boolean hasChar(char c) { if (metric instanceof org.apache.fop.fonts.Typeface) { @@ -278,6 +283,45 @@ public class Font implements Substitutab } /** + * Determines whether this font contains a particular code point/glyph. + * @param cp code point to check + * @return True if the code point is supported, False otherwise + */ + public boolean hasCodePoint(int cp) { + FontMetrics realFont = getRealFontMetrics(); + + if (realFont instanceof CIDFont) { + return ((CIDFont) realFont).hasCodePoint(cp); + } + + if (CharUtilities.isBmpCodePoint(cp)) { + return hasChar((char) cp); + } + + return false; + } + + /** + * Get the real underlying font if it is wrapped inside some container such as a {@link LazyFont} or a + * {@link CustomFontMetricsMapper}. + * + * @return instance of the font + */ + private FontMetrics getRealFontMetrics() { + FontMetrics realFontMetrics = metric; + + if (realFontMetrics instanceof CustomFontMetricsMapper) { + realFontMetrics = ((CustomFontMetricsMapper) realFontMetrics).getRealFont(); + } + + if (realFontMetrics instanceof LazyFont) { + return ((LazyFont) realFontMetrics).getRealFont(); + } + + return realFontMetrics; + } + + /** * {@inheritDoc} */ @Override @@ -380,10 +424,14 @@ public class Font implements Substitutab public int getCharWidth(int c) { if (c < 0x10000) { return getCharWidth((char) c); - } else { - // TODO !BMP - return -1; } + + if (hasCodePoint(c)) { + int mappedChar = mapCodePoint(c); + return getWidth(mappedChar); + } + + return -1; } /** Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/FontSelector.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/FontSelector.java?rev=1826330&r1=1826329&r2=1826330&view=diff 
============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/FontSelector.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/FontSelector.java Fri Mar 9 11:08:02 2018 @@ -24,6 +24,7 @@ import org.apache.fop.fo.FONode; import org.apache.fop.fo.FOText; import org.apache.fop.fo.flow.Character; import org.apache.fop.fo.properties.CommonFont; +import org.apache.fop.util.CharUtilities; /** * Helper class for automatic font selection. @@ -115,14 +116,18 @@ public final class FontSelector { final Font font = fi.getFontInstance(fontkeys[fontnum], commonFont.fontSize.getValue(context)); fonts[fontnum] = font; - for (int pos = firstIndex; pos < breakIndex; pos++) { - if (font.hasChar(charSeq.charAt(pos))) { + + int numCodePoints = 0; + for (int cp : CharUtilities.codepointsIter(charSeq, firstIndex, breakIndex)) { + numCodePoints++; + + if (font.hasCodePoint(cp)) { fontCount[fontnum]++; } } - // quick fall through if all characters can be displayed - if (fontCount[fontnum] == (breakIndex - firstIndex)) { + // quick fall through if all codepoints can be displayed + if (fontCount[fontnum] == numCodePoints) { return font; } } Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/GlyphMapping.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/GlyphMapping.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/GlyphMapping.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/GlyphMapping.java Fri Mar 9 11:08:02 2018 @@ -19,6 +19,7 @@ package org.apache.fop.fonts; +import 
java.util.ArrayList; import java.util.List; import org.apache.commons.logging.Log; @@ -30,6 +31,8 @@ import org.apache.fop.complexscripts.uti import org.apache.fop.traits.MinOptMax; import org.apache.fop.util.CharUtilities; +import static org.apache.fop.fonts.type1.AdobeStandardEncoding.i; + /** * Stores the mapping of a text fragment to glyphs, along with various information. */ @@ -57,7 +60,7 @@ public class GlyphMapping { MinOptMax areaIPD, boolean isHyphenated, boolean isSpace, boolean breakOppAfter, Font font, int level, int[][] gposAdjustments) { this(startIndex, endIndex, wordSpaceCount, letterSpaceCount, areaIPD, isHyphenated, - isSpace, breakOppAfter, font, level, gposAdjustments, null, null); + isSpace, breakOppAfter, font, level, gposAdjustments, null, null); } public GlyphMapping(int startIndex, int endIndex, int wordSpaceCount, int letterSpaceCount, @@ -87,11 +90,11 @@ public class GlyphMapping { GlyphMapping mapping; if (font.performsSubstitution() || font.performsPositioning()) { mapping = processWordMapping(text, startIndex, endIndex, font, - breakOpportunityChar, endsWithHyphen, level, - dontOptimizeForIdentityMapping, retainAssociations, retainControls); + breakOpportunityChar, endsWithHyphen, level, + dontOptimizeForIdentityMapping, retainAssociations, retainControls); } else { mapping = processWordNoMapping(text, startIndex, endIndex, font, - letterSpaceIPD, letterSpaceAdjustArray, precedingChar, breakOpportunityChar, endsWithHyphen, level); + letterSpaceIPD, letterSpaceAdjustArray, precedingChar, breakOpportunityChar, endsWithHyphen, level); } return mapping; } @@ -99,21 +102,20 @@ public class GlyphMapping { private static GlyphMapping processWordMapping(TextFragment text, int startIndex, int endIndex, final Font font, final char breakOpportunityChar, final boolean endsWithHyphen, int level, - boolean dontOptimizeForIdentityMapping, boolean retainAssociations, boolean retainControls) { - int e = endIndex; // end index of word in FOText 
character buffer + boolean dontOptimizeForIdentityMapping, boolean retainAssociations, boolean retainControls) { int nLS = 0; // # of letter spaces String script = text.getScript(); String language = text.getLanguage(); if (LOG.isDebugEnabled()) { LOG.debug("PW: [" + startIndex + "," + endIndex + "]: {" - + " +M" - + ", level = " + level - + " }"); + + " +M" + + ", level = " + level + + " }"); } // 1. extract unmapped character sequence. - CharSequence ics = text.subSequence(startIndex, e); + CharSequence ics = text.subSequence(startIndex, endIndex); // 2. if script is not specified (by FO property) or it is specified as 'auto', // then compute dominant script. @@ -126,7 +128,16 @@ public class GlyphMapping { // 3. perform mapping of chars to glyphs ... to glyphs ... to chars, retaining // associations if requested. - List associations = retainAssociations ? new java.util.ArrayList() : null; + List associations = retainAssociations ? new ArrayList() : null; + + // This is a workaround to read the ligature from the font even if the script + // does not match the one defined for the table. + // More info here: https://issues.apache.org/jira/browse/FOP-2638 + // zyyy == SCRIPT_UNDEFINED + if ("zyyy".equals(script) || "auto".equals(script)) { + script = "*"; + } + CharSequence mcs = font.performSubstitution(ics, script, language, associations, retainControls); // 4. compute glyph position adjustments on (substituted) characters. 
@@ -148,7 +159,11 @@ public class GlyphMapping { MinOptMax ipd = MinOptMax.ZERO; for (int i = 0, n = mcs.length(); i < n; i++) { int c = mcs.charAt(i); - // TODO !BMP + + if (CharUtilities.containsSurrogatePairAt(mcs, i)) { + c = Character.toCodePoint((char) c, mcs.charAt(++i)); + } + int w = font.getCharWidth(c); if (w < 0) { w = 0; @@ -161,7 +176,7 @@ public class GlyphMapping { // [TBD] - handle letter spacing - return new GlyphMapping(startIndex, e, 0, nLS, ipd, endsWithHyphen, false, + return new GlyphMapping(startIndex, endIndex, 0, nLS, ipd, endsWithHyphen, false, breakOpportunityChar != 0, font, level, gpa, !dontOptimizeForIdentityMapping && CharUtilities.isSameSequence(mcs, ics) ? null : mcs.toString(), associations); @@ -180,21 +195,23 @@ public class GlyphMapping { * @return glyph position adjustments (or null if no kerning) */ private static int[][] getKerningAdjustments(CharSequence mcs, final Font font, int[][] gpa) { - int nc = mcs.length(); + int numCodepoints = Character.codePointCount(mcs, 0, mcs.length()); // extract kerning array - int[] ka = new int[nc]; // kerning array - for (int i = 0, n = nc, cPrev = -1; i < n; i++) { - int c = mcs.charAt(i); - // TODO !BMP - if (cPrev >= 0) { - ka[i] = font.getKernValue(cPrev, c); + int[] kernings = new int[numCodepoints]; // kerning array + + int prevCp = -1; + int i = 0; + for (int cp : CharUtilities.codepointsIter(mcs)) { + if (prevCp >= 0) { + kernings[i] = font.getKernValue(prevCp, cp); } - cPrev = c; + prevCp = cp; + i++; } // was there a non-zero kerning? 
boolean hasKerning = false; - for (int i = 0, n = nc; i < n; i++) { - if (ka[i] != 0) { + for (int kerningValue : kernings) { + if (kerningValue != 0) { hasKerning = true; break; } @@ -202,11 +219,11 @@ public class GlyphMapping { // if non-zero kerning, then create and return glyph position adjustment array if (hasKerning) { if (gpa == null) { - gpa = new int[nc][4]; + gpa = new int[numCodepoints][4]; } - for (int i = 0, n = nc; i < n; i++) { + for (i = 0; i < numCodepoints; i++) { if (i > 0) { - gpa [i - 1][GlyphPositioningTable.Value.IDX_X_ADVANCE] += ka[i]; + gpa [i - 1][GlyphPositioningTable.Value.IDX_X_ADVANCE] += kernings[i]; } } return gpa; @@ -223,13 +240,14 @@ public class GlyphMapping { if (LOG.isDebugEnabled()) { LOG.debug("PW: [" + startIndex + "," + endIndex + "]: {" - + " -M" - + ", level = " + level - + " }"); + + " -M" + + ", level = " + level + + " }"); } - for (int i = startIndex; i < endIndex; i++) { - char currentChar = text.charAt(i); + CharSequence ics = text.subSequence(startIndex, endIndex); + int offset = 0; + for (int currentChar : CharUtilities.codepointsIter(ics)) { // character width int charWidth = font.getCharWidth(currentChar); @@ -238,24 +256,32 @@ public class GlyphMapping { // kerning if (kerning) { int kern = 0; - if (i > startIndex) { - char previousChar = text.charAt(i - 1); + if (offset > 0) { + int previousChar = java.lang.Character.codePointAt(ics, offset - 1); kern = font.getKernValue(previousChar, currentChar); } else if (precedingChar != 0) { kern = font.getKernValue(precedingChar, currentChar); } if (kern != 0) { - addToLetterAdjust(letterSpaceAdjustArray, i, kern); + addToLetterAdjust(letterSpaceAdjustArray, startIndex + offset, kern); wordIPD = wordIPD.plus(kern); } } + offset++; } if (kerning && (breakOpportunityChar != 0) && !isSpace(breakOpportunityChar) && endIndex > 0 && endsWithHyphen) { - int kern = font.getKernValue(text.charAt(endIndex - 1), breakOpportunityChar); + int endChar = text.charAt(endIndex - 1); + 
+ if (java.lang.Character.isLowSurrogate((char) endChar)) { + char highSurrogate = text.charAt(endIndex - 2); + endChar = java.lang.Character.toCodePoint(highSurrogate, (char) endChar); + } + + int kern = font.getKernValue(endChar, (int) breakOpportunityChar); if (kern != 0) { addToLetterAdjust(letterSpaceAdjustArray, endIndex, kern); // TODO: add kern to wordIPD? Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/MultiByteFont.java Fri Mar 9 11:08:02 2018 @@ -23,6 +23,7 @@ import java.awt.Rectangle; import java.io.InputStream; import java.nio.CharBuffer; import java.nio.IntBuffer; +import java.util.ArrayList; import java.util.BitSet; import java.util.LinkedHashMap; import java.util.List; @@ -379,8 +380,36 @@ public class MultiByteFont extends CIDFo /** {@inheritDoc} */ @Override + public int mapCodePoint(int cp) { + notifyMapOperation(); + int glyphIndex = findGlyphIndex(cp); + if (glyphIndex == SingleByteEncoding.NOT_FOUND_CODE_POINT) { + + for (char ch : Character.toChars(cp)) { + //TODO better handling for non BMP + warnMissingGlyph(ch); + } + + if (!isOTFFile) { + glyphIndex = findGlyphIndex(Typeface.NOT_FOUND); + } + } + if (isEmbeddable()) { + glyphIndex = cidSet.mapCodePoint(glyphIndex, cp); + } + return (char) glyphIndex; + } + + /** {@inheritDoc} */ + @Override public boolean hasChar(char c) { - return (findGlyphIndex(c) != SingleByteEncoding.NOT_FOUND_CODE_POINT); + return hasCodePoint(c); + } + + /** {@inheritDoc} 
*/ + @Override + public boolean hasCodePoint(int cp) { + return (findGlyphIndex(cp) != SingleByteEncoding.NOT_FOUND_CODE_POINT); } /** @@ -528,6 +557,8 @@ public class MultiByteFont extends CIDFo if (!retainControls) { ogs = elideControls(ogs); } + // ocs may not contain all the characters that were in cs. + // see: #createPrivateUseMapping(int gi) CharSequence ocs = mapGlyphsToChars(ogs); return ocs; } else { @@ -664,8 +695,9 @@ public class MultiByteFont extends CIDFo */ private CharSequence mapGlyphsToChars(GlyphSequence gs) { int ng = gs.getGlyphCount(); - CharBuffer cb = CharBuffer.allocate(ng); int ccMissing = Typeface.NOT_FOUND; + List<Character> chars = new ArrayList<Character>(gs.getUTF16CharacterCount()); + for (int i = 0, n = ng; i < n; i++) { int gi = gs.getGlyph(i); int cc = findCharacterFromGlyphIndex(gi); @@ -682,12 +714,19 @@ public class MultiByteFont extends CIDFo cc -= 0x10000; sh = ((cc >> 10) & 0x3FF) + 0xD800; sl = ((cc >> 0) & 0x3FF) + 0xDC00; - cb.put((char) sh); - cb.put((char) sl); + chars.add((char) sh); + chars.add((char) sl); } else { - cb.put((char) cc); + chars.add((char) cc); } } + + CharBuffer cb = CharBuffer.allocate(chars.size()); + + for (char c : chars) { + cb.put(c); + } + cb.flip(); return cb; } @@ -723,6 +762,14 @@ public class MultiByteFont extends CIDFo return sb; } + /** + * Removes the glyphs associated with elidable control characters. + * All the characters in an association must be elidable in order + * to remove the corresponding glyph. 
+ * + * @param gs GlyphSequence that may contain the elidable glyphs + * @return GlyphSequence without the elidable glyphs + */ private static GlyphSequence elideControls(GlyphSequence gs) { if (hasElidableControl(gs)) { int[] ca = gs.getCharacterArray(false); @@ -734,13 +781,15 @@ public class MultiByteFont extends CIDFo int e = a.getEnd(); while (s < e) { int ch = ca [ s ]; - if (isElidableControl(ch)) { + if (!isElidableControl(ch)) { break; } else { ++s; } } - if (s == e) { + // If there is at least one non-elidable character in the char + // sequence then the glyph/association is kept. + if (s != e) { ngb.put(gs.getGlyph(i)); nal.add(a); } Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OFMtxEntry.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OFMtxEntry.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OFMtxEntry.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OFMtxEntry.java Fri Mar 9 11:08:02 2018 @@ -19,6 +19,7 @@ package org.apache.fop.fonts.truetype; +import java.util.ArrayList; import java.util.List; /** @@ -30,7 +31,7 @@ public class OFMtxEntry { private int lsb; private String name = ""; private int index; - private List unicodeIndex = new java.util.ArrayList(); + private List<Integer> unicodeIndex = new ArrayList<Integer>(); private int[] boundingBox = new int[4]; private long offset; private byte found; @@ -131,7 +132,7 @@ public class OFMtxEntry { * Returns the unicodeIndex. 
* @return List */ - public List getUnicodeIndex() { + public List<Integer> getUnicodeIndex() { return unicodeIndex; } Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OpenFont.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OpenFont.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OpenFont.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/fonts/truetype/OpenFont.java Fri Mar 9 11:08:02 2018 @@ -390,6 +390,10 @@ public abstract class OpenFont { * tables are present. Currently only unicode cmaps are supported. * Set the unicodeIndex in the TTFMtxEntries and fills in the * cmaps vector. + * + * @see <a href="https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html"> + * TrueType-Reference-Manual + * </a> */ protected boolean readCMAP() throws IOException { @@ -401,6 +405,7 @@ public abstract class OpenFont { int numCMap = fontFile.readTTFUShort(); // Number of cmap subtables long cmapUniOffset = 0; long symbolMapOffset = 0; + long surrogateMapOffset = 0; if (log.isDebugEnabled()) { log.debug(numCMap + " cmap tables"); @@ -422,9 +427,15 @@ public abstract class OpenFont { if (cmapPID == 3 && cmapEID == 0) { symbolMapOffset = cmapOffset; } + if (cmapPID == 3 && cmapEID == 10) { + surrogateMapOffset = cmapOffset; + } } - if (cmapUniOffset > 0) { + if (surrogateMapOffset > 0) { + // TODO maybe for SingleByte fonts instances we should not reach this branch + return readUnicodeCmap(surrogateMapOffset, 10); + } else if (cmapUniOffset > 0) { return readUnicodeCmap(cmapUniOffset, 1); } else if (symbolMapOffset > 0) { return readUnicodeCmap(symbolMapOffset, 0); @@ -443,14 +454,21 
@@ public abstract class OpenFont { // Read unicode cmap seekTab(fontFile, OFTableName.CMAP, cmapUniOffset); int cmapFormat = fontFile.readTTFUShort(); - /*int cmap_length =*/ fontFile.readTTFUShort(); //skip cmap length + + if (cmapFormat < 8) { + fontFile.readTTFUShort(); //skip cmap length + fontFile.readTTFUShort(); //skip cmap version + } else { + fontFile.readTTFUShort(); //skip 2 bytes to read a Fixed32 + fontFile.readTTFULong(); //skip cmap length + fontFile.readTTFULong(); //skip cmap version + } if (log.isDebugEnabled()) { log.debug("CMAP format: " + cmapFormat); } if (cmapFormat == 4) { - fontFile.skip(2); // Skip version number int cmapSegCountX2 = fontFile.readTTFUShort(); int cmapSearchRange = fontFile.readTTFUShort(); int cmapEntrySelector = fontFile.readTTFUShort(); @@ -615,6 +633,90 @@ public abstract class OpenFont { } } } + } + } else if (cmapFormat == 12) { + long nGroups = fontFile.readTTFULong(); + + for (long i = 0; i < nGroups; ++i) { + long startCharCode = fontFile.readTTFULong(); + long endCharCode = fontFile.readTTFULong(); + long startGlyphCode = fontFile.readTTFULong(); + + if (startCharCode < 0 || startCharCode > 0x10FFFFL) { + log.warn("startCharCode outside Unicode range"); + continue; + } + + if (startCharCode >= 0xD800 && startCharCode <= 0xDFFF) { + log.warn("startCharCode is a surrogate pair: " + startCharCode); + } + + //endCharCode outside unicode range or is surrogate pair. 
+ if (endCharCode > 0 && endCharCode < startCharCode || endCharCode > 0x10FFFFL) { + log.warn("startCharCode outside Unicode range"); + continue; + } + + if (endCharCode >= 0xD800 && endCharCode <= 0xDFFF) { + log.warn("endCharCode is a surrogate pair: " + startCharCode); + } + + for (long offset = 0; offset <= endCharCode - startCharCode; ++offset) { + long glyphIndexL = startGlyphCode + offset; + long charCodeL = startCharCode + offset; + + if (glyphIndexL >= numberOfGlyphs) { + log.warn("Format 12 cmap contains an invalid glyph index"); + break; + } + + if (charCodeL > 0x10FFFFL) { + log.warn("Format 12 cmap contains character beyond UCS-4"); + } + + if (glyphIndexL > Integer.MAX_VALUE) { + log.error("glyphIndex > Integer.MAX_VALUE"); + continue; + } + + if (charCodeL > Integer.MAX_VALUE) { + log.error("startCharCode + j > Integer.MAX_VALUE"); + continue; + } + + // Update lastChar + if (charCodeL < 0xFF && charCodeL > lastChar) { + lastChar = (short) charCodeL; + } + + int charCode = (int) charCodeL; + int glyphIndex = (int) glyphIndexL; + + // Also add winAnsiWidth. 
+ List<Integer> ansiIndexes = null; + + if (charCodeL <= java.lang.Character.MAX_VALUE) { + ansiIndexes = ansiIndex.get((int) charCodeL); + } + + unicodeMappings.add(new UnicodeMapping(this, glyphIndex, charCode)); + mtxTab[glyphIndex].getUnicodeIndex().add(charCode); + + if (ansiIndexes == null) { + continue; + } + + for (Integer aIdx : ansiIndexes) { + ansiWidth[aIdx] = mtxTab[glyphIndex].getWx(); + + if (log.isTraceEnabled()) { + log.trace("Added width " + + mtxTab[glyphIndex].getWx() + + " uni: " + offset + + " ansi: " + aIdx); + } + } + } } } else { log.error("Cmap format not supported: " + cmapFormat); Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java Fri Mar 9 11:08:02 2018 @@ -1023,8 +1023,10 @@ public class TextLayoutManager extends L //log.info("Word: " + new String(textArray, startIndex, stopIndex - startIndex)); for (int i = startIndex; i < stopIndex; i++) { - char ch = foText.charAt(i); - newIPD = newIPD.plus(font.getCharWidth(ch)); + int cp = Character.codePointAt(foText, i); + i += Character.charCount(cp) - 1; + + newIPD = newIPD.plus(font.getCharWidth(cp)); //if (i > startIndex) { if (i < stopIndex) { MinOptMax letterSpaceAdjust = letterSpaceAdjustArray[i + 1]; Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFText.java URL: 
http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFText.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFText.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFText.java Fri Mar 9 11:08:02 2018 @@ -21,6 +21,10 @@ package org.apache.fop.pdf; import java.io.ByteArrayOutputStream; +import java.util.Locale; + +import org.apache.fop.util.CharUtilities; + /** * This class represents a simple number object. It also contains contains some * utility methods for outputting numbers to PDF. @@ -205,13 +209,19 @@ public class PDFText extends PDFObject { /** * Convert a char to a multibyte hex representation appending to string buffer. - * Since Java always stores strings in UTF-16, we don't have to do any conversion. 
+ * The created string will be: + * <ul> + * <li>4-character string in case of BMP character</li> + * <li>6-character string in case of non-BMP character</li> + * </ul> * @param c character to encode * @param sb the string buffer to append output */ - public static final void toUnicodeHex(char c, StringBuffer sb) { - for (int i = 0; i < 4; ++i) { - sb.append(DIGITS[(c >> (12 - 4 * i)) & 0x0F]); + public static final void toUnicodeHex(int c, StringBuffer sb) { + if (CharUtilities.isBmpCodePoint(c)) { + sb.append(Integer.toHexString(c + 0x10000).substring(1).toUpperCase(Locale.US)); + } else { + sb.append(Integer.toHexString(c + 0x1000000).substring(1).toUpperCase(Locale.US)); } } Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFTextUtil.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFTextUtil.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFTextUtil.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFTextUtil.java Fri Mar 9 11:08:02 2018 @@ -93,12 +93,12 @@ public abstract class PDFTextUtil { PDFNumber.doubleOut(lt[5], DEC, sb); } - private static void writeChar(char ch, StringBuffer sb, boolean multibyte, boolean cid) { + private static void writeChar(int codePoint, StringBuffer sb, boolean multibyte, boolean cid) { if (!multibyte) { - if (cid || ch < 32 || ch > 127) { - sb.append("\\").append(Integer.toOctalString(ch)); + if (cid || codePoint < 32 || codePoint > 127) { + sb.append("\\").append(Integer.toOctalString(codePoint)); } else { - switch (ch) { + switch (codePoint) { case '(': case ')': case '\\': @@ -106,15 +106,15 @@ public abstract class PDFTextUtil { break; default: } - sb.append(ch); + 
sb.appendCodePoint(codePoint); } } else { - PDFText.toUnicodeHex(ch, sb); + PDFText.toUnicodeHex(codePoint, sb); } } - private void writeChar(char ch, StringBuffer sb) { - writeChar(ch, sb, useMultiByte, useCid); + private void writeChar(int codePoint, StringBuffer sb) { + writeChar(codePoint, sb, useMultiByte, useCid); } private void checkInTextObject() { @@ -260,9 +260,17 @@ public abstract class PDFTextUtil { /** * Writes a char to the "TJ-Buffer". - * @param codepoint the mapped character (code point/character code) + * @param ch the mapped character (code point/character code) */ - public void writeTJMappedChar(char codepoint) { + public void writeTJMappedChar(char ch) { + writeTJMappedCodePoint((int) ch); + } + + /** + * Writes a codepoint to the "TJ-Buffer". + * @param codePoint the mapped character (code point/character code) + */ + public void writeTJMappedCodePoint(int codePoint) { if (bufTJ == null) { bufTJ = new StringBuffer(); } @@ -270,7 +278,7 @@ public abstract class PDFTextUtil { bufTJ.append('['); bufTJ.append(startText); } - writeChar(codepoint, bufTJ); + writeChar(codePoint, bufTJ); } /** Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFToUnicodeCMap.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFToUnicodeCMap.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFToUnicodeCMap.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/pdf/PDFToUnicodeCMap.java Fri Mar 9 11:08:02 2018 @@ -129,8 +129,17 @@ public class PDFToUnicodeCMap extends PD charIndex++; } writer.write("<" + padCharIndex(charIndex) + "> "); - writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4) - + ">\n"); + + if 
(Character.codePointAt(charArray, charIndex) > 0xFFFF) { + // Handle UTF-16 surrogate pairs + String pairs = Integer.toHexString(charArray[charIndex]) + + Integer.toHexString(charArray[++charIndex]); + writer.write("<" + pairs + ">\n"); + i++; + } else { + writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4) + + ">\n"); + } charIndex++; } remainingEntries -= entriesThisSection; Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/CustomFontMetricsMapper.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/CustomFontMetricsMapper.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/CustomFontMetricsMapper.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/CustomFontMetricsMapper.java Fri Mar 9 11:08:02 2018 @@ -221,6 +221,11 @@ public class CustomFontMetricsMapper ext return typeface.hasKerningInfo(); } + /** {@inheritDoc} */ + public boolean isMultiByte() { + return typeface.isMultiByte(); + } + /** * {@inheritDoc} */ Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DPainter.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DPainter.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DPainter.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DPainter.java Fri Mar 9 
11:08:02 2018 @@ -239,7 +239,7 @@ public class Java2DPainter extends Abstr g2dState.updateFont(font.getFontName(), state.getFontSize() * 1000); Graphics2D g2d = this.g2dState.getGraph(); - GlyphVector gv = g2d.getFont().createGlyphVector(g2d.getFontRenderContext(), text); + GlyphVector gv = Java2DUtil.createGlyphVector(text, g2d, font, fontInfo); Point2D cursor = new Point2D.Float(0, 0); int l = text.length(); @@ -248,8 +248,17 @@ public class Java2DPainter extends Abstr cursor.setLocation(cursor.getX() + dp[0][0], cursor.getY() - dp[0][1]); gv.setGlyphPosition(0, cursor); } + + int currentIdx = 0; for (int i = 0; i < l; i++) { - char orgChar = text.charAt(i); + int orgChar = text.codePointAt(i); + // The dp (GPOS/kerning adjustment) is performed over glyphs and not + // characters (GlyphMapping.processWordMapping). The length of dp is + // adjusted later to fit the length of the String adding trailing 0. + // This means that it's probably ok to consume one of the 2 surrogate + // pairs. 
+ i += CharUtilities.incrementIfNonBMP(orgChar); + float xGlyphAdjust = 0; float yGlyphAdjust = 0; int cw = font.getCharWidth(orgChar); @@ -268,7 +277,7 @@ public class Java2DPainter extends Abstr } cursor.setLocation(cursor.getX() + cw + xGlyphAdjust, cursor.getY() - yGlyphAdjust); - gv.setGlyphPosition(i + 1, cursor); + gv.setGlyphPosition(++currentIdx, cursor); } g2d.drawGlyphVector(gv, x, y); } @@ -289,6 +298,4 @@ public class Java2DPainter extends Abstr g2dState.transform(transform); } - - } Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DRenderer.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DRenderer.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DRenderer.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DRenderer.java Fri Mar 9 11:08:02 2018 @@ -732,7 +732,7 @@ public abstract class Java2DRenderer ext AffineTransform at = new AffineTransform(); at.translate(rx / 1000f, bl / 1000f); state.transform(at); - renderText(text, state.getGraph(), font); + renderText(text, state.getGraph(), font, fontInfo); restoreGraphicsState(); currentIPPosition = saveIP + text.getAllocIPD(); @@ -750,8 +750,9 @@ public abstract class Java2DRenderer ext * @param text the TextArea * @param g2d the Graphics2D to render to * @param font the font to paint with + * @param fontInfo the font information */ - public static void renderText(TextArea text, Graphics2D g2d, Font font) { + public static void renderText(TextArea text, Graphics2D g2d, Font font, FontInfo fontInfo) { Color col = (Color) text.getTrait(Trait.COLOR); g2d.setColor(col); @@ -763,7 +764,7 @@ public abstract 
class Java2DRenderer ext WordArea word = (WordArea) child; String s = word.getWord(); int[] letterAdjust = word.getLetterAdjustArray(); - GlyphVector gv = g2d.getFont().createGlyphVector(g2d.getFontRenderContext(), s); + GlyphVector gv = Java2DUtil.createGlyphVector(s, g2d, font, fontInfo); double additionalWidth = 0.0; if (letterAdjust == null && text.getTextLetterSpaceAdjust() == 0 @@ -772,12 +773,21 @@ public abstract class Java2DRenderer ext } else { int[] offsets = getGlyphOffsets(s, font, text, letterAdjust); float cursor = 0.0f; - for (int i = 0; i < offsets.length; i++) { + + if (offsets.length != gv.getNumGlyphs()) { + log.error(String.format("offsets length different from glyphNumber: %d != %d", + offsets.length, gv.getNumGlyphs())); + } + + // If for any reason offsets.length != gv.getNumGlyphs() then we have to choose the minimum to avoid + // ArrayIndexOutOfBoundsException. This might happen when surrogate pairs are not correctly handled. + for (int i = 0; i < Math.min(offsets.length, gv.getNumGlyphs()); i++) { Point2D pt = gv.getGlyphPosition(i); pt.setLocation(cursor, pt.getY()); gv.setGlyphPosition(i, pt); cursor += offsets[i] / 1000f; } + additionalWidth = cursor - gv.getLogicalBounds().getWidth(); } g2d.drawGlyphVector(gv, textCursor, 0); @@ -800,11 +810,11 @@ public abstract class Java2DRenderer ext private static int[] getGlyphOffsets(String s, Font font, TextArea text, int[] letterAdjust) { - int textLen = s.length(); + int textLen = s.codePointCount(0, s.length()); int[] offsets = new int[textLen]; for (int i = 0; i < textLen; i++) { - final char c = s.charAt(i); - final char mapped = font.mapChar(c); + int c = s.codePointAt(i); + final int mapped = font.mapCodePoint(c); int wordSpace; if (CharUtilities.isAdjustableSpace(mapped)) { Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DUtil.java URL: 
http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DUtil.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DUtil.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/java2d/Java2DUtil.java Fri Mar 9 11:08:02 2018 @@ -19,11 +19,20 @@ package org.apache.fop.render.java2d; +import java.awt.Graphics2D; +import java.awt.font.GlyphVector; +import java.util.Arrays; + import org.apache.fop.apps.FOUserAgent; +import org.apache.fop.fonts.Font; import org.apache.fop.fonts.FontCollection; import org.apache.fop.fonts.FontEventAdapter; import org.apache.fop.fonts.FontInfo; import org.apache.fop.fonts.FontManager; +import org.apache.fop.fonts.LazyFont; +import org.apache.fop.fonts.MultiByteFont; +import org.apache.fop.fonts.Typeface; +import org.apache.fop.util.CharUtilities; /** * Rendering-related utilities for Java2D. @@ -56,5 +65,84 @@ public final class Java2DUtil { return fi; } + /** + * Creates an instance of {@link GlyphVector} that correctly handle surrogate pairs and advanced font features such + * as GSUB/GPOS/GDEF. + * + * @param text Text to render + * @param g2d the target Graphics2D instance + * @param font the font instance + * @param fontInfo the font information + * @return an instance of {@link GlyphVector} + */ + public static GlyphVector createGlyphVector(String text, Graphics2D g2d, Font font, FontInfo fontInfo) { + MultiByteFont multiByteFont = getMultiByteFont(font.getFontName(), fontInfo); + + if (multiByteFont == null) { + return createGlyphVector(text, g2d); + } + + return createGlyphVectorMultiByteFont(text, g2d, multiByteFont); + } + + /** + * Creates a {@link GlyphVector} using characters. Filters out non-bmp characters. 
+ */ + private static GlyphVector createGlyphVector(String text, Graphics2D g2d) { + StringBuilder sb = new StringBuilder(text.length()); + for (int cp : CharUtilities.codepointsIter(text)) { + // If we are here we probably do not support non-BMP codepoints + sb.appendCodePoint(cp <= 0xFFFF ? cp : Typeface.NOT_FOUND); + } + return g2d.getFont().createGlyphVector(g2d.getFontRenderContext(), sb.toString()); + } + + /** + * Creates a {@link GlyphVector} using glyph indexes instead of characters. To correctly support the advanced font + * features we have to build the GlyphVector passing the glyph indexes instead of the characters. This because some + * of the chars in text might have been replaced by an internal font representation during + * GlyphMapping.processWordMapping. Eg 'fi' replaced with the corresponding character in the font ligatures table + * (GSUB). + */ + private static GlyphVector createGlyphVectorMultiByteFont(String text, Graphics2D g2d, + MultiByteFont multiByteFont) { + int[] glyphCodes = new int[text.length()]; + int currentIdx = 0; + + for (int cp : CharUtilities.codepointsIter(text)) { + // mapChar is not working here because MultiByteFont.mapChar replaces the glyph index with + // CIDSet.mapChar when isEmbeddable == true. + glyphCodes[currentIdx++] = multiByteFont.findGlyphIndex(cp); + } + + // Trims glyphCodes + if (currentIdx != text.length()) { + glyphCodes = Arrays.copyOf(glyphCodes, currentIdx); + } + + return g2d.getFont().createGlyphVector(g2d.getFontRenderContext(), glyphCodes); + } + + /** + * Returns an instance of {@link MultiByteFont} for the given font name. 
This method will try to unwrap containers + * such as {@link CustomFontMetricsMapper} and {@link LazyFont} + * + * @param fontName font key + * @param fontInfo font information + * @return An instance of {@link MultiByteFont}, or null if the unwrapped typeface is not a {@link MultiByteFont} + */ + private static MultiByteFont getMultiByteFont(String fontName, FontInfo fontInfo) { + Typeface tf = fontInfo.getFonts().get(fontName); + + if (tf instanceof CustomFontMetricsMapper) { + tf = ((CustomFontMetricsMapper) tf).getRealFont(); + } + + if (tf instanceof LazyFont) { + tf = ((LazyFont) tf).getRealFont(); + } + + return (tf instanceof MultiByteFont) ? (MultiByteFont) tf : null; + } } Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pcl/fonts/truetype/PCLTTFFontReader.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pcl/fonts/truetype/PCLTTFFontReader.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pcl/fonts/truetype/PCLTTFFontReader.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pcl/fonts/truetype/PCLTTFFontReader.java Fri Mar 9 11:08:02 2018 @@ -627,7 +627,7 @@ public class PCLTTFFontReader extends PC int nextOffset = 0; int charCode = 0; if (entry.getUnicodeIndex().size() > 0) { - charCode = (Integer) entry.getUnicodeIndex().get(0); + charCode = entry.getUnicodeIndex().get(0); } else { charCode = entry.getIndex(); } @@ -743,7 +743,7 @@ public class PCLTTFFontReader extends PC OFMtxEntry entry = mtx.get(i); int charCode = 0; if (entry.getUnicodeIndex().size() > 0) { - charCode = (Integer) entry.getUnicodeIndex().get(0); + charCode = entry.getUnicodeIndex().get(0); } else { charCode = entry.getIndex(); } Modified: 
xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pdf/PDFPainter.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pdf/PDFPainter.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pdf/PDFPainter.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/pdf/PDFPainter.java Fri Mar 9 11:08:02 2018 @@ -479,11 +479,17 @@ public class PDFPainter extends Abstract textutil.adjustGlyphTJ(-dx[0] / fontSize); } for (int i = 0; i < l; i++) { - char orgChar = text.charAt(i); - char ch; + int orgChar = text.charAt(i); + int ch; + + // surrogate pairs have to be merged in a single code point + if (CharUtilities.containsSurrogatePairAt(text, i)) { + orgChar = Character.toCodePoint((char) orgChar, text.charAt(++i)); + } + float glyphAdjust = 0; - if (font.hasChar(orgChar)) { - ch = font.mapChar(orgChar); + if (font.hasCodePoint(orgChar)) { + ch = font.mapCodePoint(orgChar); ch = selectAndMapSingleByteFont(tf, fontName, fontSize, textutil, ch); if ((wordSpacing != 0) && CharUtilities.isAdjustableSpace(orgChar)) { glyphAdjust += wordSpacing; @@ -495,14 +501,14 @@ public class PDFPainter extends Abstract int spaceDiff = font.getCharWidth(CharUtilities.SPACE) - font.getCharWidth(orgChar); glyphAdjust = -spaceDiff; } else { - ch = font.mapChar(orgChar); + ch = font.mapCodePoint(orgChar); if ((wordSpacing != 0) && CharUtilities.isAdjustableSpace(orgChar)) { glyphAdjust += wordSpacing; } } ch = selectAndMapSingleByteFont(tf, fontName, fontSize, textutil, ch); } - textutil.writeTJMappedChar(ch); + textutil.writeTJMappedCodePoint(ch); if (dx != null && i < dxl - 1) { glyphAdjust += dx[i + 1]; @@ -551,9 +557,7 @@ public class PDFPainter extends Abstract 
double xd = (xo - xoLast) / 1000f; double yd = (yo - yoLast) / 1000f; tu.writeTd(xd, yd); - ch = f.mapChar(ch); - ch = selectAndMapSingleByteFont(tf, f.getFontName(), fsPoints, tu, ch); - tu.writeTj(ch, tf.isMultiByte(), true); + tu.writeTj(f.mapChar(ch), tf.isMultiByte(), true); xc += xa + pa[2]; yc += ya + pa[3]; xoLast = xo; @@ -584,8 +588,8 @@ public class PDFPainter extends Abstract } */ - private char selectAndMapSingleByteFont(Typeface tf, String fontName, float fontSize, PDFTextUtil textutil, - char ch) { + private int selectAndMapSingleByteFont(Typeface tf, String fontName, float fontSize, PDFTextUtil textutil, + int ch) { if ((tf instanceof SingleByteFont && ((SingleByteFont)tf).hasAdditionalEncodings()) || tf.isCID()) { int encoding = ch / 256; if (encoding == 0) { Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/ps/PSPainter.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/ps/PSPainter.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/ps/PSPainter.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/render/ps/PSPainter.java Fri Mar 9 11:08:02 2018 @@ -458,8 +458,8 @@ public class PSPainter extends AbstractI StringBuffer sb = new StringBuffer(initialSize); boolean isOTF = multiByte && ((MultiByteFont)tf).isOTFFile(); for (int i = start; i < end; i++) { - char orgChar = text.charAt(i); - char ch; + int orgChar = text.charAt(i); + int ch; int cw; int xGlyphAdjust = 0; int yGlyphAdjust = 0; @@ -473,8 +473,13 @@ public class PSPainter extends AbstractI if ((wordSpacing != 0) && CharUtilities.isAdjustableSpace(orgChar)) { xGlyphAdjust -= wordSpacing; } - ch = font.mapChar(orgChar); - cw = 
font.getCharWidth(orgChar); // this is never used? + + // surrogate pairs have to be merged in a single code point + if (CharUtilities.containsSurrogatePairAt(text, i)) { + orgChar = Character.toCodePoint((char) orgChar, text.charAt(++i)); + } + + ch = font.mapCodePoint(orgChar); } if (dp != null && i < dp.length && dp[i] != null) { Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/CharUtilities.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/CharUtilities.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/CharUtilities.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/CharUtilities.java Fri Mar 9 11:08:02 2018 @@ -19,6 +19,9 @@ package org.apache.fop.util; +import java.util.Iterator; +import java.util.NoSuchElementException; + /** * This class provides utilities to distinguish various kinds of Unicode * whitespace and to get character widths in a given FontState. @@ -354,4 +357,134 @@ public class CharUtilities { } } + /** + * Determine whether the specified character (Unicode code point) is in then Basic + * Multilingual Plane (BMP). Such code points can be represented using a single {@code char}. + * + * @see Character#isBmpCodePoint(int) from Java 1.7 + * @param codePoint the character (Unicode code point) to be tested + * @return {@code true} if the specified code point is between Character#MIN_VALUE and + * Character#MAX_VALUE} inclusive; {@code false} otherwise + */ + public static boolean isBmpCodePoint(int codePoint) { + return codePoint >>> 16 == 0; + } + + /** + * Returns 1 if codePoint not in the BMP. 
This function is particularly useful in for + * loops over strings where, in presence of surrogate pairs, you need to skip one iteration. + * + * @param codePoint the character (Unicode code point) to be tested + * @return 1 if codePoint > 0xFFFF, 0 otherwise + */ + public static int incrementIfNonBMP(int codePoint) { + return isBmpCodePoint(codePoint) ? 0 : 1; + } + + /** + * Determine if the given character is part of a surrogate pair. + * + * @param ch character to be checked + * @return true if ch is a high surrogate or a low surrogate + */ + public static boolean isSurrogatePair(char ch) { + return Character.isHighSurrogate(ch) || Character.isLowSurrogate(ch); + } + + /** + * Tells whether there is a surrogate pair starting from the given index in the {@link CharSequence}. If the + * character at index is a high surrogate then the character at index+1 is checked to be a low surrogate. If a + * malformed surrogate pair is encountered then an {@link IllegalArgumentException} is thrown. + * <pre> + * high surrogate [0xD800 - 0xDBFF] + * low surrogate [0xDC00 - 0xDFFF] + * </pre> + * + * @param chars CharSequence to check + * @param index index in the CharSequence where to start the check + * @throws IllegalArgumentException if there is an isolated (unpaired) surrogate at index + * @return true if there is a well-formed surrogate pair at index + */ + public static boolean containsSurrogatePairAt(CharSequence chars, int index) { + char ch = chars.charAt(index); + + if (Character.isHighSurrogate(ch)) { + if ((index + 1) > chars.length()) { + throw new IllegalArgumentException( + "ill-formed UTF-16 sequence, contains isolated high surrogate at end of sequence"); + } + + if (Character.isLowSurrogate(chars.charAt(index + 1))) { + return true; + } + + throw new IllegalArgumentException( + "ill-formed UTF-16 sequence, contains isolated high surrogate at index " + index); + + } else if (Character.isLowSurrogate(ch)) { + throw new IllegalArgumentException( + "ill-formed UTF-16 sequence, contains 
isolated low surrogate at index " + index); + } + + return false; + } + + /** + * Creates an iterable over the code points of a {@link CharSequence}. + * + * @see #codepointsIter(CharSequence, int, int) + * @param s {@link CharSequence} to iterate over + * @return code point iterable for the given {@link CharSequence}. + */ + public static Iterable<Integer> codepointsIter(final CharSequence s) { + return codepointsIter(s, 0, s.length()); + } + + /** + * Creates an iterable over the code points of a sub-range of a {@link CharSequence}. + * + * @see <a href="http://bugs.java.com/bugdatabase/view_bug.do?bug_id=5003547">Bug JDK-5003547</a> + * @param s {@link CharSequence} to iterate over + * @param beginIndex index of the first char to visit (inclusive) + * @param endIndex index at which iteration stops (exclusive) + * @return code point iterable for the given sub-CharSequence. + */ + public static Iterable<Integer> codepointsIter(final CharSequence s, final int beginIndex, final int endIndex) { + if (beginIndex < 0) { + throw new StringIndexOutOfBoundsException(beginIndex); + } + if (endIndex > s.length()) { + throw new StringIndexOutOfBoundsException(endIndex); + } + int subLen = endIndex - beginIndex; + if (subLen < 0) { + throw new StringIndexOutOfBoundsException(subLen); + } + + return new Iterable<Integer>() { + public Iterator<Integer> iterator() { + return new Iterator<Integer>() { + int nextIndex = beginIndex; + + public boolean hasNext() { + return nextIndex < endIndex; + } + + public Integer next() { + if (!hasNext()) { + // Findbugs wants this: IT_NO_SUCH_ELEMENT + throw new NoSuchElementException(); + } + int result = Character.codePointAt(s, nextIndex); + nextIndex += Character.charCount(result); + return result; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + } } Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/HexEncoder.java URL: 
http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/HexEncoder.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/HexEncoder.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/main/java/org/apache/fop/util/HexEncoder.java Fri Mar 9 11:08:02 2018 @@ -45,13 +45,20 @@ public final class HexEncoder { } /** - * Returns an hex encoding of the given character as a four-character string. + * Returns an hex encoding of the given character as: + * <ul> + * <li>4-character string in case of BMP character</li> + * <li>6-character string in case of non-BMP character</li> + * </ul> * * @param c a character * @return an hex-encoded representation of the character */ - public static String encode(char c) { - return encode(c, 4); + public static String encode(int c) { + if (CharUtilities.isBmpCodePoint(c)) { + return encode(c, 4); + } else { + return encode(c, 6); + } } - } Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/bidi/BidiTestData.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/bidi/BidiTestData.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/bidi/BidiTestData.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/bidi/BidiTestData.java Fri Mar 9 11:08:02 2018 @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.ObjectInputStream; +import org.apache.commons.io.IOUtils; /* * !!! 
THIS IS A GENERATED FILE !!! @@ -64,9 +65,7 @@ public final class BidiTestData { } catch (ClassNotFoundException e) { data = null; } finally { - if (is != null) { - try { is.close(); } catch (Exception e) { /* NOP */ } - } + IOUtils.closeQuietly(is); } return data; } Modified: xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/ArabicWordFormsTestCase.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/ArabicWordFormsTestCase.java?rev=1826330&r1=1826329&r2=1826330&view=diff ============================================================================== --- xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/ArabicWordFormsTestCase.java (original) +++ xmlgraphics/fop/branches/Temp_SurrogatePairs/fop-core/src/test/java/org/apache/fop/complexscripts/scripts/arabic/ArabicWordFormsTestCase.java Fri Mar 9 11:08:02 2018 @@ -34,6 +34,8 @@ import static org.junit.Assert.assertEqu import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import org.apache.commons.io.IOUtils; + import org.apache.fop.complexscripts.fonts.GlyphPositioningTable; import org.apache.fop.complexscripts.fonts.GlyphSubstitutionTable; import org.apache.fop.complexscripts.fonts.ttx.TTXFile; @@ -88,14 +90,12 @@ public class ArabicWordFormsTestCase imp FileInputStream fis = null; try { fis = new FileInputStream(dpn); - if (fis != null) { - ObjectInputStream ois = new ObjectInputStream(fis); - List<Object[]> data = (List<Object[]>) ois.readObject(); - if (data != null) { - processWordForms(data); - } - ois.close(); + ObjectInputStream ois = new ObjectInputStream(fis); + List<Object[]> data = (List<Object[]>) ois.readObject(); + if (data != null) { + processWordForms(data); } + ois.close(); } catch (FileNotFoundException e) { throw new RuntimeException(e.getMessage(), 
e); } catch (IOException e) { @@ -103,9 +103,7 @@ public class ArabicWordFormsTestCase imp } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } finally { - if (fis != null) { - try { fis.close(); } catch (Exception e) { /* NOP */ } - } + IOUtils.closeQuietly(fis); } } --------------------------------------------------------------------- To unsubscribe, e-mail: fop-commits-unsubscr...@xmlgraphics.apache.org For additional commands, e-mail: fop-commits-h...@xmlgraphics.apache.org