Author: ggregory Date: Sat Jul 16 22:28:34 2011 New Revision: 1147499 URL: http://svn.apache.org/viewvc?rev=1147499&view=rev Log: [LANG-728] StringEscapeUtils.escapeXml(str) does not support supplemental characters. Add one failing test method with @Ignore (= it is not run.)
Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java?rev=1147499&r1=1147498&r2=1147499&view=diff ============================================================================== --- commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java (original) +++ commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java Sat Jul 16 22:28:34 2011 @@ -16,15 +16,19 @@ */ package org.apache.commons.lang3; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + import java.io.FileInputStream; import java.io.IOException; import java.io.StringWriter; import java.lang.reflect.Constructor; import java.lang.reflect.Modifier; -import static org.junit.Assert.*; - import org.apache.commons.io.IOUtils; +import org.junit.Ignore; import org.junit.Test; /** @@ -318,6 +322,31 @@ public class StringEscapeUtilsTest { assertEquals("XML was unescaped incorrectly", "<abc>", sw.toString() ); } + /** + * Tests Supplementary characters. + * <p> + * From http://www.w3.org/International/questions/qa-escapes + * </p> + * <blockquote> + * Supplementary characters are those Unicode characters that have code points higher than the characters in + * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the + * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect + * - you must use the single, code point value for that character. For example, use &#x233B4; rather than &#xD84C;&#xDFB4;. 
+ * </blockquote> + * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a> + * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a> + */ + @Ignore + @Test + public void testEscapeXmlSupplementaryCharacters() { + // Example from https://issues.apache.org/jira/browse/LANG-728 + assertEquals("Supplementary character must be represented using a single escape", "&#144308;", + StringEscapeUtils.escapeXml("\uD84C\uDFB4")); + // Example from See http://www.w3.org/International/questions/qa-escapes + assertEquals("Supplementary character must be represented using a single escape", "&#144308;", + StringEscapeUtils.escapeXml("\uD84C;\uDFB4;")); + } + // Tests issue #38569 // http://issues.apache.org/bugzilla/show_bug.cgi?id=38569 @Test