Author: bayard Date: Tue Nov 17 08:45:55 2009 New Revision: 881197 URL: http://svn.apache.org/viewvc?rev=881197&view=rev Log: Removing the EscapeUtils/UnescapeUtils classes, and making StringEscapeUtils the replacement. Writer variants of StringEscapeUtils are dropped - instead you hit those via the translator objects. Some javadoc/testing cleanup needed.
Removed: commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/translate/EscapeUtils.java commons/proper/lang/trunk/src/java/org/apache/commons/lang/text/translate/UnescapeUtils.java commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/translate/EscapeUtilsTest.java commons/proper/lang/trunk/src/test/org/apache/commons/lang/text/translate/UnescapeUtilsTest.java Modified: commons/proper/lang/trunk/src/java/org/apache/commons/lang/StringEscapeUtils.java commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java Modified: commons/proper/lang/trunk/src/java/org/apache/commons/lang/StringEscapeUtils.java URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/java/org/apache/commons/lang/StringEscapeUtils.java?rev=881197&r1=881196&r2=881197&view=diff ============================================================================== --- commons/proper/lang/trunk/src/java/org/apache/commons/lang/StringEscapeUtils.java (original) +++ commons/proper/lang/trunk/src/java/org/apache/commons/lang/StringEscapeUtils.java Tue Nov 17 08:45:55 2009 @@ -19,8 +19,11 @@ import java.io.IOException; import java.io.Writer; -import org.apache.commons.lang.text.translate.EscapeUtils; -import org.apache.commons.lang.text.translate.UnescapeUtils; +import org.apache.commons.lang.text.translate.*; + +// CsvTranslators +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang.CharUtils; /** * <p>Escapes and unescapes <code>String</code>s for @@ -38,12 +41,162 @@ * @author Pete Gieser * @since 2.0 * @version $Id$ - * - * @deprecated Use text.translate.EscapeUtils and text.translate.UnescapeUtils instead */ -...@deprecated public class StringEscapeUtils { + /* ESCAPE TRANSLATORS */ + + public static final CharSequenceTranslator ESCAPE_JAVA = + new LookupTranslator( + new String[][] { + {"\"", "\\\""}, + {"\\", "\\\\"}, + }).with( + new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()) + ).with( + 
UnicodeEscaper.outsideOf(32, 0x7f) + ); + + public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = + new AggregateTranslator( + new LookupTranslator( + new String[][] { + {"'", "\\'"}, + {"\"", "\\\""}, + {"\\", "\\\\"}, + {"/", "\\/"} + }), + new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), + UnicodeEscaper.outsideOf(32, 0x7f) + ); + + public static final CharSequenceTranslator ESCAPE_XML = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_ESCAPE()), + new LookupTranslator(EntityArrays.APOS_ESCAPE()) + ); + + public static final CharSequenceTranslator ESCAPE_HTML3 = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_ESCAPE()), + new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()) + ); + + public static final CharSequenceTranslator ESCAPE_HTML4 = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_ESCAPE()), + new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()), + new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE()) + ); + + public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper(); + + // TODO: Create a parent class - 'SinglePassTranslator' ? + // TODO: It would handle the index checking, and length returning, and + // TODO: could also have an optimization check method. + static class CsvEscaper extends CharSequenceTranslator { + + private static final char CSV_DELIMITER = ','; + private static final char CSV_QUOTE = '"'; + private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); + private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; + + // TODO: Replace with a RegexTranslator. That should consume the number of characters the regex uses up? 
+ @Override + public int translate(CharSequence input, int index, Writer out) throws IOException { + + if(index != 0) { + throw new IllegalStateException("CsvEscaper should never reach the [1] index"); + } + + if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) { + out.write(input.toString()); + } else { + out.write(CSV_QUOTE); + out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); + out.write(CSV_QUOTE); + } + return input.length(); + } + } + + /* UNESCAPE TRANSLATORS */ + + // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)? + public static final CharSequenceTranslator UNESCAPE_JAVA = + new AggregateTranslator( + new UnicodeUnescaper(), + new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()), + new LookupTranslator( + new String[][] { + {"\\\\", "\\"}, + {"\\\"", "\""}, + {"\\'", "'"}, + {"\\", ""} + }) + ); + + public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; + + public static final CharSequenceTranslator UNESCAPE_HTML3 = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), + new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), + new NumericEntityUnescaper() + ); + + public static final CharSequenceTranslator UNESCAPE_HTML4 = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), + new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), + new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()), + new NumericEntityUnescaper() + ); + + public static final CharSequenceTranslator UNESCAPE_XML = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), + new LookupTranslator(EntityArrays.APOS_UNESCAPE()), + new NumericEntityUnescaper() + ); + + public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper(); + + static class CsvUnescaper extends CharSequenceTranslator { + + private static final char CSV_DELIMITER = 
','; + private static final char CSV_QUOTE = '"'; + private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); + private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; + + // TODO: Replace with a RegexTranslator. That should consume the number of characters the regex uses up? + @Override + public int translate(CharSequence input, int index, Writer out) throws IOException { + + if(index != 0) { + throw new IllegalStateException("CsvUnescaper should never reach the [1] index"); + } + + if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) { + out.write(input.toString()); + return input.length(); + } + + // strip quotes + String quoteless = input.subSequence(1, input.length() - 1).toString(); + + if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { + // deal with escaped quotes; ie) "" + out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR)); + } else { + out.write(input.toString()); + } + return input.length(); + } + } + + /* Helper functions */ + /** * <p><code>StringEscapeUtils</code> instances should NOT be constructed in * standard programming.</p> @@ -78,39 +231,25 @@ * </pre> * </p> * - * @param str String to escape values in, may be null + * @param input String to escape values in, may be null * @return String with escaped values, <code>null</code> if null string input */ - public static String escapeJava(String str) { - return EscapeUtils.escapeJava(str); - } - - /** - * <p>Escapes the characters in a <code>String</code> using Java String rules to - * a <code>Writer</code>.</p> - * - * <p>A <code>null</code> string input has no effect.</p> - * - * @see #escapeJava(java.lang.String) - * @param out Writer to write escaped string into - * @param str String to escape values in, may be null - * @throws IllegalArgumentException if the Writer is <code>null</code> - * @throws IOException if error occurs on underlying Writer - 
*/ - public static void escapeJava(Writer out, String str) throws IOException { - EscapeUtils.ESCAPE_JAVA.translate(str, out); + public static final String escapeJava(String input) { + return ESCAPE_JAVA.translate(input); } /** - * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p> - * <p>Escapes any values it finds into their JavaScript String form. + * <p>Escapes the characters in a <code>String</code> using EcmaScript String rules.</p> + * <p>Escapes any values it finds into their EcmaScript String form. * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> * * <p>So a tab becomes the characters <code>'\\'</code> and * <code>'t'</code>.</p> * - * <p>The only difference between Java strings and JavaScript strings - * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p> + * <p>The only difference between Java strings and EcmaScript strings + * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p> + * + * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. 
</p> * * <p>Example: * <pre> @@ -119,27 +258,11 @@ * </pre> * </p> * - * @param str String to escape values in, may be null + * @param input String to escape values in, may be null * @return String with escaped values, <code>null</code> if null string input */ - public static String escapeJavaScript(String str) { - return EscapeUtils.escapeEcmaScript(str); - } - - /** - * <p>Escapes the characters in a <code>String</code> using JavaScript String rules - * to a <code>Writer</code>.</p> - * - * <p>A <code>null</code> string input has no effect.</p> - * - * @see #escapeJavaScript(java.lang.String) - * @param out Writer to write escaped string into - * @param str String to escape values in, may be null - * @throws IllegalArgumentException if the Writer is <code>null</code> - * @throws IOException if error occurs on underlying Writer - **/ - public static void escapeJavaScript(Writer out, String str) throws IOException { - EscapeUtils.ESCAPE_ECMASCRIPT.translate(str, out); + public static final String escapeEcmaScript(String input) { + return ESCAPE_ECMASCRIPT.translate(input); } /** @@ -148,65 +271,26 @@ * <code>'n'</code> into a newline character, unless the <code>'\'</code> * is preceded by another <code>'\'</code>.</p> * - * @param str the <code>String</code> to unescape, may be null + * @param input the <code>String</code> to unescape, may be null * @return a new unescaped <code>String</code>, <code>null</code> if null string input */ - public static String unescapeJava(String str) { - return UnescapeUtils.unescapeJava(str); - } - - /** - * <p>Unescapes any Java literals found in the <code>String</code> to a - * <code>Writer</code>.</p> - * - * <p>For example, it will turn a sequence of <code>'\'</code> and - * <code>'n'</code> into a newline character, unless the <code>'\'</code> - * is preceded by another <code>'\'</code>.</p> - * - * <p>A <code>null</code> string input has no effect.</p> - * - * @param out the <code>Writer</code> used to output unescaped 
characters - * @param str the <code>String</code> to unescape, may be null - * @throws IllegalArgumentException if the Writer is <code>null</code> - * @throws IOException if error occurs on underlying Writer - */ - public static void unescapeJava(Writer out, String str) throws IOException { - UnescapeUtils.UNESCAPE_JAVA.translate(str, out); + public static final String unescapeJava(String input) { + return UNESCAPE_JAVA.translate(input); } /** - * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p> + * <p>Unescapes any EcmaScript literals found in the <code>String</code>.</p> * * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code> * into a newline character, unless the <code>'\'</code> is preceded by another * <code>'\'</code>.</p> * * @see #unescapeJava(String) - * @param str the <code>String</code> to unescape, may be null + * @param input the <code>String</code> to unescape, may be null * @return A new unescaped <code>String</code>, <code>null</code> if null string input */ - public static String unescapeJavaScript(String str) { - return UnescapeUtils.unescapeEcmaScript(str); - } - - /** - * <p>Unescapes any JavaScript literals found in the <code>String</code> to a - * <code>Writer</code>.</p> - * - * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code> - * into a newline character, unless the <code>'\'</code> is preceded by another - * <code>'\'</code>.</p> - * - * <p>A <code>null</code> string input has no effect.</p> - * - * @see #unescapeJava(Writer,String) - * @param out the <code>Writer</code> used to output unescaped characters - * @param str the <code>String</code> to unescape, may be null - * @throws IllegalArgumentException if the Writer is <code>null</code> - * @throws IOException if error occurs on underlying Writer - */ - public static void unescapeJavaScript(Writer out, String str) throws IOException { - UnescapeUtils.UNESCAPE_ECMASCRIPT.translate(str, out); + public 
static final String unescapeEcmaScript(String input) { + return UNESCAPE_ECMASCRIPT.translate(input); } // HTML and XML @@ -227,7 +311,7 @@ * Note that the commonly used apostrophe escape character (&apos;) * is not a legal entity and so is not supported). </p> * - * @param str the <code>String</code> to escape, may be null + * @param input the <code>String</code> to escape, may be null * @return a new escaped <code>String</code>, <code>null</code> if null string input * * @see #unescapeHtml(String) @@ -237,43 +321,14 @@ * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> */ - public static String escapeHtml(String str) { - return EscapeUtils.escapeHtml4(str); + public static final String escapeHtml4(String input) { + return ESCAPE_HTML4.translate(input); } - /** - * <p>Escapes the characters in a <code>String</code> using HTML entities and writes - * them to a <code>Writer</code>.</p> - * - * <p> - * For example: - * </p> - * <code>"bread" & "butter"</code> - * <p>becomes:</p> - * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. - * - * <p>Supports all known HTML 4.0 entities, including funky accents. - * Note that the commonly used apostrophe escape character (&apos;) - * is not a legal entity and so is not supported). </p> - * - * @param writer the writer receiving the escaped string, not null - * @param string the <code>String</code> to escape, may be null - * @throws IllegalArgumentException if the writer is null - * @throws IOException when <code>Writer</code> passed throws the exception from - * calls to the {...@link Writer#write(int)} methods. 
- * - * @see #escapeHtml(String) - * @see #unescapeHtml(String) - * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> - * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> - * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> - * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> - * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> - */ - public static void escapeHtml(Writer writer, String string) throws IOException { - EscapeUtils.ESCAPE_HTML4.translate(string, writer); + public static final String escapeHtml3(String input) { + return ESCAPE_HTML3.translate(input); } - + //----------------------------------------------------------------------- /** * <p>Unescapes a string containing entity escapes to a string @@ -287,34 +342,15 @@ * verbatim into the result string. e.g. "&gt;&zzzz;x" will * become ">&zzzz;x".</p> * - * @param str the <code>String</code> to unescape, may be null + * @param input the <code>String</code> to unescape, may be null * @return a new unescaped <code>String</code>, <code>null</code> if null string input - * @see #escapeHtml(Writer, String) */ - public static String unescapeHtml(String str) { - return UnescapeUtils.unescapeHtml4(str); + public static final String unescapeHtml4(String input) { + return UNESCAPE_HTML4.translate(input); } - /** - * <p>Unescapes a string containing entity escapes to a string - * containing the actual Unicode characters corresponding to the - * escapes. Supports HTML 4.0 entities.</p> - * - * <p>For example, the string "&lt;Fran&ccedil;ais&gt;" - * will become "<Français>"</p> - * - * <p>If an entity is unrecognized, it is left alone, and inserted - * verbatim into the result string. e.g. 
"&gt;&zzzz;x" will - * become ">&zzzz;x".</p> - * - * @param writer the writer receiving the unescaped string, not null - * @param string the <code>String</code> to unescape, may be null - * @throws IllegalArgumentException if the writer is null - * @throws IOException if an IOException occurs - * @see #escapeHtml(String) - */ - public static void unescapeHtml(Writer writer, String string) throws IOException { - UnescapeUtils.UNESCAPE_HTML4.translate(string, writer); + public static final String unescapeHtml3(String input) { + return UNESCAPE_HTML3.translate(input); } //----------------------------------------------------------------------- @@ -331,36 +367,14 @@ * <p>Note that unicode characters greater than 0x7f are as of 3.0, no longer * escaped. </p> * - * @param writer the writer receiving the unescaped string, not null - * @param str the <code>String</code> to escape, may be null - * @throws IllegalArgumentException if the writer is null - * @throws IOException if there is a problem writing - * @see #unescapeXml(java.lang.String) - */ - public static void escapeXml(Writer writer, String str) throws IOException { - EscapeUtils.ESCAPE_XML.translate(str, writer); - } - - /** - * <p>Escapes the characters in a <code>String</code> using XML entities.</p> - * - * <p>For example: <tt>"bread" & "butter"</tt> => - * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>. - * </p> - * - * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). - * Does not support DTDs or external entities.</p> - * - * <p>Note that unicode characters greater than 0x7f are as of 3.0, no longer - * escaped. 
</p> - * - * @param str the <code>String</code> to escape, may be null + * @param input the <code>String</code> to escape, may be null * @return a new escaped <code>String</code>, <code>null</code> if null string input * @see #unescapeXml(java.lang.String) */ - public static String escapeXml(String str) { - return EscapeUtils.escapeXml(str); + public static final String escapeXml(String input) { + return ESCAPE_XML.translate(input); } + //----------------------------------------------------------------------- /** @@ -374,34 +388,14 @@ * <p>Note that numerical \\u unicode codes are unescaped to their respective * unicode characters. This may change in future releases. </p> * - * @param writer the writer receiving the unescaped string, not null - * @param str the <code>String</code> to unescape, may be null - * @throws IllegalArgumentException if the writer is null - * @throws IOException if there is a problem writing - * @see #escapeXml(String) - */ - public static void unescapeXml(Writer writer, String str) throws IOException { - UnescapeUtils.UNESCAPE_XML.translate(str, writer); - } - - /** - * <p>Unescapes a string containing XML entity escapes to a string - * containing the actual Unicode characters corresponding to the - * escapes.</p> - * - * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). - * Does not support DTDs or external entities.</p> - * - * <p>Note that numerical \\u unicode codes are unescaped to their respective - * unicode characters. This may change in future releases. 
</p> - * - * @param str the <code>String</code> to unescape, may be null + * @param input the <code>String</code> to unescape, may be null * @return a new unescaped <code>String</code>, <code>null</code> if null string input * @see #escapeXml(String) */ - public static String unescapeXml(String str) { - return UnescapeUtils.unescapeXml(str); + public static final String unescapeXml(String input) { + return UNESCAPE_XML.translate(input); } + //----------------------------------------------------------------------- @@ -422,40 +416,13 @@ * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. * - * @param str the input CSV column String, may be null + * @param input the input CSV column String, may be null * @return the input String, enclosed in double quotes if the value contains a comma, * newline or double quote, <code>null</code> if null string input * @since 2.4 */ - public static String escapeCsv(String str) { - return EscapeUtils.escapeCsv(str); - } - - /** - * <p>Writes a <code>String</code> value for a CSV column enclosed in double quotes, - * if required.</p> - * - * <p>If the value contains a comma, newline or double quote, then the - * String value is written enclosed in double quotes.</p> - * </p> - * - * <p>Any double quote characters in the value are escaped with another double quote.</p> - * - * <p>If the value does not contain a comma, newline or double quote, then the - * String value is written unchanged (null values are ignored).</p> - * </p> - * - * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and - * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 
- * - * @param str the input CSV column String, may be null - * @param out Writer to write input string to, enclosed in double quotes if it contains - * a comma, newline or double quote - * @throws IOException if error occurs on underlying Writer - * @since 2.4 - */ - public static void escapeCsv(Writer out, String str) throws IOException { - EscapeUtils.ESCAPE_CSV.translate(str, out); + public static final String escapeCsv(String input) { + return ESCAPE_CSV.translate(input); } /** @@ -475,40 +442,13 @@ * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. * - * @param str the input CSV column String, may be null + * @param input the input CSV column String, may be null * @return the input String, with enclosing double quotes removed and embedded double * quotes unescaped, <code>null</code> if null string input * @since 2.4 */ - public static String unescapeCsv(String str) { - return UnescapeUtils.unescapeCsv(str); - } - - /** - * <p>Returns a <code>String</code> value for an unescaped CSV column. </p> - * - * <p>If the value is enclosed in double quotes, and contains a comma, newline - * or double quote, then quotes are removed. - * </p> - * - * <p>Any double quote escaped characters (a pair of double quotes) are unescaped - * to just one double quote. </p> - * - * <p>If the value is not enclosed in double quotes, or is and does not contain a - * comma, newline or double quote, then the String value is returned unchanged.</p> - * </p> - * - * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and - * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 
- * - * @param str the input CSV column String, may be null - * @param out Writer to write the input String to, with enclosing double quotes - * removed and embedded double quotes unescaped, <code>null</code> if null string input - * @throws IOException if error occurs on underlying Writer - * @since 2.4 - */ - public static void unescapeCsv(Writer out, String str) throws IOException { - UnescapeUtils.UNESCAPE_CSV.translate(str, out); + public static final String unescapeCsv(String input) { + return UNESCAPE_CSV.translate(input); } } Modified: commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java?rev=881197&r1=881196&r2=881197&view=diff ============================================================================== --- commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java (original) +++ commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java Tue Nov 17 08:45:55 2009 @@ -63,14 +63,14 @@ public void testEscapeJava() throws IOException { assertEquals(null, StringEscapeUtils.escapeJava(null)); try { - StringEscapeUtils.escapeJava(null, null); + StringEscapeUtils.ESCAPE_JAVA.translate(null, null); fail(); } catch (IOException ex) { fail(); } catch (IllegalArgumentException ex) { } try { - StringEscapeUtils.escapeJava(null, ""); + StringEscapeUtils.ESCAPE_JAVA.translate("", null); fail(); } catch (IOException ex) { fail(); @@ -123,21 +123,21 @@ assertEquals(message, expected, converted); StringWriter writer = new StringWriter(); - StringEscapeUtils.escapeJava(writer, original); + StringEscapeUtils.ESCAPE_JAVA.translate(original, writer); assertEquals(expected, writer.toString()); } public void testUnescapeJava() throws IOException { assertEquals(null, StringEscapeUtils.unescapeJava(null)); try { - StringEscapeUtils.unescapeJava(null, null); + 
StringEscapeUtils.UNESCAPE_JAVA.translate(null, null); fail(); } catch (IOException ex) { fail(); } catch (IllegalArgumentException ex) { } try { - StringEscapeUtils.unescapeJava(null, ""); + StringEscapeUtils.UNESCAPE_JAVA.translate("", null); fail(); } catch (IOException ex) { fail(); @@ -177,31 +177,31 @@ expected, actual); StringWriter writer = new StringWriter(); - StringEscapeUtils.unescapeJava(writer, original); + StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer); assertEquals(unescaped, writer.toString()); } - public void testEscapeJavaScript() { - assertEquals(null, StringEscapeUtils.escapeJavaScript(null)); + public void testEscapeEcmaScript() { + assertEquals(null, StringEscapeUtils.escapeEcmaScript(null)); try { - StringEscapeUtils.escapeJavaScript(null, null); + StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null); fail(); } catch (IOException ex) { fail(); } catch (IllegalArgumentException ex) { } try { - StringEscapeUtils.escapeJavaScript(null, ""); + StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null); fail(); } catch (IOException ex) { fail(); } catch (IllegalArgumentException ex) { } - assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeJavaScript("He didn't say, \"stop!\"")); + assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\"")); assertEquals("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';", - StringEscapeUtils.escapeJavaScript("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';")); + StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';")); } @@ -227,10 +227,10 @@ String message = htmlEscapes[i][0]; String expected = htmlEscapes[i][1]; String original = htmlEscapes[i][2]; - assertEquals(message, expected, StringEscapeUtils.escapeHtml(original)); + assertEquals(message, expected, StringEscapeUtils.escapeHtml4(original)); 
StringWriter sw = new StringWriter(); try { - StringEscapeUtils.escapeHtml(sw, original); + StringEscapeUtils.ESCAPE_HTML4.translate(original, sw); } catch (IOException e) { } String actual = original == null ? null : sw.toString(); @@ -238,16 +238,16 @@ } } - public void testUnescapeHtml() { + public void testUnescapeHtml4() { for (int i = 0; i < htmlEscapes.length; ++i) { String message = htmlEscapes[i][0]; String expected = htmlEscapes[i][2]; String original = htmlEscapes[i][1]; - assertEquals(message, expected, StringEscapeUtils.unescapeHtml(original)); + assertEquals(message, expected, StringEscapeUtils.unescapeHtml4(original)); StringWriter sw = new StringWriter(); try { - StringEscapeUtils.unescapeHtml(sw, original); + StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw); } catch (IOException e) { } String actual = original == null ? null : sw.toString(); @@ -256,18 +256,18 @@ // \u00E7 is a cedilla (c with wiggle under) // note that the test string must be 7-bit-clean (unicode escaped) or else it will compile incorrectly // on some locales - assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml("Fran\u00E7ais")); + assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais")); - assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml("Hello&;World")); - assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml("Hello&#;World")); - assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml("Hello&# ;World")); - assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml("Hello&##;World")); + assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World")); + assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World")); + assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World")); + assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World")); } public void testUnescapeHexCharsHtml() 
{ // Simple easy to grok test - assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml("&#x80;&#x9F;")); - assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml("&#X80;&#X9F;")); + assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#x80;&#x9F;")); + assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#X80;&#X9F;")); // Test all Character values: for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) { Character c1 = new Character(i); @@ -275,19 +275,19 @@ String expected = c1.toString() + c2.toString(); String escapedC1 = "&#x" + Integer.toHexString((c1.charValue())) + ";"; String escapedC2 = "&#x" + Integer.toHexString((c2.charValue())) + ";"; - assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml(escapedC1 + escapedC2)); + assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2)); } } public void testUnescapeUnknownEntity() throws Exception { - assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml("&zzzz;")); + assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;")); } public void testEscapeHtmlVersions() throws Exception { - assertEquals("&Beta;", StringEscapeUtils.escapeHtml("\u0392")); - assertEquals("\u0392", StringEscapeUtils.unescapeHtml("&Beta;")); + assertEquals("&Beta;", StringEscapeUtils.escapeHtml4("\u0392")); + assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("&Beta;")); //todo: refine API for escaping/unescaping specific HTML versions @@ -310,14 +310,14 @@ StringWriter sw = new StringWriter(); try { - StringEscapeUtils.escapeXml(sw, "<abc>"); + StringEscapeUtils.ESCAPE_XML.translate("<abc>", sw); } catch (IOException e) { } assertEquals("XML was escaped incorrectly", "&lt;abc&gt;", sw.toString() ); sw = new StringWriter(); try { - StringEscapeUtils.unescapeXml(sw, "&lt;abc&gt;"); + StringEscapeUtils.UNESCAPE_XML.translate("&lt;abc&gt;", sw); } catch (IOException e) { } 
assertEquals("XML was unescaped incorrectly", "<abc>", sw.toString() ); @@ -326,14 +326,14 @@ // Tests issue #38569 // http://issues.apache.org/bugzilla/show_bug.cgi?id=38569 public void testStandaloneAmphersand() { - assertEquals("<P&O>", StringEscapeUtils.unescapeHtml("&lt;P&O&gt;")); - assertEquals("test & <", StringEscapeUtils.unescapeHtml("test & &lt;")); + assertEquals("<P&O>", StringEscapeUtils.unescapeHtml4("&lt;P&O&gt;")); + assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & &lt;")); assertEquals("<P&O>", StringEscapeUtils.unescapeXml("&lt;P&O&gt;")); assertEquals("test & <", StringEscapeUtils.unescapeXml("test & &lt;")); } public void testLang313() { - assertEquals("& &", StringEscapeUtils.unescapeHtml("& &amp;")); + assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &amp;")); } public void testEscapeCsvString() throws Exception @@ -361,7 +361,7 @@ private void checkCsvEscapeWriter(String expected, String value) { try { StringWriter writer = new StringWriter(); - StringEscapeUtils.escapeCsv(writer, value); + StringEscapeUtils.ESCAPE_CSV.translate(value, writer); assertEquals(expected, writer.toString()); } catch (IOException e) { fail("Threw: " + e); @@ -397,7 +397,7 @@ private void checkCsvUnescapeWriter(String expected, String value) { try { StringWriter writer = new StringWriter(); - StringEscapeUtils.unescapeCsv(writer, value); + StringEscapeUtils.UNESCAPE_CSV.translate(value, writer); assertEquals(expected, writer.toString()); } catch (IOException e) { fail("Threw: " + e); @@ -414,14 +414,14 @@ String original = new String(data, "UTF8"); - String escaped = StringEscapeUtils.escapeHtml( original ); + String escaped = StringEscapeUtils.escapeHtml4( original ); assertEquals( "High unicode should not have been escaped", original, escaped); - String unescaped = StringEscapeUtils.unescapeHtml( escaped ); + String unescaped = StringEscapeUtils.unescapeHtml4( escaped ); assertEquals( "High unicode should have been unchanged", original, unescaped); // TODO: I think this 
should hold, needs further investigation -// String unescapedFromEntity = StringEscapeUtils.unescapeHtml( "&#119650;" ); +// String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "&#119650;" ); // assertEquals( "High unicode should have been unescaped", original, unescapedFromEntity); } @@ -429,11 +429,11 @@ public void testEscapeHiragana() throws java.io.UnsupportedEncodingException { // Some random Japanese unicode characters String original = "\u304B\u304C\u3068"; - String escaped = StringEscapeUtils.escapeHtml(original); - assertEquals( "Hiragana character unicode behaviour should not be being escaped by escapeHtml", + String escaped = StringEscapeUtils.escapeHtml4(original); + assertEquals( "Hiragana character unicode behaviour should not be being escaped by escapeHtml4", original, escaped); - String unescaped = StringEscapeUtils.unescapeHtml( escaped ); + String unescaped = StringEscapeUtils.unescapeHtml4( escaped ); assertEquals( "Hiragana character unicode behaviour has changed - expected no unescaping", escaped, unescaped); }