Author: bayard Date: Thu Jul 7 03:44:22 2011 New Revision: 1143641 URL: http://svn.apache.org/viewvc?rev=1143641&view=rev Log: Making unescapeHtml _NOT_ unescape unfinished numeric entities by default (it ignores them); however, adding options that will fire an exception or unescape the numeric entity. LANG-710
Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java?rev=1143641&r1=1143640&r2=1143641&view=diff ============================================================================== --- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java (original) +++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaper.java Thu Jul 7 03:44:22 2011 @@ -18,6 +18,8 @@ package org.apache.commons.lang3.text.tr import java.io.IOException; import java.io.Writer; +import java.util.Arrays; +import java.util.EnumSet; /** * Translate XML numeric entities of the form &#[xX]?\d+;? to @@ -30,6 +32,41 @@ import java.io.Writer; */ public class NumericEntityUnescaper extends CharSequenceTranslator { + public static enum OPTION { semiColonRequired, semiColonOptional, errorIfNoSemiColon } + + // TODO?: Create an OptionsSet class to hide some of the conditional logic below + private final EnumSet<OPTION> options; + + /** + * Create a NumericEntityUnescaper. + * + * The constructor takes a list of options, only one of which is currently + * available (whether to allow the semi-colon on the end of a numeric entity to + * be optional). + * + * For example, to support numeric entities without a ';': + * new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.semiColonOptional) + * + * @param options to apply to this unescaper + */ + public NumericEntityUnescaper(OPTION... 
options) { + if(options.length > 0) { + this.options = EnumSet.copyOf(Arrays.asList(options)); + } else { + this.options = EnumSet.copyOf(Arrays.asList(new OPTION[] { OPTION.semiColonRequired })); + } + } + + /** + * Whether the passed in option is currently set. + * + * @param option to check state of + * @return whether the option is set + */ + public boolean isSet(OPTION option) { + return (options == null) ? false : options.contains(option); + } + /** * {@inheritDoc} */ @@ -61,6 +98,17 @@ public class NumericEntityUnescaper exte end++; } + boolean semiNext = (end != seqEnd) && (input.charAt(end) == ';'); + + if(!semiNext) { + if(isSet(OPTION.semiColonRequired)) { + return 0; + } else + if(isSet(OPTION.errorIfNoSemiColon)) { + throw new RuntimeException("Semi-colon required at end of numeric entity"); + } + } + int entityValue; try { if(isHex) { @@ -80,8 +128,6 @@ public class NumericEntityUnescaper exte out.write(entityValue); } - boolean semiNext = (end != seqEnd) && (input.charAt(end) == ';'); - return 2 + (end - start) + (isHex ? 1 : 0) + (semiNext ? 
1 : 0); } return 0; Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java?rev=1143641&r1=1143640&r2=1143641&view=diff ============================================================================== --- commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java (original) +++ commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/text/translate/NumericEntityUnescaperTest.java Thu Jul 7 03:44:22 2011 @@ -44,12 +44,32 @@ public class NumericEntityUnescaperTest } public void testUnfinishedEntity() { - NumericEntityUnescaper neu = new NumericEntityUnescaper(); + // parse it + NumericEntityUnescaper neu = new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.semiColonOptional); String input = "Test &#x30 not test"; String expected = "Test \u0030 not test"; String result = neu.translate(input); - assertEquals("Failed to support unfinished entities (i.e. missing semi-colon", expected, result); + assertEquals("Failed to support unfinished entities (i.e. missing semi-colon)", expected, result); + + // ignore it + neu = new NumericEntityUnescaper(); + input = "Test &#x30 not test"; + expected = input; + + result = neu.translate(input); + assertEquals("Failed to ignore unfinished entities (i.e. missing semi-colon)", expected, result); + + // fail it + neu = new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.errorIfNoSemiColon); + input = "Test &#x30 not test"; + + try { + result = neu.translate(input); + fail("RuntimeException expected"); + } catch(RuntimeException re) { + // expected + } } }