Author: vsiveton
Date: Wed May 20 11:05:40 2009
New Revision: 776659
URL: http://svn.apache.org/viewvc?rev=776659&view=rev
Log:
o clarify javadoc for escapeHTML/unescapeHTML and added new unescapeHTML
o take care of ' if xmlmode
o updated test case
o removed throw IllegalArgumentException in unescapeHTML
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java?rev=776659&r1=776658&r2=776659&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
Wed May 20 11:05:40 2009
@@ -95,6 +95,8 @@
/**
* Escape special HTML characters in a String in <code>xml</code> mode.
*
+ * <b>Note</b>: this method doesn't escape non-ascii characters by numeric characters references.
+ *
* @param text the String to escape, may be null.
* @return The escaped text or the empty string if text == null.
* @see #escapeHTML(String,boolean)
@@ -108,10 +110,11 @@
* Escape special HTML characters in a String.
*
* <pre>
- * < becomes <code>&lt;</code>
- * > becomes <code>&gt;</code>
- * & becomes <code>&amp;</code>
- * " becomes <code>&quot;</code>
+ * < becomes <code>&amp;lt;</code>
+ * > becomes <code>&amp;gt;</code>
+ * & becomes <code>&amp;amp;</code>
+ * " becomes <code>&amp;quot;</code>
+ * ' becomes <code>&amp;apos;</code> if xmlMode = true
* </pre>
*
* If <code>xmlMode</code> is true, every other character than the above
remains unchanged,
@@ -124,9 +127,12 @@
* </pre>
*
* @param text The String to escape, may be null.
- * @param xmlMode set to <code>false</code> to replace non-ascii characters.
+ * @param xmlMode <code>true</code> to replace also ' to &apos, <code>false</code> to replace non-ascii
+ * characters by numeric characters references.
* @return The escaped text or the empty string if text == null.
* @since 1.1
+ * @see <a
href="http://www.w3.org/TR/2000/REC-xml-20001006#sec-predefined-ent">http://www.w3.org/TR/2000/REC-xml-20001006#sec-predefined-ent</a>
+ * @see <a
href="http://www.w3.org/TR/html401/charset.html#h-5.3">http://www.w3.org/TR/html401/charset.html#h-5.3</a>
*/
public static final String escapeHTML( String text, boolean xmlMode )
{
@@ -158,7 +164,14 @@
default:
if ( xmlMode )
{
- buffer.append( c );
+ if ( c == '\'' )
+ {
+ buffer.append( "&apos;" );
+ }
+ else
+ {
+ buffer.append( c );
+ }
}
else
{
@@ -188,6 +201,19 @@
}
/**
+ * Unescapes HTML entities in a string in non xml mode.
+ *
+ * @param text the <code>String</code> to unescape, may be null.
+ * @return a new unescaped <code>String</code>, <code>null</code> if null
string input.
+ * @since 1.1.1.
+ * @see #unescapeHTML(String, boolean)
+ */
+ public static String unescapeHTML( String text )
+ {
+ return unescapeHTML( text, false );
+ }
+
+ /**
* Unescapes HTML entities in a string.
*
* <p> Unescapes a string containing entity escapes to a string
@@ -204,18 +230,27 @@
* </pre>
*
* @param text the <code>String</code> to unescape, may be null.
+ * @param xmlMode set to <code>true</code> to replace &apos by '.
* @return a new unescaped <code>String</code>, <code>null</code> if null
string input.
* @since 1.1.1.
*/
- public static String unescapeHTML( String text )
+ public static String unescapeHTML( String text, boolean xmlMode )
{
if ( text == null )
{
return null;
}
- // StringEscapeUtils.unescapeHtml returns entities it doesn't recognize unchanged
- String unescaped = StringEscapeUtils.unescapeHtml( text );
+ String unescaped;
+ if ( xmlMode )
+ {
+ unescaped = StringEscapeUtils.unescapeXml( text );
+ }
+ else
+ {
+ // StringEscapeUtils.unescapeHtml returns entities it doesn't recognize unchanged
+ unescaped = StringEscapeUtils.unescapeHtml( text );
+ }
if ( !text.equals( unescaped ) )
{
@@ -233,21 +268,19 @@
}
tmp = tmp.substring( i + 3 );
- if ( tmp.indexOf( ';' ) == -1 )
- {
- throw new IllegalArgumentException( "Wrong HTML near '..." +
tmp + "'" );
- }
-
- String entity = tmp.substring( 0, tmp.indexOf( ';' ) );
- try
- {
- Integer.parseInt( entity, 16 );
- }
- catch ( Exception e )
+ if ( tmp.indexOf( ';' ) != -1 )
{
- throw new IllegalArgumentException( "Wrong HTML near '..." +
tmp + "'" );
+ String entity = tmp.substring( 0, tmp.indexOf( ';' ) );
+ try
+ {
+ Integer.parseInt( entity, 16 );
+ entities.add( entity );
+ }
+ catch ( NumberFormatException e )
+ {
+ // nop
+ }
}
- entities.add( entity );
}
for ( int i = 0; i < entities.size(); i++ )
@@ -383,7 +416,7 @@
}
//
-// Imported code from ASF Harmony project
+// Imported code from ASF Harmony project rev 770909
//
http://svn.apache.org/repos/asf/harmony/enhanced/classlib/trunk/modules/luni/src/main/java/java/lang/Character.java
//
Modified:
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java?rev=776659&r1=776658&r2=776659&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
Wed May 20 11:05:40 2009
@@ -19,7 +19,6 @@
* under the License.
*/
-import org.apache.commons.lang.StringEscapeUtils;
import org.codehaus.plexus.PlexusTestCase;
/**
@@ -38,13 +37,19 @@
{
assertEquals( HtmlTools.escapeHTML( null ), "" );
assertEquals( HtmlTools.escapeHTML( "" ), "" );
+ assertEquals( HtmlTools.escapeHTML( "\u0009" ), "\u0009" );
+ assertEquals( HtmlTools.escapeHTML( "\u0001" ), "\u0001" );
+
+ // Predefined entities
assertEquals( HtmlTools.escapeHTML( "<" ), "&lt;" );
assertEquals( HtmlTools.escapeHTML( ">" ), "&gt;" );
assertEquals( HtmlTools.escapeHTML( "&" ), "&amp;" );
assertEquals( HtmlTools.escapeHTML( "\"" ), "&quot;" );
- assertEquals( HtmlTools.escapeHTML( "&amp;" ), "&amp;amp;" );
+ assertEquals( HtmlTools.escapeHTML( "\'" ), "&apos;" );
+ assertEquals( HtmlTools.escapeHTML( "\'", false ), "\'" );
// xml mode
+ assertEquals( HtmlTools.escapeHTML( "&amp;" ), "&amp;amp;" );
assertEquals( HtmlTools.escapeHTML( "\u00e4", true ), "\u00e4" );
assertEquals( HtmlTools.escapeHTML( "\u00e4", false ), "&#xe4;" );
assertEquals( HtmlTools.escapeHTML( "\u0159", false ), "&#x159;" );
@@ -58,26 +63,23 @@
{
assertNull( HtmlTools.unescapeHTML( null ) );
assertEquals( "", HtmlTools.unescapeHTML( "" ) );
+ assertEquals( "\u0009", HtmlTools.unescapeHTML( "\u0009" ) );
+ assertEquals( "\u0001", HtmlTools.unescapeHTML( "\u0001" ) );
assertEquals( "<", HtmlTools.unescapeHTML( "&lt;" ) );
assertEquals( ">", HtmlTools.unescapeHTML( "&gt;" ) );
assertEquals( "&", HtmlTools.unescapeHTML( "&amp;" ) );
assertEquals( "\"", HtmlTools.unescapeHTML( "&quot;" ) );
+ assertEquals( "&apos;", HtmlTools.unescapeHTML( "&apos;" ) );
+ assertEquals( "\'", HtmlTools.unescapeHTML( "&apos;", true ) );
assertEquals( "&amp;", HtmlTools.unescapeHTML( "&amp;amp;" ) );
assertEquals( "&lt;Fran&ccedil;ais&gt;", HtmlTools.unescapeHTML( "&amp;lt;Fran&amp;ccedil;ais&amp;gt;" ) );
assertEquals( "\u0159", HtmlTools.unescapeHTML( "&#x159;" ) );
assertEquals( "\uD808\uDF45", HtmlTools.unescapeHTML( "&#x12345;" ) );
assertEquals( "\uD835\uDFED", HtmlTools.unescapeHTML( "&#x1d7ed;" ) );
assertEquals( "\uD808\uDF45\uD835\uDFED", HtmlTools.unescapeHTML( "&#x12345;&#x1d7ed;" ) );
-
- try
- {
- HtmlTools.unescapeHTML( "test &#x1d7ed test" );
- assertTrue( false );
- }
- catch ( IllegalArgumentException e )
- {
- assertTrue( true );
- }
+ assertEquals( "&#x1d7ed &#x1d7ed", HtmlTools.unescapeHTML( "&#x1d7ed &#x1d7ed" ) );
+ assertEquals( "&#x1d7ed \uD835\uDFED", HtmlTools.unescapeHTML( "&#x1d7ed &#x1d7ed;" ) );
+ assertEquals( "&#xQWER;", HtmlTools.unescapeHTML( "&#xQWER;" ) );
}
/**