Author: ltheussl
Date: Tue May 5 08:06:19 2009
New Revision: 771612
URL: http://svn.apache.org/viewvc?rev=771612&view=rev
Log:
[DOXIA-311] Character references do not work in xdoc section titles.
Partial fix: all html entities should be properly handled now, custom entities
are still un-escaped in section titles.
Entities are now emitted as text (not rawText) by the XhtmlBaseParser,
unrecognized entities are emitted as an unknown event.
Modified:
maven/doxia/doxia/trunk/doxia-core/pom.xml
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
Modified: maven/doxia/doxia/trunk/doxia-core/pom.xml
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/pom.xml?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/pom.xml (original)
+++ maven/doxia/doxia/trunk/doxia-core/pom.xml Tue May 5 08:06:19 2009
@@ -55,6 +55,11 @@
<artifactId>xercesImpl</artifactId>
<version>2.8.1</version>
</dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ <version>2.4</version>
+ </dependency>
<!-- test -->
</dependencies>
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java
Tue May 5 08:06:19 2009
@@ -48,6 +48,13 @@
/** An end HTML tag. Eg <code>&lt;/p&gt;</code>. */
int TAG_TYPE_END = 3;
+ /**
+ * An HTML entity. Eg <code>&amp;lt;</code>.
+ *
+ * @since 1.1.1.
+ */
+ int ENTITY_TYPE = 4;
+
// ----------------------------------------------------------------------
// All XHTML 1.0 tags
// ----------------------------------------------------------------------
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
Tue May 5 08:06:19 2009
@@ -23,10 +23,12 @@
import javax.swing.text.html.HTML.Tag;
import org.apache.maven.doxia.macro.MacroExecutionException;
+import org.apache.maven.doxia.markup.HtmlMarkup;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.SinkEventAttributeSet;
import org.apache.maven.doxia.sink.SinkEventAttributes;
import org.apache.maven.doxia.util.DoxiaUtils;
+import org.apache.maven.doxia.util.HtmlTools;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
@@ -467,13 +469,20 @@
}
else
{
- if ( getLocalEntities().containsKey( textChars ) )
+ String unescaped = HtmlTools.unescapeHtml( text );
+
+ // TODO: StringEscapeUtils.unescapeHtml returns unknown entities as is,
+ // they should be handled as one character as well
+ if ( text.equals( unescaped ) && text.length() > 1 )
{
- sink.rawText( text );
+ // this means the entity is unrecognized: emit as unknown
+ Object[] required = new Object[] { new Integer(
HtmlMarkup.ENTITY_TYPE ) };
+
+ sink.unknown( text, required, null );
}
else
{
- sink.text( text );
+ sink.text( unescaped );
}
}
}
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java
Tue May 5 08:06:19 2009
@@ -1769,7 +1769,8 @@
{
getTextBuffer().append( text );
}
- else {
+ else
+ {
write( text );
}
}
@@ -1797,6 +1798,22 @@
*/
public void unknown( String name, Object[] requiredParams,
SinkEventAttributes attributes )
{
+ if ( requiredParams == null || !( requiredParams[0] instanceof Integer
) )
+ {
+ getLog().warn( "Missing type information for unknown event: " +
name + ", ignoring!" );
+
+ return;
+ }
+
+ int tagType = ( (Integer) requiredParams[0] ).intValue();
+
+ if ( tagType == ENTITY_TYPE )
+ {
+ rawText( name );
+
+ return;
+ }
+
Tag tag = HtmlTools.getHtmlTag( name );
if ( tag == null )
@@ -1805,13 +1822,6 @@
}
else
{
- if ( requiredParams == null || !( requiredParams[0] instanceof
Integer ) )
- {
- throw new IllegalArgumentException( "Missing required
parameter: TAG_TYPE" );
- }
-
- int tagType = ( (Integer) requiredParams[0] ).intValue();
-
if ( tagType == TAG_TYPE_SIMPLE )
{
writeSimpleTag( tag, attributes );
@@ -1878,7 +1888,7 @@
*/
protected static String escapeHTML( String text )
{
- return HtmlTools.escapeHTML( text );
+ return HtmlTools.escapeHTML( text, false );
}
/**
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
Tue May 5 08:06:19 2009
@@ -25,6 +25,8 @@
import javax.swing.text.html.HTML.Tag;
+import org.apache.commons.lang.StringEscapeUtils;
+
import org.apache.maven.doxia.markup.HtmlMarkup;
@@ -173,6 +175,27 @@
}
/**
+ * Unescapes HTML entities in a string.
+ *
+ * <p> Unescapes a string containing entity escapes to a string
+ * containing the actual Unicode characters corresponding to the
+ * escapes. Supports HTML 4.0 entities.</p>
+ *
+ * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
+ * will become "&lt;Fran&ccedil;ais&gt;".</p>
+ *
+ * @param text the <code>String</code> to unescape, may be null.
+ *
+ * @return a new unescaped <code>String</code>, <code>null</code> if null
string input.
+ *
+ * @since 1.1.1.
+ */
+ public static String unescapeHtml( String text )
+ {
+ return StringEscapeUtils.unescapeHtml( text );
+ }
+
+ /**
* Encode an url
*
* @param url the String to encode, may be null
Modified:
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java
Tue May 5 08:06:19 2009
@@ -336,21 +336,20 @@
assertEquals( "bold", event.getName() );
event = (SinkEventElement) it.next();
- assertEquals( "rawText", event.getName() );
- assertEquals( "&#x159;", (String) event.getArgs()[0] );
+ assertEquals( "text", event.getName() );
+ assertEquals( "\u0159", (String) event.getArgs()[0] );
event = (SinkEventElement) it.next();
- assertEquals( "rawText", event.getName() );
- assertEquals( " ", (String) event.getArgs()[0] );
+ assertEquals( "text", event.getName() );
+ assertEquals( "\u00A0", (String) event.getArgs()[0] );
event = (SinkEventElement) it.next();
- assertEquals( "rawText", event.getName() );
- assertEquals( "&#x161;", (String) event.getArgs()[0] );
+ assertEquals( "text", event.getName() );
+ assertEquals( "\u0161", (String) event.getArgs()[0] );
event = (SinkEventElement) it.next();
- // FIXME: DOXIA-310
- //assertEquals( "rawText", event.getName() );
- //assertEquals( "&#x1d7ed;", (String) event.getArgs()[0] );
+ assertEquals( "unknown", event.getName() );
+ assertEquals( "&#x1d7ed;", (String) event.getArgs()[0] );
event = (SinkEventElement) it.next();
assertEquals( "bold_", event.getName() );
@@ -361,7 +360,7 @@
throws Exception
{
final String text = "<!DOCTYPE test [<!ENTITY foo \"&#x159;\"><!ENTITY
tritPos \"&#x1d7ed;\">]>"
- +
"<body><h2>&amp;&foo;</h2><p>&amp;&foo;</p><p>&tritPos;</p></body>";
+ +
"<body><h2>&amp;&foo;&tritPos;</h2><p>&amp;&foo;&tritPos;</p></body>";
parser.setValidate( false );
parser.parse( text, sink );
@@ -376,8 +375,12 @@
assertEquals( "&", textEvt.getArgs()[0] );
textEvt = (SinkEventElement) it.next();
- assertEquals( "rawText", textEvt.getName() );
- assertEquals( "&#x159;", textEvt.getArgs()[0] );
+ assertEquals( "text", textEvt.getName() );
+ assertEquals( "\u0159", textEvt.getArgs()[0] );
+
+ textEvt = (SinkEventElement) it.next();
+ assertEquals( "unknown", textEvt.getName() );
+ assertEquals( "&#x1d7ed;", textEvt.getArgs()[0] );
assertEquals( "sectionTitle1_", ( (SinkEventElement) it.next()
).getName() );
assertEquals( "paragraph", ( (SinkEventElement) it.next() ).getName()
);
@@ -387,15 +390,11 @@
assertEquals( "&", textEvt.getArgs()[0] );
textEvt = (SinkEventElement) it.next();
- assertEquals( "rawText", textEvt.getName() );
- assertEquals( "&#x159;", textEvt.getArgs()[0] );
-
- assertEquals( "paragraph_", ( (SinkEventElement) it.next() ).getName()
);
- //assertEquals( "section1_", ( (SinkEventElement) it.next()
).getName() );
+ assertEquals( "text", textEvt.getName() );
+ assertEquals( "\u0159", textEvt.getArgs()[0] );
- assertEquals( "paragraph", ( (SinkEventElement) it.next() ).getName()
);
textEvt = (SinkEventElement) it.next();
- assertEquals( "rawText", textEvt.getName() );
+ assertEquals( "unknown", textEvt.getName() );
assertEquals( "&#x1d7ed;", textEvt.getArgs()[0] );
assertEquals( "paragraph_", ( (SinkEventElement) it.next() ).getName()
);
Modified:
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java
Tue May 5 08:06:19 2009
@@ -596,9 +596,8 @@
sink.paragraph_();
sink.paragraph();
- sink.text( "Copyright symbol: " + COPYRIGHT + ", " + COPYRIGHT + ", "
+ COPYRIGHT + "." );
+ sink.text( "Copyright symbol:" );
+ sink.text( "\u00a9" );
sink.paragraph_();
}
-
- private static final char COPYRIGHT = '\u00a9';
}
Modified:
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java
Tue May 5 08:06:19 2009
@@ -744,7 +744,7 @@
*/
public void testText()
{
- String text = "a text";
+ String text = "a text & \u00c6";
try
{
@@ -756,7 +756,7 @@
sink.close();
}
- assertEquals( "a text", writer.toString() );
+ assertEquals( "a text &amp; &#xc6;", writer.toString() );
writer = new StringWriter();
@@ -770,7 +770,7 @@
sink.close();
}
- assertEquals( "a text", writer.toString() );
+ assertEquals( "a text &amp; &#xc6;", writer.toString() );
}
/**
Modified:
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
Tue May 5 08:06:19 2009
@@ -51,6 +51,22 @@
/**
* Verify the expected results.
*/
+ public void testUnescapeHTML()
+ {
+ assertNull( HtmlTools.unescapeHtml( null ) );
+ assertEquals( "", HtmlTools.unescapeHtml( "" ) );
+ assertEquals( "<", HtmlTools.unescapeHtml( "&lt;" ) );
+ assertEquals( ">", HtmlTools.unescapeHtml( "&gt;" ) );
+ assertEquals( "&", HtmlTools.unescapeHtml( "&amp;" ) );
+ assertEquals( "\"", HtmlTools.unescapeHtml( "&quot;" ) );
+ assertEquals( "&amp;", HtmlTools.unescapeHtml( "&amp;amp;" ) );
+ assertEquals( "&lt;Fran&ccedil;ais&gt;", HtmlTools.unescapeHtml(
"&amp;lt;Fran&amp;ccedil;ais&amp;gt;" ) );
+ assertEquals( "&#x12345;", HtmlTools.unescapeHtml( "&#x12345;" ) );
+ }
+
+ /**
+ * Verify the expected results.
+ */
public void testEncodeId()
{
assertEquals( HtmlTools.encodeId( null ), null );
Modified:
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
(original)
+++
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
Tue May 5 08:06:19 2009
@@ -36,6 +36,7 @@
import org.apache.maven.doxia.parser.XhtmlBaseParser;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.SinkEventAttributeSet;
+import org.apache.maven.doxia.util.HtmlTools;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.StringUtils;
@@ -184,7 +185,7 @@
sink.sectionTitle( Sink.SECTION_LEVEL_1, attribs );
- sink.text( parser.getAttributeValue( null,
Attribute.NAME.toString() ) );
+ sink.text( HtmlTools.unescapeHtml( parser.getAttributeValue( null,
Attribute.NAME.toString() ) ) );
sink.sectionTitle1_();
}
@@ -203,7 +204,7 @@
sink.sectionTitle( Sink.SECTION_LEVEL_2, attribs );
- sink.text( parser.getAttributeValue( null,
Attribute.NAME.toString() ) );
+ sink.text( HtmlTools.unescapeHtml( parser.getAttributeValue( null,
Attribute.NAME.toString() ) ) );
sink.sectionTitle2_();
}
Modified:
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
(original)
+++
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
Tue May 5 08:06:19 2009
@@ -454,8 +454,7 @@
SinkEventElement textEvt = (SinkEventElement) it.next();
assertEquals( "text", textEvt.getName() );
- // FIXME: DOXIA-311
- assertEquals( "&&#x159;", textEvt.getArgs()[0] );
+ assertEquals( "&\u0159", textEvt.getArgs()[0] );
assertEquals( "sectionTitle1_", ( (SinkEventElement) it.next()
).getName() );
assertEquals( "paragraph", ( (SinkEventElement) it.next() ).getName()
);
@@ -465,8 +464,8 @@
assertEquals( "&", textEvt.getArgs()[0] );
textEvt = (SinkEventElement) it.next();
- assertEquals( "rawText", textEvt.getName() );
- assertEquals( "&#x159;", textEvt.getArgs()[0] );
+ assertEquals( "text", textEvt.getName() );
+ assertEquals( "\u0159", textEvt.getArgs()[0] );
assertEquals( "paragraph_", ( (SinkEventElement) it.next() ).getName()
);
assertEquals( "section1_", ( (SinkEventElement) it.next() ).getName()
);