Roman reported that the HTML parser eats too much whitespace: a
sequence like <b> text </b> must be parsed preserving at most one
leading and one trailing space (currently all of them are discarded).
As this is a matter of interpretation rather than a programming bug,
this patch may break existing HTML parser tests that require the
whitespace surrounding a word to be discarded completely. These tests
should be corrected later.
2006-09-03 Audrius Meskauskas <[EMAIL PROTECTED]>
* gnu/javax/swing/text/html/parser/HTML_401F.java (defineElements):
Disallow H1 - H6 in the paragraphs.
* gnu/javax/swing/text/html/parser/support/textPreProcessor.java
(preprocess): Leave at most one leading and/or trailing space.
* javax/swing/text/html/HTMLDocument.java (HTMLReader.handleText):
Do not add any text after closing the HTML tag.
### Eclipse Workspace Patch 1.0
#P classpath
Index: javax/swing/text/html/HTMLDocument.java
===================================================================
RCS file: /sources/classpath/classpath/javax/swing/text/html/HTMLDocument.java,v
retrieving revision 1.40
diff -u -r1.40 HTMLDocument.java
--- javax/swing/text/html/HTMLDocument.java 25 Aug 2006 11:40:44 -0000 1.40
+++ javax/swing/text/html/HTMLDocument.java 3 Sep 2006 20:31:04 -0000
@@ -1181,7 +1181,7 @@
*/
public void handleText(char[] data, int pos)
{
- if (data != null && data.length > 0)
+ if (shouldInsert() && data != null && data.length > 0)
addContent(data, 0, data.length);
}
Index: gnu/javax/swing/text/html/parser/HTML_401F.java
===================================================================
RCS file: /sources/classpath/classpath/gnu/javax/swing/text/html/parser/HTML_401F.java,v
retrieving revision 1.4
diff -u -r1.4 HTML_401F.java
--- gnu/javax/swing/text/html/parser/HTML_401F.java 16 Jul 2006 15:03:08 -0000 1.4
+++ gnu/javax/swing/text/html/parser/HTML_401F.java 3 Sep 2006 20:31:01 -0000
@@ -2445,8 +2445,10 @@
attr(VALUE, null, null, 0, IMPLIED)
}
);
+
+ // Headers in the paragraph are not allowed.
defElement(P, 0, false, true, new ContentModel( 0,
- new noTagModel(P), null),
+ new noTagModel(new String[] { P, H1, H2, H3, H4, H5, H6 }), null),
NONE
,
new String[] {
Index: gnu/javax/swing/text/html/parser/support/textPreProcessor.java
===================================================================
RCS file: /sources/classpath/classpath/gnu/javax/swing/text/html/parser/support/textPreProcessor.java,v
retrieving revision 1.2
diff -u -r1.2 textPreProcessor.java
--- gnu/javax/swing/text/html/parser/support/textPreProcessor.java 2 Jul 2005 20:32:15 -0000 1.2
+++ gnu/javax/swing/text/html/parser/support/textPreProcessor.java 3 Sep 2006 20:31:01 -0000
@@ -42,17 +42,17 @@
/**
* Pre - processes text in text parts of the html document.
- * Not thread - safe.
+ *
* @author Audrius Meskauskas, Lithuania ([EMAIL PROTECTED])
*/
public class textPreProcessor
{
/**
- * Pre - process non-preformatted text.
- * \t, \r and \n mutate into spaces, then multiple spaces mutate
- * into single one, all whitespace around tags is consumed.
- * The content of the passed buffer is destroyed.
- * @param text A text to pre-process.
+ * Pre - process non-preformatted text. \t, \r and \n mutate into spaces, then
+ * multiple spaces mutate into single one, all whitespace around tags is
+ * consumed. The content of the passed buffer is destroyed.
+ *
+ * @param a_text A text to pre-process.
*/
public char[] preprocess(StringBuffer a_text)
{
@@ -64,17 +64,22 @@
int a = 0;
int b = text.length - 1;
+ // Remove leading/trailing whitespace, leaving at most one character
try
{
- while (Constants.bWHITESPACE.get(text [ a ]))
+ while (Constants.bWHITESPACE.get(text[a])
+ && Constants.bWHITESPACE.get(text[a + 1]))
a++;
- while (Constants.bWHITESPACE.get(text [ b ]))
+
+ while (b > a && Constants.bWHITESPACE.get(text[b])
+ && Constants.bWHITESPACE.get(text[b - 1]))
b--;
}
catch (ArrayIndexOutOfBoundsException sx)
{
- // A text fragment, consisting from line breaks only.
- return null;
+ // A text fragment, consisting from spaces and line breaks only,
+ // mutates into single space.
+ return new char[] { ' ' };
}
a_text.setLength(0);
@@ -83,10 +88,9 @@
boolean spaceNow;
char c;
- chars:
- for (int i = a; i <= b; i++)
+ chars: for (int i = a; i <= b; i++)
{
- c = text [ i ];
+ c = text[i];
spaceNow = Constants.bWHITESPACE.get(c);
if (spacesWere && spaceNow)
continue chars;