This fixes the handling of implied paragraph tags in HTMLDocument. The problem is that (in Swing) content is not allowed to be directly in block-like tags (like body or td), but instead must be embedded in a p-implied tag. Audrius tried something similar by creating an HTML_401Swing DTD, but that was not really a solution as it only applied to the body tag. I pulled that handling into the HTMLDocument class and removed that special DTD. That's good because the parser then doesn't need to care about Swing's special requirements.
I also removed one big performance hole in HTMLDocument, namely the printBuffer() and print() debug methods. These methods created loads and loads of strings, only to discard them right away. I scrapped them. 2006-11-08 Roman Kennke <[EMAIL PROTECTED]> * javax/swing/text/html/HTMLEditorKit.java (getParser): Use plain HTML_401F DTD. * javax/swing/text/html/HTMLDocument.java (HTMLReader.print): Removed method and all calls to it. (HTMLReader.printBuffer): Removed method and all calls to it. (HTMLReader.inImpliedParagraph): New field. (HTMLReader.inParagraph): New field. (HTMLReader.addContent): Create implied p-tag if necessary. (HTMLReader.addSpecialElement): Create implied p-tag if necessary. (HTMLReader.blockClose): Close implied p-tag if necessary. (HTMLReader.blockOpen): Close implied p-tag if necessary. * gnu/javax/swing/text/html/parser/HTML_401Swing.java: Removed. /Roman
Index: gnu/javax/swing/text/html/parser/HTML_401Swing.java =================================================================== RCS file: gnu/javax/swing/text/html/parser/HTML_401Swing.java diff -N gnu/javax/swing/text/html/parser/HTML_401Swing.java --- gnu/javax/swing/text/html/parser/HTML_401Swing.java 16 Jul 2006 18:25:53 -0000 1.2 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,91 +0,0 @@ -/* HTML_401Swing.java -- The HTML 4.01 DTD, adapted for HTML rendering in Swing - Copyright (C) 2006 Free Software Foundation, Inc. - -This file is part of GNU Classpath. - -GNU Classpath is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU Classpath is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU Classpath; see the file COPYING. If not, write to the -Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301 USA. - -Linking this library statically or dynamically with other modules is -making a combined work based on this library. Thus, the terms and -conditions of the GNU General Public License cover the whole -combination. - -As a special exception, the copyright holders of this library give you -permission to link this library with independent modules to produce an -executable, regardless of the license terms of these independent -modules, and to copy and distribute the resulting executable under -terms of your choice, provided that you also meet, for each linked -independent module, the terms and conditions of the license of that -module. An independent module is a module which is not derived from -or based on this library. 
If you modify this library, you may extend -this exception to your version of the library, but you are not -obligated to do so. If you do not wish to do so, delete this -exception statement from your version. */ - - -package gnu.javax.swing.text.html.parser; - -import javax.swing.text.html.parser.DTD; - -/** - * This class is necessary because the current implementation of the GNU - * Classpath Swing requires always enclose the text into paragraphs. - * - * @author Audrius Meskauskas ([EMAIL PROTECTED]) - */ -public class HTML_401Swing extends HTML_401F -{ - /** - * The singleton instance; - */ - final static HTML_401Swing singleton = new HTML_401Swing(); - - /** - * Either takes the document (by name) from DTD table, or - * creates a new instance and registers it in the tabe. - * The document is registerd under name "-//W3C//DTD HTML 4.01 Frameset//EN". - * @return The new or existing DTD for parsing HTML 4.01 Frameset. - */ - public static DTD getInstance() - { - return singleton; - } - - /** - * Get elements that are allowed in the document body, at the zero level. - * This list disallows the text at this level (the implied P tag will be - * generated). It also disallows A, B, I, U, CITE and other similar - * elements that have the plain text inside. They will also be placed - * inside the generated implied P tags. 
- */ - protected String[] getBodyElements() - { - return new String[] { - APPLET, BASEFONT, - BR, BUTTON, - IFRAME, IMG, - INPUT, LABEL, MAP, OBJECT, - SCRIPT, SELECT, - TEXTAREA, - BLOCKQUOTE, CENTER, DEL, DIR, - DIV, DL, FIELDSET, FORM, H1, - H2, H3, H4, H5, H6, - HR, INS, ISINDEX, MENU, NOFRAMES, - NOSCRIPT, OL, P, PRE, TABLE, - UL - }; - } -} Index: javax/swing/text/html/HTMLDocument.java =================================================================== RCS file: /cvsroot/classpath/classpath/javax/swing/text/html/HTMLDocument.java,v retrieving revision 1.47 diff -u -1 -5 -r1.47 HTMLDocument.java --- javax/swing/text/html/HTMLDocument.java 6 Nov 2006 20:28:55 -0000 1.47 +++ javax/swing/text/html/HTMLDocument.java 8 Nov 2006 11:20:45 -0000 @@ -558,65 +558,69 @@ /** * This variable becomes true after the insert tag has been encountered. */ boolean insertTagEncountered; /** A temporary variable that helps with the printing out of debug information **/ boolean debug = false; /** * This is true when we are inside a pre tag. */ boolean inPreTag = false; /** + * True when we are inside a paragraph (P, H1-H6, P-IMPLIED). + */ + boolean inParagraph = false; + + /** + * True when we are currently inside an implied paragraph. + */ + boolean inImpliedParagraph = false; + + /** * This is true when we are inside a style tag. This will add text * content inside this style tag beeing parsed as CSS. * * This is package private to avoid accessor methods. */ boolean inStyleTag = false; /** * This is true when we are inside a <textarea> tag. Any text * content will then be added to the text area. * * This is package private to avoid accessor methods. */ boolean inTextArea = false; /** * This contains all stylesheets that are somehow read, either * via embedded style tags, or via linked stylesheets. The * elements will be String objects containing a stylesheet each. */ ArrayList styles; /** * The document model for a textarea. * * This is package private to avoid accessor methods. 
*/ Document textAreaDocument; - void print (String line) - { - if (debug) - System.out.println (line); - } - public class TagAction { /** * This method is called when a start tag is seen for one of the types * of tags associated with this Action. By default this does nothing. */ public void start(HTML.Tag t, MutableAttributeSet a) { // Nothing to do here. } /** * Called when an end tag is seen for one of the types of tags associated * with this Action. By default does nothing. */ @@ -800,39 +804,41 @@ blockOpen(HTML.Tag.IMPLIED, new SimpleAttributeSet()); addSpecialElement(t, a); blockClose(HTML.Tag.IMPLIED); } } public class ParagraphAction extends BlockAction { /** * This method is called when a start tag is seen for one of the types * of tags associated with this Action. */ public void start(HTML.Tag t, MutableAttributeSet a) { blockOpen(t, a); + inParagraph = true; } /** * Called when an end tag is seen for one of the types of tags associated * with this Action. */ public void end(HTML.Tag t) { blockClose(t); + inParagraph = false; } } /** * This action is performed when a <pre> tag is parsed. */ public class PreAction extends BlockAction { /** * This method is called when a start tag is seen for one of the types * of tags associated with this Action. */ public void start(HTML.Tag t, MutableAttributeSet a) { inPreTag = true; @@ -869,42 +875,40 @@ { addSpecialElement(t, a); } } class AreaAction extends TagAction { /** * This method is called when a start tag is seen for one of the types * of tags associated with this Action. */ public void start(HTML.Tag t, MutableAttributeSet a) throws NotImplementedException { // FIXME: Implement. - print ("AreaAction.start not implemented"); } /** * Called when an end tag is seen for one of the types of tags associated * with this Action. */ public void end(HTML.Tag t) throws NotImplementedException { // FIXME: Implement. - print ("AreaAction.end not implemented"); } } /** * Converts HTML tags to CSS attributes. 
*/ class ConvertAction extends TagAction { public void start(HTML.Tag tag, MutableAttributeSet atts) { pushCharacterStyle(); charAttr.addAttribute(tag, atts.copyAttributes()); StyleSheet styleSheet = getStyleSheet(); @@ -929,56 +933,53 @@ { popCharacterStyle(); } } class BaseAction extends TagAction { /** * This method is called when a start tag is seen for one of the types * of tags associated with this Action. */ public void start(HTML.Tag t, MutableAttributeSet a) throws NotImplementedException { // FIXME: Implement. - print ("BaseAction.start not implemented"); } /** * Called when an end tag is seen for one of the types of tags associated * with this Action. */ public void end(HTML.Tag t) throws NotImplementedException { // FIXME: Implement. - print ("BaseAction.end not implemented"); } } class HeadAction extends BlockAction { /** * This method is called when a start tag is seen for one of the types * of tags associated with this Action. */ public void start(HTML.Tag t, MutableAttributeSet a) throws NotImplementedException { // FIXME: Implement. - print ("HeadAction.start not implemented: "+t); super.start(t, a); } /** * Called when an end tag is seen for one of the types of tags associated * with this Action. */ public void end(HTML.Tag t) { // We read in all the stylesheets that are embedded or referenced // inside the header. if (styles != null) { int numStyles = styles.size(); for (int i = 0; i < numStyles; i++) @@ -989,92 +990,86 @@ } super.end(t); } } class LinkAction extends TagAction { /** * This method is called when a start tag is seen for one of the types * of tags associated with this Action. */ public void start(HTML.Tag t, MutableAttributeSet a) throws NotImplementedException { // FIXME: Implement. - print ("LinkAction.start not implemented"); } /** * Called when an end tag is seen for one of the types of tags associated * with this Action. */ public void end(HTML.Tag t) throws NotImplementedException { // FIXME: Implement. 
- print ("LinkAction.end not implemented"); } } class MapAction extends TagAction { /** * This method is called when a start tag is seen for one of the types * of tags associated with this Action. */ public void start(HTML.Tag t, MutableAttributeSet a) throws NotImplementedException { // FIXME: Implement. - print ("MapAction.start not implemented"); } /** * Called when an end tag is seen for one of the types of tags associated * with this Action. */ public void end(HTML.Tag t) throws NotImplementedException { // FIXME: Implement. - print ("MapAction.end not implemented"); } } class MetaAction extends TagAction { /** * This method is called when a start tag is seen for one of the types * of tags associated with this Action. */ public void start(HTML.Tag t, MutableAttributeSet a) throws NotImplementedException { // FIXME: Implement. - print ("MetaAction.start not implemented"); } /** * Called when an end tag is seen for one of the types of tags associated * with this Action. */ public void end(HTML.Tag t) throws NotImplementedException { // FIXME: Implement. - print ("MetaAction.end not implemented"); } } class StyleAction extends TagAction { /** * This method is called when a start tag is seen for one of the types * of tags associated with this Action. */ public void start(HTML.Tag t, MutableAttributeSet a) { inStyleTag = true; } /** @@ -1085,56 +1080,51 @@ { inStyleTag = false; } } class TitleAction extends TagAction { /** * This method is called when a start tag is seen for one of the types * of tags associated with this Action. */ public void start(HTML.Tag t, MutableAttributeSet a) throws NotImplementedException { // FIXME: Implement. - print ("TitleAction.start not implemented"); } /** * Called when an end tag is seen for one of the types of tags associated * with this Action. */ public void end(HTML.Tag t) throws NotImplementedException { // FIXME: Implement. 
- print ("TitleAction.end not implemented"); } } public HTMLReader(int offset) { this (offset, 0, 0, null); } public HTMLReader(int offset, int popDepth, int pushDepth, HTML.Tag insertTag) { - print ("HTMLReader created with pop: "+popDepth - + " push: "+pushDepth + " offset: "+offset - + " tag: "+insertTag); this.insertTag = insertTag; this.offset = offset; this.popDepth = popDepth; this.pushDepth = pushDepth; initTags(); } void initTags() { tagToAction = new HashMap(72); CharacterAction characterAction = new CharacterAction(); HiddenAction hiddenAction = new HiddenAction(); AreaAction areaAction = new AreaAction(); BaseAction baseAction = new BaseAction(); BlockAction blockAction = new BlockAction(); @@ -1400,31 +1390,30 @@ } } } /** * This is invoked after the stream has been parsed but before it has been * flushed. * * @param eol one of \n, \r, or \r\n, whichever was encountered the most in * parsing the stream * @since 1.3 */ public void handleEndOfLineString(String eol) { // FIXME: Implement. - print ("HTMLReader.handleEndOfLineString not implemented yet"); } /** * Adds the given text to the textarea document. Called only when we are * within a textarea. * * @param data the text to add to the textarea */ protected void textAreaContent(char[] data) { try { int offset = textAreaDocument.getLength(); textAreaDocument.insertString(offset, new String(data), null); } @@ -1461,166 +1450,178 @@ { // Add remaining last line. addContent(data, start, data.length - start); } } /** * Instructs the parse buffer to create a block element with the given * attributes. 
* * @param t the tag that requires opening a new block * @param attr the attribute set for the new block */ protected void blockOpen(HTML.Tag t, MutableAttributeSet attr) { - printBuffer(); + if (inImpliedParagraph) + blockClose(HTML.Tag.IMPLIED); + DefaultStyledDocument.ElementSpec element; parseStack.push(t); AbstractDocument.AttributeContext ctx = getAttributeContext(); AttributeSet copy = attr.copyAttributes(); copy = ctx.addAttribute(copy, StyleConstants.NameAttribute, t); element = new DefaultStyledDocument.ElementSpec(copy, DefaultStyledDocument.ElementSpec.StartTagType); parseBuffer.addElement(element); - printBuffer(); } /** * Instructs the parse buffer to close the block element associated with * the given HTML.Tag * * @param t the HTML.Tag that is closing its block */ protected void blockClose(HTML.Tag t) { - printBuffer(); DefaultStyledDocument.ElementSpec element; + if (inImpliedParagraph) + { + inImpliedParagraph = false; + inParagraph = false; + if (t != HTML.Tag.IMPLIED) + blockClose(HTML.Tag.IMPLIED); + } + // If the previous tag is a start tag then we insert a synthetic // content tag. DefaultStyledDocument.ElementSpec prev; prev = (DefaultStyledDocument.ElementSpec) parseBuffer.get(parseBuffer.size() - 1); if (prev.getType() == DefaultStyledDocument.ElementSpec.StartTagType) { AbstractDocument.AttributeContext ctx = getAttributeContext(); AttributeSet attributes = ctx.getEmptySet(); attributes = ctx.addAttribute(attributes, StyleConstants.NameAttribute, HTML.Tag.CONTENT); element = new DefaultStyledDocument.ElementSpec(attributes, DefaultStyledDocument.ElementSpec.ContentType, new char[0], 0, 0); parseBuffer.add(element); } element = new DefaultStyledDocument.ElementSpec(null, DefaultStyledDocument.ElementSpec.EndTagType); parseBuffer.addElement(element); - printBuffer(); if (parseStack.size() > 0) parseStack.pop(); } /** * Adds text to the appropriate context using the current character * attribute set. 
* * @param data the text to add * @param offs the offset at which to add it * @param length the length of the text to add */ protected void addContent(char[] data, int offs, int length) { addContent(data, offs, length, true); } /** * Adds text to the appropriate context using the current character * attribute set, and possibly generating an IMPLIED Tag if necessary. * * @param data the text to add * @param offs the offset at which to add it * @param length the length of the text to add * @param generateImpliedPIfNecessary whether or not we should generate * an HTML.Tag.IMPLIED tag if necessary */ protected void addContent(char[] data, int offs, int length, boolean generateImpliedPIfNecessary) { + if (generateImpliedPIfNecessary && (! inParagraph) && (! inPreTag)) + { + blockOpen(HTML.Tag.IMPLIED, new SimpleAttributeSet()); + inParagraph = true; + inImpliedParagraph = true; + } + AbstractDocument.AttributeContext ctx = getAttributeContext(); DefaultStyledDocument.ElementSpec element; AttributeSet attributes = null; // Copy the attribute set, don't use the same object because // it may change if (charAttr != null) attributes = charAttr.copyAttributes(); else attributes = ctx.getEmptySet(); attributes = ctx.addAttribute(attributes, StyleConstants.NameAttribute, HTML.Tag.CONTENT); element = new DefaultStyledDocument.ElementSpec(attributes, DefaultStyledDocument.ElementSpec.ContentType, data, offs, length); - printBuffer(); // Add the element to the buffer parseBuffer.addElement(element); - printBuffer(); if (parseBuffer.size() > HTMLDocument.this.getTokenThreshold()) { try { flush(); } catch (BadLocationException ble) { // TODO: what to do here? } } } /** * Adds content that is specified in the attribute set. * * @param t the HTML.Tag * @param a the attribute set specifying the special content */ protected void addSpecialElement(HTML.Tag t, MutableAttributeSet a) { + if (t != HTML.Tag.FRAME && ! inParagraph && ! 
inImpliedParagraph) + { + blockOpen(HTML.Tag.IMPLIED, new SimpleAttributeSet()); + inParagraph = true; + inImpliedParagraph = true; + } + a.addAttribute(StyleConstants.NameAttribute, t); // The two spaces are required because some special elements like HR // must be broken. At least two characters are needed to break into the // two parts. DefaultStyledDocument.ElementSpec spec = new DefaultStyledDocument.ElementSpec(a.copyAttributes(), DefaultStyledDocument.ElementSpec.ContentType, new char[] {' ', ' '}, 0, 2 ); parseBuffer.add(spec); } - void printBuffer() - { - print ("\n*********BUFFER**********"); - for (int i = 0; i < parseBuffer.size(); i ++) - print (" "+parseBuffer.get(i)); - print ("***************************"); - } } /** * Gets the reader for the parser to use when loading the document with HTML. * * @param pos - the starting position * @return - the reader */ public HTMLEditorKit.ParserCallback getReader(int pos) { return new HTMLReader(pos); } /** * Gets the reader for the parser to use when loading the document with HTML. Index: javax/swing/text/html/HTMLEditorKit.java =================================================================== RCS file: /cvsroot/classpath/classpath/javax/swing/text/html/HTMLEditorKit.java,v retrieving revision 1.39 diff -u -1 -5 -r1.39 HTMLEditorKit.java --- javax/swing/text/html/HTMLEditorKit.java 8 Nov 2006 11:09:31 -0000 1.39 +++ javax/swing/text/html/HTMLEditorKit.java 8 Nov 2006 11:20:45 -0000 @@ -70,31 +70,31 @@ import javax.swing.text.Document; import javax.swing.text.EditorKit; import javax.swing.text.Element; import javax.swing.text.MutableAttributeSet; import javax.swing.text.StyleConstants; import javax.swing.text.StyledDocument; import javax.swing.text.StyledEditorKit; import javax.swing.text.TextAction; import javax.swing.text.View; import javax.swing.text.ViewFactory; import javax.swing.text.html.parser.ParserDelegator; /* Move these imports here after javax.swing.text.html to make it compile with jikes. 
*/ import gnu.javax.swing.text.html.parser.GnuParserDelegator; -import gnu.javax.swing.text.html.parser.HTML_401Swing; +import gnu.javax.swing.text.html.parser.HTML_401F; /** * @author Lillian Angel (langel at redhat dot com) */ public class HTMLEditorKit extends StyledEditorKit implements Serializable, Cloneable, Accessible { /** * Fires the hyperlink events on the associated component * when needed. */ public static class LinkController extends MouseAdapter @@ -959,31 +959,31 @@ HTMLDocument document = new HTMLDocument(getStyleSheet()); document.setParser(getParser()); return document; } /** * Get the parser that this editor kit uses for reading HTML streams. This * method can be overridden to use the alternative parser. * * @return the HTML parser (by default, [EMAIL PROTECTED] ParserDelegator}). */ protected Parser getParser() { if (parser == null) { - parser = new GnuParserDelegator(HTML_401Swing.getInstance()); + parser = new GnuParserDelegator(HTML_401F.getInstance()); } return parser; } /** * Inserts HTML into an existing document. * * @param doc - the Document to insert the HTML into. * @param offset - where to begin inserting the HTML. * @param html - the String to insert * @param popDepth - the number of ElementSpec.EndTagTypes * to generate before inserting * @param pushDepth - the number of ElementSpec.StartTagTypes * with a direction of ElementSpec.JoinNextDirection that * should be generated before