Simon,
I don't seem to be able to apply your patch again - an 'unknown line
type was found in line 12'.
Can you create it again and attach it to our old JIRA issue? I have
reopened it for this purpose.
regards,
Martin
On 11/1/05, Simon Kitching <[EMAIL PROTECTED]> wrote:
> Martin Marinschek wrote:
> > Don't stress yourself - it's just the nightly build, so not too big of a
> > problem.
>
> Thanks, but it's hopefully done anyway.
>
> changes:
> * Handle DOCTYPE and Processing Instruction commands in input HTML
> * Track line# of input for error messages
> * Remove some debugging printlns
>
> I can also provide a patch soon to format the code to the MyFaces
> convention rather than the Sun convention if you wish. Sorry, my Eclipse
> is set up to format stuff that way automatically and I forgot to
> reformat before posting.
>
> Regards,
>
> Simon
>
>
> Index: ReducedHTMLParser.java
> ===================================================================
> --- ReducedHTMLParser.java (revision 329922)
> +++ ReducedHTMLParser.java (working copy)
> @@ -49,6 +49,7 @@
> private static final int STATE_IN_TAG = 2;
>
> private int offset;
> + private int lineNumber;
> private CharSequence seq;
> private CallbackListener listener;
>
> @@ -75,15 +76,32 @@
> return offset >= seq.length();
> }
>
> + int getCurrentLineNumber() {
> + return lineNumber;
> + }
> +
> /**
> * Advance the current parse position over any whitespace characters.
> */
> void consumeWhitespace() {
> + boolean crSeen = false;
> +
> while (offset < seq.length()) {
> char c = seq.charAt(offset);
> if (!Character.isWhitespace(c)) {
> break;
> }
> +
> + // Track line number for error messages.
> + if (c == '\r') {
> + ++lineNumber;
> + crSeen = true;
> + } else if ((c == '\n') && !crSeen) {
> + ++lineNumber;
> + } else {
> + crSeen = false;
> + }
> +
> ++offset;
> }
> }
> @@ -193,6 +211,10 @@
> // TODO: should we consider a string to be terminated by a newline?
> // that would help with runaway strings but I think that multiline
> // strings *are* allowed...
> + //
> + // TODO: detect newlines within strings and increment lineNumber.
> + // This isn't so important, though; they aren't common and being a
> + // few lines out in an error message isn't serious either.
> StringBuffer stringBuf = new StringBuffer();
> boolean escaping = false;
> while (!isFinished()) {
> @@ -248,6 +270,8 @@
> * @param s is a set of characters that should not be discarded.
> */
> void consumeExcept(String s) {
> + boolean crSeen = false;
> +
> while (offset < seq.length()) {
> char c = seq.charAt(offset);
> if (s.indexOf(c) >= 0) {
> @@ -255,6 +279,16 @@
> return;
> }
>
> + // Track line number for error messages.
> + if (c == '\r') {
> + ++lineNumber;
> + crSeen = true;
> + } else if ((c == '\n') && !crSeen) {
> + ++lineNumber;
> + } else {
> + crSeen = false;
> + }
> +
> ++offset;
> }
> }
> @@ -269,6 +303,7 @@
> int currentTagStart = -1;
> String currentTagName = null;
>
> + lineNumber = 1;
> offset = 0;
> while (offset < seq.length())
> {
> @@ -282,6 +317,10 @@
> if (consumeMatch("<!--")) {
> // VERIFY: can "< ! --" start a comment?
> state = STATE_IN_COMMENT;
> + } else if (consumeMatch("<!")) {
> + // xml processing instruction or <!DOCTYPE> tag
> + // we don't need to actually do anything here
> + log.debug("PI found at line " + getCurrentLineNumber());
> } else if (consumeMatch("</")) {
> // VERIFY: is "< / foo >" a valid end-tag?
>
> @@ -306,10 +345,17 @@
> // the current info until the end of this tag.
> currentTagStart = offset - 1;
> currentTagName = consumeElementName();
> - state = STATE_IN_TAG;
> + if (currentTagName == null) {
> + log.warn("Invalid HTML; bare lessthan sign found at
> line "
> + + getCurrentLineNumber());
> + // remain in STATE_READY; this isn't really the
> start of
> + // an xml element.
> + } else {
> + state = STATE_IN_TAG;
> + }
> } else {
> // should never get here
> - throw new Error("Internal error");
> + throw new Error("Internal error at line " +
> getCurrentLineNumber());
> }
>
> continue;
> @@ -378,7 +424,6 @@
> */
> void openedTag(int startOffset, int endOffset, String tagName) {
> log.debug("Found open tag at " + startOffset + ":" + endOffset + ":"
> + tagName);
> - System.out.println("Found open tag at " + startOffset + ":" +
> endOffset + ":" + tagName);
>
> if ("head".equalsIgnoreCase(tagName)) {
> listener.openedStartTag(startOffset, HEAD_TAG);
> @@ -394,7 +439,6 @@
>
> void closedTag(int startOffset, int endOffset, String tagName) {
> log.debug("Found close tag at " + startOffset + ":" + endOffset +
> ":" + tagName);
> - System.out.println("Found close tag at " + startOffset + ":" +
> endOffset + ":" + tagName);
>
> if ("head".equalsIgnoreCase(tagName)) {
> listener.openedEndTag(startOffset, HEAD_TAG);
>
>
> Index: ReducedHTMLParserTest.java
> ===================================================================
> --- ReducedHTMLParserTest.java (revision 329925)
> +++ ReducedHTMLParserTest.java (working copy)
> @@ -322,8 +322,19 @@
> parser.consumeExcept("z");
> }
>
> + // test parsing completes when invalid tag found.
> + public void testParseBadTag() {
> + String s = "xxxx \n\n <# \n\n";
> + CallbackListener listener = new ParseCallbackListener();
> + ReducedHTMLParser parser = new ReducedHTMLParser(s, listener);
> +
> + parser.parse();
> + assertTrue(parser.isFinished());
> + }
> +
> // test the full parse method
> public void testParse() {
> + String s0 = "<!DOCTYPE PUBLIC \"sss\" \"http:foo\">\n";
> String s1 = "<html><head>";
> String s2 = "\n<!-- a comment --><title>foo</title>";
> String s3 = "</head>";
> @@ -338,6 +349,7 @@
> String s8 = "</body> </html>";
>
> StringBuffer buf = new StringBuffer();
> + buf.append(s0);
> buf.append(s1);
> buf.append(s2);
> buf.append(s3);
> @@ -354,13 +366,13 @@
>
> // check that listener has correctly computed the offset to the char
> just
> // before the </head> tag starts.
> - int afterHeadPos = s1.length();
> + int afterHeadPos = s0.length() + s1.length();
> assertEquals("Pos after <head> tag ", afterHeadPos,
> listener.headerInsertPosition);
>
> - int beforeBodyPos = s1.length() + s2.length() + s3.length();
> + int beforeBodyPos = afterHeadPos + s2.length() + s3.length();
> assertEquals("Pos before <body> tag", beforeBodyPos,
> listener.beforeBodyPosition);
>
> - int afterBodyPos = s1.length() + s2.length() + s3.length() +
> s4.length();
> + int afterBodyPos = beforeBodyPos + s4.length();
> assertEquals("Pos after <body> tag", afterBodyPos,
> listener.bodyInsertPosition);
> }
> }
>
>
>
--
http://www.irian.at
Your JSF powerhouse -
JSF Trainings in English and German