Author: craigmcc Date: Fri Apr 29 19:05:53 2005 New Revision: 165372 URL: http://svn.apache.org/viewcvs?rev=165372&view=rev Log: Check in "beefed up" parser and unit test case. Modified name of unit test class (convention is "FooTestCase" to test a class "Foo", in the same package) and tweaked build.xml so that "ant test" works.
Added: struts/shale/trunk/clay-plugin/src/test/ struts/shale/trunk/clay-plugin/src/test/org/ struts/shale/trunk/clay-plugin/src/test/org/apache/ struts/shale/trunk/clay-plugin/src/test/org/apache/shale/ struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/ struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/ struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/ParserTestCase.java Modified: struts/shale/trunk/clay-plugin/build.xml struts/shale/trunk/clay-plugin/src/java/org/apache/shale/clay/parser/Parser.java Modified: struts/shale/trunk/clay-plugin/build.xml URL: http://svn.apache.org/viewcvs/struts/shale/trunk/clay-plugin/build.xml?rev=165372&r1=165371&r2=165372&view=diff ============================================================================== --- struts/shale/trunk/clay-plugin/build.xml (original) +++ struts/shale/trunk/clay-plugin/build.xml Fri Apr 29 19:05:53 2005 @@ -304,7 +304,7 @@ usefile="false"/> <batchtest fork="yes"> <fileset dir="${build.home}/test-classes" - includes="org/apache/shale/clay/test/*/*TestCase.class"/> + includes="org/apache/shale/clay/**/*TestCase.class"/> </batchtest> </junit> Modified: struts/shale/trunk/clay-plugin/src/java/org/apache/shale/clay/parser/Parser.java URL: http://svn.apache.org/viewcvs/struts/shale/trunk/clay-plugin/src/java/org/apache/shale/clay/parser/Parser.java?rev=165372&r1=165371&r2=165372&view=diff ============================================================================== --- struts/shale/trunk/clay-plugin/src/java/org/apache/shale/clay/parser/Parser.java (original) +++ struts/shale/trunk/clay-plugin/src/java/org/apache/shale/clay/parser/Parser.java Fri Apr 29 19:05:53 2005 @@ -110,7 +110,7 @@ } /** - * <p>This method is overriden to convert the key into a neutral + * <p>This method is overridden to convert the key into a neutral * case so that the <code>Map</code> access method will be case * insensitive.</p> */ @@ -125,18 +125,78 @@ } + + /** + * <p>This array 
of HTML tags can have optional ending tags.</p> + */ + private static final String[] OPTIONAL_ENDING_TAG = {"TR", "TH", "TD", "LI", "DT", "DD", "LH", "OPTION"}; /** - * <p>Parse a document fragment into graphs of {@link Node}. The - * resulting type is a list because the fragment might not be - * well-formed. + * <p>This array of parent tags is cross referenced by the <code>OPTIONAL_ENDING_TAG</code> array</p> + */ + private static final String[][] TAG_PARENTS = {{"TABLE"}, {"TABLE"}, {"TR"}, {"OL", "UL"}, {"DL"}, {"DL"}, {"DL"}, {"SELECT"}}; + + /** + * <p> + * Determines if a HTML nodeName is a type of tag that can optionally have a + * ending tag * </p> - * - * @param document input source - * @return collection of {@link Node} + * + * @param nodeName the name of the html node + * @return <code>true</code> if the nodeName is in the + * <code>OPTIONAL-ENDING_TAG<code> array; otherwise, <code>false</code> is returned + */ + protected boolean isOptionalEndingTag(String nodeName) { + if (nodeName != null) { + for (int i = 0; i < OPTIONAL_ENDING_TAG.length; i++) { + if (OPTIONAL_ENDING_TAG[i].equalsIgnoreCase(nodeName)) { + return true; + } + } + } + + return false; + } + + /** + * <p> + * Checks to see if a optional ending tag has a valid parent. 
This is use to + * detect a implicit ending tag + * </p> + * + * @param nodeName of the optional ending tag + * @param parentNodeName name of the parent + * @return <code>true</code> if the parentNodeName is a valid parent for + * the nodeName; otherwise, a <code>false</code> value is returned */ + protected boolean isValidOptionalEndingTagParent(String nodeName, + String parentNodeName) { + if (nodeName != null && parentNodeName != null) { + for (int i = 0; i < OPTIONAL_ENDING_TAG.length; i++) { + if (OPTIONAL_ENDING_TAG[i].equalsIgnoreCase(nodeName)) { + for (int j = 0; j < TAG_PARENTS[i].length; j++) { + if (TAG_PARENTS[i][j].equalsIgnoreCase(parentNodeName)) { + return true; + } + } + break; + } + } + } + return false; + } + + /** + * <p> + * Parse a document fragment into graphs of {@link Node}. The resulting + * type is a list because the fragment might not be well-formed. + * </p> + * + * @param document + * input source + * @return collection of {@link Node} + */ public List parse(StringBuffer document) { - Stack nodes = new Stack(); Node root = new Node(null); Node current = root; current.setName("namingContainer"); @@ -149,10 +209,10 @@ Node node = buildNode(token); if (!node.isStart() && node.isEnd()) { - - while (true) { + + pop: while (true) { if (current == null) - break; + break pop; if (isNodeNameEqual(current, node)) { @@ -163,9 +223,12 @@ // nodes are equal, make the parent of the // begin tag the current node current = current.getParent(); - break; + break pop; } - + + if (isOptionalEndingTag(current.getName())) + current.setWellFormed(true); + current = (Node) current.getParent(); } @@ -181,22 +244,52 @@ // </select> // - if ((current.getName() != null && current.getName() - .equalsIgnoreCase("option")) - && (current.isStart() && !current.isEnd()) - && (current.getParent() != null - && current.getParent().getName() != null && current - .getParent().getName().equalsIgnoreCase( - "select"))) { + + if 
(isOptionalEndingTag(current.getName()) + && current.isStart() && !current.isEnd() + && current.getParent() != null + && isValidOptionalEndingTagParent(node.getName(), current.getParent().getName())) { + current.setWellFormed(true); current.getParent().addChild(node); current = node; } else { - - // adding a new node to the current making it current - current.addChild(node); - current = node; + // the current node is a optional and the new node is it's parent + // simulate having ending nodes + if (this.isValidOptionalEndingTagParent(current.getName(), node.getName())) { + + pop: while (true) { + if (current == null) + break pop; + + if (isNodeNameEqual(current, node)) { + + // isWellFormed indicates a beginning tag and ending tag + // was found + current.setWellFormed(true); + + // nodes are equal, make the parent of the + // begin tag the current node + current = current.getParent(); + current.addChild(node); + current = node; + + break pop; + } + + if (isOptionalEndingTag(current.getName())) + current.setWellFormed(true); + + current = (Node) current.getParent(); + + } + + } else { + // adding a new node to the current making it current + current.addChild(node); + current = node; + } } } else { if (current != null) @@ -227,12 +320,12 @@ if (node1 != null && node2 != null) { if (node1.getName() != null && node2.getName() != null) { - if (node1.getName().equals(node2.getName())) { + if (node1.getName().equalsIgnoreCase(node2.getName())) { if (node1.getQname() == null && node2.getQname() == null) f = true; else if (node1.getQname() != null && node2.getQname() != null - && node1.getQname().equals(node2.getQname())) + && node1.getQname().equalsIgnoreCase(node2.getQname())) f = true; } } @@ -247,6 +340,32 @@ return f; } + private final static String[] SELF_TERMINATING = {"META", "LINK", "HR", + "BASEFONT","IMG", "PARAM", "BR", "AREA", "INPUT", "ISINDEX", + "BASE"}; + + /** + * <p> + * Checks to see if the nodeName is within the <code>SELF_TERMINATING</code> + * table 
of values + * </p> + * + * @param nodeName to check for self termination + * @return <code>true</code> if is self terminating otherwise + * <code>false</code> + */ + protected boolean isSelfTerminating(String nodeName) { + + if (nodeName != null) { + for (int i = 0; i < SELF_TERMINATING.length; i++) { + if (SELF_TERMINATING[i].equalsIgnoreCase(nodeName)) + return true; + } + } + + return false; + } + /** * <p>This is a factory method that builds a {@link Node} from a * {@link Token}. @@ -255,6 +374,7 @@ protected Node buildNode(Token token) { boolean isBeginTag = false; boolean isEndTag = false; + boolean isComment = false; String nodeName = null; String qname = null; @@ -272,43 +392,72 @@ isEndTag = true; isBeginTag = true; } else if (token.getDocument().charAt(token.getBeginOffset()) == '<' + && token.getDocument().charAt(token.getBeginOffset() + 1) == '!' + && token.getDocument().charAt(token.getEndOffset() - 2) == '-' + && token.getDocument().charAt(token.getEndOffset() - 1) == '>') { + // self contained comment tag found + isEndTag = true; + isBeginTag = true; + isComment = true; + } else if (token.getDocument().charAt(token.getBeginOffset()) == '<' + && token.getDocument().charAt(token.getBeginOffset() + 1) == '!' + && token.getDocument().charAt(token.getEndOffset() - 1) != '>') { + // begin comment tag found + isEndTag = false; + isBeginTag = true; + isComment = true; + } else if (token.getDocument().charAt(token.getBeginOffset()) != '<' + && token.getDocument().charAt(token.getEndOffset() - 2) == '-' + && token.getDocument().charAt(token.getEndOffset() - 1) == '>') { + // ending comment tag found + isEndTag = true; + isBeginTag = false; + isComment = true; + } else if (token.getDocument().charAt(token.getBeginOffset()) == '<' && (token.getDocument().charAt(token.getBeginOffset() + 1) != '/' - && token.getDocument().charAt( - token.getBeginOffset() + 1) != '?' 
&& token - .getDocument().charAt(token.getBeginOffset() + 1) != '%') + && token.getDocument().charAt(token.getBeginOffset() + 1) != '?' + && token.getDocument().charAt(token.getBeginOffset() + 1) != '%') && token.getDocument().charAt(token.getEndOffset() - 1) == '>') { - // ending tag found + // beginning tag found isEndTag = false; isBeginTag = true; } // find the node name if (isBeginTag || isEndTag) { - // find the node name delimiter - int e = token.getDocument() - .indexOf(" ", token.getBeginOffset() + 2); - // end of token is the delimiter - if (e == -1 || e > token.getEndOffset()) - e = (isBeginTag && isEndTag) ? (token.getEndOffset() - 2) - : (token.getEndOffset() - 1); - // find the start of the node attribute body - int s = (!isBeginTag && isEndTag) ? token.getBeginOffset() + 2 - : token.getBeginOffset() + 1; - - // return the full node name - nodeName = token.getDocument().substring(s, e); - // separate the namespace - e = nodeName.indexOf(':'); - if (e > -1) - qname = nodeName.substring(0, e); - nodeName = nodeName.substring(e + 1); + // comments are treated special because and ending comment may will not + // have a node name <!-- <input > --> + if (isComment) { + + nodeName = "--"; + + } else { + // find the node name delimiter + int e = token.getDocument().indexOf(" ", token.getBeginOffset() + 2); + // end of token is the delimiter + if (e == -1 || e > token.getEndOffset()) + e = (isBeginTag && isEndTag) ? (token.getEndOffset() - 2) + : (token.getEndOffset() - 1); + // find the start of the node attribute body + int s = (!isBeginTag && isEndTag) ? 
token.getBeginOffset() + 2 + : token.getBeginOffset() + 1; + + // return the full node name + nodeName = token.getDocument().substring(s, e); + // separate the namespace + e = nodeName.indexOf(':'); + if (e > -1) + qname = nodeName.substring(0, e); + nodeName = nodeName.substring(e + 1); + + } } Attributes attributes = this.new Attributes(); // look for attribute in a beginning tag only - if (isBeginTag) { + if (isBeginTag && !isComment) { int s = token.getDocument() .indexOf(" ", token.getBeginOffset() + 2); @@ -330,9 +479,7 @@ // add some exception here. check for html elements that are assumed // self terminating - if (nodeName != null - && (nodeName.equalsIgnoreCase("input") || - nodeName.equalsIgnoreCase("br"))) { + if (isBeginTag && isSelfTerminating(nodeName)) { isEndTag = true; } Added: struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/ParserTestCase.java URL: http://svn.apache.org/viewcvs/struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/ParserTestCase.java?rev=165372&view=auto ============================================================================== --- struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/ParserTestCase.java (added) +++ struts/shale/trunk/clay-plugin/src/test/org/apache/shale/clay/parser/ParserTestCase.java Fri Apr 29 19:05:53 2005 @@ -0,0 +1,267 @@ +/* + * Copyright 2004-2005 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.shale.clay.parser; + +import java.util.Iterator; +import java.util.List; + +import junit.framework.TestCase; + +public class ParserTestCase extends TestCase { + + + /** + * <p>Tests to see if we can parse a document fragment + * that has multiple root nodes</p> + */ + public void testManyRootNodes() { + Parser p = new Parser(); + StringBuffer doc1 = new StringBuffer(); + + doc1.append("<p>") + .append("<input type=text size=10 maxsize=10 id=username>") + .append("<input type=text size=10 maxsize=10 id=password>") + .append("</p>") + .append("<p>") + .append("This is a test. Just a test") + .append("</p>") + .append("<p></p>"); + + List nodes1 = p.parse(doc1); + assertTrue("Has 3 root nodes", nodes1.size() == 3); + + } + + /** + * <p>Test a couple comment block scenarios</p> + */ + public void testCommentBlocks() { + Parser p = new Parser(); + StringBuffer doc1 = new StringBuffer(); + + + doc1.append("<p>") + .append("<!--<input type=text size=10 maxsize=10 id=username>") + .append("<input type=text size=10 maxsize=10 id=password>-->") + .append("</p>") + .append("<!--This is a test. Just a test-->") + .append("<!--<p>Testing <b>123</b></p>-->"); + + List nodes1 = p.parse(doc1); + assertTrue("Has 3 root nodes", nodes1.size() == 3); + + Node node = (Node) nodes1.get(0); + assertTrue("first paragraph has 1 node", node.getChildren().size() == 1); + + node = (Node) node.getChildren().get(0); + assertTrue("first comment block has 2 nodes", node.getChildren().size() == 2); + + node = (Node) nodes1.get(1); + assertTrue("second root has 0 child nodes", node.getChildren().size() == 0); + + node = (Node) nodes1.get(2); + assertTrue("third root has 1 child node", node.getChildren().size() == 1); + + + } + + + /** + * <p> + * Tests case insensitivity in parsing the document. 
+ * </p> + */ + public void testCaseInsensitivity() { + Parser p = new Parser(); + StringBuffer doc1 = new StringBuffer(); + + doc1.append("<body>").append("<p>").append( + "<input type=text size=10 maxsize=10 id=username>").append( + "<input type=text size=10 maxsize=10 id=password>").append( + "<textarea id=memo>testing 123</textarea>").append("</p>") + .append("</body>"); + + List nodes1 = p.parse(doc1); + assertTrue("Lower case HTML document", nodes1.size() == 1); + + StringBuffer doc2 = new StringBuffer(); + + doc2.append("<BODY>").append("<p>").append( + "<input TYPE=text size=10 maxSize=10 Id=username>").append( + "<input typE=text size=10 MAXSIZE=10 id=password>").append( + "<textarea id=memo>testing 123</TEXTAREA>").append("</P>") + .append("</body>"); + + List nodes2 = p.parse(doc2); + assertTrue("Mixed case HTML document", nodes2.size() == 1); + + compareTrees(nodes1, nodes2); + + } + + /** + * <p> + * Tests the parsing to make sure that self terminated nodes are handled the + * same as well-formed self terminating nodes. 
+ * </p> + */ + public void testSelfTerminating() { + Parser p = new Parser(); + StringBuffer doc1 = new StringBuffer(); + + doc1 + .append("<head>") + .append("<title>Shale Rocks</title>") + .append( + "<style type=\"text/css\" media=\"all\"><!-- @import \"common.css\"; --></style>") + .append( + "<style type=\"text/css\" media=\"all\"><!-- @import \"content.css\"; --></style>") + .append( + "<script type=\"text/javascript\" src=\"common.js\"></script>") + .append( + "<meta http-equiv=\"content-type\" content=\"text/html; charset=iso-8859-1\">") + .append("<META HTTP-EQUIV=\"PRAGMA\" CONTENT=\"NO-CACHE\">") + .append("<META HTTP-EQUIV=\"Expires\" CONTENT=\"-1\">") + .append( + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">") + .append("</head>"); + + List nodes1 = p.parse(doc1); + assertTrue("Lazy HTML has 1 node", nodes1.size() == 1); + + StringBuffer doc2 = new StringBuffer(); + + doc2 + .append("<head>") + .append("<title>Shale Rocks</title>") + .append( + "<style type=\"text/css\" media=\"all\"><!-- @import \"common.css\"; --></style>") + .append( + "<style type=\"text/css\" media=\"all\"><!-- @import \"content.css\"; --></style>") + .append( + "<script type=\"text/javascript\" src=\"common.js\"></script>") + .append( + "<meta http-equiv=\"content-type\" content=\"text/html; charset=iso-8859-1\"/>") + .append("<META HTTP-EQUIV=\"PRAGMA\" CONTENT=\"NO-CACHE\"/>") + .append("<META HTTP-EQUIV=\"Expires\" CONTENT=\"-1\"/>") + .append( + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"/>") + .append("</head>"); + + List nodes2 = p.parse(doc2); + assertTrue("Well-formed HTML has 1 node", nodes2.size() == 1); + + compareTrees(nodes1, nodes2); + + } + + /** + * <p> + * Tests to make sure that the parser handles the HTML tags that can have + * optional ending tags the same that it would a document that was + * well-formed + * </p> + */ + public void testOptionalEnding() { + Parser p = new Parser(); + + 
StringBuffer doc1 = new StringBuffer(); + + // lazy html + doc1 + .append("<table>") + .append("<tr><td>") + .append("<ol><li>1<li>2<li>3</ol>") + .append("<tr><td>") + .append( + "<select><option value=1>1<option value=1>2<option value=1>3</select>") + .append("</table>"); + + List nodes1 = p.parse(doc1); + assertTrue("Lazy HTML has 1 node", nodes1.size() == 1); + + // good html + StringBuffer doc2 = new StringBuffer(); + doc2 + .append("<table>") + .append("<tr><td>") + .append("<ol><li>1</li><li>2</li><li>3</li></ol>") + .append("</td></tr>") + .append("<tr><td>") + .append( + "<select><option value=1>1</option><option value=1>2</option><option value=1>3</option></select>") + .append("</td></tr>").append("</table>"); + + List nodes2 = p.parse(doc2); + assertTrue("Well-formed HTML has 1 node", nodes2.size() == 1); + + compareTrees(nodes1, nodes2); + + } + + /** + * <p> + * Aserts that two trees of parsed HTML have the same number children and + * the same attributes. Verifies that the structure is the same + * </p> + * + * @param tree1 + * @param tree2 + */ + protected void compareTrees(List tree1, List tree2) { + + boolean isSame = (tree1 == null && tree2 == null) + || (tree1.size() == tree2.size()); + + assertTrue("Trees have same # children", isSame); + if (tree1 != null && tree2 != null) { + for (int i = 0; i < tree1.size(); i++) { + Node node1 = (Node) tree1.get(i); + Node node2 = (Node) tree2.get(i); + + isSame = false; + if (node1 != null && node2 != null) { + if (node1.getName() == null && node2.getName() == null) + isSame = true; + else + isSame = (node1.getName() != null + && node2.getName() != null && node1.getName() + .equalsIgnoreCase(node2.getName())); + + assertTrue("Nodes names are equal", isSame); + + isSame = (node1.getAttributes().size() == node2 + .getAttributes().size()); + assertTrue("Nodes have same # attributes", isSame); + Iterator ki = node1.getAttributes().keySet().iterator(); + while (ki.hasNext()) { + String key = (String) ki.next(); 
+ String value1 = (String) node1.getAttributes().get(key); + String value2 = (String) node2.getAttributes().get(key); + isSame = value1.equalsIgnoreCase(value2); + assertTrue("Nodes have same attribute value", isSame); + + } + compareTrees(node1.getChildren(), node2.getChildren()); + + } + } + + } + } + +} --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]