Author: sebb
Date: Sat Mar 3 03:27:49 2007
New Revision: 514130

URL: http://svn.apache.org/viewvc?view=rev&rev=514130
Log:
Bug 40696 - retrieve embedded resources from STYLE URL() attributes

Modified: jakarta/jmeter/branches/rel-2-2/bin/testfiles/HTMLParserTestCase.html jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParsingUtils.java jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java jakarta/jmeter/branches/rel-2-2/xdocs/changes.xml Modified: jakarta/jmeter/branches/rel-2-2/bin/testfiles/HTMLParserTestCase.html URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-2/bin/testfiles/HTMLParserTestCase.html?view=diff&rev=514130&r1=514129&r2=514130 ============================================================================== --- jakarta/jmeter/branches/rel-2-2/bin/testfiles/HTMLParserTestCase.html (original) +++ jakarta/jmeter/branches/rel-2-2/bin/testfiles/HTMLParserTestCase.html Sat Mar 3 03:27:49 2007 @@ -3,7 +3,7 @@ </head> <body background="images/body.gif"> <table background="images/table.gif"> -<tr background="images/tr.gif"> +<tr style="background url('images/tr.gif')"> <td background="images/td.gif"><img name="a" src="images/image-a.gif" align="top"></td> </tr> </table> Modified: jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java?view=diff&rev=514130&r1=514129&r2=514130 ============================================================================== --- jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java (original) +++ 
jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java Sat Mar 3 03:27:49 2007 @@ -41,6 +41,7 @@ protected static final String ATT_HREF = "href";// $NON-NLS-1$ protected static final String ATT_REL = "rel";// $NON-NLS-1$ protected static final String ATT_SRC = "src";// $NON-NLS-1$ + protected static final String ATT_STYLE = "style";// $NON-NLS-1$ protected static final String ATT_TYPE = "type";// $NON-NLS-1$ protected static final String ATT_IS_IMAGE = "image";// $NON-NLS-1$ protected static final String TAG_APPLET = "applet";// $NON-NLS-1$ Modified: jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java?view=diff&rev=514130&r1=514129&r2=514130 ============================================================================== --- jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java (original) +++ jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java Sat Mar 3 03:27:49 2007 @@ -1,187 +1,195 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package org.apache.jmeter.protocol.http.parser; - -import java.net.MalformedURLException; -import java.net.URL; -import java.util.Iterator; - -import org.apache.jorphan.logging.LoggingManager; -import org.apache.log.Logger; - -import org.htmlparser.Node; -import org.htmlparser.Parser; -import org.htmlparser.Tag; -import org.htmlparser.tags.AppletTag; -import org.htmlparser.tags.BaseHrefTag; -import org.htmlparser.tags.BodyTag; -import org.htmlparser.tags.CompositeTag; -import org.htmlparser.tags.FrameTag; -import org.htmlparser.tags.ImageTag; -import org.htmlparser.tags.InputTag; -import org.htmlparser.tags.LinkTag; -import org.htmlparser.tags.ScriptTag; -import org.htmlparser.util.NodeIterator; -import org.htmlparser.util.ParserException; - -/** - * HtmlParser implementation using SourceForge's HtmlParser. - * - */ -public class HtmlParserHTMLParser extends HTMLParser { - private static final Logger log = LoggingManager.getLoggerForClass(); - - static{ - org.htmlparser.scanners.ScriptScanner.STRICT = false; // Try to ensure that more javascript code is processed OK ... 
- } - protected HtmlParserHTMLParser() { - super(); - log.info("Using htmlparser version 2.0"); - } - - protected boolean isReusable() { - return true; - } - - /* - * (non-Javadoc) - * - * @see org.apache.jmeter.protocol.http.parser.HtmlParser#getEmbeddedResourceURLs(byte[], - * java.net.URL) - */ - public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection urls) throws HTMLParseException { - - if (log.isDebugEnabled()) log.debug("Parsing html of: " + baseUrl); - - Parser htmlParser = null; - try { - String contents = new String(html); - htmlParser = new Parser(); - htmlParser.setInputHTML(contents); - } catch (Exception e) { - throw new HTMLParseException(e); - } - - // Now parse the DOM tree - try { - // we start to iterate through the elements - parseNodes(htmlParser.elements(), new URLPointer(baseUrl), urls); - log.debug("End : parseNodes"); - } catch (ParserException e) { - throw new HTMLParseException(e); - } - - return urls.iterator(); - } - - /* - * A dummy class to pass the pointer of URL. - */ - private static class URLPointer { - private URLPointer(URL newUrl) { - url = newUrl; - } - private URL url; - } - - /** - * Recursively parse all nodes to pick up all URL s. - * @see e the nodes to be parsed - * @see baseUrl Base URL from which the HTML code was obtained - * @see urls URLCollection - */ - private void parseNodes(final NodeIterator e, - final URLPointer baseUrl, final URLCollection urls) - throws HTMLParseException, ParserException { - while(e.hasMoreNodes()) { - Node node = e.nextNode(); - // a url is always in a Tag. 
- if (!(node instanceof Tag)) { - continue; - } - Tag tag = (Tag) node; - String tagname=tag.getTagName(); - String binUrlStr = null; - - // first we check to see if body tag has a - // background set - if (tag instanceof BodyTag) { - binUrlStr = tag.getAttribute(ATT_BACKGROUND); - } else if (tag instanceof BaseHrefTag) { - BaseHrefTag baseHref = (BaseHrefTag) tag; - String baseref = baseHref.getBaseUrl().toString(); - try { - if (!baseref.equals(""))// Bugzilla 30713 - { - baseUrl.url = new URL(baseUrl.url, baseHref.getBaseUrl()); - } - } catch (MalformedURLException e1) { - throw new HTMLParseException(e1); - } - } else if (tag instanceof ImageTag) { - ImageTag image = (ImageTag) tag; - binUrlStr = image.getImageURL(); - } else if (tag instanceof AppletTag) { - // look for applets - - // This will only work with an Applet .class file. - // Ideally, this should be upgraded to work with Objects (IE) - // and archives (.jar and .zip) files as well. - AppletTag applet = (AppletTag) tag; - binUrlStr = applet.getAppletClass(); - } else if (tag instanceof InputTag) { - // we check the input tag type for image - if (ATT_IS_IMAGE.equalsIgnoreCase(tag.getAttribute(ATT_TYPE))) { - // then we need to download the binary - binUrlStr = tag.getAttribute(ATT_SRC); - } - } else if (tag instanceof LinkTag) { - LinkTag link = (LinkTag) tag; - if (link.getChild(0) instanceof ImageTag) { - ImageTag img = (ImageTag) link.getChild(0); - binUrlStr = img.getImageURL(); - } - } else if (tag instanceof ScriptTag) { - binUrlStr = tag.getAttribute(ATT_SRC); - } else if (tag instanceof FrameTag) { - binUrlStr = tag.getAttribute(ATT_SRC); - } else if (tagname.equalsIgnoreCase(TAG_EMBED) - || tagname.equalsIgnoreCase(TAG_BGSOUND)){ - binUrlStr = tag.getAttribute(ATT_SRC); - } else if (tagname.equalsIgnoreCase(TAG_LINK)) { - // Putting the string first means it works even if the attribute is null - if (STYLESHEET.equalsIgnoreCase(tag.getAttribute(ATT_REL))) { - binUrlStr = 
tag.getAttribute(ATT_HREF); - } - } else { - binUrlStr = tag.getAttribute(ATT_BACKGROUND); - } - - if (binUrlStr != null) { - urls.addURL(binUrlStr, baseUrl.url); - } - // second, if the tag was a composite tag, - // recursively parse its children. - if (tag instanceof CompositeTag) { - CompositeTag composite = (CompositeTag) tag; - parseNodes(composite.elements(), baseUrl, urls); - } - } - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.jmeter.protocol.http.parser; + +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Iterator; + +import org.apache.jorphan.logging.LoggingManager; +import org.apache.log.Logger; +import org.htmlparser.Node; +import org.htmlparser.Parser; +import org.htmlparser.Tag; +import org.htmlparser.tags.AppletTag; +import org.htmlparser.tags.BaseHrefTag; +import org.htmlparser.tags.BodyTag; +import org.htmlparser.tags.CompositeTag; +import org.htmlparser.tags.FrameTag; +import org.htmlparser.tags.ImageTag; +import org.htmlparser.tags.InputTag; +import org.htmlparser.tags.LinkTag; +import org.htmlparser.tags.ScriptTag; +import org.htmlparser.util.NodeIterator; +import org.htmlparser.util.ParserException; + +/** + * HtmlParser implementation using SourceForge's HtmlParser. + * + */ +class HtmlParserHTMLParser extends HTMLParser { + private static final Logger log = LoggingManager.getLoggerForClass(); + + static{ + org.htmlparser.scanners.ScriptScanner.STRICT = false; // Try to ensure that more javascript code is processed OK ... 
+ } + + protected HtmlParserHTMLParser() { + super(); + log.info("Using htmlparser version: "+Parser.getVersion()); + } + + protected boolean isReusable() { + return true; + } + + /* + * (non-Javadoc) + * + * @see org.apache.jmeter.protocol.http.parser.HtmlParser#getEmbeddedResourceURLs(byte[], + * java.net.URL) + */ + public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection urls) throws HTMLParseException { + + if (log.isDebugEnabled()) log.debug("Parsing html of: " + baseUrl); + + Parser htmlParser = null; + try { + String contents = new String(html); + htmlParser = new Parser(); + htmlParser.setInputHTML(contents); + } catch (Exception e) { + throw new HTMLParseException(e); + } + + // Now parse the DOM tree + try { + // we start to iterate through the elements + parseNodes(htmlParser.elements(), new URLPointer(baseUrl), urls); + log.debug("End : parseNodes"); + } catch (ParserException e) { + throw new HTMLParseException(e); + } + + return urls.iterator(); + } + + /* + * A dummy class to pass the pointer of URL. + */ + private static class URLPointer { + private URLPointer(URL newUrl) { + url = newUrl; + } + private URL url; + } + + /** + * Recursively parse all nodes to pick up all URL s. + * @see e the nodes to be parsed + * @see baseUrl Base URL from which the HTML code was obtained + * @see urls URLCollection + */ + private void parseNodes(final NodeIterator e, + final URLPointer baseUrl, final URLCollection urls) + throws HTMLParseException, ParserException { + while(e.hasMoreNodes()) { + Node node = e.nextNode(); + // a url is always in a Tag. 
+ if (!(node instanceof Tag)) { + continue; + } + Tag tag = (Tag) node; + String tagname=tag.getTagName(); + String binUrlStr = null; + + // first we check to see if body tag has a + // background set + if (tag instanceof BodyTag) { + binUrlStr = tag.getAttribute(ATT_BACKGROUND); + } else if (tag instanceof BaseHrefTag) { + BaseHrefTag baseHref = (BaseHrefTag) tag; + String baseref = baseHref.getBaseUrl().toString(); + try { + if (!baseref.equals(""))// Bugzilla 30713 + { + baseUrl.url = new URL(baseUrl.url, baseHref.getBaseUrl()); + } + } catch (MalformedURLException e1) { + throw new HTMLParseException(e1); + } + } else if (tag instanceof ImageTag) { + ImageTag image = (ImageTag) tag; + binUrlStr = image.getImageURL(); + } else if (tag instanceof AppletTag) { + // look for applets + + // This will only work with an Applet .class file. + // Ideally, this should be upgraded to work with Objects (IE) + // and archives (.jar and .zip) files as well. + AppletTag applet = (AppletTag) tag; + binUrlStr = applet.getAppletClass(); + } else if (tag instanceof InputTag) { + // we check the input tag type for image + if (ATT_IS_IMAGE.equalsIgnoreCase(tag.getAttribute(ATT_TYPE))) { + // then we need to download the binary + binUrlStr = tag.getAttribute(ATT_SRC); + } + } else if (tag instanceof LinkTag) { + LinkTag link = (LinkTag) tag; + if (link.getChild(0) instanceof ImageTag) { + ImageTag img = (ImageTag) link.getChild(0); + binUrlStr = img.getImageURL(); + } + } else if (tag instanceof ScriptTag) { + binUrlStr = tag.getAttribute(ATT_SRC); + } else if (tag instanceof FrameTag) { + binUrlStr = tag.getAttribute(ATT_SRC); + } else if (tagname.equalsIgnoreCase(TAG_EMBED) + || tagname.equalsIgnoreCase(TAG_BGSOUND)){ + binUrlStr = tag.getAttribute(ATT_SRC); + } else if (tagname.equalsIgnoreCase(TAG_LINK)) { + // Putting the string first means it works even if the attribute is null + if (STYLESHEET.equalsIgnoreCase(tag.getAttribute(ATT_REL))) { + binUrlStr = 
tag.getAttribute(ATT_HREF); + } + } else { + binUrlStr = tag.getAttribute(ATT_BACKGROUND); + } + + if (binUrlStr != null) { + urls.addURL(binUrlStr, baseUrl.url); + } + + // Now look for URLs in the STYLE attribute + String styleTagStr = tag.getAttribute(ATT_STYLE); + if(styleTagStr != null) { + HtmlParsingUtils.extractStyleURLs(baseUrl.url, urls, styleTagStr); + } + + // second, if the tag was a composite tag, + // recursively parse its children. + if (tag instanceof CompositeTag) { + CompositeTag composite = (CompositeTag) tag; + parseNodes(composite.elements(), baseUrl, urls); + } + } + } + +} Modified: jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParsingUtils.java URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParsingUtils.java?view=diff&rev=514130&r1=514129&r2=514130 ============================================================================== --- jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParsingUtils.java (original) +++ jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParsingUtils.java Sat Mar 3 03:27:49 2007 @@ -30,9 +30,12 @@ import org.apache.jmeter.protocol.http.sampler.HTTPSamplerBase; import org.apache.jmeter.protocol.http.sampler.HTTPSamplerFactory; import org.apache.jmeter.testelement.property.PropertyIterator; +import org.apache.jmeter.util.JMeterUtils; import org.apache.jorphan.logging.LoggingManager; import org.apache.log.Logger; -import org.apache.oro.text.PatternCacheLRU; +import org.apache.oro.text.regex.MatchResult; +import org.apache.oro.text.regex.Pattern; +import org.apache.oro.text.regex.PatternMatcherInput; import org.apache.oro.text.regex.Perl5Compiler; import org.apache.oro.text.regex.Perl5Matcher; import org.w3c.dom.Document; @@ -54,14 +57,6 @@ * Perl5Compiler.MULTILINE_MASK | Perl5Compiler.READ_ONLY_MASK; */ - 
private static PatternCacheLRU patternCache = new PatternCacheLRU(1000, new Perl5Compiler()); - - private static ThreadLocal localMatcher = new ThreadLocal() { - protected Object initialValue() { - return new Perl5Matcher(); - } - }; - /** * Private constructor to prevent instantiation. */ @@ -71,7 +66,7 @@ public static synchronized boolean isAnchorMatched(HTTPSamplerBase newLink, HTTPSamplerBase config) { boolean ok = true; - Perl5Matcher matcher = (Perl5Matcher) localMatcher.get(); + Perl5Matcher matcher = JMeterUtils.getMatcher(); PropertyIterator iter = config.getArguments().iterator(); String query = null; @@ -91,7 +86,7 @@ Argument item = (Argument) iter.next().getObjectValue(); if (query.indexOf(item.getName() + "=") == -1) { if (!(ok = ok - && matcher.contains(query, patternCache + && matcher.contains(query, JMeterUtils.getPatternCache() .getPattern(item.getName(), Perl5Compiler.READ_ONLY_MASK)))) { return false; } @@ -101,20 +96,20 @@ if (config.getDomain() != null && config.getDomain().length() > 0 && !newLink.getDomain().equals(config.getDomain())) { if (!(ok = ok - && matcher.matches(newLink.getDomain(), patternCache.getPattern(config.getDomain(), + && matcher.matches(newLink.getDomain(), JMeterUtils.getPatternCache().getPattern(config.getDomain(), Perl5Compiler.READ_ONLY_MASK)))) { return false; } } if (!newLink.getPath().equals(config.getPath()) - && !matcher.matches(newLink.getPath(), patternCache.getPattern("[/]*" + config.getPath(), + && !matcher.matches(newLink.getPath(), JMeterUtils.getPatternCache().getPattern("[/]*" + config.getPath(), Perl5Compiler.READ_ONLY_MASK))) { return false; } if (!(ok = ok - && matcher.matches(newLink.getProtocol(), patternCache.getPattern(config.getProtocol(), + && matcher.matches(newLink.getProtocol(), JMeterUtils.getPatternCache().getPattern(config.getProtocol(), Perl5Compiler.READ_ONLY_MASK)))) { return false; } @@ -123,10 +118,10 @@ } public static synchronized boolean isArgumentMatched(Argument arg, Argument 
patternArg) { - Perl5Matcher matcher = (Perl5Matcher) localMatcher.get(); - return (arg.getName().equals(patternArg.getName()) || matcher.matches(arg.getName(), patternCache.getPattern( + Perl5Matcher matcher = JMeterUtils.getMatcher(); + return (arg.getName().equals(patternArg.getName()) || matcher.matches(arg.getName(), JMeterUtils.getPatternCache().getPattern( patternArg.getName(), Perl5Compiler.READ_ONLY_MASK))) - && (arg.getValue().equals(patternArg.getValue()) || matcher.matches(arg.getValue(), patternCache + && (arg.getValue().equals(patternArg.getValue()) || matcher.matches(arg.getValue(), JMeterUtils.getPatternCache() .getPattern(patternArg.getValue(), Perl5Compiler.READ_ONLY_MASK))); } @@ -311,5 +306,20 @@ String action = atts.getNamedItem("action").getNodeValue(); HTTPSamplerBase url = createUrlFromAnchor(action, context); return url; + } + + public static void extractStyleURLs(final URL baseUrl, final URLCollection urls, String styleTagStr) { + Perl5Matcher matcher = JMeterUtils.getMatcher(); + Pattern pattern = JMeterUtils.getPatternCache().getPattern( + "URL\\(\\s*('|\")(.*)('|\")\\s*\\)", + Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK | Perl5Compiler.READ_ONLY_MASK); + PatternMatcherInput input = null; + input = new PatternMatcherInput(styleTagStr); + while (matcher.contains(input, pattern)) { + MatchResult match = matcher.getMatch(); + // The value is in the second group + String styleUrl = match.group(2); + urls.addURL(styleUrl, baseUrl); + } } } Modified: jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java?view=diff&rev=514130&r1=514129&r2=514130 ============================================================================== --- 
jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java (original) +++ jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java Sat Mar 3 03:27:49 2007 @@ -147,6 +147,11 @@ break; } + String style = getValue(attrs, ATT_STYLE); + if (style != null) { + HtmlParsingUtils.extractStyleURLs(baseUrl, urls, style); + } + NodeList children = node.getChildNodes(); if (children != null) { int len = children.getLength(); @@ -154,6 +159,7 @@ baseUrl = scanNodes(children.item(i), urls, baseUrl); } } + break; // case Node.TEXT_NODE: Modified: jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java?view=diff&rev=514130&r1=514129&r2=514130 ============================================================================== --- jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java (original) +++ jakarta/jmeter/branches/rel-2-2/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java Sat Mar 3 03:27:49 2007 @@ -22,6 +22,7 @@ import java.net.URL; import java.util.Iterator; +import org.apache.jmeter.util.JMeterUtils; import org.apache.jorphan.logging.LoggingManager; import org.apache.log.Logger; @@ -72,9 +73,9 @@ * </ul> * * @author <a href="mailto:[EMAIL PROTECTED]">Jordi Salvat i Alabart</a> - * @version $Revision$ updated on $Date$ */ class RegexpHTMLParser extends HTMLParser { + private static final Logger log = LoggingManager.getLoggerForClass(); /** * Regexp fragment matching a tag attribute's value (including the equals @@ -96,11 +97,17 @@ * Regular expression used against the HTML code to find the URIs of images, * etc.: */ - private static final String REGEXP = "<(?:" + "!--.*?-->" + "|BASE" + SEP + 
"HREF" + VALUE - + "|(?:IMG|SCRIPT|FRAME|IFRAME|BGSOUND|FRAME)" + SEP + "SRC" + VALUE + "|APPLET" + SEP + "CODE(?:BASE)?" - + VALUE + "|(?:EMBED|OBJECT)" + SEP + "(?:SRC|CODEBASE)" + VALUE + "|(?:BODY|TABLE|TR|TD)" + SEP - + "BACKGROUND" + VALUE + "|INPUT(?:" + SEP + "(?:SRC" + VALUE - + "|TYPE\\s*=\\s*(?:\"image\"|'image'|image(?=[\\s>])))){2,}" + "|LINK(?:" + SEP + "(?:HREF" + VALUE + private static final String REGEXP = + "<(?:" + "!--.*?-->" + + "|BASE" + SEP + "HREF" + VALUE + + "|(?:IMG|SCRIPT|FRAME|IFRAME|BGSOUND|FRAME)" + SEP + "SRC" + VALUE + + "|APPLET" + SEP + "CODE(?:BASE)?" + VALUE + + "|(?:EMBED|OBJECT)" + SEP + "(?:SRC|CODEBASE)" + VALUE + + "|(?:BODY|TABLE|TR|TD)" + SEP + "BACKGROUND" + VALUE + + "|[^<]+?STYLE\\s*=['\"].*?URL\\(\\s*['\"](.+?)['\"]\\s*\\)" + + "|INPUT(?:" + SEP + "(?:SRC" + VALUE + + "|TYPE\\s*=\\s*(?:\"image\"|'image'|image(?=[\\s>])))){2,}" + + "|LINK(?:" + SEP + "(?:HREF" + VALUE + "|REL\\s*=\\s*(?:\"stylesheet\"|'stylesheet'|stylesheet(?=[\\s>])))){2,}" + ")"; // Number of capturing groups possibly containing Base HREFs: @@ -112,15 +119,6 @@ static Pattern pattern; /** - * Thread-local matcher: - */ - private static ThreadLocal localMatcher = new ThreadLocal() { - protected Object initialValue() { - return new Perl5Matcher(); - } - }; - - /** * Thread-local input: */ private static ThreadLocal localInput = new ThreadLocal() { @@ -129,9 +127,6 @@ } }; - /** Used to store the Logger (used for debug and error messages). 
*/ - transient private static Logger log; - protected boolean isReusable() { return true; } @@ -142,10 +137,6 @@ protected RegexpHTMLParser() { super(); - // Define this here to ensure it's ready to report any trouble - // with the regexp: - log = LoggingManager.getLoggerForClass(); - // Compile the regular expression: try { Perl5Compiler c = new Perl5Compiler(); @@ -166,7 +157,7 @@ */ public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection urls) { - Perl5Matcher matcher = (Perl5Matcher) localMatcher.get(); + Perl5Matcher matcher = JMeterUtils.getMatcher(); PatternMatcherInput input = (PatternMatcherInput) localInput.get(); // TODO: find a way to avoid the cost of creating a String here -- // probably a new PatternMatcherInput working on a byte[] would do @@ -176,7 +167,7 @@ MatchResult match = matcher.getMatch(); String s; if (log.isDebugEnabled()) - log.debug("match groups " + match.groups()); + log.debug("match groups " + match.groups() + " " + match.toString()); // Check for a BASE HREF: for (int g = 1; g <= NUM_BASE_GROUPS && g <= match.groups(); g++) { s = match.group(g); @@ -197,10 +188,10 @@ } for (int g = NUM_BASE_GROUPS + 1; g <= match.groups(); g++) { s = match.group(g); - if (log.isDebugEnabled()) { - log.debug("group " + g + " - " + match.group(g)); - } if (s != null) { + if (log.isDebugEnabled()) { + log.debug("group " + g + " - " + match.group(g)); + } urls.addURL(s, baseUrl); } } Modified: jakarta/jmeter/branches/rel-2-2/xdocs/changes.xml URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-2/xdocs/changes.xml?view=diff&rev=514130&r1=514129&r2=514130 ============================================================================== --- jakarta/jmeter/branches/rel-2-2/xdocs/changes.xml (original) +++ jakarta/jmeter/branches/rel-2-2/xdocs/changes.xml Sat Mar 3 03:27:49 2007 @@ -111,6 +111,7 @@ <li>Bug 40383 - don't set content-type if already set</li> <li>Mailer Visualiser test button now works if test plan has not yet 
been saved</li> <li>Bug 36959 - Shortcuts "ctrl c" and "ctrl v" don't work on the tree elements</li> +<li>Bug 40696 - retrieve embedded resources from STYLE URL() attributes</li> </ul> <h3>Version 2.2</h3> --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]