Hello all,
I coded up a change so w3af can be told to completely ignore URLs based on
a whitelist or a blacklist regex. It works as a modification of the
nonTargets misc option, which I replaced as redundant. I did some testing
with it and it works well, with a few caveats, which I will repeat here and
I've also added to the help. The problem is that some of the discovery
plugins could be made to not work correctly if a whitelist regex is used
such as ^http://www.example.com/site1/.*$ since they make requests to
yahoo.com, google.com, archive.org, etc. A few other plugins don't work
correctly either with certain regexes, such as the domain_dot plugin with
the example regex above, since it makes calls to the domain rather than the
domain + path. I can't really think of a way to prevent this at such a low
level.
I think this will help me when I'm scanning large sites and want to break
down the scan a bit, and it may help folks like pUm (from the users list)
for their scanning needs if they need to be very certain the scan won't
affect other parts of the site.
Let me know if you have any suggestions for the patch.
Note to Andres, I did make the change to remove the nonTargets option, so
this is slightly different than what I sent you earlier.
Zach Jansen
Index: core/controllers/miscSettings.py
===================================================================
--- core/controllers/miscSettings.py (revision 2940)
+++ core/controllers/miscSettings.py (working copy)
@@ -30,7 +30,9 @@
# Raise errors
from core.controllers.w3afException import w3afException
+import re
+
class miscSettings(configurable):
'''
A class that acts as an interface for the user interfaces, so they can configure w3af settings using getOptions and SetOptions.
@@ -53,8 +55,9 @@
cf.cf.save('interface', 'eth0' )
cf.cf.save('localAddress', '127.0.0.1' )
cf.cf.save('demo', False )
- cf.cf.save('nonTargets', [] )
cf.cf.save('exportFuzzableRequests', '')
+ cf.cf.save('blackListRegex', 'None')
+ cf.cf.save('whiteListRegex', '.*')
def getOptions( self ):
'''
@@ -108,10 +111,19 @@
d12 = 'Enable this when you are doing a demo in a conference'
o12 = option('demo', cf.cf.getData('demo'), d12, 'boolean', tabid='Misc settings')
- d13 = 'A comma separated list of URLs that w3af should completely ignore'
- h13 = 'Sometimes it\'s a good idea to ignore some URLs and test them manually'
- o13 = option('nonTargets', cf.cf.getData('nonTargets'), d13, 'list', tabid='Misc settings')
+ d16 = 'Globally ignore any URLs that w3af finds that match this regex. Has precedence over whiteListRegex.'
+ h16 = 'For example ".*\.(?:flv|flac|pdf)$" to ignore flash video, flac audio, and pdf files. Note that this setting has precedence over the whiteListRegex.\n\n'
+ h16 += 'Setting the verbose flag in your output plugins will show which URLs are being ignored. \n\n'
+ h16 += 'Default Value: "None" (do not ignore any URLs)'
+ o16 = option('blackListRegex', cf.cf.getData('blackListRegex'),d16, 'string', tabid='Misc settings', help=h16)
+ d17 = 'Globally ignore any URLs that w3af find that do NOT match this regex'
+ h17 = 'For example use "^http://www.example.com/site1/.*$" to scan only a subsection of a domain. \n\n'
+ h17 += 'Note that this setting is global to all URL requests and may cause unexpected or incorrect results from some plugins. An example of this would be enabling the yahoo, MSN, or google spider plugins but setting the whitelist to ^http://www.example.com/site1/.*$ will cause the search engine plugins to return empty results. If this is a problem you probably want to use the webSpider followRegex and ignoreRegex, or include those search engines in your whitelist regex. \n\n'
+ h17 += 'Setting the verbose flag in your output plugins will show which URLs are being ignored. \n\n'
+ h17 += 'Default Value: ".*" (match all URLs)'
+ o17 = option('whiteListRegex', cf.cf.getData('whiteListRegex'),d17, 'string', tabid='Misc settings', help=h17)
+
d14 = 'Export all discovered fuzzable requests to the given file (CSV)'
o14 = option('exportFuzzableRequests', cf.cf.getData('exportFuzzableRequests'), d14, 'string', tabid='Export fuzzable Requests')
@@ -128,9 +140,10 @@
ol.add(o10)
ol.add(o11)
ol.add(o12)
- ol.add(o13)
ol.add(o14)
ol.add(o15)
+ ol.add(o16)
+ ol.add(o17)
return ol
def getDesc( self ):
@@ -161,8 +174,21 @@
cf.cf.save('interface', optionsMap['interface'].getValue() )
cf.cf.save('localAddress', optionsMap['localAddress'].getValue() )
cf.cf.save('demo', optionsMap['demo'].getValue() )
- cf.cf.save('nonTargets', optionsMap['nonTargets'].getValue() )
cf.cf.save('exportFuzzableRequests', optionsMap['exportFuzzableRequests'].getValue() )
+ try:
+ re.compile(optionsMap['blackListRegex'].getValue() )
+ except:
+ msg = 'You specified an invalid regular expression: "' + optionsMap['blackListRegex'].getValue() + '".'
+ raise w3afException(msg)
+ else:
+ cf.cf.save('blackListRegex', optionsMap['blackListRegex'].getValue() )
+ try:
+ re.compile(optionsMap['whiteListRegex'].getValue() )
+ except:
+ msg = 'You specified an invalid regular expression: "' + optionsMap['whiteListRegex'].getValue() + '".'
+ raise w3afException(msg)
+ else:
+ cf.cf.save('whiteListRegex', optionsMap['whiteListRegex'].getValue() )
# This is an undercover call to __init__ :) , so I can set all default parameters.
miscSettings()
Index: core/data/url/xUrllib.py
===================================================================
--- core/data/url/xUrllib.py (revision 2940)
+++ core/data/url/xUrllib.py (working copy)
@@ -89,6 +89,10 @@
self._paused = False
self._mustStop = False
self._ignore_errors_conf = False
+
+ # Whitelist/blacklist regex options
+ self._compiled_whitelist_re = None
+ self._compiled_blacklist_re = None
def pause(self, pauseYesNo):
'''
@@ -220,14 +224,25 @@
If the user configured w3af to ignore a URL, we are going to be applying that configuration here.
This is the lowest layer inside w3af.
'''
- listOfNonTargets = cf.cf.getData('nonTargets') or []
- for u in listOfNonTargets:
- if urlParser.uri2url( uri ) == urlParser.uri2url( u ):
- msg = 'The URL you are trying to reach was configured as a non-target. ( '
- msg += uri +' ). Returning an empty response.'
- om.out.debug( msg )
- return True
+ # Don't recompile if we don't have to. Seems like there might be a better way to do this.
+ # Part of init maybe?
+ if self._compiled_whitelist_re == None:
+ self._compiled_whitelist_re = re.compile(cf.cf.getData('whiteListRegex'))
+ if self._compiled_blacklist_re == None:
+ self._compiled_blacklist_re = re.compile(cf.cf.getData('blackListRegex'))
+ # Test against the regex's
+ if self._compiled_blacklist_re.match(uri):
+ msg = 'The URL you are trying to reach was configured as a non-target via blackListRegex. ( '
+ msg += uri + ' ). Returning an empty response.'
+ om.out.debug( msg )
+ return True
+ if not self._compiled_whitelist_re.match(uri):
+ msg = 'The URL you are trying to reach was configured as a non-target via whiteListRegex. ( '
+ msg += uri + ' ). Returning an empty response.'
+ om.out.debug( msg )
+ return True
+
return False
def sendRawRequest( self, head, postdata, fixContentLength=True, get_size=True):
------------------------------------------------------------------------------
Enter the BlackBerry Developer Challenge
This is your chance to win up to $100,000 in prizes! For a limited time,
vendors submitting new applications to BlackBerry App World(TM) will have
the opportunity to enter the BlackBerry Developer Challenge. See full prize
details at: http://p.sf.net/sfu/blackberry
_______________________________________________
W3af-develop mailing list
W3af-develop@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/w3af-develop