Hi,
Andres Riancho <andres.rian...@gmail.com> wrote:
> >> What I think is better is to save the compiled regular expressions to
> >> the kb in miscSettings, and in "_isBlacklisted" you should just match
> >> them to the current URL.
> >>
> >> Do you think it would be possible to do that?
> >
> > Sure, I'll get to it as soon as I have time. I guess that'll be tomorrow.
> > I'll also add the extended documentation Zach wrote about earlier.
>
> Excellent, I'll wait for the new patch =)
Please see the attached patch. The regexes are now compiled and saved
whenever the configuration is set or changed. I also added Zach's extended
documentation from his patch about the consequences of using the whitelist.
Patrick
--
The Plague: You wanted to know who I am, Zero Cool? Well, let me explain
the New World Order. Governments and corporations need people
like you and me. We are Samurai... the Keyboard Cowboys... and
all those other people who have no idea what's going on are
the cattle... Moooo.
(Hackers)
diff --git a/core/controllers/miscSettings.py b/core/controllers/miscSettings.py
index b71bd78..611be7a 100644
--- a/core/controllers/miscSettings.py
+++ b/core/controllers/miscSettings.py
@@ -30,6 +30,9 @@ from core.data.options.optionList import optionList
# Raise errors
from core.controllers.w3afException import w3afException
+import re
+import fnmatch
+
class miscSettings(configurable):
'''
@@ -53,8 +56,12 @@ class miscSettings(configurable):
cf.cf.save('interface', 'eth0' )
cf.cf.save('localAddress', '127.0.0.1' )
cf.cf.save('demo', False )
- cf.cf.save('nonTargets', [] )
cf.cf.save('exportFuzzableRequests', '')
+ cf.cf.save('useWildcardMatching', False )
+ cf.cf.save('blacklistRegex', [] )
+ cf.cf.save('whitelistRegex', ['.*'] )
+ cf.cf.save('blacklistRegexCompiled', [] )
+ cf.cf.save('whitelistRegexCompiled', [ re.compile( '.*' ) ] )
def getOptions( self ):
'''
@@ -77,43 +84,76 @@ class miscSettings(configurable):
d5 = 'A list with all fuzzable header names'
o5 = option('fuzzableHeaders', cf.cf.getData('fuzzableHeaders'), d5,
'list', tabid='Fuzzer parameters')
- d15 = 'Indicates what HTML form combo values w3af plugins will use:
all, tb, tmb, t, b'
- h15 = 'Indicates what HTML form combo values, e.g. select options
values, w3af plugins will use: all (All values), tb (only top and bottom
values), tmb (top, middle and bottom values), t (top values), b (bottom values)'
- o15 = option('fuzzFormComboValues',
cf.cf.getData('fuzzFormComboValues'), d15, 'string', help=h15, tabid='Fuzzer
parameters')
+ d6 = 'Indicates what HTML form combo values w3af plugins will use:
all, tb, tmb, t, b'
+ h6 = 'Indicates what HTML form combo values, e.g. select options
values, w3af plugins will use: all (All values), tb (only top and bottom
values), tmb (top, middle and bottom values), t (top values), b (bottom values)'
+ o6 = option('fuzzFormComboValues',
cf.cf.getData('fuzzFormComboValues'), d6, 'string', help=h6, tabid='Fuzzer
parameters')
######## Core parameters ########
- d6 = 'Automatic dependency enabling for plugins'
- h6 = 'If autoDependencies is enabled, and pluginA depends on pluginB
that wasn\'t enabled, then pluginB is automatically enabled.'
- o6 = option('autoDependencies', cf.cf.getData('autoDependencies'), d6,
'boolean', help=h6, tabid='Core settings')
+ d7 = 'Automatic dependency enabling for plugins'
+ h7 = 'If autoDependencies is enabled, and pluginA depends on pluginB
that wasn\'t enabled, then pluginB is automatically enabled.'
+ o7 = option('autoDependencies', cf.cf.getData('autoDependencies'), d7,
'boolean', help=h7, tabid='Core settings')
- d7 = 'Maximum depth of the discovery phase'
- h7 = 'For example, if set to 10, the webSpider plugin will only follow
10 link levels while spidering the site. This applies to the whole discovery
phase; not only to the webSpider.'
- o7 = option('maxDepth', cf.cf.getData('maxDepth'), d7, 'integer',
help=h7, tabid='Core settings')
+ d8 = 'Maximum depth of the discovery phase'
+ h8 = 'For example, if set to 10, the webSpider plugin will only follow
10 link levels while spidering the site. This applies to the whole discovery
phase; not only to the webSpider.'
+ o8 = option('maxDepth', cf.cf.getData('maxDepth'), d8, 'integer',
help=h8, tabid='Core settings')
- d8 = 'Maximum number of threads that the w3af process will spawn'
- h8 = 'The maximum valid number of threads is 100.'
- o8 = option('maxThreads', cf.cf.getData('maxThreads'), d8, 'integer',
tabid='Core settings', help=h8)
+ d9 = 'Maximum number of threads that the w3af process will spawn'
+ h9 = 'The maximum valid number of threads is 100.'
+ o9 = option('maxThreads', cf.cf.getData('maxThreads'), d9, 'integer',
tabid='Core settings', help=h9)
- d9 = 'Maximum number of times the discovery function is called'
- o9 = option('maxDiscoveryLoops', cf.cf.getData('maxDiscoveryLoops'),
d9, 'integer', tabid='Core settings')
+ d10 = 'Maximum number of times the discovery function is called'
+ o10 = option('maxDiscoveryLoops', cf.cf.getData('maxDiscoveryLoops'),
d10, 'integer', tabid='Core settings')
######## Network parameters ########
- d10 = 'Local interface name to use when sniffing, doing reverse
connections, etc.'
- o10 = option('interface', cf.cf.getData('interface'), d10, 'string',
tabid='Network settings')
+ d11 = 'Local interface name to use when sniffing, doing reverse
connections, etc.'
+ o11 = option('interface', cf.cf.getData('interface'), d11, 'string',
tabid='Network settings')
- d11 = 'Local IP address to use when doing reverse connections'
- o11 = option('localAddress', cf.cf.getData('localAddress'), d11,
'string', tabid='Core settings')
+ d12 = 'Local IP address to use when doing reverse connections'
+ o12 = option('localAddress', cf.cf.getData('localAddress'), d12,
'string', tabid='Core settings')
######### Misc ###########
- d12 = 'Enable this when you are doing a demo in a conference'
- o12 = option('demo', cf.cf.getData('demo'), d12, 'boolean',
tabid='Misc settings')
-
- d13 = 'A comma separated list of URLs that w3af should completely
ignore'
- h13 = 'Sometimes it\'s a good idea to ignore some URLs and test them
manually'
- o13 = option('nonTargets', cf.cf.getData('nonTargets'), d13, 'list',
tabid='Misc settings')
+ d13 = 'Enable this when you are doing a demo in a conference'
+ o13 = option('demo', cf.cf.getData('demo'), d13, 'boolean',
tabid='Misc settings')
+
+ ######### Targets #########
+ d14 = 'Use wildcard- instead of regex matching for defining black- or
whitelists'
+ h14 = ('Per default, w3af uses regex matching for the black- and '
+ 'whitelist. If useWildcardMatching is set, the target black-
and '
+ 'whitelist will use wildcard patterns instead of regular '
+ 'expressions for matching, which are easier to define.')
+ o14 = option('useWildcardMatching',
cf.cf.getData('useWildcardMatching'), d14, 'boolean', tabid='Target settings',
help=h14)
- d14 = 'Export all discovered fuzzable requests to the given file (CSV)'
- o14 = option('exportFuzzableRequests',
cf.cf.getData('exportFuzzableRequests'), d14, 'string', tabid='Export fuzzable
Requests')
+ # XXX There's one bug here: You can't use commas (',') in the regexes, as
+ # they will be recognized as list item separators by Python. What seems
+ # to work is to enclose the regex with single ticks ('') when saving the
+ # config, but those ticks will be lost the next time you go and look at
+ # the config, so you have to set them again.
+ d15 = ('A comma separated blacklist of URLs that w3af should globally '
+ 'ignore. Has precedence over whitelistRegex.')
+ h15 = ('URLs in the blacklist will not be tested by w3af. Please use '
+ 'regular expressions to specify the URLs, or, if
useWildcardMatching '
+ 'is enabled, wildcard patterns. blacklistRegex has '
+ 'precedence over whitelistRegex.')
+ o15 = option('blacklistRegex', cf.cf.getData('blacklistRegex'), d15,
'list', tabid='Target settings', help=h15)
+
+ d16 = 'A comma separated whitelist that every URL has to match before
it is tested'
+ h16 = ('If a whitelist is given, only targets matching one of the '
+ 'patterns in the list will be tested. Please use regular '
+ 'expressions to specify the URLs, or, if useWildcardMatching '
+ 'is enabled, wildcard patterns. Note that this setting is
global '
+ 'to all URL requests and may cause unexpected or incorrect '
+ 'results from some plugins. An example of this would be
enabling '
+ 'the yahoo, MSN, or google spider plugins but setting the '
+ 'whitelist to ^http://www.example.com/site1/.*$ will cause the
'
+ 'search engine plugins to return empty results. If this is a '
+ 'problem you probably want to use the webSpider followRegex
and '
+ 'ignoreRegex, or include those search engines in your
whitelist '
+ 'regex. Setting the verbose flag in your output plugins will
show '
+ 'which URLs are being ignored.')
+ o16 = option('whitelistRegex', cf.cf.getData('whitelistRegex'), d16,
'list', tabid='Target settings', help=h16)
+
+ d17 = 'Export all discovered fuzzable requests to the given file (CSV)'
+ o17 = option('exportFuzzableRequests',
cf.cf.getData('exportFuzzableRequests'), d17, 'string', tabid='Export fuzzable
Requests')
ol = optionList()
ol.add(o1)
@@ -131,6 +171,8 @@ class miscSettings(configurable):
ol.add(o13)
ol.add(o14)
ol.add(o15)
+ ol.add(o16)
+ ol.add(o17)
return ol
def getDesc( self ):
@@ -161,8 +203,26 @@ class miscSettings(configurable):
cf.cf.save('interface', optionsMap['interface'].getValue() )
cf.cf.save('localAddress', optionsMap['localAddress'].getValue() )
cf.cf.save('demo', optionsMap['demo'].getValue() )
- cf.cf.save('nonTargets', optionsMap['nonTargets'].getValue() )
cf.cf.save('exportFuzzableRequests',
optionsMap['exportFuzzableRequests'].getValue() )
+ cf.cf.save('useWildcardMatching',
optionsMap['useWildcardMatching'].getValue() )
+
+ # Save the white- and blacklists as strings and as compiled regex.
+ for l in ( ( optionsMap['blacklistRegex'].getValue(), 'blacklistRegex'
),
+ ( optionsMap['whitelistRegex'].getValue(), 'whitelistRegex'
) ):
+ the_list, var_name = l
+ if optionsMap['useWildcardMatching'].getValue():
+ regex_list = map( fnmatch.translate, the_list )
+ else:
+ regex_list = the_list
+ compiled_list = []
+ for regex in regex_list:
+ try:
+ compiled_list.append( re.compile( regex ) )
+ except:
+ msg = 'You specified an invalid regular expression: "
%s".' % regex
+ raise w3afException(msg)
+ cf.cf.save( var_name, the_list )
+ cf.cf.save( var_name + 'Compiled', compiled_list )
# This is an undercover call to __init__ :) , so I can set all default
parameters.
miscSettings()
diff --git a/core/data/url/xUrllib.py b/core/data/url/xUrllib.py
index ab16622..9b31445 100644
--- a/core/data/url/xUrllib.py
+++ b/core/data/url/xUrllib.py
@@ -55,6 +55,8 @@ import core.data.kb.knowledgeBase as kb
# This is a singleton that's used for assigning request IDs
from core.controllers.misc.number_generator import consecutive_number_generator
+# For the blacklisting code
+import fnmatch
class sizeExceeded( Exception ):
pass
@@ -89,7 +91,7 @@ class xUrllib:
self._paused = False
self._mustStop = False
self._ignore_errors_conf = False
-
+
def pause(self, pauseYesNo):
'''
When the core wants to pause a scan, it calls this method, in order to
freeze all actions
@@ -214,20 +216,27 @@ class xUrllib:
req = urllib2.Request( uri )
req = self._addHeaders( req )
return req.headers
-
+
def _isBlacklisted( self, uri ):
'''
- If the user configured w3af to ignore a URL, we are going to be
applying that configuration here.
+ If the user configured w3af to ignore a URL, we apply that
configuration here.
This is the lowest layer inside w3af.
'''
- listOfNonTargets = cf.cf.getData('nonTargets') or []
- for u in listOfNonTargets:
- if urlParser.uri2url( uri ) == urlParser.uri2url( u ):
- msg = 'The URL you are trying to reach was configured as a
non-target. ( '
- msg += uri +' ). Returning an empty response.'
+ # First: blacklist
+ for regex in cf.cf.getData( 'blacklistRegexCompiled' ):
+ if regex.match( uri ):
+ msg = ( 'The URL you are trying to reach was configured as a '
+ 'non-target via blacklistRegex. ( %s ). Returning an
empty '
+ 'response.' ) % uri
om.out.debug( msg )
return True
-
+ # Second: whitelist
+ if filter(lambda regex: regex.match( uri ), cf.cf.getData(
'whitelistRegexCompiled' ) ) == []:
+ msg = ( 'The URL you are trying to reach was configured as a '
+ 'non-target via whitelistRegex. ( %s ). Returning an empty
'
+ 'response.' ) % uri
+ om.out.debug( msg )
+ return True
return False
def sendRawRequest( self, head, postdata, fixContentLength=True,
get_size=True):
------------------------------------------------------------------------------
Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day
trial. Simplify your report design, integration and deployment - and focus on
what you do best, core application coding. Discover what's new with
Crystal Reports now. http://p.sf.net/sfu/bobj-july
_______________________________________________
W3af-develop mailing list
W3af-develop@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/w3af-develop