Hi,

Andres Riancho <andres.rian...@gmail.com> wrote:

> >> What I think is better is to save the compiled regular expressions to
> >> the kb in miscSettings, and in "_isBlacklisted" you should just match
> >> them to the current URL.
> >>
> >> Do you think it would be possible to do that?
> >
> > Sure, I'll get to it as soon as I have time. I guess that'll be tomorrow.
> > I'll also add the extended documentation Zach wrote about earlier.
> 
> Excellent, I'll wait for the new patch =)

Please see the attached patch. The regexes are now compiled and saved whenever
the configuration is set or changed. I also added Zach's extended
documentation from his patch about the consequences of using the whitelist.


Patrick

-- 
The Plague: You wanted to know who I am, Zero Cool? Well, let me explain 
            the New World Order. Governments and corporations need people
            like you and me. We are Samurai... the Keyboard Cowboys... and
            all those other people who have no idea what's going on are 
            the cattle... Moooo.
(Hackers)
diff --git a/core/controllers/miscSettings.py b/core/controllers/miscSettings.py
index b71bd78..611be7a 100644
--- a/core/controllers/miscSettings.py
+++ b/core/controllers/miscSettings.py
@@ -30,6 +30,9 @@ from core.data.options.optionList import optionList
 # Raise errors
 from core.controllers.w3afException import w3afException
 
+import re
+import fnmatch
+
 
 class miscSettings(configurable):
     '''
@@ -53,8 +56,12 @@ class miscSettings(configurable):
             cf.cf.save('interface', 'eth0' )
             cf.cf.save('localAddress', '127.0.0.1' )
             cf.cf.save('demo', False )
-            cf.cf.save('nonTargets', [] )
             cf.cf.save('exportFuzzableRequests', '')
+            cf.cf.save('useWildcardMatching', False )
+            cf.cf.save('blacklistRegex', [] )
+            cf.cf.save('whitelistRegex', ['.*'] )
+            cf.cf.save('blacklistRegexCompiled', [] )
+            cf.cf.save('whitelistRegexCompiled', [ re.compile( '.*' ) ] )
     
     def getOptions( self ):
         '''
@@ -77,43 +84,76 @@ class miscSettings(configurable):
         d5 = 'A list with all fuzzable header names'
         o5 = option('fuzzableHeaders', cf.cf.getData('fuzzableHeaders'), d5, 
'list', tabid='Fuzzer parameters')
 
-        d15 = 'Indicates what HTML form combo values w3af plugins will use: 
all, tb, tmb, t, b'
-        h15 = 'Indicates what HTML form combo values, e.g. select options 
values,  w3af plugins will use: all (All values), tb (only top and bottom 
values), tmb (top, middle and bottom values), t (top values), b (bottom values)'
-        o15 = option('fuzzFormComboValues', 
cf.cf.getData('fuzzFormComboValues'), d15, 'string', help=h15, tabid='Fuzzer 
parameters')
+        d6 = 'Indicates what HTML form combo values w3af plugins will use: 
all, tb, tmb, t, b'
+        h6 = 'Indicates what HTML form combo values, e.g. select options 
values,  w3af plugins will use: all (All values), tb (only top and bottom 
values), tmb (top, middle and bottom values), t (top values), b (bottom values)'
+        o6 = option('fuzzFormComboValues', 
cf.cf.getData('fuzzFormComboValues'), d6, 'string', help=h6, tabid='Fuzzer 
parameters')
 
         ######## Core parameters ########
-        d6 = 'Automatic dependency enabling for plugins'
-        h6 = 'If autoDependencies is enabled, and pluginA depends on pluginB 
that wasn\'t enabled, then pluginB is automatically enabled.'
-        o6 = option('autoDependencies', cf.cf.getData('autoDependencies'), d6, 
'boolean', help=h6, tabid='Core settings')
+        d7 = 'Automatic dependency enabling for plugins'
+        h7 = 'If autoDependencies is enabled, and pluginA depends on pluginB 
that wasn\'t enabled, then pluginB is automatically enabled.'
+        o7 = option('autoDependencies', cf.cf.getData('autoDependencies'), d7, 
'boolean', help=h7, tabid='Core settings')
 
-        d7 = 'Maximum depth of the discovery phase'
-        h7 = 'For example, if set to 10, the webSpider plugin will only follow 
10 link levels while spidering the site. This applies to the whole discovery 
phase; not only to the webSpider.'
-        o7 = option('maxDepth', cf.cf.getData('maxDepth'), d7, 'integer', 
help=h7, tabid='Core settings')
+        d8 = 'Maximum depth of the discovery phase'
+        h8 = 'For example, if set to 10, the webSpider plugin will only follow 
10 link levels while spidering the site. This applies to the whole discovery 
phase; not only to the webSpider.'
+        o8 = option('maxDepth', cf.cf.getData('maxDepth'), d8, 'integer', 
help=h8, tabid='Core settings')
         
-        d8 = 'Maximum number of threads that the w3af process will spawn'
-        h8 = 'The maximum valid number of threads is 100.'
-        o8 = option('maxThreads', cf.cf.getData('maxThreads'), d8, 'integer', 
tabid='Core settings', help=h8)
+        d9 = 'Maximum number of threads that the w3af process will spawn'
+        h9 = 'The maximum valid number of threads is 100.'
+        o9 = option('maxThreads', cf.cf.getData('maxThreads'), d9, 'integer', 
tabid='Core settings', help=h9)
         
-        d9 = 'Maximum number of times the discovery function is called'
-        o9 = option('maxDiscoveryLoops', cf.cf.getData('maxDiscoveryLoops'), 
d9, 'integer', tabid='Core settings')
+        d10 = 'Maximum number of times the discovery function is called'
+        o10 = option('maxDiscoveryLoops', cf.cf.getData('maxDiscoveryLoops'), 
d10, 'integer', tabid='Core settings')
         
         ######## Network parameters ########
-        d10 = 'Local interface name to use when sniffing, doing reverse 
connections, etc.'
-        o10 = option('interface', cf.cf.getData('interface'), d10, 'string', 
tabid='Network settings')
+        d11 = 'Local interface name to use when sniffing, doing reverse 
connections, etc.'
+        o11 = option('interface', cf.cf.getData('interface'), d11, 'string', 
tabid='Network settings')
 
-        d11 = 'Local IP address to use when doing reverse connections'
-        o11 = option('localAddress', cf.cf.getData('localAddress'), d11, 
'string', tabid='Core settings')
+        d12 = 'Local IP address to use when doing reverse connections'
+        o12 = option('localAddress', cf.cf.getData('localAddress'), d12, 
'string', tabid='Core settings')
         
         ######### Misc ###########
-        d12 = 'Enable this when you are doing a demo in a conference'
-        o12 = option('demo', cf.cf.getData('demo'), d12, 'boolean', 
tabid='Misc settings')
-        
-        d13 = 'A comma separated list of URLs that w3af should completely 
ignore'
-        h13 = 'Sometimes it\'s a good idea to ignore some URLs and test them 
manually'
-        o13 = option('nonTargets', cf.cf.getData('nonTargets'), d13, 'list', 
tabid='Misc settings')
+        d13 = 'Enable this when you are doing a demo in a conference'
+        o13 = option('demo', cf.cf.getData('demo'), d13, 'boolean', 
tabid='Misc settings')
+
+        ######### Targets #########
+        d14 = 'Use wildcard- instead of regex matching for defining black- or 
whitelists'
+        h14 = ('Per default, w3af uses regex matching for the black- and '
+               'whitelist. If useWildcardMatching is set, the target black- 
and '
+               'whitelist will use wildcard patterns instead of regular '
+               'expressions for matching, which are easier to define.')
+        o14 = option('useWildcardMatching', 
cf.cf.getData('useWildcardMatching'), d14, 'boolean', tabid='Target settings', 
help=h14)
         
-        d14 = 'Export all discovered fuzzable requests to the given file (CSV)'
-        o14 = option('exportFuzzableRequests', 
cf.cf.getData('exportFuzzableRequests'), d14, 'string', tabid='Export fuzzable 
Requests')
+        # XXX There's one bug here: You can't use commas (',') in the regexes, as
+        # they will be recognized as list item separators by Python. What seems
+        # to work is to enclose the regex with single ticks ('') when saving the
+        # config, but those ticks will be lost the next time you go and look at
+        # the config, so you have to set them again.
+        d15 = ('A comma separated blacklist of URLs that w3af should globally '
+               'ignore. Has precedence over whitelistRegex.')
+        h15 = ('URLs in the blacklist will not be tested by w3af. Please use '
+               'regular expressions to specify the URLs, or, if 
useWildcardMatching '
+               'is enabled, wildcard patterns. blacklistRegex has '
+               'precedence over whitelistRegex.')
+        o15 = option('blacklistRegex', cf.cf.getData('blacklistRegex'), d15, 
'list', tabid='Target settings', help=h15)
+
+        d16 = 'A comma separated whitelist that every URL has to match before 
it is tested'
+        h16 = ('If a whitelist is given, only targets matching one of the '
+                'patterns in the list will be tested. Please use regular '
+                'expressions to specify the URLs, or, if useWildcardMatching '
+                'is enabled, wildcard patterns. Note that this setting is 
global '
+                'to all URL requests and may cause unexpected or incorrect '
+                'results from some plugins. An example of this would be 
enabling '
+                'the yahoo, MSN, or google spider plugins but setting the '
+                'whitelist to ^http://www.example.com/site1/.*$ will cause the 
'
+                'search engine plugins to return empty results. If this is a '
+                'problem you probably want to use the webSpider followRegex 
and '
+                'ignoreRegex, or include those search engines in your 
whitelist '
+                'regex. Setting the verbose flag in your output plugins will 
show '
+                'which URLs are being ignored.')
+        o16 = option('whitelistRegex', cf.cf.getData('whitelistRegex'), d16, 
'list', tabid='Target settings', help=h16)
+
+        d17 = 'Export all discovered fuzzable requests to the given file (CSV)'
+        o17 = option('exportFuzzableRequests', 
cf.cf.getData('exportFuzzableRequests'), d17, 'string', tabid='Export fuzzable 
Requests')
         
         ol = optionList()
         ol.add(o1)
@@ -131,6 +171,8 @@ class miscSettings(configurable):
         ol.add(o13)
         ol.add(o14)
         ol.add(o15)
+        ol.add(o16)
+        ol.add(o17)
         return ol
     
     def getDesc( self ):
@@ -161,8 +203,26 @@ class miscSettings(configurable):
         cf.cf.save('interface', optionsMap['interface'].getValue() )
         cf.cf.save('localAddress', optionsMap['localAddress'].getValue() )
         cf.cf.save('demo', optionsMap['demo'].getValue()  )
-        cf.cf.save('nonTargets', optionsMap['nonTargets'].getValue() )
         cf.cf.save('exportFuzzableRequests', 
optionsMap['exportFuzzableRequests'].getValue() )
+        cf.cf.save('useWildcardMatching', 
optionsMap['useWildcardMatching'].getValue() )
+
+        # Save the white- and blacklists as strings and as compiled regex.
+        for l in ( ( optionsMap['blacklistRegex'].getValue(), 'blacklistRegex' 
),
+                   ( optionsMap['whitelistRegex'].getValue(), 'whitelistRegex' 
) ):
+            the_list, var_name = l
+            if optionsMap['useWildcardMatching'].getValue():
+                regex_list = map( fnmatch.translate, the_list )
+            else:
+                regex_list = the_list
+            compiled_list = []
+            for regex in regex_list:
+                try:
+                    compiled_list.append( re.compile( regex ) )
+                except:
+                    msg = 'You specified an invalid regular expression: " 
%s".' % regex
+                    raise w3afException(msg)
+            cf.cf.save( var_name, the_list )
+            cf.cf.save( var_name + 'Compiled', compiled_list )
         
 # This is an undercover call to __init__ :) , so I can set all default parameters.
 miscSettings()
diff --git a/core/data/url/xUrllib.py b/core/data/url/xUrllib.py
index ab16622..9b31445 100644
--- a/core/data/url/xUrllib.py
+++ b/core/data/url/xUrllib.py
@@ -55,6 +55,8 @@ import core.data.kb.knowledgeBase as kb
 # This is a singleton that's used for assigning request IDs
 from core.controllers.misc.number_generator import consecutive_number_generator
 
+# For the blacklisting code
+import fnmatch
 
 class sizeExceeded( Exception ):
     pass
@@ -89,7 +91,7 @@ class xUrllib:
         self._paused = False
         self._mustStop = False
         self._ignore_errors_conf = False
-    
+        
     def pause(self,  pauseYesNo):
         '''
         When the core wants to pause a scan, it calls this method, in order to 
freeze all actions
@@ -214,20 +216,27 @@ class xUrllib:
         req = urllib2.Request( uri )
         req = self._addHeaders( req )
         return req.headers
-    
+
     def _isBlacklisted( self, uri ):
         '''
-        If the user configured w3af to ignore a URL, we are going to be 
applying that configuration here.
+        If the user configured w3af to ignore a URL, we apply that 
configuration here.
         This is the lowest layer inside w3af.
         '''
-        listOfNonTargets = cf.cf.getData('nonTargets') or []
-        for u in listOfNonTargets:
-            if urlParser.uri2url( uri ) == urlParser.uri2url( u ):
-                msg = 'The URL you are trying to reach was configured as a 
non-target. ( '
-                msg += uri +' ). Returning an empty response.'
+        # First: blacklist
+        for regex in cf.cf.getData( 'blacklistRegexCompiled' ):
+            if regex.match( uri ):
+                msg = ( 'The URL you are trying to reach was configured as a '
+                        'non-target via blacklistRegex. ( %s ). Returning an 
empty '
+                        'response.' ) % uri
                 om.out.debug( msg )
                 return True
-        
+        # Second: whitelist
+        if filter(lambda regex: regex.match( uri ), cf.cf.getData( 
'whitelistRegexCompiled' ) ) == []:
+            msg = ( 'The URL you are trying to reach was configured as a '
+                    'non-target via whitelistRegex. ( %s ). Returning an empty 
'
+                    'response.' ) % uri
+            om.out.debug( msg )
+            return True
         return False
     
     def sendRawRequest( self, head, postdata, fixContentLength=True, 
get_size=True):
------------------------------------------------------------------------------
Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day 
trial. Simplify your report design, integration and deployment - and focus on 
what you do best, core application coding. Discover what's new with 
Crystal Reports now.  http://p.sf.net/sfu/bobj-july
_______________________________________________
W3af-develop mailing list
W3af-develop@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/w3af-develop

Reply via email to