Author: kiyoto <[email protected]> Date: Sat, 23 Oct 2010 08:23:25 -0700 Subject: No longer assumes Apache time zone == server time zone. Also, the header is fixed to reflect proper licensing terms. Commit: 68a7a5f99809c0c35f1bb73c7bc547b9282d2422
--- visitor/visitor.py | 21 +++++++++++++-------- 1 files changed, 13 insertions(+), 8 deletions(-) diff --git a/visitor/visitor.py b/visitor/visitor.py index c15134d..6950cfe 100644 --- a/visitor/visitor.py +++ b/visitor/visitor.py @@ -1,4 +1,5 @@ -# author: Kiyoto Tamura <[email protected]> +# Copyright 2010 The Tor Project +# See LICENSE for licensing information # # A Python port of Karsten Loesing's VisiTor. # @@ -14,7 +15,7 @@ from cStringIO import StringIO # regexes used in the script IP_RE = re.compile(r'(\d+\.){3}\d+') -APACHE_DATETIME = re.compile(r'\[(\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}) -\d{4}\]') +APACHE_DATETIME = re.compile(r'\[(\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}) ([+-]\d{4})\]') TOR_USERAGENTS = [('torbutton1_2_0', re.compile(r'Mozilla/5\.0 \(Windows; U; Windows NT 5\.1; ' r'[a-z]{2}-[A-Z]{2}; rv\:1\.8\.1\.16\) ' r'Gecko/20080702 Firefox/2\.0\.0\.16')), @@ -56,13 +57,17 @@ def get_exitlist(exitlist_filepath): return exitlist -def apache_time2datetime(time_str): +def apache_time2datetime(time_str, timediff_str): """ Transforms the apache time to a Python datetime object. """ - # We need to convert the time to UTC - yr, mo, d, h, m, s, _, _, _ = gmtime(mktime(strptime(time_str, '%d/%b/%Y:%H:%M:%S'))) - return datetime(yr, mo, d, h, m, s) + # the apache timezone diff format is like -?xx00 where + # `xx00`ranges from 0000 to 2300 + # Note the division by 36 is 60 * 60 / 100 + yr, mo, d, h, m, s, _, _, _ = strptime(time_str, '%d/%b/%Y:%H:%M:%S') + local_datetime = datetime(yr, mo, d, h, m, s) + timezone_diff = timedelta(0, int(timediff_str) * 36) + return local_datetime - timezone_diff def parse_apache_line(log_line): """ @@ -76,8 +81,8 @@ def parse_apache_line(log_line): apache_datetime = APACHE_DATETIME.search(log_line) if apache_datetime is None: raise ApacheParseError("Could not match the datetime for the line %s"%log_line) - apache_datetime = apache_time2datetime(apache_datetime.group(1)) - + apache_datetime = apache_time2datetime(apache_datetime.group(1), + apache_datetime.group(2)) user_agent = log_line.split('" ')[-1].rstrip('\n') return ip, user_agent, apache_datetime # maybe turn it into a dict if it gets confusing -- 1.7.1
