ArielGlenn has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/280104 )
Change subject: thumbDateAnalysis full pylint and pep8 ...................................................................... thumbDateAnalysis full pylint and pep8 Change-Id: I81310c994c52f841eb3d93491a6b129efad07564 --- M tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py 1 file changed, 75 insertions(+), 66 deletions(-) Approvals: ArielGlenn: Looks good to me, approved jenkins-bot: Verified diff --git a/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py b/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py index 5576798..4f0c1bf 100644 --- a/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py +++ b/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py @@ -1,35 +1,42 @@ # -*- coding: utf-8 -*- -import os import re import sys -import time import getopt + def usage(message=None): - print "Usage: %s [--sdate=date --edate=date --created [filename]" % sys.argv[0] - print "sdate: start date for which to print stats, default: earliest date in file " - print "edate: end date for which to print stats, default: latest date in file" - print "created: show only the number of files and sizes on the date the first thumb" - print "was created (presumably the date the image itself was first uploaded)" - print "" - print "Date format for sdate and edate: yyyy-mm-dd" - print "" - print "If no filename is specified, input is read from stdin" - print - print "Format of input file: (sample line)" - print "2011-10-29 01:57:51 100311 Festiwal_Słowian_i_Wikingów_2009_121.jpg/640px-Festiwal_Słowian_i_Wikingów_2009_121.jpg" - print "date in yyyy-mm-dd format, time in hh:mm::ss format, size in bytes, thumb directory/thumb filename" + if message is not None: + sys.stderr.write(message + "\n") + usage_message = """ +Usage: thumbDateAnalysis.py [--sdate=date --edate=date --created [filename] + + --sdate: start date for which to print stats, default: earliest date in file + --edate: end date for which to print stats, default: latest date in file + --created: show only the number of files and sizes on the date the first thumb + was created (presumably the date the image itself was first uploaded) + +Date format for sdate and edate: yyyy-mm-dd + +If no filename is specified, input is read from stdin. + +Format of input file: (sample line) + +2011-10-29 01:57:51 100311 Festiwal_Słowian_i_Wikingów_2009_121.jpg/640px-Festiwal_Słowian_i_Wikingów_2009_121.jpg +date in yyyy-mm-dd format, time in hh:mm::ss format, size in bytes, thumb directory/thumb filename +""" + sys.stderr.write(usage_message) sys.exit(1) -if __name__ == "__main__": + +def do_main(): sdate = None edate = None created = False try: - (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "", - [ 'sdate=', 'edate=', 'created' ]) - except: + (options, remainder) = getopt.gnu_getopt( + sys.argv[1:], "", ['sdate=', 'edate=', 'created']) + except Exception: usage("Unknown option specified") for (opt, val) in options: @@ -41,71 +48,73 @@ created = True dateexp = re.compile(r"^\d{4}-\d{2}-\d{2}$") - for d in filter(None, [ sdate, edate ]): - if not dateexp.match(d): + for date in filter(None, [sdate, edate]): + if not dateexp.match(date): usage("Bad date format.") if len(remainder) == 1: - inputFile = remainder[0] - fHandle = open(inputFile,"r") + input_file = remainder[0] + f_handle = open(input_file, "r") elif len(remainder) == 0: - fHandle = sys.stdin + f_handle = sys.stdin else: usage("Too many arguments.") - lastDirName = None - numFilesSameDate = 0 - byteCountSameDate = 0 - fileCounts = {} - byteCounts = {} - for line in fHandle: + last_dir_name = None + file_counts = {} + byte_counts = {} + for line in f_handle: try: - ( fDate, fTime, fSize, path ) = line.rstrip().split() - except: + (f_date, ftime_unused, f_size, path) = line.rstrip().split() + except Exception: print >> sys.stderr, "skipping badly formatted line: ", line.rstrip() continue - ( dirName, fName ) = path.split('/',2) - if not lastDirName: - lastDirName = dirName - if dirName != lastDirName: - # should just print the number of files for every date sorted by date order, plus the dir name of course" - if (sdate and (fDate >= sdate)) or not sdate: - if (edate and (fDate <= edate)) or not edate: + (dir_name, fname_unused) = path.split('/', 2) + if not last_dir_name: + last_dir_name = dir_name + if dir_name != last_dir_name: + # should just print the number of files + # for every date sorted by date order, plus the dir name of course + if (sdate and (f_date >= sdate)) or not sdate: + if (edate and (f_date <= edate)) or not edate: # print the stats - dateStrings = fileCounts.keys() - dateStrings.sort() + date_strings = file_counts.keys() + date_strings.sort() if created: - printDates = [ dateStrings[0] ] + print_dates = [date_strings[0]] else: - printDates = dateStrings - for d in printDates: - print "Date:", d, "FilesThisDate:", fileCounts[d], "ByteCountThisDate:", byteCounts[d], "Dir: ", lastDirName - lastDirName = dirName + print_dates = date_strings + for date in print_dates: + print ("Date:", date, "FilesThisDate:", file_counts[date], + "ByteCountThisDate:", byte_counts[date], "Dir: ", last_dir_name) + last_dir_name = dir_name # reinitialize stats - numFilesSameDate = 0 - byteCountSameDate = 0 - fileCounts = {} - byteCounts = {} + file_counts = {} + byte_counts = {} # add to the stats. - if (sdate and (fDate >= sdate)) or not sdate: - if (edate and (fDate <= edate)) or not edate: - if fDate not in fileCounts: - fileCounts[fDate] = 0 - fileCounts[fDate] = fileCounts[fDate] + 1 - if fDate not in byteCounts: - byteCounts[fDate] = 0 - byteCounts[fDate] = byteCounts[fDate] + int(fSize) + if (sdate and (f_date >= sdate)) or not sdate: + if (edate and (f_date <= edate)) or not edate: + if f_date not in file_counts: + file_counts[f_date] = 0 + file_counts[f_date] = file_counts[f_date] + 1 + if f_date not in byte_counts: + byte_counts[f_date] = 0 + byte_counts[f_date] = byte_counts[f_date] + int(f_size) # print stats for final dir - if (sdate and (fDate >= sdate)) or not sdate: - if (edate and (fDate <= edate)) or not edate: - dateStrings = fileCounts.keys() - dateStrings.sort() + if (sdate and (f_date >= sdate)) or not sdate: + if (edate and (f_date <= edate)) or not edate: + date_strings = file_counts.keys() + date_strings.sort() if created: - printDates = [ dateStrings[0] ] + print_dates = [date_strings[0]] else: - printDates = dateStrings - for d in printDates: - print "Date:", d, "FilesThisDate:", fileCounts[d], "ByteCountThisDate:", byteCounts[d], "Dir: ", dirName + print_dates = date_strings + for date in print_dates: + print("Date:", date, "FilesThisDate:", file_counts[date], + "ByteCountThisDate:", byte_counts[date], "Dir: ", dir_name) sys.exit(0) + +if __name__ == "__main__": + do_main() -- To view, visit https://gerrit.wikimedia.org/r/280104 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I81310c994c52f841eb3d93491a6b129efad07564 Gerrit-PatchSet: 2 Gerrit-Project: operations/dumps Gerrit-Branch: ariel Gerrit-Owner: ArielGlenn <ar...@wikimedia.org> Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits