ArielGlenn has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/280104 )

Change subject: thumbDateAnalysis full pylint and pep8
......................................................................


thumbDateAnalysis full pylint and pep8

Change-Id: I81310c994c52f841eb3d93491a6b129efad07564
---
M tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py
1 file changed, 75 insertions(+), 66 deletions(-)

Approvals:
  ArielGlenn: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py b/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py
index 5576798..4f0c1bf 100644
--- a/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py
+++ b/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py
@@ -1,35 +1,42 @@
 # -*- coding: utf-8  -*-
 
-import os
 import re
 import sys
-import time
 import getopt
 
+
 def usage(message=None):
-    print "Usage: %s [--sdate=date --edate=date --created [filename]" % sys.argv[0]
-    print "sdate: start date for which to print stats, default: earliest date in file "
-    print "edate: end date for which to print stats, default: latest date in file"
-    print "created: show only the number of files and sizes on the date the first thumb"
-    print "was created (presumably the date the image itself was first uploaded)"
-    print ""
-    print "Date format for sdate and edate: yyyy-mm-dd"
-    print ""
-    print "If no filename is specified, input is read from stdin"
-    print
-    print "Format of input file: (sample line)"
-    print "2011-10-29  01:57:51   100311   Festiwal_Słowian_i_Wikingów_2009_121.jpg/640px-Festiwal_Słowian_i_Wikingów_2009_121.jpg"
-    print "date in yyyy-mm-dd format, time in hh:mm::ss format, size in bytes, thumb directory/thumb filename"
+    if message is not None:
+        sys.stderr.write(message + "\n")
+    usage_message = """
+Usage: thumbDateAnalysis.py [--sdate=date --edate=date --created [filename]
+
+  --sdate:   start date for which to print stats, default: earliest date in file
+  --edate:   end date for which to print stats, default: latest date in file
+  --created: show only the number of files and sizes on the date the first thumb
+             was created (presumably the date the image itself was first uploaded)
+
+Date format for sdate and edate: yyyy-mm-dd
+
+If no filename is specified, input is read from stdin.
+
+Format of input file: (sample line)
+
+2011-10-29  01:57:51   100311   Festiwal_Słowian_i_Wikingów_2009_121.jpg/640px-Festiwal_Słowian_i_Wikingów_2009_121.jpg
+date in yyyy-mm-dd format, time in hh:mm::ss format, size in bytes, thumb directory/thumb filename
+"""
+    sys.stderr.write(usage_message)
     sys.exit(1)
 
-if __name__ == "__main__":
+
+def do_main():
     sdate = None
     edate = None
     created = False
     try:
-        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
-                                                 [ 'sdate=', 'edate=', 'created' ])
-    except:
+        (options, remainder) = getopt.gnu_getopt(
+            sys.argv[1:], "", ['sdate=', 'edate=', 'created'])
+    except Exception:
         usage("Unknown option specified")
 
     for (opt, val) in options:
@@ -41,71 +48,73 @@
             created = True
 
     dateexp = re.compile(r"^\d{4}-\d{2}-\d{2}$")
-    for d in filter(None, [ sdate, edate ]):
-        if not dateexp.match(d):
+    for date in filter(None, [sdate, edate]):
+        if not dateexp.match(date):
             usage("Bad date format.")
 
     if len(remainder) == 1:
-        inputFile = remainder[0]
-        fHandle = open(inputFile,"r")
+        input_file = remainder[0]
+        f_handle = open(input_file, "r")
     elif len(remainder) == 0:
-        fHandle = sys.stdin
+        f_handle = sys.stdin
     else:
         usage("Too many arguments.")
 
-    lastDirName = None
-    numFilesSameDate = 0
-    byteCountSameDate = 0
-    fileCounts = {}
-    byteCounts = {}
-    for line in fHandle:
+    last_dir_name = None
+    file_counts = {}
+    byte_counts = {}
+    for line in f_handle:
         try:
-            ( fDate, fTime, fSize, path ) = line.rstrip().split()
-        except:
+            (f_date, ftime_unused, f_size, path) = line.rstrip().split()
+        except Exception:
             print >> sys.stderr, "skipping badly formatted line: ", line.rstrip()
             continue
-        ( dirName, fName ) = path.split('/',2)
-        if not lastDirName:
-            lastDirName = dirName
-        if dirName != lastDirName:
-            # should just print the number of files for every date sorted by date order, plus the dir name of course"
-            if (sdate and (fDate >= sdate)) or not sdate:
-                if (edate and (fDate <= edate)) or not edate:
+        (dir_name, fname_unused) = path.split('/', 2)
+        if not last_dir_name:
+            last_dir_name = dir_name
+        if dir_name != last_dir_name:
+            # should just print the number of files
+            # for every date sorted by date order, plus the dir name of course
+            if (sdate and (f_date >= sdate)) or not sdate:
+                if (edate and (f_date <= edate)) or not edate:
                     # print the stats
-                    dateStrings = fileCounts.keys()
-                    dateStrings.sort()
+                    date_strings = file_counts.keys()
+                    date_strings.sort()
                     if created:
-                        printDates = [ dateStrings[0] ]
+                        print_dates = [date_strings[0]]
                     else:
-                        printDates = dateStrings
-                    for d in printDates:
-                        print "Date:", d, "FilesThisDate:", fileCounts[d], "ByteCountThisDate:", byteCounts[d], "Dir: ", lastDirName
-            lastDirName = dirName
+                        print_dates = date_strings
+                    for date in print_dates:
+                        print ("Date:", date, "FilesThisDate:", file_counts[date],
+                               "ByteCountThisDate:", byte_counts[date], "Dir: ", last_dir_name)
+            last_dir_name = dir_name
             # reinitialize stats
-            numFilesSameDate = 0
-            byteCountSameDate = 0
-            fileCounts = {}
-            byteCounts = {}
+            file_counts = {}
+            byte_counts = {}
         # add to the stats.
-        if (sdate and (fDate >= sdate)) or not sdate:
-            if (edate and (fDate <= edate)) or not edate:
-                if fDate not in fileCounts:
-                    fileCounts[fDate] = 0
-                fileCounts[fDate] = fileCounts[fDate] + 1
-                if fDate not in byteCounts:
-                    byteCounts[fDate] = 0
-                byteCounts[fDate] = byteCounts[fDate] + int(fSize)
+        if (sdate and (f_date >= sdate)) or not sdate:
+            if (edate and (f_date <= edate)) or not edate:
+                if f_date not in file_counts:
+                    file_counts[f_date] = 0
+                file_counts[f_date] = file_counts[f_date] + 1
+                if f_date not in byte_counts:
+                    byte_counts[f_date] = 0
+                byte_counts[f_date] = byte_counts[f_date] + int(f_size)
 
     # print stats for final dir
-    if (sdate and (fDate >= sdate)) or not sdate:
-        if (edate and (fDate <= edate)) or not edate:
-            dateStrings = fileCounts.keys()
-            dateStrings.sort()
+    if (sdate and (f_date >= sdate)) or not sdate:
+        if (edate and (f_date <= edate)) or not edate:
+            date_strings = file_counts.keys()
+            date_strings.sort()
             if created:
-                printDates = [ dateStrings[0] ]
+                print_dates = [date_strings[0]]
             else:
-                printDates = dateStrings
-            for d in printDates:
-                print "Date:", d, "FilesThisDate:", fileCounts[d], "ByteCountThisDate:", byteCounts[d], "Dir: ", dirName
+                print_dates = date_strings
+            for date in print_dates:
+                print("Date:", date, "FilesThisDate:", file_counts[date],
+                      "ByteCountThisDate:", byte_counts[date], "Dir: ", dir_name)
     sys.exit(0)
 
+
+if __name__ == "__main__":
+    do_main()

-- 
To view, visit https://gerrit.wikimedia.org/r/280104
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I81310c994c52f841eb3d93491a6b129efad07564
Gerrit-PatchSet: 2
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to