dumps)

ArielGlenn (Code Review) Wed, 30 Mar 2016 09:12:19 -0700

ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/280195


Change subject: flake8 for dumps/tools directory
......................................................................

flake8 for dumps/tools directory

Change-Id: Ic0a2810434173f4fcdb3ce584d18678ff6d3421e
---
M xmldumps-backup/tools/fixaborts.py
M xmldumps-backup/tools/mysql2txt.py
M xmldumps-backup/tools/pagerange.py
3 files changed, 61 insertions(+), 51 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/95/280195/1

diff --git a/xmldumps-backup/tools/fixaborts.py b/xmldumps-backup/tools/fixaborts.py
index 502a559..a89ebf1 100644
--- a/xmldumps-backup/tools/fixaborts.py
+++ b/xmldumps-backup/tools/fixaborts.py
@@ -10,6 +10,7 @@
 import sys
 import getopt
 
+
 def usage(message=None):
     '''
     display a helpful usage message
@@ -162,8 +163,10 @@
     file, create a new status file with that date and time and
     a status of 'done'
     '''
-    # <li>2016-02-13 02:55:36 <a 
href="enwiktionary/20160203">enwiktionary</a>: <span class='done'>Dump 
complete</span></li>
-    # <li>2016-03-03 03:12:33 <a 
href="avwiktionary/20160203">avwiktionary</a>: <span class="failed">dump 
aborted</span></li>
+    # <li>2016-02-13 02:55:36 <a 
href="enwiktionary/20160203">enwiktionary</a>: \
+    #    <span class='done'>Dump complete</span></li>
+    # <li>2016-03-03 03:12:33 <a 
href="avwiktionary/20160203">avwiktionary</a>: \
+    #    <span class="failed">dump aborted</span></li>
     statusfile = get_status_filepath(wiki, dumpdir, date)
     new_content_format = ('<li>{0} <a href="{1}/{2}">{1}</a>: '
                           '<span class="done">Dump complete</span></li>\n')
diff --git a/xmldumps-backup/tools/mysql2txt.py b/xmldumps-backup/tools/mysql2txt.py
index 014e093..9a685e5 100644
--- a/xmldumps-backup/tools/mysql2txt.py
+++ b/xmldumps-backup/tools/mysql2txt.py
@@ -1,17 +1,19 @@
 # this script reads from stdin a sql file created by mysqldump, grabs the 
requested columns from
-# the requested table from each tuple, and writes them out one tuple per line 
+# the requested table from each tuple, and writes them out one tuple per line
 # with a comma between columns, keeping the original escaping of values as 
done by mysql.
 
 import getopt
-import os
 import re
 import sys
+
 
 class ConverterError(Exception):
     pass
 
+
 class MysqlFile:
-    def __init__(self, f, tableRequested, columnsRequested, 
valuesRequestedCols, valuesRequestedVals, fieldSeparator):
+    def __init__(self, f, tableRequested, columnsRequested, 
valuesRequestedCols,
+                 valuesRequestedVals, fieldSeparator):
         self.file = f
         self.tableRequested = tableRequested
         self.columnsRequested = columnsRequested
@@ -43,7 +45,7 @@
         if not tableFound:
             raise ConverterError("create statement for requested table not 
found in file")
 
-    def getLine(self, maxbytes = 0):
+    def getLine(self, maxbytes=0):
         """returns line including the \n, up to maxbytes"""
         line = ""
         length = 0
@@ -52,7 +54,7 @@
         while self.buffer[self.bufferInd] != '\n':
                 line = line + self.buffer[self.bufferInd]
                 if not self.incrementBufferPtr():
-                   return False
+                    return False
                 length = length + 1
                 if maxbytes and length == maxbytes:
                     return line
@@ -65,10 +67,10 @@
         # skip up to the newline...
         while self.buffer[self.bufferInd] != '\n':
                 if not self.incrementBufferPtr():
-                   return False
+                    return False
         # and now the newline.
         return self.incrementBufferPtr()
-        
+
     def findInsertStatement(self):
         """leave the file contents at the line immediately following
         an INSERT statement"""
@@ -90,14 +92,14 @@
         self.columnsInTable = []
         columnNameExpr = re.compile('\s+`([^`]+)`')
         line = self.getLine()
-        while (line and not self.eof and line[0] != ')' ):
+        while (line and not self.eof and line[0] != ')'):
             columnNameMatch = columnNameExpr.match(line)
             if (columnNameMatch):
                 self.columnsInTable.append(columnNameMatch.group(1))
             line = self.getLine()
 
         for c in self.columnsRequested:
-            if not c in self.columnsInTable:
+            if c not in self.columnsInTable:
                 raise ConverterError("requested column %s not found in table" 
% c)
 
 #        print "columns in table: ", self.columnsInTable
@@ -110,16 +112,16 @@
                 v = v | self.GET
             if c in self.valuesRequestedCols:
                 v = v | self.CHECK
-            self.columnsToGet.append( v )
+            self.columnsToGet.append(v)
 
 #        print "columns to get: ", self.columnsToGet
 
         self.columnOrder = []
         # we want here a list which tells us to
         # write the ith column we read from tuple first,
-        # the jth one second, the kth one third etc. 
+        # the jth one second, the kth one third etc.
         columnsToGetTrue = []
-        for i in range(0,len(self.columnsToGet)):
+        for i in range(0, len(self.columnsToGet)):
             if self.columnsToGet[i] & self.GET:
                 columnsToGetTrue.append(self.columnsInTable[i])
         for c in self.columnsRequested:
@@ -127,19 +129,19 @@
 
 #        print "column order: ", self.columnOrder
 
-    def whine(self, message = None):
+    def whine(self, message=None):
         if (message):
-            raise ConverterError("whine whine whine: " + message )
+            raise ConverterError("whine whine whine: " + message)
         else:
             raise ConverterError("whine whine whine. failed to parse a row.")
 
     def getColumnsFromRow(self):
         """returns a list of column values extracted from a row.
-        f is an open input file positioned at the beginning of a 
+        f is an open input file positioned at the beginning of a
         tuple representing a row in mysql output format,
         colsToGet is a list of True/False correspnding to which
         elements in the tuple we want to retrieve and return"""
-    
+
 #        print "buffer is ", self.buffer[self.bufferInd:self.bufferInd+80], 
"..."
         if not self.skipStartOfRow():
             self.whine("couldn't find start of row")
@@ -177,7 +179,7 @@
         return True
 
     def skipEndOfRow(self):
-        # expect... what do we expect? ); or ), 
+        # expect... what do we expect? ); or ),
         # the first means end of row with no more rows after, the second means 
end of
         # specific row only
         if not self.skipChar(')'):
@@ -190,12 +192,12 @@
             self.skipChar('\n')
 
     def getColValue(self):
-        #expect: a string of digits 
-        # or: '  some stuff, ' 
-        value=""
+        # expect: a string of digits
+        # or: '  some stuff, '
+        value = ""
         if (self.buffer[self.bufferInd].isdigit()):
             while self.buffer[self.bufferInd].isdigit():
-                value=value + self.buffer[self.bufferInd]
+                value = value + self.buffer[self.bufferInd]
                 if not self.incrementBufferPtr():
                     return False
             # there will be a comma before the next
@@ -208,20 +210,20 @@
             escaped = False
             while not done:
                 if self.buffer[self.bufferInd] != "'" and 
self.buffer[self.bufferInd] != '\\':
-                    value=value + self.buffer[self.bufferInd]
+                    value = value + self.buffer[self.bufferInd]
                     if not self.incrementBufferPtr():
                         return False
                     escaped = False
                 elif self.buffer[self.bufferInd] == "'":
-                    value=value + self.buffer[self.bufferInd]
+                    value = value + self.buffer[self.bufferInd]
                     if not self.incrementBufferPtr():
                         return False
                     if not escaped:
                         done = True
                     else:
                         escaped = False
-                else: # escape char \ found
-                    value=value + self.buffer[self.bufferInd]
+                else:  # escape char \ found
+                    value = value + self.buffer[self.bufferInd]
                     if not self.incrementBufferPtr():
                         return False
                     if escaped:
@@ -237,11 +239,14 @@
             self.whine()
 
     def skipColValue(self):
-        #expect: a string of digits with possibly a . in there
-        # or: '  some stuff, ' 
+        # expect: a string of digits with possibly a . in there
+        # or: '  some stuff, '
         if (self.buffer[self.bufferInd].isdigit()):
             # might have a float so... crudely...
-            while self.buffer[self.bufferInd].isdigit() or 
self.buffer[self.bufferInd] == '.' or self.buffer[self.bufferInd] == 'e' or 
self.buffer[self.bufferInd] == '-':
+            while (self.buffer[self.bufferInd].isdigit() or
+                   self.buffer[self.bufferInd] == '.' or
+                   self.buffer[self.bufferInd] == 'e' or
+                   self.buffer[self.bufferInd] == '-'):
                 if not self.incrementBufferPtr():
                     return False
             # there will be a comma before the next
@@ -262,7 +267,7 @@
                         done = True
                     else:
                         escaped = False
-                else: # escape char \ found
+                else:  # escape char \ found
                     if not self.incrementBufferPtr():
                         return False
                     if escaped:
@@ -274,7 +279,7 @@
                 # column if we aren't at the end of the row.
                 self.skipChar(',')
         else:
-#            print "buffer is ", 
self.buffer[self.bufferInd:self.bufferInd+80], "..."
+            # print "buffer is ", 
self.buffer[self.bufferInd:self.bufferInd+80], "..."
             self.whine("failed to parse a value, found start character " + 
self.buffer[self.bufferInd])
 
     def skipChar(self, c):
@@ -288,12 +293,12 @@
     def incrementBufferPtr(self):
         self.bufferInd = self.bufferInd + 1
         if self.bufferInd == len(self.buffer):
-            return self.fillBuffer() # this will move the index accordingly
+            return self.fillBuffer()  # this will move the index accordingly
         return True
 
     def fillBuffer(self):
         if self.bufferInd == len(self.buffer) and not self.rowsDone:
-            # we are out of data in the buffer, and there's more 
+            # we are out of data in the buffer, and there's more
             # rows to be gotten
 
             # fixme this should be a constant someplace configurable
@@ -311,20 +316,21 @@
         return column
 
     def writeColumns(self, columns, outFile):
-        """takes a list of column values without names. 
-        must find the names these correspond to, figure out the right 
+        """takes a list of column values without names.
+        must find the names these correspond to, figure out the right
         order (or alternatively maybe we have a map that tells us the order)
         and write the values out in the new order."""
         if columns:
             ind = 0
             for i in self.columnOrder:
-                outFile.write(self.formatColumn(columns[i])) 
-                if ind < len(self.columnOrder)-1:
+                outFile.write(self.formatColumn(columns[i]))
+                if ind < len(self.columnOrder) - 1:
                     outFile.write(self.fieldSeparator)
                 ind = ind + 1
             outFile.write('\n')
 
-def usage(message = None):
+
+def usage(message=None):
     if message:
         print message
         print "Usage: python mysql2txt.py --table=tablename 
--columns=col1,col2... "
@@ -356,7 +362,8 @@
     fieldSeparator = ' '
 
     try:
-        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "", ['table=', 
'columns=', 'values=', 'separator=' ])
+        (options, remainder) = getopt.gnu_getopt(
+            sys.argv[1:], "", ['table=', 'columns=', 'values=', 'separator='])
     except:
         usage("Unknown option specified")
 
@@ -367,14 +374,14 @@
             if ',' in val:
                 columnsRequested = val.split(',')
             else:
-                columnsRequested = [ val ]
+                columnsRequested = [val]
         elif opt == "--values":
             if ',' in val:
                 vlist = val.split(',')
             else:
-                vlist = [ val ]
-            valuesRequestedCols = [ v.split('=')[0] for v in vlist ]
-            valuesRequestedVals = [ v.split('=')[1] for v in vlist ]
+                vlist = [val]
+            valuesRequestedCols = [v.split('=')[0] for v in vlist]
+            valuesRequestedVals = [v.split('=')[1] for v in vlist]
         elif opt == "--separator":
             fieldSeparator = val
 
@@ -384,7 +391,8 @@
     if (not tableRequested or not columnsRequested):
         usage("Missing required option")
 
-    m = MysqlFile(sys.stdin, tableRequested, columnsRequested, 
valuesRequestedCols, valuesRequestedVals, fieldSeparator)
+    m = MysqlFile(sys.stdin, tableRequested, columnsRequested,
+                  valuesRequestedCols, valuesRequestedVals, fieldSeparator)
     m.fillBuffer()
 
     m.findCreateStatement()
@@ -401,5 +409,4 @@
             m.rowsDone = False
             m.findInsertStatement()
 
-    exit(0);
-
+    exit(0)
diff --git a/xmldumps-backup/tools/pagerange.py b/xmldumps-backup/tools/pagerange.py
index d9693ed..1942da5 100644
--- a/xmldumps-backup/tools/pagerange.py
+++ b/xmldumps-backup/tools/pagerange.py
@@ -67,7 +67,7 @@
 
         ranges = []
         page_start = 1
-        numrevs = self.total_revs/numjobs + 1
+        numrevs = self.total_revs / numjobs + 1
         prevguess = 0
         for jobnum in range(1, int(numjobs) + 1):
             if jobnum == numjobs:
@@ -75,7 +75,7 @@
                 ranges.append((page_start, self.total_pages))
                 break
             numjobs_left = numjobs - jobnum + 1
-            interval = ((self.total_pages - page_start)/numjobs_left) + 1
+            interval = ((self.total_pages - page_start) / numjobs_left) + 1
             (start, end) = self.get_page_range(page_start, numrevs,
                                                page_start + interval, 
prevguess)
             page_start = end + 1
@@ -117,8 +117,8 @@
         total = 0
         maxtodo = 1000000
 
-        runstodo = estimate/maxtodo + 1
-        step = (guess - page_start)/runstodo
+        runstodo = estimate / maxtodo + 1
+        step = (guess - page_start) / runstodo
         ends = range(page_start, guess, step)
 
         if ends[-1] != guess:

-- 
To view, visit https://gerrit.wikimedia.org/r/280195
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic0a2810434173f4fcdb3ce584d18678ff6d3421e
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] flake8 for dumps/tools directory - change (operations/dumps)

Reply via email to