ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/252134
Change subject: dumps: clean up docstrings for recompress jobs
......................................................................

dumps: clean up docstrings for recompress jobs

Change-Id: If3997a080c6de2a2b5ad60d9b4689d913ea44531
---
M xmldumps-backup/dumps/recompressjobs.py
1 file changed, 54 insertions(+), 30 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/34/252134/1

diff --git a/xmldumps-backup/dumps/recompressjobs.py b/xmldumps-backup/dumps/recompressjobs.py
index 2a728a5..31ce42a 100644
--- a/xmldumps-backup/dumps/recompressjobs.py
+++ b/xmldumps-backup/dumps/recompressjobs.py
@@ -68,10 +68,16 @@
                             self.get_index_filetype(), self.file_ext,
                             fname.partnum, fname.checkpoint, fname.temp)
 
-    # output files is a list of checkpoint files, otherwise it is a list of one file.
-    # checkpoint files get done one at a time. we can't really do parallel recompression jobs of
-    # 200 files, right?
     def build_command(self, runner, output_files):
+        '''
+        arguments:
+        runner: Runner object
+        output_files: if checkpointing of files is enabled, this should be a
+                      list of checkpoint files, otherwise it should be a list
+                      of the one file that will be produced by the dump
+        Note that checkpoint files get done one at a time. not in parallel
+        '''
+        # FIXME need shell escape
         if not exists(self.wiki.config.bzip2):
             raise BackupError("bzip2 command %s not found" % self.wiki.config.bzip2)
@@ -122,8 +128,8 @@
         if error:
             raise BackupError("error recompressing bz2 file(s)")
 
-    # shows all files possible if we don't have checkpoint files. without temp files of course
     def list_outfiles_to_publish(self, dump_dir):
+        '''shows all files possible if we don't have checkpoint files. without temp files of course'''
         files = []
         input_files = self.item_for_recompression.list_outfiles_for_input(dump_dir)
         for inp_file in input_files:
@@ -131,9 +137,11 @@
             files.append(self.get_multistream_index_fname(inp_file))
         return files
 
-    # shows all files possible if we don't have checkpoint files. without temp files of course
-    # only the parts we are actually supposed to do (if there is a limit)
     def list_outfiles_to_check_for_truncation(self, dump_dir):
+        '''
+        shows all files possible if we don't have checkpoint files. without temp files of course
+        only the parts we are actually supposed to do (if there is a limit)
+        '''
         files = []
         input_files = self.item_for_recompression.list_outfiles_for_input(dump_dir)
         for inp_file in input_files:
@@ -143,9 +151,11 @@
             files.append(self.get_multistream_index_fname(inp_file))
         return files
 
-    # shows all files possible if we don't have checkpoint files. no temp files.
-    # only the parts we are actually supposed to do (if there is a limit)
     def list_outfiles_for_build_command(self, dump_dir, partnum=None):
+        '''
+        shows all files possible if we don't have checkpoint files. no temp files.
+        only the parts we are actually supposed to do (if there is a limit)
+        '''
         files = []
         input_files = self.item_for_recompression.list_outfiles_for_input(dump_dir)
         for inp_file in input_files:
@@ -162,10 +172,11 @@
                                         inp_file.partnum, inp_file.checkpoint, inp_file.temp))
         return files
 
-    # shows all files possible if we don't have checkpoint files. should include temp files
-    # does just the parts we do if there is a limit
     def list_outfiles_for_cleanup(self, dump_dir, dump_names=None):
-        # some stages (eg XLMStubs) call this for several different dump_names
+        '''
+        shows all files possible if we don't have checkpoint files. should include temp files
+        does just the parts we do if there is a limit
+        '''
         if dump_names is None:
             dump_names = [self.dumpname]
         multistream_names = []
@@ -175,20 +186,20 @@
 
         files = []
         if self.item_for_recompression._checkpoints_enabled:
-            # we will pass list of parts or partnum_todo, or False, depending on the job setup.
             files.extend(self.list_checkpt_files_for_filepart(
                 dump_dir, self.get_fileparts_list(), multistream_names))
             files.extend(self.list_temp_files_for_filepart(
                 dump_dir, self.get_fileparts_list(), multistream_names))
         else:
-            # we will pass list of parts or partnum_todo, or False, depending on the job setup.
             files.extend(self.list_reg_files_for_filepart(
                 dump_dir, self.get_fileparts_list(), multistream_names))
         return files
 
-    # must return all output files that could be produced by a full run of this stage,
-    # not just whatever we happened to produce (if run for one file part, say)
     def list_outfiles_for_input(self, dump_dir):
+        '''
+        must return all output files that could be produced by a full run of this stage,
+        not just whatever we happened to produce (if run for one file part, say)
+        '''
         files = []
         input_files = self.item_for_recompression.list_outfiles_for_input(dump_dir)
         for inp_file in input_files:
@@ -225,10 +236,16 @@
     def get_file_ext(self):
         return "7z"
 
-    # output files is a list of checkpoint files, otherwise it is a list of one file.
-    # checkpoint files get done one at a time. we can't really do parallel recompression jobs of
-    # 200 files, right?
     def build_command(self, runner, output_files):
+        '''
+        arguments:
+        runner: Runner object
+        output_files: if checkpointing of files is enabled, this should be a
+                      list of checkpoint files, otherwise it should be a list
+                      of the one file that will be produced by the dump
+        Note that checkpoint files get done one at a time, not in parallel
+        '''
+        # FIXME need shell escape
         if not exists(self.wiki.config.bzip2):
             raise BackupError("bzip2 command %s not found" % self.wiki.config.bzip2)
@@ -276,8 +293,10 @@
         if error:
             raise BackupError("error recompressing bz2 file(s)")
 
-    # shows all files possible if we don't have checkpoint files. without temp files of course
     def list_outfiles_to_publish(self, dump_dir):
+        '''
+        shows all files possible if we don't have checkpoint files. without temp files of course
+        '''
         files = []
         input_files = self.item_for_recompression.list_outfiles_for_input(dump_dir)
         for inp_file in input_files:
@@ -286,9 +305,11 @@
                                        inp_file.checkpoint, inp_file.temp))
         return files
 
-    # shows all files possible if we don't have checkpoint files. without temp files of course
-    # only the parts we are actually supposed to do (if there is a limit)
     def list_outfiles_to_check_for_truncation(self, dump_dir):
+        '''
+        shows all files possible if we don't have checkpoint files. without temp files of course
+        only the parts we are actually supposed to do (if there is a limit)
+        '''
         files = []
         input_files = self.item_for_recompression.list_outfiles_for_input(dump_dir)
         for inp_file in input_files:
@@ -299,9 +320,11 @@
                                        inp_file.checkpoint, inp_file.temp))
         return files
 
-    # shows all files possible if we don't have checkpoint files. no temp files.
-    # only the parts we are actually supposed to do (if there is a limit)
     def list_outfiles_for_build_command(self, dump_dir, partnum=None):
+        '''
+        shows all files possible if we don't have checkpoint files. no temp files.
+        only the parts we are actually supposed to do (if there is a limit)
+        '''
         files = []
         input_files = self.item_for_recompression.list_outfiles_for_input(dump_dir)
         for inp_file in input_files:
@@ -315,28 +338,29 @@
                                        inp_file.checkpoint, inp_file.temp))
         return files
 
-    # shows all files possible if we don't have checkpoint files. should include temp files
-    # does just the parts we do if there is a limit
     def list_outfiles_for_cleanup(self, dump_dir, dump_names=None):
-        # some stages (eg XLMStubs) call this for several different dump_names
+        '''
+        shows all files possible if we don't have checkpoint files. should include temp files
+        does just the parts we do if there is a limit
+        '''
         if dump_names is None:
             dump_names = [self.dumpname]
         files = []
         if self.item_for_recompression._checkpoints_enabled:
-            # we will pass list of parts or partnum_todo, or False, depending on the job setup.
             files.extend(self.list_checkpt_files_for_filepart(
                 dump_dir, self.get_fileparts_list(), dump_names))
             files.extend(self.list_temp_files_for_filepart(
                 dump_dir, self.get_fileparts_list(), dump_names))
         else:
-            # we will pass list of parts or partnum_todo, or False, depending on the job setup.
             files.extend(self.list_reg_files_for_filepart(
                 dump_dir, self.get_fileparts_list(), dump_names))
         return files
 
-    # must return all output files that could be produced by a full run of this stage,
-    # not just whatever we happened to produce (if run for one file part, say)
     def list_outfiles_for_input(self, dump_dir):
+        '''
+        must return all output files that could be produced by a full run of this stage,
+        not just whatever we happened to produce (if run for one file part, say)
+        '''
         files = []
         input_files = self.item_for_recompression.list_outfiles_for_input(dump_dir)
         for inp_file in input_files:

--
To view, visit https://gerrit.wikimedia.org/r/252134
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If3997a080c6de2a2b5ad60d9b4689d913ea44531
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits