ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/324013

Change subject: handle adds/changes-specific args in incr_dumps module
......................................................................

handle adds/changes-specific args in incr_dumps module

* set up secondary usage message for args specific to the dump type
  (in our case, adds/changes dumps)
* set up arg passing for such args in the generate dumps wrapper
* move argument processing of these args into the incr_dumps module
* sort out the 'cutoff' option: it should never be specified
  from the command line but only generated from the date of the wiki
  dump run and the delay config value

Bug: T133547

Change-Id: I715666363cd0ec963ae4bab48e1c0bcf09542406
---
M xmldumps-backup/generateincrementals.py
M xmldumps-backup/incr_dumps.py
M xmldumps-backup/miscdumplib.py
3 files changed, 56 insertions(+), 27 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/13/324013/1

diff --git a/xmldumps-backup/generateincrementals.py 
b/xmldumps-backup/generateincrementals.py
index 204c30c..f9d1af2 100644
--- a/xmldumps-backup/generateincrementals.py
+++ b/xmldumps-backup/generateincrementals.py
@@ -15,6 +15,7 @@
 from miscdumplib import MD5File, MiscDumpDirs, MiscDumpDir
 from miscdumplib import MiscDumpLock, StatusInfo
 from miscdumplib import log, safe, make_link
+import incr_dumps
 from incr_dumps import IncrDump
 from incr_dumps import DumpConfig
 from incr_dumps import cutoff_from_date
@@ -62,7 +63,6 @@
                 return
 
             other_runs_text = "other runs: %s" % make_link(wikiname, wikiname)
-
             try:
                 wiki = Wiki(self._config, wikiname)
                 wiki.set_date(dump_date)
@@ -73,7 +73,6 @@
                 for filename in output_files:
                     output_fileinfo[filename] = 
FileUtils.file_info(os.path.join(path, filename))
                 files_text = []
-                errors = False
                 for filename in output_fileinfo:
                     file_date, file_size = output_fileinfo[filename]
                     log(self.verbose, "output file %s for %s %s %s"
@@ -136,9 +135,6 @@
         self.wikiname = wikiname
         self.dumpdir = MiscDumpDir(self._config, self.date)
         self.do_dumps = do_dumps
-
-        self.do_stubs = args['do_stubs']
-        self.do_revs = args['do_revs']
         self.do_index = do_index
         self.dryrun = dryrun
         self.forcerun = forcerun
@@ -154,7 +150,7 @@
             if not exists(self.dumpdir.get_dumpdir(self.wikiname)):
                 os.makedirs(self.dumpdir.get_dumpdir(self.wikiname))
             status = self.status_info.get_status()
-            if status == "done" and not self.forcerun:
+            if status == "done:all" and not self.forcerun:
                 log(self.verbose, "wiki %s skipped, adds/changes dump already"
                     " complete" % self.wikiname)
                 return STATUS_GOOD
@@ -181,7 +177,7 @@
                     if not md5sums(self.wiki, self.wiki.config.fileperms,
                                    output_files, expected):
                         return STATUS_FAILED
-                    self.status_info.set_status("done")
+                    self.status_info.set_status("done:" + 
self.incr.get_stages_done())
                     lock.unlock()
 
                 if self.do_index:
@@ -289,11 +285,13 @@
 
 Args:  If your dump needs specific arguments passed to the class that
        are not provided for here, you can pass them on the command line
-       before the final wikidbname argument.  These arguments will be
-       in pairs, first the argument name, then whitespace, then the argument
-       value.
+       before the final wikidbname argument.  Arguments with values should
+       be passed as argname:value, and arguments without values (flags that
+       will be set as True) should be passed simply as argname.
 """)
     sys.stderr.write(usage_message)
+    secondary_message = incr_dumps.get_usage()
+    sys.stderr.write(secondary_message)
     sys.exit(1)
 
 
@@ -305,12 +303,12 @@
     dryrun = False
     verbose = False
     forcerun = False
-    args = {'do_stubs': True, 'do_revs': True, 'cutoff': None}
+    wikiname = None
 
     try:
         (options, remainder) = getopt.gnu_getopt(
             sys.argv[1:], "",
-            ['date=', 'configfile=', 'stubsonly', 'revsonly',
+            ['date=', 'configfile=', 'wiki=', 'dumpsonly',
              'indexonly', 'dryrun', 'verbose', 'forcerun'])
     except Exception as ex:
         usage("Unknown option specified")
@@ -320,16 +318,12 @@
             date = val
         elif opt == "--configfile":
             config_file = val
-        elif opt == "--stubsonly":
+        elif opt == "--wiki":
+            wikiname = val
+        elif opt == "--dumpsonly":
             do_index = False
-            args['do_revs'] = False
-        elif opt == "--revsonly":
-            do_index = False
-            args['do_stubs'] = False
         elif opt == "--indexonly":
             do_dump = False
-            args['do_stubs'] = False
-            args['do_revs'] = False
         elif opt == "--dryrun":
             dryrun = True
         elif opt == "--verbose":
@@ -353,8 +347,17 @@
     else:
         args['cutoff'] = cutoff_from_date(date, config)
 
+    args = {}
     if len(remainder) > 0:
-        dump_one = MiscDumpOne(config, date, remainder[0], do_dump, do_index,
+        for opt in remainder:
+            if ':' in opt:
+                name, value = opt.split(':', 1)
+                args[name] = value
+            else:
+                args[opt] = True
+
+    if wikiname is not None:
+        dump_one = MiscDumpOne(config, date, wikiname, do_dump, do_index,
                                dryrun, verbose, forcerun, args)
         dump_one.do_one_wiki()
     else:
diff --git a/xmldumps-backup/incr_dumps.py b/xmldumps-backup/incr_dumps.py
index ded6a73..49e17c7 100644
--- a/xmldumps-backup/incr_dumps.py
+++ b/xmldumps-backup/incr_dumps.py
@@ -111,8 +111,16 @@
         self.dryrun = dryrun
         self.verbose = verbose
         self.args = args
-        if not self.args['cutoff']:
-            self.args['cutoff'] = cutoff_from_date(self.wiki.date, 
self.wiki.config)
+       if not self.args['cutoff']:
+           self.args['cutoff'] = cutoff_from_date(self.wiki.date, 
self.wiki.config)
+        if 'revsonly' in args:
+            self.dostubs = False
+        else:
+            self.dostubs = True
+        if 'stubsonly' in args:
+            self.dorevs = False
+        else:
+            self.dorevs = True
 
     def get_prev_incrdate(self, date, dumpok=False, revidok=False):
         # find the most recent incr dump before the
@@ -204,7 +212,7 @@
         return max_revid
 
     def dump_stub(self, start_revid, end_revid):
-        if 'do_stubs' not in self.args:
+        if not self.dostubs:
             return True
 
         dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date)
@@ -231,7 +239,7 @@
         return True
 
     def dump_revs(self):
-        if 'do_revs' not in self.args:
+        if not self.dorevs:
             return True
         dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date)
         outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date)
@@ -288,6 +296,16 @@
             return False
         return True
 
+    def get_stages_done(self):
+        """
+        return comma-sep list of stages that are complete, in case not all are.
+        if all are complete, return 'all'
+        """
+        if 'stubsonly' in self.args:
+            return 'stubs'
+        else:
+            return 'all'
+
     def get_output_files(self):
         dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date)
         outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date)
@@ -296,8 +314,16 @@
         revsfile = RevsFile(self.wiki.config, self.wiki.date, 
self.wiki.db_name)
         filenames = [revidfile.get_filename(), stubfile.get_filename(), 
revsfile.get_filename()]
         expected = []
-        if 'do_revs' in self.args:
+        if self.dorevs:
             expected.append(revsfile)
-        if 'do_stubs' in self.args:
+        if self.dostubs:
             expected.append(stubfile)
         return [os.path.join(outputdir, filename) for filename in filenames], 
expected
+
+def get_usage():
+    return """Specific args:
+
+stubsonly        -- dump stubs but not revs
+revsonly         -- dump revs but not stubs (requires that
+                    stubs have already been dumped)
+"""
diff --git a/xmldumps-backup/miscdumplib.py b/xmldumps-backup/miscdumplib.py
index 0157bad..359da4f 100644
--- a/xmldumps-backup/miscdumplib.py
+++ b/xmldumps-backup/miscdumplib.py
@@ -294,7 +294,7 @@
             if dumpok:
                 for dump in reversed(dirs):
                     status_info = StatusInfo(self._config, dump, self.wikiname)
-                    if status_info.get_status(dump) == "done":
+                    if status_info.get_status(dump).startswith("done"):
                         return dump
             else:
                 return dirs[-1]

-- 
To view, visit https://gerrit.wikimedia.org/r/324013
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I715666363cd0ec963ae4bab48e1c0bcf09542406
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to