ArielGlenn has uploaded a new change for review. https://gerrit.wikimedia.org/r/324013
Change subject: handle adds/changes-specific args in incr_dumps module ...................................................................... handle adds/changes-specific args in incr_dumps module * set up secondary usage message for args specific to the dump type (in our case, adds/changes dumps) * set up arg passing for such args in the generate dumps wrapper * move argument processing of these args into the incr_dumps module * sort out the 'cutoff' option: it should never be specified from the command line but only generated from the date of the wiki dump run and the delay config value Bug: T133547 Change-Id: I715666363cd0ec963ae4bab48e1c0bcf09542406 --- M xmldumps-backup/generateincrementals.py M xmldumps-backup/incr_dumps.py M xmldumps-backup/miscdumplib.py 3 files changed, 56 insertions(+), 27 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/13/324013/1 diff --git a/xmldumps-backup/generateincrementals.py b/xmldumps-backup/generateincrementals.py index 204c30c..f9d1af2 100644 --- a/xmldumps-backup/generateincrementals.py +++ b/xmldumps-backup/generateincrementals.py @@ -15,6 +15,7 @@ from miscdumplib import MD5File, MiscDumpDirs, MiscDumpDir from miscdumplib import MiscDumpLock, StatusInfo from miscdumplib import log, safe, make_link +import incr_dumps from incr_dumps import IncrDump from incr_dumps import DumpConfig from incr_dumps import cutoff_from_date @@ -62,7 +63,6 @@ return other_runs_text = "other runs: %s" % make_link(wikiname, wikiname) - try: wiki = Wiki(self._config, wikiname) wiki.set_date(dump_date) @@ -73,7 +73,6 @@ for filename in output_files: output_fileinfo[filename] = FileUtils.file_info(os.path.join(path, filename)) files_text = [] - errors = False for filename in output_fileinfo: file_date, file_size = output_fileinfo[filename] log(self.verbose, "output file %s for %s %s %s" @@ -136,9 +135,6 @@ self.wikiname = wikiname self.dumpdir = MiscDumpDir(self._config, self.date) self.do_dumps = do_dumps - - 
self.do_stubs = args['do_stubs'] - self.do_revs = args['do_revs'] self.do_index = do_index self.dryrun = dryrun self.forcerun = forcerun @@ -154,7 +150,7 @@ if not exists(self.dumpdir.get_dumpdir(self.wikiname)): os.makedirs(self.dumpdir.get_dumpdir(self.wikiname)) status = self.status_info.get_status() - if status == "done" and not self.forcerun: + if status == "done:all" and not self.forcerun: log(self.verbose, "wiki %s skipped, adds/changes dump already" " complete" % self.wikiname) return STATUS_GOOD @@ -181,7 +177,7 @@ if not md5sums(self.wiki, self.wiki.config.fileperms, output_files, expected): return STATUS_FAILED - self.status_info.set_status("done") + self.status_info.set_status("done:" + self.incr.get_stages_done()) lock.unlock() if self.do_index: @@ -289,11 +285,13 @@ Args: If your dump needs specific arguments passed to the class that are not provided for here, you can pass them on the command line - before the final wikidbname argument. These arguments will be - in pairs, first the argument name, then whitespace, then the argument - value. + before the final wikidbname argument. Arguments with values should + be passed as argname:value, and arguments without values (flags that + will be set as True) should be passed simply as argname. 
""") sys.stderr.write(usage_message) + secondary_message = incr_dumps.get_usage() + sys.stderr.write(secondary_message) sys.exit(1) @@ -305,12 +303,12 @@ dryrun = False verbose = False forcerun = False - args = {'do_stubs': True, 'do_revs': True, 'cutoff': None} + wikiname = None try: (options, remainder) = getopt.gnu_getopt( sys.argv[1:], "", - ['date=', 'configfile=', 'stubsonly', 'revsonly', + ['date=', 'configfile=', 'wiki=', 'dumpsonly', 'indexonly', 'dryrun', 'verbose', 'forcerun']) except Exception as ex: usage("Unknown option specified") @@ -320,16 +318,12 @@ date = val elif opt == "--configfile": config_file = val - elif opt == "--stubsonly": + elif opt == "--wiki": + wikiname = val + elif opt == "--dumpsonly": do_index = False - args['do_revs'] = False - elif opt == "--revsonly": - do_index = False - args['do_stubs'] = False elif opt == "--indexonly": do_dump = False - args['do_stubs'] = False - args['do_revs'] = False elif opt == "--dryrun": dryrun = True elif opt == "--verbose": @@ -353,8 +347,17 @@ else: args['cutoff'] = cutoff_from_date(date, config) + args = {} if len(remainder) > 0: - dump_one = MiscDumpOne(config, date, remainder[0], do_dump, do_index, + for opt in remainder: + if ':' in opt: + name, value = opt.split(':', 1) + args[name] = value + else: + args[opt] = True + + if wikiname is not None: + dump_one = MiscDumpOne(config, date, wikiname, do_dump, do_index, dryrun, verbose, forcerun, args) dump_one.do_one_wiki() else: diff --git a/xmldumps-backup/incr_dumps.py b/xmldumps-backup/incr_dumps.py index ded6a73..49e17c7 100644 --- a/xmldumps-backup/incr_dumps.py +++ b/xmldumps-backup/incr_dumps.py @@ -111,8 +111,16 @@ self.dryrun = dryrun self.verbose = verbose self.args = args - if not self.args['cutoff']: - self.args['cutoff'] = cutoff_from_date(self.wiki.date, self.wiki.config) + if not self.args['cutoff']: + self.args['cutoff'] = cutoff_from_date(self.wiki.date, self.wiki.config) + if 'revsonly' in args: + self.dostubs = False + else: + 
self.dostubs = True + if 'stubsonly' in args: + self.dorevs = False + else: + self.dorevs = True def get_prev_incrdate(self, date, dumpok=False, revidok=False): # find the most recent incr dump before the @@ -204,7 +212,7 @@ return max_revid def dump_stub(self, start_revid, end_revid): - if 'do_stubs' not in self.args: + if not self.dostubs: return True dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date) @@ -231,7 +239,7 @@ return True def dump_revs(self): - if 'do_revs' not in self.args: + if not self.dorevs: return True dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date) outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date) @@ -288,6 +296,16 @@ return False return True + def get_stages_done(self): + """ + return comma-sep list of stages that are complete, in case not all are. + if all are complete, return 'all' + """ + if 'stubsonly' in self.args: + return 'stubs' + else: + return 'all' + def get_output_files(self): dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date) outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date) @@ -296,8 +314,16 @@ revsfile = RevsFile(self.wiki.config, self.wiki.date, self.wiki.db_name) filenames = [revidfile.get_filename(), stubfile.get_filename(), revsfile.get_filename()] expected = [] - if 'do_revs' in self.args: + if self.dorevs: expected.append(revsfile) - if 'do_stubs' in self.args: + if self.dostubs: expected.append(stubfile) return [os.path.join(outputdir, filename) for filename in filenames], expected + +def get_usage(): + return """Specific args: + +stubsonly -- dump stubs but not revs +revsonly -- dump revs but not stubs (requires that + stubs have already been dumped) +""" diff --git a/xmldumps-backup/miscdumplib.py b/xmldumps-backup/miscdumplib.py index 0157bad..359da4f 100644 --- a/xmldumps-backup/miscdumplib.py +++ b/xmldumps-backup/miscdumplib.py @@ -294,7 +294,7 @@ if dumpok: for dump in reversed(dirs): status_info = StatusInfo(self._config, dump, self.wikiname) - if 
status_info.get_status(dump) == "done": + if status_info.get_status(dump).startswith("done"): return dump else: return dirs[-1] -- To view, visit https://gerrit.wikimedia.org/r/324013 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I715666363cd0ec963ae4bab48e1c0bcf09542406 Gerrit-PatchSet: 1 Gerrit-Project: operations/dumps Gerrit-Branch: master Gerrit-Owner: ArielGlenn <ar...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits