ArielGlenn has uploaded a new change for review. https://gerrit.wikimedia.org/r/254837
Change subject: dumps: new option 'cleanup' to require cleanup when dump rerun ...................................................................... dumps: new option 'cleanup' to require cleanup when dump rerun by default files produced from a previous run of same date and wiki will now be left intact and reused. Change-Id: I79626e3aa15012e8c23036c00d3384216a8f6396 --- M xmldumps-backup/dumpadmin.py M xmldumps-backup/dumps/runner.py M xmldumps-backup/worker.py 3 files changed, 20 insertions(+), 13 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/37/254837/1 diff --git a/xmldumps-backup/dumpadmin.py b/xmldumps-backup/dumpadmin.py index 7b42f84..b7eb1fb 100644 --- a/xmldumps-backup/dumpadmin.py +++ b/xmldumps-backup/dumpadmin.py @@ -378,7 +378,7 @@ runner = Runner(wiki, prefetch=True, spawn=True, job=None, skip_jobs=[], restart=False, notice="", dryrun=False, enabled=None, partnum_todo=False, checkpoint_file=None, - page_id_range=None, skipdone=[], verbose=self.verbose) + page_id_range=None, skipdone=[], cleanup=False, verbose=self.verbose) if not failed_jobs: if self.verbose: @@ -559,7 +559,7 @@ runner = Runner(wiki, prefetch=True, spawn=True, job=None, skip_jobs=[], restart=False, notice="", dryrun=False, enabled=None, partnum_todo=False, checkpoint_file=None, - page_id_range=None, skipdone=[], verbose=self.verbose) + page_id_range=None, skipdone=[], cleanup=False, verbose=self.verbose) known_jobs = [item.name() for item in runner.dump_item_list.dump_items] + ['tables'] if ':' in self.job_status: diff --git a/xmldumps-backup/dumps/runner.py b/xmldumps-backup/dumps/runner.py index 8b2c779..9b75c3a 100644 --- a/xmldumps-backup/dumps/runner.py +++ b/xmldumps-backup/dumps/runner.py @@ -442,7 +442,7 @@ def __init__(self, wiki, prefetch=True, spawn=True, job=None, skip_jobs=None, restart=False, notice="", dryrun=False, enabled=None, partnum_todo=None, checkpoint_file=None, page_id_range=None, - skipdone=False, verbose=False): + 
skipdone=False, cleanup=False, verbose=False): self.wiki = wiki self.db_name = wiki.db_name self.prefetch = prefetch @@ -458,6 +458,7 @@ self.skipdone = skipdone self.verbose = verbose self.enabled = enabled + self.cleanup_old_files = cleanup if self.checkpoint_file is not None: fname = DumpFilename(self.wiki) @@ -475,8 +476,11 @@ for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME, RunInfoFile.NAME, SymLinks.NAME, Feeds.NAME, NoticeFile.NAME, "makedir", "clean_old_dumps", - "clean_old_files", "check_trunc_files"]: + "cleanup_old_files", "check_trunc_files"]: self.enabled[setting] = True + + if not self.cleanup_old_files: + del self.enabled["cleanup_old_files"] if self.dryrun or self._partnum_todo is not None or self.checkpoint_file is not None: for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME, @@ -485,11 +489,8 @@ del self.enabled[setting] if self.dryrun: - for setting in ["logging", "check_trunc_files", "clean_old_files"]: + for setting in ["logging", "check_trunc_files"]: del self.enabled[setting] - if self.page_id_range: - for setting in ["clean_old_files"]: - self.enabled[setting] = True self.job_requested = job @@ -503,11 +504,11 @@ if self.job_requested == "latestlinks" or self.job_requested == "createdirs": for setting in [Checksummer.NAME, NoticeFile.NAME, "makedir", - "clean_old_dumps", "clean_old_files", "check_trunc_files"]: + "clean_old_dumps", "check_trunc_files"]: del self.enabled[setting] if self.job_requested == "noop": - for setting in ["clean_old_dumps", "clean_old_files", "check_trunc_files"]: + for setting in ["clean_old_dumps", "check_trunc_files"]: del self.enabled[setting] self.skip_jobs = skip_jobs diff --git a/xmldumps-backup/worker.py b/xmldumps-backup/worker.py index 3cbf04a..c24e506 100644 --- a/xmldumps-backup/worker.py +++ b/xmldumps-backup/worker.py @@ -141,7 +141,7 @@ usage_text = """Usage: python worker.py [options] [wikidbname] Options: --aftercheckpoint, --checkpoint, --partnum, --configfile, 
--date, --job, --skipjobs, --addnotice, --delnotice, --force, --noprefetch, - --nospawn, --restartfrom, --log, --cutoff\n") + --nospawn, --restartfrom, --log, --cleanup, --cutoff\n") --aftercheckpoint: Restart this job from the after specified checkpoint file, doing the rest of the job for the appropriate part number if parallel subjobs each doing one part are configured, or for the all the rest of the revisions @@ -187,6 +187,8 @@ --cutoff: Given a cutoff date in yyyymmdd format, display the next wiki for which dumps should be run, if its last dump was older than the cutoff date, and exit, or if there are no such wikis, just exit +--cleanup: Remove all files that may already exist for the specified wiki and + run, for the specified job or all jobs --verbose: Print lots of stuff (includes printing full backtraces for any exception) This is used primarily for debugging """ @@ -218,6 +220,7 @@ skipdone = False do_locking = False verbose = False + cleanup_files = False try: (options, remainder) = getopt.gnu_getopt( @@ -226,7 +229,7 @@ 'delnotice', 'force', 'dryrun', 'noprefetch', 'nospawn', 'restartfrom', 'aftercheckpoint=', 'log', 'partnum=', 'checkpoint=', 'pageidrange=', 'cutoff=', "skipdone", - "exclusive", 'verbose']) + "exclusive", "cleanup", 'verbose']) except: usage("Unknown option specified") @@ -270,6 +273,8 @@ usage("--cutoff value must be in yyyymmdd format") elif opt == "--skipdone": skipdone = True + elif opt == "--cleanup": + cleanup_files = True elif opt == "--exclusive": do_locking = True elif opt == "--verbose": @@ -413,7 +418,8 @@ enabled = {"logging": True} runner = Runner(wiki, prefetch, spawn, job_requested, skip_jobs, restart, html_notice, dryrun, enabled, - partnum_todo, checkpoint_file, page_id_range, skipdone, verbose) + partnum_todo, checkpoint_file, page_id_range, skipdone, + cleanup_files, verbose) if restart: sys.stderr.write("Running %s, restarting from job %s...\n" % -- To view, visit https://gerrit.wikimedia.org/r/254837 To unsubscribe, 
visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I79626e3aa15012e8c23036c00d3384216a8f6396 Gerrit-PatchSet: 1 Gerrit-Project: operations/dumps Gerrit-Branch: ariel Gerrit-Owner: ArielGlenn <ar...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits