ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/254837

Change subject: dumps: new option 'cleanup' to require cleanup when dump rerun
......................................................................

dumps: new option 'cleanup' to require cleanup when dump rerun

By default, files produced by a previous run for the same date and wiki
are now left intact and reused; pass --cleanup to remove them first.

Change-Id: I79626e3aa15012e8c23036c00d3384216a8f6396
---
M xmldumps-backup/dumpadmin.py
M xmldumps-backup/dumps/runner.py
M xmldumps-backup/worker.py
3 files changed, 20 insertions(+), 13 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps 
refs/changes/37/254837/1

diff --git a/xmldumps-backup/dumpadmin.py b/xmldumps-backup/dumpadmin.py
index 7b42f84..b7eb1fb 100644
--- a/xmldumps-backup/dumpadmin.py
+++ b/xmldumps-backup/dumpadmin.py
@@ -378,7 +378,7 @@
         runner = Runner(wiki, prefetch=True, spawn=True, job=None,
                         skip_jobs=[], restart=False, notice="", dryrun=False,
                         enabled=None, partnum_todo=False, checkpoint_file=None,
-                        page_id_range=None, skipdone=[], verbose=self.verbose)
+                        page_id_range=None, skipdone=[], cleanup=False, 
verbose=self.verbose)
 
         if not failed_jobs:
             if self.verbose:
@@ -559,7 +559,7 @@
         runner = Runner(wiki, prefetch=True, spawn=True, job=None,
                         skip_jobs=[], restart=False, notice="", dryrun=False,
                         enabled=None, partnum_todo=False, checkpoint_file=None,
-                        page_id_range=None, skipdone=[], verbose=self.verbose)
+                        page_id_range=None, skipdone=[], cleanup=False, 
verbose=self.verbose)
 
         known_jobs = [item.name() for item in 
runner.dump_item_list.dump_items] + ['tables']
         if ':' in self.job_status:
diff --git a/xmldumps-backup/dumps/runner.py b/xmldumps-backup/dumps/runner.py
index 8b2c779..9b75c3a 100644
--- a/xmldumps-backup/dumps/runner.py
+++ b/xmldumps-backup/dumps/runner.py
@@ -442,7 +442,7 @@
     def __init__(self, wiki, prefetch=True, spawn=True, job=None, 
skip_jobs=None,
                  restart=False, notice="", dryrun=False, enabled=None,
                  partnum_todo=None, checkpoint_file=None, page_id_range=None,
-                 skipdone=False, verbose=False):
+                 skipdone=False, cleanup=False, verbose=False):
         self.wiki = wiki
         self.db_name = wiki.db_name
         self.prefetch = prefetch
@@ -458,6 +458,7 @@
         self.skipdone = skipdone
         self.verbose = verbose
         self.enabled = enabled
+        self.cleanup_old_files = cleanup
 
         if self.checkpoint_file is not None:
             fname = DumpFilename(self.wiki)
@@ -475,8 +476,11 @@
         for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME,
                         RunInfoFile.NAME, SymLinks.NAME,
                         Feeds.NAME, NoticeFile.NAME, "makedir", 
"clean_old_dumps",
-                        "clean_old_files", "check_trunc_files"]:
+                        "cleanup_old_files", "check_trunc_files"]:
             self.enabled[setting] = True
+
+        if not self.cleanup_old_files:
+            del self.enabled["cleanup_old_files"]
 
         if self.dryrun or self._partnum_todo is not None or 
self.checkpoint_file is not None:
             for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME,
@@ -485,11 +489,8 @@
                 del self.enabled[setting]
 
         if self.dryrun:
-            for setting in ["logging", "check_trunc_files", "clean_old_files"]:
+            for setting in ["logging", "check_trunc_files"]:
                 del self.enabled[setting]
-        if self.page_id_range:
-            for setting in ["clean_old_files"]:
-                self.enabled[setting] = True
 
         self.job_requested = job
 
@@ -503,11 +504,11 @@
 
         if self.job_requested == "latestlinks" or self.job_requested == 
"createdirs":
             for setting in [Checksummer.NAME, NoticeFile.NAME, "makedir",
-                            "clean_old_dumps", "clean_old_files", 
"check_trunc_files"]:
+                            "clean_old_dumps", "check_trunc_files"]:
                 del self.enabled[setting]
 
         if self.job_requested == "noop":
-            for setting in ["clean_old_dumps", "clean_old_files", 
"check_trunc_files"]:
+            for setting in ["clean_old_dumps", "check_trunc_files"]:
                 del self.enabled[setting]
 
         self.skip_jobs = skip_jobs
diff --git a/xmldumps-backup/worker.py b/xmldumps-backup/worker.py
index 3cbf04a..c24e506 100644
--- a/xmldumps-backup/worker.py
+++ b/xmldumps-backup/worker.py
@@ -141,7 +141,7 @@
     usage_text = """Usage: python worker.py [options] [wikidbname]
 Options: --aftercheckpoint, --checkpoint, --partnum, --configfile, --date, 
--job,
          --skipjobs, --addnotice, --delnotice, --force, --noprefetch,
-         --nospawn, --restartfrom, --log, --cutoff\n")
+         --nospawn, --restartfrom, --log, --cleanup, --cutoff\n")
 --aftercheckpoint: Restart this job from the after specified checkpoint file, 
doing the
                rest of the job for the appropriate part number if parallel 
subjobs each
                doing one part are configured, or for the all the rest of the 
revisions
@@ -187,6 +187,8 @@
 --cutoff:      Given a cutoff date in yyyymmdd format, display the next wiki 
for which
                dumps should be run, if its last dump was older than the cutoff 
date,
                and exit, or if there are no such wikis, just exit
+--cleanup:     Remove all files that may already exist for the specific wiki and
+               run, for the specified job or all jobs
 --verbose:     Print lots of stuff (includes printing full backtraces for any 
exception)
                This is used primarily for debugging
 """
@@ -218,6 +220,7 @@
         skipdone = False
         do_locking = False
         verbose = False
+        cleanup_files = False
 
         try:
             (options, remainder) = getopt.gnu_getopt(
@@ -226,7 +229,7 @@
                  'delnotice', 'force', 'dryrun', 'noprefetch', 'nospawn',
                  'restartfrom', 'aftercheckpoint=', 'log', 'partnum=',
                  'checkpoint=', 'pageidrange=', 'cutoff=', "skipdone",
-                 "exclusive", 'verbose'])
+                 "exclusive", "cleanup", 'verbose'])
         except:
             usage("Unknown option specified")
 
@@ -270,6 +273,8 @@
                     usage("--cutoff value must be in yyyymmdd format")
             elif opt == "--skipdone":
                 skipdone = True
+            elif opt == "--cleanup":
+                cleanup_files = True
             elif opt == "--exclusive":
                 do_locking = True
             elif opt == "--verbose":
@@ -413,7 +418,8 @@
                 enabled = {"logging": True}
             runner = Runner(wiki, prefetch, spawn, job_requested, skip_jobs,
                             restart, html_notice, dryrun, enabled,
-                            partnum_todo, checkpoint_file, page_id_range, 
skipdone, verbose)
+                            partnum_todo, checkpoint_file, page_id_range, 
skipdone,
+                            cleanup_files, verbose)
 
             if restart:
                 sys.stderr.write("Running %s, restarting from job %s...\n" %

-- 
To view, visit https://gerrit.wikimedia.org/r/254837
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I79626e3aa15012e8c23036c00d3384216a8f6396
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to