ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/254836

Change subject: dumps: fix up cleanup of old files from previous run
......................................................................

dumps: fix up cleanup of old files from previous run

In a previous commit this cleanup was disabled; it is now re-enabled.

For history dumps, explain more clearly which files are listed
when a rerun of a specific page range is requested. Also
return all files when not dealing with a run of a specific
page range.

Change-Id: I8a61b54f6f4e467b303b78acde031120cd9c9509
---
M xmldumps-backup/dumps/jobs.py
M xmldumps-backup/dumps/xmljobs.py
2 files changed, 10 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/36/254836/1

diff --git a/xmldumps-backup/dumps/jobs.py b/xmldumps-backup/dumps/jobs.py
index 32c0e25..ffd4b1c 100644
--- a/xmldumps-backup/dumps/jobs.py
+++ b/xmldumps-backup/dumps/jobs.py
@@ -294,7 +294,7 @@
         return recombine_command_string
 
     def cleanup_old_files(self, dump_dir, runner):
-        if "clean_old_files" not in runner.enabled:
+        if "clean_old_files" in runner.enabled:
             if self.checkpoint_file is not None:
                 # we only rerun this one, so just remove this one
                 if exists(dump_dir.filename_public_path(self.checkpoint_file)):
@@ -497,6 +497,7 @@
         if dump_names is None:
             dump_names = [self.dumpname]
         files = []
+
         if self.checkpoint_file is not None:
             files.append(self.checkpoint_file)
             return files
diff --git a/xmldumps-backup/dumps/xmljobs.py b/xmldumps-backup/dumps/xmljobs.py
index 465ff29..621a2d6 100644
--- a/xmldumps-backup/dumps/xmljobs.py
+++ b/xmldumps-backup/dumps/xmljobs.py
@@ -780,7 +780,9 @@
     def list_outfiles_for_cleanup(self, dump_dir, dump_names=None):
         files = Dump.list_outfiles_for_cleanup(self, dump_dir, dump_names)
         files_to_return = []
+
         if self.page_id_range:
+            # this file is for one page range only
             if ',' in self.page_id_range:
                 (first_page_id, last_page_id) = self.page_id_range.split(',', 
2)
                 first_page_id = int(first_page_id)
@@ -788,8 +790,9 @@
             else:
                 first_page_id = int(self.page_id_range)
                 last_page_id = None
-            # filter any checkpoint files, removing from the list any with
-            # page range outside of the page range this job will cover
+
+            # checkpoint files cover specific page ranges. for those,
+            # list only files within the given page range for cleanup
             for fname in files:
                 if fname.is_checkpoint_file:
                     if (not first_page_id or
@@ -801,6 +804,9 @@
                             files_to_return.append(fname)
                 else:
                     files_to_return.append(fname)
+        else:
+            files_to_return = files
+
         return files_to_return
 
 

-- 
To view, visit https://gerrit.wikimedia.org/r/254836
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I8a61b54f6f4e467b303b78acde031120cd9c9509
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to