[MediaWiki-commits] [Gerrit] operations/dumps[master]: retry failed page content pieces immediately after page content step completes
ArielGlenn has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/345985 ) Change subject: retry failed page content pieces immediately after page content step completes .. retry failed page content pieces immediately after page content step completes This means we don't regenerate partial stubs, we just put the failed commands back in the queue and retry until success or we hit max retry limit. There are several new config settings as a result of this update. Bug: T160507 Change-Id: I7db012f3ca8670e3274d33c7ff860962a7eae8f2 --- M xmldumps-backup/defaults.conf M xmldumps-backup/dumps/CommandManagement.py M xmldumps-backup/dumps/WikiDump.py M xmldumps-backup/dumps/apijobs.py M xmldumps-backup/dumps/flowjob.py M xmldumps-backup/dumps/pagerange.py M xmldumps-backup/dumps/recombinejobs.py M xmldumps-backup/dumps/recompressjobs.py M xmldumps-backup/dumps/runner.py M xmldumps-backup/dumps/tablesjobs.py M xmldumps-backup/dumps/xmlcontentjobs.py M xmldumps-backup/dumps/xmljobs.py 12 files changed, 72 insertions(+), 50 deletions(-) Approvals: ArielGlenn: Looks good to me, approved jenkins-bot: Verified diff --git a/xmldumps-backup/defaults.conf b/xmldumps-backup/defaults.conf index 4025b62..d17cbbb 100644 --- a/xmldumps-backup/defaults.conf +++ b/xmldumps-backup/defaults.conf @@ -62,6 +62,9 @@ pagesPerChunkAbstract=0 jobsperbatch="" revsPerJob=100 +maxRetries=0 +retryWait=30 +revsMargin=100 [otherformats] multistream=0 diff --git a/xmldumps-backup/dumps/CommandManagement.py b/xmldumps-backup/dumps/CommandManagement.py index 9b054bb..8877eca 100644 --- a/xmldumps-backup/dumps/CommandManagement.py +++ b/xmldumps-backup/dumps/CommandManagement.py @@ -173,11 +173,14 @@ return False return True -def exited_with_errors(self): +def exited_with_errors(self, stringfmt=True): if not self.exited_successfully(): # we wil return the whole pipeline I guess, they might as well # see it in the error report instead of the specific issue in the pipe. 
-return self.pipeline_string() +if stringfmt: +return self.pipeline_string() +else: +return self._commands return None # Checks the exit values of the individual commands in the @@ -352,12 +355,12 @@ return False return True -def exited_with_errors(self): +def exited_with_errors(self, stringfmt=True): """Return list of commands that exited with errors.""" commands = [] for pipeline in self._command_pipelines: if not pipeline.exited_successfully(): -command = pipeline.exited_with_errors() +command = pipeline.exited_with_errors(stringfmt) if command is not None: commands.append(command) return commands @@ -575,11 +578,11 @@ return False return True -def commands_with_errors(self): +def commands_with_errors(self, stringfmt=True): commands = [] for series in self._command_serieses: if not series.exited_successfully(): -commands.extend(series.exited_with_errors()) +commands.extend(series.exited_with_errors(stringfmt)) return commands def watch_output_queue(self): diff --git a/xmldumps-backup/dumps/WikiDump.py b/xmldumps-backup/dumps/WikiDump.py index 9c4801e..09d3b6a 100644 --- a/xmldumps-backup/dumps/WikiDump.py +++ b/xmldumps-backup/dumps/WikiDump.py @@ -210,6 +210,12 @@ "chunks", "checkpointTime", 1) self.revs_per_job = self.get_opt_for_proj_or_default( "chunks", "revsPerJob", 1) +self.max_retries = self.get_opt_for_proj_or_default( +"chunks", "maxRetries", 1) +self.retry_wait = self.get_opt_for_proj_or_default( +"chunks", "retryWait", 1) +self.revs_margin = self.get_opt_for_proj_or_default( +"chunks", "revsMargin", 1) if not self.conf.has_section('otherformats'): self.conf.add_section('otherformats') diff --git a/xmldumps-backup/dumps/apijobs.py b/xmldumps-backup/dumps/apijobs.py index 089a4da..341be04 100644 --- a/xmldumps-backup/dumps/apijobs.py +++ b/xmldumps-backup/dumps/apijobs.py @@ -27,17 +27,17 @@ raise BackupError("siteinfo dump %s trying to produce more than one file" % self.dumpname) output_dfname = dfnames[0] -error = self.get_siteinfo( +error, broken = 
self.get_siteinfo( runner.dump_dir.filename_public_path(output_dfname), runner) while error and retries < maxretries: retries = retries + 1 time.sleep(5) -error = self.get_siteinfo( +error, broken = self.get_siteinfo(
[MediaWiki-commits] [Gerrit] operations/dumps[master]: retry failed page content pieces immediately after page content step completes
ArielGlenn has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/345985 ) Change subject: retry failed page content pieces immediately after page content step completes .. retry failed page content pieces immediately after page content step completes This means we don't regenerate partial stubs, we just put the failed commands back in the queue and retry until success or we hit max retry limit. There are several new config settings as a result of this update. Bug: T160507 Change-Id: I7db012f3ca8670e3274d33c7ff860962a7eae8f2 --- M xmldumps-backup/defaults.conf M xmldumps-backup/dumps/CommandManagement.py M xmldumps-backup/dumps/WikiDump.py M xmldumps-backup/dumps/apijobs.py M xmldumps-backup/dumps/flowjob.py M xmldumps-backup/dumps/pagerange.py M xmldumps-backup/dumps/recombinejobs.py M xmldumps-backup/dumps/recompressjobs.py M xmldumps-backup/dumps/runner.py M xmldumps-backup/dumps/tablesjobs.py M xmldumps-backup/dumps/xmlcontentjobs.py M xmldumps-backup/dumps/xmljobs.py 12 files changed, 72 insertions(+), 50 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/85/345985/1 diff --git a/xmldumps-backup/defaults.conf b/xmldumps-backup/defaults.conf index 4025b62..d17cbbb 100644 --- a/xmldumps-backup/defaults.conf +++ b/xmldumps-backup/defaults.conf @@ -62,6 +62,9 @@ pagesPerChunkAbstract=0 jobsperbatch="" revsPerJob=100 +maxRetries=0 +retryWait=30 +revsMargin=100 [otherformats] multistream=0 diff --git a/xmldumps-backup/dumps/CommandManagement.py b/xmldumps-backup/dumps/CommandManagement.py index 9b054bb..8877eca 100644 --- a/xmldumps-backup/dumps/CommandManagement.py +++ b/xmldumps-backup/dumps/CommandManagement.py @@ -173,11 +173,14 @@ return False return True -def exited_with_errors(self): +def exited_with_errors(self, stringfmt=True): if not self.exited_successfully(): # we wil return the whole pipeline I guess, they might as well # see it in the error report instead of the specific issue in the pipe. 
-return self.pipeline_string() +if stringfmt: +return self.pipeline_string() +else: +return self._commands return None # Checks the exit values of the individual commands in the @@ -352,12 +355,12 @@ return False return True -def exited_with_errors(self): +def exited_with_errors(self, stringfmt=True): """Return list of commands that exited with errors.""" commands = [] for pipeline in self._command_pipelines: if not pipeline.exited_successfully(): -command = pipeline.exited_with_errors() +command = pipeline.exited_with_errors(stringfmt) if command is not None: commands.append(command) return commands @@ -575,11 +578,11 @@ return False return True -def commands_with_errors(self): +def commands_with_errors(self, stringfmt=True): commands = [] for series in self._command_serieses: if not series.exited_successfully(): -commands.extend(series.exited_with_errors()) +commands.extend(series.exited_with_errors(stringfmt)) return commands def watch_output_queue(self): diff --git a/xmldumps-backup/dumps/WikiDump.py b/xmldumps-backup/dumps/WikiDump.py index 9c4801e..09d3b6a 100644 --- a/xmldumps-backup/dumps/WikiDump.py +++ b/xmldumps-backup/dumps/WikiDump.py @@ -210,6 +210,12 @@ "chunks", "checkpointTime", 1) self.revs_per_job = self.get_opt_for_proj_or_default( "chunks", "revsPerJob", 1) +self.max_retries = self.get_opt_for_proj_or_default( +"chunks", "maxRetries", 1) +self.retry_wait = self.get_opt_for_proj_or_default( +"chunks", "retryWait", 1) +self.revs_margin = self.get_opt_for_proj_or_default( +"chunks", "revsMargin", 1) if not self.conf.has_section('otherformats'): self.conf.add_section('otherformats') diff --git a/xmldumps-backup/dumps/apijobs.py b/xmldumps-backup/dumps/apijobs.py index 089a4da..341be04 100644 --- a/xmldumps-backup/dumps/apijobs.py +++ b/xmldumps-backup/dumps/apijobs.py @@ -27,17 +27,17 @@ raise BackupError("siteinfo dump %s trying to produce more than one file" % self.dumpname) output_dfname = dfnames[0] -error = self.get_siteinfo( +error, broken = 
self.get_siteinfo( runner.dump_dir.filename_public_path(output_dfname), runner) while error and retries < maxretries: retries = retries + 1 time.sleep(5) -error = self.get_siteinfo( +error, broken = self.get_siteinfo(