[MediaWiki-commits] [Gerrit] operations/dumps[master]: retry failed page content pieces immediately after page cont...

2017-04-02 Thread ArielGlenn (Code Review)
ArielGlenn has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/345985 )

Change subject: retry failed page content pieces immediately after page content step completes
..


retry failed page content pieces immediately after page content step completes

This means we don't regenerate partial stubs, we just put the failed
commands back in the queue and retry until success or we hit max retry
limit.

There are several new config settings as a result of this update.

Bug: T160507
Change-Id: I7db012f3ca8670e3274d33c7ff860962a7eae8f2
---
M xmldumps-backup/defaults.conf
M xmldumps-backup/dumps/CommandManagement.py
M xmldumps-backup/dumps/WikiDump.py
M xmldumps-backup/dumps/apijobs.py
M xmldumps-backup/dumps/flowjob.py
M xmldumps-backup/dumps/pagerange.py
M xmldumps-backup/dumps/recombinejobs.py
M xmldumps-backup/dumps/recompressjobs.py
M xmldumps-backup/dumps/runner.py
M xmldumps-backup/dumps/tablesjobs.py
M xmldumps-backup/dumps/xmlcontentjobs.py
M xmldumps-backup/dumps/xmljobs.py
12 files changed, 72 insertions(+), 50 deletions(-)

Approvals:
  ArielGlenn: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/xmldumps-backup/defaults.conf b/xmldumps-backup/defaults.conf
index 4025b62..d17cbbb 100644
--- a/xmldumps-backup/defaults.conf
+++ b/xmldumps-backup/defaults.conf
@@ -62,6 +62,9 @@
 pagesPerChunkAbstract=0
 jobsperbatch=""
 revsPerJob=100
+maxRetries=0
+retryWait=30
+revsMargin=100
 
 [otherformats]
 multistream=0
diff --git a/xmldumps-backup/dumps/CommandManagement.py b/xmldumps-backup/dumps/CommandManagement.py
index 9b054bb..8877eca 100644
--- a/xmldumps-backup/dumps/CommandManagement.py
+++ b/xmldumps-backup/dumps/CommandManagement.py
@@ -173,11 +173,14 @@
 return False
 return True
 
-def exited_with_errors(self):
+def exited_with_errors(self, stringfmt=True):
 if not self.exited_successfully():
 # we wil return the whole pipeline I guess, they might as well
 # see it in the error report instead of the specific issue in the 
pipe.
-return self.pipeline_string()
+if stringfmt:
+return self.pipeline_string()
+else:
+return self._commands
 return None
 
 # Checks the exit values of the individual commands in the
@@ -352,12 +355,12 @@
 return False
 return True
 
-def exited_with_errors(self):
+def exited_with_errors(self, stringfmt=True):
 """Return list of commands that exited with errors."""
 commands = []
 for pipeline in self._command_pipelines:
 if not pipeline.exited_successfully():
-command = pipeline.exited_with_errors()
+command = pipeline.exited_with_errors(stringfmt)
 if command is not None:
 commands.append(command)
 return commands
@@ -575,11 +578,11 @@
 return False
 return True
 
-def commands_with_errors(self):
+def commands_with_errors(self, stringfmt=True):
 commands = []
 for series in self._command_serieses:
 if not series.exited_successfully():
-commands.extend(series.exited_with_errors())
+commands.extend(series.exited_with_errors(stringfmt))
 return commands
 
 def watch_output_queue(self):
diff --git a/xmldumps-backup/dumps/WikiDump.py b/xmldumps-backup/dumps/WikiDump.py
index 9c4801e..09d3b6a 100644
--- a/xmldumps-backup/dumps/WikiDump.py
+++ b/xmldumps-backup/dumps/WikiDump.py
@@ -210,6 +210,12 @@
 "chunks", "checkpointTime", 1)
 self.revs_per_job = self.get_opt_for_proj_or_default(
 "chunks", "revsPerJob", 1)
+self.max_retries = self.get_opt_for_proj_or_default(
+"chunks", "maxRetries", 1)
+self.retry_wait = self.get_opt_for_proj_or_default(
+"chunks", "retryWait", 1)
+self.revs_margin = self.get_opt_for_proj_or_default(
+"chunks", "revsMargin", 1)
 
 if not self.conf.has_section('otherformats'):
 self.conf.add_section('otherformats')
diff --git a/xmldumps-backup/dumps/apijobs.py b/xmldumps-backup/dumps/apijobs.py
index 089a4da..341be04 100644
--- a/xmldumps-backup/dumps/apijobs.py
+++ b/xmldumps-backup/dumps/apijobs.py
@@ -27,17 +27,17 @@
 raise BackupError("siteinfo dump %s trying to produce more than 
one file" %
   self.dumpname)
 output_dfname = dfnames[0]
-error = self.get_siteinfo(
+error, broken = self.get_siteinfo(
 runner.dump_dir.filename_public_path(output_dfname), runner)
 while error and retries < maxretries:
 retries = retries + 1
 time.sleep(5)
-error = self.get_siteinfo(
+error, broken = self.get_siteinfo(
 

[MediaWiki-commits] [Gerrit] operations/dumps[master]: retry failed page content pieces immediately after page cont...

2017-04-01 Thread ArielGlenn (Code Review)
ArielGlenn has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/345985 )

Change subject: retry failed page content pieces immediately after page content step completes
..

retry failed page content pieces immediately after page content step completes

This means we don't regenerate partial stubs, we just put the failed
commands back in the queue and retry until success or we hit max retry
limit.

There are several new config settings as a result of this update.

Bug: T160507
Change-Id: I7db012f3ca8670e3274d33c7ff860962a7eae8f2
---
M xmldumps-backup/defaults.conf
M xmldumps-backup/dumps/CommandManagement.py
M xmldumps-backup/dumps/WikiDump.py
M xmldumps-backup/dumps/apijobs.py
M xmldumps-backup/dumps/flowjob.py
M xmldumps-backup/dumps/pagerange.py
M xmldumps-backup/dumps/recombinejobs.py
M xmldumps-backup/dumps/recompressjobs.py
M xmldumps-backup/dumps/runner.py
M xmldumps-backup/dumps/tablesjobs.py
M xmldumps-backup/dumps/xmlcontentjobs.py
M xmldumps-backup/dumps/xmljobs.py
12 files changed, 72 insertions(+), 50 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/85/345985/1

diff --git a/xmldumps-backup/defaults.conf b/xmldumps-backup/defaults.conf
index 4025b62..d17cbbb 100644
--- a/xmldumps-backup/defaults.conf
+++ b/xmldumps-backup/defaults.conf
@@ -62,6 +62,9 @@
 pagesPerChunkAbstract=0
 jobsperbatch=""
 revsPerJob=100
+maxRetries=0
+retryWait=30
+revsMargin=100
 
 [otherformats]
 multistream=0
diff --git a/xmldumps-backup/dumps/CommandManagement.py b/xmldumps-backup/dumps/CommandManagement.py
index 9b054bb..8877eca 100644
--- a/xmldumps-backup/dumps/CommandManagement.py
+++ b/xmldumps-backup/dumps/CommandManagement.py
@@ -173,11 +173,14 @@
 return False
 return True
 
-def exited_with_errors(self):
+def exited_with_errors(self, stringfmt=True):
 if not self.exited_successfully():
 # we wil return the whole pipeline I guess, they might as well
 # see it in the error report instead of the specific issue in the 
pipe.
-return self.pipeline_string()
+if stringfmt:
+return self.pipeline_string()
+else:
+return self._commands
 return None
 
 # Checks the exit values of the individual commands in the
@@ -352,12 +355,12 @@
 return False
 return True
 
-def exited_with_errors(self):
+def exited_with_errors(self, stringfmt=True):
 """Return list of commands that exited with errors."""
 commands = []
 for pipeline in self._command_pipelines:
 if not pipeline.exited_successfully():
-command = pipeline.exited_with_errors()
+command = pipeline.exited_with_errors(stringfmt)
 if command is not None:
 commands.append(command)
 return commands
@@ -575,11 +578,11 @@
 return False
 return True
 
-def commands_with_errors(self):
+def commands_with_errors(self, stringfmt=True):
 commands = []
 for series in self._command_serieses:
 if not series.exited_successfully():
-commands.extend(series.exited_with_errors())
+commands.extend(series.exited_with_errors(stringfmt))
 return commands
 
 def watch_output_queue(self):
diff --git a/xmldumps-backup/dumps/WikiDump.py b/xmldumps-backup/dumps/WikiDump.py
index 9c4801e..09d3b6a 100644
--- a/xmldumps-backup/dumps/WikiDump.py
+++ b/xmldumps-backup/dumps/WikiDump.py
@@ -210,6 +210,12 @@
 "chunks", "checkpointTime", 1)
 self.revs_per_job = self.get_opt_for_proj_or_default(
 "chunks", "revsPerJob", 1)
+self.max_retries = self.get_opt_for_proj_or_default(
+"chunks", "maxRetries", 1)
+self.retry_wait = self.get_opt_for_proj_or_default(
+"chunks", "retryWait", 1)
+self.revs_margin = self.get_opt_for_proj_or_default(
+"chunks", "revsMargin", 1)
 
 if not self.conf.has_section('otherformats'):
 self.conf.add_section('otherformats')
diff --git a/xmldumps-backup/dumps/apijobs.py b/xmldumps-backup/dumps/apijobs.py
index 089a4da..341be04 100644
--- a/xmldumps-backup/dumps/apijobs.py
+++ b/xmldumps-backup/dumps/apijobs.py
@@ -27,17 +27,17 @@
 raise BackupError("siteinfo dump %s trying to produce more than 
one file" %
   self.dumpname)
 output_dfname = dfnames[0]
-error = self.get_siteinfo(
+error, broken = self.get_siteinfo(
 runner.dump_dir.filename_public_path(output_dfname), runner)
 while error and retries < maxretries:
 retries = retries + 1
 time.sleep(5)
-error = self.get_siteinfo(
+error, broken = self.get_siteinfo(