[MediaWiki-commits] [Gerrit] wikimedia...process-control[master]: Include logfile path in failmails
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/346583 ) Change subject: Include logfile path in failmails .. Include logfile path in failmails Change-Id: I919594c5c1c0f5316038d6ae63267806092c433e --- M processcontrol/mailer.py M processcontrol/runner.py M tests/test_job_runner.py 3 files changed, 21 insertions(+), 14 deletions(-) Approvals: jenkins-bot: Verified Ejegg: Looks good to me, approved diff --git a/processcontrol/mailer.py b/processcontrol/mailer.py index 2d74472..ec17bfd 100644 --- a/processcontrol/mailer.py +++ b/processcontrol/mailer.py @@ -4,15 +4,20 @@ class Mailer(object): -def __init__(self, config): -self.from_address = config.get("failmail/from_address") -self.to_address = config.get("failmail/to_address") +def __init__(self, job): +self.job = job +self.from_address = job.config.get("failmail/from_address") +self.to_address = job.config.get("failmail/to_address") # FIXME: this is set to ensure one failmail per instance. Should # do something more sophisticated to collect all calls and send # the mail before exiting. self.sent_fail_mail = False -def fail_mail(self, subject, body="Hope your wits are freshly sharpened!"): +def fail_mail(self, subject, logfile=None): +if logfile is not None: +body = "See the logs for more information: {logfile}".format(logfile=logfile) +else: +body = "No details available." if self.sent_fail_mail: return diff --git a/processcontrol/runner.py b/processcontrol/runner.py index 1b05b57..e97494c 100644 --- a/processcontrol/runner.py +++ b/processcontrol/runner.py @@ -15,7 +15,8 @@ def __init__(self, job): self.global_config = config.GlobalConfiguration() self.job = job -self.mailer = mailer.Mailer(self.job.config) +self.mailer = mailer.Mailer(self.job) +self.logfile = None def run(self): # Check that we are the service user. @@ -54,6 +55,7 @@ self.process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=self.job.environment) streamer = output_streamer.OutputStreamer(self.process, self.job.slug, self.start_time) +self.logfile = streamer.filename streamer.start() # should be safe from deadlocks because our OutputStreamer @@ -69,25 +71,25 @@ self.process = None def fail_exitcode(self, return_code): -message = "Job {name} failed with code {code}".format(name=self.job.name, code=return_code) +message = "{name} failed with code {code}".format(name=self.job.name, code=return_code) config.log.error(message) # TODO: Prevent future jobs according to config. -self.mailer.fail_mail(message) +self.mailer.fail_mail(message, logfile=self.logfile) raise JobFailure(message) def fail_has_stderr(self, stderr_data): -message = "Job {name} printed things to stderr:".format(name=self.job.name) +message = "{name} printed things to stderr:".format(name=self.job.name) config.log.error(message) body = stderr_data.decode("utf-8") config.log.error(body) -self.mailer.fail_mail(message, body) +self.mailer.fail_mail(message, body, logfile=self.logfile) raise JobFailure(message) def fail_timeout(self): self.process.kill() -message = "Job {name} timed out after {timeout} minutes".format(name=self.job.name, timeout=self.job.timeout) +message = "{name} timed out after {timeout} minutes".format(name=self.job.name, timeout=self.job.timeout) config.log.error(message) -self.mailer.fail_mail(message) +self.mailer.fail_mail(message, logfile=self.logfile) # FIXME: Job will return SIGKILL now, fail_exitcode should ignore that signal now? raise JobFailure(message) diff --git a/tests/test_job_runner.py b/tests/test_job_runner.py index a82e808..5af7fac 100644 --- a/tests/test_job_runner.py +++ b/tests/test_job_runner.py @@ -51,7 +51,7 @@ run_job("return_code") loglines = caplog.actual() -assert ("root", "ERROR", "Job False job failed with code 1") in loglines +assert ("root", "ERROR", "False job failed with code 1") in loglines MockSmtp().sendmail.assert_called_once() @@ -65,8 +65,8 @@ run_job("timeout") loglines = caplog.actual() -assert ("root", "ERROR", "Job Timing out job timed out after 0.005 minutes") in loglines -assert ("root", "ERROR", "Job Timing out job failed with code -9") in loglines +assert ("root", "ERROR", "Timing out job timed out after 0.005 minutes") in loglines +assert ("root", "ERROR", "Timing out job failed with code -9") in loglines
[MediaWiki-commits] [Gerrit] wikimedia...process-control[master]: Include logfile path in failmails
Awight has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/346583 ) Change subject: Include logfile path in failmails .. Include logfile path in failmails Change-Id: I919594c5c1c0f5316038d6ae63267806092c433e --- M processcontrol/mailer.py M processcontrol/runner.py M tests/test_job_runner.py 3 files changed, 16 insertions(+), 11 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikimedia/fundraising/process-control refs/changes/83/346583/1 diff --git a/processcontrol/mailer.py b/processcontrol/mailer.py index 2d74472..8311b4e 100644 --- a/processcontrol/mailer.py +++ b/processcontrol/mailer.py @@ -4,15 +4,18 @@ class Mailer(object): -def __init__(self, config): -self.from_address = config.get("failmail/from_address") -self.to_address = config.get("failmail/to_address") +def __init__(self, job, runner): +self.job = job +self.runner = runner +self.from_address = job.config.get("failmail/from_address") +self.to_address = job.config.get("failmail/to_address") # FIXME: this is set to ensure one failmail per instance. Should # do something more sophisticated to collect all calls and send # the mail before exiting. self.sent_fail_mail = False -def fail_mail(self, subject, body="Hope your wits are freshly sharpened!"): +def fail_mail(self, subject): +body = "See the logs for more information: {logfile}".format(logfile=self.runner.logfile) if self.sent_fail_mail: return diff --git a/processcontrol/runner.py b/processcontrol/runner.py index 81aaabb..82fa6fc 100644 --- a/processcontrol/runner.py +++ b/processcontrol/runner.py @@ -15,7 +15,8 @@ def __init__(self, job): self.global_config = config.GlobalConfiguration() self.job = job -self.mailer = mailer.Mailer(self.job.config) +self.mailer = mailer.Mailer(self.job, self) +self.logfile = None def run(self): # Check that we are the service user. @@ -61,6 +62,7 @@ self.process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=self.job.environment) streamer = output_streamer.OutputStreamer(self.process, self.job.slug, self.start_time) +self.logfile = streamer.filename streamer.start() # should be safe from deadlocks because our OutputStreamer @@ -76,14 +78,14 @@ self.process = None def fail_exitcode(self, return_code): -message = "Job {name} failed with code {code}".format(name=self.job.name, code=return_code) +message = "{name} failed with code {code}".format(name=self.job.name, code=return_code) config.log.error(message) # TODO: Prevent future jobs according to config. self.mailer.fail_mail(message) raise JobFailure(message) def fail_has_stderr(self, stderr_data): -message = "Job {name} printed things to stderr:".format(name=self.job.name) +message = "{name} printed things to stderr:".format(name=self.job.name) config.log.error(message) body = stderr_data.decode("utf-8") config.log.error(body) @@ -92,7 +94,7 @@ def fail_timeout(self): self.process.kill() -message = "Job {name} timed out after {timeout} minutes".format(name=self.job.name, timeout=self.job.timeout) +message = "{name} timed out after {timeout} minutes".format(name=self.job.name, timeout=self.job.timeout) config.log.error(message) self.mailer.fail_mail(message) # FIXME: Job will return SIGKILL now, fail_exitcode should ignore that signal now? diff --git a/tests/test_job_runner.py b/tests/test_job_runner.py index a82e808..5af7fac 100644 --- a/tests/test_job_runner.py +++ b/tests/test_job_runner.py @@ -51,7 +51,7 @@ run_job("return_code") loglines = caplog.actual() -assert ("root", "ERROR", "Job False job failed with code 1") in loglines +assert ("root", "ERROR", "False job failed with code 1") in loglines MockSmtp().sendmail.assert_called_once() @@ -65,8 +65,8 @@ run_job("timeout") loglines = caplog.actual() -assert ("root", "ERROR", "Job Timing out job timed out after 0.005 minutes") in loglines -assert ("root", "ERROR", "Job Timing out job failed with code -9") in loglines +assert ("root", "ERROR", "Timing out job timed out after 0.005 minutes") in loglines +assert ("root", "ERROR", "Timing out job failed with code -9") in loglines MockSmtp().sendmail.assert_called_once() -- To view, visit https://gerrit.wikimedia.org/r/346583 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I919594c5c1c0f5316038d6ae63267806092c433e Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/fundraising/process-control Gerrit-Branch: