ArielGlenn has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/342310 )
Change subject: use the various json outputs to write a combined file for status api use
......................................................................
use the various json outputs to write a combined file for status api use
Bug: T147177
Change-Id: I9d8ca1115559d9e4deec45fb649d8a472a7ffa19
---
M xmldumps-backup/dumps/runner.py
M xmldumps-backup/dumps/runnerutils.py
A xmldumps-backup/dumps/runstatusapi.py
3 files changed, 182 insertions(+), 6 deletions(-)
Approvals:
ArielGlenn: Looks good to me, approved
jenkins-bot: Verified
diff --git a/xmldumps-backup/dumps/runner.py b/xmldumps-backup/dumps/runner.py
index 9cbafaf..961140a 100644
--- a/xmldumps-backup/dumps/runner.py
+++ b/xmldumps-backup/dumps/runner.py
@@ -23,6 +23,7 @@
from dumps.runnerutils import Maintenance, RunInfoFile, DumpRunJobData
from dumps.utils import DbServerInfo, FilePartInfo, TimeUtils
+from dumps.runstatusapi import StatusAPI
class Logger(threading.Thread):
@@ -477,8 +478,9 @@
self.enabled = {}
for setting in [StatusHtml.NAME, Report.NAME, Checksummer.NAME,
RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME,
- Feeds.NAME, NoticeFile.NAME, "makedir", "clean_old_dumps",
- "cleanup_old_files", "check_trunc_files", "cleanup_tmp_files"]:
+ Feeds.NAME, NoticeFile.NAME, StatusAPI.NAME,
+ "makedir", "clean_old_dumps", "cleanup_old_files",
+ "check_trunc_files", "cleanup_tmp_files"]:
self.enabled[setting] = True
if not self.cleanup_old_files:
@@ -487,7 +489,7 @@
if self.dryrun or self._partnum_todo is not None or self.checkpoint_file is not None:
for setting in [StatusHtml.NAME, Report.NAME, Checksummer.NAME,
- RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME,
+ StatusAPI.NAME, RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME,
Feeds.NAME, NoticeFile.NAME, "makedir", "clean_old_dumps"]:
if setting in self.enabled:
del self.enabled[setting]
@@ -507,7 +509,7 @@
del self.enabled[setting]
if self.job_requested == "createdirs":
- for setting in [SymLinks.NAME, Feeds.NAME, RunSettings.NAME]:
+ for setting in [SymLinks.NAME, Feeds.NAME, RunSettings.NAME, StatusAPI.NAME]:
if setting in self.enabled:
del self.enabled[setting]
@@ -568,6 +570,9 @@
self.failurehandler,
self.log_and_print, self.verbose)
+ self.runstatus_updater = StatusAPI(self.wiki, self.enabled, "json",
+ self.log_and_print, self.verbose)
+
def log_queue_reader(self, log):
if not log:
return
@@ -583,6 +588,7 @@
def html_update_callback(self):
self.report.update_index_html_and_json()
self.statushtml.update_status_file()
+ self.runstatus_updater.write_statusapi_file()
# returns 0 on success, 1 on error
def save_command(self, commands, outfile):
@@ -665,6 +671,7 @@
item.start()
self.report.update_index_html_and_json()
self.statushtml.update_status_file()
+ self.runstatus_updater.write_statusapi_file()
self.dumpjobdata.do_before_job(self.dump_item_list.dump_items)
@@ -687,7 +694,7 @@
item.set_status("failed")
if item.status() == "done":
- self.dumpjobdata.do_after_job(item)
+ self.dumpjobdata.do_after_job(item, self.dump_item_list.dump_items)
elif item.status() == "waiting" or item.status() == "skipped":
# don't update the checksum files for this item.
pass
@@ -780,6 +787,7 @@
# All jobs are either in status "done", "waiting", "failed", "skipped"
self.report.update_index_html_and_json("done")
self.statushtml.update_status_file("done")
+ self.runstatus_updater.write_statusapi_file()
else:
# This may happen if we start a dump now and abort before all items are
# done. Then some are left for example in state "waiting". When
@@ -787,6 +795,7 @@
# previously in "waiting" are still in status "waiting"
self.report.update_index_html_and_json("partialdone")
self.statushtml.update_status_file("partialdone")
+ self.runstatus_updater.write_statusapi_file()
self.dumpjobdata.do_after_dump(self.dump_item_list.dump_items)
diff --git a/xmldumps-backup/dumps/runnerutils.py b/xmldumps-backup/dumps/runnerutils.py
index e80372f..e1880ce 100644
--- a/xmldumps-backup/dumps/runnerutils.py
+++ b/xmldumps-backup/dumps/runnerutils.py
@@ -885,7 +885,7 @@
self.runinfofile.save_dump_runinfo_file(
RunInfoFile.report_dump_runinfo(dump_items))
- def do_after_job(self, item):
+ def do_after_job(self, item, dump_items):
self.checksummer.cp_chksum_tmpfiles_to_permfile()
# this will include checkpoint files if they are enabled.
for file_obj in item.list_outfiles_to_publish(self.dump_dir):
@@ -898,6 +898,8 @@
self.checksummer.checksums(file_obj, self)
self.symlinks.cleanup_symlinks()
self.feeds.cleanup_feeds()
+ self.runinfofile.save_dump_runinfo_file(
+ RunInfoFile.report_dump_runinfo(dump_items))
def do_latest_job(self):
self.symlinks.remove_symlinks_from_old_runs(self.wiki.date)
diff --git a/xmldumps-backup/dumps/runstatusapi.py b/xmldumps-backup/dumps/runstatusapi.py
new file mode 100644
index 0000000..a82c3b6
--- /dev/null
+++ b/xmldumps-backup/dumps/runstatusapi.py
@@ -0,0 +1,165 @@
+"""
+classes and methods for writing out file
+with information about the current dump run in
+json format, for downloaders' use
+"""
+
+
+import os
+import sys
+import traceback
+import json
+from dumps.runnerutils import Checksummer
+from dumps.runnerutils import RunInfoFile
+from dumps.runnerutils import Report
+from dumps.fileutils import DumpFilename
+from dumps.fileutils import DumpDir
+
+
+class StatusAPI(object):
+ """
+ write specific run status information
+ to a single file, or update portions
+ of an existing file
+ """
+
+ NAME = "statusapi"
+ FILENAME = "dumpstatus"
+
+ # might add more someday, but not today
+ known_formats = ["json"]
+
+ def __init__(self, wiki, enabled, fileformat="json", error_callback=None, verbose=False):
+ self.wiki = wiki
+ self._enabled = enabled
+ self.fileformat = fileformat
+ self.filepath = self.get_output_filepath()
+ if not self.filepath:
+ # here we should log something. FIXME
+ pass
+ self.error_callback = error_callback
+ self.verbose = verbose
+
+ def get_output_filepath(self):
+ if not self.check_format():
+ return None
+ return os.path.join(self.wiki.public_dir(),
+ self.wiki.date, StatusAPI.FILENAME + "." + self.fileformat)
+
+ def check_format(self):
+ return bool(self.fileformat in StatusAPI.known_formats)
+
+ def get_dumprun_info(self):
+ dumpruninfo_path = os.path.join(self.wiki.public_dir(),
+ self.wiki.date, "dumpruninfo.json")
+ return self.get_json_file_contents(dumpruninfo_path)
+
+ def get_filehash_info(self):
+ """
+ return a list of jsonified contents of hash files, one entry per hash type,
+ each entry containing the hashes and names of all files produced
+ """
+ contents = []
+ for hashtype in Checksummer.HASHTYPES:
+ dumpfile = DumpFilename(
+ self.wiki, None, Checksummer.get_checksum_filename_basename(hashtype, "json"))
+ dump_dir = DumpDir(self.wiki, self.wiki.db_name)
+
+ basefilename = dump_dir.filename_public_path(dumpfile)
+ path = os.path.join(self.wiki.public_dir(), self.wiki.date, basefilename)
+
+ contents.append(self.get_json_file_contents(path))
+ return [item for item in contents if item is not None]
+
+ def get_report_info(self):
+ reportinfo_path = os.path.join(self.wiki.public_dir(),
+ self.wiki.date, "report.json")
+ return self.get_json_file_contents(reportinfo_path)
+
+ def get_json_file_contents(self, path):
+ try:
+ fullpath = os.path.join(self.wiki.public_dir(),
+ self.wiki.date, path)
+ with open(fullpath, "r") as settings_fd:
+ contents = settings_fd.read()
+ if not contents:
+ return None
+ return json.loads(contents)
+ except IOError:
+ # may not exist, we don't care
+ return None
+
+ def complete_fileinfo(self, jobname, reportinfo, hashinfo_list):
+ """
+ given a list of dicts with info about each file,
+ grab the related hash info out of the hashinfo dicts for
+ each file and add it to that file's dict in the list
+ then return the modified dict
+ """
+ filenames = Report.get_filenames_for_job(jobname, reportinfo)
+ for filename in filenames:
+ for hashinfo in hashinfo_list:
+ hashtypes_sums = Checksummer.get_hashinfo(filename, hashinfo)
+ for (hashtype, checksum) in hashtypes_sums:
+ Report.add_file_property(jobname, filename, hashtype, checksum, reportinfo)
+
+ def combine_status_sources(self, dumpruninfo, filehashinfo, reportinfo):
+ """
+ given the json dumpruninfo, the json report info, and the json file hash infos,
+ convert them all into one structure for writing later as json to a
+ status output file
+ """
+ # list of filecontents passed in for filehashinfo
+ # because there is more than one hash type
+ if filehashinfo is None:
+ filehashinfo = [Checksummer.get_empty_json()]
+ if dumpruninfo is None:
+ dumpruninfo = RunInfoFile.get_empty_json()
+ if reportinfo is not None:
+ dumpruninfo_jobs = RunInfoFile.get_jobs(dumpruninfo)
+ for jobname in Report.get_jobs(reportinfo):
+ if jobname in dumpruninfo_jobs:
+ # fold hash info into the report
+ self.complete_fileinfo(jobname, reportinfo, filehashinfo)
+ # fold the report info (file info for all job-related files)
+ # into the dumpruninfo
+ fileinfo = Report.get_fileinfo_for_job(jobname, reportinfo)
+ RunInfoFile.add_job_property(jobname, "files", fileinfo, dumpruninfo)
+ return dumpruninfo
+
+ def write_contents_json(self, contents):
+ if not self.filepath:
+ return
+ with open(self.filepath, "w+") as fdesc:
+ fdesc.write(json.dumps(contents) + "\n")
+
+ def write_contents(self, contents):
+ if not self.check_format():
+ return None
+ if self.fileformat == "json":
+ return self.write_contents_json(contents)
+
+ def write_statusapi_file(self):
+ if StatusAPI.NAME not in self._enabled:
+ return
+
+ try:
+ # we have three sources of information that need to
+ # be read and combined into the json file usable by
+ # dump status requesters
+ dumprun_info = self.get_dumprun_info()
+ filehash_info = self.get_filehash_info()
+ report_info = self.get_report_info()
+
+ contents = self.combine_status_sources(dumprun_info, filehash_info, report_info)
+ self.write_contents(contents)
+ except Exception as ex:
+ if self.verbose:
+ exc_type, exc_value, exc_traceback = sys.exc_info()
+ sys.stderr.write(repr(traceback.format_exception(exc_type, exc_value, exc_traceback)))
+ message = "Couldn't update status files. Continuing anyways"
+ if self.error_callback:
+ self.error_callback(message)
+ else:
+ sys.stderr.write("%s\n" % message)
--
To view, visit https://gerrit.wikimedia.org/r/342310
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9d8ca1115559d9e4deec45fb649d8a472a7ffa19
Gerrit-PatchSet: 3
Gerrit-Project: operations/dumps
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits