ArielGlenn has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/342310 )
Change subject: use the various json outputs to write a combined file for status api use
......................................................................
use the various json outputs to write a combined file for status api use
Bug: T147177
Change-Id: I9d8ca1115559d9e4deec45fb649d8a472a7ffa19
---
M xmldumps-backup/dumps/runner.py
M xmldumps-backup/dumps/runnerutils.py
A xmldumps-backup/dumps/runstatusapi.py
3 files changed, 182 insertions(+), 6 deletions(-)
Approvals:
ArielGlenn: Looks good to me, approved
jenkins-bot: Verified
diff --git a/xmldumps-backup/dumps/runner.py b/xmldumps-backup/dumps/runner.py
index 9cbafaf..961140a 100644
--- a/xmldumps-backup/dumps/runner.py
+++ b/xmldumps-backup/dumps/runner.py
@@ -23,6 +23,7 @@
from dumps.runnerutils import Maintenance, RunInfoFile, DumpRunJobData
from dumps.utils import DbServerInfo, FilePartInfo, TimeUtils
+from dumps.runstatusapi import StatusAPI
class Logger(threading.Thread):
@@ -477,8 +478,9 @@
self.enabled = {}
for setting in [StatusHtml.NAME, Report.NAME, Checksummer.NAME,
RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME,
- Feeds.NAME, NoticeFile.NAME, "makedir", "clean_old_dumps",
- "cleanup_old_files", "check_trunc_files", "cleanup_tmp_files"]:
+ Feeds.NAME, NoticeFile.NAME, StatusAPI.NAME,
+ "makedir", "clean_old_dumps", "cleanup_old_files",
+ "check_trunc_files", "cleanup_tmp_files"]:
self.enabled[setting] = True
if not self.cleanup_old_files:
@@ -487,7 +489,7 @@
if self.dryrun or self._partnum_todo is not None or self.checkpoint_file is not None:
for setting in [StatusHtml.NAME, Report.NAME, Checksummer.NAME,
- RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME,
+ StatusAPI.NAME, RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME,
Feeds.NAME, NoticeFile.NAME, "makedir", "clean_old_dumps"]:
if setting in self.enabled:
del self.enabled[setting]
@@ -507,7 +509,7 @@
del self.enabled[setting]
if self.job_requested == "createdirs":
- for setting in [SymLinks.NAME, Feeds.NAME, RunSettings.NAME]:
+ for setting in [SymLinks.NAME, Feeds.NAME, RunSettings.NAME, StatusAPI.NAME]:
if setting in self.enabled:
del self.enabled[setting]
@@ -568,6 +570,9 @@
self.failurehandler,
self.log_and_print, self.verbose)
+ self.runstatus_updater = StatusAPI(self.wiki, self.enabled, "json",
+ self.log_and_print, self.verbose)
+
def log_queue_reader(self, log):
if not log:
return
@@ -583,6 +588,7 @@
def html_update_callback(self):
self.report.update_index_html_and_json()
self.statushtml.update_status_file()
+ self.runstatus_updater.write_statusapi_file()
# returns 0 on success, 1 on error
def save_command(self, commands, outfile):
@@ -665,6 +671,7 @@
item.start()
self.report.update_index_html_and_json()
self.statushtml.update_status_file()
+ self.runstatus_updater.write_statusapi_file()
self.dumpjobdata.do_before_job(self.dump_item_list.dump_items)
@@ -687,7 +694,7 @@
item.set_status("failed")
if item.status() == "done":
- self.dumpjobdata.do_after_job(item)
+ self.dumpjobdata.do_after_job(item, self.dump_item_list.dump_items)
elif item.status() == "waiting" or item.status() == "skipped":
# don't update the checksum files for this item.
pass
@@ -780,6 +787,7 @@
# All jobs are either in status "done", "waiting", "failed", "skipped"
self.report.update_index_html_and_json("done")
self.statushtml.update_status_file("done")
+ self.runstatus_updater.write_statusapi_file()
else:
# This may happen if we start a dump now and abort before all items are
# done. Then some are left for example in state "waiting". When
@@ -787,6 +795,7 @@
# previously in "waiting" are still in status "waiting"
self.report.update_index_html_and_json("partialdone")
self.statushtml.update_status_file("partialdone")
+ self.runstatus_updater.write_statusapi_file()
self.dumpjobdata.do_after_dump(self.dump_item_list.dump_items)
diff --git a/xmldumps-backup/dumps/runnerutils.py b/xmldumps-backup/dumps/runnerutils.py
index e80372f..e1880ce 100644
--- a/xmldumps-backup/dumps/runnerutils.py
+++ b/xmldumps-backup/dumps/runnerutils.py
@@ -885,7 +885,7 @@
self.runinfofile.save_dump_runinfo_file(
RunInfoFile.report_dump_runinfo(dump_items))
- def do_after_job(self, item):
+ def do_after_job(self, item, dump_items):
self.checksummer.cp_chksum_tmpfiles_to_permfile()
# this will include checkpoint files if they are enabled.
for file_obj in item.list_outfiles_to_publish(self.dump_dir):
@@ -898,6 +898,8 @@
self.checksummer.checksums(file_obj, self)
self.symlinks.cleanup_symlinks()
self.feeds.cleanup_feeds()
+ self.runinfofile.save_dump_runinfo_file(
+ RunInfoFile.report_dump_runinfo(dump_items))
def do_latest_job(self):
self.symlinks.remove_symlinks_from_old_runs(self.wiki.date)
diff --git a/xmldumps-backup/dumps/runstatusapi.py b/xmldumps-backup/dumps/runstatusapi.py
new file mode 100644
index 0000000..a82c3b6
--- /dev/null
+++ b/xmldumps-backup/dumps/runstatusapi.py
@@ -0,0 +1,165 @@
+"""
+classes and methods for writing out file
+with information about the current dump run in
+json format, for downloaders' use
+"""
+
+
+import os
+import sys
+import traceback
+import json
+from dumps.runnerutils import Checksummer
+from dumps.runnerutils import RunInfoFile
+from dumps.runnerutils import Report
+from dumps.fileutils import DumpFilename
+from dumps.fileutils import DumpDir
+
+
+class StatusAPI(object):
+ """
+ write specific run status information
+ to a single file, or update portions
+ of an existing file
+ """
+
+ NAME = "statusapi"
+ FILENAME = "dumpstatus"
+
+ # might add more someday, but not today
+ known_formats = ["json"]
+
+ def __init__(self, wiki, enabled, fileformat="json", error_callback=None, verbose=False):
+ self.wiki = wiki
+ self._enabled = enabled
+ self.fileformat = fileformat
+ self.filepath = self.get_output_filepath()
+ if not self.filepath:
+ # here we should log something. FIXME
+ pass
+ self.error_callback = error_callback
+ self.verbose = verbose
+
+ def get_output_filepath(self):
+ if not self.check_format():
+ return None
+ return os.path.join(self.wiki.public_dir(),
+ self.wiki.date, StatusAPI.FILENAME + "." + self.fileformat)
+
+ def check_format(self):
+ return bool(self.fileformat in StatusAPI.known_formats)
+
+ def get_dumprun_info(self):
+ dumpruninfo_path = os.path.join(self.wiki.public_dir(),
+ self.wiki.date, "dumpruninfo.json")
+ return self.get_json_file_contents(dumpruninfo_path)
+
+ def get_filehash_info(self):
+ """
+ return a list of jsonified contents of hash files, one entry per hash type,
+ each entry containing the hashes and names of all files produced
+ """
+ contents = []
+ for hashtype in Checksummer.HASHTYPES:
+ dumpfile = DumpFilename(
+ self.wiki, None, Checksummer.get_checksum_filename_basename(hashtype, "json"))
+ dump_dir = DumpDir(self.wiki, self.wiki.db_name)
+
+ basefilename = dump_dir.filename_public_path(dumpfile)
+ path = os.path.join(self.wiki.public_dir(), self.wiki.date, basefilename)
+
+ contents.append(self.get_json_file_contents(path))
+ return [item for item in contents if item is not None]
+
+ def get_report_info(self):
+ reportinfo_path = os.path.join(self.wiki.public_dir(),
+ self.wiki.date, "report.json")
+ return self.get_json_file_contents(reportinfo_path)
+
+ def get_json_file_contents(self, path):
+ try:
+ fullpath = os.path.join(self.wiki.public_dir(),
+ self.wiki.date, path)
+ with open(fullpath, "r") as settings_fd:
+ contents = settings_fd.read()
+ if not contents:
+ return None
+ return json.loads(contents)
+ except IOError:
+ # may not exist, we don't care
+ return None
+
+ def complete_fileinfo(self, jobname, reportinfo, hashinfo_list):
+ """
+ given a list of dicts with info about each file,
+ grab the related hash info out of the hashinfo dicts for
+ each file and add it to that file's dict in the list
+ then return the modified dict
+ """
+ filenames = Report.get_filenames_for_job(jobname, reportinfo)
+ for filename in filenames:
+ for hashinfo in hashinfo_list:
+ hashtypes_sums = Checksummer.get_hashinfo(filename, hashinfo)
+ for (hashtype, checksum) in hashtypes_sums:
+ Report.add_file_property(jobname, filename, hashtype, checksum, reportinfo)
+
+ def combine_status_sources(self, dumpruninfo, filehashinfo, reportinfo):
+ """
+ given the json dumpruninfo, the json report info, and the json file hash infos,
+ convert them all into one structure for writing later as json to a
+ status output file
+ """
+ # list of filecontents passed in for filehashinfo
+ # because there is more than one hash type
+ if filehashinfo is None:
+ filehashinfo = [Checksummer.get_empty_json()]
+ if dumpruninfo is None:
+ dumpruninfo = RunInfoFile.get_empty_json()
+ if reportinfo is not None:
+ dumpruninfo_jobs = RunInfoFile.get_jobs(dumpruninfo)
+ for jobname in Report.get_jobs(reportinfo):
+ if jobname in dumpruninfo_jobs:
+ # fold hash info into the report
+ self.complete_fileinfo(jobname, reportinfo, filehashinfo)
+ # fold the report info (file info for all job-related files)
+ # into the dumpruninfo
+ fileinfo = Report.get_fileinfo_for_job(jobname, reportinfo)
+ RunInfoFile.add_job_property(jobname, "files", fileinfo, dumpruninfo)
+ return dumpruninfo
+
+ def write_contents_json(self, contents):
+ if not self.filepath:
+ return
+ with open(self.filepath, "w+") as fdesc:
+ fdesc.write(json.dumps(contents) + "\n")
+
+ def write_contents(self, contents):
+ if not self.check_format():
+ return None
+ if self.fileformat == "json":
+ return self.write_contents_json(contents)
+
+ def write_statusapi_file(self):
+ if StatusAPI.NAME not in self._enabled:
+ return
+
+ try:
+ # we have three sources of information that need to
+ # be read and combined into the json file usable by
+ # dump status requesters
+ dumprun_info = self.get_dumprun_info()
+ filehash_info = self.get_filehash_info()
+ report_info = self.get_report_info()
+
+ contents = self.combine_status_sources(dumprun_info, filehash_info, report_info)
+ self.write_contents(contents)
+ except Exception as ex:
+ if self.verbose:
+ exc_type, exc_value, exc_traceback = sys.exc_info()
+ sys.stderr.write(repr(traceback.format_exception(exc_type, exc_value, exc_traceback)))
+ message = "Couldn't update status files. Continuing anyways"
+ if self.error_callback:
+ self.error_callback(message)
+ else:
+ sys.stderr.write("%s\n" % message)
--
To view, visit https://gerrit.wikimedia.org/r/342310
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9d8ca1115559d9e4deec45fb649d8a472a7ffa19
Gerrit-PatchSet: 3
Gerrit-Project: operations/dumps
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits