Author: sebb
Date: Sun Jun 22 15:59:35 2025
New Revision: 1926645
URL: http://svn.apache.org/viewvc?rev=1926645&view=rev
Log:
Only update the main file if the run completed without interruption;
otherwise the stats may be inconsistent for a project.
Modified:
comdev/reporter.apache.org/trunk/scripts/mailglomper2.py
Modified: comdev/reporter.apache.org/trunk/scripts/mailglomper2.py
URL: http://svn.apache.org/viewvc/comdev/reporter.apache.org/trunk/scripts/mailglomper2.py?rev=1926645&r1=1926644&r2=1926645&view=diff
==============================================================================
--- comdev/reporter.apache.org/trunk/scripts/mailglomper2.py (original)
+++ comdev/reporter.apache.org/trunk/scripts/mailglomper2.py Sun Jun 22 15:59:35 2025
@@ -32,6 +32,8 @@ SECS_PER_WEEK = 604800
__RAO_HOME = "../"
__MAILDATA_EXTENDED = __RAO_HOME + "data/maildata_extended.json"
+__MAILDATA_PARTIAL = __RAO_HOME + "data/maildata_partial.json"
+# TODO: could the partial data be used for a restart?
def tsprint(s): # print with timestamp
msg = "%s %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)
@@ -180,15 +182,20 @@ async def gather_stats():
now = time.time()
if now - lastCheckpoint > 120: # checkpoint every 2 minutes
lastCheckpoint = now
- tsprint("Creating checkpoint of JSON files")
- with open(__MAILDATA_EXTENDED, "w+", encoding='utf-8') as f:
+ tsprint("Creating checkpoint of JSON file")
+ with open(__MAILDATA_PARTIAL, "w+", encoding='utf-8') as f:
json.dump(mls, f, indent=1) # sort_keys is expensive
- tsprint("Completed scanning, writing JSON files (%s)" % str(interrupted))
- with open(__MAILDATA_EXTENDED, "w+", encoding='utf-8') as f:
- json.dump(mls, f, indent=1, sort_keys=True)
+ # If the run was interrupted, it's likely that data is incomplete
+ # for a mailing list and so not useful even as a checkpoint
+ if interrupted:
+ tsprint("Did not complete successfully, not writing any output")
+ else:
+ tsprint("Completed scanning, writing JSON file")
+ with open(__MAILDATA_EXTENDED, "w+", encoding='utf-8') as f:
+ json.dump(mls, f, indent=1, sort_keys=True)
- tsprint("Dumped JSON files")
+ tsprint(f"Dumped JSON file")
elapsed = time.time() - startTime
tsprint("Completed in %d seconds" % elapsed)