Author: sebb
Date: Sun Jun 22 15:59:35 2025
New Revision: 1926645

URL: http://svn.apache.org/viewvc?rev=1926645&view=rev
Log:
Only update main file if it is complete

Otherwise stats may be inconsistent for a project

Modified:
    comdev/reporter.apache.org/trunk/scripts/mailglomper2.py

Modified: comdev/reporter.apache.org/trunk/scripts/mailglomper2.py
URL: http://svn.apache.org/viewvc/comdev/reporter.apache.org/trunk/scripts/mailglomper2.py?rev=1926645&r1=1926644&r2=1926645&view=diff
==============================================================================
--- comdev/reporter.apache.org/trunk/scripts/mailglomper2.py (original)
+++ comdev/reporter.apache.org/trunk/scripts/mailglomper2.py Sun Jun 22 15:59:35 2025
@@ -32,6 +32,8 @@ SECS_PER_WEEK = 604800
 __RAO_HOME = "../"
 
 __MAILDATA_EXTENDED = __RAO_HOME + "data/maildata_extended.json"
+__MAILDATA_PARTIAL  = __RAO_HOME + "data/maildata_partial.json"
+# TODO: could the partial data be used for a restart?
 
 def tsprint(s):  # print with timestamp
     msg = "%s %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)
@@ -180,15 +182,20 @@ async def gather_stats():
         now = time.time()
         if now - lastCheckpoint > 120:  # checkpoint every 2 minutes
             lastCheckpoint = now
-            tsprint("Creating checkpoint of JSON files")
-            with open(__MAILDATA_EXTENDED, "w+", encoding='utf-8') as f:
+            tsprint("Creating checkpoint of JSON file")
+            with open(__MAILDATA_PARTIAL, "w+", encoding='utf-8') as f:
                 json.dump(mls, f, indent=1)  # sort_keys is expensive
 
-    tsprint("Completed scanning, writing JSON files (%s)" % str(interrupted))
-    with open(__MAILDATA_EXTENDED, "w+", encoding='utf-8') as f:
-        json.dump(mls, f, indent=1, sort_keys=True)
+    # If the run was interrupted, it's likely that data is incomplete
+    # for a mailing list and so not useful even as a checkpoint
+    if interrupted:
+        tsprint("Did not complete successfully, not writing any output")
+    else:
+        tsprint("Completed scanning, writing JSON file")
+        with open(__MAILDATA_EXTENDED, "w+", encoding='utf-8') as f:
+            json.dump(mls, f, indent=1, sort_keys=True)
 
-    tsprint("Dumped JSON files")
+        tsprint(f"Dumped JSON file")
     elapsed = time.time() - startTime
     tsprint("Completed in %d seconds" % elapsed)
 


Reply via email to