Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8330

Modified Files:
        oastats.py 
Log Message:
More detailed statistics in SpamBayes Manager.  These roughly match the
updated statistics in the sb_server Web UI.


Index: oastats.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/oastats.py,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** oastats.py  2 Nov 2004 21:33:46 -0000       1.8
--- oastats.py  6 Dec 2004 17:50:04 -0000       1.9
***************
*** 85,88 ****
--- 85,90 ----
          points.
          """
+         chunks = []
+         push = chunks.append
          num_seen = self.num_ham + self.num_spam + self.num_unsure
          if not session_only:
***************
*** 90,97 ****
              num_seen += (totals["num_ham"] + totals["num_spam"] +
                           totals["num_unsure"])
          if num_seen==0:
!             return [_("SpamBayes has processed zero messages")]
!         chunks = []
!         push = chunks.append
          if session_only:
              num_ham = self.num_ham
--- 92,98 ----
              num_seen += (totals["num_ham"] + totals["num_spam"] +
                           totals["num_unsure"])
+         push(_("Messages classified: %d") % num_seen);
          if num_seen==0:
!             return chunks
          if session_only:
              num_ham = self.num_ham
***************
*** 117,120 ****
--- 118,160 ----
          perc_spam = 100.0 * num_spam / num_seen
          perc_unsure = 100.0 * num_unsure / num_seen
+         num_ham_correct = num_ham - num_deleted_spam_fn
+         num_spam_correct = num_spam - num_recovered_good_fp
+         num_correct = num_ham_correct + num_spam_correct
+         num_incorrect = num_deleted_spam_fn + num_recovered_good_fp
+         perc_correct = 100.0 * num_correct / num_seen
+         perc_incorrect = 100.0 * num_incorrect / num_seen
+         perc_fp = 100.0 * num_recovered_good_fp / num_seen
+         perc_fn = 100.0 * num_deleted_spam_fn / num_seen
+         num_unsure_trained_ham = num_recovered_good - num_recovered_good_fp
+         num_unsure_trained_spam = num_deleted_spam - num_deleted_spam_fn
+         num_unsure_not_trained = num_unsure - num_unsure_trained_ham - num_unsure_trained_spam
+         if num_unsure:
+             perc_unsure_trained_ham = 100.0 * num_unsure_trained_ham / num_unsure
+             perc_unsure_trained_spam = 100.0 * num_unsure_trained_spam / num_unsure
+             perc_unsure_not_trained = 100.0 * num_unsure_not_trained / num_unsure
+         else:
+             perc_unsure_trained_ham = 0.0
+             perc_unsure_trained_spam = 0.0
+             perc_unsure_not_trained = 0.0
+         total_ham = num_ham_correct + num_recovered_good
+         total_spam = num_spam_correct + num_deleted_spam
+         if total_ham:
+             perc_ham_incorrect = 100.0 * num_recovered_good_fp / total_ham
+             perc_ham_unsure = 100.0 * num_unsure_trained_ham / total_ham
+             perc_ham_incorrect_or_unsure = \
+                 100.0 * (num_recovered_good_fp + num_unsure_trained_ham) / total_ham
+         else:
+             perc_ham_incorrect = 0.0
+             perc_ham_unsure = 0.0
+             perc_ham_incorrect_or_unsure = 0.0
+         if total_spam:
+             perc_spam_correct = 100.0 * num_spam_correct / total_spam
+             perc_spam_unsure = 100.0 * num_unsure_trained_spam / total_spam
+             perc_spam_correct_or_unsure = \
+                 100.0 * (num_spam_correct + num_unsure_trained_spam) / total_spam
+         else:
+             perc_spam_correct = 100.0
+             perc_spam_unsure = 0.0
+             perc_spam_correct_or_unsure = 100.0
          format_dict = locals().copy()
          del format_dict["self"]
***************
*** 122,126 ****
          del format_dict["chunks"]
          format_dict.update(dict(perc_spam=perc_spam, perc_ham=perc_ham,
!                                 perc_unsure=perc_unsure, num_seen=num_seen))
          format_dict["perc_ham_s"] = "%%(perc_ham).%df%%(perc)s" \
                                      % (decimal_points,)
--- 162,181 ----
          del format_dict["chunks"]
          format_dict.update(dict(perc_spam=perc_spam, perc_ham=perc_ham,
!                                 perc_unsure=perc_unsure, num_seen=num_seen,
!                                 num_correct=num_correct, num_incorrect=num_incorrect,
!                                 perc_correct=perc_correct, perc_incorrect=perc_incorrect,
!                                 perc_fp=perc_fp, perc_fn=perc_fn,
!                                 num_unsure_trained_ham=num_unsure_trained_ham,
!                                 num_unsure_trained_spam=num_unsure_trained_spam,
!                                 num_unsure_not_trained=num_unsure_not_trained,
!                                 perc_unsure_trained_ham=perc_unsure_trained_ham,
!                                 perc_unsure_trained_spam=perc_unsure_trained_spam,
!                                 perc_unsure_not_trained=perc_unsure_not_trained,
!                                 perc_ham_incorrect=perc_ham_incorrect,
!                                 perc_ham_unsure=perc_ham_unsure,
!                                 perc_ham_incorrect_or_unsure=perc_ham_incorrect_or_unsure,
!                                 perc_spam_correct=perc_spam_correct,
!                                 perc_spam_unsure=perc_spam_unsure,
!                                 perc_spam_correct_or_unsure=perc_spam_correct_or_unsure))
          format_dict["perc_ham_s"] = "%%(perc_ham).%df%%(perc)s" \
                                      % (decimal_points,)
***************
*** 129,151 ****
          format_dict["perc_unsure_s"] = "%%(perc_unsure).%df%%(perc)s" \
                                         % (decimal_points,)
          format_dict["perc"] = "%"
!         push((_("SpamBayes has processed %(num_seen)d messages - " \
!              "%(num_ham)d (%(perc_ham_s)s) good, " \
!              "%(num_spam)d (%(perc_spam_s)s) spam " \
!              "and %(num_unsure)d (%(perc_unsure_s)s) unsure") \
               % format_dict) % format_dict)
  
-         if num_recovered_good:
-             push(_("%(num_recovered_good)d message(s) were manually " \
-                  "classified as good (with %(num_recovered_good_fp)d " \
-                  "being false positives)") % format_dict)
-         else:
-             push(_("No messages were manually classified as good"))
-         if num_deleted_spam:
-             push(_("%(num_deleted_spam)d message(s) were manually " \
-                  "classified as spam (with %(num_deleted_spam_fn)d " \
-                  "being false negatives)") % format_dict)
-         else:
-             push(_("No messages were manually classified as spam"))
          return chunks
  
--- 184,252 ----
          format_dict["perc_unsure_s"] = "%%(perc_unsure).%df%%(perc)s" \
                                         % (decimal_points,)
+         format_dict["perc_correct_s"] = "%%(perc_correct).%df%%(perc)s" \
+                                        % (decimal_points,)
+         format_dict["perc_incorrect_s"] = "%%(perc_incorrect).%df%%(perc)s" \
+                                        % (decimal_points,)
+         format_dict["perc_fp_s"] = "%%(perc_fp).%df%%(perc)s" \
+                                     % (decimal_points,)
+         format_dict["perc_fn_s"] = "%%(perc_fn).%df%%(perc)s" \
+                                     % (decimal_points,)
+         format_dict["perc_spam_correct_s"] = "%%(perc_spam_correct).%df%%(perc)s" \
+                                        % (decimal_points,)
+         format_dict["perc_spam_unsure_s"] = "%%(perc_spam_unsure).%df%%(perc)s" \
+                                        % (decimal_points,)
+         format_dict["perc_spam_correct_or_unsure_s"] = "%%(perc_spam_correct_or_unsure).%df%%(perc)s" \
+                                        % (decimal_points,)
+         format_dict["perc_ham_incorrect_s"] = "%%(perc_ham_incorrect).%df%%(perc)s" \
+                                        % (decimal_points,)
+         format_dict["perc_ham_unsure_s"] = "%%(perc_ham_unsure).%df%%(perc)s" \
+                                        % (decimal_points,)
+         format_dict["perc_ham_incorrect_or_unsure_s"] = "%%(perc_ham_incorrect_or_unsure).%df%%(perc)s" \
+                                        % (decimal_points,)
+         format_dict["perc_unsure_trained_ham_s"] = "%%(perc_unsure_trained_ham).%df%%(perc)s" \
+                                        % (decimal_points,)
+         format_dict["perc_unsure_trained_spam_s"] = "%%(perc_unsure_trained_spam).%df%%(perc)s" \
+                                        % (decimal_points,)
+         format_dict["perc_unsure_not_trained_s"] = "%%(perc_unsure_not_trained).%df%%(perc)s" \
+                                        % (decimal_points,)
          format_dict["perc"] = "%"
!         
!         push((_("\tGood:\t%(num_ham)d (%(perc_ham_s)s)") \
!              % format_dict) % format_dict)
!         push((_("\tSpam:\t%(num_spam)d (%(perc_spam_s)s)") \
!              % format_dict) % format_dict)
!         push((_("\tUnsure:\t%(num_unsure)d (%(perc_unsure_s)s)") \
!              % format_dict) % format_dict)
!         push("")
! 
!         push((_("Classified correctly:\t%(num_correct)d (%(perc_correct_s)s of total)") \
!              % format_dict) % format_dict)
!         push((_("Classified incorrectly:\t%(num_incorrect)d (%(perc_incorrect_s)s of total)") \
!              % format_dict) % format_dict)
!         if num_incorrect:
!             push((_("\tFalse positives:\t%(num_recovered_good_fp)d (%(perc_fp_s)s of total)") \
!                  % format_dict) % format_dict)
!             push((_("\tFalse negatives:\t%(num_deleted_spam_fn)d (%(perc_fn_s)s of total)") \
!                  % format_dict) % format_dict)
!         push("")
!         
!         push(_("Manually classified as good:\t%(num_recovered_good)d") % format_dict)
!         push(_("Manually classified as spam:\t%(num_deleted_spam)d") % format_dict)
!         push("")
! 
!         if num_unsure:
!             push((_("Unsures trained as good:\t%(num_unsure_trained_ham)d (%(perc_unsure_trained_ham_s)s of unsures)") \
!                  % format_dict) % format_dict)
!             push((_("Unsures trained as spam:\t%(num_unsure_trained_spam)d (%(perc_unsure_trained_spam_s)s of unsures)") \
!                  % format_dict) % format_dict)
!             push((_("Unsures not trained:\t\t%(num_unsure_not_trained)d (%(perc_unsure_not_trained_s)s of unsures)") \
!                  % format_dict) % format_dict)
!             push("")
! 
!         push((_("Spam correctly identified:\t%(perc_spam_correct_s)s (+ %(perc_spam_unsure_s)s unsure)") \
!              % format_dict) % format_dict)
!         push((_("Ham incorrectly identified:\t%(perc_ham_incorrect_s)s (+ %(perc_ham_unsure_s)s unsure)") \
               % format_dict) % format_dict)
  
          return chunks
  

_______________________________________________
Spambayes-checkins mailing list
[EMAIL PROTECTED]
http://mail.python.org/mailman/listinfo/spambayes-checkins

Reply via email to