Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15500/spambayes

Modified Files:
        Stats.py 
Log Message:
Include the cost figure (and a 'savings' figure, which is sure to be popular
and meaningless <wink>) in the stats.

Index: Stats.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Stats.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** Stats.py    21 Dec 2004 21:41:49 -0000      1.11
--- Stats.py    21 Dec 2004 23:19:41 -0000      1.12
***************
*** 53,57 ****
  class Stats(object):
      def __init__(self, spam_threshold, unsure_threshold, messageinfo_db,
!                  ham_string, unsure_string, spam_string):
          self.messageinfo_db = messageinfo_db
          self.spam_threshold = spam_threshold
--- 53,58 ----
  class Stats(object):
      def __init__(self, spam_threshold, unsure_threshold, messageinfo_db,
!                  ham_string, unsure_string, spam_string, fp_cost, fn_cost,
!                  unsure_cost):
          self.messageinfo_db = messageinfo_db
          self.spam_threshold = spam_threshold
***************
*** 60,63 ****
--- 61,67 ----
          self.unsure_string = unsure_string
          self.spam_string = spam_string
+         self.fp_cost = fp_cost
+         self.fn_cost = fn_cost
+         self.unsure_cost = unsure_cost
          # Reset session stats.
          self.Reset()
***************
*** 229,232 ****
--- 233,245 ----
                           data["num_unsure_trained_spam"]) / \
                           data["total_spam"]
+ 
+         data["total_cost"] = data["num_trained_ham_fp"] * self.fp_cost + \
+                              data["num_trained_spam_fn"] * self.fn_cost + \
+                              data["num_unsure"] * self.unsure_cost
+         # If there was no filtering done, what would the cost have been?
+         # (Assuming that any spam in the inbox earns the cost of a fn)
+         no_filter_cost = data["num_spam"] * self.fn_cost
+         data["cost_savings"] = no_filter_cost - data["total_cost"]
+ 
          return data
  
***************
*** 345,348 ****
--- 358,366 ----
              push((_("Good incorrectly identified:%(tab)s%(perc_ham_incorrect_s)s (+ %(perc_ham_unsure_s)s unsure)") \
                   % format_dict) % format_dict)
+         if format_dict["total_spam"] or format_dict["total_ham"]:
+             push("")
+ 
+         push(_("Total cost of spam:%(tab)s$%(total_cost).2f") % format_dict)
+         push(_("SpamBayes savings:%(tab)s$%(cost_savings).2f") % format_dict)
  
          return chunks

_______________________________________________
Spambayes-checkins mailing list
[EMAIL PROTECTED]
http://mail.python.org/mailman/listinfo/spambayes-checkins

Reply via email to