Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15500/spambayes
Modified Files:
Stats.py
Log Message:
Include the cost figure (and a 'savings' figure, which is sure to be popular
and meaningless <wink>) in the stats.
Index: Stats.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Stats.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** Stats.py 21 Dec 2004 21:41:49 -0000 1.11
--- Stats.py 21 Dec 2004 23:19:41 -0000 1.12
***************
*** 53,57 ****
class Stats(object):
def __init__(self, spam_threshold, unsure_threshold, messageinfo_db,
! ham_string, unsure_string, spam_string):
self.messageinfo_db = messageinfo_db
self.spam_threshold = spam_threshold
--- 53,58 ----
class Stats(object):
def __init__(self, spam_threshold, unsure_threshold, messageinfo_db,
! ham_string, unsure_string, spam_string, fp_cost, fn_cost,
! unsure_cost):
self.messageinfo_db = messageinfo_db
self.spam_threshold = spam_threshold
***************
*** 60,63 ****
--- 61,67 ----
self.unsure_string = unsure_string
self.spam_string = spam_string
+ self.fp_cost = fp_cost
+ self.fn_cost = fn_cost
+ self.unsure_cost = unsure_cost
# Reset session stats.
self.Reset()
***************
*** 229,232 ****
--- 233,245 ----
data["num_unsure_trained_spam"]) / \
data["total_spam"]
+
+ data["total_cost"] = data["num_trained_ham_fp"] * self.fp_cost + \
+ data["num_trained_spam_fn"] * self.fn_cost + \
+ data["num_unsure"] * self.unsure_cost
+ # If there was no filtering done, what would the cost have been?
+ # (Assuming that any spam in the inbox earns the cost of a fn)
+ no_filter_cost = data["num_spam"] * self.fn_cost
+ data["cost_savings"] = no_filter_cost - data["total_cost"]
+
return data
***************
*** 345,348 ****
--- 358,366 ----
push((_("Good incorrectly
identified:%(tab)s%(perc_ham_incorrect_s)s (+ %(perc_ham_unsure_s)s unsure)") \
% format_dict) % format_dict)
+ if format_dict["total_spam"] or format_dict["total_ham"]:
+ push("")
+
+ push(_("Total cost of spam:%(tab)s$%(total_cost).2f") % format_dict)
+ push(_("SpamBayes savings:%(tab)s$%(cost_savings).2f") % format_dict)
return chunks
_______________________________________________
Spambayes-checkins mailing list
[EMAIL PROTECTED]
http://mail.python.org/mailman/listinfo/spambayes-checkins