Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv11482/spambayes
Modified Files:
storage.py
Log Message:
Improvements to ZODB and ZEO storage classes:
* handle ReadOnlyError when storing to a read-only database (report and abort);
* make packing on close optional;
* pack before closing the database, rather than after;
* optionally remove the pack backup file;
* handle username, password, storage name, wait, and wait timeout for ZEO;
* use persistent caches for ZEO.
Index: storage.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/storage.py,v
retrieving revision 1.60
retrieving revision 1.61
diff -C2 -d -r1.60 -r1.61
*** storage.py 22 Apr 2006 04:41:54 -0000 1.60
--- storage.py 27 Mar 2007 10:57:53 -0000 1.61
***************
*** 67,70 ****
--- 67,71 ----
import time
import types
+ import tempfile
from spambayes import classifier
from spambayes.Options import options, get_pathname_option
***************
*** 676,679 ****
--- 677,681 ----
except ImportError:
Persistent = object
+
class _PersistentClassifier(classifier.Classifier, Persistent):
def __init__(self):
***************
*** 759,762 ****
--- 761,765 ----
abort = ZODB.Transaction.get_transaction().abort
from ZODB.POSException import ConflictError
+ from ZODB.POSException import ReadOnlyError
from ZODB.POSException import TransactionFailedError
***************
*** 781,786 ****
self.db_name
abort()
! def close(self):
# Ensure that the db is saved before closing. Alternatively, we
# could abort any waiting transaction. We need to do *something*
--- 784,792 ----
self.db_name
abort()
+ except ReadOnlyError:
+ print >> sys.stderr, "Can't store transaction to read-only db."
+ abort()
! def close(self, pack=True, retain_backup=True):
# Ensure that the db is saved before closing. Alternatively, we
# could abort any waiting transaction. We need to do *something*
***************
*** 791,805 ****
self.store()
- # Do the closing.
- self.DB.close()
-
# We don't make any use of the 'undo' capabilities of the
# FileStorage at the moment, so might as well pack the database
# each time it is closed, to save as much disk space as possible.
# Pack it up to where it was 'yesterday'.
! # XXX What is the 'referencesf' parameter for pack()? It doesn't
! # XXX seem to do anything according to the source.
! if self.mode != 'r' and hasattr(self.storage, "pack"):
! self.storage.pack(time.time()-60*60*24, None)
self.storage.close()
--- 797,809 ----
self.store()
# We don't make any use of the 'undo' capabilities of the
# FileStorage at the moment, so might as well pack the database
# each time it is closed, to save as much disk space as possible.
# Pack it up to where it was 'yesterday'.
! if pack and self.mode != 'r':
! self.pack(time.time()-60*60*24, retain_backup)
!
! # Do the closing.
! self.DB.close()
self.storage.close()
***************
*** 811,814 ****
--- 815,831 ----
print >> sys.stderr, 'Closed', self.db_name, 'database'
+ def pack(self, t, retain_backup=True):
+ """Like FileStorage pack(), but optionally remove the .old
+ backup file that is created. Often for our purposes we do
+ not care about being able to recover from this. Also
+ ignore the referencesf parameter, which appears to not do
+ anything."""
+ if hasattr(self.storage, "pack"):
+ self.storage.pack(t, None)
+ if not retain_backup:
+ old_name = self.db_filename + ".old"
+ if os.path.exists(old_name):
+ os.remove(old_name)
+
class ZEOClassifier(ZODBClassifier):
***************
*** 818,828 ****
self.port = None
db_name = "SpamBayes"
for info in source_info:
if info.startswith("host"):
! self.host = info[5:]
elif info.startswith("port"):
self.port = int(info[5:])
elif info.startswith("dbname"):
db_name = info[7:]
ZODBClassifier.__init__(self, db_name)
--- 835,865 ----
self.port = None
db_name = "SpamBayes"
+ self.username = ''
+ self.password = ''
+ self.storage_name = '1'
+ self.wait = None
+ self.wait_timeout = None
for info in source_info:
if info.startswith("host"):
! try:
! # ZEO only accepts strings, not unicode.
! self.host = str(info[5:])
! except UnicodeDecodeError, e:
! print >> sys.stderr, "Couldn't set host", \
! info[5:], str(e)
elif info.startswith("port"):
self.port = int(info[5:])
elif info.startswith("dbname"):
db_name = info[7:]
+ elif info.startswith("user"):
+ self.username = info[5:]
+ elif info.startswith("pass"):
+ self.password = info[5:]
+ elif info.startswith("storage_name"):
+ self.storage_name = info[13:]
+ elif info.startswith("wait_timeout"):
+ self.wait_timeout = int(info[13:])
+ elif info.startswith("wait"):
+ self.wait = info[5:] == "True"
ZODBClassifier.__init__(self, db_name)
***************
*** 833,837 ****
else:
addr = self.host
! self.storage = ClientStorage(addr)
--- 870,908 ----
else:
addr = self.host
! if options["globals", "verbose"]:
! print >> sys.stderr, "Connecting to ZEO server", addr, \
! self.username, self.password
! # Use persistent caches, with the cache in the temp directory.
! # If the temp directory is cleared out, we lose the cache, but
! # that doesn't really matter, and we should always be able to
! # write to it.
! try:
! self.storage = ClientStorage(addr, name=self.db_name,
! read_only=self.mode=='r',
! username=self.username,
! client=self.db_name,
! wait=self.wait,
! wait_timeout=self.wait_timeout,
! storage=self.storage_name,
! var=tempfile.gettempdir(),
! password=self.password)
! except ValueError:
! # Probably bad cache; remove it and try without the cache.
! try:
! os.remove(os.path.join(tempfile.gettempdir(),
! self.db_name + \
! self.storage_name + ".zec"))
! except OSError:
! pass
! self.storage = ClientStorage(addr, name=self.db_name,
! read_only=self.mode=='r',
! username=self.username,
! wait=self.wait,
! wait_timeout=self.wait_timeout,
! storage=self.storage_name,
! password=self.password)
!
! def is_connected(self):
! return self.storage.is_connected()
***************
*** 840,849 ****
NO_TRAINING_FLAG = 1
! class Trainer:
'''Associates a Classifier object and one or more Corpora, \
is an observer of the corpora'''
def __init__(self, bayes, is_spam, updateprobs=NO_UPDATEPROBS):
! '''Constructor(Classifier, is_spam(True|False),
updprobs(True|False)'''
self.bayes = bayes
--- 911,921 ----
NO_TRAINING_FLAG = 1
! class Trainer(object):
'''Associates a Classifier object and one or more Corpora, \
is an observer of the corpora'''
def __init__(self, bayes, is_spam, updateprobs=NO_UPDATEPROBS):
! '''Constructor(Classifier, is_spam(True|False),
! updateprobs(True|False)'''
self.bayes = bayes
***************
*** 860,867 ****
if options["globals", "verbose"]:
! print >> sys.stderr, 'training with',message.key()
self.bayes.learn(message.tokenize(), self.is_spam)
- # self.updateprobs)
message.setId(message.key())
message.RememberTrained(self.is_spam)
--- 932,938 ----
if options["globals", "verbose"]:
! print >> sys.stderr, 'training with ', message.key()
self.bayes.learn(message.tokenize(), self.is_spam)
message.setId(message.key())
message.RememberTrained(self.is_spam)
_______________________________________________
Spambayes-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/spambayes-checkins