<http://mailing-list.nuclearelephant.com/5003.html>
But more importantly (and more generally), the fcntl locking used by
the hash drivers is designed to assure per-process control over a
given file. For a multi-threaded daemon, the locking is going to have
*no* internal effect ...
That sucks, since as you noted later in the thread, the docs say the
hash driver is thread safe. I guess this is what is causing my
hash corruption. I wish I had found this thread BEFORE I ran into
problems.
The attached patch implements a thread-safe per-user lock for the hash
driver. After minimal testing, it seems to be ok.
It has a failure case that if the pid is not written correctly into the
lock file (fs corruption or disk full or whatever) and dspam crashes and
leaves the lock behind, future instances of dspam will not reclaim the
lock. Maybe something as simple as checking the return value of fclose()
would help. I didn't bother as it seems that under the same conditions
the hash driver itself isn't robust enough against hash corruption to
worry about lock files.
It has another rarer failure case that if a lock is written correctly
but dspam crashes and leaves the lock behind, and when or by the time
a new dspam tries another delivery for that user, there is another process
running with the pid of the previous dspam (the pid written into the lock
file), dspam will not reclaim the lock as long as that other process is
running. This should be exceedingly rare. A good fix might be for dspam
to iterate over and remove all possible locks when starting up. Assuming
dspam protects itself against multiple invocations, at startup it is
guaranteed that any existing locks are invalid.
There should also probably be a way to choose between the previous fcntl
locks and the new dot-lock style lock (file existence lock). So that
folks that run invoke a dspam per delivery, where fcntl is sufficient,
are not subject to the type of failures that the patch introduces.
-frank
diff -urp dspam-3.6.8.orig/src/hash_drv.c dspam-3.6.8/src/hash_drv.c
--- dspam-3.6.8.orig/src/hash_drv.c 2006-05-30 08:04:00.000000000 -0700
+++ dspam-3.6.8/src/hash_drv.c 2007-01-12 23:40:59.270670772 -0800
@@ -242,46 +242,111 @@ dspam_shutdown_driver (DRIVER_CTX *DTX)
int
_hash_drv_lock_get (
DSPAM_CTX *CTX,
- struct _hash_drv_storage *s,
const char *username)
{
char filename[MAX_FILENAME_LENGTH];
- int r;
+ int fd, r, pid;
+ FILE *f; /* don't want to deal with partial read()/write() */
+ int tries = 0;
_ds_userdir_path(filename, CTX->home, username, "lock");
_ds_prepare_path_for(filename);
- s->lock = fopen(filename, "a");
- if (s->lock == NULL) {
+again:
+ if (tries++ > 20) {
LOG(LOG_ERR, ERR_IO_FILE_OPEN, filename, strerror(errno));
return EFAILURE;
}
- r = _ds_get_fcntl_lock(fileno(s->lock));
- if (r) {
- fclose(s->lock);
- LOG(LOG_ERR, ERR_IO_LOCK, filename, r, strerror(errno));
+
+ fd = open(filename, O_RDWR|O_CREAT|O_EXCL, 0600);
+ if (fd == -1) {
+ if (errno == EEXIST) {
+ /* lock is held */
+ fd = open(filename, O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT) {
+ /* lock went away */
+ goto again;
+ } else {
+ /* lock is held but we can't open the file to read pid */
+ LOG(LOG_ERR, ERR_IO_FILE_OPEN, filename, strerror(errno));
+ return EFAILURE;
+ }
+ }
+
+ /* lock is held, but we now have it open for read */
+ f = fdopen(fd, "r");
+ if (!f) {
+ close(fd);
+ LOG(LOG_ERR, ERR_IO_FILE_OPEN, filename, strerror(errno));
+ return EFAILURE;
+ }
+ r = fscanf(f, "%d\n", &pid);
+ fclose(f);
+ if (r != 1) {
+ /* lock file was not written by us */
+ LOG(LOG_ERR, ERR_IO_FILE_OPEN, filename, strerror(errno));
+ return EFAILURE;
+ }
+
+ /* we read the pid from the lock file */
+ if (pid == getpid()) {
+ /* another thread holds the lock, try again after 100ms delay */
+ usleep(100000);
+ goto again;
+ }
+ /* some other process holds this lock */
+ if (!kill(pid, 0)) {
+ /* lock holder is still running, try again after 100ms delay */
+ usleep(100000);
+ goto again;
+ } else {
+ if (errno == ESRCH) {
+ /* lock holder is not running, go ahead and try to grab lock */
+ /* note: we might have raced to get here */
+ unlink(filename);
+ goto again;
+ } else {
+ /* lock holder is running, but not under our uid, just give up */
+ LOG(LOG_ERR, ERR_IO_FILE_OPEN, filename, strerror(errno));
+ return EFAILURE;
+ }
+ }
+
+ } else {
+ /* couldn't open lock file, and not because it already exists */
+ LOG(LOG_ERR, ERR_IO_FILE_OPEN, filename, strerror(errno));
+ return EFAILURE;
+ }
}
- return r;
+
+ /* we got the lock, save pid */
+ f = fdopen(fd, "w");
+ if (!f) {
+ close(fd);
+ LOG(LOG_ERR, ERR_IO_FILE_OPEN, filename, strerror(errno));
+ return EFAILURE;
+ }
+ fprintf(f, "%d\n", (int) getpid());
+ fclose(f);
+
+ return 0;
}
int
_hash_drv_lock_free (
- struct _hash_drv_storage *s,
+ DSPAM_CTX *CTX,
const char *username)
{
- int r;
+ char filename[MAX_FILENAME_LENGTH];
if (username == NULL)
return 0;
- r = _ds_free_fcntl_lock(fileno(s->lock));
- if (!r) {
- fclose(s->lock);
- } else {
- LOG(LOG_ERR, ERR_IO_LOCK_FREE, username, r, strerror(errno));
- }
+ _ds_userdir_path(filename, CTX->home, username, "lock");
+ unlink(filename);
- return r;
+ return 0;
}
int _hash_drv_open(
@@ -469,7 +534,7 @@ _ds_init_storage (DSPAM_CTX * CTX, void
else
_ds_userdir_path(db, CTX->home, CTX->group, "css");
- lock_result = _hash_drv_lock_get (CTX, s,
+ lock_result = _hash_drv_lock_get (CTX,
(CTX->group) ? CTX->group : CTX->username);
if (lock_result < 0)
goto BAIL;
@@ -533,7 +598,7 @@ _ds_shutdown_storage (DSPAM_CTX * CTX)
_hash_drv_close(s->map);
free(s->map);
lock_result =
- _hash_drv_lock_free (s, (CTX->group) ? CTX->group : CTX->username);
+ _hash_drv_lock_free (CTX, (CTX->group) ? CTX->group : CTX->username);
if (lock_result < 0)
return EUNKNOWN;
}
diff -urp dspam-3.6.8.orig/src/hash_drv.h dspam-3.6.8/src/hash_drv.h
--- dspam-3.6.8.orig/src/hash_drv.h 2006-05-13 05:17:30.000000000 -0700
+++ dspam-3.6.8/src/hash_drv.h 2007-01-12 23:41:08.056282528 -0800
@@ -58,7 +58,6 @@ typedef struct _hash_drv_map
struct _hash_drv_storage
{
hash_drv_map_t map;
- FILE *lock;
int dbh_attached;
unsigned long offset_nexttoken;
@@ -87,11 +86,10 @@ int _hash_drv_set_spamtotals
int _hash_drv_lock_get (
DSPAM_CTX * CTX,
- struct _hash_drv_storage *s,
const char *username);
int _hash_drv_lock_free (
- struct _hash_drv_storage *s,
+ DSPAM_CTX * CTX,
const char *username);
int _hash_drv_open(