The problem that I see is that with multiple threads all attempting to "BEGIN EXCLUSIVE", they will occasionally *all* fail, calling the busy handler repeatedly until it finally returns SQL_BUSY. Let me re-state for clarity's sake: 10 threads all try "BEGIN EXCLUSIVE" at the same time. One succeeds, processes, and COMMITs. The other 9 will sometimes repeatedly call the busy handler over and over until they fail with SQLITE_BUSY, even though the database *should be* available to start a new exclusive transaction.
I have tried to eliminate as many variables as I could in my testing and in the sample program:
* No SQL queries other than BEGIN and COMMIT are being used on an empty database.
* No threads are accessing the database in any place other than the pthread_cleanup handler.
* I have tested on Fedora Core 1,2,3, SUSE and Gentoo, and with 32 and 64 bit systems.
The remaining variables I am working with are system load and CPU count:
* On a hyperthreaded (2 virtual CPUs) computer it fails at least 50% of the time when unloaded and < 5% with the load around 1, and then climbs up again as the load increases.
* On a multi-CPU computer it fails about 10% of the time when unloaded and < 1% when both CPUs are otherwise occupied.
* I have occasional access to a single-CPU Pentium 3 where I have not been able to duplicate the problem. This pc is running FC2 just like our chief offender the hyperthreaded PC who fails 50% of the time when unloaded.
In order to demonstrate this problem I have attached a program called sqlthreads.c. It builds under Linux with:
gcc -Wall -g sqlthreads.c -o sqlthreads -lpthread -lsqlite3.
sqlthreads takes two arguments: the number of threads to spawn, and the type of busy handler to use. 0 is the standard "sqlite3_busy_timeout" handler with a 20 second timeout. 1 is a custom sqlite3_busy_handler with a fixed sleep time. -1 is a similar sqlite3_busy_handler with a random sleep time. When run, the test launches N threads, waits a second, then cancels all of them. At this point, each thread will sqlite3_exec a BEGIN EXCLUSIVE. The one that succeeds will sleep for 1/2 second and then call COMMIT, at which time another thread *should* be free to begin. If at the end you see "It appears as though at least x threads failed to obtain an exclusive lock on the database!" then you have experienced what I consider to be the problem.
If I could get any independant corroboration of this problem, that would be fantastic. If you skimmed through my long email, it appears most likely to occur on a hyperthreaded PC, aka Pentium 4 2.8 ghz or higher.
-Eli
/* * sqlthreads.c -- sqlite3 locking / thread concurrency tester */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <signal.h>
#include <sqlite3.h>
// global variables
int g_failed = 0;
int g_threadCnt = 0;
pthread_t g_pool[256] = {0};
pthread_key_t g_pkeyDb = 0;
pthread_mutex_t g_logMutex = PTHREAD_MUTEX_INITIALIZER;
//
// log output wrapper, adds timestamp and thread id
//
void sqlog(char * pattern, ...)
{
va_list varArgs;
char timeBuf[16];
time_t t = time(0);
struct tm tmStruct;
// determine the current time
localtime_r(&t, &tmStruct);
strftime(timeBuf, sizeof(timeBuf), "%H:%M:%S", &tmStruct);
// init the variable argument processor
va_start(varArgs, pattern);
// obtain a mutex lock and write out the log entry
pthread_mutex_lock(&g_logMutex);
fprintf(stderr, "%s [%u] ", timeBuf, (unsigned int)pthread_self());
vfprintf(stderr, pattern, varArgs);
pthread_mutex_unlock(&g_logMutex);
// clean up and exit
va_end(varArgs);
}
//
// sqlite3 alternative busy handler
//
int busyHandler(void *pArg1, int iPriorCalls)
{
sqlog("!! dbBusyHandler %d\n", iPriorCalls);
// sleep if handler has been called less than threshold value
if (iPriorCalls < 20)
{
// adding a random value here greatly reduces locking
if (pArg1 < 0)
usleep((rand() % 500000) + 400000);
else usleep(500000);
return 1;
}
// have sqlite3_exec immediately return SQLITE_BUSY
return 0;
}
//
// each thread calls this when it is canceled
//
void destroySingleThread(void *ptr)
{
int rc;
sqlite3 * pDb;
// get our db handle from thread local storage
pDb = (sqlite3 *) pthread_getspecific(g_pkeyDb);
// we want to get EXCLUSIVE access to the DB
rc = sqlite3_exec(pDb, "BEGIN EXCLUSIVE", NULL, NULL, NULL);
if (rc != SQLITE_OK)
{
sqlog("destroySingleThread: Exclusive Transaction failed! %s\n",
sqlite3_errmsg(pDb));
g_failed++;
}
else
{
// sleep for a bit to simulate some SQL queries
usleep(500000);
sqlog("destroySingleThread: thread %u terminated!!\n", pthread_self());
// commit the transaction, which should free it up for other
threads
rc = sqlite3_exec(pDb, "COMMIT", NULL, NULL, NULL);
if (rc != SQLITE_OK)
{
sqlog("destroySingleThread: Commit failed! %s\n",
sqlite3_errmsg(pDb));
}
}
sqlite3_close(pDb);
return;
}
//
// each thread calls this to open the database and then spin
//
void * startSingleThread(void * sleepMode)
{
sqlite3 * pDb = NULL;
char * szDb = "./sqlthreadsDb";
sqlog("startSingleThread: new thread %u launched\n", pthread_self());
// open handle to the database and save it in thread local storage
if (SQLITE_OK != sqlite3_open(szDb, &pDb))
{
sqlog("startSingleThread: Failed to open database %s: %s\n",
szDb, sqlite3_errmsg(pDb));
return NULL;
}
// install a timeout or busy handler to be called if database is locked
if (* (int *)sleepMode == 0)
sqlite3_busy_timeout(pDb, 20000);
else sqlite3_busy_handler(pDb, busyHandler, sleepMode);
// put db handle in thread local storage so thread destructor can close
it
if (0 != pthread_setspecific(g_pkeyDb, pDb))
{
sqlog("startSingleThread: pthread_setspecific failed to save
pDb\n");
return NULL;
}
// push the node destructor onto the thread's cleanup stack
pthread_cleanup_push(destroySingleThread, 0);
// sit in a loop, doing nothing much
while (1)
{
pthread_testcancel();
usleep(100000);
}
// (never gets here, but...) remove the cleanup handler from the stack
pthread_cleanup_pop(1);
return NULL;
}
//
// launch all threads
//
int startThreads(int threadCnt, void * sleepMode)
{
int rc, idx;
pthread_attr_t attr;
// create a pthread_attr object for our pthreads
pthread_attr_init(&attr);
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
// spawn the threads
for (idx = 0; idx < threadCnt; idx++)
{
rc = pthread_create(&g_pool[idx], &attr, startSingleThread,
sleepMode);
if (rc)
{
sqlog("startThreads: Failed to start thread %d: %d\n",
idx, rc);
break;
}
g_threadCnt = idx+1;
}
// destroy the pthread_attr object we used to create our pthreads
pthread_attr_destroy(&attr);
return 0;
}
//
// start the main server process
//
int main(int argc, char **argv)
{
int idx, threadCnt, sleepMode;
// parse cmd line args
if (argc != 3)
{
printf("\nUsage: %s [thread count] [sleep mode]\n", argv[0]);
printf( "\tSleep Mode is one of:\n"
"\t\t 0 - Standard sqlite3_busy_timeout
handler\n"
"\t\t-1 - sqlite3_busy_handler with random
usleep time\n"
"\t\t 1 - sqlite3_busy_handler with fixed
usleep time\n");
return 0;
}
// get the thread count and sleep mode
threadCnt = atoi(argv[1]);
sleepMode = atoi(argv[2]);
if (threadCnt < 1 || threadCnt > 50)
{
printf("\nBe reasonable! Thread count should be between 1 and
50\n\n");
return 0;
}
// print out a banner
sqlog("sqlite3 thread test starting...\n");
if (sleepMode == 0)
sqlog("Using Sqlite3 standard sqlite3_busy_timeout handler\n");
else if (sleepMode == 1)
sqlog("Using custom sqlite3_busy_handler /w fixed usleep\n");
else if (sleepMode == -1)
sqlog("Using custom sqlite3_busy_handler /w random usleep\n");
else
{
sqlog("\nInvalid sleep mode '%s' specified!\n\n", argv[2]);
exit(0);
}
// create a thread local storage key and start up our threads
pthread_key_create(&g_pkeyDb, NULL);
startThreads(threadCnt, &sleepMode);
// loop through the thread array and wait on each
sqlog("main: now sleep for 1 sec and then cancel all threads\n");
sleep(1);
sqlog("\nNote that if you are testing sqlite3_timeout_handler (0)"
"\nit may not produce output for up to 20 seconds\n");
// loop and cancel threads
for (idx = 0; idx < g_threadCnt; idx++)
{
sqlog("main: Sending cancel to thread %u\n", g_pool[idx]);
pthread_cancel(g_pool[idx]);
}
sqlog("main: pthread_cancel sent to all threads!\n");
// loop and wait for all threads to exit
for (idx = 0; idx < g_threadCnt; idx++)
{
if (g_pool[idx])
pthread_join(g_pool[idx], NULL);
}
sqlog("Concluding the sqlite3 thread test...\n");
if (g_failed)
sqlog("It appears as though at least %d threads failed to
obtain an "
"exclusive lock on the database!\n", g_failed);
else sqlog("Success! Now try again! :-)\n");
// delete thread local storage and exit
pthread_key_delete(g_pkeyDb);
pthread_exit(0);
return 0;
}

