To answer the various responses I've received:

I have verified the lock contention problem by viewing mutex statistics. The
offending nsv bucket got 100 times more locks than any other, and under
heavy load was busy up to 48% of the time. Not good.

I also verified that the hashtable in question was the primary source of the
locks. It's a single nsv. It can be (and frequently is) used multiple times
per page. The suggestion from various people to split it up is a promising
possibility; I'll see if it's feasible for my data structure.

The structure is updated rarely enough that it would be acceptable to have
to rebuild the entire data structure any time a single element was changed.
The C module I had in mind would be called something like ns_const. It would
be similar to nsvs, except it would perform no locking; any change would
require the datastructure to be rebuilt from scratch.

Thanks very much to everyone for the responses.

Regards,
Sean


-----Original Message-----
From: Rob Mayoff [mailto:[EMAIL PROTECTED]]
Sent: Monday, September 10, 2001 7:09 PM
Subject: Re: nsv vs. ns_cache vs. ns_share


+---------- On Sep 10, Sean Owen said:
> The problem is, under heavy load, we get into serious lock contention
> problems reading from it.

Are you speculating or have you looked at the mutex statistics?

> 99.99% of the time we are just reading, so
> ideally I'd like to use something akin to a read/write lock, however, the
> overhead of an actual rwlock will probably only make matters worse.
>
> The table is far too large (~50,000 entries)to just load up every time an
> interp is initialized.

We need to know more about your usage pattern.  Each NSV is a hash
table.  Are you using a lot of NSVs or one NSV with 50,000 keys?  Do you
access the NSV once per request, or many times per request?

> Any ideas for me? Is there any way to create a shared read-only
> datastructure that doesn't use thread interlocking? Do I need to code my
> datastructure up in C?

Yes, you'd need to do it in C.

I'd do something like this, but I haven't tested this code:

/*
 * An "atomicdata" is a global hash table. One atomicdata in the system
 * is considered "active".
 *
 * Each atomicdata has a reference count. When an atomicdata's reference
 * count is zero and it is not the active atomicdata, all resources
 * consumed by that atomicdata are freed.
 *
 * The command "dqd_atomicdata set {k1 v1 k2 v2 ...}" creates a new
 * atomicdata with reference count zero. It fills it with the key/value
 * pairs k1/v1, k2/v2, etc. Then it atomically makes that new atomicdata
 * be the active atomicdata. If there was already an active atomicdata,
 * then the command BLOCKS until the old atomicdata's reference count
 * is zero and then frees the old atomicdata's resources. Any other
 * thread that has a reference to the old atomicdata continues to access
 * the old atomicdata (blocking this thread) until it releases its
 * reference.
 *
 * The command "dqd_atomicdata get k" causes the current thread to
 * check whether it has a reference to an atomicdata. If it does not,
 * then it acquires a reference to the active atomicdata and increments
 * the active atomicdata's reference count. Once the thread is certain
 * that it has a reference to some atomicdata, it looks up "k" in the
 * atomicdata. If "k" is found, then the command returns the value
 * associated with "k". Otherwise, it returns an error. In either case,
 * the reference to the atomicdata is saved for use by future calls to
 * "dqd_atomicdata get" in the same thread.
 *
 * The command "dqd_atomicdata release" checks whether the current
 * thread has a reference to some atomicdata. If it does not, then the
 * command simply returns "0". If the thread does have a reference to an
 * atomicdata, then the command decrements the atomicdata's reference
 * count and forgets the reference.
 *
 * Each connection thread automatically performs the equivalent of
 * "dqd_atomicdata release" after completing each HTTP request. You
 * should use "dqd_atomicdata release" in threads that you create and in
 * scheduled procs.
 *
 * A thread must lock a mutex momentarily to acquire a reference to the
 * active atomicdata, and to release its reference, and to replace the
 * active atomicdata. No locking is required when a thread already has a
 * reference to an atomicdata.
 */

#ifndef USE_TCL8X
#define USE_TCL8X
#endif

#include "ns.h"

int Ns_ModuleVersion = 1;

typedef struct AtomicData {
    Tcl_HashTable table;
    int refCnt;
} AtomicData;

/*
 * Threads will access this to get the current table.  When we need
 * to modify the table, we'll construct a new one from scratch and
 * atomically swap them out.
 */
static AtomicData *global_ad;

/* These will protect access to global_ad->refCnt. */
static Ns_Mutex ad_lock;
static Ns_Cond ad_cond;

/* Hold each thread's reference to global_ad. */
static Ns_Tls ad_tls;

static AtomicData *
acquire(void)
{
    AtomicData *ad = Ns_TlsGet(&ad_tls);

    if (ad == NULL) {

        Ns_MutexLock(&ad_lock);
        ad = global_ad;
        ad->refCnt++;
        Ns_MutexUnlock(&ad_lock);

        Ns_TlsSet(&ad_tls, ad);
    }

    return ad;
}

static int
release(void)
{
    AtomicData *ad = Ns_TlsGet(&ad_tls);

    if (ad == NULL) return 0;

    Ns_TlsSet(&ad_tls, NULL);
    Ns_MutexLock(&ad_lock);
    if (--ad->refCnt == 0)
        Ns_CondBroadcast(&ad_cond);
    Ns_MutexUnlock(&ad_lock);

    return 1;
}

static void
setGlobal(Tcl_HashTable *newTable)
{
    AtomicData *ad = (AtomicData *) ns_malloc(sizeof *ad);
    AtomicData *old_ad;
    Tcl_HashEntry *he;
    Tcl_HashSearch search;

    ad->table = *newTable;
    ad->refCnt = 0;

    Ns_MutexLock(&ad_lock);
    old_ad = global_ad;
    global_ad = ad;

    while (old_ad->refCnt != 0) {
        Ns_CondWait(&ad_cond, &ad_lock);
    }

    Ns_MutexUnlock(&ad_lock);

    he = Tcl_FirstHashEntry(&old_ad->table, &search);
    while (he != NULL) {
        ns_free((void *) Tcl_GetHashValue(he));
        he = Tcl_NextHashEntry(&search);
    }

    ns_free((void *) old_ad);
}

static int
setCommand(Tcl_Interp *interp, Tcl_Obj *listObj)
{
    int elc;
    Tcl_Obj **elv;
    Tcl_HashTable ht;

    if (Tcl_ListObjGetElements(interp, listObj, &elc, &elv) == TCL_ERROR)
        return TCL_ERROR;

    if (elc & 1) {
        Tcl_SetResult(interp,
            "list must contain an even number of elements", TCL_STATIC);
        return TCL_ERROR;
    }

    Tcl_InitHashTable(&ht, TCL_STRING_KEYS);

    while (elc > 0) {
        int isNew;

        Tcl_HashEntry *he = Tcl_CreateHashEntry(&ht,
            Tcl_GetString(elv[0]), &isNew);

        if (!isNew) ns_free((void *) Tcl_GetHashValue(he));

        Tcl_SetHashValue(he, ns_strdup(Tcl_GetString(elv[1])));

        elc -= 2;
        elv += 2;
    }

    setGlobal(&ht);

    return TCL_OK;
}

static int
getCommand(Tcl_Interp *interp, Tcl_Obj *keyObj)
{
    AtomicData *ad;
    Tcl_HashEntry *he;
    char *key;

    ad = acquire();
    if (ad == NULL) {
        Tcl_SetResult(interp, "no active atomicdata", TCL_STATIC);
        return TCL_ERROR;
    }

    key = Tcl_GetString(keyObj);
    he = Tcl_FindHashEntry(&ad->table, key);
    if (he == NULL) {
        Tcl_AppendResult(interp, "atomicdata does not contain key \"",
            key, "\"", NULL);
        return TCL_ERROR;
    }

    Tcl_SetResult(interp, (char *) Tcl_GetHashValue(he), TCL_VOLATILE);
    return TCL_OK;
}

static int
releaseCommand(Tcl_Interp *interp)
{
    Tcl_SetResult(interp, release() ? "1" : "0", TCL_STATIC);
    return TCL_OK;
}

static int
tclCommand(ClientData dummy, Tcl_Interp *interp,
    int objc, Tcl_Obj * CONST objv[])
{
    if (objc == 2) {
        if (STREQ(Tcl_GetString(objv[1]), "release"))
            return releaseCommand(interp);
    }

    else if (objc == 3) {
        if (STREQ(Tcl_GetString(objv[1]), "get"))
            return getCommand(interp, objv[2]);
        else if (STREQ(Tcl_GetString(objv[1]), "set"))
            return setCommand(interp, objv[2]);
    }

    Tcl_SetResult(interp, "syntax error", TCL_STATIC);
    return TCL_ERROR;
}

static int
interpInit(Tcl_Interp *interp, void *context)
{
    Tcl_CreateObjCommand(interp, "dqd_atomicdata", tclCommand, NULL, NULL);
    return NS_OK;
}

int
Ns_ModuleInit(char *server, char *module)
{
    Ns_TlsAlloc(&ad_tls, NULL);
    Ns_MutexSetName2(&ad_lock, "ad_lock", "");

    return NS_OK;
}

Reply via email to