On 13 Feb 2008 12:37:32 +0300, Egor Pasko <[EMAIL PROTECTED]> wrote:
> why guess? just run Harmony under strace on your linux box, you will
> see only futexes triggered. Cheers.
Yep, that's what I said - Harmony relies on pthreads, and pthreads relies
on futexes.

I'm no guru in threading and atomics, but my simple
implementation [1] of futexes on Windows makes uncontended locking
about 2x faster:

C:\Work\VSWork\Futex\Release>Futex.exe
mutex(): 4281 msecs
futex(): 2062 msecs

I haven't checked this prototype in a multithreaded (MT) application, though.

Thanks,
Aleksey.

[1]
#include "stdafx.h"
#include "windows.h"
#include "time.h"

// Critical section used by the plain mutex_*() baseline.
CRITICAL_SECTION mutex;
// Critical section that futex_lock() enters when its compare-and-swap on
// thinLock fails.
CRITICAL_SECTION fallbackMutex;
// Thin-lock word: futex_lock() swaps it from 0 to 1 on the fast path and
// futex_unlock() stores 0 back.
volatile int thinLock;
// Set to true by futex_lock() once it has taken the fallback path; read by
// futex_unlock() to pick its release path. Only futex_init() clears it.
volatile bool isContended;

// Initializes the global critical section `mutex` that backs
// mutex_lock() / mutex_unlock().
__forceinline void mutex_init() {
        CRITICAL_SECTION* section = &mutex;
        InitializeCriticalSection(section);
}

// Baseline lock operation: enters the global critical section `mutex`.
__forceinline void mutex_lock() {
        CRITICAL_SECTION* section = &mutex;
        EnterCriticalSection(section);
}

// Baseline unlock operation: leaves the global critical section `mutex`.
__forceinline void mutex_unlock() {
        CRITICAL_SECTION* section = &mutex;
        LeaveCriticalSection(section);
}

// Resets the futex prototype to its initial state: thin lock free (0),
// contention flag cleared, and the fallback critical section initialized.
__forceinline void futex_init() {
        CRITICAL_SECTION* fallback = &fallbackMutex;

        thinLock = 0;           // 0 is the value futex_lock() expects for "free"
        isContended = false;    // no caller has taken the fallback path yet
        InitializeCriticalSection(fallback);
}

/*
 * Acquires the "futex": makes one atomic compare-and-swap attempt to move
 * thinLock from 0 to 1 and, if that fails, falls back to fallbackMutex and
 * raises isContended so that futex_unlock() releases the critical section.
 *
 * NOTE(review): on the fallback path this function returns while thinLock is
 * still held by whoever set it, and isContended is a single global flag shared
 * by every caller. The prototype has only been exercised by the
 * single-threaded, uncontended benchmark in this file - verify before using
 * it from several threads.
 */
__forceinline void futex_lock() {
        // InterlockedCompareExchange(dest, exchange, comparand) stores
        // `exchange` only if *dest == comparand and always returns the value
        // *dest held beforehand - the same contract as the former hand-written
        // "lock cmpxchg", but it also builds for x64, where MSVC provides no
        // inline assembler. int and LONG are both 32 bits wide on Windows.
        int result = (int)InterlockedCompareExchange((volatile LONG*)&thinLock, 1, 0);

        if (result == 0) {
                // Fast path: the word was free and is now ours.
                return;
        }

        printf("falling back on lock: result = %d\n", result);
        EnterCriticalSection(&fallbackMutex);
        isContended = true;
}

// Releases the "futex". If isContended has been raised by futex_lock(), the
// fallback critical section is left; otherwise the thin-lock word is simply
// stored back to 0.
__forceinline void futex_unlock() {
        if (isContended) {
                printf("falling back on unlock\n");
                LeaveCriticalSection(&fallbackMutex);
                return;
        }

        // Fast path: mark the thin lock as free again.
        thinLock = 0;
}

int _tmain(int argc, _TCHAR* argv[])
{
        mutex_init();
        futex_init();

        time_t t1, t2;
        int count = 100000000;

        t1 = clock();
        for(int c = 0; c < count; c++) {
                mutex_lock();
                mutex_unlock();
        }
        t2 = clock();

        printf("mutex(): %d msecs\n", (t2-t1));

        t1 = clock();
        for(int c = 0; c < count; c++) {
                futex_lock();
                futex_unlock();
        }
        t2 = clock();

        printf("futex(): %d msecs\n", (t2-t1));

        return 0;
}

Reply via email to