* Andi Kleen ([EMAIL PROTECTED]) wrote:
> Mathieu Desnoyers <[EMAIL PROTECTED]> writes:
> > 
> > The measurements I get (in cycles):
> >              enable interrupts (STI)   disable interrupts (CLI)   local CMPXCHG
> > IA32 (P4)    112                        82                         26
> > x86_64 AMD64 125                       102                         19
> 
> What exactly did you benchmark here? On K8 CLI/STI are only supposed
> to be a few cycles. pushf/popf might be more expensive, but not that much.
> 

Hi Andi,

I benchmarked cmpxchg_local vs local_irq_save/local_irq_restore.
Details, and code, follow.

* cpuinfo:

processor       : 0
vendor_id       : AuthenticAMD
cpu family      : 15
model           : 35
model name      : AMD Athlon(tm)64 X2 Dual Core Processor  3800+
stepping        : 2
cpu MHz         : 2009.204
cache size      : 512 KB
physical id     : 0
siblings        : 2
core id         : 0
cpu cores       : 2
fpu             : yes
fpu_exception   : yes
cpuid level     : 1
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt lm 3dnowext 
3dnow pni lahf_lm cmp_legacy
bogomips        : 4023.38
TLB size        : 1024 4K pages
clflush size    : 64
cache_alignment : 64
address sizes   : 40 bits physical, 48 bits virtual
power management: ts fid vid ttp

processor       : 1
vendor_id       : AuthenticAMD
cpu family      : 15
model           : 35
model name      : AMD Athlon(tm)64 X2 Dual Core Processor  3800+
stepping        : 2
cpu MHz         : 2009.204
cache size      : 512 KB
physical id     : 0
siblings        : 2
core id         : 1
cpu cores       : 2
fpu             : yes
fpu_exception   : yes
cpuid level     : 1
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt lm 3dnowext 
3dnow pni lahf_lm cmp_legacy
bogomips        : 4018.49
TLB size        : 1024 4K pages
clflush size    : 64
cache_alignment : 64
address sizes   : 40 bits physical, 48 bits virtual
power management: ts fid vid ttp


* Test run:


/* test-cmpxchg-nolock.c
 *
 * Compare local cmpxchg with irq disable / enable.
 */


#include <linux/jiffies.h>
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/calc64.h>
#include <asm/timex.h>
#include <asm/system.h>

/* Number of iterations executed inside each timed loop below. */
#define NR_LOOPS 20000

/* Target word of the cmpxchg_local() calls in do_test_cmpxchg(). */
int test_val;

/*
 * Time NR_LOOPS consecutive cmpxchg_local() operations on test_val and
 * print the total and the per-iteration cycle counts.
 *
 * The timed loop runs between local_irq_save()/preempt_disable() and
 * local_irq_restore()/preempt_enable(), so it is not disturbed by
 * interrupts or preemption on the local CPU.
 */
static void do_test_cmpxchg(void)
{
        int ret;
        /* local_irq_save()/local_irq_restore() operate on unsigned long. */
        unsigned long flags;
        unsigned int i;
        cycles_t time1, time2, time;
        long rem;

        local_irq_save(flags);
        preempt_disable();
        time1 = get_cycles();
        for (i = 0; i < NR_LOOPS; i++) {
                /* Compare with 0 and store 0: the value never changes. */
                ret = cmpxchg_local(&test_val, 0, 0);
        }
        time2 = get_cycles();
        local_irq_restore(flags);
        preempt_enable();
        time = time2 - time1;

        printk(KERN_ALERT "test results: time for non locked cmpxchg\n");
        printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
        printk(KERN_ALERT "total time: %llu\n", time);
        /* Average per iteration; the remainder is discarded. */
        time = div_long_long_rem(time, NR_LOOPS, &rem);
        printk(KERN_ALERT "-> non locked cmpxchg takes %llu cycles\n", time);
        printk(KERN_ALERT "test end\n");
}

/*
 * Time NR_LOOPS consecutive local_irq_restore() calls and print the total
 * and the per-iteration cycle counts.
 *
 * This test will have a higher standard deviation due to incoming interrupts:
 * the loop restores the interrupt state saved on entry, so interrupts may be
 * enabled while the measurement is running.
 *
 * NOTE(review): the output labels this measurement "STI", but the operation
 * being timed is local_irq_restore(flags); whether that reduces to a bare STI
 * depends on the architecture's implementation -- confirm before quoting the
 * figure as the cost of STI.
 */
static void do_test_enable_int(void)
{
        /* local_irq_save()/local_irq_restore() operate on unsigned long. */
        unsigned long flags;
        unsigned int i;
        cycles_t time1, time2, time;
        long rem;

        local_irq_save(flags);
        preempt_disable();
        time1 = get_cycles();
        for (i = 0; i < NR_LOOPS; i++) {
                local_irq_restore(flags);
        }
        time2 = get_cycles();
        local_irq_restore(flags);
        preempt_enable();
        time = time2 - time1;

        printk(KERN_ALERT "test results: time for enabling interrupts (STI)\n");
        printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
        printk(KERN_ALERT "total time: %llu\n", time);
        /* Average per iteration; the remainder is discarded. */
        time = div_long_long_rem(time, NR_LOOPS, &rem);
        printk(KERN_ALERT "-> enabling interrupts (STI) takes %llu cycles\n",
                                        time);
        printk(KERN_ALERT "test end\n");
}

static void do_test_disable_int(void)
{
        unsigned long flags, flags2;
        unsigned int i;
        cycles_t time1, time2, time;
        long rem;

        local_irq_save(flags);
        preempt_disable();
        time1 = get_cycles();
        for ( i = 0; i < NR_LOOPS; i++) {
                local_irq_save(flags2);
        }
        time2 = get_cycles();
        local_irq_restore(flags);
        preempt_enable();
        time = time2 - time1;

        printk(KERN_ALERT "test results: time for disabling interrupts 
(CLI)\n");
        printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
        printk(KERN_ALERT "total time: %llu\n", time);
        time = div_long_long_rem(time, NR_LOOPS, &rem);
        printk(KERN_ALERT "-> disabling interrupts (CLI) takes %llu cycles\n",
                                time);
        printk(KERN_ALERT "test end\n");
}



/*
 * Module entry point: run the three measurements in sequence, then report
 * failure on purpose so that the module does not stay loaded.
 */
static int ltt_test_init(void)
{
        /* Fail will directly unload the module */
        const int unload_status = -EAGAIN;

        printk(KERN_ALERT "test init\n");

        do_test_cmpxchg();
        do_test_enable_int();
        do_test_disable_int();

        return unload_status;
}

/*
 * Module exit handler: only logs a message.
 * ltt_test_init() always returns an error, so presumably this is never
 * reached in practice -- confirm against the module loader's behaviour.
 */
static void ltt_test_exit(void)
{
        printk(KERN_ALERT "test exit\n");
}

/* Register the module's init and exit handlers. */
module_init(ltt_test_init)
module_exit(ltt_test_exit)

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mathieu Desnoyers");
MODULE_DESCRIPTION("Cmpxchg vs int Test");



-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to