Hi Mathieu,

On Wed, 2009-03-18 at 11:29 +0530, Subrata Modak wrote:
Hi Mathieu,
> 
> On Tue, 2009-03-17 at 11:41 -0400, Mathieu Desnoyers wrote:
> > * Subrata Modak (tosubr...@gmail.com) wrote:
> > > Hi Mathieu,
> > > 
> > > On Tue, Mar 17, 2009 at 7:02 AM, Mathieu Desnoyers <
> > > mathieu.desnoy...@polymtl.ca> wrote:
> > > 
> > > > Hi,
> > > >
> > > > I am trying to get access to some non-x86 hardware to run some atomic
> > > > primitive benchmarks for a paper on LTTng I am preparing. That should be
> > > > useful to argue about performance benefit of per-cpu atomic operations
> > > > vs interrupt disabling. I would like to run the following benchmark
> > > > module on CONFIG_SMP :
> > > >
> > > > - PowerPC
> > > > - MIPS
> > > > - ia64
> > > > - alpha
> > > >
> > > > usage :
> > > > make
> > > > insmod test-cmpxchg-nolock.ko
> > > > insmod: error inserting 'test-cmpxchg-nolock.ko': -1 Resource 
> > > > temporarily
> > > > unavailable
> > > > dmesg (see dmesg output)
> > > >
> > > 
> > > With your permission, can we include this test in LTP (
> > > http://ltp.sourceforge.net/), in some appropriate place as a small 
> > > benchmark
> > > test ?
> > > 
> > 
> > Hi Subrata,
> > 
> > Sure, maybe you'll want to use a better interface than a module init
> > that fails though. :)
> 
> Please Cc me when you come up with a better interface. Meanwhile, i will
> find out a better way to integrate this with LTP and will notify you
> when i do that. Thanks.

How about the following simple patch ? This will integrate it to LTP.

Nemeth,

Comments ?

> > 
> > Mathieu
> > 
> > > Regards--
> > > Subrata
> > > 
> > > 
> > > > If some of you would be kind enough to run my test module provided below
> > > > and provide the results of these tests on a recent kernel (2.6.26~2.6.29
> > > > should be good) along with their cpuinfo, I would greatly appreciate.
> > > >
> > > > Here are the CAS results for various Intel-based architectures :
> > > >
> > > > Architecture         | Speedup                      |      CAS     |
> > > >   Interrupts         |
> > > >                     | (cli + sti) / local cmpxchg  | local | sync | 
> > > > Enable
> > > > (sti) | Disable (cli)
> > > >
> > > > -------------------------------------------------------------------------------------------------
> > > > Intel Pentium 4      | 5.24                         |  25   | 81   | 70
> > > >       | 61          |
> > > > AMD Athlon(tm)64 X2  | 4.57                         |  7    | 17   | 17
> > > >       | 15          |
> > > > Intel Core2          | 6.33                         |  6    | 30   | 20
> > > >       | 18          |
> > > > Intel Xeon E5405     | 5.25                         |  8    | 24   | 20
> > > >       | 22          |
> > > >
> > > > The benefit expected on PowerPC, ia64 and alpha should principally come
> > > > from removed memory barriers in the local primitives.
> > > >
> > > > Thanks,
> > > >
> > > > Mathieu
> > > >
> > > > P.S. please forgive the coding style and hackish interface. :)
> > > >
---

--- 
ltp-full-20090331.orig/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/Makefile
        1970-01-01 05:30:00.000000000 +0530
+++ 
ltp-full-20090331/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/Makefile
     2009-03-31 20:33:16.000000000 +0530
@@ -0,0 +1,20 @@
+ifneq ($(KERNELRELEASE),)
+         obj-m += test-cmpxchg-nolock.o
+else
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+KERNELRELEASE = $(shell cat 
$(KERNELDIR)/$(KBUILD_OUTPUT)/include/linux/version.h | sed -n 
's/.*UTS_RELEASE.*\"\(.*\)\".*/\1/p')
+ifneq ($(INSTALL_MOD_PATH),)
+         DEPMOD_OPT := -b $(INSTALL_MOD_PATH)
+endif
+        
+default:
+       $(MAKE) -C $(KERNELDIR) M=$(PWD) modules
+        
+modules_install:
+       $(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install
+       if [ -f $(KERNELDIR)/$(KBUILD_OUTPUT)/System.map ] ; then /sbin/depmod 
-ae -F $(KERNELDIR)/$(KBUILD_OUTPUT)/System.map $(DEPMOD_OPT) $(KERNELRELEASE) 
; fi
+        
+clean:
+       $(MAKE) -C $(KERNELDIR) M=$(PWD) clean
+endif
--- 
ltp-full-20090331.orig/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/test-cmpxchg-nolock.c
   1970-01-01 05:30:00.000000000 +0530
+++ 
ltp-full-20090331/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/test-cmpxchg-nolock.c
        2009-03-31 20:34:04.000000000 +0530
@@ -0,0 +1,301 @@
+/******************************************************************************/
+/*                                                                            
*/
+/* Copyright (c) Mathieu Desnoyers <mathieu.desnoy...@polymtl.ca>, 2009       
*/
+/*                                                                            
*/
+/* This program is free software;  you can redistribute it and/or modify      
*/
+/* it under the terms of the GNU General Public License as published by       
*/
+/* the Free Software Foundation; either version 2 of the License, or          
*/
+/* (at your option) any later version.                                        
*/
+/*                                                                            
*/
+/* This program is distributed in the hope that it will be useful,            
*/
+/* but WITHOUT ANY WARRANTY;  without even the implied warranty of            
*/
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See                  
*/
+/* the GNU General Public License for more details.                           
*/
+/*                                                                            
*/
+/* You should have received a copy of the GNU General Public License          
*/
+/* along with this program;  if not, write to the Free Software               
*/
+/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA    
*/
+/*                                                                            
*/
+/* usage :
+        make
+        insmod test-cmpxchg-nolock.ko
+        insmod: error inserting 'test-cmpxchg-nolock.ko': 
+                -1 Resource temporarily unavailable
+        dmesg (see dmesg output)                                              
*/
+/******************************************************************************/
+
+
+
+/* test-cmpxchg-nolock.c
+*
+* Compare local cmpxchg with irq disable / enable.
+*/
+
+
+#include <linux/jiffies.h>
+#include <linux/compiler.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/math64.h>
+#include <asm/timex.h>
+#include <asm/system.h>
+
+#define NR_LOOPS 20000
+
+int test_val;
+
+static void do_testbaseline(void)
+{
+       unsigned long flags;
+       unsigned int i;
+       cycles_t time1, time2, time;
+       u32 rem;
+
+       local_irq_save(flags);
+       preempt_disable();
+       time1 = get_cycles();
+       for (i = 0; i < NR_LOOPS; i++) {
+       asm volatile ("");
+       }
+       time2 = get_cycles();
+       local_irq_restore(flags);
+       preempt_enable();
+       time = time2 - time1;
+
+       printk(KERN_ALERT "test results: time for baseline\n");
+       printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+       printk(KERN_ALERT "total time: %llu\n", time);
+       time = div_u64_rem(time, NR_LOOPS, &rem);
+       printk(KERN_ALERT "-> baseline takes %llu cycles\n", time);
+       printk(KERN_ALERT "test end\n");
+}
+
+static void do_test_sync_cmpxchg(void)
+{
+       int ret;
+       unsigned long flags;
+       unsigned int i;
+       cycles_t time1, time2, time;
+       u32 rem;
+
+       local_irq_save(flags);
+       preempt_disable();
+       time1 = get_cycles();
+       for (i = 0; i < NR_LOOPS; i++) {
+#ifdef CONFIG_X86_32
+       ret = sync_cmpxchg(&test_val, 0, 0);
+#else
+       ret = cmpxchg(&test_val, 0, 0);
+#endif
+       }
+       time2 = get_cycles();
+       local_irq_restore(flags);
+       preempt_enable();
+       time = time2 - time1;
+
+       printk(KERN_ALERT "test results: time for locked cmpxchg\n");
+       printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+       printk(KERN_ALERT "total time: %llu\n", time);
+       time = div_u64_rem(time, NR_LOOPS, &rem);
+       printk(KERN_ALERT "-> locked cmpxchg takes %llu cycles\n", time);
+       printk(KERN_ALERT "test end\n");
+}
+
+static void do_test_cmpxchg(void)
+{
+       int ret;
+       unsigned long flags;
+       unsigned int i;
+       cycles_t time1, time2, time;
+       u32 rem;
+
+       local_irq_save(flags);
+       preempt_disable();
+       time1 = get_cycles();
+       for (i = 0; i < NR_LOOPS; i++) {
+       ret = cmpxchg_local(&test_val, 0, 0);
+       }
+       time2 = get_cycles();
+       local_irq_restore(flags);
+       preempt_enable();
+       time = time2 - time1;
+
+       printk(KERN_ALERT "test results: time for non locked cmpxchg\n");
+       printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+       printk(KERN_ALERT "total time: %llu\n", time);
+       time = div_u64_rem(time, NR_LOOPS, &rem);
+       printk(KERN_ALERT "-> non locked cmpxchg takes %llu cycles\n", time);
+       printk(KERN_ALERT "test end\n");
+}
+static void do_test_sync_inc(void)
+{
+       int ret;
+       unsigned long flags;
+       unsigned int i;
+       cycles_t time1, time2, time;
+       u32 rem;
+       atomic_t val;
+
+       local_irq_save(flags);
+       preempt_disable();
+       time1 = get_cycles();
+       for (i = 0; i < NR_LOOPS; i++) {
+       ret = atomic_add_return(10, &val);
+       }
+       time2 = get_cycles();
+       local_irq_restore(flags);
+       preempt_enable();
+       time = time2 - time1;
+
+       printk(KERN_ALERT "test results: time for locked add return\n");
+       printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+       printk(KERN_ALERT "total time: %llu\n", time);
+       time = div_u64_rem(time, NR_LOOPS, &rem);
+       printk(KERN_ALERT "-> locked add return takes %llu cycles\n", time);
+       printk(KERN_ALERT "test end\n");
+}
+
+
+static void do_test_inc(void)
+{
+       int ret;
+       unsigned long flags;
+       unsigned int i;
+       cycles_t time1, time2, time;
+       u32 rem;
+       local_t loc_val;
+
+       local_irq_save(flags);
+       preempt_disable();
+       time1 = get_cycles();
+       for (i = 0; i < NR_LOOPS; i++) {
+       ret = local_add_return(10, &loc_val);
+       }
+       time2 = get_cycles();
+       local_irq_restore(flags);
+       preempt_enable();
+       time = time2 - time1;
+
+       printk(KERN_ALERT "test results: time for non locked add return\n");
+       printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+       printk(KERN_ALERT "total time: %llu\n", time);
+       time = div_u64_rem(time, NR_LOOPS, &rem);
+       printk(KERN_ALERT "-> non locked add return takes %llu cycles\n", time);
+       printk(KERN_ALERT "test end\n");
+}
+
+
+
+/*
+ * This test will have a higher standard deviation due to incoming interrupts.
+ */
+static void do_test_enable_int(void)
+{
+       unsigned long flags;
+       unsigned int i;
+       cycles_t time1, time2, time;
+       u32 rem;
+
+       local_irq_save(flags);
+       preempt_disable();
+       time1 = get_cycles();
+       for (i = 0; i < NR_LOOPS; i++) {
+       local_irq_restore(flags);
+       }
+       time2 = get_cycles();
+       local_irq_restore(flags);
+       preempt_enable();
+       time = time2 - time1;
+
+       printk(KERN_ALERT "test results: time for enabling interrupts (STI)\n");
+       printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+       printk(KERN_ALERT "total time: %llu\n", time);
+       time = div_u64_rem(time, NR_LOOPS, &rem);
+       printk(KERN_ALERT "-> enabling interrupts (STI) takes %llu cycles\n",
+       time);
+       printk(KERN_ALERT "test end\n");
+}
+
+static void do_test_disable_int(void)
+{
+       unsigned long flags, flags2;
+       unsigned int i;
+       cycles_t time1, time2, time;
+       u32 rem;
+
+       local_irq_save(flags);
+       preempt_disable();
+       time1 = get_cycles();
+       for ( i = 0; i < NR_LOOPS; i++) {
+       local_irq_save(flags2);
+       }
+       time2 = get_cycles();
+       local_irq_restore(flags);
+       preempt_enable();
+       time = time2 - time1;
+
+       printk(KERN_ALERT "test results: time for disabling interrupts 
(CLI)\n");
+       printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+       printk(KERN_ALERT "total time: %llu\n", time);
+       time = div_u64_rem(time, NR_LOOPS, &rem);
+       printk(KERN_ALERT "-> disabling interrupts (CLI) takes %llu cycles\n",
+       time);
+       printk(KERN_ALERT "test end\n");
+}
+
+static void do_test_int(void)
+{
+       unsigned long flags;
+       unsigned int i;
+       cycles_t time1, time2, time;
+       u32 rem;
+
+       local_irq_save(flags);
+       preempt_disable();
+       time1 = get_cycles();
+       for (i = 0; i < NR_LOOPS; i++) {
+       local_irq_restore(flags);
+       local_irq_save(flags);
+       }
+       time2 = get_cycles();
+       local_irq_restore(flags);
+       preempt_enable();
+       time = time2 - time1;
+
+       printk(KERN_ALERT "test results: time for disabling/enabling interrupts 
(STI/CLI)\n");
+       printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+       printk(KERN_ALERT "total time: %llu\n", time);
+       time = div_u64_rem(time, NR_LOOPS, &rem);
+       printk(KERN_ALERT "-> enabling/disabling interrupts (STI/CLI) takes 
%llu cycles\n",
+       time);
+       printk(KERN_ALERT "test end\n");
+}
+
+
+
+static int ltt_test_init(void)
+{
+       printk(KERN_ALERT "test init\n");
+
+       do_testbaseline();
+       do_test_sync_cmpxchg();
+       do_test_cmpxchg();
+       do_test_sync_inc();
+       do_test_inc();
+       do_test_enable_int();
+       do_test_disable_int();
+       do_test_int();
+       return -EAGAIN; /* Fail will directly unload the module */
+}
+
+static void ltt_test_exit(void)
+{
+       printk(KERN_ALERT "test exit\n");
+}
+
+module_init(ltt_test_init)
+module_exit(ltt_test_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Cmpxchg vs int Test");

---
Regards--
Subrata

> 
> Regards--
> Subrata
> 


------------------------------------------------------------------------------
_______________________________________________
Ltp-list mailing list
Ltp-list@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/ltp-list

Reply via email to