Re: performance tests in xenomai

Philippe Gerum Tue, 27 Jul 2004 14:41:00 +0200

On Tue, 2004-07-27 at 12:52, Paolo Mantegazza wrote:
> Marc Kleine-Budde wrote:
> 
> I'm curious to know whether you are working with Linux preemption
> enabled or disabled?
>


I see no possible relation between preempt and the figures here. preempt
does not override hw masking control in any case.

> Paolo.
> > On Thu, Jul 22, 2004 at 05:41:00PM +0200, Philippe Gerum wrote:
> > 
> >>>The specs of the testmachine:
> >>>Pentium III (Coppermine), 933 MHz, Debian Testing, X11 fluxbox, Konsole
> >>>rtai-3.1 cvs co from this afternoon, linux-2.4.26 with Adeos 2.4r13/x86
> >>>gcc version 3.3.4 (Debian 1:3.3.4-3)
> > 
> > 
> >>>first: the fusion demo (with my skin used from userspace)
> >>>---------------------------------------------------------
> > 
> > 
> >>># while true; do echo `seq 1 46`; done
> >>>nanosleep jitter: min = 8382 ns, max = 56289 ns, avg = 29422 ns
> >>>sema4 handshake: min = 3847 ns, max = 26440 ns, avg = 12046 ns
> > 
> > 
> >>># while true; do echo `seq 1 46`; done
> >>># virtual desktop switching in X
> >>>nanosleep jitter: min = 9537 ns, max = 123902 ns, avg = 30370 ns
> >>>sema4 handshake: min = 3855 ns, max = 43500 ns, avg = 12672 ns
> > 
> > 
> >>Never put X in the loop; results cannot be reliable with it and it makes
> >>no sense to interpret them since the user-space X driver can do whatever
> >>it wants to with your hardware, especially when switching back and forth
> >>to vt7. So I cannot comment on these figures.
> > 
> > 
> >>># idle
> >>>nanosleep jitter: min = 9047 ns, max = 58851 ns, avg = 11766 ns
> >>>sema4 handshake: min = 3738 ns, max = 20652 ns, avg = 4435 ns
> > 
> > 
> >>Because the vesuvio (and kilauea) implementation uses a threaded interrupt
> >>model and internal mutexes that proved to be nice on paper but
> >>inefficient performance-wise; this is why I killed them from the fusion
> >>branch. In order to have the latest performance figures for Xenomai, you
> >>should do your tests on fusion, since recent optimizations to the
> >>scheduler, interrupt model and aperiodic timer went there, and cannot be
> >>backported without heavily changing the Xenomai support in vesuvio,
> >>which I won't do.
> > 
> > 
> > I've ported my skin to fusion. The nanosleep jitter is better, but still
> > much worse than lxrt. Probably there are bugs in my skin, so I decided
> > to switch to the posix skin....
> > 
> > Today I've made some tests with Gilles' posix skin, using the same
> > machine (P3 933) no X11 this time, linux-2.6.7, Adeos 2.6r6c4/x86, gcc
> > version 3.3.4 (Debian 1:3.3.4-3).
> > 
> > # idle system
> > Demo thread now sampling at 1000 Hz ...
> > 5000 samples:
> > nanosleep jitter: min = 7928 ns, max = 19156 ns, avg = 19670174733 ns
> > sema4 handshake: min = 1749 ns, max = 6489 ns, avg = 1802 ns
> > simple kernel thread unloaded
> > 
> > # while true; do echo `seq 1 46`; done
> > nanosleep jitter: min = 11326 ns, max = 49537 ns, avg = 19670193298 ns
> > sema4 handshake: min = 2538 ns, max = 22492 ns, avg = 8549 ns
> > 
> > # while true; do echo `seq 1 46`; done
> > # ping -f
> > nanosleep jitter: min = 9078 ns, max = 63771 ns, avg = 19670193781 ns
> > sema4 handshake: min = 2141 ns, max = 35318 ns, avg = 8525 ns
> > 
> > # while true; do echo `seq 1 46`; done
> > # ping -f
> > # dd if=/dev/zero of=/tmp/test
> > nanosleep jitter: min = 10028 ns, max = 113471 ns, avg = 19670201296 ns
> > sema4 handshake: min = 2080 ns, max = 57584 ns, avg = 11123 ns
> > 
> > Philippe, do you have an explanation for the still-high jitter?
> > 
> > Regards - Marc
> > 
> > 
> > P.S.: ported fusion demo example attached...
> > 
> > 
> > ------------------------------------------------------------------------
> > 
> > #include <linux/module.h>
> > #include <linux/kernel.h>
> > 
> > #include <rts_config.h>
> > 
> > #if defined(HAVE_RTAI_PSE51)
> > # include <rtai_pse51.h>
> > #elif defined (HAVE_POSIX_POSIX_H)
> > # include <posix/posix.h>
> > #endif
> > 
> > #define     nanosleep(rqtp, rmtp)   clock_nanosleep(CLOCK_MONOTONIC, 0, 
> > rqtp, rmtp)
> > #define printf                      printk
> > 
> > MODULE_LICENSE("GPL");
> > 
> > #define WAIT_USEC 1000
> > 
> > static sem_t                semA, semB;
> > static pthread_t    sampler_thid, poster_thid;
> > static xnsysinfo_t  info;
> > 
> > int pthread_info_rt(xnsysinfo_t     *info)
> > {
> >     info->cpufreq = xnarch_get_cpu_freq();
> >     info->tickval = xnpod_get_tickval();
> >     
> >     return 0;
> > }
> > 
> > static inline long long __cputime (void)
> > {
> >     long long       t;
> >     __asm__ __volatile__( "rdtsc" : "=A" (t));
> >     return t;
> > }
> > 
> > static inline long long __count2ns (long long       t) {
> >     unsigned long rem;
> >     /* return t * 1000000000 / info.cpufreq; */
> > 
> >     return xnarch_ulldiv(t * 1000000000L, info.cpufreq, &rem);
> > }
> > 
> > void *poster_thread (void   *arg)
> > {
> >     for (;;)
> >     {
> >     sem_wait(&semA);
> >     sem_post(&semB);
> >     }
> > }
> > 
> > void *sampler_thread (void  *arg)
> > {
> >     long long minj1 = 10000000, maxj1 = 0, sumj1 = 0;
> >     long long minj2 = 10000000, maxj2 = 0, sumj2 = 0;
> >     long long t, t0, t1;
> >     struct timespec ts;
> >     int count = 0, n;
> >     unsigned long rem;
> > 
> >     printf("Demo thread now sampling at %d Hz ...\n",1000000 / WAIT_USEC);
> > 
> >     for (n = count = 0; n < 5000; n++)
> >     {
> >     /* First, align on the next incoming tick. */
> >     ts.tv_sec = 0;
> >     ts.tv_nsec = 1000000;
> >     nanosleep(&ts,NULL);
> > 
> >     /* Then, perform the measurement. */
> >     ts.tv_nsec = WAIT_USEC * 1000;
> >     t0 = __cputime();
> >     nanosleep(&ts,NULL);
> >     t1 = __cputime();
> > 
> >     t = t1 - t0;
> >     if (t > maxj1) maxj1 = t;
> >     if (t < minj1) minj1 = t;
> >     sumj1 += t;
> > 
> >     /* Handshake with Bob. */
> >     sem_post(&semA);
> >     t0 = __cputime();
> >     sem_wait(&semB);
> >     t1 = __cputime();
> > 
> >     t = t1 - t0;
> >     if (t > maxj2) maxj2 = t;
> >     if (t < minj2) minj2 = t;
> >     sumj2 += t;
> > 
> >     count++;
> >     }
> > 
> >     printf("%d samples:\n",count);
> > 
> >     printf("nanosleep jitter: min = %lld ns, max = %lld ns, avg = %lld 
> > ns\n",
> >        __count2ns(minj1) - WAIT_USEC * 1000,
> >        __count2ns(maxj1) - WAIT_USEC * 1000,
> >        __count2ns(xnarch_ulldiv(sumj1, count, &rem) - WAIT_USEC * 1000));
> > 
> >     printf("sema4 handshake: min = %lld ns, max = %lld ns, avg = %lld ns\n",
> >        __count2ns(minj2),
> >        __count2ns(maxj2),
> >        __count2ns(xnarch_ulldiv(sumj2, count, &rem)));
> > 
> >     return NULL;
> > }
> > 
> > int init_module(void)
> > {
> >     struct sched_param      param;
> >     pthread_attr_t  thattr_poster, thattr_sampler;
> > 
> >     sem_init(&semA,0,0);
> >     sem_init(&semB,0,0);
> > 
> >     pthread_info_rt(&info);
> > 
> >     pthread_attr_init(&thattr_poster);
> >     pthread_attr_init(&thattr_sampler);
> >     pthread_attr_setdetachstate(&thattr_poster,PTHREAD_CREATE_DETACHED);
> >     pthread_attr_setdetachstate(&thattr_sampler,PTHREAD_CREATE_DETACHED);
> >     pthread_attr_setfp_np(&thattr_poster, 0);
> >     pthread_attr_setfp_np(&thattr_sampler, 0);
> > 
> >     pthread_attr_setschedpolicy(&thattr_poster,SCHED_FIFO);
> >     pthread_attr_setschedpolicy(&thattr_sampler,SCHED_FIFO);
> >     param.sched_priority = 10;
> >     pthread_attr_setschedparam(&thattr_poster,&param);
> >     pthread_attr_setschedparam(&thattr_sampler,&param);
> > 
> >     pthread_create(&poster_thid,&thattr_poster,&poster_thread,NULL);
> >     pthread_create(&sampler_thid,&thattr_sampler,&sampler_thread,NULL);
> > 
> >     return 0;
> > }
> > 
> > void cleanup_module(void)
> > {
> >     pthread_cancel(poster_thid);
> >     pthread_cancel(sampler_thid);
> > 
> >     sem_destroy(&semA);
> >     sem_destroy(&semB);
> >     printk("simple kernel thread unloaded\n");
> > }
-- 

Philippe.

Re: performance tests in xenomai

Reply via email to