On Tue, 2004-07-27 at 12:52, Paolo Mantegazza wrote:
> Marc Kleine-Budde wrote:
>
> I'm curious to know if you are working with Linux preemption
> enabled/disabled?
>
I see no possible relation between preempt and the figures here. preempt
does not override hw masking control in any case.
> Paolo.
> > On Thu, Jul 22, 2004 at 05:41:00PM +0200, Philippe Gerum wrote:
> >
> >>>The specs of the testmachine:
> >>>Pentium III (Coppermine), 933 MHz, Debian Testing, X11 fluxbox, Konsole
> >>>rtai-3.1 cvs co from this afternoon, linux-2.4.26 with Adeos 2.4r13/x86
> >>>gcc version 3.3.4 (Debian 1:3.3.4-3)
> >
> >
> >>>first: the fusion demo (with my skin used from userspace)
> >>>---------------------------------------------------------
> >
> >
> >>># while true; do echo `seq 1 46`; done
> >>>nanosleep jitter: min = 8382 ns, max = 56289 ns, avg = 29422 ns
> >>>sema4 handshake: min = 3847 ns, max = 26440 ns, avg = 12046 ns
> >
> >
> >>># while true; do echo `seq 1 46`; done
> >>># virtual desktop switching in X
> >>>nanosleep jitter: min = 9537 ns, max = 123902 ns, avg = 30370 ns
> >>>sema4 handshake: min = 3855 ns, max = 43500 ns, avg = 12672 ns
> >
> >
> >>Never put X in the loop; results cannot be reliable with it and it makes
> >>no sense to interpret them since the user-space X driver can do whatever
> >>it wants to with your hardware, especially when switching back and forth
> >>vt7. So I cannot comment on these figures.
> >
> >
> >>># idle
> >>>nanosleep jitter: min = 9047 ns, max = 58851 ns, avg = 11766 ns
> >>>sema4 handshake: min = 3738 ns, max = 20652 ns, avg = 4435 ns
> >
> >
> >>Because vesuvio (and kilauea) implementation uses a threaded interrupt
> >>model and internal mutexes that proved to be nice on the paper but
> >>inefficient performance-wise; this is why I killed them from the fusion
> >>branch. In order to have the latest performance figures for Xenomai, you
> >>should do your tests on fusion, since recent optimizations to the
> >>scheduler, interrupt model and aperiodic timer went there, and cannot be
> >>backported without heavily changing the Xenomai support in vesuvio,
> >>which I won't do.
> >
> >
> > I've ported my skin to fusion. The nanosleep jitter is better, but still
> > much worse than lxrt. Probably there are bugs in my skin, so I decided
> > to switch to the posix skin....
> >
> > Today I've made some tests with Gilles' posix skin, using the same
> > machine (P3 933) no X11 this time, linux-2.6.7, Adeos 2.6r6c4/x86, gcc
> > version 3.3.4 (Debian 1:3.3.4-3).
> >
> > # idle system
> > Demo thread now sampling at 1000 Hz ...
> > 5000 samples:
> > nanosleep jitter: min = 7928 ns, max = 19156 ns, avg = 19670174733 ns
> > sema4 handshake: min = 1749 ns, max = 6489 ns, avg = 1802 ns
> > simple kernel thread unloaded
> >
> > # while true; do echo `seq 1 46`; done
> > nanosleep jitter: min = 11326 ns, max = 49537 ns, avg = 19670193298 ns
> > sema4 handshake: min = 2538 ns, max = 22492 ns, avg = 8549 ns
> >
> > # while true; do echo `seq 1 46`; done
> > # ping -f
> > nanosleep jitter: min = 9078 ns, max = 63771 ns, avg = 19670193781 ns
> > sema4 handshake: min = 2141 ns, max = 35318 ns, avg = 8525 ns
> >
> > # while true; do echo `seq 1 46`; done
> > # ping -f
> > # dd if=/dev/zero of=/tmp/test
> > nanosleep jitter: min = 10028 ns, max = 113471 ns, avg = 19670201296 ns
> > sema4 handshake: min = 2080 ns, max = 57584 ns, avg = 11123 ns
> >
> > Philippe, have you an explanation for that still high jitter?
> >
> > regards - Marc
> >
> >
> > P.S.: ported fusion demo example attached...
> >
> >
> > ------------------------------------------------------------------------
> >
> > #include <linux/module.h>
> > #include <linux/kernel.h>
> >
> > #include <rts_config.h>
> >
> > #if defined(HAVE_RTAI_PSE51)
> > # include <rtai_pse51.h>
> > #elif defined (HAVE_POSIX_POSIX_H)
> > # include <posix/posix.h>
> > #endif
> >
> > /* Map the demo's nanosleep() calls onto clock_nanosleep() with
> >  * CLOCK_MONOTONIC and a flags argument of 0.  The definition must stay on
> >  * one logical line: a mail-wrapped second line is not part of the macro. */
> > #define nanosleep(rqtp, rmtp) clock_nanosleep(CLOCK_MONOTONIC, 0, (rqtp), (rmtp))
> > /* Send the demo's printf() output through printk(). */
> > #define printf printk
> >
> > MODULE_LICENSE("GPL");
> >
> > /* Measured sleep period in microseconds; also sets the reported sampling
> >  * rate (1000000 / WAIT_USEC Hz). */
> > #define WAIT_USEC 1000
> >
> > /* semA: sampler -> poster wake-up; semB: poster -> sampler reply. */
> > static sem_t semA, semB;
> > static pthread_t sampler_thid, poster_thid;
> > /* Filled once by pthread_info_rt(); cpufreq is read by __count2ns(). */
> > static xnsysinfo_t info;
> >
> > /*
> >  * Fill *info with the two values the demo reads back later: the CPU
> >  * frequency from xnarch_get_cpu_freq() and the tick value from
> >  * xnpod_get_tickval().  Always returns 0.
> >  */
> > int pthread_info_rt(xnsysinfo_t *info)
> > {
> >     info->cpufreq = xnarch_get_cpu_freq();
> >     info->tickval = xnpod_get_tickval();
> >
> >     return 0;
> > }
> >
> > /*
> >  * Return the current value of the CPU time-stamp counter, read with the
> >  * x86 "rdtsc" instruction.
> >  * NOTE(review): the "=A" output constraint is relied on to name the
> >  * edx:eax register pair, which holds on 32-bit x86 -- confirm before
> >  * building this for any other target.
> >  */
> > static inline long long __cputime (void)
> > {
> >     long long tsc;
> >
> >     __asm__ __volatile__("rdtsc" : "=A" (tsc));
> >
> >     return tsc;
> > }
> >
> > /*
> >  * Convert a time-stamp-counter count to nanoseconds, i.e.
> >  * t * 1000000000 / info.cpufreq, with the division delegated to
> >  * xnarch_ulldiv().  The remainder it reports is discarded.
> >  */
> > static inline long long __count2ns (long long t)
> > {
> >     unsigned long remainder;
> >     long long scaled = t * 1000000000L;
> >
> >     return xnarch_ulldiv(scaled, info.cpufreq, &remainder);
> > }
> >
> > /*
> >  * Handshake partner of sampler_thread(): loops forever, blocking on semA
> >  * and answering every wake-up by posting semB.  Never returns; `arg' is
> >  * unused.
> >  */
> > void *poster_thread (void *arg)
> > {
> >     while (1) {
> >         sem_wait(&semA);
> >         sem_post(&semB);
> >     }
> > }
> >
> > /*
> >  * Measurement thread.  Takes 5000 samples; each sample times, in
> >  * time-stamp-counter ticks,
> >  *   1. a nanosleep() of WAIT_USEC microseconds, and
> >  *   2. one semA/semB round trip with poster_thread(),
> >  * then prints min/max/avg of both, converted to nanoseconds.  For the
> >  * nanosleep figures the requested sleep time is subtracted so that only
> >  * the overshoot (jitter) is reported.  `arg' is unused; returns NULL.
> >  */
> > void *sampler_thread (void *arg)
> > {
> >     long long minj1 = 10000000, maxj1 = 0, sumj1 = 0;
> >     long long minj2 = 10000000, maxj2 = 0, sumj2 = 0;
> >     long long t, t0, t1;
> >     struct timespec ts;
> >     int count = 0, n;
> >     unsigned long rem;
> >
> >     printf("Demo thread now sampling at %d Hz ...\n",1000000 / WAIT_USEC);
> >
> >     for (n = 0; n < 5000; n++) {
> >         /* First, align on the next incoming tick. */
> >         ts.tv_sec = 0;
> >         ts.tv_nsec = 1000000;
> >         nanosleep(&ts,NULL);
> >
> >         /* Then, perform the measurement. */
> >         ts.tv_nsec = WAIT_USEC * 1000;
> >         t0 = __cputime();
> >         nanosleep(&ts,NULL);
> >         t1 = __cputime();
> >
> >         t = t1 - t0;
> >         if (t > maxj1) maxj1 = t;
> >         if (t < minj1) minj1 = t;
> >         sumj1 += t;
> >
> >         /* Handshake with poster_thread(): wake it, time its reply. */
> >         sem_post(&semA);
> >         t0 = __cputime();
> >         sem_wait(&semB);
> >         t1 = __cputime();
> >
> >         t = t1 - t0;
> >         if (t > maxj2) maxj2 = t;
> >         if (t < minj2) minj2 = t;
> >         sumj2 += t;
> >
> >         count++;
> >     }
> >
> >     printf("%d samples:\n",count);
> >
> >     /*
> >      * The sums are in TSC ticks while WAIT_USEC * 1000 is in nanoseconds,
> >      * so the average must be converted to ns BEFORE the requested sleep
> >      * time is subtracted, exactly as for min and max.  Subtracting inside
> >      * the __count2ns() call mixes the units, yields a negative tick count
> >      * and prints a meaningless, huge "avg" value.
> >      */
> >     printf("nanosleep jitter: min = %lld ns, max = %lld ns, avg = %lld ns\n",
> >            __count2ns(minj1) - WAIT_USEC * 1000,
> >            __count2ns(maxj1) - WAIT_USEC * 1000,
> >            __count2ns(xnarch_ulldiv(sumj1, count, &rem)) - WAIT_USEC * 1000);
> >
> >     printf("sema4 handshake: min = %lld ns, max = %lld ns, avg = %lld ns\n",
> >            __count2ns(minj2),
> >            __count2ns(maxj2),
> >            __count2ns(xnarch_ulldiv(sumj2, count, &rem)));
> >
> >     return NULL;
> > }
> >
> > /*
> >  * Module entry point.  Initialises both semaphores to 0, records the
> >  * timing parameters in `info', and starts poster_thread() and
> >  * sampler_thread() as detached SCHED_FIFO threads at priority 10.
> >  *
> >  * Returns 0 on success.  If a thread cannot be created, whatever was set
> >  * up so far is torn down and a negative error code is returned so that
> >  * the module load fails instead of leaving a half-started demo behind.
> >  */
> > int init_module(void)
> > {
> >     struct sched_param param;
> >     pthread_attr_t thattr_poster, thattr_sampler;
> >     int err;
> >
> >     sem_init(&semA,0,0);
> >     sem_init(&semB,0,0);
> >
> >     pthread_info_rt(&info);
> >
> >     pthread_attr_init(&thattr_poster);
> >     pthread_attr_init(&thattr_sampler);
> >     pthread_attr_setdetachstate(&thattr_poster,PTHREAD_CREATE_DETACHED);
> >     pthread_attr_setdetachstate(&thattr_sampler,PTHREAD_CREATE_DETACHED);
> >     /* NOTE(review): presumably 0 means "no FPU use" -- confirm with skin. */
> >     pthread_attr_setfp_np(&thattr_poster, 0);
> >     pthread_attr_setfp_np(&thattr_sampler, 0);
> >
> >     pthread_attr_setschedpolicy(&thattr_poster,SCHED_FIFO);
> >     pthread_attr_setschedpolicy(&thattr_sampler,SCHED_FIFO);
> >     param.sched_priority = 10;
> >     pthread_attr_setschedparam(&thattr_poster,&param);
> >     pthread_attr_setschedparam(&thattr_sampler,&param);
> >
> >     err = pthread_create(&poster_thid,&thattr_poster,&poster_thread,NULL);
> >     if (err)
> >         goto fail_sems;
> >
> >     err = pthread_create(&sampler_thid,&thattr_sampler,&sampler_thread,NULL);
> >     if (err)
> >         goto fail_poster;
> >
> >     return 0;
> >
> > fail_poster:
> >     /* Same teardown order as cleanup_module(): cancel, then destroy. */
> >     pthread_cancel(poster_thid);
> > fail_sems:
> >     sem_destroy(&semA);
> >     sem_destroy(&semB);
> >
> >     /* Module init reports failure as a negative value; normalise the
> >      * sign in case pthread_create() returned a positive error number. */
> >     return err > 0 ? -err : err;
> > }
> >
> > /*
> >  * Module exit point: cancels both demo threads, destroys the two
> >  * semaphores they use, and logs that the module has been unloaded.
> >  */
> > void cleanup_module(void)
> > {
> >     pthread_cancel(poster_thid);
> >     pthread_cancel(sampler_thid);
> >
> >     sem_destroy(&semA);
> >     sem_destroy(&semB);
> >
> >     printk("simple kernel thread unloaded\n");
> > }
--
Philippe.