Re: performance tests in xenomai

Marc Kleine-Budde Tue, 27 Jul 2004 12:15:58 +0200

On Thu, Jul 22, 2004 at 05:41:00PM +0200, Philippe Gerum wrote:
> > The specs of the testmachine:
> > Pentium III (Coppermine), 933 MHz, Debian Testing, X11 fluxbox, Konsole
> > rtai-3.1 cvs co from this afternoon, linux-2.4.26 with Adeos 2.4r13/x86
> > gcc version 3.3.4 (Debian 1:3.3.4-3)


> > first: the fusion demo (with my skin used from userspace)
> > ---------------------------------------------------------

> > # while true; do echo `seq 1 46`; done
> > nanosleep jitter: min = 8382 ns, max = 56289 ns, avg = 29422 ns
> > sema4 handshake: min = 3847 ns, max = 26440 ns, avg = 12046 ns

> > # while true; do echo `seq 1 46`; done
> > # virtual desktop switching in X
> > nanosleep jitter: min = 9537 ns, max = 123902 ns, avg = 30370 ns
> > sema4 handshake: min = 3855 ns, max = 43500 ns, avg = 12672 ns

> Never put X in the loop; results cannot be reliable with it and it makes
> no sense to interpret them since the user-space X driver can do whatever
> it wants to with your hardware, especially when switching to and from
> vt7. So I cannot comment on these figures.

> > # idle
> > nanosleep jitter: min = 9047 ns, max = 58851 ns, avg = 11766 ns
> > sema4 handshake: min = 3738 ns, max = 20652 ns, avg = 4435 ns

> Because the vesuvio (and kilauea) implementation uses a threaded interrupt
> model and internal mutexes that proved to be nice on paper but
> inefficient performance-wise; this is why I killed them from the fusion
> branch. In order to have the latest performance figures for Xenomai, you
> should do your tests on fusion, since recent optimizations to the
> scheduler, interrupt model and aperiodic timer went there, and cannot be
> backported without heavily changing the Xenomai support in vesuvio,
> which I won't do.

I've ported my skin to fusion. The nanosleep jitter is better, but still
much worse than lxrt. Probably there are bugs in my skin, so I decided
to switch to the posix skin....

Today I've made some tests with Gilles' posix skin, using the same
machine (P3 933), no X11 this time, linux-2.6.7, Adeos 2.6r6c4/x86, gcc
version 3.3.4 (Debian 1:3.3.4-3).

# idle system
Demo thread now sampling at 1000 Hz ...
5000 samples:
nanosleep jitter: min = 7928 ns, max = 19156 ns, avg = 19670174733 ns
sema4 handshake: min = 1749 ns, max = 6489 ns, avg = 1802 ns
simple kernel thread unloaded

# while true; do echo `seq 1 46`; done
nanosleep jitter: min = 11326 ns, max = 49537 ns, avg = 19670193298 ns
sema4 handshake: min = 2538 ns, max = 22492 ns, avg = 8549 ns

# while true; do echo `seq 1 46`; done
# ping -f
nanosleep jitter: min = 9078 ns, max = 63771 ns, avg = 19670193781 ns
sema4 handshake: min = 2141 ns, max = 35318 ns, avg = 8525 ns

# while true; do echo `seq 1 46`; done
# ping -f
# dd if=/dev/zero of=/tmp/test
nanosleep jitter: min = 10028 ns, max = 113471 ns, avg = 19670201296 ns
sema4 handshake: min = 2080 ns, max = 57584 ns, avg = 11123 ns

Philippe, do you have an explanation for the still-high jitter?

Regards - Marc


P.S.: ported fusion demo example attached...
-- 
#!/bin/sh
set - `type $0` 'tr "[a-zA-Z]" "[n-za-mN-ZA-M]"';while [ "$2" != "" ];do \
shift;done; echo 'frq -a -rc '`echo "$0"| $1 `'>$UBZR/.`rpub signature|'`\
echo $1|$1`'`;rpub "Jr ner fvtangher bs obet. Erfvfgnapr vf shgvyr!"'|$1|sh

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>

#include <rts_config.h>

#if defined(HAVE_RTAI_PSE51)
# include <rtai_pse51.h>
#elif defined (HAVE_POSIX_POSIX_H)
# include <posix/posix.h>
#endif

/* Map nanosleep() onto clock_nanosleep() on CLOCK_MONOTONIC with flags 0. */
#define	nanosleep(rqtp, rmtp)	clock_nanosleep(CLOCK_MONOTONIC, 0, rqtp, rmtp)
/* Route printf() calls in this file to the kernel's printk(). */
#define printf			printk

MODULE_LICENSE("GPL");

/*
 * Length of the measured sleep in microseconds; the sampler multiplies it
 * by 1000 to get nanoseconds and derives the reported sampling rate from it.
 */
#define WAIT_USEC 1000

/*
 * Handshake semaphores: the sampler posts semA and waits on semB, the
 * poster waits on semA and posts semB.
 */
static sem_t		semA, semB;
/* Thread ids stored by init_module() and cancelled in cleanup_module(). */
static pthread_t	sampler_thid, poster_thid;
/* Filled by pthread_info_rt() in init_module(); cpufreq is read by __count2ns(). */
static xnsysinfo_t	info;

/*
 * Fill in the caller's system-information record.
 *
 * Stores the value of xnarch_get_cpu_freq() in info->cpufreq and the value
 * of xnpod_get_tickval() in info->tickval.
 *
 * Note: the parameter shadows the file-scope `info` object; only the
 * record passed by the caller is written.
 *
 * Always returns 0.
 */
int pthread_info_rt(xnsysinfo_t	*info)
{
    info->cpufreq = xnarch_get_cpu_freq();
    info->tickval = xnpod_get_tickval();

    return 0;
}

/*
 * Read the CPU time-stamp counter.
 *
 * Executes the x86 RDTSC instruction, which leaves the low 32 bits of the
 * counter in EAX and the high 32 bits in EDX, and returns the combined
 * 64-bit count.
 *
 * The two halves are fetched through explicit "=a"/"=d" outputs instead of
 * the "=A" pair constraint: "=A" only denotes the EDX:EAX pair on 32-bit
 * x86, so a 64-bit build would receive a single register and lose half of
 * the counter.
 */
static inline long long __cputime (void)
{
    unsigned int lo, hi;

    __asm__ __volatile__( "rdtsc" : "=a" (lo), "=d" (hi));

    return (long long)(((unsigned long long)hi << 32) | lo);
}

/*
 * Convert a time-stamp counter interval to nanoseconds.
 *
 * The count is scaled by 10^9 and divided by info.cpufreq through
 * xnarch_ulldiv(); the remainder is discarded.
 *
 * NOTE(review): the helper's name suggests it takes an unsigned dividend,
 * in which case a negative `t` would come out as a huge value rather than
 * a negative one - confirm against its prototype, and pass only
 * non-negative counts.
 */
static inline long long __count2ns (long long	t) {
	unsigned long leftover;
	long long scaled = t * 1000000000L;

	return xnarch_ulldiv(scaled, info.cpufreq, &leftover);
}

/*
 * Handshake peer of the sampler thread.
 *
 * Endlessly waits on semA and answers each wake-up by posting semB.
 * The loop has no exit, so the function never returns; `arg` is unused.
 */
void *poster_thread (void	*arg)
{
    while (1) {
	sem_wait(&semA);	/* block until the sampler signals */
	sem_post(&semB);	/* hand control straight back */
    }
}

/*
 * Measurement thread.
 *
 * Takes 5000 samples. Each sample times
 *   1. a nanosleep() of WAIT_USEC microseconds, and
 *   2. a semaphore round trip with poster_thread (post semA, wait semB),
 * both in time-stamp counter ticks obtained from __cputime().
 *
 * After the loop it prints minimum, maximum and average for both
 * measurements, converted to nanoseconds with __count2ns(). The sleep
 * figures are reported as jitter, i.e. with the requested sleep time
 * (WAIT_USEC * 1000 ns) subtracted.
 *
 * `arg` is unused. Returns NULL once the results have been printed.
 */
void *sampler_thread (void	*arg)
{
    const int nsamples = 5000;
    /* Minima start at the largest long long so any real sample replaces them. */
    long long minj1 = (long long)(~0ULL >> 1), maxj1 = 0, sumj1 = 0;
    long long minj2 = (long long)(~0ULL >> 1), maxj2 = 0, sumj2 = 0;
    long long t, t0, t1;
    struct timespec ts;
    int count;
    unsigned long rem;

    printf("Demo thread now sampling at %d Hz ...\n",1000000 / WAIT_USEC);

    for (count = 0; count < nsamples; count++)
	{
	/* First, align on the next incoming tick. */
	ts.tv_sec = 0;
	ts.tv_nsec = 1000000;
	nanosleep(&ts,NULL);

	/* Then, perform the measurement. */
	ts.tv_nsec = WAIT_USEC * 1000;
	t0 = __cputime();
	nanosleep(&ts,NULL);
	t1 = __cputime();

	t = t1 - t0;
	if (t > maxj1) maxj1 = t;
	if (t < minj1) minj1 = t;
	sumj1 += t;

	/* Handshake with the poster thread. */
	sem_post(&semA);
	t0 = __cputime();
	sem_wait(&semB);
	t1 = __cputime();

	t = t1 - t0;
	if (t > maxj2) maxj2 = t;
	if (t < minj2) minj2 = t;
	sumj2 += t;
	}

    printf("%d samples:\n",count);

    /*
     * The accumulated values are counter ticks while the nominal sleep time
     * is in nanoseconds, so every figure is converted to nanoseconds first
     * and the nominal time subtracted afterwards. Subtracting before the
     * conversion mixes units and can drive the intermediate value negative.
     */
    printf("nanosleep jitter: min = %lld ns, max = %lld ns, avg = %lld ns\n",
	   __count2ns(minj1) - WAIT_USEC * 1000,
	   __count2ns(maxj1) - WAIT_USEC * 1000,
	   __count2ns(xnarch_ulldiv(sumj1, count, &rem)) - WAIT_USEC * 1000);

    printf("sema4 handshake: min = %lld ns, max = %lld ns, avg = %lld ns\n",
	   __count2ns(minj2),
	   __count2ns(maxj2),
	   __count2ns(xnarch_ulldiv(sumj2, count, &rem)));

    return NULL;
}

/*
 * Module entry point.
 *
 * Initializes both handshake semaphores to 0, records the system
 * information used for tick-to-nanosecond conversion, and starts the
 * poster and sampler threads (detached, SCHED_FIFO, priority 10, FPU
 * attribute set to 0).
 *
 * Returns 0 on success. If a semaphore or thread cannot be created,
 * whatever was already set up is torn down again and a negative error
 * code is returned so that the module load fails instead of leaving a
 * half-initialized demo behind.
 */
int init_module(void)
{
    struct sched_param	param;
    pthread_attr_t	thattr_poster, thattr_sampler;
    int			err;

    /*
     * The specific reason for a sem_init() failure is not inspected here;
     * a generic error code is reported.
     */
    if (sem_init(&semA,0,0))
	return -EINVAL;

    if (sem_init(&semB,0,0))
	{
	sem_destroy(&semA);
	return -EINVAL;
	}

    pthread_info_rt(&info);

    pthread_attr_init(&thattr_poster);
    pthread_attr_init(&thattr_sampler);
    pthread_attr_setdetachstate(&thattr_poster,PTHREAD_CREATE_DETACHED);
    pthread_attr_setdetachstate(&thattr_sampler,PTHREAD_CREATE_DETACHED);
    pthread_attr_setfp_np(&thattr_poster, 0);
    pthread_attr_setfp_np(&thattr_sampler, 0);

    pthread_attr_setschedpolicy(&thattr_poster,SCHED_FIFO);
    pthread_attr_setschedpolicy(&thattr_sampler,SCHED_FIFO);
    param.sched_priority = 10;
    pthread_attr_setschedparam(&thattr_poster,&param);
    pthread_attr_setschedparam(&thattr_sampler,&param);

    /* The poster is started first so it is available for the handshake. */
    err = pthread_create(&poster_thid,&thattr_poster,&poster_thread,NULL);
    if (err)
	goto fail_sems;

    err = pthread_create(&sampler_thid,&thattr_sampler,&sampler_thread,NULL);
    if (err)
	goto fail_poster;

    return 0;

fail_poster:
    pthread_cancel(poster_thid);
fail_sems:
    sem_destroy(&semA);
    sem_destroy(&semB);

    /*
     * POSIX specifies a positive error number from pthread_create(); the
     * sign is normalized so a negative value is returned either way.
     */
    return err > 0 ? -err : err;
}

/*
 * Module exit point.
 *
 * Requests cancellation of the poster and the sampler thread, destroys
 * both handshake semaphores and logs an unload message.
 */
void cleanup_module(void)
{
    /* Stop the threads before removing the semaphores they use. */
    pthread_cancel(poster_thid);
    pthread_cancel(sampler_thid);

    /* Release the handshake semaphores. */
    sem_destroy(&semA);
    sem_destroy(&semB);

    printk("simple kernel thread unloaded\n");
}

Re: performance tests in xenomai

Reply via email to