Re: performance tests in xenomai

Paolo Mantegazza Tue, 27 Jul 2004 12:44:34 +0200

Marc Kleine-Budde wrote:

I'm curious to know whether you are working with Linux preemption enabled or disabled?


Paolo.

On Thu, Jul 22, 2004 at 05:41:00PM +0200, Philippe Gerum wrote:

The specs of the testmachine:
Pentium III (Coppermine), 933 MHz, Debian Testing, X11 fluxbox, Konsole
rtai-3.1 cvs co from this afternoon, linux-2.4.26 with Adeos 2.4r13/x86
gcc version 3.3.4 (Debian 1:3.3.4-3)

first: the fusion demo (with my skin used from userspace)
---------------------------------------------------------

# while true; do echo `seq 1 46`; done
nanosleep jitter: min = 8382 ns, max = 56289 ns, avg = 29422 ns
sema4 handshake: min = 3847 ns, max = 26440 ns, avg = 12046 ns

# while true; do echo `seq 1 46`; done
# virtual desktop switching in X
nanosleep jitter: min = 9537 ns, max = 123902 ns, avg = 30370 ns
sema4 handshake: min = 3855 ns, max = 43500 ns, avg = 12672 ns

Never put X in the loop; results cannot be reliable with it and it makes
no sense to interpret them since the user-space X driver can do whatever
it wants to with your hardware, especially when switching back and forth
to vt7. So I cannot comment on these figures.

# idle
nanosleep jitter: min = 9047 ns, max = 58851 ns, avg = 11766 ns
sema4 handshake: min = 3738 ns, max = 20652 ns, avg = 4435 ns

Because the vesuvio (and kilauea) implementation uses a threaded interrupt
model and internal mutexes that proved to be nice on paper but
inefficient performance-wise; this is why I killed them from the fusion
branch. In order to have the latest performance figures for Xenomai, you
should do your tests on fusion, since recent optimizations to the
scheduler, interrupt model and aperiodic timer went there, and cannot be
backported without heavily changing the Xenomai support in vesuvio,
which I won't do.



I've ported my skin to fusion. The nanosleep jitter is better, but still
much worse than lxrt. Probably there are bugs in my skin, so I decided
to switch to the posix skin....

Today I've made some tests with Gilles' posix skin, using the same
machine (P3 933) no X11 this time, linux-2.6.7, Adeos 2.6r6c4/x86, gcc
version 3.3.4 (Debian 1:3.3.4-3).

# idle system
Demo thread now sampling at 1000 Hz ...
5000 samples:
nanosleep jitter: min = 7928 ns, max = 19156 ns, avg = 19670174733 ns
sema4 handshake: min = 1749 ns, max = 6489 ns, avg = 1802 ns
simple kernel thread unloaded

# while true; do echo `seq 1 46`; done
nanosleep jitter: min = 11326 ns, max = 49537 ns, avg = 19670193298 ns
sema4 handshake: min = 2538 ns, max = 22492 ns, avg = 8549 ns

# while true; do echo `seq 1 46`; done
# ping -f
nanosleep jitter: min = 9078 ns, max = 63771 ns, avg = 19670193781 ns
sema4 handshake: min = 2141 ns, max = 35318 ns, avg = 8525 ns

# while true; do echo `seq 1 46`; done
# ping -f
# dd if=/dev/zero of=/tmp/test
nanosleep jitter: min = 10028 ns, max = 113471 ns, avg = 19670201296 ns
sema4 handshake: min = 2080 ns, max = 57584 ns, avg = 11123 ns

Philippe, do you have an explanation for the jitter still being so high?

Regards - Marc


P.S.: ported fusion demo example attached...


------------------------------------------------------------------------

#include <linux/module.h>
#include <linux/kernel.h>

#include <rts_config.h>

#if defined(HAVE_RTAI_PSE51)
# include <rtai_pse51.h>
#elif defined (HAVE_POSIX_POSIX_H)
# include <posix/posix.h>
#endif

/*
 * Compatibility shims so the demo body can use the familiar user-space
 * names: nanosleep() becomes clock_nanosleep() on CLOCK_MONOTONIC with
 * flags 0, and printf() becomes the kernel's printk().
 */
#define nanosleep(rqtp, rmtp)   clock_nanosleep(CLOCK_MONOTONIC, 0, rqtp, rmtp)
#define printf                  printk

MODULE_LICENSE("GPL");

/* Period of the measured sleep, in microseconds (scaled by 1000 into tv_nsec). */
#define WAIT_USEC 1000

/* semA: sampler -> poster, semB: poster -> sampler (handshake pair). */
static sem_t            semA, semB;
/* Thread ids, kept so cleanup_module() can cancel both threads. */
static pthread_t        sampler_thid, poster_thid;
/* Filled by pthread_info_rt() in init_module(); cpufreq is read by __count2ns(). */
static xnsysinfo_t      info;

/*
 * Fill *info with the CPU frequency reported by xnarch_get_cpu_freq()
 * and the tick value reported by xnpod_get_tickval().
 *
 * info: destination structure; it is dereferenced unconditionally.
 * Returns 0 in all cases.
 */
int pthread_info_rt(xnsysinfo_t *info)
{
        info->cpufreq = xnarch_get_cpu_freq();
        info->tickval = xnpod_get_tickval();
        
        return 0;
}

/*
 * Read the CPU time-stamp counter with the RDTSC instruction.
 *
 * RDTSC returns the low 32 bits in EAX and the high 32 bits in EDX.
 * The two halves are captured through explicit "=a"/"=d" outputs and
 * recombined by hand: the "=A" constraint only names the EDX:EAX pair
 * on 32-bit x86, whereas on x86-64 it lets the compiler pick a single
 * register and the upper half of the counter would be lost.
 *
 * Returns the raw counter value in CPU cycles.
 */
static inline long long __cputime (void)
{
    unsigned int lo, hi;

    __asm__ __volatile__( "rdtsc" : "=a" (lo), "=d" (hi));

    return (long long)(((unsigned long long)hi << 32) | lo);
}

/*
 * Convert a duration expressed in CPU cycles into nanoseconds, i.e.
 * t * 1000000000 / info.cpufreq, using xnarch_ulldiv() for the 64-bit
 * division. The division remainder is discarded.
 */
static inline long long __count2ns (long long   t)
{
        unsigned long discarded_rem;
        long long scaled_cycles = t * 1000000000L;

        return xnarch_ulldiv(scaled_cycles, info.cpufreq, &discarded_rem);
}

/*
 * Handshake partner of the sampler thread: each time semA is posted,
 * answer by posting semB. Never returns on its own; the thread is
 * stopped by pthread_cancel() from cleanup_module().
 */
void *poster_thread (void       *arg)
{
    while (1) {
        sem_wait(&semA);
        sem_post(&semB);
    }
}

/*
 * Measurement thread.
 *
 * Runs 5000 iterations; each iteration
 *   1. sleeps 1 ms to align on the next tick,
 *   2. times a WAIT_USEC microsecond nanosleep() with the TSC,
 *   3. times a semA/semB round trip with poster_thread.
 *
 * All samples are accumulated in CPU cycles and only converted to
 * nanoseconds when the min/max/avg report is printed. The nanosleep
 * figures are reported as jitter, i.e. measured duration minus the
 * requested WAIT_USEC period.
 *
 * arg is unused. Always returns NULL.
 */
void *sampler_thread (void      *arg)
{
    long long minj1 = 10000000, maxj1 = 0, sumj1 = 0;
    long long minj2 = 10000000, maxj2 = 0, sumj2 = 0;
    long long t, t0, t1;
    struct timespec ts;
    int count;
    unsigned long rem;

    printf("Demo thread now sampling at %d Hz ...\n",1000000 / WAIT_USEC);

    for (count = 0; count < 5000; count++)
        {
        /* First, align on the next incoming tick. */
        ts.tv_sec = 0;
        ts.tv_nsec = 1000000;
        nanosleep(&ts,NULL);

        /* Then, perform the measurement. */
        ts.tv_nsec = WAIT_USEC * 1000;
        t0 = __cputime();
        nanosleep(&ts,NULL);
        t1 = __cputime();

        t = t1 - t0;
        if (t > maxj1) maxj1 = t;
        if (t < minj1) minj1 = t;
        sumj1 += t;

        /* Handshake with the poster thread. */
        sem_post(&semA);
        t0 = __cputime();
        sem_wait(&semB);
        t1 = __cputime();

        t = t1 - t0;
        if (t > maxj2) maxj2 = t;
        if (t < minj2) minj2 = t;
        sumj2 += t;
        }

    printf("%d samples:\n",count);

    /*
     * Convert cycles to nanoseconds FIRST, then subtract the requested
     * period (which is in nanoseconds). Subtracting it from the cycle
     * count instead mixes units and, once the result goes negative,
     * produces an absurdly large "avg" value.
     */
    printf("nanosleep jitter: min = %lld ns, max = %lld ns, avg = %lld ns\n",
           __count2ns(minj1) - WAIT_USEC * 1000,
           __count2ns(maxj1) - WAIT_USEC * 1000,
           __count2ns(xnarch_ulldiv(sumj1, count, &rem)) - WAIT_USEC * 1000);

    printf("sema4 handshake: min = %lld ns, max = %lld ns, avg = %lld ns\n",
           __count2ns(minj2),
           __count2ns(maxj2),
           __count2ns(xnarch_ulldiv(sumj2, count, &rem)));

    return NULL;
}

/*
 * Module entry point.
 *
 * Initialises the two handshake semaphores, caches the system timing
 * information in `info`, and starts the poster and sampler threads as
 * detached SCHED_FIFO threads at priority 10 with the FPU flag set to 0.
 *
 * Returns 0 on success. If a thread cannot be created, everything set
 * up so far is torn down again and a negative error code is returned,
 * so the module load fails instead of silently reporting success.
 */
int init_module(void)
{
    struct sched_param  param;
    pthread_attr_t      thattr_poster, thattr_sampler;
    int                 err;

    sem_init(&semA,0,0);
    sem_init(&semB,0,0);

    pthread_info_rt(&info);

    pthread_attr_init(&thattr_poster);
    pthread_attr_init(&thattr_sampler);
    pthread_attr_setdetachstate(&thattr_poster,PTHREAD_CREATE_DETACHED);
    pthread_attr_setdetachstate(&thattr_sampler,PTHREAD_CREATE_DETACHED);
    pthread_attr_setfp_np(&thattr_poster, 0);
    pthread_attr_setfp_np(&thattr_sampler, 0);

    pthread_attr_setschedpolicy(&thattr_poster,SCHED_FIFO);
    pthread_attr_setschedpolicy(&thattr_sampler,SCHED_FIFO);
    param.sched_priority = 10;
    pthread_attr_setschedparam(&thattr_poster,&param);
    pthread_attr_setschedparam(&thattr_sampler,&param);

    err = pthread_create(&poster_thid,&thattr_poster,&poster_thread,NULL);
    if (err)
        goto fail_sems;

    err = pthread_create(&sampler_thid,&thattr_sampler,&sampler_thread,NULL);
    if (err)
        goto fail_poster;

    return 0;

fail_poster:
    /* The poster is already running; stop it before its semaphores go away. */
    pthread_cancel(poster_thid);
fail_sems:
    sem_destroy(&semA);
    sem_destroy(&semB);

    /*
     * pthread_create() reports failure as a positive error number,
     * whereas a module init routine must return a negative one.
     */
    return err > 0 ? -err : err;
}

/*
 * Module exit point: cancels the poster and sampler threads, destroys
 * the two handshake semaphores and logs an unload message.
 */
void cleanup_module(void)
{
        /*
         * NOTE(review): the threads are cancelled before the semaphores
         * are destroyed, presumably so that no thread is still blocked
         * on them; keep this order.
         */
        pthread_cancel(poster_thid);
        pthread_cancel(sampler_thid);

        sem_destroy(&semA);
        sem_destroy(&semB);
        printk("simple kernel thread unloaded\n");
}

Re: performance tests in xenomai

Reply via email to