On Fri, Jan 05, 2018 at 09:24:31PM +0100, Willy Tarreau wrote:
> On Fri, Jan 05, 2018 at 07:58:04PM +0000, Alan Cox wrote:
> > and the techniques
> > to deal with rdtsc disabling are well known and used in other existing
> > attacks.
> 
> Yes, I've tested one of them for the spectre poc, but it really did not
> work well, leading to only about 1 byte in 10 being valid. In fact
> either you run the counter thread on the other sibling of the same core
> and it significantly perturbs the local activity, or you run it on
> another core, and the time it takes to retrieve the time requires some
> L1+L2 traversal. I'm not saying it doesn't work at all, I'm saying that
> the accuracy is highly degraded and that can turn something 100%
> reproducible into something requiring a long time to run, making the
> attack more noticeable (and possibly letting observed data degrade
> during the period).

So I worked on an improved RDTSC emulation (attached) and it works
reasonably well on the spectre poc found online. Its accuracy is almost
as good as rdtsc on my i7-6700k on two threads running on the same core,
and 8-10 times worse on two distinct cores, but still leads to ~50%
success rate on the PoC. So my conclusion now is that it's indeed
pointless to invest time trying to make RDTSC less accessible/accurate.

Willy
/*
 * Evaluation of alternatives to rdtsc - 2018-01-06 - Willy Tarreau <w...@1wt.eu>
 *
 * Observation on core-i7 6700k @4.4 GHz :
 *  - 2 threads, same core:
 *      hard resolution (local)  ~= 28-30 cycles
 *      soft resolution (remote) ~= 29 cycles
 *
 *  - 2 distinct cores:
 *      hard resolution (local)  ~= 27-28 cycles
 *      soft resolution (remote) ~= 180-260 cycles
 */
#include <pthread.h>
#include <signal.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* number of samples stored in each log by a sampling loop */
#define MAXLOG 10000000

/* software counter: written by the run_softtsc() thread, sampled by main().
 * It is volatile so that every read/write in C really accesses memory.
 */
static volatile uint64_t softtsc;

/* t0/t1      : rdtsc() readings taken before/after a sampling loop
 * hard/softdur: t1 - t0 for the rdtsc (hard) and softtsc (soft) loops
 * hard/softamp: last logged sample minus first logged sample
 */
static uint64_t t0, t1, harddur, softdur, hardamp, softamp;

/* samples collected from rdtsc() */
static uint64_t hardlog[MAXLOG];

/* samples collected from softtsc */
static uint64_t softlog[MAXLOG];

/* handle of the thread running run_softtsc() */
static pthread_t thr;

/* number of value changes observed while scanning hardlog / softlog */
static int hardcnt, softcnt;

/* Read the CPU time-stamp counter with the x86 RDTSC instruction and return
 * it as one 64-bit value. The instruction delivers the low half in EAX and
 * the high half in EDX, which are recombined here.
 */
static inline uint64_t rdtsc()
{
        uint32_t lo, hi;

        __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
        return ((uint64_t)hi << 32) | lo;
}

/*
 * Thread body implementing the software time-stamp counter: it spins forever
 * storing an ever-increasing 64-bit value into <softtsc>, which another
 * thread can sample as a clock. It never returns; <arg> is unused.
 */
void *run_softtsc(void *arg)
{
        register uint64_t c = 0;

        (void)arg;

        /* best resolution so far : 29 cycles.
         *
         * Each iteration stores <c> into <softtsc>, increments <c>, issues
         * an sfence and jumps back to the label, so control never leaves
         * this asm statement. <softtsc> is written by the asm, hence it is
         * declared as an output ("=m") rather than an input operand; <c> is
         * both read and modified ("+r").
         */
        __asm__ volatile(".align 16\n"
                         "1:\n"
                         "mov %0, %1\n"
                         "inc %0\n"
                         "sfence\n"
                         "jmp 1b\n"
                         : "+r"(c), "=m"(softtsc));

        /* Never reached since the asm loop above does not exit. Kept as the
         * plain C variant, measured at 36-37 cycles. Other variants tried
         * were "incq"/"addq $1" directly on <softtsc> followed by sfence,
         * and "softtsc = c++" followed by sfence.
         */
        while (1) {
                softtsc++;
        }
}

/* Print a printf-style message built from <format> and the following
 * arguments to stderr, then terminate the process with exit status <code>.
 * This function does not return.
 */
__attribute__((noreturn)) void die(int code, const char *format, ...)
{
        va_list ap;

        va_start(ap, format);
        vfprintf(stderr, format, ap);
        va_end(ap);

        exit(code);
}

/* Count how many times the value changes while walking <samples> from index
 * 0 to <len>-1. The walk starts from an implicit previous value of 0, so a
 * run of leading zero entries is not counted as a change.
 */
static int count_changes(const uint64_t *samples, int len)
{
        uint64_t prev = 0;
        int changes = 0;
        int i;

        for (i = 0; i < len; i++) {
                if (samples[i] != prev) {
                        changes++;
                        prev = samples[i];
                }
        }
        return changes;
}

/* Print one result line for the counter called <name>: loop duration in
 * cycles, amplitude (last sample minus first), number of value changes and
 * resolution (duration divided by the number of changes). The resolution is
 * reported as 0 when no change was observed, to avoid dividing by zero.
 */
static void report(const char *name, uint64_t dur, uint64_t amp, int cnt)
{
        uint64_t res = cnt ? dur / cnt : 0;

        printf("%s: duration=%llu cycles amplitude=%llu values=%llu resolution=%llu\n",
               name,
               (unsigned long long)dur, (unsigned long long)amp,
               (unsigned long long)cnt, (unsigned long long)res);
}

/* Start the software counter thread, then sample rdtsc() and <softtsc>
 * MAXLOG times each, timing both loops with rdtsc(), and print the
 * statistics of each counter. Command-line arguments are ignored.
 * Returns 0; exits with status 1 if the thread cannot be created.
 */
int main(int argc, char **argv)
{
        int err;
        int log;

        (void)argc;
        (void)argv;

        err = pthread_create(&thr, NULL, &run_softtsc, NULL);
        if (err != 0)
                die(1, "cannot create thread\n");

        /* The counter thread loops forever and is never joined; it goes
         * away when the process exits.
         */
        pthread_detach(thr);

        /* reference pass: log the hardware counter */
        t0 = rdtsc();
        for (log = 0; log < MAXLOG; log++)
                hardlog[log] = rdtsc();
        t1 = rdtsc();
        harddur = t1 - t0;
        hardamp = hardlog[MAXLOG-1] - hardlog[0];
        hardcnt = count_changes(hardlog, MAXLOG);

        /* measured pass: log the software counter, still timed with rdtsc */
        t0 = rdtsc();
        for (log = 0; log < MAXLOG; log++)
                softlog[log] = softtsc;
        t1 = rdtsc();
        softdur = t1 - t0;
        softamp = softlog[MAXLOG-1] - softlog[0];
        softcnt = count_changes(softlog, MAXLOG);

        report("hard", harddur, hardamp, hardcnt);
        report("soft", softdur, softamp, softcnt);

        /* Debugging aid, dumps both logs side by side:
         *   for (log = 0; log < MAXLOG; log++)
         *           printf("%d %llu %llu\n", log,
         *                  (unsigned long long)hardlog[log],
         *                  (unsigned long long)softlog[log]);
         */
        return 0;
}

Reply via email to