On Fri, Mar 15, 2024 at 09:24:36PM +1000, Gavin Shan wrote: > > On 3/15/24 21:05, Michael S. Tsirkin wrote: > > On Fri, Mar 15, 2024 at 08:45:10PM +1000, Gavin Shan wrote: > > > > > Yes, I guess smp_wmb() ('dmb') is buggy on NVidia's grace-hopper > > > > > platform. I tried > > > to reproduce it with my own driver where one thread writes to the shared > > > buffer > > > and another thread reads from the buffer. I don't hit the out-of-order > > > issue so > > > far. > > > > Make sure the 2 areas you are accessing are in different cache lines. > > > > Yes, I already put those 2 areas to separate cache lines. > > > > > > My driver may be not correct somewhere and I will update if I can > > > reproduce > > > the issue with my driver in the future. > > > > Then maybe your change is just making virtio slower and masks the bug > > that is actually elsewhere? > > > > You don't really need a driver. Here's a simple test: without barriers > > assertion will fail. With barriers it will not. > > (Warning: didn't bother testing too much, could be buggy. > > > > --- > > > > #include <pthread.h> > > #include <stdio.h> > > #include <stdlib.h> > > #include <assert.h> > > > > #define FIRST values[0] > > #define SECOND values[64] > > > > volatile int values[100] = {}; > > > > void* writer_thread(void* arg) { > > while (1) { > > FIRST++; > > // NEED smp_wmb here > __asm__ volatile("dmb ishst" : : : "memory"); > > SECOND++; > > } > > } > > > > void* reader_thread(void* arg) { > > while (1) { > > int first = FIRST; > > // NEED smp_rmb here > __asm__ volatile("dmb ishld" : : : "memory"); > > int second = SECOND; > > assert(first - second == 1 || first - second == 0); > > } > > } > > > > int main() { > > pthread_t writer, reader; > > > > pthread_create(&writer, NULL, writer_thread, NULL); > > pthread_create(&reader, NULL, reader_thread, NULL); > > > > pthread_join(writer, NULL); > > pthread_join(reader, NULL); > > > > return 0; > > } > > > > Had a quick test on NVidia's grace-hopper and Ampere's CPUs. I hit > the assert on both of them. After replacing 'dmb' with 'dsb', I can > hit assert on both of them too. I need to look at the code closely. > > [root@virt-mtcollins-02 test]# ./a > a: a.c:26: reader_thread: Assertion `first - second == 1 || first - second == > 0' failed. > Aborted (core dumped) > > [root@nvidia-grace-hopper-05 test]# ./a > a: a.c:26: reader_thread: Assertion `first - second == 1 || first - second == > 0' failed. > Aborted (core dumped) > > Thanks, > Gavin
Actually this test is broken. No need for ordering it's a simple race. The following works on x86 though (x86 does not need barriers though). #include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <assert.h> #if 0 #define x86_rmb() asm volatile("lfence":::"memory") #define x86_mb() asm volatile("mfence":::"memory") #define x86_smb() asm volatile("sfence":::"memory") #else #define x86_rmb() asm volatile("":::"memory") #define x86_mb() asm volatile("":::"memory") #define x86_smb() asm volatile("":::"memory") #endif #define FIRST values[0] #define SECOND values[640] #define FLAG values[1280] volatile unsigned values[2000] = {}; void* writer_thread(void* arg) { while (1) { /* Now synchronize with reader */ while(FLAG); FIRST++; x86_smb(); SECOND++; x86_smb(); FLAG = 1; } } void* reader_thread(void* arg) { while (1) { /* Now synchronize with writer */ while(!FLAG); x86_rmb(); unsigned first = FIRST; x86_rmb(); unsigned second = SECOND; assert(first - second == 1 || first - second == 0); FLAG = 0; if (!(first %1000000)) printf("%d\n", first); } } int main() { pthread_t writer, reader; pthread_create(&writer, NULL, writer_thread, NULL); pthread_create(&reader, NULL, reader_thread, NULL); pthread_join(writer, NULL); pthread_join(reader, NULL); return 0; }