Hi, I recently ran a program that uses a spin lock (which I implemented myself with atomic operations) on gem5 under the ARM architecture, but the result was not as expected. I ran the same program on a physical arm64 machine and the result was correct. I also compiled the same source code for the x86 ISA, and the result was also correct on gem5. So I suspect that either something is wrong in the gem5 source code or I am compiling the program the wrong way. (I tried both full-system mode and syscall-emulation mode, but got the wrong result in both.)
Here is my running log: gem5 executing on ubuntu, pid 23488 command line: build/ARM_HTM/gem5.debug configs/example/se.py --cpu-type=O3_ARM_v7a_3 --num-cpus=4 --ruby --cmd=benchmark/arm-lock Global frequency set at 1000000000000 ticks per second **** REAL SIMULATION **** Parallel histogram with 4 procs Hello from thread 0 Hello from thread 1 Hello from thread 3 Hello from thread 2 Goodbye from thread 0 Goodbye from thread 1 Goodbye from thread 3 Goodbye from thread 2 2 seconds Total is 2943 Expected total is 4000 Exiting @ tick 187329500 because exiting with last active thread context Here is my program source code: #include <stdio.h> #include <stdlib.h> #include <assert.h> #include <pthread.h> #include <unistd.h> #include <stdatomic.h> #include <time.h> #define ARRAYSIZE 2 #define ITERATIONS 1000 // spin lock typedef atomic_int lock_t; void lock_init(lock_t *lock) { atomic_init(lock, 0); } void lock_acquire(lock_t *lock) { while (atomic_exchange_explicit(lock, 1, memory_order_acquire)) ; // spin until acquired } int lock_is_acquired(lock_t *lock) { return atomic_load_explicit(lock, memory_order_acquire); } void lock_release(lock_t *lock) { atomic_store_explicit(lock, 0, memory_order_release); } volatile long int histogram[ARRAYSIZE]; lock_t global_lock; void* work(void* void_ptr) { // Use thread id for RNG seed, // this will prevent threads generating the same array indices. 
long int idx = (long int)void_ptr; unsigned int seedp = (unsigned int)idx; int i, rc; printf("Hello from thread %ld\n", idx); for (i=0; i<ITERATIONS; i++) { int num = rand_r(&seedp)%ARRAYSIZE; lock_acquire(&global_lock); // start critical section long int temp = histogram[num]; temp += 1; histogram[num] = temp; // end critical section lock_release(&global_lock); } printf("Goodbye from thread %ld\n", idx); } int main() { long int i, total, numberOfProcessors; pthread_t *threads; int rc; clock_t start, finish, duration; numberOfProcessors = sysconf(_SC_NPROCESSORS_ONLN); printf("Parallel histogram with %ld procs\n", numberOfProcessors); lock_init(&global_lock); // initialise the array for (i=0; i<ARRAYSIZE; i++) histogram[i] = 0; threads = (pthread_t*) malloc(sizeof(pthread_t)*numberOfProcessors); for (i=0; i<numberOfProcessors-1; i++) { rc = pthread_create(&threads[i], NULL, work, (void*)i); assert(rc==0); } work((void*)(numberOfProcessors-1)); start = clock(); // wait for worker threads for (i=0; i<numberOfProcessors-1; i++) { rc = pthread_join(threads[i], NULL); assert(rc==0); } finish = clock(); duration = (double)(finish - start); printf("%ld seconds\n", duration); // verify array contents total = 0; for (i=0; i<ARRAYSIZE; i++) total += histogram[i]; // free resources free(threads); printf("Total is %lu\nExpected total is %lu\n", total, ITERATIONS*numberOfProcessors); return 0; } I compiled it with the following command: aarch64-linux-gnu-gcc-10 -std=c11 -static -pthread -o arm-lock arm-lock.c Could you please help me with this problem? Thanks in advance. Kind Regards, Chao _______________________________________________ gem5-users mailing list -- gem5-users@gem5.org To unsubscribe send an email to gem5-users-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s