Hi,
 Recently I ran a program that uses a spin lock (which I implemented myself
with atomic operations) on gem5 for the ARM architecture, but the result was
not as expected! When I ran the same program on a physical arm64 machine, the
result was correct. I also compiled the same source code for the x86 ISA, and
on gem5 that result was correct as well. So I suspect there may be something
wrong in the gem5 source code, or perhaps I am compiling it the wrong way. (I
tried both full-system mode and syscall-emulation mode, but did not get the
right results with either.)

Here is my running log:

gem5 executing on ubuntu, pid 23488
command line: build/ARM_HTM/gem5.debug configs/example/se.py 
--cpu-type=O3_ARM_v7a_3 --num-cpus=4 --ruby --cmd=benchmark/arm-lock

Global frequency set at 1000000000000 ticks per second
**** REAL SIMULATION ****
Parallel histogram with 4 procs
Hello from thread 0
Hello from thread 1
Hello from thread 3
Hello from thread 2
Goodbye from thread 0
Goodbye from thread 1
Goodbye from thread 3
Goodbye from thread 2
2 seconds
Total is 2943
Expected total is 4000
Exiting @ tick 187329500 because exiting with last active thread context

Here is my program source code:

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <pthread.h>
#include <unistd.h>
#include <stdatomic.h>
#include <time.h>

#define ARRAYSIZE             2       // number of histogram buckets
#define ITERATIONS            1000    // locked increments performed per thread

/*
 * Minimal test-and-set spin lock built on C11 atomics.
 * State: 0 = unlocked, 1 = held.
 */
typedef atomic_int lock_t;

/* Put the lock into the unlocked state; must happen-before any use. */
void lock_init(lock_t *lock) {
    atomic_init(lock, 0);
}

/*
 * Busy-wait until the lock is acquired.
 *
 * Test-and-test-and-set: after a failed exchange, spin on a plain
 * relaxed load until the lock looks free, then retry the exchange.
 * This keeps contending cores spinning in their local cache instead
 * of hammering the line with read-modify-write operations.
 */
void lock_acquire(lock_t *lock) {
    for (;;) {
        // acquire ordering: a successful exchange synchronizes with the
        // releasing store in lock_release().
        if (!atomic_exchange_explicit(lock, 1, memory_order_acquire))
            return; // got it
        while (atomic_load_explicit(lock, memory_order_relaxed))
            ; // spin locally until the holder releases
    }
}

/* Return non-zero if the lock is currently held. */
int lock_is_acquired(lock_t *lock) {
    return atomic_load_explicit(lock, memory_order_acquire);
}

/* Release the lock; the release store publishes the critical section. */
void lock_release(lock_t *lock) {
    atomic_store_explicit(lock, 0, memory_order_release);
}

// Shared histogram buckets; every read-modify-write in work() is guarded
// by global_lock.
// NOTE(review): volatile is not what provides cross-thread visibility here --
// the lock's acquire/release ordering does; volatile is likely a leftover.
volatile long int histogram[ARRAYSIZE];
lock_t global_lock;

/*
 * Worker thread body: perform ITERATIONS increments of pseudo-randomly
 * chosen histogram buckets, each under global_lock.
 *
 * void_ptr carries the thread's index (smuggled through the pointer),
 * which also seeds the per-thread RNG so threads generate different
 * bucket sequences.
 *
 * Returns NULL (result is consumed, and discarded, by pthread_join).
 */
void* work(void* void_ptr) {
    long int idx = (long int)void_ptr;
    unsigned int seedp = (unsigned int)idx; // per-thread rand_r() state
    int i;

    printf("Hello from thread %ld\n", idx);

    for (i = 0; i < ITERATIONS; i++)
    {
        int num = rand_r(&seedp) % ARRAYSIZE;

        lock_acquire(&global_lock);

        // start critical section
        long int temp = histogram[num];
        temp += 1;
        histogram[num] = temp;
        // end critical section

        lock_release(&global_lock);
    }

    printf("Goodbye from thread %ld\n", idx);

    // BUG FIX: the original fell off the end of a non-void function whose
    // return value is used by pthread_join -- undefined behavior.
    return NULL;
}

/*
 * Spawn one worker per online CPU (main thread takes the last share),
 * then verify that the locked increments add up to
 * ITERATIONS * numberOfProcessors.
 */
int main(void) {
    long int i, total, numberOfProcessors;
    pthread_t *threads;
    int rc;
    clock_t start, finish;

    numberOfProcessors = sysconf(_SC_NPROCESSORS_ONLN);

    printf("Parallel histogram with %ld procs\n", numberOfProcessors);

    lock_init(&global_lock);

    // initialise the array
    for (i = 0; i < ARRAYSIZE; i++)
        histogram[i] = 0;

    threads = malloc(sizeof *threads * numberOfProcessors);
    if (threads == NULL) {             // BUG FIX: malloc result was unchecked
        perror("malloc");
        return 1;
    }

    // BUG FIX: start the clock before any work is done; the original
    // started it after main's own work() call, so the measurement
    // covered only the join phase.
    start = clock();

    for (i = 0; i < numberOfProcessors - 1; i++) {
        rc = pthread_create(&threads[i], NULL, work, (void*)i);
        assert(rc == 0);
    }
    // main thread performs the last thread's share itself
    work((void*)(numberOfProcessors - 1));

    // wait for worker threads
    for (i = 0; i < numberOfProcessors - 1; i++) {
        rc = pthread_join(threads[i], NULL);
        assert(rc == 0);
    }

    finish = clock();

    // BUG FIX: clock() returns processor ticks, not seconds; convert via
    // CLOCKS_PER_SEC. (clock() sums CPU time across threads -- it is not
    // wall-clock time.)
    double duration = (double)(finish - start) / CLOCKS_PER_SEC;
    printf("%f seconds\n", duration);

    // verify array contents
    total = 0;
    for (i = 0; i < ARRAYSIZE; i++)
        total += histogram[i];

    // free resources
    free(threads);

    // BUG FIX: values are signed long; %lu was a format-specifier mismatch (UB).
    printf("Total is %ld\nExpected total is %ld\n",
        total, (long int)ITERATIONS * numberOfProcessors);

    return 0;
}


I compiled it with the following command: aarch64-linux-gnu-gcc-10 -std=c11 
-static -pthread -o arm-lock arm-lock.c

Could you please help me with this problem? Thanks in advance.

Kind Regards,

Chao
_______________________________________________
gem5-users mailing list -- gem5-users@gem5.org
To unsubscribe send an email to gem5-users-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to