Issue 90985
Summary [X86] Worse runtime performance on Zen CPU when optimizing for Zen
Labels new issue
Assignees
Reporter Systemcluster
    The following code, compiled with `-O3 -march=znver4` (or any other `znver` target), runs around 25% slower on Zen hardware than the same code compiled with `-O3 -march=x86-64-v4` or the baseline `x86-64`.

```c
/*
 * Trial-division check used as the benchmark kernel.
 *
 * Returns true when n < 2, or when no i in [2, ceil(n / 2.0)) divides n;
 * returns false as soon as a divisor is found.
 *
 * NOTE(review): this is not a strict primality test. Values below 2 are
 * reported as "prime", and so is n == 4 (the bound is 2, so the loop body
 * never runs). It is left as-is because it is the reproducer for the
 * reported codegen difference.
 */
bool check_prime(int64_t n) {
    if (n < 2) {
        return true;
    }
    // Exclusive upper bound for trial divisors: n / 2 rounded up, computed in double.
    int64_t lim = (int64_t)ceil((double)n / 2.0);
    for (int64_t i = 2; i < lim; i++) {
        // 64-bit signed remainder; this is the only work done per iteration.
        if (n % i == 0) {
            return false;
        }
 }
    return true;
}
```

<details>
<summary>Full code</summary>

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <time.h>

/*
 * Trial-division check used as the benchmark kernel.
 *
 * Returns true when n < 2, or when no i in [2, ceil(n / 2.0)) divides n;
 * returns false as soon as a divisor is found.
 *
 * NOTE(review): this is not a strict primality test. Values below 2 are
 * reported as "prime", and so is n == 4 (the bound is 2, so the loop body
 * never runs). It is left as-is because it is the reproducer for the
 * reported codegen difference.
 */
bool check_prime(int64_t n) {
    if (n < 2) {
 return true;
    }
    // Exclusive upper bound for trial divisors: n / 2 rounded up, computed in double.
    int64_t lim = (int64_t)ceil((double)n / 2.0);
    for (int64_t i = 2; i < lim; i++) {
        // 64-bit signed remainder; this is the only work done per iteration.
        if (n % i == 0) {
            return false;
        }
    }
    return true;
}

/*
 * Benchmark driver: calls check_prime() for every i in [0, 1000000),
 * counts how many calls return true, and prints the clock() time spent
 * (converted to seconds) followed by that count.
 */
int main() {
    // Starting clock() reading; the difference is taken after the loop.
    clock_t now = clock();
    int sum = 0;
    for (int i = 0; i < 1000000; i++) {
        if (check_prime(i)) {
            sum += 1;
        }
    }
    // Printing sum also keeps the loop's result observable.
    printf("%f, %d\n", (double)(clock() - now) / CLOCKS_PER_SEC, sum);
    return 0;
}
```

</details>

Running on a Ryzen 7950X:

```cmd
> clang.exe -std=c11 -O3 -march=znver4 ./src/perf.c && ./a.exe
24.225000 seconds, 78501

> clang.exe -std=c11 -O3 -march=x86-64-v4 ./src/perf.c && ./a.exe
20.866000 seconds, 78501

> clang.exe -std=c11 -O3 ./src/perf.c && ./a.exe                  
20.819000 seconds, 78501
```

```cmd
> clang.exe --version
clang version 18.1.4
Target: x86_64-pc-windows-msvc
Thread model: posix
InstalledDir: C:\Program Files\LLVM\bin
```

Disassembly here: https://godbolt.org/z/orssnKP74

I originally noticed the issue with Rust: https://godbolt.org/z/Kh1v3G74K
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to