Issue |
90985
|
Summary |
[X86] Worse runtime performance on Zen CPU when optimizing for Zen
|
Labels |
new issue
|
Assignees |
|
Reporter |
Systemcluster
|
The following code compiled with `-O3 -march=znver4` (or any other `znver`) runs around 25% slower on Zen hardware than when compiled with `-O3 -march=x86-64-v4` or the baseline `x86-64`.
```c
/*
 * Trial-division check used as the kernel of this reproducer.
 *
 * Returns false as soon as some i in [2, lim) divides n evenly, where
 * lim = ceil(n / 2.0); returns true otherwise.
 *
 * Note: as written this is not a strict primality test. Every n < 2 yields
 * true, and so does n == 4 (lim is 2, so the loop body never executes).
 */
bool check_prime(int64_t n) {
if (n < 2) {
return true;
}
// Exclusive upper bound for candidate divisors: half of n, rounded up.
int64_t lim = (int64_t)ceil((double)n / 2.0);
for (int64_t i = 2; i < lim; i++) {
// Signed 64-bit remainder, evaluated once per iteration.
// NOTE(review): presumably the operation whose codegen differs between
// -march settings; confirm against the linked disassembly.
if (n % i == 0) {
return false;
}
}
return true;
}
```
<details>
<summary>Full code</summary>
```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <time.h>
/*
 * Trial-division check used as the benchmark kernel.
 *
 * Returns false as soon as some i in [2, lim) divides n evenly, where
 * lim = ceil(n / 2.0); returns true otherwise.
 *
 * Note: as written this is not a strict primality test. Every n < 2 yields
 * true, and so does n == 4 (lim is 2, so the loop body never executes).
 */
bool check_prime(int64_t n) {
if (n < 2) {
return true;
}
// Exclusive upper bound for candidate divisors: half of n, rounded up.
int64_t lim = (int64_t)ceil((double)n / 2.0);
for (int64_t i = 2; i < lim; i++) {
// Signed 64-bit remainder, evaluated once per iteration.
if (n % i == 0) {
return false;
}
}
return true;
}
/*
 * Benchmark driver: calls check_prime() for every i in [0, 1000000), counts
 * the calls that return true, and prints the elapsed clock() time in seconds
 * followed by that count.
 */
int main() {
// Starting clock() reading; subtracted from a second reading when printing.
clock_t now = clock();
int sum = 0;
for (int i = 0; i < 1000000; i++) {
if (check_prime(i)) {
sum += 1;
}
}
// Elapsed clock ticks are converted to seconds via CLOCKS_PER_SEC.
// Printing sum also keeps the loop's result observable.
printf("%f, %d\n", (double)(clock() - now) / CLOCKS_PER_SEC, sum);
return 0;
}
```
</details>
Running on a Ryzen 7950X:
```cmd
> clang.exe -std=c11 -O3 -march=znver4 ./src/perf.c && ./a.exe
24.225000 seconds, 78501
> clang.exe -std=c11 -O3 -march=x86-64-v4 ./src/perf.c && ./a.exe
20.866000 seconds, 78501
> clang.exe -std=c11 -O3 ./src/perf.c && ./a.exe
20.819000 seconds, 78501
```
```cmd
> clang.exe --version
clang version 18.1.4
Target: x86_64-pc-windows-msvc
Thread model: posix
InstalledDir: C:\Program Files\LLVM\bin
```
Disassembly here: https://godbolt.org/z/orssnKP74
I originally noticed the issue with Rust: https://godbolt.org/z/Kh1v3G74K
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs