| Issue |
176335
|
| Summary |
[NVPTX] Wrong code at -Os on nvptx64-unknown-nvidiacl
|
| Labels |
miscompilation,
backend:NVPTX,
generated by fuzzer
|
| Assignees |
|
| Reporter |
XChy
|
### Testcase
Reduced OpenCL testcase:
```opencl
/* Wrapper struct holding a single int member; used by struct s1 as the
 * type of g_488 and as the pointee type of g_487.
 * NOTE(review): the member declaration has no terminating ';' -- kept
 * exactly as reduced so the reproducer is not altered. */
struct s0
{
int f7
};
/* Aggregate state passed by pointer to func_6 and built in the kernel
 * `entry`. Members appear in positional-initialiser order, so the order
 * here determines which value each initialiser in `entry` lands on.
 * NOTE(review): the last member (g_1238) has no terminating ';' -- kept
 * exactly as reduced. */
struct s1
{
int g_200;
uint g_232[2];
/* Written through a ushort pointer in func_6. */
ushort g_244;
/* AND-updated through a uchar pointer in func_6. */
uchar g_323;
short g_372;
short *g_464;
struct s0 g_488;
/* Pointer to volatile: func_6 dereferences it and discards the value. */
volatile struct s0 *g_487;
int g_709[7];
ulong *g_829;
/* OR-updated in func_6; this is the value the kernel writes to its outputs. */
long g_951;
long g_1115[9][4][1];
ulong **g_1144;
ulong g_1143;
uint g_1155;
int g_1238
};
/*
 * Reads and updates several members of *p_1669 based on the three scalar
 * arguments.
 *
 * p_8    - unsigned value; gates the loop nest and the final division.
 * p_9    - char; used as a divisor and as the inner loop counter.
 * p_10   - char; converted to ushort and used as a dividend.
 * p_1669 - state struct whose members are read and written.
 *
 * Returns p_1669->g_232[1] converted to char when p_1669->g_1238 is
 * non-zero, otherwise 0.
 *
 * `a ?: b` is the GNU conditional with omitted middle operand: it yields
 * `a` when `a` is non-zero and `b` otherwise.
 */
char func_6(uint p_8, char p_9, char p_10, struct s1 *p_1669)
{
int __trans_tmp_3, __trans_tmp_2;
uchar *l_1304 = &p_1669->g_323;
ushort *l_1316 = &p_1669->g_244;
long *l_1327 = &p_1669->g_951;
/* Both int members are narrowed to char on initialisation. */
char si1 = p_1669->g_709[6], si2 = p_1669->g_200;
/* Groups as (si2 == 0 || (si1 && si2 == 1)) ?: (si1 % si2).
 * The remainder is evaluated only when the guard is 0, which implies
 * si2 != 0. When the guard is non-zero the result is 1. */
__trans_tmp_2 = si2 == 0 || si1 && si2 == 1 ?: si1 % si2;
/* si2 is reused: p_8 (uint) is narrowed to char. */
si2 = p_8;
/* Expression statement whose value is discarded. Grouping:
 *   ((...) && __trans_tmp_2 && 7 / __trans_tmp_2) ?: (si2 || (*l_1316 = ...))
 * The only side effect is the store to *l_1316 (g_244), which happens only
 * when the condition before ?: is 0 and si2 is 0. The stored value is the
 * 0/1 result of comparing the char p_9 with the pointer p_1669. */
(__trans_tmp_2 && __trans_tmp_2 / si2 && si2 <= 8 / __trans_tmp_2
|| si2 && __trans_tmp_2 - si2 || si2 <= 0)
&& __trans_tmp_2 && 7 / __trans_tmp_2
?: si2 || (*l_1316 = p_9 != p_1669);
/* Loop nest entered only when p_8 is non-zero. The outer loop runs while
 * g_372 is non-zero; the inner loop runs while p_9 is non-zero and
 * increments it. Each iteration reads the volatile *g_487 (value dropped by
 * the comma operator) and stores *g_829, narrowed to short, into *g_464. */
if (p_8)
for (; p_1669->g_372;)
for (; p_9; p_9++)
*p_1669->g_464 = (*p_1669->g_487, *p_1669->g_829);
/* Groups as (p_8 & -p_8) ?: p_8. For unsigned p_8, p_8 & -p_8 keeps only
 * the lowest set bit; it is 0 only when p_8 is 0, and then the fallback
 * p_8 is 0 as well. */
__trans_tmp_3 = p_8 & -p_8 ?: p_8;
/* Taken when the low 8 bits of (4 ^ __trans_tmp_3) are not all zero. */
if ((uchar)(4 ^ __trans_tmp_3))
{
/* char -> ushort conversion of p_10. */
ushort ui1 = p_10;
/* Right-to-left:
 *   1. x = (p_9 == 0) ?: (ui1 / p_9). The division is evaluated only when
 *      p_9 != 0; both operands undergo integer promotion to int first.
 *   2. *l_1327 (g_951) |= x.
 *   3. *l_1316 (g_244) = the updated g_951, narrowed to ushort.
 *   4. (that ushort value < g_1143) && g_1155 gives 0 or 1, which is stored
 *      through the double pointer g_1144. */
**p_1669->g_1144 =
(*l_1316 = *l_1327 |= p_9 == 0 ?: ui1 / p_9) < p_1669->g_1143
&& p_1669->g_1155;
}
/* g_323 &= 1 when p_8 == 0; otherwise g_323 &= p_10 / p_8 (the division is
 * evaluated only when p_8 != 0). */
*l_1304 &= p_8 == 0 ?: p_10 / p_8;
/* Local scratch array: bs_inc_0 runs 0..6, so indices 0..3 are touched and
 * each update is an unsigned subtraction of 6. The array is not read again
 * in this function. */
uint bs_var_9[5] = {};
for (uchar bs_inc_0 = 0; bs_inc_0 < 7; bs_inc_0 += 1)
bs_var_9[bs_inc_0 / 2] -= 6;
if (p_1669->g_1238) return p_1669->g_232[1];
return 0;
}
/*
 * Kernel entry point: builds a struct s1 instance, calls func_6 on it once,
 * and writes the resulting g_951 to both result[0] and bs_result[0].
 */
__kernel void entry(__global ulong *result, __global ulong *bs_result)
{
/* c_1670 is declared without an initialiser; it is populated by the struct
 * copy from c_1671 below. p_1669 points at c_1670 throughout. */
struct s1 c_1670;
struct s1 *p_1669 = &c_1670;
/* Declared without initialisers; only their addresses are used here. */
long p_1669_12;
short p_1669_13;
/* Positional initialisers, in member order. Members after g_1144
 * (g_1143, g_1155, g_1238) have no initialiser and are therefore zero.
 * The two pointers formed through p_1669 refer to members of c_1670,
 * not of c_1671. */
struct s1 c_1671 = { 5, /* g_200 */
{}, /* g_232 */
6, /* g_244 */
1, /* g_323 */
5, /* g_372 */
&p_1669_13, /* g_464 */
{}, /* g_488 */
&p_1669->g_488, /* g_487 */
{}, /* g_709 */
&p_1669_12, /* g_829: address of a long stored in a ulong* member */
5, /* g_951 */
{}, /* g_1115 */
&p_1669->g_829 }; /* g_1144 */
c_1670 = c_1671;
/* First argument is g_1115[2][0][0], which comes from the empty `{}`
 * initialiser above. The return value of func_6 is discarded. */
func_6(p_1669->g_1115[2][0][0], 2, 199, p_1669);
bs_result[0] = result[0] = p_1669->g_951;
}
```
### Reproduce
GPU: NVIDIA GeForce RTX 5070 Ti
Reproduction method: run the kernel `entry` on a single thread and print `result[0]` and `bs_result[0]` on the host.
Host launcher cl_launcher.c: https://github.com/user-attachments/files/24588166/cl_launcher.c
Result at -O1:
```
0x7fe7,
0x7fe7,
```
Result at -Os:
```
0xffffffffffffffe5,
0xffffffffffffffe5,
```
Full reproduction commands:
```bash
# Compile the device code
clang -O1 -xcl --target=nvptx64-unknown-nvidiacl -march=sm_120 -Xclang -mlink-builtin-bitcode -Xclang $(clang -print-resource-dir)/lib/libclc/nvptx64--nvidiacl.bc -S test.cl -o correct.out
clang -Os -xcl --target=nvptx64-unknown-nvidiacl -march=sm_120 -Xclang -mlink-builtin-bitcode -Xclang $(clang -print-resource-dir)/lib/libclc/nvptx64--nvidiacl.bc -S test.cl -o wrong.out
# Compile the host launcher
gcc -lOpenCL cl_launcher.c -o cl_launcher
# Run the kernel entry on a single thread
./cl_launcher -f correct.out -p <platform id> -d <device id> -l 1 -g 1 ---backsmith ---binary
./cl_launcher -f wrong.out -p <platform id> -d <device id> -l 1 -g 1 ---backsmith ---binary
```
### UB check
We treated the device code as C code and ran it on x86-64 with sanitizers enabled: https://godbolt.org/z/d9341nxME
It passed UBSan, MSan, and ASan.
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs