Author: Artem Belevich Date: 2025-06-18T10:08:27-07:00 New Revision: 298f1c276f4f9c18b25a79ffe6e619e89c5fbf7e
URL: https://github.com/llvm/llvm-project/commit/298f1c276f4f9c18b25a79ffe6e619e89c5fbf7e DIFF: https://github.com/llvm/llvm-project/commit/298f1c276f4f9c18b25a79ffe6e619e89c5fbf7e.diff LOG: Revert "Add missing intrinsics to cuda headers" (#144755) Reverts llvm/llvm-project#143664 as it breaks CUDA compilation. Added: Modified: clang/lib/Headers/__clang_cuda_intrinsics.h Removed: ################################################################################ diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h index 5e13f3f78df70..8b230af6f6647 100644 --- a/clang/lib/Headers/__clang_cuda_intrinsics.h +++ b/clang/lib/Headers/__clang_cuda_intrinsics.h @@ -479,290 +479,6 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32, return ret; } -#pragma push_macro("__INTRINSIC_LOAD") -#define __INTRINSIC_LOAD(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \ - __Clobber) \ - inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \ - __TmpType __ret; \ - asm(__AsmOp " %0, [%1];" : __AsmType(__ret) : "l"(__ptr)__Clobber); \ - return (__DeclType)__ret; \ - } - -#pragma push_macro("__INTRINSIC_LOAD2") -#define __INTRINSIC_LOAD2(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \ - __Clobber) \ - inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \ - __DeclType __ret; \ - __TmpType __tmp; \ - asm(__AsmOp " {%0,%1}, [%2];" \ - : __AsmType(__tmp.x), __AsmType(__tmp.y) \ - : "l"(__ptr)__Clobber); \ - using __ElementType = decltype(__ret.x); \ - __ret.x = (__ElementType)(__tmp.x); \ - __ret.y = (__ElementType)__tmp.y; \ - return __ret; \ - } - -#pragma push_macro("__INTRINSIC_LOAD4") -#define __INTRINSIC_LOAD4(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \ - __Clobber) \ - inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \ - __DeclType __ret; \ - __TmpType __tmp; \ - asm(__AsmOp " {%0,%1,%2,%3}, [%4];" \ - : __AsmType(__tmp.x), __AsmType(__tmp.y), __AsmType(__tmp.z), \ - __AsmType(__tmp.w) \ - : "l"(__ptr)__Clobber); \ - using __ElementType = decltype(__ret.x); \ - __ret.x = (__ElementType)__tmp.x; \ - __ret.y = (__ElementType)__tmp.y; \ - __ret.z = (__ElementType)__tmp.z; \ - __ret.w = (__ElementType)__tmp.w; \ - return __ret; \ - } - -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", char, unsigned int, "=r", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", signed char, unsigned int, "=r", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s16", short, unsigned short, "=h", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s32", int, unsigned int, "=r", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s64", long long, unsigned long long, - "=l", ); - -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s8", char2, int2, "=r", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s8", char4, int4, "=r", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s16", short2, short2, "=h", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s16", short4, short4, "=h", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s32", int2, int2, "=r", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s32", int4, int4, "=r", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s64 ", longlong2, longlong2, "=l", ); - -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u8", unsigned char, unsigned int, - "=r", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u16", unsigned short, unsigned short, - "=h", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u32", unsigned int, unsigned int, - "=r", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u64", unsigned long long, - unsigned long long, "=l", ); - -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u8", uchar2, int2, "=r", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u8", uchar4, int4, "=r", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u16", ushort2, ushort2, "=h", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u16", ushort4, ushort4, "=h", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u32", uint2, uint2, "=r", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u32", uint4, uint4, "=r", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u64", ulonglong2, ulonglong2, - "=l", ); - -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f32", float, float, "=f", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f64", double, double, "=d", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f32", float2, float2, "=f", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.f32", float4, float4, "=f", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f64", double2, double2, "=d", ); - -inline __device__ long __ldcg(const long *__ptr) { - unsigned long __ret; - if (sizeof(long) == 8) { - asm("ld.global.cg.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr)); - } else { - asm("ld.global.cg.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr)); - } - return (long)__ret; -} - -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u8", unsigned char, unsigned int, - "=r", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u16", unsigned short, unsigned short, - "=h", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u32", unsigned int, unsigned int, - "=r", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u64", unsigned long long, - unsigned long long, "=l", : "memory"); - -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", char, unsigned int, - "=r", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", signed char, unsigned int, - "=r", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s16", short, unsigned short, - "=h", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s32", int, unsigned int, - "=r", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s64", long long, unsigned long long, - "=l", : "memory"); - -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u8", uchar2, uint2, - "=r", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u8", uchar4, uint4, - "=r", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u16", ushort2, ushort2, - "=h", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u16", ushort4, ushort4, - "=h", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u32", uint2, uint2, - "=r", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u32", uint4, uint4, - "=r", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u64", ulonglong2, ulonglong2, - "=l", : "memory"); - -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s8", char2, int2, "=r", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s8", char4, int4, "=r", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s16", short2, short2, - "=h", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s16", short4, short4, - "=h", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s32", int2, int2, "=r", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s32", int4, int4, "=r", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s64", longlong2, longlong2, - "=l", : "memory"); - -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f32", float, float, "=f", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f64", double, double, "=d", : "memory"); - -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f32", float2, float2, - "=f", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.f32", float4, float4, - "=f", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f64", double2, double2, - "=d", : "memory"); - -inline __device__ long __ldcv(const long *__ptr) { - unsigned long __ret; - if (sizeof(long) == 8) { - asm("ld.global.cv.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr)); - } else { - asm("ld.global.cv.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr)); - } - return (long)__ret; -} - -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", char, unsigned int, "=r", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", signed char, signed int, "=r", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s16", short, unsigned short, "=h", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s32", int, unsigned int, "=r", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s64", long long, unsigned long long, - "=l", ); - -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s8", char2, int2, "=r", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s8", char4, int4, "=r", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s16", short2, short2, "=h", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s16", short4, short4, "=h", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s32", int2, int2, "=r", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s32", int4, int4, "=r", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s64", longlong2, longlong2, "=l", ); - -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u8", unsigned char, unsigned int, - "=r", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u16", unsigned short, unsigned short, - "=h", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u32", unsigned int, unsigned int, - "=r", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u64", unsigned long long, - unsigned long long, "=l", ); - -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u8", uchar2, uint2, "=r", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u8", uchar4, uint4, "=r", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u16", ushort2, ushort2, "=h", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u16", ushort4, ushort4, "=h", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u32", uint2, uint2, "=r", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u32", uint4, uint4, "=r", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u64", ulonglong2, ulonglong2, - "=l", ); - -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f32", float, float, "=f", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f64", double, double, "=d", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f32", float2, float2, "=f", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.f32", float4, float4, "=f", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f64", double2, double2, "=d", ); - -#pragma pop_macro("__INTRINSIC_LOAD") -#pragma pop_macro("__INTRINSIC_LOAD2") -#pragma pop_macro("__INTRINSIC_LOAD4") - -inline __device__ long __ldcs(const long *__ptr) { - unsigned long __ret; - if (sizeof(long) == 8) { - asm("ld.global.cs.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr)); - } else { - asm("ld.global.cs.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr)); - } - return (long)__ret; -} - -#pragma push_macro("__INTRINSIC_STORE") -#define __INTRINSIC_STORE(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType) \ - inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \ - __TmpType __tmp = (__TmpType)__value; \ - asm(__AsmOp " [%0], %1;" ::"l"(__ptr), __AsmType(__tmp) : "memory"); \ - } - -#pragma push_macro("__INTRINSIC_STORE2") -#define __INTRINSIC_STORE2(__FnName, __AsmOp, __DeclType, __TmpType, \ - __AsmType) \ - inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \ - __TmpType __tmp; \ - using __ElementType = decltype(__tmp.x); \ - __tmp.x = (__ElementType)(__value.x); \ - __tmp.y = (__ElementType)(__value.y); \ - asm(__AsmOp " [%0], {%1,%2};" ::"l"(__ptr), __AsmType(__tmp.x), \ - __AsmType(__tmp.y) \ - : "memory"); \ - } - -#pragma push_macro("__INTRINSIC_STORE4") -#define __INTRINSIC_STORE4(__FnName, __AsmOp, __DeclType, __TmpType, \ - __AsmType) \ - inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \ - __TmpType __tmp; \ - using __ElementType = decltype(__tmp.x); \ - __tmp.x = (__ElementType)(__value.x); \ - __tmp.y = (__ElementType)(__value.y); \ - __tmp.z = (__ElementType)(__value.z); \ - __tmp.w = (__ElementType)(__value.w); \ - asm(__AsmOp " [%0], {%1,%2,%3,%4};" ::"l"(__ptr), __AsmType(__tmp.x), \ - __AsmType(__tmp.y), __AsmType(__tmp.z), __AsmType(__tmp.w) \ - : "memory"); \ - } - -__INTRINSIC_STORE(__stwt, "st.global.wt.s8", char, int, "r"); -__INTRINSIC_STORE(__stwt, "st.global.wt.s8", signed char, int, "r"); -__INTRINSIC_STORE(__stwt, "st.global.wt.s16", short, short, "h"); -__INTRINSIC_STORE(__stwt, "st.global.wt.s32", int, int, "r"); -__INTRINSIC_STORE(__stwt, "st.global.wt.s64", long long, long long, "l"); - -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s8", char2, int2, "r"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s8", char4, int4, "r"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s16", short2, short2, "h"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s16", short4, short4, "h"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s32", int2, int2, "r"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s32", int4, int4, "r"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s64", longlong2, longlong2, "l"); - -__INTRINSIC_STORE(__stwt, "st.global.wt.u8", unsigned char, int, "r"); -__INTRINSIC_STORE(__stwt, "st.global.wt.u16", unsigned short, unsigned short, - "h"); -__INTRINSIC_STORE(__stwt, "st.global.wt.u32", unsigned int, unsigned int, "r"); -__INTRINSIC_STORE(__stwt, "st.global.wt.u64", unsigned long long, - unsigned long long, "l"); - -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u8", uchar2, uchar2, "r"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u8", uchar4, uint4, "r"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u16", ushort2, ushort2, "h"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u16", ushort4, ushort4, "h"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u32", uint2, uint2, "r"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u32", uint4, uint4, "r"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u64", ulonglong2, ulonglong2, "l"); - -__INTRINSIC_STORE(__stwt, "st.global.wt.f32", float, float, "f"); -__INTRINSIC_STORE(__stwt, "st.global.wt.f64", double, double, "d"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f32", float2, float2, "f"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.f32", float4, float4, "f"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f64", double2, double2, "d"); - -#pragma pop_macro("__INTRINSIC_STORE") -#pragma pop_macro("__INTRINSIC_STORE2") -#pragma pop_macro("__INTRINSIC_STORE4") - #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320 #if CUDA_VERSION >= 11000 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits