llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Artem Belevich (Artem-B)

<details>
<summary>Changes</summary>

Reverts llvm/llvm-project#<!-- -->143664
as it breaks CUDA compilation.

---
Full diff: https://github.com/llvm/llvm-project/pull/144755.diff


1 Files Affected:

- (modified) clang/lib/Headers/__clang_cuda_intrinsics.h (-284) 


``````````diff
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h 
b/clang/lib/Headers/__clang_cuda_intrinsics.h
index 5e13f3f78df70..8b230af6f6647 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -479,290 +479,6 @@ inline __device__ unsigned __funnelshift_rc(unsigned 
low32, unsigned high32,
   return ret;
 }
 
-#pragma push_macro("__INTRINSIC_LOAD")
-#define __INTRINSIC_LOAD(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType,  
\
-                         __Clobber)                                            
\
-  inline __device__ __DeclType __FnName(const __DeclType *__ptr) {             
\
-    __TmpType __ret;                                                           
\
-    asm(__AsmOp " %0, [%1];" : __AsmType(__ret) : "l"(__ptr)__Clobber);        
\
-    return (__DeclType)__ret;                                                  
\
-  }
-
-#pragma push_macro("__INTRINSIC_LOAD2")
-#define __INTRINSIC_LOAD2(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, 
\
-                          __Clobber)                                           
\
-  inline __device__ __DeclType __FnName(const __DeclType *__ptr) {             
\
-    __DeclType __ret;                                                          
\
-    __TmpType __tmp;                                                           
\
-    asm(__AsmOp " {%0,%1}, [%2];"                                              
\
-        : __AsmType(__tmp.x), __AsmType(__tmp.y)                               
\
-        : "l"(__ptr)__Clobber);                                                
\
-    using __ElementType = decltype(__ret.x);                                   
\
-    __ret.x = (__ElementType)(__tmp.x);                                        
\
-    __ret.y = (__ElementType)__tmp.y;                                          
\
-    return __ret;                                                              
\
-  }
-
-#pragma push_macro("__INTRINSIC_LOAD4")
-#define __INTRINSIC_LOAD4(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, 
\
-                          __Clobber)                                           
\
-  inline __device__ __DeclType __FnName(const __DeclType *__ptr) {             
\
-    __DeclType __ret;                                                          
\
-    __TmpType __tmp;                                                           
\
-    asm(__AsmOp " {%0,%1,%2,%3}, [%4];"                                        
\
-        : __AsmType(__tmp.x), __AsmType(__tmp.y), __AsmType(__tmp.z),          
\
-          __AsmType(__tmp.w)                                                   
\
-        : "l"(__ptr)__Clobber);                                                
\
-    using __ElementType = decltype(__ret.x);                                   
\
-    __ret.x = (__ElementType)__tmp.x;                                          
\
-    __ret.y = (__ElementType)__tmp.y;                                          
\
-    __ret.z = (__ElementType)__tmp.z;                                          
\
-    __ret.w = (__ElementType)__tmp.w;                                          
\
-    return __ret;                                                              
\
-  }
-
-__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", char, unsigned int, "=r", );
-__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", signed char, unsigned int, "=r", );
-__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s16", short, unsigned short, "=h", );
-__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s32", int, unsigned int, "=r", );
-__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s64", long long, unsigned long long,
-                 "=l", );
-
-__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s8", char2, int2, "=r", );
-__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s8", char4, int4, "=r", );
-__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s16", short2, short2, "=h", );
-__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s16", short4, short4, "=h", );
-__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s32", int2, int2, "=r", );
-__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s32", int4, int4, "=r", );
-__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s64 ", longlong2, longlong2, "=l", 
);
-
-__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u8", unsigned char, unsigned int,
-                 "=r", );
-__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u16", unsigned short, unsigned short,
-                 "=h", );
-__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u32", unsigned int, unsigned int,
-                 "=r", );
-__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u64", unsigned long long,
-                 unsigned long long, "=l", );
-
-__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u8", uchar2, int2, "=r", );
-__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u8", uchar4, int4, "=r", );
-__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u16", ushort2, ushort2, "=h", );
-__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u16", ushort4, ushort4, "=h", );
-__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u32", uint2, uint2, "=r", );
-__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u32", uint4, uint4, "=r", );
-__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u64", ulonglong2, ulonglong2,
-                  "=l", );
-
-__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f32", float, float, "=f", );
-__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f64", double, double, "=d", );
-__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f32", float2, float2, "=f", );
-__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.f32", float4, float4, "=f", );
-__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f64", double2, double2, "=d", );
-
-inline __device__ long __ldcg(const long *__ptr) {
-  unsigned long __ret;
-  if (sizeof(long) == 8) {
-    asm("ld.global.cg.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
-  } else {
-    asm("ld.global.cg.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
-  }
-  return (long)__ret;
-}
-
-__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u8", unsigned char, unsigned int,
-                 "=r", : "memory");
-__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u16", unsigned short, unsigned short,
-                 "=h", : "memory");
-__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u32", unsigned int, unsigned int,
-                 "=r", : "memory");
-__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u64", unsigned long long,
-                 unsigned long long, "=l", : "memory");
-
-__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", char, unsigned int,
-                 "=r", : "memory");
-__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", signed char, unsigned int,
-                 "=r", : "memory");
-__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s16", short, unsigned short,
-                 "=h", : "memory");
-__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s32", int, unsigned int,
-                 "=r", : "memory");
-__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s64", long long, unsigned long long,
-                 "=l", : "memory");
-
-__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u8", uchar2, uint2,
-                  "=r", : "memory");
-__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u8", uchar4, uint4,
-                  "=r", : "memory");
-__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u16", ushort2, ushort2,
-                  "=h", : "memory");
-__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u16", ushort4, ushort4,
-                  "=h", : "memory");
-__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u32", uint2, uint2,
-                  "=r", : "memory");
-__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u32", uint4, uint4,
-                  "=r", : "memory");
-__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u64", ulonglong2, ulonglong2,
-                  "=l", : "memory");
-
-__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s8", char2, int2, "=r", : "memory");
-__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s8", char4, int4, "=r", : "memory");
-__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s16", short2, short2,
-                  "=h", : "memory");
-__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s16", short4, short4,
-                  "=h", : "memory");
-__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s32", int2, int2, "=r", : "memory");
-__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s32", int4, int4, "=r", : "memory");
-__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s64", longlong2, longlong2,
-                  "=l", : "memory");
-
-__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f32", float, float, "=f", : "memory");
-__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f64", double, double, "=d", : "memory");
-
-__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f32", float2, float2,
-                  "=f", : "memory");
-__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.f32", float4, float4,
-                  "=f", : "memory");
-__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f64", double2, double2,
-                  "=d", : "memory");
-
-inline __device__ long __ldcv(const long *__ptr) {
-  unsigned long __ret;
-  if (sizeof(long) == 8) {
-    asm("ld.global.cv.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
-  } else {
-    asm("ld.global.cv.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
-  }
-  return (long)__ret;
-}
-
-__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", char, unsigned int, "=r", );
-__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", signed char, signed int, "=r", );
-__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s16", short, unsigned short, "=h", );
-__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s32", int, unsigned int, "=r", );
-__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s64", long long, unsigned long long,
-                 "=l", );
-
-__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s8", char2, int2, "=r", );
-__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s8", char4, int4, "=r", );
-__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s16", short2, short2, "=h", );
-__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s16", short4, short4, "=h", );
-__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s32", int2, int2, "=r", );
-__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s32", int4, int4, "=r", );
-__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s64", longlong2, longlong2, "=l", );
-
-__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u8", unsigned char, unsigned int,
-                 "=r", );
-__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u16", unsigned short, unsigned short,
-                 "=h", );
-__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u32", unsigned int, unsigned int,
-                 "=r", );
-__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u64", unsigned long long,
-                 unsigned long long, "=l", );
-
-__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u8", uchar2, uint2, "=r", );
-__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u8", uchar4, uint4, "=r", );
-__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u16", ushort2, ushort2, "=h", );
-__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u16", ushort4, ushort4, "=h", );
-__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u32", uint2, uint2, "=r", );
-__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u32", uint4, uint4, "=r", );
-__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u64", ulonglong2, ulonglong2,
-                  "=l", );
-
-__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f32", float, float, "=f", );
-__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f64", double, double, "=d", );
-__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f32", float2, float2, "=f", );
-__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.f32", float4, float4, "=f", );
-__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f64", double2, double2, "=d", );
-
-#pragma pop_macro("__INTRINSIC_LOAD")
-#pragma pop_macro("__INTRINSIC_LOAD2")
-#pragma pop_macro("__INTRINSIC_LOAD4")
-
-inline __device__ long __ldcs(const long *__ptr) {
-  unsigned long __ret;
-  if (sizeof(long) == 8) {
-    asm("ld.global.cs.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
-  } else {
-    asm("ld.global.cs.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
-  }
-  return (long)__ret;
-}
-
-#pragma push_macro("__INTRINSIC_STORE")
-#define __INTRINSIC_STORE(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType) 
\
-  inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) {     
\
-    __TmpType __tmp = (__TmpType)__value;                                      
\
-    asm(__AsmOp " [%0], %1;" ::"l"(__ptr), __AsmType(__tmp) : "memory");       
\
-  }
-
-#pragma push_macro("__INTRINSIC_STORE2")
-#define __INTRINSIC_STORE2(__FnName, __AsmOp, __DeclType, __TmpType,           
\
-                           __AsmType)                                          
\
-  inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) {     
\
-    __TmpType __tmp;                                                           
\
-    using __ElementType = decltype(__tmp.x);                                   
\
-    __tmp.x = (__ElementType)(__value.x);                                      
\
-    __tmp.y = (__ElementType)(__value.y);                                      
\
-    asm(__AsmOp " [%0], {%1,%2};" ::"l"(__ptr), __AsmType(__tmp.x),            
\
-        __AsmType(__tmp.y)                                                     
\
-        : "memory");                                                           
\
-  }
-
-#pragma push_macro("__INTRINSIC_STORE4")
-#define __INTRINSIC_STORE4(__FnName, __AsmOp, __DeclType, __TmpType,           
\
-                           __AsmType)                                          
\
-  inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) {     
\
-    __TmpType __tmp;                                                           
\
-    using __ElementType = decltype(__tmp.x);                                   
\
-    __tmp.x = (__ElementType)(__value.x);                                      
\
-    __tmp.y = (__ElementType)(__value.y);                                      
\
-    __tmp.z = (__ElementType)(__value.z);                                      
\
-    __tmp.w = (__ElementType)(__value.w);                                      
\
-    asm(__AsmOp " [%0], {%1,%2,%3,%4};" ::"l"(__ptr), __AsmType(__tmp.x),      
\
-        __AsmType(__tmp.y), __AsmType(__tmp.z), __AsmType(__tmp.w)             
\
-        : "memory");                                                           
\
-  }
-
-__INTRINSIC_STORE(__stwt, "st.global.wt.s8", char, int, "r");
-__INTRINSIC_STORE(__stwt, "st.global.wt.s8", signed char, int, "r");
-__INTRINSIC_STORE(__stwt, "st.global.wt.s16", short, short, "h");
-__INTRINSIC_STORE(__stwt, "st.global.wt.s32", int, int, "r");
-__INTRINSIC_STORE(__stwt, "st.global.wt.s64", long long, long long, "l");
-
-__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s8", char2, int2, "r");
-__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s8", char4, int4, "r");
-__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s16", short2, short2, "h");
-__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s16", short4, short4, "h");
-__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s32", int2, int2, "r");
-__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s32", int4, int4, "r");
-__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s64", longlong2, longlong2, "l");
-
-__INTRINSIC_STORE(__stwt, "st.global.wt.u8", unsigned char, int, "r");
-__INTRINSIC_STORE(__stwt, "st.global.wt.u16", unsigned short, unsigned short,
-                  "h");
-__INTRINSIC_STORE(__stwt, "st.global.wt.u32", unsigned int, unsigned int, "r");
-__INTRINSIC_STORE(__stwt, "st.global.wt.u64", unsigned long long,
-                  unsigned long long, "l");
-
-__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u8", uchar2, uchar2, "r");
-__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u8", uchar4, uint4, "r");
-__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u16", ushort2, ushort2, "h");
-__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u16", ushort4, ushort4, "h");
-__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u32", uint2, uint2, "r");
-__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u32", uint4, uint4, "r");
-__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u64", ulonglong2, ulonglong2, "l");
-
-__INTRINSIC_STORE(__stwt, "st.global.wt.f32", float, float, "f");
-__INTRINSIC_STORE(__stwt, "st.global.wt.f64", double, double, "d");
-__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f32", float2, float2, "f");
-__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.f32", float4, float4, "f");
-__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f64", double2, double2, "d");
-
-#pragma pop_macro("__INTRINSIC_STORE")
-#pragma pop_macro("__INTRINSIC_STORE2")
-#pragma pop_macro("__INTRINSIC_STORE4")
-
 #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
 
 #if CUDA_VERSION >= 11000

``````````

</details>


https://github.com/llvm/llvm-project/pull/144755
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to