================
@@ -13,16 +13,15 @@
#include <clc/mem_fence/clc_mem_semantic.h>
#include <clc/opencl/synchronization/cl_mem_fence_flags.h>
-_CLC_INLINE int __opencl_get_memory_scope(cl_mem_fence_flags flag) {
- int memory_scope = 0;
+static _CLC_INLINE int __opencl_get_memory_scope(cl_mem_fence_flags flag) {
if (flag & CLK_GLOBAL_MEM_FENCE)
- memory_scope |= __MEMORY_SCOPE_DEVICE;
+ return __MEMORY_SCOPE_DEVICE;
if (flag & CLK_LOCAL_MEM_FENCE)
- memory_scope |= __MEMORY_SCOPE_WRKGRP;
- return memory_scope;
+ return __MEMORY_SCOPE_WRKGRP;
+ return __MEMORY_SCOPE_SINGLE;
----------------
vmustya wrote:
According to the [OpenCL C
spec](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#synchronization-functions),
the synchronization functions accept `0` as a valid argument value. When `0`
is passed, a single-thread fence looks reasonable to me, because the
implementation shouldn't issue any cross-thread memory synchronization in
that case. Using `__builtin_unreachable()` there would not work correctly,
because that path can be reached with a valid argument.
https://github.com/llvm/llvm-project/pull/170542
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits