Author: jvesely
Date: Fri Sep 8 16:59:00 2017
New Revision: 312839
URL: http://llvm.org/viewvc/llvm-project?rev=312839&view=rev
Log: Implement vload_half{,n} and vload(half)
v2: add vload(half) as well make helpers amdgpu specific (NVPTX uses different private AS numbering) use clang builtin on clang >= 6 Signed-off-by: Jan Vesely <jan.ves...@rutgers.edu> Reviewed-by: Tom Stellard <tstel...@redhat.com> Added: libclc/trunk/amdgpu/lib/shared/vload_half_helpers.ll libclc/trunk/generic/lib/shared/vload_half.inc Modified: libclc/trunk/amdgpu/lib/SOURCES_4.0 libclc/trunk/amdgpu/lib/SOURCES_5.0 libclc/trunk/generic/include/clc/shared/vload.h libclc/trunk/generic/lib/shared/vload.cl Modified: libclc/trunk/amdgpu/lib/SOURCES_4.0 URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/amdgpu/lib/SOURCES_4.0?rev=312839&r1=312838&r2=312839&view=diff ============================================================================== --- libclc/trunk/amdgpu/lib/SOURCES_4.0 (original) +++ libclc/trunk/amdgpu/lib/SOURCES_4.0 Fri Sep 8 16:59:00 2017 @@ -1 +1,2 @@ +shared/vload_half_helpers.ll shared/vstore_half_helpers.ll Modified: libclc/trunk/amdgpu/lib/SOURCES_5.0 URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/amdgpu/lib/SOURCES_5.0?rev=312839&r1=312838&r2=312839&view=diff ============================================================================== --- libclc/trunk/amdgpu/lib/SOURCES_5.0 (original) +++ libclc/trunk/amdgpu/lib/SOURCES_5.0 Fri Sep 8 16:59:00 2017 @@ -1 +1,2 @@ +shared/vload_half_helpers.ll shared/vstore_half_helpers.ll Added: libclc/trunk/amdgpu/lib/shared/vload_half_helpers.ll URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/amdgpu/lib/shared/vload_half_helpers.ll?rev=312839&view=auto ============================================================================== --- libclc/trunk/amdgpu/lib/shared/vload_half_helpers.ll (added) +++ libclc/trunk/amdgpu/lib/shared/vload_half_helpers.ll Fri Sep 8 16:59:00 2017 @@ -0,0 +1,23 @@ +define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline { + %data = load half, half addrspace(0)* %ptr + %res = fpext half %data to float + ret float 
%res +} + +define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline { + %data = load half, half addrspace(1)* %ptr + %res = fpext half %data to float + ret float %res +} + +define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline { + %data = load half, half addrspace(3)* %ptr + %res = fpext half %data to float + ret float %res +} + +define float @__clc_vload_half_float_helper__constant(half addrspace(2)* nocapture %ptr) nounwind alwaysinline { + %data = load half, half addrspace(2)* %ptr + %res = fpext half %data to float + ret float %res +} Modified: libclc/trunk/generic/include/clc/shared/vload.h URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/shared/vload.h?rev=312839&r1=312838&r2=312839&view=diff ============================================================================== --- libclc/trunk/generic/include/clc/shared/vload.h (original) +++ libclc/trunk/generic/include/clc/shared/vload.h Fri Sep 8 16:59:00 2017 @@ -1,18 +1,21 @@ -#define _CLC_VLOAD_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \ - _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##WIDTH(size_t offset, const ADDR_SPACE PRIM_TYPE *x); +#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \ + _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH(size_t offset, const ADDR_SPACE MEM_TYPE *x); -#define _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, ADDR_SPACE) \ - _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \ - _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \ - _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \ - _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \ - _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE) +#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \ + _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \ + _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \ + 
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \ + _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \ + _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE) + +#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \ + _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \ + _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \ + _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \ + _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \ #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \ - _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __private) \ - _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __local) \ - _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __constant) \ - _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __global) \ + _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \ #define _CLC_VECTOR_VLOAD_PRIM() \ _CLC_VECTOR_VLOAD_PRIM1(char) \ @@ -24,14 +27,26 @@ _CLC_VECTOR_VLOAD_PRIM1(long) \ _CLC_VECTOR_VLOAD_PRIM1(ulong) \ _CLC_VECTOR_VLOAD_PRIM1(float) \ - + _CLC_VECTOR_VLOAD_PRIM3(_half, half, float) + #ifdef cl_khr_fp64 -#define _CLC_VECTOR_VLOAD() \ - _CLC_VECTOR_VLOAD_PRIM1(double) \ - _CLC_VECTOR_VLOAD_PRIM() -#else -#define _CLC_VECTOR_VLOAD() \ - _CLC_VECTOR_VLOAD_PRIM() +#pragma OPENCL EXTENSION cl_khr_fp64: enable + _CLC_VECTOR_VLOAD_PRIM1(double) #endif +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16: enable + _CLC_VECTOR_VLOAD_PRIM1(half) +#endif + +_CLC_VECTOR_VLOAD_PRIM() +// Plain vload_half also needs to be declared +_CLC_VLOAD_DECL(_half, half, float, , __constant) +_CLC_VLOAD_DECL(_half, half, float, , __global) +_CLC_VLOAD_DECL(_half, half, float, , __local) +_CLC_VLOAD_DECL(_half, half, float, , __private) -_CLC_VECTOR_VLOAD() +#undef _CLC_VLOAD_DECL +#undef _CLC_VECTOR_VLOAD_DECL +#undef _CLC_VECTOR_VLOAD_PRIM3 +#undef _CLC_VECTOR_VLOAD_PRIM1 +#undef _CLC_VECTOR_VLOAD_PRIM Modified: libclc/trunk/generic/lib/shared/vload.cl URL: 
http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/shared/vload.cl?rev=312839&r1=312838&r2=312839&view=diff ============================================================================== --- libclc/trunk/generic/lib/shared/vload.cl (original) +++ libclc/trunk/generic/lib/shared/vload.cl Fri Sep 8 16:59:00 2017 @@ -50,3 +50,62 @@ VLOAD_TYPES() #pragma OPENCL EXTENSION cl_khr_fp64 : enable VLOAD_ADDR_SPACES(double) #endif +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + VLOAD_ADDR_SPACES(half) +#endif + +/* vload_half are legal even without cl_khr_fp16 */ +/* no vload_half for double */ +#if __clang_major__ < 6 +float __clc_vload_half_float_helper__constant(const __constant half *); +float __clc_vload_half_float_helper__global(const __global half *); +float __clc_vload_half_float_helper__local(const __local half *); +float __clc_vload_half_float_helper__private(const __private half *); + +#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]); +#else +#define VEC_LOAD1(val, AS) val = __builtin_load_halff(&mem[offset++]); +#endif + +#define VEC_LOAD2(val, AS) \ + VEC_LOAD1(val.lo, AS) \ + VEC_LOAD1(val.hi, AS) +#define VEC_LOAD3(val, AS) \ + VEC_LOAD1(val.s0, AS) \ + VEC_LOAD1(val.s1, AS) \ + VEC_LOAD1(val.s2, AS) +#define VEC_LOAD4(val, AS) \ + VEC_LOAD2(val.lo, AS) \ + VEC_LOAD2(val.hi, AS) +#define VEC_LOAD8(val, AS) \ + VEC_LOAD4(val.lo, AS) \ + VEC_LOAD4(val.hi, AS) +#define VEC_LOAD16(val, AS) \ + VEC_LOAD8(val.lo, AS) \ + VEC_LOAD8(val.hi, AS) + +#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS half *mem) { \ + offset *= VEC_SIZE; \ + TYPE __tmp; \ + VEC_LOAD##VEC_SIZE(__tmp, AS) \ + return __tmp; \ + } + +#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) + +#define __CLC_BODY "vload_half.inc" +#include <clc/math/gentype.inc> +#undef __CLC_BODY +#undef FUNC +#undef __FUNC +#undef VEC_LOAD16 +#undef VEC_LOAD8 
+#undef VEC_LOAD4 +#undef VEC_LOAD3 +#undef VEC_LOAD2 +#undef VEC_LOAD1 +#undef VLOAD_TYPES +#undef VLOAD_ADDR_SPACES +#undef VLOAD_VECTORIZE Added: libclc/trunk/generic/lib/shared/vload_half.inc URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/shared/vload_half.inc?rev=312839&view=auto ============================================================================== --- libclc/trunk/generic/lib/shared/vload_half.inc (added) +++ libclc/trunk/generic/lib/shared/vload_half.inc Fri Sep 8 16:59:00 2017 @@ -0,0 +1,13 @@ +#if __CLC_FPSIZE == 32 +#ifdef __CLC_VECSIZE + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private); + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local); + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global); + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant); +#else + FUNC(, 1, __CLC_GENTYPE, __private); + FUNC(, 1, __CLC_GENTYPE, __local); + FUNC(, 1, __CLC_GENTYPE, __global); + FUNC(, 1, __CLC_GENTYPE, __constant); +#endif +#endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits