Author: Joseph Huber
Date: 2026-01-09T08:58:17-06:00
New Revision: 5c4324326d770bab1628225ebb1a04698a27b59b
URL: https://github.com/llvm/llvm-project/commit/5c4324326d770bab1628225ebb1a04698a27b59b
DIFF: https://github.com/llvm/llvm-project/commit/5c4324326d770bab1628225ebb1a04698a27b59b.diff

LOG: [SPIR-V] Initial support for SPIR-V in `gpuintrin.h` (#174910)

Summary:
https://github.com/llvm/llvm-project/pull/174862 and
https://github.com/llvm/llvm-project/pull/174655 provided the intrinsics
required to get the fundamental operations working for SPIR-V targets. This
patch sets up the basic support (as far as I know) and should be the first
step towards allowing SPIR-V to build things like the LLVM libc and the
OpenMP Device Runtime Library.

The implementations here are intentionally inefficient, for example not
using the dedicated SPIR-V opcode for read-firstlane. This is just a
starting point so that testing can begin later. I would appreciate someone
more familiar with the backend double-checking these.

Added: 
    clang/lib/Headers/spirvintrin.h

Modified: 
    clang/lib/Headers/CMakeLists.txt
    clang/lib/Headers/gpuintrin.h
    clang/test/Headers/gpuintrin.c
    clang/test/Headers/gpuintrin_lang.c

Removed: 
    


################################################################################

diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 1b96ac417bf70..c92b370b88d2d 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -297,6 +297,7 @@ set(gpu_files
   gpuintrin.h
   nvptxintrin.h
   amdgpuintrin.h
+  spirvintrin.h
   )
 
 set(windows_only_files
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index cea19f0f74464..f3cf2d0776c0c 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -60,6 +60,8 @@ _Pragma("omp end declare target");
 #include <nvptxintrin.h>
 #elif defined(__AMDGPU__)
 #include <amdgpuintrin.h>
+#elif defined(__SPIRV__)
+#include <spirvintrin.h>
 #elif !defined(_OPENMP)
 #error "This header is only meant to be used on GPU architectures."
 #endif
diff --git a/clang/lib/Headers/spirvintrin.h b/clang/lib/Headers/spirvintrin.h
new file mode 100644
index 0000000000000..2a10a47adedde
--- /dev/null
+++ b/clang/lib/Headers/spirvintrin.h
@@ -0,0 +1,194 @@
+//===-- spirvintrin.h - SPIR-V intrinsic functions ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __SPIRVINTRIN_H
+#define __SPIRVINTRIN_H
+
+#ifndef __SPIRV__
+#error "This file is intended for SPIR-V targets or offloading to SPIR-V"
+#endif
+
+#ifndef __GPUINTRIN_H
+#error "Never use <spirvintrin.h> directly; include <gpuintrin.h> instead"
+#endif
+
+_Pragma("omp begin declare target device_type(nohost)");
+_Pragma("omp begin declare variant match(device = {arch(spirv64)})");
+
+// Type aliases to the address spaces used by the SPIR-V backend.
+#define __gpu_private __attribute__((address_space(0)))
+#define __gpu_constant __attribute__((address_space(2)))
+#define __gpu_local __attribute__((address_space(3)))
+#define __gpu_global __attribute__((address_space(1)))
+#define __gpu_generic __attribute__((address_space(4)))
+
+// Attribute to declare a function as a kernel.
+#define __gpu_kernel __attribute__((device_kernel, visibility("protected")))
+
+// Returns the number of workgroups in the 'x' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { + return __builtin_spirv_num_workgroups(0); +} + +// Returns the number of workgroups in the 'y' dimension of the grid. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_y(void) { + return __builtin_spirv_num_workgroups(1); +} + +// Returns the number of workgroups in the 'z' dimension of the grid. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_z(void) { + return __builtin_spirv_num_workgroups(2); +} + +// Returns the 'x' dimension of the current workgroup's id. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_x(void) { + return __builtin_spirv_workgroup_id(0); +} + +// Returns the 'y' dimension of the current workgroup's id. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_y(void) { + return __builtin_spirv_workgroup_id(1); +} + +// Returns the 'z' dimension of the current workgroup's id. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_z(void) { + return __builtin_spirv_workgroup_id(2); +} + +// Returns the number of workitems in the 'x' dimension. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_x(void) { + return __builtin_spirv_workgroup_size(0); +} + +// Returns the number of workitems in the 'y' dimension. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_y(void) { + return __builtin_spirv_workgroup_size(1); +} + +// Returns the number of workitems in the 'z' dimension. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_z(void) { + return __builtin_spirv_workgroup_size(2); +} + +// Returns the 'x' dimension id of the workitem in the current workgroup. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_x(void) { + return __builtin_spirv_local_invocation_id(0); +} + +// Returns the 'y' dimension id of the workitem in the current workgroup. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_y(void) { + return __builtin_spirv_local_invocation_id(1); +} + +// Returns the 'z' dimension id of the workitem in the current workgroup. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_z(void) { + return __builtin_spirv_local_invocation_id(2); +} + +// Returns the size of an wavefront, either 32 or 64 depending on hardware +// and compilation options. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_lanes(void) { + return __builtin_spirv_subgroup_size(); +} + +// Returns the id of the thread inside of an wavefront executing together. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_id(void) { + return __builtin_spirv_subgroup_local_invocation_id(); +} + +// Returns the bit-mask of active threads in the current wavefront. This +// implementation is incorrect if the target uses more than 64 lanes. +_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_lane_mask(void) { + uint32_t [[clang::ext_vector_type(4)]] __mask = + __builtin_spirv_subgroup_ballot(1); + return __builtin_bit_cast(uint64_t, + __builtin_shufflevector(__mask, __mask, 0, 1)); +} + +// Copies the value from the first active thread in the wavefront to the rest. +_DEFAULT_FN_ATTRS static __inline__ uint32_t +__gpu_read_first_lane_u32(uint64_t __lane_mask, uint32_t __x) { + return __builtin_spirv_subgroup_shuffle(__x, + __builtin_ctzg(__gpu_lane_mask())); +} + +// Returns a bitmask of threads in the current lane for which \p x is true. This +// implementation is incorrect if the target uses more than 64 lanes. 
+_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_ballot(uint64_t __lane_mask, + bool __x) { + // The lane_mask & gives the nvptx semantics when lane_mask is a subset of + // the active threads. + uint32_t [[clang::ext_vector_type(4)]] __mask = + __builtin_spirv_subgroup_ballot(__x); + return __lane_mask & __builtin_bit_cast(uint64_t, __builtin_shufflevector( + __mask, __mask, 0, 1)); +} + +// Waits for all the threads in the block to converge and issues a fence. +_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_threads(void) { + __builtin_spirv_group_barrier(); +} + +// Wait for all threads in the wavefront to converge, this is a noop on SPIR-V. +_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) { +} + +// Shuffles the the lanes inside the wavefront according to the given index. +_DEFAULT_FN_ATTRS static __inline__ uint32_t +__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x, + uint32_t __width) { + uint32_t __lane = __idx + (__gpu_lane_id() & ~(__width - 1)); + return __builtin_spirv_subgroup_shuffle(__x, __lane); +} + +// Returns a bitmask marking all lanes that have the same value of __x. +_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) { + return __gpu_match_any_u32_impl(__lane_mask, __x); +} + +// Returns a bitmask marking all lanes that have the same value of __x. +_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) { + return __gpu_match_any_u64_impl(__lane_mask, __x); +} + +// Returns the current lane mask if every lane contains __x. +_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) { + return __gpu_match_all_u32_impl(__lane_mask, __x); +} + +// Returns the current lane mask if every lane contains __x. +_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) { + return __gpu_match_all_u64_impl(__lane_mask, __x); +} + +// SPIR-V does not expose this, always return false. +_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_local(void *ptr) { + return 0; +} + +// SPIR-V does not expose this, always return false. +_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_private(void *ptr) { + return 0; +} + +// SPIR-V only supports 'OpTerminateInvocation' in fragment shaders. +_DEFAULT_FN_ATTRS [[noreturn]] static __inline__ void __gpu_exit(void) { + __builtin_trap(); +} + +// This is a no-op as SPIR-V does not support it. +_DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {} + +_Pragma("omp end declare variant"); +_Pragma("omp end declare target"); + +#endif // __SPIRVINTRIN_H diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c index d9813f28f0655..c8fe721c8c37c 100644 --- a/clang/test/Headers/gpuintrin.c +++ b/clang/test/Headers/gpuintrin.c @@ -8,6 +8,10 @@ // RUN: -target-feature +ptx62 \ // RUN: -triple nvptx64-nvidia-cuda -emit-llvm %s -o - \ // RUN: | FileCheck %s --check-prefix=NVPTX +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ +// RUN: -internal-isystem %S/../../lib/Headers/ \ +// RUN: -triple spirv64-- -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=SPIRV #include <gpuintrin.h> @@ -985,8 +989,436 @@ __gpu_kernel void foo() { // NVPTX-NEXT: call void @llvm.nvvm.exit() // NVPTX-NEXT: ret void // -//. -// AMDGPU: [[RNG3]] = !{i32 1, i32 0} -// AMDGPU: [[META4]] = !{} -// AMDGPU: [[RNG5]] = !{i16 1, i16 1025} -//. 
+// +// SPIRV-LABEL: define protected spir_kernel void @foo( +// SPIRV-SAME: ) #[[ATTR0:[0-9]+]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_blocks_x() +// SPIRV-NEXT: [[CALL1:%.*]] = call spir_func i32 @__gpu_num_blocks_y() +// SPIRV-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_blocks_z() +// SPIRV-NEXT: [[CALL3:%.*]] = call spir_func i32 @__gpu_num_blocks(i32 noundef 0) +// SPIRV-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_block_id_x() +// SPIRV-NEXT: [[CALL5:%.*]] = call spir_func i32 @__gpu_block_id_y() +// SPIRV-NEXT: [[CALL6:%.*]] = call spir_func i32 @__gpu_block_id_z() +// SPIRV-NEXT: [[CALL7:%.*]] = call spir_func i32 @__gpu_block_id(i32 noundef 0) +// SPIRV-NEXT: [[CALL8:%.*]] = call spir_func i32 @__gpu_num_threads_x() +// SPIRV-NEXT: [[CALL9:%.*]] = call spir_func i32 @__gpu_num_threads_y() +// SPIRV-NEXT: [[CALL10:%.*]] = call spir_func i32 @__gpu_num_threads_z() +// SPIRV-NEXT: [[CALL11:%.*]] = call spir_func i32 @__gpu_num_threads(i32 noundef 0) +// SPIRV-NEXT: [[CALL12:%.*]] = call spir_func i32 @__gpu_thread_id_x() +// SPIRV-NEXT: [[CALL13:%.*]] = call spir_func i32 @__gpu_thread_id_y() +// SPIRV-NEXT: [[CALL14:%.*]] = call spir_func i32 @__gpu_thread_id_z() +// SPIRV-NEXT: [[CALL15:%.*]] = call spir_func i32 @__gpu_thread_id(i32 noundef 0) +// SPIRV-NEXT: [[CALL16:%.*]] = call spir_func i32 @__gpu_num_lanes() +// SPIRV-NEXT: [[CALL17:%.*]] = call spir_func i32 @__gpu_lane_id() +// SPIRV-NEXT: [[CALL18:%.*]] = call spir_func i64 @__gpu_lane_mask() +// SPIRV-NEXT: [[CALL19:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef -1, i32 noundef -1) +// SPIRV-NEXT: [[CALL20:%.*]] = call spir_func i64 @__gpu_read_first_lane_u64(i64 noundef -1, i64 noundef -1) +// SPIRV-NEXT: [[CALL21:%.*]] = call spir_func i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true) +// SPIRV-NEXT: call spir_func void @__gpu_sync_threads() +// SPIRV-NEXT: call spir_func void @__gpu_sync_lane(i64 noundef -1) +// SPIRV-NEXT: [[CALL22:%.*]] = call spir_func i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1, i32 noundef 0) +// SPIRV-NEXT: [[CALL23:%.*]] = call spir_func i64 @__gpu_first_lane_id(i64 noundef -1) +// SPIRV-NEXT: [[CALL24:%.*]] = call spir_func zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1) +// SPIRV-NEXT: call spir_func void @__gpu_exit() #[[ATTR7:[0-9]+]] +// SPIRV-NEXT: unreachable +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_num_blocks_x( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_NUM_WORKGROUPS:%.*]] = call i64 @llvm.spv.num.workgroups.i64(i32 0) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_NUM_WORKGROUPS]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_num_blocks_y( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_NUM_WORKGROUPS:%.*]] = call i64 @llvm.spv.num.workgroups.i64(i32 1) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_NUM_WORKGROUPS]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_num_blocks_z( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_NUM_WORKGROUPS:%.*]] = call i64 @llvm.spv.num.workgroups.i64(i32 2) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_NUM_WORKGROUPS]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_num_blocks( +// SPIRV-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] { +// 
SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4 +// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4 +// SPIRV-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ +// SPIRV-NEXT: i32 0, label %[[SW_BB:.*]] +// SPIRV-NEXT: i32 1, label %[[SW_BB1:.*]] +// SPIRV-NEXT: i32 2, label %[[SW_BB3:.*]] +// SPIRV-NEXT: ] +// SPIRV: [[SW_BB]]: +// SPIRV-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_blocks_x() +// SPIRV-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN:.*]] +// SPIRV: [[SW_BB1]]: +// SPIRV-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_blocks_y() +// SPIRV-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: [[SW_BB3]]: +// SPIRV-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_num_blocks_z() +// SPIRV-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: [[SW_DEFAULT]]: +// SPIRV-NEXT: store i32 1, ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: [[RETURN]]: +// SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// SPIRV-NEXT: ret i32 [[TMP1]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_block_id_x( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_GROUP_ID:%.*]] = call i64 @llvm.spv.group.id.i64(i32 0) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_GROUP_ID]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_block_id_y( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_GROUP_ID:%.*]] = call i64 @llvm.spv.group.id.i64(i32 1) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_GROUP_ID]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_block_id_z( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_GROUP_ID:%.*]] = call i64 @llvm.spv.group.id.i64(i32 2) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_GROUP_ID]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_block_id( +// SPIRV-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4 +// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4 +// SPIRV-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ +// SPIRV-NEXT: i32 0, label %[[SW_BB:.*]] +// SPIRV-NEXT: i32 1, label %[[SW_BB1:.*]] +// SPIRV-NEXT: i32 2, label %[[SW_BB3:.*]] +// SPIRV-NEXT: ] +// SPIRV: [[SW_BB]]: +// SPIRV-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_block_id_x() +// SPIRV-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN:.*]] +// SPIRV: [[SW_BB1]]: +// SPIRV-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_block_id_y() +// SPIRV-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: [[SW_BB3]]: +// SPIRV-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_block_id_z() +// SPIRV-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: [[SW_DEFAULT]]: +// SPIRV-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: 
[[RETURN]]: +// SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// SPIRV-NEXT: ret i32 [[TMP1]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_num_threads_x( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_WORKGROUP_SIZE:%.*]] = call i64 @llvm.spv.workgroup.size.i64(i32 0) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_WORKGROUP_SIZE]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_num_threads_y( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_WORKGROUP_SIZE:%.*]] = call i64 @llvm.spv.workgroup.size.i64(i32 1) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_WORKGROUP_SIZE]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_num_threads_z( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_WORKGROUP_SIZE:%.*]] = call i64 @llvm.spv.workgroup.size.i64(i32 2) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_WORKGROUP_SIZE]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_num_threads( +// SPIRV-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4 +// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4 +// SPIRV-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ +// SPIRV-NEXT: i32 0, label %[[SW_BB:.*]] +// SPIRV-NEXT: i32 1, label %[[SW_BB1:.*]] +// SPIRV-NEXT: i32 2, label %[[SW_BB3:.*]] +// SPIRV-NEXT: ] +// SPIRV: [[SW_BB]]: +// SPIRV-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_threads_x() +// SPIRV-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN:.*]] +// SPIRV: [[SW_BB1]]: +// SPIRV-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_threads_y() +// SPIRV-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: [[SW_BB3]]: +// SPIRV-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_num_threads_z() +// SPIRV-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: [[SW_DEFAULT]]: +// SPIRV-NEXT: store i32 1, ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: [[RETURN]]: +// SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// SPIRV-NEXT: ret i32 [[TMP1]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_thread_id_x( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_THREAD_ID_IN_GROUP:%.*]] = call i64 @llvm.spv.thread.id.in.group.i64(i32 0) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_THREAD_ID_IN_GROUP]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_thread_id_y( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_THREAD_ID_IN_GROUP:%.*]] = call i64 @llvm.spv.thread.id.in.group.i64(i32 1) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_THREAD_ID_IN_GROUP]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_thread_id_z( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SPV_THREAD_ID_IN_GROUP:%.*]] = call i64 @llvm.spv.thread.id.in.group.i64(i32 2) +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SPV_THREAD_ID_IN_GROUP]] to i32 +// 
SPIRV-NEXT: ret i32 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_thread_id( +// SPIRV-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4 +// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4 +// SPIRV-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [ +// SPIRV-NEXT: i32 0, label %[[SW_BB:.*]] +// SPIRV-NEXT: i32 1, label %[[SW_BB1:.*]] +// SPIRV-NEXT: i32 2, label %[[SW_BB3:.*]] +// SPIRV-NEXT: ] +// SPIRV: [[SW_BB]]: +// SPIRV-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_thread_id_x() +// SPIRV-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN:.*]] +// SPIRV: [[SW_BB1]]: +// SPIRV-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_thread_id_y() +// SPIRV-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: [[SW_BB3]]: +// SPIRV-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_thread_id_z() +// SPIRV-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: [[SW_DEFAULT]]: +// SPIRV-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// SPIRV-NEXT: br label %[[RETURN]] +// SPIRV: [[RETURN]]: +// SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// SPIRV-NEXT: ret i32 [[TMP1]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_num_lanes( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = call i32 @llvm.spv.subgroup.size() +// SPIRV-NEXT: ret i32 [[TMP0]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_lane_id( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = call i32 @llvm.spv.subgroup.local.invocation.id() +// SPIRV-NEXT: ret i32 [[TMP0]] +// +// +// SPIRV-LABEL: define internal spir_func i64 @__gpu_lane_mask( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[__MASK:%.*]] = alloca <4 x i32>, align 16 +// SPIRV-NEXT: [[REF_TMP:%.*]] = alloca <2 x i32>, align 8 +// SPIRV-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.spv.wave.ballot(i1 true) +// SPIRV-NEXT: store <4 x i32> [[TMP0]], ptr [[__MASK]], align 16 +// SPIRV-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__MASK]], align 16 +// SPIRV-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[__MASK]], align 16 +// SPIRV-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1> +// SPIRV-NEXT: store <2 x i32> [[SHUFFLE]], ptr [[REF_TMP]], align 8 +// SPIRV-NEXT: [[TMP3:%.*]] = load i64, ptr [[REF_TMP]], align 8 +// SPIRV-NEXT: ret i64 [[TMP3]] +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_read_first_lane_u32( +// SPIRV-SAME: i64 noundef [[__LANE_MASK:%.*]], i32 noundef [[__X:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8 +// SPIRV-NEXT: [[__X_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: store i32 [[__X]], ptr [[__X_ADDR]], align 4 +// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[__X_ADDR]], align 4 +// SPIRV-NEXT: [[CALL:%.*]] = call spir_func i64 @__gpu_lane_mask() +// SPIRV-NEXT: [[TMP1:%.*]] = call i64 @llvm.cttz.i64(i64 [[CALL]], i1 true) +// SPIRV-NEXT: [[CAST:%.*]] = trunc i64 [[TMP1]] to i32 +// SPIRV-NEXT: [[SPV_SHUFFLE:%.*]] = call i32 
@llvm.spv.wave.readlane.i32(i32 [[TMP0]], i32 [[CAST]]) +// SPIRV-NEXT: ret i32 [[SPV_SHUFFLE]] +// +// +// SPIRV-LABEL: define internal spir_func i64 @__gpu_read_first_lane_u64( +// SPIRV-SAME: i64 noundef [[__LANE_MASK:%.*]], i64 noundef [[__X:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8 +// SPIRV-NEXT: [[__X_ADDR:%.*]] = alloca i64, align 8 +// SPIRV-NEXT: [[__HI:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[__LO:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: store i64 [[__X]], ptr [[__X_ADDR]], align 8 +// SPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr [[__X_ADDR]], align 8 +// SPIRV-NEXT: [[SHR:%.*]] = lshr i64 [[TMP0]], 32 +// SPIRV-NEXT: [[CONV:%.*]] = trunc i64 [[SHR]] to i32 +// SPIRV-NEXT: store i32 [[CONV]], ptr [[__HI]], align 4 +// SPIRV-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR]], align 8 +// SPIRV-NEXT: [[AND:%.*]] = and i64 [[TMP1]], 4294967295 +// SPIRV-NEXT: [[CONV1:%.*]] = trunc i64 [[AND]] to i32 +// SPIRV-NEXT: store i32 [[CONV1]], ptr [[__LO]], align 4 +// SPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[__HI]], align 4 +// SPIRV-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef [[TMP2]], i32 noundef [[TMP3]]) +// SPIRV-NEXT: [[CONV2:%.*]] = zext i32 [[CALL]] to i64 +// SPIRV-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 32 +// SPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: [[TMP5:%.*]] = load i32, ptr [[__LO]], align 4 +// SPIRV-NEXT: [[CALL3:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef [[TMP4]], i32 noundef [[TMP5]]) +// SPIRV-NEXT: [[CONV4:%.*]] = zext i32 [[CALL3]] to i64 +// SPIRV-NEXT: [[AND5:%.*]] = and i64 [[CONV4]], 4294967295 +// SPIRV-NEXT: [[OR:%.*]] = or i64 [[SHL]], [[AND5]] +// SPIRV-NEXT: ret i64 [[OR]] +// +// +// SPIRV-LABEL: define internal spir_func i64 @__gpu_ballot( +// SPIRV-SAME: i64 noundef [[__LANE_MASK:%.*]], i1 noundef zeroext [[__X:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8 +// SPIRV-NEXT: [[__X_ADDR:%.*]] = alloca i8, align 1 +// SPIRV-NEXT: [[__MASK:%.*]] = alloca <4 x i32>, align 16 +// SPIRV-NEXT: [[REF_TMP:%.*]] = alloca <2 x i32>, align 8 +// SPIRV-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[__X]] to i8 +// SPIRV-NEXT: store i8 [[STOREDV]], ptr [[__X_ADDR]], align 1 +// SPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr [[__X_ADDR]], align 1 +// SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1 +// SPIRV-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.spv.wave.ballot(i1 [[LOADEDV]]) +// SPIRV-NEXT: store <4 x i32> [[TMP1]], ptr [[__MASK]], align 16 +// SPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[__MASK]], align 16 +// SPIRV-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[__MASK]], align 16 +// SPIRV-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <2 x i32> <i32 0, i32 1> +// SPIRV-NEXT: store <2 x i32> [[SHUFFLE]], ptr [[REF_TMP]], align 8 +// SPIRV-NEXT: [[TMP5:%.*]] = load i64, ptr [[REF_TMP]], align 8 +// SPIRV-NEXT: [[AND:%.*]] = and i64 [[TMP2]], [[TMP5]] +// SPIRV-NEXT: ret i64 [[AND]] +// +// +// SPIRV-LABEL: define internal spir_func void @__gpu_sync_threads( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: 
[[ENTRY:.*:]] +// SPIRV-NEXT: call void @llvm.spv.group.memory.barrier.with.group.sync() +// SPIRV-NEXT: ret void +// +// +// SPIRV-LABEL: define internal spir_func void @__gpu_sync_lane( +// SPIRV-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8 +// SPIRV-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: ret void +// +// +// SPIRV-LABEL: define internal spir_func i32 @__gpu_shuffle_idx_u32( +// SPIRV-SAME: i64 noundef [[__LANE_MASK:%.*]], i32 noundef [[__IDX:%.*]], i32 noundef [[__X:%.*]], i32 noundef [[__WIDTH:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8 +// SPIRV-NEXT: [[__IDX_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[__X_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[__WIDTH_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[__LANE:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: store i32 [[__IDX]], ptr [[__IDX_ADDR]], align 4 +// SPIRV-NEXT: store i32 [[__X]], ptr [[__X_ADDR]], align 4 +// SPIRV-NEXT: store i32 [[__WIDTH]], ptr [[__WIDTH_ADDR]], align 4 +// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[__IDX_ADDR]], align 4 +// SPIRV-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_lane_id() +// SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[__WIDTH_ADDR]], align 4 +// SPIRV-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], 1 +// SPIRV-NEXT: [[NOT:%.*]] = xor i32 [[SUB]], -1 +// SPIRV-NEXT: [[AND:%.*]] = and i32 [[CALL]], [[NOT]] +// SPIRV-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[AND]] +// SPIRV-NEXT: store i32 [[ADD]], ptr [[__LANE]], align 4 +// SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR]], align 4 +// SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[__LANE]], align 4 +// SPIRV-NEXT: [[SPV_SHUFFLE:%.*]] = call i32 @llvm.spv.wave.readlane.i32(i32 [[TMP2]], i32 [[TMP3]]) +// SPIRV-NEXT: ret i32 [[SPV_SHUFFLE]] +// +// +// SPIRV-LABEL: define internal spir_func i64 @__gpu_first_lane_id( +// SPIRV-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8 +// SPIRV-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: [[TMP1:%.*]] = call i64 @llvm.cttz.i64(i64 [[TMP0]], i1 true) +// SPIRV-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 1 +// SPIRV-NEXT: [[ISZERO:%.*]] = icmp eq i64 [[TMP0]], 0 +// SPIRV-NEXT: [[FFS:%.*]] = select i1 [[ISZERO]], i64 0, i64 [[TMP2]] +// SPIRV-NEXT: [[CAST:%.*]] = trunc i64 [[FFS]] to i32 +// SPIRV-NEXT: [[SUB:%.*]] = sub nsw i32 [[CAST]], 1 +// SPIRV-NEXT: [[CONV:%.*]] = sext i32 [[SUB]] to i64 +// SPIRV-NEXT: ret i64 [[CONV]] +// +// +// SPIRV-LABEL: define internal spir_func zeroext i1 @__gpu_is_first_in_lane( +// SPIRV-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8 +// SPIRV-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_lane_id() +// SPIRV-NEXT: [[CONV:%.*]] = zext i32 [[CALL]] to i64 +// SPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8 +// SPIRV-NEXT: [[CALL1:%.*]] = call spir_func i64 @__gpu_first_lane_id(i64 noundef [[TMP0]]) +// SPIRV-NEXT: [[CMP:%.*]] = icmp eq i64 [[CONV]], [[CALL1]] +// SPIRV-NEXT: ret i1 [[CMP]] 
+// +// +// SPIRV-LABEL: define internal spir_func void @__gpu_exit( +// SPIRV-SAME: ) #[[ATTR1:[0-9]+]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: call void @llvm.trap() +// SPIRV-NEXT: ret void diff --git a/clang/test/Headers/gpuintrin_lang.c b/clang/test/Headers/gpuintrin_lang.c index 653f87aea2ce3..e3db72d5ff928 100644 --- a/clang/test/Headers/gpuintrin_lang.c +++ b/clang/test/Headers/gpuintrin_lang.c @@ -22,6 +22,11 @@ // RUN: -fopenmp-is-target-device -triple amdgcn -emit-llvm %s -o - \ // RUN: | FileCheck %s --check-prefix=OPENMP // +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -DSYCL \ +// RUN: -internal-isystem %S/../../lib/Headers/ -fsycl-is-device \ +// RUN: -x c++ -triple spirv64 -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=SYCL +// // RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ // RUN: -std=c89 -internal-isystem %S/../../lib/Headers/ \ // RUN: -triple amdgcn-amd-amdhsa -emit-llvm %s -o - \ @@ -32,11 +37,13 @@ #ifdef __device__ __device__ int foo() { return __gpu_thread_id_x(); } +#elif defined(SYCL) +extern "C" [[clang::sycl_external]] int foo() { return __gpu_thread_id_x(); } #else // CUDA-LABEL: define dso_local i32 @foo( // CUDA-SAME: ) #[[ATTR0:[0-9]+]] { // CUDA-NEXT: [[ENTRY:.*:]] -// CUDA-NEXT: [[TMP0:%.*]] = call {{.*}}i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CUDA-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CUDA-NEXT: ret i32 [[TMP0]] // // HIP-LABEL: define dso_local i32 @foo( @@ -61,6 +68,17 @@ __device__ int foo() { return __gpu_thread_id_x(); } // OPENMP-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() // OPENMP-NEXT: ret i32 [[TMP0]] // +// SYCL-LABEL: define spir_func i32 @foo( +// SYCL-SAME: ) #[[ATTR0:[0-9]+]] { +// SYCL-NEXT: [[ENTRY:.*:]] +// SYCL-NEXT: [[RETVAL_I:%.*]] = alloca i32, align 4 +// SYCL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SYCL-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4) +// SYCL-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr [[RETVAL_I]] to ptr addrspace(4) +// SYCL-NEXT: [[SPV_THREAD_ID_IN_GROUP_I:%.*]] = call i64 @llvm.spv.thread.id.in.group.i64(i32 0) +// SYCL-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPV_THREAD_ID_IN_GROUP_I]] to i32 +// SYCL-NEXT: ret i32 [[CONV_I]] +// // C89-LABEL: define dso_local i32 @foo( // C89-SAME: ) #[[ATTR0:[0-9]+]] { // C89-NEXT: [[ENTRY:.*:]] _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
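
As a rough illustration of what this patch enables (a hypothetical sketch, not
code taken from the commit): a C translation unit can include <gpuintrin.h> and
be compiled for a spirv64 triple, the same configuration the new RUN lines above
exercise with "%clang_cc1 ... -triple spirv64-- -emit-llvm", and the __SPIRV__
branch added to gpuintrin.h then pulls in <spirvintrin.h>. The kernel name, the
output pointer, and the indexing scheme below are made up for the example.

    #include <stdint.h>
    #include <gpuintrin.h>

    // Hypothetical example: each work-item computes its global index from the
    // workgroup id and workgroup size, then records which subgroup lane ran it.
    __gpu_kernel void write_lane_ids(__gpu_global uint32_t *out) {
      uint32_t global_id =
          __gpu_block_id_x() * __gpu_num_threads_x() + __gpu_thread_id_x();
      out[global_id] = __gpu_lane_id();
      __gpu_sync_threads(); // barrier across the workgroup
    }

The wrappers used here all route through the new builtins shown in the diff
(e.g. __gpu_thread_id_x() lowers to llvm.spv.thread.id.in.group.i64), so this is
just the portable gpuintrin.h surface applied to the SPIR-V target.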
