hsmhsm updated this revision to Diff 380320. hsmhsm added a comment. Rebase.
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D110257/new/ https://reviews.llvm.org/D110257 Files: clang/lib/CodeGen/CGExpr.cpp clang/lib/CodeGen/CodeGenFunction.cpp clang/lib/CodeGen/CodeGenFunction.h clang/test/CodeGenCUDA/builtins-amdgcn.cu clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp clang/test/CodeGenCXX/amdgcn-func-arg.cpp clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp clang/test/CodeGenCXX/vla.cpp clang/test/CodeGenSYCL/address-space-deduction.cpp clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp
Index: clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp =================================================================== --- clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp +++ clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp @@ -12,7 +12,9 @@ int arr[N]; // CHECK: [[VAR_ADDR:%.+]] = alloca [100 x i32]*, align 8, addrspace(5) + // CHECK-NEXT: [[VAR2_ADDR:%.+]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[VAR_ADDR_CAST:%.+]] = addrspacecast [100 x i32]* addrspace(5)* [[VAR_ADDR]] to [100 x i32]** + // CHECK-NEXT: [[VAR2_ADDR_CAST:%.+]] = addrspacecast i32 addrspace(5)* [[VAR2_ADDR]] to i32* // CHECK: store [100 x i32]* [[VAR:%.+]], [100 x i32]** [[VAR_ADDR_CAST]], align 8 #pragma omp target Index: clang/test/CodeGenSYCL/address-space-deduction.cpp =================================================================== --- clang/test/CodeGenSYCL/address-space-deduction.cpp +++ clang/test/CodeGenSYCL/address-space-deduction.cpp @@ -1,34 +1,33 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s - // CHECK-LABEL: @_Z4testv( // CHECK-NEXT: entry: // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast i32* [[I]] to i32 addrspace(4)* // CHECK-NEXT: [[PPTR:%.*]] = alloca i32 addrspace(4)*, align 8 -// CHECK-NEXT: [[PPTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[PPTR]] to i32 addrspace(4)* addrspace(4)* // CHECK-NEXT: [[IS_I_PTR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[IS_I_PTR_ASCAST:%.*]] = addrspacecast i8* [[IS_I_PTR]] to i8 addrspace(4)* // CHECK-NEXT: [[VAR23:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[VAR23_ASCAST:%.*]] = addrspacecast i32* [[VAR23]] to i32 addrspace(4)* // CHECK-NEXT: [[CP:%.*]] = alloca i8 addrspace(4)*, align 8 -// CHECK-NEXT: [[CP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CP]] to i8 addrspace(4)* addrspace(4)* // CHECK-NEXT: [[ARR:%.*]] = alloca [42 x i32], align 4 -// CHECK-NEXT: [[ARR_ASCAST:%.*]] = addrspacecast [42 x i32]* [[ARR]] to [42 x i32] addrspace(4)* // CHECK-NEXT: [[CPP:%.*]] = alloca i8 addrspace(4)*, align 8 -// CHECK-NEXT: [[CPP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CPP]] to i8 addrspace(4)* addrspace(4)* // CHECK-NEXT: [[APTR:%.*]] = alloca i32 addrspace(4)*, align 8 -// CHECK-NEXT: [[APTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[APTR]] to i32 addrspace(4)* addrspace(4)* // CHECK-NEXT: [[STR:%.*]] = alloca i8 addrspace(4)*, align 8 -// CHECK-NEXT: [[STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[STR]] to i8 addrspace(4)* addrspace(4)* // CHECK-NEXT: [[PHI_STR:%.*]] = alloca i8 addrspace(4)*, align 8 -// CHECK-NEXT: [[PHI_STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[PHI_STR]] to i8 addrspace(4)* addrspace(4)* // CHECK-NEXT: [[SELECT_NULL:%.*]] = alloca i8 addrspace(4)*, align 8 -// CHECK-NEXT: [[SELECT_NULL_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_NULL]] to i8 addrspace(4)* addrspace(4)* // CHECK-NEXT: [[SELECT_STR_TRIVIAL1:%.*]] = alloca i8 addrspace(4)*, align 8 -// CHECK-NEXT: [[SELECT_STR_TRIVIAL1_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_STR_TRIVIAL1]] to i8 addrspace(4)* addrspace(4)* // CHECK-NEXT: [[SELECT_STR_TRIVIAL2:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast i32* [[I]] to i32 addrspace(4)* +// CHECK-NEXT: [[PPTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[PPTR]] to i32 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[IS_I_PTR_ASCAST:%.*]] = addrspacecast i8* [[IS_I_PTR]] to i8 addrspace(4)* +// CHECK-NEXT: [[VAR23_ASCAST:%.*]] = addrspacecast i32* [[VAR23]] to i32 addrspace(4)* +// CHECK-NEXT: [[CP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CP]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[ARR_ASCAST:%.*]] = addrspacecast [42 x i32]* [[ARR]] to [42 x i32] addrspace(4)* +// CHECK-NEXT: [[CPP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CPP]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[APTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[APTR]] to i32 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[STR]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[PHI_STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[PHI_STR]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[SELECT_NULL_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_NULL]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[SELECT_STR_TRIVIAL1_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_STR_TRIVIAL1]] to i8 addrspace(4)* addrspace(4)* // CHECK-NEXT: [[SELECT_STR_TRIVIAL2_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_STR_TRIVIAL2]] to i8 addrspace(4)* addrspace(4)* // CHECK-NEXT: store i32 0, i32 addrspace(4)* [[I_ASCAST]], align 4 // CHECK-NEXT: store i32 addrspace(4)* [[I_ASCAST]], i32 addrspace(4)* addrspace(4)* [[PPTR_ASCAST]], align 8 Index: clang/test/CodeGenCXX/vla.cpp =================================================================== --- clang/test/CodeGenCXX/vla.cpp +++ clang/test/CodeGenCXX/vla.cpp @@ -18,19 +18,19 @@ // rdar://problem/9506377 void test0(void *array, int n) { // CHECK-LABEL: define{{.*}} void @_Z5test0Pvi( - // X64: [[ARRAY:%.*]] = alloca i8*, align 8 // AMDGCN: [[ARRAY0:%.*]] = alloca i8*, align 8, addrspace(5) + // AMDGCN-NEXT: [[N0:%.*]] = alloca i32, align 4, addrspace(5) + // AMDGCN-NEXT: [[REF0:%.*]] = alloca i16*, align 8, addrspace(5) + // AMDGCN-NEXT: [[S0:%.*]] = alloca i16, align 2, addrspace(5) // AMDGCN-NEXT: [[ARRAY:%.*]] = addrspacecast i8* addrspace(5)* [[ARRAY0]] to i8** - // X64-NEXT: [[N:%.*]] = alloca i32, align 4 - // AMDGCN: [[N0:%.*]] = alloca i32, align 4, addrspace(5) // AMDGCN-NEXT: [[N:%.*]] = addrspacecast i32 addrspace(5)* [[N0]] to i32* - // X64-NEXT: [[REF:%.*]] = alloca i16*, align 8 - // AMDGCN: [[REF0:%.*]] = alloca i16*, align 8, addrspace(5) // AMDGCN-NEXT: [[REF:%.*]] = addrspacecast i16* addrspace(5)* [[REF0]] to i16** - // X64-NEXT: [[S:%.*]] = alloca i16, align 2 - // AMDGCN: [[S0:%.*]] = alloca i16, align 2, addrspace(5) // AMDGCN-NEXT: [[S:%.*]] = addrspacecast i16 addrspace(5)* [[S0]] to i16* - // CHECK-NEXT: store i8* + // X64: [[ARRAY:%.*]] = alloca i8*, align 8 + // X64-NEXT: [[N:%.*]] = alloca i32, align 4 + // X64-NEXT: [[REF:%.*]] = alloca i16*, align 8 + // X64-NEXT: [[S:%.*]] = alloca i16, align 2 + // CHECK-NEXT: store i8* // CHECK-NEXT: store i32 // Capture the bounds. Index: clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp =================================================================== --- clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp +++ clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp @@ -6,8 +6,8 @@ // CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj( // CHECK-NEXT: entry: // CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8, addrspace(5) -// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32** // CHECK-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32** // CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[RES]] to i32* // CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 @@ -32,8 +32,8 @@ // CHECK-LABEL: @_Z29test_non_volatile_parameter64Py( // CHECK-NEXT: entry: // CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) -// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64** // CHECK-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64** // CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[RES]] to i64* // CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 @@ -58,8 +58,8 @@ // CHECK-LABEL: @_Z25test_volatile_parameter32PVj( // CHECK-NEXT: entry: // CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8, addrspace(5) -// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32** // CHECK-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32** // CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[RES]] to i32* // CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 @@ -84,8 +84,8 @@ // CHECK-LABEL: @_Z25test_volatile_parameter64PVy( // CHECK-NEXT: entry: // CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) -// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64** // CHECK-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64** // CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[RES]] to i64* // CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 Index: clang/test/CodeGenCXX/amdgcn-func-arg.cpp =================================================================== --- clang/test/CodeGenCXX/amdgcn-func-arg.cpp +++ clang/test/CodeGenCXX/amdgcn-func-arg.cpp @@ -33,8 +33,8 @@ // CHECK-LABEL: @_Z22test_indirect_arg_autov( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = alloca [[CLASS_A:%.*]], align 4, addrspace(5) -// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[A]] to %class.A* // CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_A]], align 4, addrspace(5) +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[A]] to %class.A* // CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[AGG_TMP]] to %class.A* // CHECK-NEXT: call void @_ZN1AC1Ev(%class.A* noundef [[A_ASCAST]]) // CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.A* [[AGG_TMP_ASCAST]] to i8* @@ -85,8 +85,8 @@ // CHECK-LABEL: @_Z19test_byval_arg_autov( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[CLASS_B:%.*]], align 4, addrspace(5) -// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast [[CLASS_B]] addrspace(5)* [[B]] to %class.B* // CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_B]], align 4, addrspace(5) +// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast [[CLASS_B]] addrspace(5)* [[B]] to %class.B* // CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast [[CLASS_B]] addrspace(5)* [[AGG_TMP]] to %class.B* // CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.B* [[AGG_TMP_ASCAST]] to i8* // CHECK-NEXT: [[TMP1:%.*]] = bitcast %class.B* [[B_ASCAST]] to i8* Index: clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp =================================================================== --- clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp +++ clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp @@ -17,16 +17,16 @@ // CHECK-LABEL: @_Z5func2v( // CHECK-NEXT: entry: // CHECK-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV1]] to i32* // CHECK-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV2]] to i32* // CHECK-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5) -// CHECK-NEXT: [[LA_ASCAST:%.*]] = addrspacecast [100 x i32] addrspace(5)* [[LA]] to [100 x i32]* // CHECK-NEXT: [[LP1:%.*]] = alloca i32*, align 8, addrspace(5) -// CHECK-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP1]] to i32** // CHECK-NEXT: [[LP2:%.*]] = alloca i32*, align 8, addrspace(5) -// CHECK-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP2]] to i32** // CHECK-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV1]] to i32* +// CHECK-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV2]] to i32* +// CHECK-NEXT: [[LA_ASCAST:%.*]] = addrspacecast [100 x i32] addrspace(5)* [[LA]] to [100 x i32]* +// CHECK-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP1]] to i32** +// CHECK-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP2]] to i32** // CHECK-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LVC]] to i32* // CHECK-NEXT: store i32 1, i32* [[LV1_ASCAST]], align 4 // CHECK-NEXT: store i32 2, i32* [[LV2_ASCAST]], align 4 Index: clang/test/CodeGenCUDA/builtins-amdgcn.cu =================================================================== --- clang/test/CodeGenCUDA/builtins-amdgcn.cu +++ clang/test/CodeGenCUDA/builtins-amdgcn.cu @@ -12,10 +12,10 @@ // CHECK-LABEL: @_Z16use_dispatch_ptrPi( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT:%.*]] = alloca i32*, align 8, addrspace(5) -// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT]] to i32** // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i32*, align 8, addrspace(5) -// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT_ADDR]] to i32** // CHECK-NEXT: [[DISPATCH_PTR:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT]] to i32** +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT_ADDR]] to i32** // CHECK-NEXT: [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[DISPATCH_PTR]] to i32** // CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i32 addrspace(1)* [[OUT_COERCE:%.*]] to i32* // CHECK-NEXT: store i32* [[TMP0]], i32** [[OUT_ASCAST]], align 8 @@ -36,19 +36,20 @@ } __global__ -// CHECK-LABEL: @_Z12test_ds_fmaxf( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* -// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* -// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fmax.f32(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false) -// CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4 -// CHECK-NEXT: ret void -// -void test_ds_fmax(float src) { + // CHECK-LABEL: @_Z12test_ds_fmaxf( + // CHECK-NEXT: entry: + // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) + // CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) + // CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* + // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* + // CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 + // CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 + // CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fmax.f32(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false) + // CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4 + // CHECK-NEXT: ret void + // + void + test_ds_fmax(float src) { __shared__ float shared; volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false); } @@ -56,8 +57,8 @@ // CHECK-LABEL: @_Z12test_ds_faddf( // CHECK-NEXT: entry: // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* // CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* // CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 @@ -73,12 +74,12 @@ // CHECK-LABEL: @_Z12test_ds_fminfPf( // CHECK-NEXT: entry: // CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5) -// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float** // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* // CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5) -// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float** // CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float** +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* +// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float** // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* // CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float* // CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8 @@ -123,12 +124,12 @@ // CHECK-LABEL: @_Z14test_uicmp_i64Pyyy( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5) -// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64** // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) -// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64** // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[A_ADDR]] to i64* // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64** +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64** +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[A_ADDR]] to i64* // CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[B_ADDR]] to i64* // CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64* // CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8 @@ -153,8 +154,8 @@ // CHECK-LABEL: @_Z14test_s_memtimePy( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5) -// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64** // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64** // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64** // CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64* // CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8 @@ -176,12 +177,12 @@ // CHECK-LABEL: @_Z17test_ds_fmin_funcfPf( // CHECK-NEXT: entry: // CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5) -// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float** // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* // CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5) -// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float** // CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float** +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* +// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float** // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* // CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float* // CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8 @@ -202,14 +203,13 @@ func(shared); } - // CHECK-LABEL: @_Z14test_is_sharedPf( // CHECK-NEXT: entry: // CHECK-NEXT: [[X:%.*]] = alloca float*, align 8, addrspace(5) -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X]] to float** // CHECK-NEXT: [[X_ADDR:%.*]] = alloca float*, align 8, addrspace(5) -// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X_ADDR]] to float** // CHECK-NEXT: [[RET:%.*]] = alloca i8, align 1, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X]] to float** +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X_ADDR]] to float** // CHECK-NEXT: [[RET_ASCAST:%.*]] = addrspacecast i8 addrspace(5)* [[RET]] to i8* // CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[X_COERCE:%.*]] to float* // CHECK-NEXT: store float* [[TMP0]], float** [[X_ASCAST]], align 8 Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -379,6 +379,33 @@ /// we prefer to insert allocas. llvm::AssertingVH<llvm::Instruction> AllocaInsertPt; +private: + /// PostAllocaInsertPt - This is a place in the prologue where code can be + /// inserted that will be dominated by all the static allocas. This helps + /// achieve two things: + /// 1. Contiguity of all static allocas (within the prologue) is maintained. + /// 2. All other prologue code (which are dominated by static allocas) do + /// appear in the source order immediately after all static allocas. + /// + /// PostAllocaInsertPt will be lazily created when it is *really* required. + llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr; + +public: + /// Return PostAllocaInsertPt. If it is not yet created, then insert it + /// immediately after AllocaInsertPt. + llvm::Instruction *getPostAllocaInsertPoint() { + if (!PostAllocaInsertPt) { + auto *EBB = AllocaInsertPt->getParent(); + assert(EBB->isEntryBlock() && + "EBB should be entry block of the current code gen function"); + PostAllocaInsertPt = AllocaInsertPt->clone(); + PostAllocaInsertPt->setName("postallocapt"); + PostAllocaInsertPt->insertAfter(AllocaInsertPt); + } + + return PostAllocaInsertPt; + } + /// API for captured statement code generation. class CGCapturedStmtInfo { public: Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -424,6 +424,14 @@ AllocaInsertPt = nullptr; Ptr->eraseFromParent(); + // PostAllocaInsertPt, if created, was lazily created when it was required, + // remove it now since it was just created for our own convenience. + if (PostAllocaInsertPt) { + llvm::Instruction *PostPtr = PostAllocaInsertPt; + PostAllocaInsertPt = nullptr; + PostPtr->eraseFromParent(); + } + // If someone took the address of a label but never did an indirect goto, we // made a zero entry PHI node, which is illegal, zap it now. if (IndirectBranch) { Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -95,7 +95,7 @@ // otherwise alloca is inserted at the current insertion point of the // builder. if (!ArraySize) - Builder.SetInsertPoint(AllocaInsertPt); + Builder.SetInsertPoint(getPostAllocaInsertPoint()); V = getTargetHooks().performAddrSpaceCast( *this, V, getASTAllocaAddressSpace(), LangAS::Default, Ty->getPointerTo(DestAddrSpace), /*non-null*/ true);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits