llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Nikolas Klauser (philnik777) <details> <summary>Changes</summary> There doesn't seem to be a good reason to reject gnu vectors in these builtins. The error messages for that case are also rather bizarre, which doesn't help with figuring out what's going wrong. --- Full diff: https://github.com/llvm/llvm-project/pull/198248.diff 2 Files Affected: - (modified) clang/lib/Sema/SemaChecking.cpp (+16-11) - (modified) clang/test/CodeGen/builtin-masked.c (+195) ``````````diff diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 530587208cce8..b9ecdb659b3a8 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2650,12 +2650,19 @@ static ExprResult BuiltinMaskedStore(Sema &S, CallExpr *TheCall) { S.Diag(ValArg->getExprLoc(), diag::err_vec_masked_load_store_ptr) << 2 << "vector"); - QualType PointeeTy = PtrTy->getPointeeType(); const VectorType *MaskVecTy = MaskTy->getAs<VectorType>(); - QualType MemoryTy = S.Context.getExtVectorType(PointeeTy.getUnqualifiedType(), - MaskVecTy->getNumElements()); - if (!S.Context.hasSameType(ValTy.getUnqualifiedType(), - MemoryTy.getUnqualifiedType())) + const VectorType *ValVecTy = ValTy->getAs<VectorType>(); + + if (MaskVecTy->getNumElements() != ValVecTy->getNumElements()) { + return ExprError( + S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size) + << S.getASTContext().BuiltinInfo.getQuotedName( + TheCall->getBuiltinCallee()) + << MaskTy << ValTy); + } + + if (!S.Context.hasSameType(ValVecTy->getElementType().getUnqualifiedType(), + PtrTy->getPointeeType().getUnqualifiedType())) return ExprError(S.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_incompatible_vector) << TheCall->getDirectCallee() << /*isMorethantwoArgs*/ 2 @@ -2685,7 +2692,7 @@ static ExprResult BuiltinMaskedGather(Sema &S, CallExpr *TheCall) { QualType IdxTy = IdxArg->getType(); const VectorType *IdxVecTy = 
IdxTy->getAs<VectorType>(); - if (!IdxTy->isExtVectorType() || !IdxVecTy->getElementType()->isIntegerType()) + if (!IdxTy->isVectorType() || !IdxVecTy->getElementType()->isIntegerType()) return S.Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type) << 1 << /* vector of */ 4 << /* integer */ 1 << /* no fp */ 0 << IdxTy; @@ -2736,7 +2743,7 @@ static ExprResult BuiltinMaskedScatter(Sema &S, CallExpr *TheCall) { QualType IdxTy = IdxArg->getType(); const VectorType *IdxVecTy = IdxTy->getAs<VectorType>(); - if (!IdxTy->isExtVectorType() || !IdxVecTy->getElementType()->isIntegerType()) + if (!IdxTy->isVectorType() || !IdxVecTy->getElementType()->isIntegerType()) return S.Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type) << 2 << /* vector of */ 4 << /* integer */ 1 << /* no fp */ 0 << IdxTy; @@ -2744,7 +2751,6 @@ static ExprResult BuiltinMaskedScatter(Sema &S, CallExpr *TheCall) { QualType ValTy = ValArg->getType(); QualType MaskTy = MaskArg->getType(); QualType PtrTy = PtrArg->getType(); - QualType PointeeTy = PtrTy->getPointeeType(); const VectorType *MaskVecTy = MaskTy->castAs<VectorType>(); const VectorType *ValVecTy = ValTy->castAs<VectorType>(); @@ -2761,9 +2767,8 @@ static ExprResult BuiltinMaskedScatter(Sema &S, CallExpr *TheCall) { TheCall->getBuiltinCallee()) << MaskTy << ValTy); - QualType ArgTy = S.Context.getExtVectorType(PointeeTy.getUnqualifiedType(), - MaskVecTy->getNumElements()); - if (!S.Context.hasSameType(ValTy.getUnqualifiedType(), ArgTy)) + if (!S.Context.hasSameType(ValVecTy->getElementType().getUnqualifiedType(), + PtrTy->getPointeeType().getUnqualifiedType())) return ExprError(S.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_incompatible_vector) << TheCall->getDirectCallee() << /*isMoreThanTwoArgs*/ 2 diff --git a/clang/test/CodeGen/builtin-masked.c b/clang/test/CodeGen/builtin-masked.c index 28b94b71e8970..f5a4b7511491c 100644 --- a/clang/test/CodeGen/builtin-masked.c +++ b/clang/test/CodeGen/builtin-masked.c @@ 
-3,6 +3,7 @@ typedef int v8i __attribute__((ext_vector_type(8))); typedef _Bool v8b __attribute__((ext_vector_type(8))); +typedef int gv8i [[gnu::vector_size(sizeof(int) * 8)]]; // CHECK-LABEL: define dso_local <8 x i32> @test_load( // CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] { @@ -104,6 +105,32 @@ void test_store(v8b m, v8i v, int *p) { __builtin_masked_store(m, v, p); } +// CHECK-LABEL: define dso_local void @gtest_store( +// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[M:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store i8 [[M_COERCE]], ptr [[M]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1 +// CHECK-NEXT: [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[V:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8 +// CHECK-NEXT: store i8 [[TMP1]], ptr [[M_ADDR]], align 1 +// CHECK-NEXT: store <8 x i32> [[V]], ptr [[V_ADDR]], align 32 +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[V_ADDR]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[P_ADDR]], align 8 +// CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP3]], ptr align 4 [[TMP4]], <8 x i1> [[TMP2]]) +// CHECK-NEXT: ret void +// +void gtest_store(v8b m, gv8i v, int *p) { + __builtin_masked_store(m, v, p); +} + // CHECK-LABEL: define dso_local void @test_compress_store( // CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], 
ptr noundef [[P:%.*]]) #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -130,6 +157,32 @@ void test_compress_store(v8b m, v8i v, int *p) { __builtin_masked_compress_store(m, v, p); } +// CHECK-LABEL: define dso_local void @gtest_compress_store( +// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[M:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store i8 [[M_COERCE]], ptr [[M]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1 +// CHECK-NEXT: [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[V:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8 +// CHECK-NEXT: store i8 [[TMP1]], ptr [[M_ADDR]], align 1 +// CHECK-NEXT: store <8 x i32> [[V]], ptr [[V_ADDR]], align 32 +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[V_ADDR]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[P_ADDR]], align 8 +// CHECK-NEXT: call void @llvm.masked.compressstore.v8i32(<8 x i32> [[TMP3]], ptr [[TMP4]], <8 x i1> [[TMP2]]) +// CHECK-NEXT: ret void +// +void gtest_compress_store(v8b m, gv8i v, int *p) { + __builtin_masked_compress_store(m, v, p); +} + // CHECK-LABEL: define dso_local <8 x i32> @test_gather( // CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -157,6 +210,33 @@ v8i test_gather(v8b mask, v8i idx, int *ptr) { return __builtin_masked_gather(mask, idx, ptr); } +// CHECK-LABEL: define 
dso_local <8 x i32> @gtest_gather( +// CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MASK:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[MASK_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[IDX_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store i8 [[MASK_COERCE]], ptr [[MASK]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[MASK]], align 1 +// CHECK-NEXT: [[MASK1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[IDX:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[MASK1]] to i8 +// CHECK-NEXT: store i8 [[TMP1]], ptr [[MASK_ADDR]], align 1 +// CHECK-NEXT: store <8 x i32> [[IDX]], ptr [[IDX_ADDR]], align 32 +// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[MASK_ADDR]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[IDX_ADDR]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], <8 x i32> [[TMP3]] +// CHECK-NEXT: [[MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> align 4 [[TMP5]], <8 x i1> [[TMP2]], <8 x i32> poison) +// CHECK-NEXT: ret <8 x i32> [[MASKED_GATHER]] +// +v8i gtest_gather(v8b mask, gv8i idx, int *ptr) { + return __builtin_masked_gather(mask, idx, ptr); +} + // CHECK-LABEL: define dso_local void @test_scatter( // CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP1:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -188,6 +268,37 @@ void test_scatter(v8b mask, v8i val, v8i idx, int *ptr) { 
__builtin_masked_scatter(mask, val, idx, ptr); } +// CHECK-LABEL: define dso_local void @gtest_scatter( +// CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP1:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MASK:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[MASK_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[IDX_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store i8 [[MASK_COERCE]], ptr [[MASK]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[MASK]], align 1 +// CHECK-NEXT: [[MASK1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[VAL:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32 +// CHECK-NEXT: [[IDX:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i1> [[MASK1]] to i8 +// CHECK-NEXT: store i8 [[TMP2]], ptr [[MASK_ADDR]], align 1 +// CHECK-NEXT: store <8 x i32> [[VAL]], ptr [[VAL_ADDR]], align 32 +// CHECK-NEXT: store <8 x i32> [[IDX]], ptr [[IDX_ADDR]], align 32 +// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[MASK_ADDR]], align 1 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VAL_ADDR]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[IDX_ADDR]], align 32 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], <8 x i32> [[TMP4]] +// CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP5]], <8 x ptr> align 4 [[TMP7]], <8 x i1> [[TMP3]]) +// CHECK-NEXT: ret void +// +void gtest_scatter(v8b mask, gv8i val, gv8i idx, int *ptr) { + __builtin_masked_scatter(mask, val, idx, ptr); +} + // CHECK-LABEL: 
define dso_local <8 x i32> @test_load_as( // CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr addrspace(42) noundef [[PTR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -236,6 +347,32 @@ void test_store_as(v8b m, v8i v, int __attribute__((address_space(42))) *p) { __builtin_masked_store(m, v, p); } +// CHECK-LABEL: define dso_local void @gtest_store_as( +// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr addrspace(42) noundef [[P:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[M:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(42), align 8 +// CHECK-NEXT: store i8 [[M_COERCE]], ptr [[M]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1 +// CHECK-NEXT: [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[V:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8 +// CHECK-NEXT: store i8 [[TMP1]], ptr [[M_ADDR]], align 1 +// CHECK-NEXT: store <8 x i32> [[V]], ptr [[V_ADDR]], align 32 +// CHECK-NEXT: store ptr addrspace(42) [[P]], ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[V_ADDR]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(42), ptr [[P_ADDR]], align 8 +// CHECK-NEXT: call void @llvm.masked.store.v8i32.p42(<8 x i32> [[TMP3]], ptr addrspace(42) align 4 [[TMP4]], <8 x i1> [[TMP2]]) +// CHECK-NEXT: ret void +// +void gtest_store_as(v8b m, gv8i v, int __attribute__((address_space(42))) *p) { + __builtin_masked_store(m, v, p); +} + // CHECK-LABEL: define dso_local <8 x i32> @test_gather_as( // CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 
[[TMP0:%.*]], ptr addrspace(42) noundef [[PTR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -263,6 +400,33 @@ v8i test_gather_as(v8b mask, v8i idx, int __attribute__((address_space(42))) *pt return __builtin_masked_gather(mask, idx, ptr); } +// CHECK-LABEL: define dso_local <8 x i32> @gtest_gather_as( +// CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr addrspace(42) noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MASK:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[MASK_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[IDX_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(42), align 8 +// CHECK-NEXT: store i8 [[MASK_COERCE]], ptr [[MASK]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[MASK]], align 1 +// CHECK-NEXT: [[MASK1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[IDX:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[MASK1]] to i8 +// CHECK-NEXT: store i8 [[TMP1]], ptr [[MASK_ADDR]], align 1 +// CHECK-NEXT: store <8 x i32> [[IDX]], ptr [[IDX_ADDR]], align 32 +// CHECK-NEXT: store ptr addrspace(42) [[PTR]], ptr [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[MASK_ADDR]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[IDX_ADDR]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(42), ptr [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr addrspace(42) [[TMP4]], <8 x i32> [[TMP3]] +// CHECK-NEXT: [[MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p42(<8 x ptr addrspace(42)> align 4 [[TMP5]], <8 x i1> [[TMP2]], <8 x i32> poison) +// CHECK-NEXT: ret <8 x i32> [[MASKED_GATHER]] +// +v8i gtest_gather_as(v8b mask, gv8i idx, int __attribute__((address_space(42))) *ptr) { + return 
__builtin_masked_gather(mask, idx, ptr); +} + // CHECK-LABEL: define dso_local void @test_scatter_as( // CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP1:%.*]], ptr addrspace(42) noundef [[PTR:%.*]]) #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -293,3 +457,34 @@ v8i test_gather_as(v8b mask, v8i idx, int __attribute__((address_space(42))) *pt void test_scatter_as(v8b mask, v8i val, v8i idx, int __attribute__((address_space(42))) *ptr) { __builtin_masked_scatter(mask, val, idx, ptr); } + +// CHECK-LABEL: define dso_local void @gtest_scatter_as( +// CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP1:%.*]], ptr addrspace(42) noundef [[PTR:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MASK:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[MASK_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[IDX_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(42), align 8 +// CHECK-NEXT: store i8 [[MASK_COERCE]], ptr [[MASK]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[MASK]], align 1 +// CHECK-NEXT: [[MASK1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[VAL:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32 +// CHECK-NEXT: [[IDX:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i1> [[MASK1]] to i8 +// CHECK-NEXT: store i8 [[TMP2]], ptr [[MASK_ADDR]], align 1 +// CHECK-NEXT: store <8 x i32> [[VAL]], ptr [[VAL_ADDR]], align 32 +// CHECK-NEXT: store <8 x i32> [[IDX]], ptr [[IDX_ADDR]], align 32 +// CHECK-NEXT: store ptr addrspace(42) [[PTR]], ptr [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[MASK_ADDR]], align 1 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// 
CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VAL_ADDR]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[IDX_ADDR]], align 32 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(42), ptr [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr addrspace(42) [[TMP6]], <8 x i32> [[TMP4]] +// CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p42(<8 x i32> [[TMP5]], <8 x ptr addrspace(42)> align 4 [[TMP7]], <8 x i1> [[TMP3]]) +// CHECK-NEXT: ret void +// +void gtest_scatter_as(v8b mask, gv8i val, gv8i idx, int __attribute__((address_space(42))) *ptr) { + __builtin_masked_scatter(mask, val, idx, ptr); +} `````````` </details> https://github.com/llvm/llvm-project/pull/198248 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
