[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/paulwalker-arm closed https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/130973 >From e2642bec52f881c1d457f2c72ed3ae4ceec570e6 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Thu, 20 Mar 2025 14:58:51 + Subject: [PATCH 1/3] Add SROA tests for casts between fixed and scalable types. --- .../scalable-vectors-with-known-vscale.ll | 363 ++ llvm/test/Transforms/SROA/scalable-vectors.ll | 223 ++- 2 files changed, 585 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll diff --git a/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll new file mode 100644 index 0..03afc9d609488 --- /dev/null +++ b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll @@ -0,0 +1,363 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; This test checks that SROA runs mem2reg on scalable vectors. + +define @alloca_nxv16i1( %pg) vscale_range(1) { +; CHECK-LABEL: @alloca_nxv16i1( +; CHECK-NEXT:ret [[PG:%.*]] +; + %pg.addr = alloca + store %pg, ptr %pg.addr + %1 = load , ptr %pg.addr + ret %1 +} + +define @alloca_nxv16i8( %vec) vscale_range(1) { +; CHECK-LABEL: @alloca_nxv16i8( +; CHECK-NEXT:ret [[VEC:%.*]] +; + %vec.addr = alloca + store %vec, ptr %vec.addr + %1 = load , ptr %vec.addr + ret %1 +} + +; Test scalable alloca that can't be promoted. Mem2Reg only considers +; non-volatile loads and stores for promotion. 
+define @unpromotable_alloca( %vec) vscale_range(1) { +; CHECK-LABEL: @unpromotable_alloca( +; CHECK-NEXT:[[VEC_ADDR:%.*]] = alloca , align 16 +; CHECK-NEXT:store volatile [[VEC:%.*]], ptr [[VEC_ADDR]], align 16 +; CHECK-NEXT:[[TMP1:%.*]] = load volatile , ptr [[VEC_ADDR]], align 16 +; CHECK-NEXT:ret [[TMP1]] +; + %vec.addr = alloca + store volatile %vec, ptr %vec.addr + %1 = load volatile , ptr %vec.addr + ret %1 +} + +; Test we bail out when using an alloca of a fixed-length vector (VLS) that was +; bitcasted to a scalable vector. +define @cast_alloca_to_svint32_t( %type.coerce) vscale_range(1) { +; CHECK-LABEL: @cast_alloca_to_svint32_t( +; CHECK-NEXT:[[TYPE:%.*]] = alloca <16 x i32>, align 64 +; CHECK-NEXT:[[TYPE_ADDR:%.*]] = alloca <16 x i32>, align 64 +; CHECK-NEXT:store [[TYPE_COERCE:%.*]], ptr [[TYPE]], align 16 +; CHECK-NEXT:[[TYPE1:%.*]] = load <16 x i32>, ptr [[TYPE]], align 64 +; CHECK-NEXT:store <16 x i32> [[TYPE1]], ptr [[TYPE_ADDR]], align 64 +; CHECK-NEXT:[[TMP1:%.*]] = load <16 x i32>, ptr [[TYPE_ADDR]], align 64 +; CHECK-NEXT:[[TMP2:%.*]] = load , ptr [[TYPE_ADDR]], align 16 +; CHECK-NEXT:ret [[TMP2]] +; + %type = alloca <16 x i32> + %type.addr = alloca <16 x i32> + store %type.coerce, ptr %type + %type1 = load <16 x i32>, ptr %type + store <16 x i32> %type1, ptr %type.addr + %1 = load <16 x i32>, ptr %type.addr + %2 = load , ptr %type.addr + ret %2 +} + +; When casting from VLA to VLS via memory check we bail out when producing a +; GEP where the element type is a scalable vector. 
+define @cast_alloca_from_svint32_t() vscale_range(1) { +; CHECK-LABEL: @cast_alloca_from_svint32_t( +; CHECK-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 16 +; CHECK-NEXT:store <16 x i32> zeroinitializer, ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT:[[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT:ret [[TMP1]] +; + %retval = alloca <16 x i32> + store <16 x i32> zeroinitializer, ptr %retval + %retval.coerce = alloca + call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval.coerce, ptr align 16 %retval, i64 64, i1 false) + %1 = load , ptr %retval.coerce + ret %1 +} + +; Test we bail out when using an alloca of a fixed-length vector (VLS) that was +; bitcasted to a scalable vector. +define void @select_load_alloca_to_svdouble_t() vscale_range(1) { +; CHECK-LABEL: @select_load_alloca_to_svdouble_t( +; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32 +; CHECK-NEXT:[[CMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT:[[COND:%.*]] = select i1 [[CMP]], ptr [[Z]], ptr null +; CHECK-NEXT:[[VAL:%.*]] = load , ptr [[COND]], align 16 +; CHECK-NEXT:ret void +; + %z = alloca <16 x half> + %cmp = icmp eq i32 0, 0 + %cond = select i1 %cmp, ptr %z, ptr null + %val = load , ptr %cond, align 16 + ret void +} + +define void @select_store_alloca_to_svdouble_t( %val) vscale_range(1) { +; CHECK-LABEL: @select_store_alloca_to_svdouble_t( +; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32 +; CHECK-NEXT:[[CMP
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/nikic approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
paulwalker-arm wrote: ping https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/130973 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
paulwalker-arm wrote: Rebased to incorporate https://github.com/llvm/llvm-project/pull/139190. Thanks again @topperc. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 nikic wrote: I'd expect `<vscale x 4 x i1>` to also be broken. We need the fixed size to be a multiple of 8 to compute correct sizes. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 topperc wrote: Before #100110, I believe we would have allowed <1 x i8> with <vscale x 4 x i1> when vscale was 2. Those are both 8 bit values, but I think getTypeSize on the scalable vector would return vscale*1 byte? Is that also broken? https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 nikic wrote: @paulwalker-arm We need to do *something* to prevent this from getting blatantly miscompiled before this PR can land. Removal is an option, but maybe the IR generated by clang can be fixed instead. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 paulwalker-arm wrote: @nikic - Is the removal a prerequisite of being able to land this PR? @topperc / @asi-sc - Any objections to the PR being reverted? Fully or Partly? Given the size of the regression (9x) I was hoping to backport this fix to LLVM 20. I guess this might not be possible if that means removing https://github.com/llvm/llvm-project/pull/100110 from LLVM 20 as well? https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/paulwalker-arm edited https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/paulwalker-arm edited https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 nikic wrote: Looks like this was added in https://github.com/llvm/llvm-project/pull/100110. This seems like a pretty fundamental problem with scalable vectors where the fixed size is non-byte-sized. The store size in that case is `(vscale x fixed_size_in_bits) / 8`, but we pretend that it is `vscale x (fixed_size_in_bits / 8)`, which are only the same if fixed_size_in_bits is a multiple of 8. I'm not sure what to do about this. This seems very hard to support properly, as it effectively adds an extra dimension to the current fixed vs scalable size distinction. In the near term the RISCV support in Clang for this should probably be reverted, and we should consider non-byte-sized scalable vectors as non-sized types, so that they can be used as SSA values but not loaded or stored. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/nikic edited https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/paulwalker-arm edited https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -554,6 +554,22 @@ class VectorType : public Type { return VectorType::get(VTy->getElementType(), EltCnt * 2); } + /// This static method returns a VectorType with the same size-in-bits as + /// SizeTy but with an element type that matches the scalar type of EltTy. paulwalker-arm wrote: Done. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -554,6 +554,22 @@ class VectorType : public Type { return VectorType::get(VTy->getElementType(), EltCnt * 2); } + /// This static method returns a VectorType with the same size-in-bits as + /// SizeTy but with an element type that matches the scalar type of EltTy. + static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) { +if (SizeTy->getScalarType() == EltTy->getScalarType()) + return SizeTy; + +unsigned EltSize = EltTy->getScalarSizeInBits(); +if (!SizeTy->getPrimitiveSizeInBits().isKnownMultipleOf(EltSize)) paulwalker-arm wrote: Are you sure? The intent of this function is to create a VectorType that can be bitcasted, typically from/to `SizeTy`. When looking at `CastInst::castIsValid()` I can see it uses `getPrimitiveSizeInBits()` to determine validity. To put another way, the check is ensuring the IR is consistent, essentially protecting against the case where the ElementCount calculation would be invalid. So I don't think this function cares about data layout? https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -1168,6 +1168,15 @@ bool Function::nullPointerIsDefined() const { return hasFnAttribute(Attribute::NullPointerIsValid); } +unsigned Function::getVScaleValue() const { + Attribute Attr = getFnAttribute(Attribute::VScaleRange); + if (!Attr.isValid()) +return 0; + + unsigned VScale = Attr.getVScaleRangeMax().value_or(0); + return VScale == Attr.getVScaleRangeMin() ? VScale : 0; paulwalker-arm wrote: Sounds reasonable. Done. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -80,7 +80,9 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) { // // CHECK-128-LABEL: @to_vbool32_t( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT:ret <vscale x 2 x i1> [[TYPE_COERCE:%.*]] +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 2 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT:ret <vscale x 2 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]] paulwalker-arm wrote: Here is the input to the first two iterations of SROA, with subsequent iterations being unchanged: ``` ; *** IR Dump Before SROAPass on to_vbool32_t *** ; Function Attrs: nounwind vscale_range(2,2) define dso_local <vscale x 2 x i1> @to_vbool32_t(<vscale x 2 x i1> noundef %type.coerce) #0 { entry: %type = alloca <1 x i8>, align 1 %type.addr = alloca <1 x i8>, align 1 %saved-value = alloca <1 x i8>, align 1 store <vscale x 2 x i1> %type.coerce, ptr %type, align 1 %type1 = load <1 x i8>, ptr %type, align 1, !tbaa !6 store <1 x i8> %type1, ptr %type.addr, align 1, !tbaa !6 %0 = load <1 x i8>, ptr %type.addr, align 1, !tbaa !6 store <1 x i8> %0, ptr %saved-value, align 1, !tbaa !6 %1 = load <vscale x 2 x i1>, ptr %saved-value, align 1, !tbaa !6 ret <vscale x 2 x i1> %1 } ; *** IR Dump Before SROAPass on to_vbool32_t *** ; Function Attrs: nounwind vscale_range(2,2) define dso_local <vscale x 2 x i1> @to_vbool32_t(<vscale x 2 x i1> noundef %type.coerce) local_unnamed_addr #0 { entry: %saved-value = alloca <1 x i8>, align 1 %saved-value.0. = load <vscale x 2 x i1>, ptr %saved-value, align 1, !tbaa !6 ret <vscale x 2 x i1> %saved-value.0. } ``` There is a conversation above (see call_bool32_ff) where my observation is that clang generates invalid IR for these cases. Whilst not my intention, it seems this PR has the side effect of no longer hiding such undefined behaviour. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/130973 >From 32a2805a41dc3ff02bff9df26f4665923445b488 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Thu, 20 Mar 2025 14:58:51 + Subject: [PATCH 1/4] Add SROA tests for casts between fixed and scalable types. --- .../scalable-vectors-with-known-vscale.ll | 363 ++ llvm/test/Transforms/SROA/scalable-vectors.ll | 223 ++- 2 files changed, 585 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll diff --git a/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll new file mode 100644 index 0..03afc9d609488 --- /dev/null +++ b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll @@ -0,0 +1,363 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; This test checks that SROA runs mem2reg on scalable vectors. + +define @alloca_nxv16i1( %pg) vscale_range(1) { +; CHECK-LABEL: @alloca_nxv16i1( +; CHECK-NEXT:ret [[PG:%.*]] +; + %pg.addr = alloca + store %pg, ptr %pg.addr + %1 = load , ptr %pg.addr + ret %1 +} + +define @alloca_nxv16i8( %vec) vscale_range(1) { +; CHECK-LABEL: @alloca_nxv16i8( +; CHECK-NEXT:ret [[VEC:%.*]] +; + %vec.addr = alloca + store %vec, ptr %vec.addr + %1 = load , ptr %vec.addr + ret %1 +} + +; Test scalable alloca that can't be promoted. Mem2Reg only considers +; non-volatile loads and stores for promotion. 
+define @unpromotable_alloca( %vec) vscale_range(1) { +; CHECK-LABEL: @unpromotable_alloca( +; CHECK-NEXT:[[VEC_ADDR:%.*]] = alloca , align 16 +; CHECK-NEXT:store volatile [[VEC:%.*]], ptr [[VEC_ADDR]], align 16 +; CHECK-NEXT:[[TMP1:%.*]] = load volatile , ptr [[VEC_ADDR]], align 16 +; CHECK-NEXT:ret [[TMP1]] +; + %vec.addr = alloca + store volatile %vec, ptr %vec.addr + %1 = load volatile , ptr %vec.addr + ret %1 +} + +; Test we bail out when using an alloca of a fixed-length vector (VLS) that was +; bitcasted to a scalable vector. +define @cast_alloca_to_svint32_t( %type.coerce) vscale_range(1) { +; CHECK-LABEL: @cast_alloca_to_svint32_t( +; CHECK-NEXT:[[TYPE:%.*]] = alloca <16 x i32>, align 64 +; CHECK-NEXT:[[TYPE_ADDR:%.*]] = alloca <16 x i32>, align 64 +; CHECK-NEXT:store [[TYPE_COERCE:%.*]], ptr [[TYPE]], align 16 +; CHECK-NEXT:[[TYPE1:%.*]] = load <16 x i32>, ptr [[TYPE]], align 64 +; CHECK-NEXT:store <16 x i32> [[TYPE1]], ptr [[TYPE_ADDR]], align 64 +; CHECK-NEXT:[[TMP1:%.*]] = load <16 x i32>, ptr [[TYPE_ADDR]], align 64 +; CHECK-NEXT:[[TMP2:%.*]] = load , ptr [[TYPE_ADDR]], align 16 +; CHECK-NEXT:ret [[TMP2]] +; + %type = alloca <16 x i32> + %type.addr = alloca <16 x i32> + store %type.coerce, ptr %type + %type1 = load <16 x i32>, ptr %type + store <16 x i32> %type1, ptr %type.addr + %1 = load <16 x i32>, ptr %type.addr + %2 = load , ptr %type.addr + ret %2 +} + +; When casting from VLA to VLS via memory check we bail out when producing a +; GEP where the element type is a scalable vector. 
+define @cast_alloca_from_svint32_t() vscale_range(1) { +; CHECK-LABEL: @cast_alloca_from_svint32_t( +; CHECK-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 16 +; CHECK-NEXT:store <16 x i32> zeroinitializer, ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT:[[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT:ret [[TMP1]] +; + %retval = alloca <16 x i32> + store <16 x i32> zeroinitializer, ptr %retval + %retval.coerce = alloca + call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval.coerce, ptr align 16 %retval, i64 64, i1 false) + %1 = load , ptr %retval.coerce + ret %1 +} + +; Test we bail out when using an alloca of a fixed-length vector (VLS) that was +; bitcasted to a scalable vector. +define void @select_load_alloca_to_svdouble_t() vscale_range(1) { +; CHECK-LABEL: @select_load_alloca_to_svdouble_t( +; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32 +; CHECK-NEXT:[[CMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT:[[COND:%.*]] = select i1 [[CMP]], ptr [[Z]], ptr null +; CHECK-NEXT:[[VAL:%.*]] = load , ptr [[COND]], align 16 +; CHECK-NEXT:ret void +; + %z = alloca <16 x half> + %cmp = icmp eq i32 0, 0 + %cond = select i1 %cmp, ptr %z, ptr null + %val = load , ptr %cond, align 16 + ret void +} + +define void @select_store_alloca_to_svdouble_t( %val) vscale_range(1) { +; CHECK-LABEL: @select_store_alloca_to_svdouble_t( +; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32 +; CHECK-NEXT:[[CMP
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -554,6 +554,22 @@ class VectorType : public Type { return VectorType::get(VTy->getElementType(), EltCnt * 2); } + /// This static method returns a VectorType with the same size-in-bits as + /// SizeTy but with an element type that matches the scalar type of EltTy. + static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) { +if (SizeTy->getScalarType() == EltTy->getScalarType()) + return SizeTy; + +unsigned EltSize = EltTy->getScalarSizeInBits(); +if (!SizeTy->getPrimitiveSizeInBits().isKnownMultipleOf(EltSize)) nikic wrote: This should really be using DataLayout, not getPrimitiveSizeInBits. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -554,6 +554,22 @@ class VectorType : public Type { return VectorType::get(VTy->getElementType(), EltCnt * 2); } + /// This static method returns a VectorType with the same size-in-bits as + /// SizeTy but with an element type that matches the scalar type of EltTy. nikic wrote: Should indicate that it can return null on failure. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -80,7 +80,9 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) { // // CHECK-128-LABEL: @to_vbool32_t( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT:ret [[TYPE_COERCE:%.*]] +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT:ret [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]] nikic wrote: This used to return the argument, but now it's returning the result of an uninitialized load. What is the original (pre-SROA) IR here? https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/nikic commented: Apart from the RISCV test changes this looks reasonable to me. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/nikic edited https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -1168,6 +1168,15 @@ bool Function::nullPointerIsDefined() const { return hasFnAttribute(Attribute::NullPointerIsValid); } +unsigned Function::getVScaleValue() const { + Attribute Attr = getFnAttribute(Attribute::VScaleRange); + if (!Attr.isValid()) +return 0; + + unsigned VScale = Attr.getVScaleRangeMax().value_or(0); + return VScale == Attr.getVScaleRangeMin() ? VScale : 0; rj-jesus wrote: Nit: maybe you could do `getVScaleRangeMin()` first to avoid the `.value_or(0)` (since the former isn't `std::optional`)? https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -554,6 +554,22 @@ class VectorType : public Type { return VectorType::get(VTy->getElementType(), EltCnt * 2); } + /// This static method returns a VectorType with the same size-in-bits as + /// SizeTy but with an element type that matches the scalar type of EltTy. + static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) { rj-jesus wrote: What do you think of `getWithPreservedSize(Type *ElementType, const VectorType *Other)`? Similar to `VectorType::get`, but preserving the bit-size of `Other` instead of its element count. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/rj-jesus edited https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/rj-jesus commented: This looks good as far as I can see, but I don't feel qualified to approve it. FWIW it fixes a large regression on GROMACS with LLVM 20. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
paulwalker-arm wrote: ping https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -554,6 +554,22 @@ class VectorType : public Type { return VectorType::get(VTy->getElementType(), EltCnt * 2); } + /// This static method returns a VectorType with the same size-in-bits as + /// SizeTy but with an element type that matches the scalar type of EltTy. + static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) { paulwalker-arm wrote: I really struggled to come up with a name that conveyed enough information whilst not causing all uses to overflow the 80 char line limit. This attempt being a shortened version of `get[VectorType]With[Total]Size[of_the_first_operand]AndScalar[Type_of_the_second_operand]`. I'm not precious over this so suggestions are very welcome. The downside of passing in a `TypeSize` is that I'd always have to create a new VectorType, whereas the current implementation has an early exit for the most common case of "casting" between fixed and scalable vectors of the same element type. A lesser reason was it being awkward to convert between ElementCount and TypeSize and I wanted to avoid introducing another use of `getKnownMinValue()`. Is this rationale strong enough, or would you rather I go the `TypeSize` route? https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -1990,21 +2028,56 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *NewTy) { Type *OldTy = V->getType(); - assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type"); + +#ifndef NDEBUG + BasicBlock *BB = IRB.GetInsertBlock(); + assert(BB && BB->getParent() && "VScale unknown!"); paulwalker-arm wrote: Yes. My reasoning was that saying "Function* not available" is obvious from the code and the only reason we need the Function* is to read VScale, so I went with this. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -1990,21 +2028,56 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *NewTy) { Type *OldTy = V->getType(); - assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type"); + +#ifndef NDEBUG + BasicBlock *BB = IRB.GetInsertBlock(); + assert(BB && BB->getParent() && "VScale unknown!"); sdesmalen-arm wrote: Was it supposed to have "VScale unknown" as the message here? https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -554,6 +554,22 @@ class VectorType : public Type { return VectorType::get(VTy->getElementType(), EltCnt * 2); } + /// This static method returns a VectorType with the same size-in-bits as + /// SizeTy but with an element type that matches the scalar type of EltTy. + static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) { sdesmalen-arm wrote: nit: This name does not describe what the function does. "Size" parameter `SizeTy` is not a size, but a vector type. And the "Scalar" parameter `EltTy` is not necessarily a scalar, but can be a vector. To me it would be less confusing if this were: ```suggestion static VectorType *getWithSizeAndScalar(TypeSize *Size, Type *EltTy) { ``` where EltTy is a `scalar` type. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/130973 >From 32a2805a41dc3ff02bff9df26f4665923445b488 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Thu, 20 Mar 2025 14:58:51 + Subject: [PATCH 1/2] Add SROA tests for casts between fixed and scalable types. --- .../scalable-vectors-with-known-vscale.ll | 363 ++ llvm/test/Transforms/SROA/scalable-vectors.ll | 223 ++- 2 files changed, 585 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll diff --git a/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll new file mode 100644 index 0..03afc9d609488 --- /dev/null +++ b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll @@ -0,0 +1,363 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; This test checks that SROA runs mem2reg on scalable vectors. + +define @alloca_nxv16i1( %pg) vscale_range(1) { +; CHECK-LABEL: @alloca_nxv16i1( +; CHECK-NEXT:ret [[PG:%.*]] +; + %pg.addr = alloca + store %pg, ptr %pg.addr + %1 = load , ptr %pg.addr + ret %1 +} + +define @alloca_nxv16i8( %vec) vscale_range(1) { +; CHECK-LABEL: @alloca_nxv16i8( +; CHECK-NEXT:ret [[VEC:%.*]] +; + %vec.addr = alloca + store %vec, ptr %vec.addr + %1 = load , ptr %vec.addr + ret %1 +} + +; Test scalable alloca that can't be promoted. Mem2Reg only considers +; non-volatile loads and stores for promotion. 
+define @unpromotable_alloca( %vec) vscale_range(1) { +; CHECK-LABEL: @unpromotable_alloca( +; CHECK-NEXT:[[VEC_ADDR:%.*]] = alloca , align 16 +; CHECK-NEXT:store volatile [[VEC:%.*]], ptr [[VEC_ADDR]], align 16 +; CHECK-NEXT:[[TMP1:%.*]] = load volatile , ptr [[VEC_ADDR]], align 16 +; CHECK-NEXT:ret [[TMP1]] +; + %vec.addr = alloca + store volatile %vec, ptr %vec.addr + %1 = load volatile , ptr %vec.addr + ret %1 +} + +; Test we bail out when using an alloca of a fixed-length vector (VLS) that was +; bitcasted to a scalable vector. +define @cast_alloca_to_svint32_t( %type.coerce) vscale_range(1) { +; CHECK-LABEL: @cast_alloca_to_svint32_t( +; CHECK-NEXT:[[TYPE:%.*]] = alloca <16 x i32>, align 64 +; CHECK-NEXT:[[TYPE_ADDR:%.*]] = alloca <16 x i32>, align 64 +; CHECK-NEXT:store [[TYPE_COERCE:%.*]], ptr [[TYPE]], align 16 +; CHECK-NEXT:[[TYPE1:%.*]] = load <16 x i32>, ptr [[TYPE]], align 64 +; CHECK-NEXT:store <16 x i32> [[TYPE1]], ptr [[TYPE_ADDR]], align 64 +; CHECK-NEXT:[[TMP1:%.*]] = load <16 x i32>, ptr [[TYPE_ADDR]], align 64 +; CHECK-NEXT:[[TMP2:%.*]] = load , ptr [[TYPE_ADDR]], align 16 +; CHECK-NEXT:ret [[TMP2]] +; + %type = alloca <16 x i32> + %type.addr = alloca <16 x i32> + store %type.coerce, ptr %type + %type1 = load <16 x i32>, ptr %type + store <16 x i32> %type1, ptr %type.addr + %1 = load <16 x i32>, ptr %type.addr + %2 = load , ptr %type.addr + ret %2 +} + +; When casting from VLA to VLS via memory check we bail out when producing a +; GEP where the element type is a scalable vector. 
+define @cast_alloca_from_svint32_t() vscale_range(1) { +; CHECK-LABEL: @cast_alloca_from_svint32_t( +; CHECK-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 16 +; CHECK-NEXT:store <16 x i32> zeroinitializer, ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT:[[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT:ret [[TMP1]] +; + %retval = alloca <16 x i32> + store <16 x i32> zeroinitializer, ptr %retval + %retval.coerce = alloca + call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval.coerce, ptr align 16 %retval, i64 64, i1 false) + %1 = load , ptr %retval.coerce + ret %1 +} + +; Test we bail out when using an alloca of a fixed-length vector (VLS) that was +; bitcasted to a scalable vector. +define void @select_load_alloca_to_svdouble_t() vscale_range(1) { +; CHECK-LABEL: @select_load_alloca_to_svdouble_t( +; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32 +; CHECK-NEXT:[[CMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT:[[COND:%.*]] = select i1 [[CMP]], ptr [[Z]], ptr null +; CHECK-NEXT:[[VAL:%.*]] = load , ptr [[COND]], align 16 +; CHECK-NEXT:ret void +; + %z = alloca <16 x half> + %cmp = icmp eq i32 0, 0 + %cond = select i1 %cmp, ptr %z, ptr null + %val = load , ptr %cond, align 16 + ret void +} + +define void @select_store_alloca_to_svdouble_t( %val) vscale_range(1) { +; CHECK-LABEL: @select_store_alloca_to_svdouble_t( +; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32 +; CHECK-NEXT:[[CMP
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -2034,6 +2071,18 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, } } + if (isa(NewTy) && isa(OldTy)) { +auto *Ty = VectorType::getWithSizeAndScalar(cast(NewTy), OldTy); +V = IRB.CreateInsertVector(Ty, PoisonValue::get(Ty), V, IRB.getInt64(0)); +return IRB.CreateBitCast(V, NewTy); + } + + if (isa(NewTy) && isa(OldTy)) { +auto *Ty = VectorType::getWithSizeAndScalar(cast(OldTy), NewTy); +V = IRB.CreateBitCast(V, Ty); +return IRB.CreateExtractVector(NewTy, V, IRB.getInt64(0)); + } paulwalker-arm wrote: I've extended the coverage to include pointer casts by creating a wrapper function so that existing uses of CreateBitCast are replaced with a variant that supports bit casting between fixed and scalable vector types. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -473,6 +473,14 @@ std::optional Attribute::getVScaleRangeMax() const { return unpackVScaleRangeArgs(pImpl->getValueAsInt()).second; } +unsigned Attribute::getVScaleValue() const { + std::optional VScale = getVScaleRangeMax(); + if (VScale && *VScale == getVScaleRangeMin()) +return *VScale; + + return 0; +} paulwalker-arm wrote: Done. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/paulwalker-arm edited https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 paulwalker-arm wrote: I don't know, perhaps there is a front end problem for RISCV. When investigating one of the affected test cases where vscale=2: ``` fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) { return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32); } ``` I see the snippet: ``` %saved-value = alloca <1 x i8>, align 1 store <1 x i8> %0, ptr %saved-value, align 1, !tbaa !6 %1 = load <vscale x 2 x i1>, ptr %saved-value, align 1, !tbaa !6 ``` However: ``` DL.getTypeStoreSize(<1 x i8>) => 1 DL.getTypeStoreSize(<vscale x 2 x i1>) => vscale x 1 ``` This means the store size of `<vscale x 2 x i1>` is 2 bytes, which makes the load undefined behaviour? Looking at the new output it's just not removing the undefined accesses. I'm not familiar with the RVV instructions (do they have sub-byte memory accesses?) but for SVE the store size for predicates is always a multiple of bytes and thus we model the storage of fixed length predicates as i8 vectors and then "cast" them to scalable boolean vectors. We also have a later combine to reconstitute a real scalable vector predicate load/store when possible. Even for pure scalable vectors the storage type is always byte sized (i.e. `<vscale x 16 x i1>`) with us using reinterpret intrinsics to shrink/expand them. I know SVE is not perfect here though as trying to alloca/load/store something smaller will likely lead to isel failures, but that cannot (or at least shouldn't) happen outside of hand written ll tests. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/topperc edited https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 topperc wrote: vbool32_t should hold vlen/32 elements. If vlen is 128, this should be a 4 x i8 type. So there's something wrong somewhere https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 topperc wrote: Err, never mind. The fixed vector uses i8 elements and the scalable vector uses i1. So <1 x i8> is correct. The 4 elements fit in a single byte. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
mshockwave wrote: > @pawosm-arm - I've removed the reviewers because it is not quite ready yet. I need to investigate the potential regressions shown by the RISCV tests. You can also turn this PR into a draft if you want. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
paulwalker-arm wrote: @pawosm-arm - I've removed the reviewers because it is not quite ready yet. I need to investigate the potential regressions shown by the RISCV tests. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Paul Walker (paulwalker-arm) Changes For function whose vscale_range is limited to a single value we can size scalable vectors. This aids SROA by allowing scalable vector load and store operations to be considered for replacement whereby bitcasts through memory can be replaced by vector insert or extract operations. --- Patch is 56.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130973.diff 11 Files Affected: - (modified) clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c (+25-13) - (modified) clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c (+6-2) - (modified) clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c (+9-7) - (modified) clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c (+10-13) - (modified) llvm/include/llvm/IR/Attributes.h (+4) - (modified) llvm/include/llvm/IR/DerivedTypes.h (+16) - (modified) llvm/lib/IR/AttributeImpl.h (+1) - (modified) llvm/lib/IR/Attributes.cpp (+8) - (modified) llvm/lib/Transforms/Scalar/SROA.cpp (+92-34) - (added) llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll (+248) - (modified) llvm/test/Transforms/SROA/scalable-vectors.ll (+142) ``diff diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c index e2f02dc64f766..66fd466eccfef 100644 --- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c +++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c @@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1 -// 
CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 4) -// CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]] -// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]] +// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4) +// CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA9:![0-9]+]] +// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:ret [[TMP2]] @@ -52,11 +56,15 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) { // // CHECK-128-LABEL: @call_bool64_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1 -// CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 2) +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]] +// 
CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2) // CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]] -// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:ret [[TMP2]] @@ -82,11 +90,13 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) { // // CHECK-128-LABEL: @call_bool32_fs(
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/130973 For function whose vscale_range is limited to a single value we can size scalable vectors. This aids SROA by allowing scalable vector load and store operations to be considered for replacement whereby bitcasts through memory can be replaced by vector insert or extract operations. >From cb6a620c93b3809742fd067233844221e74dde4f Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Fri, 7 Mar 2025 11:54:20 + Subject: [PATCH] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. For function whose vscale_range is limited to a single value we can size scalable vectors. This aids SROA by allowing scalable vector load and store operations to be considered for replacement whereby bitcasts through memory can be replaced by vector insert or extract operations. --- .../attr-riscv-rvv-vector-bits-less-8-call.c | 38 ++- .../attr-riscv-rvv-vector-bits-less-8-cast.c | 8 +- .../CodeGen/RISCV/attr-rvv-vector-bits-cast.c | 16 +- .../CodeGen/attr-arm-sve-vector-bits-cast.c | 23 +- llvm/include/llvm/IR/Attributes.h | 4 + llvm/include/llvm/IR/DerivedTypes.h | 16 ++ llvm/lib/IR/AttributeImpl.h | 1 + llvm/lib/IR/Attributes.cpp| 8 + llvm/lib/Transforms/Scalar/SROA.cpp | 126 ++--- .../scalable-vectors-with-known-vscale.ll | 248 ++ llvm/test/Transforms/SROA/scalable-vectors.ll | 142 ++ 11 files changed, 561 insertions(+), 69 deletions(-) create mode 100644 llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c index e2f02dc64f766..66fd466eccfef 100644 --- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c +++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c @@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( 
// CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1 -// CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 4) -// CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]] -// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]] +// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4) +// CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA9:![0-9]+]] +// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:ret [[TMP2]] @@ -52,11 +56,15 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) { // // CHECK-128-LABEL: @call_bool64_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1 -// CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 2) +// 
CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2) // CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]] -// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT:[[TMP1:%.*]] =
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/130973 >From 487a823a9ec35df1a93109ef03630738bdc39ab1 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Fri, 7 Mar 2025 11:54:20 + Subject: [PATCH] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. For function whose vscale_range is limited to a single value we can size scalable vectors. This aids SROA by allowing scalable vector load and store operations to be considered for replacement whereby bitcasts through memory can be replaced by vector insert or extract operations. --- .../attr-riscv-rvv-vector-bits-less-8-call.c | 38 ++- .../attr-riscv-rvv-vector-bits-less-8-cast.c | 8 +- .../CodeGen/RISCV/attr-rvv-vector-bits-cast.c | 16 +- .../CodeGen/attr-arm-sve-vector-bits-cast.c | 23 +- llvm/include/llvm/IR/Attributes.h | 4 + llvm/include/llvm/IR/DerivedTypes.h | 16 ++ llvm/lib/IR/AttributeImpl.h | 1 + llvm/lib/IR/Attributes.cpp| 8 + llvm/lib/Transforms/Scalar/SROA.cpp | 130 ++--- .../scalable-vectors-with-known-vscale.ll | 248 ++ llvm/test/Transforms/SROA/scalable-vectors.ll | 142 ++ 11 files changed, 563 insertions(+), 71 deletions(-) create mode 100644 llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c index e2f02dc64f766..66fd466eccfef 100644 --- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c +++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c @@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1 -// 
CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 4) -// CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]] -// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]] +// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4) +// CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA9:![0-9]+]] +// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:ret [[TMP2]] @@ -52,11 +56,15 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) { // // CHECK-128-LABEL: @call_bool64_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1 -// CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 2) +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]] +// 
CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2) // CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]] -// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:ret [[TMP2]] @@ -82,11 +90,13 @@ fixed_boo
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
github-actions[bot] wrote: :warning: undef deprecator found issues in your code. :warning: You can test this locally with the following command: ```bash git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' cd043e4fbe6125df4cb4993c625fa5e46194e478 cb6a620c93b3809742fd067233844221e74dde4f llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c llvm/include/llvm/IR/Attributes.h llvm/include/llvm/IR/DerivedTypes.h llvm/lib/IR/AttributeImpl.h llvm/lib/IR/Attributes.cpp llvm/lib/Transforms/Scalar/SROA.cpp llvm/test/Transforms/SROA/scalable-vectors.ll ``` The following files introduce new uses of undef: - llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll [Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead. In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead. For example, this is considered a bad practice: ```llvm define void @fn() { ... br i1 undef, ... } ``` Please use the following instead: ```llvm define void @fn(i1 %cond) { ... br i1 %cond, ... } ``` Please refer to the [Undefined Behavior Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information. https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff cd043e4fbe6125df4cb4993c625fa5e46194e478 cb6a620c93b3809742fd067233844221e74dde4f --extensions cpp,c,h -- clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c llvm/include/llvm/IR/Attributes.h llvm/include/llvm/IR/DerivedTypes.h llvm/lib/IR/AttributeImpl.h llvm/lib/IR/Attributes.cpp llvm/lib/Transforms/Scalar/SROA.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index 6c8f416708..f8b79eea19 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -556,7 +556,7 @@ public: /// This static method returns a VectorType with the same size-in-bits as /// SizeTy but with an element type that matches the scalar type of EltTy. - static VectorType* getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) { + static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) { if (SizeTy->getScalarType() == EltTy->getScalarType()) return SizeTy; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 81a2b38410..2e758caa65 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2198,8 +2198,7 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, - VectorType *CommonVecPtrTy, - unsigned VScale) { + VectorType *CommonVecPtrTy, unsigned VScale) { // If we didn't find a vector type, nothing to do here. 
if (CandidateTys.empty()) return nullptr; @@ -2311,9 +2310,9 @@ static VectorType *createAndCheckVectorTypesForPromotion( } } - return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy, - CommonEltTy, HaveVecPtrTy, - HaveCommonVecPtrTy, CommonVecPtrTy, VScale); + return checkVectorTypesForPromotion( + P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, + HaveCommonVecPtrTy, CommonVecPtrTy, VScale); } /// Test whether the given alloca partitioning and range of slices can be @@ -2325,7 +2324,8 @@ static VectorType *createAndCheckVectorTypesForPromotion( /// SSA value. We only can ensure this for a limited set of operations, and we /// don't want to do the rewrites unless we are confident that the result will /// be promotable, so we have an early test here. -static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL, unsigned VScale) { +static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL, + unsigned VScale) { // Collect the candidate types for vector-based promotion. Also track whether // we have different element types. SmallVector CandidateTys; `` https://github.com/llvm/llvm-project/pull/130973 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: Paul Walker (paulwalker-arm) Changes For functions whose vscale_range is limited to a single value, we can size scalable vectors. This aids SROA by allowing scalable vector load and store operations to be considered for replacement whereby bitcasts through memory can be replaced by vector insert or extract operations. --- Patch is 56.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130973.diff 11 Files Affected: - (modified) clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c (+25-13) - (modified) clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c (+6-2) - (modified) clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c (+9-7) - (modified) clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c (+10-13) - (modified) llvm/include/llvm/IR/Attributes.h (+4) - (modified) llvm/include/llvm/IR/DerivedTypes.h (+16) - (modified) llvm/lib/IR/AttributeImpl.h (+1) - (modified) llvm/lib/IR/Attributes.cpp (+8) - (modified) llvm/lib/Transforms/Scalar/SROA.cpp (+92-34) - (added) llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll (+248) - (modified) llvm/test/Transforms/SROA/scalable-vectors.ll (+142) ``diff diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c index e2f02dc64f766..66fd466eccfef 100644 --- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c +++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c @@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1 
-// CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 4) -// CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]] -// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]] +// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4) +// CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA9:![0-9]+]] +// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:ret [[TMP2]] @@ -52,11 +56,15 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) { // // CHECK-128-LABEL: @call_bool64_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1 -// CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 2) +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]] +// 
CHECK-128-NEXT:[[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2) // CHECK-128-NEXT:store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]] -// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT:ret [[TMP2]] @@ -82,11 +90,13 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) { // // CHECK-128-LABEL: @call_bo