[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-06-11 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm closed 
https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-06-10 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm updated 
https://github.com/llvm/llvm-project/pull/130973

>From e2642bec52f881c1d457f2c72ed3ae4ceec570e6 Mon Sep 17 00:00:00 2001
From: Paul Walker 
Date: Thu, 20 Mar 2025 14:58:51 +
Subject: [PATCH 1/3] Add SROA tests for casts between fixed and scalable
 types.

---
 .../scalable-vectors-with-known-vscale.ll | 363 ++
 llvm/test/Transforms/SROA/scalable-vectors.ll | 223 ++-
 2 files changed, 585 insertions(+), 1 deletion(-)
 create mode 100644 
llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll

diff --git a/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll 
b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll
new file mode 100644
index 0..03afc9d609488
--- /dev/null
+++ b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll
@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes='sroa' -S | FileCheck %s 
--check-prefixes=CHECK,CHECK-PRESERVE-CFG
+; RUN: opt < %s -passes='sroa' -S | FileCheck %s 
--check-prefixes=CHECK,CHECK-MODIFY-CFG
+
+target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+; This test checks that SROA runs mem2reg on scalable vectors.
+
+define  @alloca_nxv16i1( %pg) 
vscale_range(1) {
+; CHECK-LABEL: @alloca_nxv16i1(
+; CHECK-NEXT:ret  [[PG:%.*]]
+;
+  %pg.addr = alloca 
+  store  %pg, ptr %pg.addr
+  %1 = load , ptr %pg.addr
+  ret  %1
+}
+
+define  @alloca_nxv16i8( %vec) 
vscale_range(1) {
+; CHECK-LABEL: @alloca_nxv16i8(
+; CHECK-NEXT:ret  [[VEC:%.*]]
+;
+  %vec.addr = alloca 
+  store  %vec, ptr %vec.addr
+  %1 = load , ptr %vec.addr
+  ret  %1
+}
+
+; Test scalable alloca that can't be promoted. Mem2Reg only considers
+; non-volatile loads and stores for promotion.
+define  @unpromotable_alloca( %vec) 
vscale_range(1) {
+; CHECK-LABEL: @unpromotable_alloca(
+; CHECK-NEXT:[[VEC_ADDR:%.*]] = alloca , align 16
+; CHECK-NEXT:store volatile  [[VEC:%.*]], ptr 
[[VEC_ADDR]], align 16
+; CHECK-NEXT:[[TMP1:%.*]] = load volatile , ptr 
[[VEC_ADDR]], align 16
+; CHECK-NEXT:ret  [[TMP1]]
+;
+  %vec.addr = alloca 
+  store volatile  %vec, ptr %vec.addr
+  %1 = load volatile , ptr %vec.addr
+  ret  %1
+}
+
+; Test we bail out when using an alloca of a fixed-length vector (VLS) that was
+; bitcasted to a scalable vector.
+define  @cast_alloca_to_svint32_t( 
%type.coerce) vscale_range(1) {
+; CHECK-LABEL: @cast_alloca_to_svint32_t(
+; CHECK-NEXT:[[TYPE:%.*]] = alloca <16 x i32>, align 64
+; CHECK-NEXT:[[TYPE_ADDR:%.*]] = alloca <16 x i32>, align 64
+; CHECK-NEXT:store  [[TYPE_COERCE:%.*]], ptr [[TYPE]], 
align 16
+; CHECK-NEXT:[[TYPE1:%.*]] = load <16 x i32>, ptr [[TYPE]], align 64
+; CHECK-NEXT:store <16 x i32> [[TYPE1]], ptr [[TYPE_ADDR]], align 64
+; CHECK-NEXT:[[TMP1:%.*]] = load <16 x i32>, ptr [[TYPE_ADDR]], align 64
+; CHECK-NEXT:[[TMP2:%.*]] = load , ptr [[TYPE_ADDR]], 
align 16
+; CHECK-NEXT:ret  [[TMP2]]
+;
+  %type = alloca <16 x i32>
+  %type.addr = alloca <16 x i32>
+  store  %type.coerce, ptr %type
+  %type1 = load <16 x i32>, ptr %type
+  store <16 x i32> %type1, ptr %type.addr
+  %1 = load <16 x i32>, ptr %type.addr
+  %2 = load , ptr %type.addr
+  ret  %2
+}
+
+; When casting from VLA to VLS via memory check we bail out when producing a
+; GEP where the element type is a scalable vector.
+define  @cast_alloca_from_svint32_t() vscale_range(1) {
+; CHECK-LABEL: @cast_alloca_from_svint32_t(
+; CHECK-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 16
+; CHECK-NEXT:store <16 x i32> zeroinitializer, ptr [[RETVAL_COERCE]], 
align 16
+; CHECK-NEXT:[[TMP1:%.*]] = load , ptr 
[[RETVAL_COERCE]], align 16
+; CHECK-NEXT:ret  [[TMP1]]
+;
+  %retval = alloca <16 x i32>
+  store <16 x i32> zeroinitializer, ptr %retval
+  %retval.coerce = alloca 
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval.coerce, ptr align 16 
%retval, i64 64, i1 false)
+  %1 = load , ptr %retval.coerce
+  ret  %1
+}
+
+; Test we bail out when using an alloca of a fixed-length vector (VLS) that was
+; bitcasted to a scalable vector.
+define void @select_load_alloca_to_svdouble_t() vscale_range(1) {
+; CHECK-LABEL: @select_load_alloca_to_svdouble_t(
+; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32
+; CHECK-NEXT:[[CMP:%.*]] = icmp eq i32 0, 0
+; CHECK-NEXT:[[COND:%.*]] = select i1 [[CMP]], ptr [[Z]], ptr null
+; CHECK-NEXT:[[VAL:%.*]] = load , ptr [[COND]], align 
16
+; CHECK-NEXT:ret void
+;
+  %z = alloca <16 x half>
+  %cmp = icmp eq i32 0, 0
+  %cond = select i1 %cmp, ptr %z, ptr null
+  %val = load , ptr %cond, align 16
+  ret void
+}
+
+define void @select_store_alloca_to_svdouble_t( %val) 
vscale_range(1) {
+; CHECK-LABEL: @select_store_alloca_to_svdouble_t(
+; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32
+; CHECK-NEXT:[[CMP

[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-06-06 Thread Nikita Popov via cfe-commits

https://github.com/nikic approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-05-21 Thread Paul Walker via cfe-commits

paulwalker-arm wrote:

ping

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-05-15 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm updated 
https://github.com/llvm/llvm-project/pull/130973



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
https://support.github.com/contact";>Contact Support —
https://githubstatus.com";>GitHub Status —
https://twitter.com/githubstatus";>@githubstatus
  

  

  

  

  

  


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-05-15 Thread Paul Walker via cfe-commits

paulwalker-arm wrote:

Rebased to incorporate https://github.com/llvm/llvm-project/pull/139190. Thanks 
again @topperc.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-05-01 Thread Nikita Popov via cfe-commits


@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1

nikic wrote:

I'd expect `` to also be broken. We need the fixed size to be 
a multiple of 8 to compute correct sizes.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-05-01 Thread Craig Topper via cfe-commits


@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1

topperc wrote:

Before #100110, I believe we would have allowed <1 x i8> with  
when vscale was 2. Those are both 8-bit values, but I think getTypeSize on 
the scalable vector would return vscale*1 byte? Is that also broken?

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-05-01 Thread Nikita Popov via cfe-commits


@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1

nikic wrote:

@paulwalker-arm We need to do *something* to prevent this from getting 
blatantly miscompiled before this PR can land. Removal is an option, but maybe 
the IR generated by clang can be fixed instead.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-05-01 Thread Paul Walker via cfe-commits


@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1

paulwalker-arm wrote:

@nikic - Is the removal a prerequisite of being able to land this PR?

@topperc / @asi-sc - Any objections to the PR being reverted? Fully or Partly?

Given the size of the regression (9x) I was hoping to backport this fix to LLVM 
20. I guess this might not be possible if that means removing 
https://github.com/llvm/llvm-project/pull/100110 from LLVM 20 as well? 

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm edited 
https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm edited 
https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Nikita Popov via cfe-commits


@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1

nikic wrote:

Looks like this was added in https://github.com/llvm/llvm-project/pull/100110.

This seems like a pretty fundamental problem with scalable vectors where the 
fixed size is non-byte-sized. The store size in that case is `(vscale x 
fixed_size_in_bits) / 8`, but we pretend that it is `vscale x 
(fixed_size_in_bits / 8)`, which are only the same if fixed_size_in_bits is a 
multiple of 8.

I'm not sure what to do about this. This seems very hard to support properly, 
as it effectively adds an extra dimension to the current fixed vs scalable size 
distinction.

In the near term the RISCV support in Clang for this should probably be 
reverted, and we should consider non-byte-sized scalable vectors as non-sized 
types, so that they can be used as SSA values but not loaded or stored.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Nikita Popov via cfe-commits

https://github.com/nikic edited https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm edited 
https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Paul Walker via cfe-commits


@@ -554,6 +554,22 @@ class VectorType : public Type {
 return VectorType::get(VTy->getElementType(), EltCnt * 2);
   }
 
+  /// This static method returns a VectorType with the same size-in-bits as
+  /// SizeTy but with an element type that matches the scalar type of EltTy.

paulwalker-arm wrote:

Done.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Paul Walker via cfe-commits


@@ -554,6 +554,22 @@ class VectorType : public Type {
 return VectorType::get(VTy->getElementType(), EltCnt * 2);
   }
 
+  /// This static method returns a VectorType with the same size-in-bits as
+  /// SizeTy but with an element type that matches the scalar type of EltTy.
+  static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) {
+if (SizeTy->getScalarType() == EltTy->getScalarType())
+  return SizeTy;
+
+unsigned EltSize = EltTy->getScalarSizeInBits();
+if (!SizeTy->getPrimitiveSizeInBits().isKnownMultipleOf(EltSize))

paulwalker-arm wrote:

Are you sure? The intent of this function is to create a VectorType that can be 
bitcasted, typically from/to `SizeTy`. When looking at 
`CastInst::castIsValid()` I can see it uses `getPrimitiveSizeInBits()` to 
determine validity.

To put it another way, the check is ensuring the IR is consistent, essentially 
protecting against the case where the ElementCount calculation would be 
invalid. So I don't think this function cares about data layout?


https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Paul Walker via cfe-commits


@@ -1168,6 +1168,15 @@ bool Function::nullPointerIsDefined() const {
   return hasFnAttribute(Attribute::NullPointerIsValid);
 }
 
+unsigned Function::getVScaleValue() const {
+  Attribute Attr = getFnAttribute(Attribute::VScaleRange);
+  if (!Attr.isValid())
+return 0;
+
+  unsigned VScale = Attr.getVScaleRangeMax().value_or(0);
+  return VScale == Attr.getVScaleRangeMin() ? VScale : 0;

paulwalker-arm wrote:

Sounds reasonable. Done.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Paul Walker via cfe-commits


@@ -80,7 +80,9 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) {
 //
 // CHECK-128-LABEL: @to_vbool32_t(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:ret  [[TYPE_COERCE:%.*]]
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
+// CHECK-128-NEXT:
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
+// CHECK-128-NEXT:ret  
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]]

paulwalker-arm wrote:

Here is the input to the first two iterations of SROA, with subsequent 
iterations being unchanged:
```
; *** IR Dump Before SROAPass on to_vbool32_t ***
; Function Attrs: nounwind vscale_range(2,2)
define dso_local  @to_vbool32_t( noundef 
%type.coerce) #0 {
entry:
  %type = alloca <1 x i8>, align 1
  %type.addr = alloca <1 x i8>, align 1
  %saved-value = alloca <1 x i8>, align 1
  store  %type.coerce, ptr %type, align 1
  %type1 = load <1 x i8>, ptr %type, align 1, !tbaa !6
  store <1 x i8> %type1, ptr %type.addr, align 1, !tbaa !6
  %0 = load <1 x i8>, ptr %type.addr, align 1, !tbaa !6
  store <1 x i8> %0, ptr %saved-value, align 1, !tbaa !6
  %1 = load , ptr %saved-value, align 1, !tbaa !6
  ret  %1
}
; *** IR Dump Before SROAPass on to_vbool32_t ***
; Function Attrs: nounwind vscale_range(2,2)
define dso_local  @to_vbool32_t( noundef 
%type.coerce) local_unnamed_addr #0 {
entry:
  %saved-value = alloca <1 x i8>, align 1
  %saved-value.0. = load , ptr %saved-value, align 1, !tbaa !6
  ret  %saved-value.0.
}
```
There is a conversation above (see call_bool32_ff) where my observation is that 
clang generates invalid IR for these cases. Whilst not my intention it seems 
this PR has the side effect of no longer hiding such undefined behaviour.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm updated 
https://github.com/llvm/llvm-project/pull/130973

>From 32a2805a41dc3ff02bff9df26f4665923445b488 Mon Sep 17 00:00:00 2001
From: Paul Walker 
Date: Thu, 20 Mar 2025 14:58:51 +
Subject: [PATCH 1/4] Add SROA tests for casts between fixed and scalable
 types.

---
 .../scalable-vectors-with-known-vscale.ll | 363 ++
 llvm/test/Transforms/SROA/scalable-vectors.ll | 223 ++-
 2 files changed, 585 insertions(+), 1 deletion(-)
 create mode 100644 
llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll

diff --git a/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll 
b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll
new file mode 100644
index 0..03afc9d609488
--- /dev/null
+++ b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll
@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes='sroa' -S | FileCheck %s 
--check-prefixes=CHECK,CHECK-PRESERVE-CFG
+; RUN: opt < %s -passes='sroa' -S | FileCheck %s 
--check-prefixes=CHECK,CHECK-MODIFY-CFG
+
+target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+; This test checks that SROA runs mem2reg on scalable vectors.
+
+define  @alloca_nxv16i1( %pg) 
vscale_range(1) {
+; CHECK-LABEL: @alloca_nxv16i1(
+; CHECK-NEXT:ret  [[PG:%.*]]
+;
+  %pg.addr = alloca 
+  store  %pg, ptr %pg.addr
+  %1 = load , ptr %pg.addr
+  ret  %1
+}
+
+define  @alloca_nxv16i8( %vec) 
vscale_range(1) {
+; CHECK-LABEL: @alloca_nxv16i8(
+; CHECK-NEXT:ret  [[VEC:%.*]]
+;
+  %vec.addr = alloca 
+  store  %vec, ptr %vec.addr
+  %1 = load , ptr %vec.addr
+  ret  %1
+}
+
+; Test scalable alloca that can't be promoted. Mem2Reg only considers
+; non-volatile loads and stores for promotion.
+define  @unpromotable_alloca( %vec) 
vscale_range(1) {
+; CHECK-LABEL: @unpromotable_alloca(
+; CHECK-NEXT:[[VEC_ADDR:%.*]] = alloca , align 16
+; CHECK-NEXT:store volatile  [[VEC:%.*]], ptr 
[[VEC_ADDR]], align 16
+; CHECK-NEXT:[[TMP1:%.*]] = load volatile , ptr 
[[VEC_ADDR]], align 16
+; CHECK-NEXT:ret  [[TMP1]]
+;
+  %vec.addr = alloca 
+  store volatile  %vec, ptr %vec.addr
+  %1 = load volatile , ptr %vec.addr
+  ret  %1
+}
+
+; Test we bail out when using an alloca of a fixed-length vector (VLS) that was
+; bitcasted to a scalable vector.
+define  @cast_alloca_to_svint32_t( 
%type.coerce) vscale_range(1) {
+; CHECK-LABEL: @cast_alloca_to_svint32_t(
+; CHECK-NEXT:[[TYPE:%.*]] = alloca <16 x i32>, align 64
+; CHECK-NEXT:[[TYPE_ADDR:%.*]] = alloca <16 x i32>, align 64
+; CHECK-NEXT:store  [[TYPE_COERCE:%.*]], ptr [[TYPE]], 
align 16
+; CHECK-NEXT:[[TYPE1:%.*]] = load <16 x i32>, ptr [[TYPE]], align 64
+; CHECK-NEXT:store <16 x i32> [[TYPE1]], ptr [[TYPE_ADDR]], align 64
+; CHECK-NEXT:[[TMP1:%.*]] = load <16 x i32>, ptr [[TYPE_ADDR]], align 64
+; CHECK-NEXT:[[TMP2:%.*]] = load , ptr [[TYPE_ADDR]], 
align 16
+; CHECK-NEXT:ret  [[TMP2]]
+;
+  %type = alloca <16 x i32>
+  %type.addr = alloca <16 x i32>
+  store  %type.coerce, ptr %type
+  %type1 = load <16 x i32>, ptr %type
+  store <16 x i32> %type1, ptr %type.addr
+  %1 = load <16 x i32>, ptr %type.addr
+  %2 = load , ptr %type.addr
+  ret  %2
+}
+
+; When casting from VLA to VLS via memory check we bail out when producing a
+; GEP where the element type is a scalable vector.
+define  @cast_alloca_from_svint32_t() vscale_range(1) {
+; CHECK-LABEL: @cast_alloca_from_svint32_t(
+; CHECK-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 16
+; CHECK-NEXT:store <16 x i32> zeroinitializer, ptr [[RETVAL_COERCE]], 
align 16
+; CHECK-NEXT:[[TMP1:%.*]] = load , ptr 
[[RETVAL_COERCE]], align 16
+; CHECK-NEXT:ret  [[TMP1]]
+;
+  %retval = alloca <16 x i32>
+  store <16 x i32> zeroinitializer, ptr %retval
+  %retval.coerce = alloca 
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval.coerce, ptr align 16 
%retval, i64 64, i1 false)
+  %1 = load , ptr %retval.coerce
+  ret  %1
+}
+
+; Test we bail out when using an alloca of a fixed-length vector (VLS) that was
+; bitcasted to a scalable vector.
+define void @select_load_alloca_to_svdouble_t() vscale_range(1) {
+; CHECK-LABEL: @select_load_alloca_to_svdouble_t(
+; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32
+; CHECK-NEXT:[[CMP:%.*]] = icmp eq i32 0, 0
+; CHECK-NEXT:[[COND:%.*]] = select i1 [[CMP]], ptr [[Z]], ptr null
+; CHECK-NEXT:[[VAL:%.*]] = load , ptr [[COND]], align 
16
+; CHECK-NEXT:ret void
+;
+  %z = alloca <16 x half>
+  %cmp = icmp eq i32 0, 0
+  %cond = select i1 %cmp, ptr %z, ptr null
+  %val = load , ptr %cond, align 16
+  ret void
+}
+
+define void @select_store_alloca_to_svdouble_t( %val) 
vscale_range(1) {
+; CHECK-LABEL: @select_store_alloca_to_svdouble_t(
+; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32
+; CHECK-NEXT:[[CMP

[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Nikita Popov via cfe-commits


@@ -554,6 +554,22 @@ class VectorType : public Type {
 return VectorType::get(VTy->getElementType(), EltCnt * 2);
   }
 
+  /// This static method returns a VectorType with the same size-in-bits as
+  /// SizeTy but with an element type that matches the scalar type of EltTy.
+  static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) {
+if (SizeTy->getScalarType() == EltTy->getScalarType())
+  return SizeTy;
+
+unsigned EltSize = EltTy->getScalarSizeInBits();
+if (!SizeTy->getPrimitiveSizeInBits().isKnownMultipleOf(EltSize))

nikic wrote:

This should really be using DataLayout, not getPrimitiveSizeInBits.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Nikita Popov via cfe-commits


@@ -554,6 +554,22 @@ class VectorType : public Type {
 return VectorType::get(VTy->getElementType(), EltCnt * 2);
   }
 
+  /// This static method returns a VectorType with the same size-in-bits as
+  /// SizeTy but with an element type that matches the scalar type of EltTy.

nikic wrote:

Should indicate that it can return null on failure.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Nikita Popov via cfe-commits


@@ -80,7 +80,9 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) {
 //
 // CHECK-128-LABEL: @to_vbool32_t(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:ret  [[TYPE_COERCE:%.*]]
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
+// CHECK-128-NEXT:
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
+// CHECK-128-NEXT:ret  
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]]

nikic wrote:

This used to return the argument, but now it's returning the result of an 
uninitialized load. What is the original (pre-SROA) IR here?

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Nikita Popov via cfe-commits

https://github.com/nikic commented:

Apart from the RISCV test changes this looks reasonable to me.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-29 Thread Nikita Popov via cfe-commits

https://github.com/nikic edited https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-28 Thread Ricardo Jesus via cfe-commits


@@ -1168,6 +1168,15 @@ bool Function::nullPointerIsDefined() const {
   return hasFnAttribute(Attribute::NullPointerIsValid);
 }
 
+unsigned Function::getVScaleValue() const {
+  Attribute Attr = getFnAttribute(Attribute::VScaleRange);
+  if (!Attr.isValid())
+return 0;
+
+  unsigned VScale = Attr.getVScaleRangeMax().value_or(0);
+  return VScale == Attr.getVScaleRangeMin() ? VScale : 0;

rj-jesus wrote:

Nit: maybe you could do `getVScaleRangeMin()` first to avoid the `.value_or(0)` 
(since the former isn't `std::optional`)?

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-28 Thread Ricardo Jesus via cfe-commits


@@ -554,6 +554,22 @@ class VectorType : public Type {
 return VectorType::get(VTy->getElementType(), EltCnt * 2);
   }
 
+  /// This static method returns a VectorType with the same size-in-bits as
+  /// SizeTy but with an element type that matches the scalar type of EltTy.
+  static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) {

rj-jesus wrote:

What do you think of `getWithPreservedSize(Type *ElementType, const VectorType 
*Other)`? Similar to `VectorType::get`, but preserving the bit-size of `Other` 
instead of its element count.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-28 Thread Ricardo Jesus via cfe-commits

https://github.com/rj-jesus edited 
https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-28 Thread Ricardo Jesus via cfe-commits

https://github.com/rj-jesus commented:

This looks good as far as I can see, but I don't feel qualified to approve it.
FWIW it fixes a large regression on GROMACS with LLVM 20.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-22 Thread Paul Walker via cfe-commits

paulwalker-arm wrote:

ping

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-10 Thread Paul Walker via cfe-commits


@@ -554,6 +554,22 @@ class VectorType : public Type {
 return VectorType::get(VTy->getElementType(), EltCnt * 2);
   }
 
+  /// This static method returns a VectorType with the same size-in-bits as
+  /// SizeTy but with an element type that matches the scalar type of EltTy.
+  static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) {

paulwalker-arm wrote:

I really struggled to come up with a name that conveyed enough information 
whilst not causing all uses to overflow the 80 char line limit. This attempt 
being a shortened version of 
`get[VectorType]With[Total]Size[of_the_first_operand]AndScalar[Type_of_the 
second_operand]`. I'm not precious over this so suggestions are very welcome.

The downside of passing in a `TypeSize` is that I'd always have to create a new 
VectorType, whereas the current implementation has an early exit for the most 
common case of "casting" between fixed and scalable vectors of the same element 
type.

A lesser reason was it being awkward to convert between ElementCount and 
TypeSize and I wanted to avoid introducing another use of `getKnownMinValue()`.

Is this rationale strong enough, or would you rather I go the `TypeSize` route?

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-10 Thread Paul Walker via cfe-commits


@@ -1990,21 +2028,56 @@ static bool canConvertValue(const DataLayout &DL, Type 
*OldTy, Type *NewTy) {
 static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
Type *NewTy) {
   Type *OldTy = V->getType();
-  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
+
+#ifndef NDEBUG
+  BasicBlock *BB = IRB.GetInsertBlock();
+  assert(BB && BB->getParent() && "VScale unknown!");

paulwalker-arm wrote:

Yes.  My reasoning was that saying "Function* not available" is obvious from 
the code and the only reason we need the Function* is to read VScale, so I went 
with this.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-10 Thread Sander de Smalen via cfe-commits


@@ -1990,21 +2028,56 @@ static bool canConvertValue(const DataLayout &DL, Type 
*OldTy, Type *NewTy) {
 static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
Type *NewTy) {
   Type *OldTy = V->getType();
-  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
+
+#ifndef NDEBUG
+  BasicBlock *BB = IRB.GetInsertBlock();
+  assert(BB && BB->getParent() && "VScale unknown!");

sdesmalen-arm wrote:

Was it supposed to have "VScale unknown" as the message here?

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-10 Thread Sander de Smalen via cfe-commits


@@ -554,6 +554,22 @@ class VectorType : public Type {
 return VectorType::get(VTy->getElementType(), EltCnt * 2);
   }
 
+  /// This static method returns a VectorType with the same size-in-bits as
+  /// SizeTy but with an element type that matches the scalar type of EltTy.
+  static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) {

sdesmalen-arm wrote:

nit: This name does not describe what the function does. "Size" parameter 
`SizeTy` is not a size, but a vector type. And the "Scalar" parameter `EltTy` 
is not necessarily a scalar, but can be a vector.

To me it would be less confusing if this were:
```suggestion
  static VectorType *getWithSizeAndScalar(TypeSize *Size, Type *EltTy) {
```
where EltTy is a `scalar` type.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-05 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm updated 
https://github.com/llvm/llvm-project/pull/130973

>From 32a2805a41dc3ff02bff9df26f4665923445b488 Mon Sep 17 00:00:00 2001
From: Paul Walker 
Date: Thu, 20 Mar 2025 14:58:51 +
Subject: [PATCH 1/2] Add SROA tests for casts between fixed and scalable
 types.

---
 .../scalable-vectors-with-known-vscale.ll | 363 ++
 llvm/test/Transforms/SROA/scalable-vectors.ll | 223 ++-
 2 files changed, 585 insertions(+), 1 deletion(-)
 create mode 100644 
llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll

diff --git a/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll 
b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll
new file mode 100644
index 0..03afc9d609488
--- /dev/null
+++ b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll
@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes='sroa' -S | FileCheck %s 
--check-prefixes=CHECK,CHECK-PRESERVE-CFG
+; RUN: opt < %s -passes='sroa' -S | FileCheck %s 
--check-prefixes=CHECK,CHECK-MODIFY-CFG
+
+target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+; This test checks that SROA runs mem2reg on scalable vectors.
+
+define  @alloca_nxv16i1( %pg) 
vscale_range(1) {
+; CHECK-LABEL: @alloca_nxv16i1(
+; CHECK-NEXT:ret  [[PG:%.*]]
+;
+  %pg.addr = alloca 
+  store  %pg, ptr %pg.addr
+  %1 = load , ptr %pg.addr
+  ret  %1
+}
+
+define  @alloca_nxv16i8( %vec) 
vscale_range(1) {
+; CHECK-LABEL: @alloca_nxv16i8(
+; CHECK-NEXT:ret  [[VEC:%.*]]
+;
+  %vec.addr = alloca 
+  store  %vec, ptr %vec.addr
+  %1 = load , ptr %vec.addr
+  ret  %1
+}
+
+; Test scalable alloca that can't be promoted. Mem2Reg only considers
+; non-volatile loads and stores for promotion.
+define  @unpromotable_alloca( %vec) 
vscale_range(1) {
+; CHECK-LABEL: @unpromotable_alloca(
+; CHECK-NEXT:[[VEC_ADDR:%.*]] = alloca , align 16
+; CHECK-NEXT:store volatile  [[VEC:%.*]], ptr 
[[VEC_ADDR]], align 16
+; CHECK-NEXT:[[TMP1:%.*]] = load volatile , ptr 
[[VEC_ADDR]], align 16
+; CHECK-NEXT:ret  [[TMP1]]
+;
+  %vec.addr = alloca 
+  store volatile  %vec, ptr %vec.addr
+  %1 = load volatile , ptr %vec.addr
+  ret  %1
+}
+
+; Test we bail out when using an alloca of a fixed-length vector (VLS) that was
+; bitcasted to a scalable vector.
+define  @cast_alloca_to_svint32_t( 
%type.coerce) vscale_range(1) {
+; CHECK-LABEL: @cast_alloca_to_svint32_t(
+; CHECK-NEXT:[[TYPE:%.*]] = alloca <16 x i32>, align 64
+; CHECK-NEXT:[[TYPE_ADDR:%.*]] = alloca <16 x i32>, align 64
+; CHECK-NEXT:store  [[TYPE_COERCE:%.*]], ptr [[TYPE]], 
align 16
+; CHECK-NEXT:[[TYPE1:%.*]] = load <16 x i32>, ptr [[TYPE]], align 64
+; CHECK-NEXT:store <16 x i32> [[TYPE1]], ptr [[TYPE_ADDR]], align 64
+; CHECK-NEXT:[[TMP1:%.*]] = load <16 x i32>, ptr [[TYPE_ADDR]], align 64
+; CHECK-NEXT:[[TMP2:%.*]] = load , ptr [[TYPE_ADDR]], 
align 16
+; CHECK-NEXT:ret  [[TMP2]]
+;
+  %type = alloca <16 x i32>
+  %type.addr = alloca <16 x i32>
+  store  %type.coerce, ptr %type
+  %type1 = load <16 x i32>, ptr %type
+  store <16 x i32> %type1, ptr %type.addr
+  %1 = load <16 x i32>, ptr %type.addr
+  %2 = load , ptr %type.addr
+  ret  %2
+}
+
+; When casting from VLA to VLS via memory check we bail out when producing a
+; GEP where the element type is a scalable vector.
+define  @cast_alloca_from_svint32_t() vscale_range(1) {
+; CHECK-LABEL: @cast_alloca_from_svint32_t(
+; CHECK-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 16
+; CHECK-NEXT:store <16 x i32> zeroinitializer, ptr [[RETVAL_COERCE]], 
align 16
+; CHECK-NEXT:[[TMP1:%.*]] = load , ptr 
[[RETVAL_COERCE]], align 16
+; CHECK-NEXT:ret  [[TMP1]]
+;
+  %retval = alloca <16 x i32>
+  store <16 x i32> zeroinitializer, ptr %retval
+  %retval.coerce = alloca 
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval.coerce, ptr align 16 
%retval, i64 64, i1 false)
+  %1 = load , ptr %retval.coerce
+  ret  %1
+}
+
+; Test we bail out when using an alloca of a fixed-length vector (VLS) that was
+; bitcasted to a scalable vector.
+define void @select_load_alloca_to_svdouble_t() vscale_range(1) {
+; CHECK-LABEL: @select_load_alloca_to_svdouble_t(
+; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32
+; CHECK-NEXT:[[CMP:%.*]] = icmp eq i32 0, 0
+; CHECK-NEXT:[[COND:%.*]] = select i1 [[CMP]], ptr [[Z]], ptr null
+; CHECK-NEXT:[[VAL:%.*]] = load , ptr [[COND]], align 
16
+; CHECK-NEXT:ret void
+;
+  %z = alloca <16 x half>
+  %cmp = icmp eq i32 0, 0
+  %cond = select i1 %cmp, ptr %z, ptr null
+  %val = load , ptr %cond, align 16
+  ret void
+}
+
+define void @select_store_alloca_to_svdouble_t( %val) 
vscale_range(1) {
+; CHECK-LABEL: @select_store_alloca_to_svdouble_t(
+; CHECK-NEXT:[[Z:%.*]] = alloca <16 x half>, align 32
+; CHECK-NEXT:[[CMP

[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-04-05 Thread Paul Walker via cfe-commits


@@ -2034,6 +2071,18 @@ static Value *convertValue(const DataLayout &DL, 
IRBuilderTy &IRB, Value *V,
 }
   }
 
+  if (isa(NewTy) && isa(OldTy)) {
+auto *Ty = VectorType::getWithSizeAndScalar(cast(NewTy), 
OldTy);
+V = IRB.CreateInsertVector(Ty, PoisonValue::get(Ty), V, IRB.getInt64(0));
+return IRB.CreateBitCast(V, NewTy);
+  }
+
+  if (isa(NewTy) && isa(OldTy)) {
+auto *Ty = VectorType::getWithSizeAndScalar(cast(OldTy), 
NewTy);
+V = IRB.CreateBitCast(V, Ty);
+return IRB.CreateExtractVector(NewTy, V, IRB.getInt64(0));
+  }

paulwalker-arm wrote:

I've extended the coverage to input pointer casts by creating a wrapper 
function so that existing uses of CreateBitCast are replaced with a variant 
that supports bit casting between fixed and scalable vector types.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-20 Thread Paul Walker via cfe-commits


@@ -473,6 +473,14 @@ std::optional Attribute::getVScaleRangeMax() 
const {
   return unpackVScaleRangeArgs(pImpl->getValueAsInt()).second;
 }
 
+unsigned Attribute::getVScaleValue() const {
+  std::optional VScale = getVScaleRangeMax();
+  if (VScale && *VScale == getVScaleRangeMin())
+return *VScale;
+
+  return 0;
+}

paulwalker-arm wrote:

Done.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-20 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm edited 
https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-13 Thread Paul Walker via cfe-commits


@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1

paulwalker-arm wrote:

I don't know, perhaps there is a front end problem for RISCV.  When 
investigating one of the affected test cases where vscale=2:
```
fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {
  return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32);
}
```
I see the snippet:
```
  %saved-value = alloca <1 x i8>, align 1
  store <1 x i8> %0, ptr %saved-value, align 1, !tbaa !6
  %1 = load <vscale x 2 x i1>, ptr %saved-value, align 1, !tbaa !6
```
However:
```
DL.getTypeStoreSize(<1 x i8>) => 1
DL.getTypeStoreSize(<vscale x 2 x i1>) => vscale x 1
```
This means the store size of `<vscale x 2 x i1>` is 2 bytes, which makes the 
load undefined behaviour? Looking at the new output it's just not removing the 
undefined accesses.

I'm not familiar with the RVV instructions (does it have sub-byte memory 
accesses?) but for SVE the store size for predicates is always a multiple of 
bytes and thus we model the storage of fixed length predicates as i8 vectors 
and then "cast" them to scalable boolean vectors.  We also have a later combine 
to reconstitute a real scalable vector predicate load/store when possible.

Even for pure scalable vectors the storage type is always byte sized (i.e. 
<vscale x 16 x i1>) with us using reinterpret intrinsics to shrink/expand them. 
 I know SVE is not perfect here though as trying to alloca/load/store something 
smaller will likely lead to isel failures, but that cannot (or at least 
shouldn't) happen outside of hand written ll tests.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-12 Thread Craig Topper via cfe-commits

https://github.com/topperc edited 
https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-12 Thread Craig Topper via cfe-commits


@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1

topperc wrote:

vbool32_t should hold vlen/32 elements. If vlen is 128, this should be a 4 x i8 
type. So there's something wrong somewhere

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-12 Thread Craig Topper via cfe-commits


@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1

topperc wrote:

Err, never mind. The fixed vector uses i8 elements and the scalable vector uses 
i1. So <1 x i8> is correct. The 4 elements fit in a single byte.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-12 Thread Min-Yih Hsu via cfe-commits

mshockwave wrote:

> @pawosm-arm - I've removed the reviewers because it is not quite ready yet. I 
> need to investigate the potential regressions shown by the RISCV tests.

you can also turn this PR into a draft if you want

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-12 Thread Paul Walker via cfe-commits

paulwalker-arm wrote:

@pawosm-arm - I've removed the reviewers because it is not quite ready yet.  I 
need to investigate the potential regressions shown by the RISCV tests.

https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-12 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Paul Walker (paulwalker-arm)


Changes

For functions whose vscale_range is limited to a single value we can size 
scalable vectors. This aids SROA by allowing scalable vector load and store 
operations to be considered for replacement whereby bitcasts through memory can 
be replaced by vector insert or extract operations.

---

Patch is 56.51 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/130973.diff


11 Files Affected:

- (modified) clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c 
(+25-13) 
- (modified) clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c 
(+6-2) 
- (modified) clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c (+9-7) 
- (modified) clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c (+10-13) 
- (modified) llvm/include/llvm/IR/Attributes.h (+4) 
- (modified) llvm/include/llvm/IR/DerivedTypes.h (+16) 
- (modified) llvm/lib/IR/AttributeImpl.h (+1) 
- (modified) llvm/lib/IR/Attributes.cpp (+8) 
- (modified) llvm/lib/Transforms/Scalar/SROA.cpp (+92-34) 
- (added) llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll 
(+248) 
- (modified) llvm/test/Transforms/SROA/scalable-vectors.ll (+142) 


``diff
diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c 
b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
index e2f02dc64f766..66fd466eccfef 100644
--- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
+++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
+// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1
 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]],  [[OP2_COERCE:%.*]], i64 4)
-// CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA10:![0-9]+]]
+// CHECK-128-NEXT:
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-128-NEXT:
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load 
, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv2i1.i64( 
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]],  
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4)
+// CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA6]]
 // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr 
[[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:ret  [[TMP2]]
@@ -52,11 +56,15 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, 
fixed_bool32_t op2) {
 //
 // CHECK-128-LABEL: @call_bool64_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
+// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1
 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]],  [[OP2_COERCE:%.*]], i64 2)
+// CHECK-128-NEXT:
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load 
, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv1i1.i64( 
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]],  
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2)
 // CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA10]]
+// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA6]]
 // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr 
[[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:ret  [[TMP2]]
@@ -82,11 +90,13 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, 
fixed_bool64_t op2) {
 //
 // CHECK-128-LABEL: @call_bool32_fs(
 

[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-12 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm created 
https://github.com/llvm/llvm-project/pull/130973

For functions whose vscale_range is limited to a single value we can size 
scalable vectors. This aids SROA by allowing scalable vector load and store 
operations to be considered for replacement whereby bitcasts through memory can 
be replaced by vector insert or extract operations.

>From cb6a620c93b3809742fd067233844221e74dde4f Mon Sep 17 00:00:00 2001
From: Paul Walker 
Date: Fri, 7 Mar 2025 11:54:20 +
Subject: [PATCH] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and
 scalable vectors.

For function whose vscale_range is limited to a single value we can
size scalable vectors. This aids SROA by allowing scalable vector
load and store operations to be considered for replacement whereby
bitcasts through memory can be replaced by vector insert or extract
operations.
---
 .../attr-riscv-rvv-vector-bits-less-8-call.c  |  38 ++-
 .../attr-riscv-rvv-vector-bits-less-8-cast.c  |   8 +-
 .../CodeGen/RISCV/attr-rvv-vector-bits-cast.c |  16 +-
 .../CodeGen/attr-arm-sve-vector-bits-cast.c   |  23 +-
 llvm/include/llvm/IR/Attributes.h |   4 +
 llvm/include/llvm/IR/DerivedTypes.h   |  16 ++
 llvm/lib/IR/AttributeImpl.h   |   1 +
 llvm/lib/IR/Attributes.cpp|   8 +
 llvm/lib/Transforms/Scalar/SROA.cpp   | 126 ++---
 .../scalable-vectors-with-known-vscale.ll | 248 ++
 llvm/test/Transforms/SROA/scalable-vectors.ll | 142 ++
 11 files changed, 561 insertions(+), 69 deletions(-)
 create mode 100644 
llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll

diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c 
b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
index e2f02dc64f766..66fd466eccfef 100644
--- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
+++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
+// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1
 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]],  [[OP2_COERCE:%.*]], i64 4)
-// CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA10:![0-9]+]]
+// CHECK-128-NEXT:
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-128-NEXT:
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load 
, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv2i1.i64( 
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]],  
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4)
+// CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA6]]
 // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr 
[[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:ret  [[TMP2]]
@@ -52,11 +56,15 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, 
fixed_bool32_t op2) {
 //
 // CHECK-128-LABEL: @call_bool64_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
+// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1
 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]],  [[OP2_COERCE:%.*]], i64 2)
+// CHECK-128-NEXT:
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load 
, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv1i1.i64( 
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]],  
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2)
 // CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA10]]
+// CHECK-128-NEXT:[[TMP1:%.*]] = 

[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-12 Thread Paul Walker via cfe-commits

https://github.com/paulwalker-arm updated 
https://github.com/llvm/llvm-project/pull/130973

>From 487a823a9ec35df1a93109ef03630738bdc39ab1 Mon Sep 17 00:00:00 2001
From: Paul Walker 
Date: Fri, 7 Mar 2025 11:54:20 +
Subject: [PATCH] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and
 scalable vectors.

For function whose vscale_range is limited to a single value we can
size scalable vectors. This aids SROA by allowing scalable vector
load and store operations to be considered for replacement whereby
bitcasts through memory can be replaced by vector insert or extract
operations.
---
 .../attr-riscv-rvv-vector-bits-less-8-call.c  |  38 ++-
 .../attr-riscv-rvv-vector-bits-less-8-cast.c  |   8 +-
 .../CodeGen/RISCV/attr-rvv-vector-bits-cast.c |  16 +-
 .../CodeGen/attr-arm-sve-vector-bits-cast.c   |  23 +-
 llvm/include/llvm/IR/Attributes.h |   4 +
 llvm/include/llvm/IR/DerivedTypes.h   |  16 ++
 llvm/lib/IR/AttributeImpl.h   |   1 +
 llvm/lib/IR/Attributes.cpp|   8 +
 llvm/lib/Transforms/Scalar/SROA.cpp   | 130 ++---
 .../scalable-vectors-with-known-vscale.ll | 248 ++
 llvm/test/Transforms/SROA/scalable-vectors.ll | 142 ++
 11 files changed, 563 insertions(+), 71 deletions(-)
 create mode 100644 
llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll

diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c 
b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
index e2f02dc64f766..66fd466eccfef 100644
--- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
+++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
+// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1
 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]],  [[OP2_COERCE:%.*]], i64 4)
-// CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA10:![0-9]+]]
+// CHECK-128-NEXT:
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-128-NEXT:
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load 
, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv2i1.i64( 
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]],  
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4)
+// CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA6]]
 // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr 
[[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:ret  [[TMP2]]
@@ -52,11 +56,15 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, 
fixed_bool32_t op2) {
 //
 // CHECK-128-LABEL: @call_bool64_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
+// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1
 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]],  [[OP2_COERCE:%.*]], i64 2)
+// CHECK-128-NEXT:
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load 
, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv1i1.i64( 
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]],  
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2)
 // CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA10]]
+// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA6]]
 // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr 
[[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:ret  [[TMP2]]
@@ -82,11 +90,13 @@ fixed_boo

[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-12 Thread via cfe-commits

github-actions[bot] wrote:




:warning: undef deprecator found issues in your code. :warning:



You can test this locally with the following command:


```bash
git diff -U0 --pickaxe-regex -S 
'([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 
cd043e4fbe6125df4cb4993c625fa5e46194e478 
cb6a620c93b3809742fd067233844221e74dde4f 
llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll 
clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c 
clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c 
clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c 
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c 
llvm/include/llvm/IR/Attributes.h llvm/include/llvm/IR/DerivedTypes.h 
llvm/lib/IR/AttributeImpl.h llvm/lib/IR/Attributes.cpp 
llvm/lib/Transforms/Scalar/SROA.cpp 
llvm/test/Transforms/SROA/scalable-vectors.ll
```




The following files introduce new uses of undef:
 - llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll

[Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated 
and should only be used in the rare cases where no replacement is possible. For 
example, a load of uninitialized memory yields `undef`. You should use `poison` 
values for placeholders instead.

In tests, avoid using `undef` and having tests that trigger undefined behavior. 
If you need an operand with some unimportant value, you can add a new argument 
to the function and use that instead.

For example, this is considered a bad practice:
```llvm
define void @fn() {
  ...
  br i1 undef, ...
}
```

Please use the following instead:
```llvm
define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}
```

Please refer to the [Undefined Behavior 
Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information.



https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-12 Thread via cfe-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff cd043e4fbe6125df4cb4993c625fa5e46194e478 
cb6a620c93b3809742fd067233844221e74dde4f --extensions cpp,c,h -- 
clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c 
clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c 
clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c 
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c 
llvm/include/llvm/IR/Attributes.h llvm/include/llvm/IR/DerivedTypes.h 
llvm/lib/IR/AttributeImpl.h llvm/lib/IR/Attributes.cpp 
llvm/lib/Transforms/Scalar/SROA.cpp
```





View the diff from clang-format here.


```diff
diff --git a/llvm/include/llvm/IR/DerivedTypes.h 
b/llvm/include/llvm/IR/DerivedTypes.h
index 6c8f416708..f8b79eea19 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -556,7 +556,7 @@ public:
 
   /// This static method returns a VectorType with the same size-in-bits as
   /// SizeTy but with an element type that matches the scalar type of EltTy.
-  static VectorType* getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) {
+  static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) {
 if (SizeTy->getScalarType() == EltTy->getScalarType())
   return SizeTy;
 
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp 
b/llvm/lib/Transforms/Scalar/SROA.cpp
index 81a2b38410..2e758caa65 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2198,8 +2198,7 @@ checkVectorTypesForPromotion(Partition &P, const 
DataLayout &DL,
  SmallVectorImpl<VectorType *> &CandidateTys,
  bool HaveCommonEltTy, Type *CommonEltTy,
  bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
- VectorType *CommonVecPtrTy,
- unsigned VScale) {
+ VectorType *CommonVecPtrTy, unsigned VScale) {
   // If we didn't find a vector type, nothing to do here.
   if (CandidateTys.empty())
 return nullptr;
@@ -2311,9 +2310,9 @@ static VectorType *createAndCheckVectorTypesForPromotion(
 }
   }
 
-  return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
-  CommonEltTy, HaveVecPtrTy,
-  HaveCommonVecPtrTy, CommonVecPtrTy, 
VScale);
+  return checkVectorTypesForPromotion(
+  P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
+  HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
 }
 
 /// Test whether the given alloca partitioning and range of slices can be
@@ -2325,7 +2324,8 @@ static VectorType *createAndCheckVectorTypesForPromotion(
 /// SSA value. We only can ensure this for a limited set of operations, and we
 /// don't want to do the rewrites unless we are confident that the result will
 /// be promotable, so we have an early test here.
-static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL, 
unsigned VScale) {
+static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
+   unsigned VScale) {
   // Collect the candidate types for vector-based promotion. Also track whether
   // we have different element types.
   SmallVector<VectorType *, 4> CandidateTys;

```




https://github.com/llvm/llvm-project/pull/130973
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (PR #130973)

2025-03-12 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Paul Walker (paulwalker-arm)


Changes

For functions whose vscale_range is limited to a single value, we can size 
scalable vectors. This aids SROA by allowing scalable vector load and store 
operations to be considered for replacement, whereby bitcasts through memory can 
be replaced by vector insert or extract operations.

---

Patch is 56.51 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/130973.diff


11 Files Affected:

- (modified) clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c 
(+25-13) 
- (modified) clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c 
(+6-2) 
- (modified) clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c (+9-7) 
- (modified) clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c (+10-13) 
- (modified) llvm/include/llvm/IR/Attributes.h (+4) 
- (modified) llvm/include/llvm/IR/DerivedTypes.h (+16) 
- (modified) llvm/lib/IR/AttributeImpl.h (+1) 
- (modified) llvm/lib/IR/Attributes.cpp (+8) 
- (modified) llvm/lib/Transforms/Scalar/SROA.cpp (+92-34) 
- (added) llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll 
(+248) 
- (modified) llvm/test/Transforms/SROA/scalable-vectors.ll (+142) 


``diff
diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c 
b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
index e2f02dc64f766..66fd466eccfef 100644
--- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
+++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t 
__attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
+// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1
 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]],  [[OP2_COERCE:%.*]], i64 4)
-// CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA10:![0-9]+]]
+// CHECK-128-NEXT:
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-128-NEXT:
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load 
, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv2i1.i64( 
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]],  
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4)
+// CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA6]]
 // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr 
[[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:ret  [[TMP2]]
@@ -52,11 +56,15 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, 
fixed_bool32_t op2) {
 //
 // CHECK-128-LABEL: @call_bool64_ff(
 // CHECK-128-NEXT:  entry:
+// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
+// CHECK-128-NEXT:[[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT:[[SAVED_VALUE4:%.*]] = alloca , align 1
 // CHECK-128-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]],  [[OP2_COERCE:%.*]], i64 2)
+// CHECK-128-NEXT:
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load 
, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = tail call  
@llvm.riscv.vmand.nxv1i1.i64( 
[[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]],  
[[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2)
 // CHECK-128-NEXT:store  [[TMP0]], ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA10]]
+// CHECK-128-NEXT:[[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], 
align 1, !tbaa [[TBAA6]]
 // CHECK-128-NEXT:store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:[[TMP2:%.*]] = load , ptr 
[[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT:ret  [[TMP2]]
@@ -82,11 +90,13 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, 
fixed_bool64_t op2) {
 //
 // CHECK-128-LABEL: @call_bo