[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-04-14 Thread Kerry McLaughlin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG36c76de6789c: [AArch64][SVE] Add a pass for SVE intrinsic 
optimisations (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078

Files:
  llvm/lib/Target/AArch64/AArch64.h
  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
  llvm/lib/Target/AArch64/CMakeLists.txt
  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
  llvm/test/CodeGen/AArch64/O3-pipeline.ll
  llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll
  llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
@@ -0,0 +1,203 @@
+; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s
+
+define  @reinterpret_test_h( %a) {
+; OPT-LABEL: @reinterpret_test_h(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_h_rev( %a) {
+; OPT-LABEL: @reinterpret_test_h_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_test_w( %a) {
+; OPT-LABEL: @reinterpret_test_w(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_w_rev( %a) {
+; OPT-LABEL: @reinterpret_test_w_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_test_d( %a) {
+; OPT-LABEL: @reinterpret_test_d(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_d_rev( %a) {
+; OPT-LABEL: @reinterpret_test_d_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_reductions(i32 %cond,  %a,  %b,  %c) {
+; OPT-LABEL: reinterpret_reductions
+; OPT-NOT: convert
+; OPT-NOT: phi 
+; OPT: phi  [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
+; OPT-NOT: convert
+; OPT: ret
+
+entry:
+  switch i32 %cond, label %br_phi_c [
+ i32 43, label %br_phi_a
+ i32 45, label %br_phi_b
+  ]
+
+br_phi_a:
+  %a1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  br label %join
+
+br_phi_b:
+  %b1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %b)
+  br label %join
+
+br_phi_c:
+  %c1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c)
+  br label %join
+
+join:
+  %pg = phi  [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+  %pg1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+  ret  %pg1
+}
+
+; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1)
+; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets.
+define  @reinterpret_reductions_1(i32 %cond,  %a,  %b,  %c) {
+; OPT-LABEL: reinterpret_reductions_1
+; OPT: convert
+; OPT: phi  [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+; OPT-NOT: phi 
+; OPT: tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+; OPT: ret
+
+entry:
+  switch i32 %cond, label %br_phi_c [
+ i32 43, label %br_phi_a
+ i32 45, label %br_phi_b
+  ]
+
+br_phi_a:
+  %a1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  br label %join
+
+br_phi_b:
+  %b1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %b)
+  br label %join
+
+br_phi

[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-04-09 Thread Eli Friedman via Phabricator via cfe-commits
efriedma accepted this revision.
efriedma added a comment.
This revision is now accepted and ready to land.

LGTM


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-04-09 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin added a comment.

Ping :)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-03-25 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 252575.
kmclaughlin marked an inline comment as done.
kmclaughlin added a comment.

Use SmallSetVector for the list of functions gathered by runOnModule to 
preserve the order of iteration


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078

Files:
  llvm/lib/Target/AArch64/AArch64.h
  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
  llvm/lib/Target/AArch64/CMakeLists.txt
  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
  llvm/test/CodeGen/AArch64/O3-pipeline.ll
  llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll
  llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
@@ -0,0 +1,203 @@
+; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s
+
+define  @reinterpret_test_h( %a) {
+; OPT-LABEL: @reinterpret_test_h(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_h_rev( %a) {
+; OPT-LABEL: @reinterpret_test_h_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_test_w( %a) {
+; OPT-LABEL: @reinterpret_test_w(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_w_rev( %a) {
+; OPT-LABEL: @reinterpret_test_w_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_test_d( %a) {
+; OPT-LABEL: @reinterpret_test_d(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_d_rev( %a) {
+; OPT-LABEL: @reinterpret_test_d_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_reductions(i32 %cond,  %a,  %b,  %c) {
+; OPT-LABEL: reinterpret_reductions
+; OPT-NOT: convert
+; OPT-NOT: phi 
+; OPT: phi  [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
+; OPT-NOT: convert
+; OPT: ret
+
+entry:
+  switch i32 %cond, label %br_phi_c [
+ i32 43, label %br_phi_a
+ i32 45, label %br_phi_b
+  ]
+
+br_phi_a:
+  %a1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  br label %join
+
+br_phi_b:
+  %b1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %b)
+  br label %join
+
+br_phi_c:
+  %c1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c)
+  br label %join
+
+join:
+  %pg = phi  [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+  %pg1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+  ret  %pg1
+}
+
+; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1)
+; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets.
+define  @reinterpret_reductions_1(i32 %cond,  %a,  %b,  %c) {
+; OPT-LABEL: reinterpret_reductions_1
+; OPT: convert
+; OPT: phi  [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+; OPT-NOT: phi 
+; OPT: tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+; OPT: ret
+
+entry:
+  switch i32 %cond, label %br_phi_c [
+ i32 43, label %br_phi_a
+ i32 45, label %br_phi_b
+  ]
+
+br_phi_a:
+  %a1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  br label %join
+
+br_phi_b:
+  %b1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %b)
+  br label %join
+
+br_phi

[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-03-24 Thread Eli Friedman via Phabricator via cfe-commits
efriedma added inline comments.



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:233
+  bool Changed = false;
+  for (auto *F : Functions) {
+DominatorTree *DT = 
&getAnalysis(*F).getDomTree();

Iterating over a SmallPtrSet is non-deterministic.

In this context, probably SetVector is the right data structure.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-03-24 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 252368.
kmclaughlin marked 3 inline comments as done.
kmclaughlin added a comment.

Use SmallPtrSet instead of SmallVector for storing functions found by 
runOnModule
Add more comments to clarify the purpose of the pass and some of the negative 
reinterpret tests


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078

Files:
  llvm/lib/Target/AArch64/AArch64.h
  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
  llvm/lib/Target/AArch64/CMakeLists.txt
  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
  llvm/test/CodeGen/AArch64/O3-pipeline.ll
  llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll
  llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
@@ -0,0 +1,203 @@
+; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s
+
+define  @reinterpret_test_h( %a) {
+; OPT-LABEL: @reinterpret_test_h(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_h_rev( %a) {
+; OPT-LABEL: @reinterpret_test_h_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_test_w( %a) {
+; OPT-LABEL: @reinterpret_test_w(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_w_rev( %a) {
+; OPT-LABEL: @reinterpret_test_w_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_test_d( %a) {
+; OPT-LABEL: @reinterpret_test_d(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_d_rev( %a) {
+; OPT-LABEL: @reinterpret_test_d_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_reductions(i32 %cond,  %a,  %b,  %c) {
+; OPT-LABEL: reinterpret_reductions
+; OPT-NOT: convert
+; OPT-NOT: phi 
+; OPT: phi  [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
+; OPT-NOT: convert
+; OPT: ret
+
+entry:
+  switch i32 %cond, label %br_phi_c [
+ i32 43, label %br_phi_a
+ i32 45, label %br_phi_b
+  ]
+
+br_phi_a:
+  %a1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  br label %join
+
+br_phi_b:
+  %b1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %b)
+  br label %join
+
+br_phi_c:
+  %c1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c)
+  br label %join
+
+join:
+  %pg = phi  [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+  %pg1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+  ret  %pg1
+}
+
+; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1)
+; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets.
+define  @reinterpret_reductions_1(i32 %cond,  %a,  %b,  %c) {
+; OPT-LABEL: reinterpret_reductions_1
+; OPT: convert
+; OPT: phi  [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+; OPT-NOT: phi 
+; OPT: tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+; OPT: ret
+
+entry:
+  switch i32 %cond, label %br_phi_c [
+ i32 43, label %br_phi_a
+ i32 45, label %br_phi_b
+  ]
+
+br_phi_a:
+  %a1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  br label %join
+
+br_phi_b:
+  %b1 = tail call  

[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-03-23 Thread Andrzej Warzynski via Phabricator via cfe-commits
andwar added a comment.

Thanks for the updates @kmclaughlin ! Would you mind adding a comment to 
clearly mark the negative tests? E.g. for `@reinterpret_reductions_1`? Maybe 
also a comment `why` a particular case is not optimised? You've already done 
that for some tests, but not all of them.




Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:10
+//
+// Performs general IR level optimizations on SVE intrinsics.
+//

I don't see any documentation for the pass that's being added here (apart from 
the commit msg). Perhaps it's worth expanding this comment?



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:221
+bool SVEIntrinsicOpts::optimizeFunctions(
+SmallVectorImpl &Functions) {
+  bool Changed = false;

I think that you could simplify things a bit by using `SmallPtrSet` instead: 
http://llvm.org/docs/ProgrammersManual.html#dss-smallptrset.

With a set you can avoid explicit checks like this:
``` 
std::find(Functions.begin(), Functions.end(), Inst->getFunction()) == 
Functions.end()
```
With `SmallPtrSet` you can write less code :)



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:241
+
+  for (auto &F : M.getFunctionList()) {
+if (!F.isDeclaration())

Could you decorate this `for` loop with a comment explaining `what` kind of 
data is generated here and `why`? Ta!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-03-20 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin added a comment.

Thanks for reviewing this, @efriedma & @andwar!




Comment at: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp:441
+  // Expand any SVE vector library calls that we can't code generate directly.
+  bool ExpandToOptimize = (TM->getOptLevel() != CodeGenOpt::None);
+  if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive)

efriedma wrote:
> unused bool?
Removed



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:120
+if (!Reinterpret ||
+RequiredType != Reinterpret->getArgOperand(0)->getType())
+  return false;

andwar wrote:
> Isn't it guaranteed that `RequiredType == 
> Reinterpret->getArgOperand(0)->getType()` is always true? I.e., `PN` and the 
> incoming values have identical type.
The incoming values to `PN` will all have the same type, but this is making 
sure that the reinterprets are all converting from the same type (there is a 
test for this in sve-intrinsic-opts-reinterpret.ll called 
`reinterpret_reductions_1`, where the arguments to convert.to.svbool are a mix 
of nxv2i1 and nxv4i1)



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:224
+  bool Changed = false;
+  for (auto II = BB->begin(), IE = BB->end(); II != IE;) {
+Instruction *I = &(*II);

andwar wrote:
> 1. Could this be a for-range loop instead?
> 
> 2. This loop seems to be a perfect candidate for `make_early_inc_range` 
> (https://github.com/llvm/llvm-project/blob/172f1460ae05ab5c33c757142c8bdb10acfbdbe1/llvm/include/llvm/ADT/STLExtras.h#L499),
>  e.g.
> ```
>  for (Instruction &I : make_early_inc_range(BB))
>   Changed |= optimizeIntrinsic(&I);
> ```
Changed this to use `make_early_inc_range` as suggested



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:234
+  DT = &getAnalysis().getDomTree();
+  bool Changed = false;
+

efriedma wrote:
> You might want to check whether the module actually declares any of the SVE 
> intrinsics before you iterate over the whole function.
Thanks for the suggestion - I changed this to a module pass so that we can 
check if any of the SVE intrinsics we are interested in are declared first.



Comment at: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll:18
+; OPT-LABEL: ptest_any2
+; OPT: %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( 
%1,  %2)
+  %mask = tail call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)

andwar wrote:
> What's `%1` and `%2`? Is it worth adding the calls that generated them in the 
> expected output?
I think that would make sense. I've added `%1` and `%2` to the expected output 
and added more checks to the other tests here.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-03-20 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin updated this revision to Diff 251643.
kmclaughlin marked 13 inline comments as done.
kmclaughlin added a comment.

- Changed this from a function pass to a module pass & now check if any of the 
relevant SVE intrinsics are declared first before iterating over functions
- Added more checks on expected output in the tests


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078

Files:
  llvm/lib/Target/AArch64/AArch64.h
  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
  llvm/lib/Target/AArch64/CMakeLists.txt
  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
  llvm/test/CodeGen/AArch64/O3-pipeline.ll
  llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll
  llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
@@ -0,0 +1,199 @@
+; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s
+
+define  @reinterpret_test_h( %a) {
+; OPT-LABEL: @reinterpret_test_h(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_h_rev( %a) {
+; OPT-LABEL: @reinterpret_test_h_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_test_w( %a) {
+; OPT-LABEL: @reinterpret_test_w(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_w_rev( %a) {
+; OPT-LABEL: @reinterpret_test_w_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_test_d( %a) {
+; OPT-LABEL: @reinterpret_test_d(
+; OPT-NOT: convert
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_d_rev( %a) {
+; OPT-LABEL: @reinterpret_test_d_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_reductions(i32 %cond,  %a,  %b,  %c) {
+; OPT-LABEL: reinterpret_reductions
+; OPT-NOT: convert
+; OPT-NOT: phi 
+; OPT: phi  [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
+; OPT-NOT: convert
+; OPT: ret
+
+entry:
+  switch i32 %cond, label %br_phi_c [
+ i32 43, label %br_phi_a
+ i32 45, label %br_phi_b
+  ]
+
+br_phi_a:
+  %a1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  br label %join
+
+br_phi_b:
+  %b1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %b)
+  br label %join
+
+br_phi_c:
+  %c1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c)
+  br label %join
+
+join:
+  %pg = phi  [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+  %pg1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+  ret  %pg1
+}
+
+define  @reinterpret_reductions_1(i32 %cond,  %a,  %b,  %c) {
+; OPT-LABEL: reinterpret_reductions_1
+; OPT: convert
+; OPT: phi  [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+; OPT-NOT: phi 
+; OPT: tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+; OPT: ret
+
+entry:
+  switch i32 %cond, label %br_phi_c [
+ i32 43, label %br_phi_a
+ i32 45, label %br_phi_b
+  ]
+
+br_phi_a:
+  %a1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  br label %join
+
+br_phi_b:
+  %b1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %b)
+  br label %join
+
+br_phi_c:
+  %c1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c)
+  br label %j

[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-03-20 Thread Andrzej Warzynski via Phabricator via cfe-commits
andwar added a comment.

> Have you considered adding an interface for the new PM?

Please ignore that comment ^^^. I incorrectly assumed that the backend has also 
been ported to the new PM, but that's not the case. In particular, `llc` will 
only use the legacy PM anyway: 
https://github.com/llvm/llvm-project/blob/55b92dcb35a0758bea294ab6e42f9954ac06cac3/llvm/tools/llc/llc.cpp#L510

Apologies for the confusion!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-03-16 Thread Andrzej Warzynski via Phabricator via cfe-commits
andwar added a comment.

Cheers for working on this @kmclaughlin ! Have you considered adding an 
interface for the new PM? You could check this for reference: 
https://reviews.llvm.org/rGd6de5f12d485a85504bc99d384a85634574a27e2 (also 
implements a FunctionPass).




Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:30
+
+#define DEBUG_TYPE "sve-intrinsicopts"
+

In AArch64TargetMachine.cpp it's:
```
static cl::opt EnableSVEIntrinsicOpts(
"aarch64-sve-intrinsic-opts", cl::Hidden,
cl::desc("Enable SVE intrinsic opts"),
cl::init(true));
```
so it probably make sense to use `see-intrinsic-opts` here as well?



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:110
+  auto *PN = dyn_cast(X->getOperand(0));
+  if (!PN)
+return false;

Given where this method is called, this is guaranteed to be always non-null. 
Perhaps `assert` instead?



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:120
+if (!Reinterpret ||
+RequiredType != Reinterpret->getArgOperand(0)->getType())
+  return false;

Isn't it guaranteed that `RequiredType == 
Reinterpret->getArgOperand(0)->getType()` is always true? I.e., `PN` and the 
incoming values have identical type.



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:125
+  // Create the new Phi
+  LLVMContext &C1 = PN->getContext();
+  IRBuilder<> Builder(C1);

[nit] Perhaps `Ctx` instead of `C1`?



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:131
+
+  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+auto *Reinterpret = cast(PN->getIncomingValue(i));

`i` -> `I`



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:187
+  // elide away both reinterprets if there are no other users of Y.
+  if (auto *Y = isReinterpretToSVBool(I->getArgOperand(0))) {
+Value *SourceVal = Y->getArgOperand(0);

I'd be tempted to rewrite this to use early exit 
(https://llvm.org/docs/CodingStandards.html#use-early-exits-and-continue-to-simplify-code):

```
auto *Y = isReinterpretToSVBool(I->getArgOperand(0));
if (nullptr == Y)
  return false;

// The rest of the function
```



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:224
+  bool Changed = false;
+  for (auto II = BB->begin(), IE = BB->end(); II != IE;) {
+Instruction *I = &(*II);

1. Could this be a for-range loop instead?

2. This loop seems to be a perfect candidate for `make_early_inc_range` 
(https://github.com/llvm/llvm-project/blob/172f1460ae05ab5c33c757142c8bdb10acfbdbe1/llvm/include/llvm/ADT/STLExtras.h#L499),
 e.g.
```
 for (Instruction &I : make_early_inc_range(BB))
  Changed |= optimizeIntrinsic(&I);
```



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:240
+  ReversePostOrderTraversal RPOT(Root);
+  for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+Changed |= optimizeBlock(*I);

AFAIK, this iterates over BBs so `I` should be replaced with `BB`. 

Also, perhaps `for (auto *BB : RPOT) {` instead? 



Comment at: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll:18
+; OPT-LABEL: ptest_any2
+; OPT: %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( 
%1,  %2)
+  %mask = tail call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)

What's `%1` and `%2`? Is it worth adding the calls that generated them in the 
expected output?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-03-12 Thread Eli Friedman via Phabricator via cfe-commits
efriedma added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp:441
+  // Expand any SVE vector library calls that we can't code generate directly.
+  bool ExpandToOptimize = (TM->getOptLevel() != CodeGenOpt::None);
+  if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive)

unused bool?



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:56
+
+  static bool processPhiNode(Instruction *I);
+

`processPhiNode(IntrinsicInst *I)`?



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:109
+
+  auto *PN = dyn_cast(X->getOperand(0));
+  if (!PN)

Please use getArgOperand() to get the arguments of calls.



Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:234
+  DT = &getAnalysis().getDomTree();
+  bool Changed = false;
+

You might want to check whether the module actually declares any of the SVE 
intrinsics before you iterate over the whole function.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76078/new/

https://reviews.llvm.org/D76078



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations

2020-03-12 Thread Kerry McLaughlin via Phabricator via cfe-commits
kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, andwar, efriedma, cameron.mcinally, 
c-rhodes.
Herald added subscribers: danielkiss, psnobl, rkruppe, hiraditya, 
kristof.beyls, tschuett, mgorny.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.

Creates the SVEIntrinsicOpts pass. In this patch, the pass tries
to remove unnecessary reinterpret intrinsics which convert to
and from svbool_t (llvm.aarch64.sve.convert.[to|from].svbool)

For example, the reinterprets below are redundant:

  %1 = call  
@llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a)
  %2 = call  
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1)

The pass also looks for ptest intrinsics and phi instructions where
the operands are being needlessly converted to and from svbool_t.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D76078

Files:
  llvm/lib/Target/AArch64/AArch64.h
  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
  llvm/lib/Target/AArch64/CMakeLists.txt
  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
  llvm/test/CodeGen/AArch64/O3-pipeline.ll
  llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll
  llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
@@ -0,0 +1,196 @@
+; RUN: opt -S -sve-intrinsicopts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s
+
+define  @reinterpret_test_h( %a) {
+; OPT-LABEL: @reinterpret_test_h(
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_h_rev( %a) {
+; OPT-LABEL: @reinterpret_test_h_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_test_w( %a) {
+; OPT-LABEL: @reinterpret_test_w(
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_w_rev( %a) {
+; OPT-LABEL: @reinterpret_test_w_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_test_d( %a) {
+; OPT-LABEL: @reinterpret_test_d(
+; OPT: ret  %a
+  %1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1)
+  ret  %2
+}
+
+; Reinterprets are not redundant because the second reinterpret zeros the
+; lanes that don't exist within its input.
+define  @reinterpret_test_d_rev( %a) {
+; OPT-LABEL: @reinterpret_test_d_rev(
+; OPT: %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a)
+; OPT-NEXT: %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; OPT-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a)
+  %2 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+  ret  %2
+}
+
+define  @reinterpret_reductions(i32 %cond,  %a,  %b,  %c) {
+; OPT-LABEL: reinterpret_reductions
+; OPT-NOT: convert
+; OPT-NOT: phi 
+; OPT: phi  [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
+; OPT-NOT: convert
+; OPT: ret
+
+entry:
+  switch i32 %cond, label %br_phi_c [
+ i32 43, label %br_phi_a
+ i32 45, label %br_phi_b
+  ]
+
+br_phi_a:
+  %a1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)
+  br label %join
+
+br_phi_b:
+  %b1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %b)
+  br label %join
+
+br_phi_c:
+  %c1 = tail call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c)
+  br label %join
+
+join:
+  %pg = phi  [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+  %pg1 = tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+  ret  %pg1
+}
+
+define  @reinterpret_reductions_1(i32 %cond,  %a,  %b,  %c) {
+; OPT-LABEL: reinterpret_reductions_1
+; OPT: convert
+; OPT: phi  [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+; OPT-NOT: phi 
+; OPT: tail call  @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+; OPT: ret
+
+entry:
+  switch i32 %cond, label %br_phi_c