[PATCH] D103082: [AArch64][SVE] Optimize svbool dupq ACLE intrinsic to fixed predicate patterns

2021-05-27 Thread Eli Friedman via Phabricator via cfe-commits
efriedma added a comment.

Do we really need a dedicated LLVM intrinsic to make the pattern-matching work 
here?  It would be better if we could leverage 
@llvm.experimental.vector.insert.nxv16i8.v16i8 or something like that.  
Something along the lines of https://godbolt.org/z/Wz4azzKrP seems 
straightforward enough to match, and generates decent code even without any 
special patterns.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103082/new/

https://reviews.llvm.org/D103082

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D103082: [AArch64][SVE] Optimize svbool dupq ACLE intrinsic to fixed predicate patterns

2021-05-25 Thread Bradley Smith via Phabricator via cfe-commits
bsmith created this revision.
bsmith added reviewers: paulwalker-arm, peterwaller-arm, joechrisellis, 
sdesmalen.
Herald added subscribers: psnobl, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: efriedma.
bsmith requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

This patch introduces a new dupq LLVM intrinsic which is emitted upon
encountering the svbool dupq ACLE intrinsics, instead of expanding them
directly.

This allows us to defer the expansion of said intrinsic until much
later when we can reasonably optimize to fixed predicates using ptrue.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D103082

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/test/CodeGen/AArch64/sve-intrinsics-dupq.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dupq.ll

Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dupq.ll
===
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dupq.ll
@@ -0,0 +1,195 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; DUPQ b8
+
+define  @dupq_b_0() #0 {
+; CHECK-LABEL: @dupq_b_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.dupq.b8(i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false,
+  i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false)
+  ret  %1
+}
+
+define  @dupq_b_d() #0 {
+; CHECK-LABEL: @dupq_b_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.dupq.b8(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false,
+  i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false)
+  ret  %1
+}
+
+define  @dupq_b_w() #0 {
+; CHECK-LABEL: @dupq_b_w(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.dupq.b8(i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false,
+  i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false)
+  ret  %1
+}
+
+define  @dupq_b_h() #0 {
+; CHECK-LABEL: @dupq_b_h(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.dupq.b8(i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false,
+  i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false)
+  ret  %1
+}
+
+define  @dupq_b_b() #0 {
+; CHECK-LABEL: @dupq_b_b(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT: ret  %1
+  %1 = tail call  @llvm.aarch64.sve.dupq.b8(i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
+  i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true)
+  ret  %1
+}
+
+; DUPQ b16
+
+define  @dupq_h_0() #0 {
+; CHECK-LABEL: @dupq_h_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.dupq.b16(i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false)
+  ret  %1
+}
+
+define  @dupq_h_d() #0 {
+; CHECK-LABEL: @dupq_h_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: %3 = call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2)
+; CHECK-NEXT: ret  %3
+  %1 = tail call  @llvm.aarch64.sve.dupq.b16(i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false)
+  ret  %1
+}
+
+define  @dupq_h_w() #0 {
+; CHECK-LABEL: @dupq_h_w(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; CHECK-NEXT: %3 = call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2)
+; CHECK-NEXT: ret  %3
+  %1 = tail call  @llvm.aarch64.sve.dupq.b16(i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false)
+  ret  %1
+}
+
+define  @dupq_h_h() #0 {
+; CHECK-LABEL: @dupq_h_h(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: ret