bsmith created this revision.
bsmith added reviewers: paulwalker-arm, peterwaller-arm, joechrisellis,
sdesmalen.
Herald added subscribers: psnobl, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: efriedma.
bsmith requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.
This patch introduces a new dupq LLVM intrinsic which is emitted upon
encountering the svbool dupq ACLE intrinsics, instead of expanding them
directly.
This allows us to defer the expansion of said intrinsic until much
later when we can reasonably optimize to fixed predicates using ptrue.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D103082
Files:
clang/include/clang/Basic/arm_sve.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/CodeGen/AArch64/sve-intrinsics-dupq.ll
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dupq.ll
Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dupq.ll
===
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dupq.ll
@@ -0,0 +1,195 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; DUPQ b8
+
+define <vscale x 16 x i1> @dupq_b_0() #0 {
+; CHECK-LABEL: @dupq_b_0(
+; CHECK: ret <vscale x 16 x i1> zeroinitializer
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.dupq.b8(i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false,
+ i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false)
+ ret <vscale x 16 x i1> %1
+}
+
+define <vscale x 16 x i1> @dupq_b_d() #0 {
+; CHECK-LABEL: @dupq_b_d(
+; CHECK: %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
+; CHECK-NEXT: ret <vscale x 16 x i1> %2
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.dupq.b8(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false,
+ i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false)
+ ret <vscale x 16 x i1> %1
+}
+
+define <vscale x 16 x i1> @dupq_b_w() #0 {
+; CHECK-LABEL: @dupq_b_w(
+; CHECK: %1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
+; CHECK-NEXT: ret <vscale x 16 x i1> %2
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.dupq.b8(i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false,
+ i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false)
+ ret <vscale x 16 x i1> %1
+}
+
+define <vscale x 16 x i1> @dupq_b_h() #0 {
+; CHECK-LABEL: @dupq_b_h(
+; CHECK: %1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
+; CHECK-NEXT: ret <vscale x 16 x i1> %2
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.dupq.b8(i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false,
+ i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false)
+ ret <vscale x 16 x i1> %1
+}
+
+define <vscale x 16 x i1> @dupq_b_b() #0 {
+; CHECK-LABEL: @dupq_b_b(
+; CHECK: %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT: ret <vscale x 16 x i1> %1
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.dupq.b8(i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
+ i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true)
+ ret <vscale x 16 x i1> %1
+}
+
+; DUPQ b16
+
+define <vscale x 8 x i1> @dupq_h_0() #0 {
+; CHECK-LABEL: @dupq_h_0(
+; CHECK: ret <vscale x 8 x i1> zeroinitializer
+ %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.dupq.b16(i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false)
+ ret <vscale x 8 x i1> %1
+}
+
+define <vscale x 8 x i1> @dupq_h_d() #0 {
+; CHECK-LABEL: @dupq_h_d(
+; CHECK: %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
+; CHECK-NEXT: %3 = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
+; CHECK-NEXT: ret <vscale x 8 x i1> %3
+ %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.dupq.b16(i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false)
+ ret <vscale x 8 x i1> %1
+}
+
+define <vscale x 8 x i1> @dupq_h_w() #0 {
+; CHECK-LABEL: @dupq_h_w(
+; CHECK: %1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
+; CHECK-NEXT: %3 = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
+; CHECK-NEXT: ret <vscale x 8 x i1> %3
+ %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.dupq.b16(i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false)
+ ret <vscale x 8 x i1> %1
+}
+
+define @dupq_h_h() #0 {
+; CHECK-LABEL: @dupq_h_h(
+; CHECK: %1 = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: ret