Conanap created this revision.
Conanap added reviewers: PowerPC, saghir, nemanjai, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.
Herald added subscribers: steven.zhang, kbarton.
This patch implements 128-bit Binary Vector Multiply builtins for PowerPC10.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D83955
Files:
clang/include/clang/Basic/BuiltinsPPC.def
clang/lib/Headers/altivec.h
clang/test/CodeGen/builtins-ppc-p10vector.c
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/Target/PowerPC/PPCInstrPrefix.td
llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
Index: llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+; This test case aims to test the vector multiply instructions on Power10.
+
+declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone
+
+define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmuleud:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmuleud v2, v2, v3
+; CHECK-NEXT: blr
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y)
+ ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmuloud:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmuloud v2, v2, v3
+; CHECK-NEXT: blr
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y)
+ ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmulesd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmulesd v2, v2, v3
+; CHECK-NEXT: blr
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y)
+ ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmulosd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmulosd v2, v2, v3
+; CHECK-NEXT: blr
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y)
+ ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone {
+; CHECK-LABEL: test_vmsumcud:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmsumcud v2, v2, v3, v4
+; CHECK-NEXT: blr
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z)
+ ret <1 x i128> %tmp
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -977,20 +977,31 @@
}
def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulesd $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmulesd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA,
+ v2i64:$vB))]>;
def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmuleud $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmuleud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA,
+ v2i64:$vB))]>;
def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulosd $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmulosd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA,
+ v2i64:$vB))]>;
def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmuloud $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmuloud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA,
+ v2i64:$vB))]>;
def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD),
(ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, []>;
+ "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral,
+ [(set v1i128:$vD,
+ (int_ppc_altivec_vmsumcud v2i64:$vA, v2i64:$vB,
+ v1i128:$vC))]>;
def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vdivsq $vD, $vA, $vB", IIC_VecGeneral, []>;
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -186,6 +186,13 @@
[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
+/// PowerPC_Vec_QDD_Intrinsic - A PowerPC intrinsic that takes two v2i64
+/// vectors and returns one v1i128. These intrinsics have no side effects.
+class PowerPC_Vec_QDD_Intrinsic<string GCCIntSuffix>
+ : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+ [llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+
//===----------------------------------------------------------------------===//
// PowerPC VSX Intrinsic Class Definitions.
//
@@ -622,6 +629,10 @@
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vmsumcud : GCCBuiltin<"__builtin_altivec_vmsumcud">,
+ Intrinsic<[llvm_v1i128_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>;
+
// Vector Multiply Instructions.
def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
@@ -632,6 +643,7 @@
def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vmulesd : PowerPC_Vec_QDD_Intrinsic<"vmulesd">;
def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
@@ -641,6 +653,7 @@
def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vmuleud : PowerPC_Vec_QDD_Intrinsic<"vmuleud">;
def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
@@ -651,6 +664,7 @@
def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vmulosd : PowerPC_Vec_QDD_Intrinsic<"vmulosd">;
def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
@@ -660,6 +674,7 @@
def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vmuloud : PowerPC_Vec_QDD_Intrinsic<"vmuloud">;
// Vector Sum Instructions.
def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -581,3 +581,41 @@
// CHECK: ret <4 x float>
return vec_splati_ins(vfa, 0, 1.0f);
}
+
+vector unsigned __int128 test_vec_mule_u128(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vmuleud(<2 x i64>
+ // CHECK-BE-NEXT: ret <1 x i128>
+ // CHECK-LE: @llvm.ppc.altivec.vmuloud(<2 x i64>
+ // CHECK-LE-NEXT: ret <1 x i128>
+ return vec_mule(vulla, vullb);
+}
+
+vector signed __int128 test_vec_mule_s128(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vmulesd(<2 x i64>
+ // CHECK-BE-NEXT: ret <1 x i128>
+ // CHECK-LE: @llvm.ppc.altivec.vmulosd(<2 x i64>
+ // CHECK-LE-NEXT: ret <1 x i128>
+ return vec_mule(vslla, vsllb);
+}
+
+vector unsigned __int128 test_vec_mulo_u128(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vmuloud(<2 x i64>
+ // CHECK-BE-NEXT: ret <1 x i128>
+ // CHECK-LE: @llvm.ppc.altivec.vmuleud(<2 x i64>
+ // CHECK-LE-NEXT: ret <1 x i128>
+ return vec_mulo(vulla, vullb);
+}
+
+vector signed __int128 test_vec_mulo_s128(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vmulosd(<2 x i64>
+ // CHECK-BE-NEXT: ret <1 x i128>
+ // CHECK-LE: @llvm.ppc.altivec.vmulesd(<2 x i64>
+ // CHECK-LE-NEXT: ret <1 x i128>
+ return vec_mulo(vslla, vsllb);
+}
+
+vector unsigned __int128 test_vec_msumc_u128(void) {
+ // CHECK: @llvm.ppc.altivec.vmsumcud(<2 x i64>
+ // CHECK-NEXT: ret <1 x i128>
+ return vec_msumc(vulla, vullb, vui128a);
+}
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -5467,6 +5467,16 @@
return __builtin_altivec_vmsumuhm(__a, __b, __c);
}
+/* vec_msumc */
+
+#ifdef _ARCH_PWR10
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_msumc(vector unsigned long long __a, vector unsigned long long __b,
+ vector unsigned __int128 __c) {
+ return __builtin_altivec_vmsumcud(__a, __b, __c);
+}
+#endif
+
/* vec_vmsummbm */
static __inline__ vector int __attribute__((__always_inline__))
@@ -5693,6 +5703,26 @@
}
#endif
+#ifdef _ARCH_PWR10
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_mule(vector signed long long __a, vector signed long long __b) {
+#ifdef __LITTLE_ENDIAN__
+ return __builtin_altivec_vmulosd(__a, __b);
+#else
+ return __builtin_altivec_vmulesd(__a, __b);
+#endif
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_mule(vector unsigned long long __a, vector unsigned long long __b) {
+#ifdef __LITTLE_ENDIAN__
+ return __builtin_altivec_vmuloud(__a, __b);
+#else
+ return __builtin_altivec_vmuleud(__a, __b);
+#endif
+}
+#endif
+
/* vec_vmulesb */
static __inline__ vector short __attribute__((__always_inline__))
@@ -5795,6 +5825,26 @@
}
#endif
+#ifdef _ARCH_PWR10
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_mulo(vector signed long long __a, vector signed long long __b) {
+#ifdef __LITTLE_ENDIAN__
+ return __builtin_altivec_vmulesd(__a, __b);
+#else
+ return __builtin_altivec_vmulosd(__a, __b);
+#endif
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_mulo(vector unsigned long long __a, vector unsigned long long __b) {
+#ifdef __LITTLE_ENDIAN__
+ return __builtin_altivec_vmuleud(__a, __b);
+#else
+ return __builtin_altivec_vmuloud(__a, __b);
+#endif
+}
+#endif
+
/* vec_vmulosb */
static __inline__ vector short __attribute__((__always_inline__))
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -100,6 +100,11 @@
BUILTIN(__builtin_altivec_vmulosh, "V4SiV8SsV8Ss", "")
BUILTIN(__builtin_altivec_vmulouw, "V2ULLiV4UiV4Ui", "")
BUILTIN(__builtin_altivec_vmulosw, "V2SLLiV4SiV4Si", "")
+BUILTIN(__builtin_altivec_vmuleud, "V1ULLLiV2ULLiV2ULLi", "")
+BUILTIN(__builtin_altivec_vmulesd, "V1SLLLiV2SLLiV2SLLi", "")
+BUILTIN(__builtin_altivec_vmuloud, "V1ULLLiV2ULLiV2ULLi", "")
+BUILTIN(__builtin_altivec_vmulosd, "V1SLLLiV2SLLiV2SLLi", "")
+BUILTIN(__builtin_altivec_vmsumcud, "V1ULLLiV2ULLiV2ULLiV1ULLLi", "")
BUILTIN(__builtin_altivec_vnmsubfp, "V4fV4fV4fV4f", "")
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits