[PATCH] D113642: [PowerPC] Provide XL-compatible vec_round implementation

Nemanja Ivanovic via Phabricator via cfe-commits Wed, 10 Nov 2021 20:13:11 -0800

nemanjai created this revision.
nemanjai added reviewers: PowerPC, rzurob, qiongsiwu.
Herald added subscribers: shchenz, kbarton, hiraditya.
nemanjai requested review of this revision.
Herald added projects: clang, LLVM.


The XL implementation of `vec_round` for `vector double` uses
"round-to-nearest, ties to even", just as the `vector float` version does.
However, clang and gcc use "round-to-nearest, ties away from zero" for
`vector double` and "round-to-nearest, ties to even" for `vector float`.

The XL behaviour is enabled only when the `__XL_COMPAT_ALTIVEC__` macro is
defined, in the same way as other XL incompatibilities are handled.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D113642

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-vsx.c
  clang/test/CodeGen/builtins-ppc-xlcompat.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/test/CodeGen/PowerPC/read-set-flm.ll

Index: llvm/test/CodeGen/PowerPC/read-set-flm.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/read-set-flm.ll
+++ llvm/test/CodeGen/PowerPC/read-set-flm.ll
@@ -11,7 +11,6 @@
 ; CHECK-NEXT:    xsdivdp 1, 1, 2
 ; CHECK-NEXT:    xsadddp 1, 1, 3
 ; CHECK-NEXT:    xsadddp 0, 1, 0
-; CHECK-NEXT:    mffs 1
 ; CHECK-NEXT:    mtfsf 255, 4
 ; CHECK-NEXT:    xsdivdp 1, 3, 4
 ; CHECK-NEXT:    xsadddp 1, 1, 2
@@ -47,7 +46,6 @@
 ; CHECK-NEXT:    xsdivdp 1, 1, 2
 ; CHECK-NEXT:    xsadddp 1, 1, 3
 ; CHECK-NEXT:    xsadddp 0, 1, 0
-; CHECK-NEXT:    mffs 1
 ; CHECK-NEXT:    mtfsf 255, 4
 ; CHECK-NEXT:    xsdivdp 1, 3, 4
 ; CHECK-NEXT:    xsadddp 1, 1, 2
@@ -96,7 +94,6 @@
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mffs 0
 ; CHECK-NEXT:    stfd 0, 0(30)
-; CHECK-NEXT:    mffs 0
 ; CHECK-NEXT:    mtfsf 255, 31
 ; CHECK-NEXT:    addi 1, 1, 64
 ; CHECK-NEXT:    ld 0, 16(1)
@@ -134,7 +131,6 @@
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mffs 0
 ; CHECK-NEXT:    stfd 0, 0(30)
-; CHECK-NEXT:    mffs 0
 ; CHECK-NEXT:    mtfsf 255, 31
 ; CHECK-NEXT:    addi 1, 1, 64
 ; CHECK-NEXT:    ld 0, 16(1)
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12122,6 +12122,7 @@
   MachineFunction::iterator It = ++BB->getIterator();
 
   MachineFunction *F = BB->getParent();
+  MachineRegisterInfo &MRI = F->getRegInfo();
 
   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
       MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
@@ -12727,7 +12728,10 @@
     Register OldFPSCRReg = MI.getOperand(0).getReg();
 
     // Save FPSCR value.
-    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+    if (MRI.use_empty(OldFPSCRReg))
+      BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
+    else
+      BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
 
     // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
     // the following settings:
@@ -12860,7 +12864,10 @@
 
     // Result of setflm is previous FPSCR content, so we need to save it first.
     Register OldFPSCRReg = MI.getOperand(0).getReg();
-    BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
+    if (MRI.use_empty(OldFPSCRReg))
+      BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
+    else
+      BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
 
     // Put bits in 32:63 to FPSCR.
     Register NewFPSCRReg = MI.getOperand(1).getReg();
Index: clang/test/CodeGen/builtins-ppc-xlcompat.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-xlcompat.c
+++ clang/test/CodeGen/builtins-ppc-xlcompat.c
@@ -5,11 +5,16 @@
 // RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \
 // RUN:   -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - \
 // RUN:   -D__XL_COMPAT_ALTIVEC__ -target-cpu pwr8 | FileCheck %s
+// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \
+// RUN:   -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - \
+// RUN:   -U__XL_COMPAT_ALTIVEC__ -target-cpu pwr8 | FileCheck \
+// RUN:   --check-prefix=NOCOMPAT %s
 #include <altivec.h>
 vector double vd = { 3.4e22, 1.8e-3 };
 vector signed long long vsll = { -12345678999ll, 12345678999 };
 vector unsigned long long vull = { 11547229456923630743llu, 18014402265226391llu };
 vector float res_vf;
+vector double res_vd;
 vector signed int res_vsi;
 vector unsigned int res_vui;
 
@@ -38,4 +43,11 @@
 // CHECK:         [[TMP8:%.*]] = load <2 x double>, <2 x double>* @vd, align 16
 // CHECK-NEXT:    fmul <2 x double> [[TMP8]], <double 1.600000e+01, double 1.600000e+01>
 // CHECK:         call <4 x i32> @llvm.ppc.vsx.xvcvdpuxws(<2 x double>
+
+  res_vd = vec_round(vd);
+// CHECK:         call double @llvm.ppc.readflm()
+// CHECK:         call double @llvm.ppc.setrnd(i32 0)
+// CHECK:         call <2 x double> @llvm.rint.v2f64(<2 x double>
+// CHECK:         call double @llvm.ppc.setflm(double
+// NOCOMPAT:      call <2 x double> @llvm.round.v2f64(<2 x double>
 }
Index: clang/test/CodeGen/builtins-ppc-vsx.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-vsx.c
+++ clang/test/CodeGen/builtins-ppc-vsx.c
@@ -409,10 +409,6 @@
 // CHECK: call <4 x float> @llvm.ppc.altivec.vrfin(<4 x float>
 // CHECK-LE: call <4 x float> @llvm.ppc.altivec.vrfin(<4 x float>
 
-  res_vd = vec_round(vd);
-// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double>
-// CHECK-LE: call <2 x double> @llvm.round.v2f64(<2 x double>
-
   res_vd = vec_perm(vd, vd, vuc);
 // CHECK: @llvm.ppc.altivec.vperm
 // CHECK-LE: @llvm.ppc.altivec.vperm
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -8413,9 +8413,20 @@
 }
 
 #ifdef __VSX__
+#ifdef __XL_COMPAT_ALTIVEC__
+static __inline__ vector double __ATTRS_o_ai vec_rint(vector double __a);
+static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) {
+  double __fpscr = __builtin_readflm();
+  __builtin_setrnd(0);
+  vector double __rounded = vec_rint(__a);
+  __builtin_setflm(__fpscr);
+  return __rounded;
+}
+#else
 static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) {
   return __builtin_vsx_xvrdpi(__a);
 }
+#endif
 
 /* vec_rint */

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D113642: [PowerPC] Provide XL-compatible vec_round implementation

Reply via email to