Moving values between GPRs and VRs in any mode of size less than or
equal to 8 bytes becomes possible with the vector extensions.  Unless
the costs for those register moves are adapted, such moves typically
go through memory.

gcc/ChangeLog:

        * config/s390/s390.cc (s390_register_move_cost): Add costing for
        vlvg/vlgv.
---
 gcc/config/s390/s390.cc | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 737b176766a..b5e636c5376 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -3862,7 +3862,21 @@ s390_register_move_cost (machine_mode mode,
 {
   /* On s390, copy between fprs and gprs is expensive.  */
 
-  /* It becomes somewhat faster having ldgr/lgdr.  */
+  /* With vector extensions any GPR<->VR load up to 8 bytes is supported.  */
+  if (TARGET_VX && GET_MODE_SIZE (mode) <= 8)
+    {
+      /* ldgr/vlvgg take one cycle and vlvg[bhf] take two cycles. */
+      if (reg_classes_intersect_p (from, GENERAL_REGS)
+         && reg_classes_intersect_p (to, VEC_REGS))
+       return GET_MODE_SIZE (mode) == 8 ? 1 : 2;
+      /* lgdr/vlgv[fg] take three cycles and vlgv[bh] take five cycles. */
+      if (reg_classes_intersect_p (to, GENERAL_REGS)
+         && reg_classes_intersect_p (from, VEC_REGS))
+       return GET_MODE_SIZE (mode) >= 4 ? 3 : 4;
+    }
+
+  /* Without vector extensions it still becomes somewhat faster having
+     ldgr/lgdr.  */
   if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
     {
       /* ldgr is single cycle. */
-- 
2.49.0

Reply via email to