This patch tweaks ix86_ternlog_idx to allow any SUBREG that matches
the register_operand predicate.  It is split out as an independent
piece of a larger patch of mine that cleans up redundant ternlog
patterns in sse.md.  It turns out that some of these patterns aren't
(yet) sufficiently redundant to be obsolete.  The problem is that the
"new" ternlog pattern only allows SUBREGs whose inner and outer modes
are the same size, whereas the regular patterns use "register_operand",
which allows arbitrary (including paradoxical) SUBREGs.

A motivating example is f2 in gcc.target/i386/avx512dq-abs-copysign-1.c

void f2 (float x, float y)
{
  register float a __asm ("xmm16"), b __asm ("xmm17");
  a = x;
  b = y;
  asm volatile ("" : "+v" (a), "+v" (b));
  a = __builtin_copysignf (a, b);
  asm volatile ("" : "+v" (a));
}

for which combine tries:

(set (subreg:V4SF (reg:SF 100 [ _3 ]) 0)
    (ior:V4SF (and:V4SF (not:V4SF (reg:V4SF 104))
            (subreg:V4SF (reg:SF 110) 0))
        (reg:V4SF 106)))

where the SUBREG is paradoxical, with inner mode SF and outer mode V4SF.
This patch allows the recently added ternlog_operand to accept this case.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2024-06-18  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386-expand.cc (ix86_ternlog_idx): Allow any SUBREG
        that matches register_operand.  Use rtx_equal_p to compare REG
        or SUBREG "leaf" operands.


Thanks in advance,
Roger
--

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 312329e..174c52b 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -25570,27 +25570,32 @@ ix86_ternlog_idx (rtx op, rtx *args)
 
   switch (GET_CODE (op))
     {
+    case SUBREG:
+      if (!register_operand (op, GET_MODE (op)))
+       return -1;
+      /* FALLTHRU */
+
     case REG:
       if (!args[0])
        {
          args[0] = op;
          return 0xf0;
        }
-      if (REGNO (op) == REGNO (args[0]))
+      if (rtx_equal_p (op, args[0]))
        return 0xf0;
       if (!args[1])
        {
          args[1] = op;
          return 0xcc;
        }
-      if (REGNO (op) == REGNO (args[1]))
+      if (rtx_equal_p (op, args[1]))
        return 0xcc;
       if (!args[2])
        {
          args[2] = op;
          return 0xaa;
        }
-      if (REG_P (args[2]) && REGNO (op) == REGNO (args[2]))
+      if (rtx_equal_p (op, args[2]))
        return 0xaa;
       return -1;
 
@@ -25628,12 +25633,6 @@ ix86_ternlog_idx (rtx op, rtx *args)
        return 0x55;
       return -1;
 
-    case SUBREG:
-      if (GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))
-         != GET_MODE_SIZE (GET_MODE (op)))
-       return -1;
-      return ix86_ternlog_idx (SUBREG_REG (op), args);
-
     case NOT:
       idx0 = ix86_ternlog_idx (XEXP (op, 0), args);
       return (idx0 >= 0) ? idx0 ^ 0xff : -1;

Reply via email to