Hi Uros,

Sorry for the late update. I tried adjusting the combine pass but
found that it is not easy to modify shift const, so I came up with an
alternative solution based on your patch. It matches the non-canonical
zero-extend in ix86_decompose_address and adjusts ix86_rtx_costs so
that combine can match the pattern below:

(set (reg:DI 85)
   (and:DI (ashift:DI (reg:DI 87)
           (const_int 1 [0x1]))
       (const_int 4294967294 [0xfffffffe])))

Survived bootstrap and regtest on x86-64-linux. Ok for master?

Uros Bizjak <ubiz...@gmail.com> 于2021年8月16日周一 下午5:26写道:

>
> On Mon, Aug 16, 2021 at 11:18 AM Hongyu Wang <wwwhhhyyy...@gmail.com> wrote:
> >
> > > So, the question is if the combine pass really needs to zero-extend
> > > with 0xfffffffe, the left shift << 1 guarantees zero in the LSB, so
> > > 0xffffffff should be better and in line with canonical zero-extension
> > > RTX.
> >
> > The shift mask is generated in simplify_shift_const_1:
> >
> > mask_rtx = gen_int_mode (nonzero_bits (varop, int_varop_mode),
> >                          int_result_mode);
> > rtx count_rtx = gen_int_shift_amount (int_result_mode, count);
> > mask_rtx
> >   = simplify_const_binary_operation (code, int_result_mode,
> >                                      mask_rtx, count_rtx);
> >
> > Can we adjust the count for ashift if nonzero_bits overlaps it?
> >
> > > Also, ix86_decompose_address accepts ASHIFT RTX when ASHIFT is
> > > embedded in the PLUS chain, but naked ASHIFT is rejected (c.f. the
> > > call in ix86_legitimate_address_p) for some (historic?) reason. It
> > > looks to me that this restriction is not necessary, since
> > > ix86_legitimize_address can canonicalize ASHIFT RTXes without
> > > problems. The attached patch that survives bootstrap and regtest can
> > > help in your case.
> >
> > We have a split to transform ashift to mult, I'm afraid it could not
> > help this issue.
>
> If you want existing *lea<mode> to accept ASHIFT RTX, it uses
> address_no_seg_operand predicate which uses address_operand predicate,
> which calls ix86_legitimate_address_p, which ATM rejects ASHIFT RTXes.
>
> Uros.
From 4bcebb985439867d12f2038e97c72baaf092ffbf Mon Sep 17 00:00:00 2001
From: Hongyu Wang <hongyu.wang@intel.com>
Date: Tue, 17 Aug 2021 16:53:46 +0800
Subject: [PATCH] i386: Optimize lea with zero-extend. [PR 101716]

For the ASHIFT + ZERO_EXTEND pattern, the combine pass fails to
match it to lea because combine generates a non-canonical
zero-extend. Adjust the predicate and cost model to allow combine
to form lea.

gcc/ChangeLog:

	PR target/101716
	* config/i386/i386.c (ix86_decompose_address): Adjust comment.
	Remove retval check for ASHIFT, allow non-canonical zero extend
	if AND mask covers ASHIFT count.
	(ix86_legitimate_address_p): Adjust condition for decompose.
	(ix86_rtx_costs): Adjust cost for lea with non-canonical
	zero-extend.

	Co-Authored-By: Uros Bizjak <ubizjak@gmail.com>

gcc/testsuite/ChangeLog:

	PR target/101716
	* gcc.target/i386/pr101716.c: New test.
---
 gcc/config/i386/i386.c                   | 36 ++++++++++++++++++++----
 gcc/testsuite/gcc.target/i386/pr101716.c | 11 ++++++++
 2 files changed, 41 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr101716.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5bff131f6d9..a997fc04004 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10018,8 +10018,7 @@ ix86_live_on_entry (bitmap regs)
 
 /* Extract the parts of an RTL expression that is a valid memory address
    for an instruction.  Return 0 if the structure of the address is
-   grossly off.  Return -1 if the address contains ASHIFT, so it is not
-   strictly valid, but still used for computing length of lea instruction.  */
+   grossly off.  */
 
 int
 ix86_decompose_address (rtx addr, struct ix86_address *out)
@@ -10029,7 +10028,6 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
   HOST_WIDE_INT scale = 1;
   rtx scale_rtx = NULL_RTX;
   rtx tmp;
-  int retval = 1;
   addr_space_t seg = ADDR_SPACE_GENERIC;
 
   /* Allow zero-extended SImode addresses,
@@ -10053,6 +10051,27 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
 	  if (CONST_INT_P (addr))
 	    return 0;
 	}
+      else if (GET_CODE (addr) == AND)
+	{
+	  /* For ASHIFT inside AND, combine will not generate
+	     canonical zero-extend. Merge mask for AND and shift_count
+	     to check if it is canonical zero-extend.  */
+	  tmp = XEXP (addr, 0);
+	  rtx mask = XEXP (addr, 1);
+	  if (tmp && GET_CODE(tmp) == ASHIFT)
+	    {
+	      rtx shift_val = XEXP (tmp, 1);
+	      if (CONST_INT_P (mask) && CONST_INT_P (shift_val)
+		  && (((unsigned HOST_WIDE_INT) INTVAL(mask)
+		      | (HOST_WIDE_INT_1U << (INTVAL(shift_val) - 1)))
+		      == 0xffffffff))
+		{
+		  addr = lowpart_subreg (SImode, XEXP (addr, 0),
+					 DImode);
+		}
+	    }
+
+	}
     }
 
   /* Allow SImode subregs of DImode addresses,
@@ -10179,7 +10198,6 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
       if ((unsigned HOST_WIDE_INT) scale > 3)
 	return 0;
       scale = 1 << scale;
-      retval = -1;
     }
   else
     disp = addr;			/* displacement */
@@ -10252,7 +10270,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
   out->scale = scale;
   out->seg = seg;
 
-  return retval;
+  return 1;
 }
 
 /* Return cost of the memory address x.
@@ -10765,7 +10783,7 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
   HOST_WIDE_INT scale;
   addr_space_t seg;
 
-  if (ix86_decompose_address (addr, &parts) <= 0)
+  if (ix86_decompose_address (addr, &parts) == 0)
     /* Decomposition failed.  */
     return false;
 
@@ -20419,6 +20437,12 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
 	               << (GET_MODE (XEXP (x, 1)) != DImode)));
 	  return true;
 	}
+      else if (code == AND
+	       && address_no_seg_operand (x, mode))
+	{
+	  *total = cost->lea;
+	  return true;
+	}
       /* FALLTHRU */
 
     case NEG:
diff --git a/gcc/testsuite/gcc.target/i386/pr101716.c b/gcc/testsuite/gcc.target/i386/pr101716.c
new file mode 100644
index 00000000000..5e3ea64a320
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101716.c
@@ -0,0 +1,11 @@
+/* PR target/101716 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+/* { dg-final { scan-assembler "leal\[\\t \]\[^\\n\]*eax" } } */
+/* { dg-final { scan-assembler-not "movl\[\\t \]\[^\\n\]*eax" } } */
+
+unsigned long long sample1(unsigned long long m) {
+    unsigned int t = -1;
+    return (m << 1) & t;
+}
-- 
2.18.1

Reply via email to