On Thu, Feb 20, 2025 at 5:37 AM H.J. Lu <[email protected]> wrote:
>
> On Wed, Feb 19, 2025 at 10:09 PM Uros Bizjak <[email protected]> wrote:
> >
> ...
> > > My algorithm keeps a list of registers which can access the stack
> > > starting with SP and FP. If any registers are derived from the list,
> > > add them to the list until all used registers are exhausted. If
> > > any stores use the register on the list, update the stack alignment.
> > > The missing part is that it doesn't check if the register is actually
> > > used for memory access.
> > >
> > > Here is the v2 patch to also check if the register is used for memory
> > > access.
> >
> > @@ -8473,16 +8482,15 @@ static void
> > ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
> > {
> > /* This insn may reference stack slot. Update the maximum stack slot
> > - alignment. */
> > + alignment if the memory is reference by the specified register. */
> >
> > referenced
>
> Fixed.
>
> > + stack_access_data *p = (stack_access_data *) data;
> > subrtx_iterator::array_type array;
> > FOR_EACH_SUBRTX (iter, array, pat, ALL)
> > - if (MEM_P (*iter))
> > + if (MEM_P (*iter) && reg_mentioned_p (p->reg, *iter))
> >
> > @@ -8494,7 +8502,7 @@ ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET
> > &stack_slot_access,
> > auto_bitmap &worklist)
> > {
> > rtx dest = SET_DEST (set);
> > - if (!REG_P (dest))
> > + if (!GENERAL_REG_P (dest))
> > return;
> >
> > The above change is not needed now that the register in the memory
> > reference is checked. The compiler can generate GPR->XMM->GPR
> > sequence.
>
> Fixed.
>
> > @@ -8630,8 +8638,8 @@ ix86_find_max_used_stack_alignment (unsigned int
> > &stack_alignment,
> > if (!NONJUMP_INSN_P (insn))
> > continue;
> >
> > - note_stores (insn, ix86_update_stack_alignment,
> > - &stack_alignment);
> > + stack_access_data data = { DF_REF_REG (ref), &stack_alignment };
> > + note_stores (insn, ix86_update_stack_alignment, &data);
> >
> > Do we need FOR_EACH_SUBRTX here instead of note_stores to also process
> > reads from stack? Reads should also be aligned.
> >
>
> Yes, we do since ix86_update_stack_alignment gets the RTL pattern,
> not operand:
>
> Breakpoint 1, ix86_update_stack_alignment (pat=0x7fffe966a108,
> data=0x7fffffffce00)
> at /export/gnu/import/git/gitlab/x86-gcc/gcc/config/i386/i386.cc:8486
> 8486 stack_access_data *p = (stack_access_data *) data;
> (set (reg/f:DI 20 xmm0 [orig:126 _53 ] [126])
> (reg/f:DI 0 ax [orig:126 _53 ] [126]))
> (gdb)
>
> FOR_EACH_SUBRTX is needed to check for the memory
> operand referenced by the stack access register.
>
> Here is the v3 patch. OK for master?
>
> --
> H.J.
Here is the v4 patch to move
stack_access_data data = { nullptr, &stack_alignment };
out of the loop.
--
H.J.
From 1f8d9b3850333984eab7e70cf73fee908aec9e77 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <[email protected]>
Date: Wed, 19 Feb 2025 19:48:07 +0800
Subject: [PATCH v4] x86: Check the stack access register for stack access
Pass the stack access register to ix86_update_stack_alignment to check
the stack access register for stack access.
gcc/
PR target/118936
* config/i386/i386.cc (stack_access_data): New.
(ix86_update_stack_alignment): Check if the memory is referenced
by the stack access register.
(ix86_find_max_used_stack_alignment): Also pass the stack access
register to ix86_update_stack_alignment.
gcc/testsuite/
PR target/118936
* gcc.target/i386/pr118936.c: New test.
Signed-off-by: H.J. Lu <[email protected]>
---
gcc/config/i386/i386.cc | 26 ++++++++++++++++--------
gcc/testsuite/gcc.target/i386/pr118936.c | 22 ++++++++++++++++++++
2 files changed, 40 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr118936.c
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 560e6525b56..f70bcd9c63f 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -8466,6 +8466,15 @@ output_probe_stack_range (rtx reg, rtx end)
return "";
}
+/* Data passed to ix86_update_stack_alignment. */
+struct stack_access_data
+{
+ /* The stack access register. */
+ const_rtx reg;
+ /* Pointer to stack alignment. */
+ unsigned int *stack_alignment;
+};
+
/* Update the maximum stack slot alignment from memory alignment in
PAT. */
@@ -8473,16 +8482,16 @@ static void
ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
{
/* This insn may reference stack slot. Update the maximum stack slot
- alignment. */
+ alignment if the memory is referenced by the stack access register.
+ */
+ stack_access_data *p = (stack_access_data *) data;
subrtx_iterator::array_type array;
FOR_EACH_SUBRTX (iter, array, pat, ALL)
- if (MEM_P (*iter))
+ if (MEM_P (*iter) && reg_mentioned_p (p->reg, *iter))
{
unsigned int alignment = MEM_ALIGN (*iter);
- unsigned int *stack_alignment
- = (unsigned int *) data;
- if (alignment > *stack_alignment)
- *stack_alignment = alignment;
+ if (alignment > *p->stack_alignment)
+ *p->stack_alignment = alignment;
break;
}
}
@@ -8616,6 +8625,7 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
while (!bitmap_empty_p (worklist));
hard_reg_set_iterator hrsi;
+ stack_access_data data = { nullptr, &stack_alignment };
EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
for (df_ref ref = DF_REG_USE_CHAIN (reg);
@@ -8630,8 +8640,8 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
if (!NONJUMP_INSN_P (insn))
continue;
- note_stores (insn, ix86_update_stack_alignment,
- &stack_alignment);
+ data.reg = DF_REF_REG (ref);
+ note_stores (insn, ix86_update_stack_alignment, &data);
}
}
diff --git a/gcc/testsuite/gcc.target/i386/pr118936.c b/gcc/testsuite/gcc.target/i386/pr118936.c
new file mode 100644
index 00000000000..a3de8cbc27a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr118936.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-omit-frame-pointer -fno-toplevel-reorder" } */
+
+struct S1
+{
+ int f1 : 17;
+ int f2 : 23;
+ int f3 : 11;
+ int f4 : 31;
+ int f6;
+};
+volatile struct S1 **g_1680;
+volatile struct S1 ***g_1679[8][8];
+void
+func_40 (struct S1 p_41, short *l_2436)
+{
+ char __trans_tmp_3;
+ __trans_tmp_3 = p_41.f6;
+ *l_2436 ^= __trans_tmp_3;
+ for (int i = 0; i < 8; i+= 1)
+ g_1679[1][i] = &g_1680;
+}
--
2.48.1