On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <[email protected]> wrote:
>
> Don't assume that stack slots can only be accessed by stack or frame
> registers. We first find all registers defined by stack or frame
> registers. Then check memory accesses by such registers, including
> stack and frame registers.
>
> gcc/
>
> PR target/109780
> PR target/109093
> * config/i386/i386.cc (ix86_update_stack_alignment): New.
> (ix86_find_all_reg_use_1): Likewise.
> (ix86_find_all_reg_use): Likewise.
> (ix86_find_max_used_stack_alignment): Also check memory accesses
> from registers defined by stack or frame registers.
>
> gcc/testsuite/
>
> PR target/109780
> PR target/109093
> * g++.target/i386/pr109780-1.C: New test.
> * gcc.target/i386/pr109093-1.c: Likewise.
> * gcc.target/i386/pr109780-1.c: Likewise.
> * gcc.target/i386/pr109780-2.c: Likewise.
> * gcc.target/i386/pr109780-3.c: Likewise.
Some non-algorithmical changes below, otherwise LGTM. Please also get
someone to review dataflow infrastructure usage, I am not well versed
with it.
+/* Helper function for ix86_find_all_reg_use. */
+
+static void
+ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
+ auto_bitmap &worklist)
+{
+ rtx src = SET_SRC (set);
+ if (MEM_P (src))
Also reject assignment from CONST_SCALAR_INT?
+ return;
+
+ rtx dest = SET_DEST (set);
+ if (!REG_P (dest))
+ return;
Can we switch these two so the test for REG_P (dest) will be first? We
are not interested in anything that doesn't assign to a register.
+/* Find all registers defined with REG. */
+
+static void
+ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
+ unsigned int reg, auto_bitmap &worklist)
+{
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X)
+ if (CALL_P (insn) || JUMP_P (insn))
+ continue;
And here remains only NONJUMP_INSN_P (X), so both above conditions
could be substituted with:
if (!NONJUMP_INSN_P (X))
continue;
+
+ rtx set = single_set (insn);
+ if (set)
+ ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
+
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != PARALLEL)
+ continue;
+
+ for (int i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx exp = XVECEXP (pat, 0, i);
+ switch (GET_CODE (exp))
+ {
+ case ASM_OPERANDS:
+ case CLOBBER:
+ case PREFETCH:
+ case USE:
+ break;
+ case UNSPEC:
+ case UNSPEC_VOLATILE:
+ for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
+ {
+ rtx x = XVECEXP (exp, 0, j);
+ if (GET_CODE (x) == SET)
+ ix86_find_all_reg_use_1 (x, stack_slot_access,
+ worklist);
+ }
+ break;
+ case SET:
+ ix86_find_all_reg_use_1 (exp, stack_slot_access,
+ worklist);
+ break;
+ default:
+ debug_rtx (exp);
Stray debug remaining?
+ HARD_REG_SET stack_slot_access;
+ CLEAR_HARD_REG_SET (stack_slot_access);
+
+ /* Stack slot can be accessed by stack pointer, frame pointer or
+ registers defined by stack pointer or frame pointer. */
+ auto_bitmap worklist;
Please put a line of vertical space here ...
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ STACK_POINTER_REGNUM);
+ bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
... here ...
+ if (frame_pointer_needed)
+ {
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
+ bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+ }
... here ...
+ unsigned int reg;
... here ...
+ do
+ {
+ reg = bitmap_clear_first_set_bit (worklist);
+ ix86_find_all_reg_use (stack_slot_access, reg, worklist);
+ }
+ while (!bitmap_empty_p (worklist));
+
+ hard_reg_set_iterator hrsi;
... here ...
+ EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
+ for (df_ref ref = DF_REG_USE_CHAIN (reg);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
... and here.
+ if (!NONDEBUG_INSN_P (insn))
!NONJUMP_INSN_P ?
+ continue;
Also some vertical space here.
+ note_stores (insn, ix86_update_stack_alignment,
+ &stack_alignment);
+ }
}
diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c
b/gcc/testsuite/gcc.target/i386/pr109093-1.c
new file mode 100644
index 00000000000..0459d1947f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -mtune=znver1
-ftrivial-auto-var-init=zero -fno-stack-protector" } */
+
Please use
/* { dg-do run { target avx2_runtime } } */
+{
+ __builtin_cpu_init ();
+
+ if (__builtin_cpu_supports ("avx2"))
+ run ();
+
And remove the above code from the test.
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c
b/gcc/testsuite/gcc.target/i386/pr109780-3.c
new file mode 100644
index 00000000000..f26fdd79af1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector
-fno-stack-clash-protection" } */
+
Also here.
Thanks,
Uros.