This crash happens because LRA tries to save an AVX hard reg in a large mode, and it only appears in the function in smaller modes. Stack alignment isn't set up to support the larger mode.

Currently, biggest_mode for hard registers is set up from regno_reg_rtx, set up to a large mode for argument regs. That mode is not necessarily seen in the function itself and may be too large. If that initialization is changed to use VOIDmode, we compute the correct value during lra_push_insns, but then subsequently we clear it to VOIDmode again, and it never seems to get updated. Hence, the patch has several parts: initialize hard reg biggest_mode with VOIDmode, ensure it gets updated during process_bb_lives, and use the value in split_reg.

Bootstrapped and tested on x86_64-linux, ok?


Bernd
	PR target/70083
	* lra-lives.c (process_bb_lives): Also update biggest mode for hard
	regs.
	(lra_create_live_ranges_1): initialize hard register biggest_mode to
	VOIDmode.
	* lra-constraints.c (split_reg): For hard regs, try to find the
	biggest single-register mode used in the function.

testsuite/
	PR target/70083
	* gcc.dg/torture/pr70083.c: New test.
	* gcc.target/i386/pr70083.c: New test.

Index: gcc/lra-lives.c
===================================================================
--- gcc/lra-lives.c	(revision 234025)
+++ gcc/lra-lives.c	(working copy)
@@ -700,12 +700,13 @@ process_bb_lives (basic_block bb, int &c
 
       /* Update max ref width and hard reg usage.  */
       for (reg = curr_id->regs; reg != NULL; reg = reg->next)
-	if (reg->regno >= FIRST_PSEUDO_REGISTER
-	    && (GET_MODE_SIZE (reg->biggest_mode)
-		> GET_MODE_SIZE (lra_reg_info[reg->regno].biggest_mode)))
-	  lra_reg_info[reg->regno].biggest_mode = reg->biggest_mode;
-	else if (reg->regno < FIRST_PSEUDO_REGISTER)
-	  lra_hard_reg_usage[reg->regno] += freq;
+	{
+	  if (GET_MODE_SIZE (reg->biggest_mode)
+	      > GET_MODE_SIZE (lra_reg_info[reg->regno].biggest_mode))
+	    lra_reg_info[reg->regno].biggest_mode = reg->biggest_mode;
+	  if (reg->regno < FIRST_PSEUDO_REGISTER)
+	    lra_hard_reg_usage[reg->regno] += freq;
+	}
 
       call_p = CALL_P (curr_insn);
       src_regno = (set != NULL_RTX && REG_P (SET_SRC (set))
@@ -1208,7 +1209,7 @@ lra_create_live_ranges_1 (bool all_p, bo
 	 conservative because of recent transformation.  Here in this
 	 file we recalculate it again as it costs practically
 	 nothing.  */
-      if (regno_reg_rtx[i] != NULL_RTX)
+      if (i >= FIRST_PSEUDO_REGISTER && regno_reg_rtx[i] != NULL_RTX)
 	lra_reg_info[i].biggest_mode = GET_MODE (regno_reg_rtx[i]);
       else
 	lra_reg_info[i].biggest_mode = VOIDmode;
Index: gcc/lra-constraints.c
===================================================================
--- gcc/lra-constraints.c	(revision 234025)
+++ gcc/lra-constraints.c	(working copy)
@@ -4972,6 +4972,7 @@ split_reg (bool before_p, int original_r
   rtx_insn *restore, *save;
   bool after_p;
   bool call_save_p;
+  machine_mode mode;
 
   if (original_regno < FIRST_PSEUDO_REGISTER)
     {
@@ -4979,24 +4980,32 @@ split_reg (bool before_p, int original_r
       hard_regno = original_regno;
       call_save_p = false;
       nregs = 1;
+      mode = lra_reg_info[hard_regno].biggest_mode;
+      machine_mode reg_rtx_mode = GET_MODE (regno_reg_rtx[hard_regno]);
+      if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (reg_rtx_mode))
+	{
+	  original_reg = regno_reg_rtx[hard_regno];
+	  mode = reg_rtx_mode;
+	}
+      else
+	original_reg = gen_rtx_REG (mode, hard_regno);
     }
   else
     {
+      mode = PSEUDO_REGNO_MODE (original_regno);
       hard_regno = reg_renumber[original_regno];
-      nregs = hard_regno_nregs[hard_regno][PSEUDO_REGNO_MODE (original_regno)];
+      nregs = hard_regno_nregs[hard_regno][mode];
       rclass = lra_get_allocno_class (original_regno);
       original_reg = regno_reg_rtx[original_regno];
       call_save_p = need_for_call_save_p (original_regno);
     }
-  original_reg = regno_reg_rtx[original_regno];
   lra_assert (hard_regno >= 0);
   if (lra_dump_file != NULL)
     fprintf (lra_dump_file,
 	     "	  ((((((((((((((((((((((((((((((((((((((((((((((((\n");
+	  
   if (call_save_p)
     {
-      machine_mode mode = GET_MODE (original_reg);
-
       mode = HARD_REGNO_CALLER_SAVE_MODE (hard_regno,
 					  hard_regno_nregs[hard_regno][mode],
 					  mode);
@@ -5004,8 +5013,7 @@ split_reg (bool before_p, int original_r
     }
   else
     {
-      rclass = choose_split_class (rclass, hard_regno,
-				   GET_MODE (original_reg));
+      rclass = choose_split_class (rclass, hard_regno, mode);
       if (rclass == NO_REGS)
 	{
 	  if (lra_dump_file != NULL)
@@ -5023,8 +5031,7 @@ split_reg (bool before_p, int original_r
 	    }
 	  return false;
 	}
-      new_reg = lra_create_new_reg (GET_MODE (original_reg), original_reg,
-				    rclass, "split");
+      new_reg = lra_create_new_reg (mode, original_reg, rclass, "split");
       reg_renumber[REGNO (new_reg)] = hard_regno;
     }
   save = emit_spill_move (true, new_reg, original_reg);
Index: gcc/testsuite/gcc.dg/torture/pr70083.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr70083.c	(revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr70083.c	(working copy)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-Wno-psabi" } */
+
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef long long v4di __attribute__ ((vector_size (32)));
+
+int
+foo(int u32_0, int u64_0, int u64_1, v16hi v32u16_0, v8si v32u32_0, v4di v32u64_0, v16hi v32u16_1, v8si v32u32_1, v4di v32u64_1)
+{
+  v32u32_1 %= (v8si) v32u16_1 | 1;
+  v32u64_1[1] |= ((1));
+  v32u16_0 /= (v16hi){~u64_1, 1, 0xb56c, 0xd279, 0x26b6, 0x74d9, 0xf764, 0, 0, -v32u16_1[6]} | 1;
+  v32u16_1 ^= (v16hi){0xc98d, 1, 0x8c71, u32_0, 0x5366, 0, ~v32u64_1[1]} & 31;
+  v32u32_0 -= (v8si)~v32u64_1;
+  v32u32_1[2] |= 0x1f;
+  v32u16_0 %= (v16hi){2, 0xffff, u32_0, 1, v32u64_0[1], u32_0 };
+  v32u32_1 /= (v8si){0x1e7390, v32u16_0[12], ~v32u16_1[2], -u64_0};
+  return v32u16_0[4] + v32u16_0[5] + v32u32_0[5] + v32u32_1[6] + v32u64_1[3];
+}
Index: gcc/testsuite/gcc.target/i386/pr70083.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr70083.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr70083.c	(working copy)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-Wno-psabi -O2 -fno-dce -fschedule-insns -fno-sched-critical-path-heuristic -mavx512dq --param=max-cse-insns=1" } */
+
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef long long v4di __attribute__ ((vector_size (32)));
+
+int
+foo(int u32_0, int u64_0, int u64_1, v16hi v32u16_0, v8si v32u32_0, v4di v32u64_0, v16hi v32u16_1, v8si v32u32_1, v4di v32u64_1)
+{
+  v32u32_1 %= (v8si) v32u16_1 | 1;
+  v32u64_1[1] |= ((1));
+  v32u16_0 /= (v16hi){~u64_1, 1, 0xb56c, 0xd279, 0x26b6, 0x74d9, 0xf764, 0, 0, -v32u16_1[6]} | 1;
+  v32u16_1 ^= (v16hi){0xc98d, 1, 0x8c71, u32_0, 0x5366, 0, ~v32u64_1[1]} & 31;
+  v32u32_0 -= (v8si)~v32u64_1;
+  v32u32_1[2] |= 0x1f;
+  v32u16_0 %= (v16hi){2, 0xffff, u32_0, 1, v32u64_0[1], u32_0 };
+  v32u32_1 /= (v8si){0x1e7390, v32u16_0[12], ~v32u16_1[2], -u64_0};
+  return v32u16_0[4] + v32u16_0[5] + v32u32_0[5] + v32u32_1[6] + v32u64_1[3];
+}

Reply via email to