On 09/04/2014 10:30 AM, Zamyatin, Igor wrote:
>
>> -----Original Message-----
>> From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-
>> ow...@gcc.gnu.org] On Behalf Of Vladimir Makarov
>> Sent: Thursday, September 04, 2014 12:19 AM
>> To: Ilya Enkovich
>> Cc: g...@gnu.org; gcc-patches; Evgeny Stupachenko; Richard Biener; Uros
>> Bizjak; Jeff Law
>> Subject: Re: Enable EBX for x86 in 32bits PIC code
>>
>> On 2014-08-29 2:47 AM, Ilya Enkovich wrote:
>>> Seems your patch doesn't cover all cases.  Attached is a modified
>>> patch (with your changes included) and a test where double constant is
>>> wrongly rematerialized.  I also see in ira dump that there is still a
>>> copy of PIC reg created:
>>>
>>> Initialization of original PIC reg:
>>> (insn 23 22 24 2 (set (reg:SI 127)
>>>          (reg:SI 3 bx)) test.cc:42 90 {*movsi_internal}
>>>       (expr_list:REG_DEAD (reg:SI 3 bx)
>>>          (nil)))
>>> ...
>>> Copy is created:
>>> (insn 135 37 25 3 (set (reg:SI 138 [127])
>>>          (reg:SI 127)) 90 {*movsi_internal}
>>>       (expr_list:REG_DEAD (reg:SI 127)
>>>          (nil)))
>>> ...
>>> Copy is used:
>>> (insn 119 25 122 3 (set (reg:DF 134)
>>>          (mem/u/c:DF (plus:SI (reg:SI 138 [127])
>>>                  (const:SI (unspec:SI [
>>>                              (symbol_ref/u:SI ("*.LC0") [flags 0x2])
>>>                          ] UNSPEC_GOTOFF))) [5  S8 A64])) 128 
>>> {*movdf_internal}
>>>       (expr_list:REG_EQUIV (const_double:DF
>>> 2.9999999999999997371893933895137251965934410691261292e-4
>>> [0x0.9d495182a99308p-11])
>>>          (nil)))
>>>
>> The copy is created by a newer IRA optimization for function prologues.
>>
>> The patch in the attachment should solve the problem.  I also added the code
>> to prevent spilling the pic pseudo in LRA which could happen before
>> theoretically.
> Hi, Vladimir!
>
> I applied patch as an addition to your previous patch (was I right?) and 
> unfortunately got all spec2000 tests failed at the runtime (segfault)
>
> Looking at 164.gzip I saw following code in spec_init
>
>
> 00004bc0 <spec_init>:
>     4bc0:       55                      push   %ebp
>     4bc1:       57                      push   %edi
>     4bc2:       56                      push   %esi
>     4bc3:       e8 58 c6 ff ff          call   1220 <__x86.get_pc_thunk.si>
>     4bc8:       81 c6 38 84 00 00       add    $0x8438,%esi
>     4bce:       53                      push   %ebx
>     4bcf:       8d 64 24 e4             lea    -0x1c(%esp),%esp
>     4bd3:       83 be a0 03 00 00 03    cmpl   $0x3,0x3a0(%esi)
>     4bda:       7f 67                   jg     4c43 <spec_init+0x83>
>     4bdc:       8d ae 40 12 05 00       lea    0x51240(%esi),%ebp
>     4be2:       8d 45 30                lea    0x30(%ebp),%eax
>     4be5:       89 c6                   mov    %eax,%esi                      
>        <---- incorrect move, GOT value is now lost (here was mov    
> %eax,0x1c(%esp) before this additional patch)
>     4be7:       8b 7d 00                mov    0x0(%ebp),%edi
>     4bea:       89 f3                   mov    %esi,%ebx                      
>       <---- now ebx contains incorrect value so call to malloc will be 
> executed wrongly
>     4bec:       c7 45 04 00 00 00 00    movl   $0x0,0x4(%ebp)
>     4bf3:       c7 45 08 00 00 00 00    movl   $0x0,0x8(%ebp)
>     4bfa:       c7 45 0c 00 00 00 00    movl   $0x0,0xc(%ebp)
>     4c01:       8d 87 00 90 01 00       lea    0x19000(%edi),%eax
>     4c07:       89 04 24                mov    %eax,(%esp)
>     4c0a:       e8 a1 be ff ff          call   ab0 <malloc@plt>
>
>
I've investigated the wrong code generation.  I did a mistake in my last
patch excluding pic pseudo from live-range analysis when risky
transformations are on.

Here is the right version of all IRA/LRA changes relative to trunk.  I
managed to compile and run successfully all 32-bit PIC SPECInt2000
programs with these changes.

Index: ira-color.c
===================================================================
--- ira-color.c (revision 214576)
+++ ira-color.c (working copy)
@@ -3239,9 +3239,11 @@
          ira_assert (ALLOCNO_CLASS (subloop_allocno) == rclass);
          ira_assert (bitmap_bit_p (subloop_node->all_allocnos,
                                    ALLOCNO_NUM (subloop_allocno)));
-         if ((flag_ira_region == IRA_REGION_MIXED)
-             && (loop_tree_node->reg_pressure[pclass]
-                 <= ira_class_hard_regs_num[pclass]))
+         if ((flag_ira_region == IRA_REGION_MIXED
+              && (loop_tree_node->reg_pressure[pclass]
+                  <= ira_class_hard_regs_num[pclass]))
+             || (pic_offset_table_rtx != NULL
+                 && regno == (int) REGNO (pic_offset_table_rtx)))
            {
              if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
                {
Index: ira-emit.c
===================================================================
--- ira-emit.c  (revision 214576)
+++ ira-emit.c  (working copy)
@@ -620,7 +620,10 @@
                  /* don't create copies because reload can spill an
                     allocno set by copy although the allocno will not
                     get memory slot.  */
-                 || ira_equiv_no_lvalue_p (regno)))
+                 || ira_equiv_no_lvalue_p (regno)
+                 || (pic_offset_table_rtx != NULL
+                     && (ALLOCNO_REGNO (allocno)
+                         == (int) REGNO (pic_offset_table_rtx)))))
            continue;
          original_reg = allocno_emit_reg (allocno);
          if (parent_allocno == NULL
Index: ira.c
===================================================================
--- ira.c       (revision 214576)
+++ ira.c       (working copy)
@@ -4887,7 +4887,7 @@
   FOR_BB_INSNS (first, insn)
     {
       rtx dest = interesting_dest_for_shprep (insn, call_dom);
-      if (!dest)
+      if (!dest || dest == pic_offset_table_rtx)
        continue;
 
       rtx newreg = NULL_RTX;
Index: lra-assigns.c
===================================================================
--- lra-assigns.c       (revision 214576)
+++ lra-assigns.c       (working copy)
@@ -879,11 +879,13 @@
        }
       /* Spill pseudos.         */
       EXECUTE_IF_SET_IN_BITMAP (&spill_pseudos_bitmap, 0, spill_regno, bi)
-       if ((int) spill_regno >= lra_constraint_new_regno_start
-           && ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
-           && ! bitmap_bit_p (&lra_split_regs, spill_regno)
-           && ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno)
-           && ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno))
+       if ((pic_offset_table_rtx != NULL
+            && spill_regno == REGNO (pic_offset_table_rtx))
+           || ((int) spill_regno >= lra_constraint_new_regno_start
+               && ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
+               && ! bitmap_bit_p (&lra_split_regs, spill_regno)
+               && ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno)
+               && ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno)))
          goto fail;
       insn_pseudos_num = 0;
       if (lra_dump_file != NULL)
@@ -1053,9 +1055,15 @@
       return;
     }
   for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
-    if (reg_renumber[i] >= 0 && lra_reg_info[i].nrefs > 0)
+    if ((pic_offset_table_rtx == NULL_RTX
+        || i != (int) REGNO (pic_offset_table_rtx))
+       && reg_renumber[i] >= 0 && lra_reg_info[i].nrefs > 0)
       sorted_pseudos[n++] = i;
   qsort (sorted_pseudos, n, sizeof (int), pseudo_compare_func);
+  if (pic_offset_table_rtx != NULL_RTX
+      && (regno = REGNO (pic_offset_table_rtx)) >= FIRST_PSEUDO_REGISTER
+      && reg_renumber[regno] >= 0 && lra_reg_info[regno].nrefs > 0)
+    sorted_pseudos[n++] = regno;
   for (i = n - 1; i >= 0; i--)
     {
       regno = sorted_pseudos[i];
@@ -1360,6 +1368,8 @@
        }
       EXECUTE_IF_SET_IN_SPARSESET (live_range_hard_reg_pseudos, conflict_regno)
        {
+         gcc_assert (pic_offset_table_rtx == NULL
+                     || conflict_regno != REGNO (pic_offset_table_rtx));
          if ((int) conflict_regno >= lra_constraint_new_regno_start)
            sorted_pseudos[nfails++] = conflict_regno;
          if (lra_dump_file != NULL)
Index: lra-constraints.c
===================================================================
--- lra-constraints.c   (revision 214576)
+++ lra-constraints.c   (working copy)
@@ -4021,7 +4021,11 @@
       ("Maximum number of LRA constraint passes is achieved (%d)\n",
        LRA_MAX_CONSTRAINT_ITERATION_NUMBER);
   changed_p = false;
-  lra_risky_transformations_p = false;
+  if (pic_offset_table_rtx
+      && REGNO (pic_offset_table_rtx) >= FIRST_PSEUDO_REGISTER)
+    lra_risky_transformations_p = true;
+  else
+    lra_risky_transformations_p = false;
   new_insn_uid_start = get_max_uid ();
   new_regno_start = first_p ? lra_constraint_new_regno_start : max_reg_num ();
   /* Mark used hard regs for target stack size calulations.  */

Reply via email to