On 09/04/2014 10:30 AM, Zamyatin, Igor wrote:
>
>> -----Original Message-----
>> From: [email protected] [mailto:gcc-patches-
>> [email protected]] On Behalf Of Vladimir Makarov
>> Sent: Thursday, September 04, 2014 12:19 AM
>> To: Ilya Enkovich
>> Cc: [email protected]; gcc-patches; Evgeny Stupachenko; Richard Biener; Uros
>> Bizjak; Jeff Law
>> Subject: Re: Enable EBX for x86 in 32bits PIC code
>>
>> On 2014-08-29 2:47 AM, Ilya Enkovich wrote:
>>> Seems your patch doesn't cover all cases. Attached is a modified
>>> patch (with your changes included) and a test where double constant is
>>> wrongly rematerialized. I also see in ira dump that there is still a
>>> copy of PIC reg created:
>>>
>>> Initialization of original PIC reg:
>>> (insn 23 22 24 2 (set (reg:SI 127)
>>> (reg:SI 3 bx)) test.cc:42 90 {*movsi_internal}
>>> (expr_list:REG_DEAD (reg:SI 3 bx)
>>> (nil)))
>>> ...
>>> Copy is created:
>>> (insn 135 37 25 3 (set (reg:SI 138 [127])
>>> (reg:SI 127)) 90 {*movsi_internal}
>>> (expr_list:REG_DEAD (reg:SI 127)
>>> (nil)))
>>> ...
>>> Copy is used:
>>> (insn 119 25 122 3 (set (reg:DF 134)
>>> (mem/u/c:DF (plus:SI (reg:SI 138 [127])
>>> (const:SI (unspec:SI [
>>> (symbol_ref/u:SI ("*.LC0") [flags 0x2])
>>> ] UNSPEC_GOTOFF))) [5 S8 A64])) 128
>>> {*movdf_internal}
>>> (expr_list:REG_EQUIV (const_double:DF
>>> 2.9999999999999997371893933895137251965934410691261292e-4
>>> [0x0.9d495182a99308p-11])
>>> (nil)))
>>>
>> The copy is created by a newer IRA optimization for function prologues.
>>
>> The patch in the attachment should solve the problem. I also added the code
>> to prevent spilling the pic pseudo in LRA which could happen before
>> theoretically.
> Hi, Vladimir!
>
> I applied patch as an addition to your previous patch (was I right?) and
> unfortunately got all spec2000 tests failed at the runtime (segfault)
>
> Looking at 164.gzip I saw following code in spec_init
>
>
> 00004bc0 <spec_init>:
> 4bc0: 55 push %ebp
> 4bc1: 57 push %edi
> 4bc2: 56 push %esi
> 4bc3: e8 58 c6 ff ff call 1220 <__x86.get_pc_thunk.si>
> 4bc8: 81 c6 38 84 00 00 add $0x8438,%esi
> 4bce: 53 push %ebx
> 4bcf: 8d 64 24 e4 lea -0x1c(%esp),%esp
> 4bd3: 83 be a0 03 00 00 03 cmpl $0x3,0x3a0(%esi)
> 4bda: 7f 67 jg 4c43 <spec_init+0x83>
> 4bdc: 8d ae 40 12 05 00 lea 0x51240(%esi),%ebp
> 4be2: 8d 45 30 lea 0x30(%ebp),%eax
> 4be5: 89 c6 mov %eax,%esi
> <---- incorrect move, GOT value is now lost (here was mov
> %eax,0x1c(%esp) before this additional patch)
> 4be7: 8b 7d 00 mov 0x0(%ebp),%edi
> 4bea: 89 f3 mov %esi,%ebx
> <---- now ebx contains incorrect value so call to malloc will be
> executed wrongly
> 4bec: c7 45 04 00 00 00 00 movl $0x0,0x4(%ebp)
> 4bf3: c7 45 08 00 00 00 00 movl $0x0,0x8(%ebp)
> 4bfa: c7 45 0c 00 00 00 00 movl $0x0,0xc(%ebp)
> 4c01: 8d 87 00 90 01 00 lea 0x19000(%edi),%eax
> 4c07: 89 04 24 mov %eax,(%esp)
> 4c0a: e8 a1 be ff ff call ab0 <malloc@plt>
>
>
I've investigated the wrong code generation. I did a mistake in my last
patch excluding pic pseudo from live-range analysis when risky
transformations are on.
Here is the right version of all IRA/LRA changes relative to trunk. I
managed to compile and run successfully all 32-bit PIC SPECInt2000
programs with these changes.
Index: ira-color.c
===================================================================
--- ira-color.c (revision 214576)
+++ ira-color.c (working copy)
@@ -3239,9 +3239,11 @@
ira_assert (ALLOCNO_CLASS (subloop_allocno) == rclass);
ira_assert (bitmap_bit_p (subloop_node->all_allocnos,
ALLOCNO_NUM (subloop_allocno)));
- if ((flag_ira_region == IRA_REGION_MIXED)
- && (loop_tree_node->reg_pressure[pclass]
- <= ira_class_hard_regs_num[pclass]))
+ if ((flag_ira_region == IRA_REGION_MIXED
+ && (loop_tree_node->reg_pressure[pclass]
+ <= ira_class_hard_regs_num[pclass]))
+ || (pic_offset_table_rtx != NULL
+ && regno == (int) REGNO (pic_offset_table_rtx)))
{
if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
{
Index: ira-emit.c
===================================================================
--- ira-emit.c (revision 214576)
+++ ira-emit.c (working copy)
@@ -620,7 +620,10 @@
/* don't create copies because reload can spill an
allocno set by copy although the allocno will not
get memory slot. */
- || ira_equiv_no_lvalue_p (regno)))
+ || ira_equiv_no_lvalue_p (regno)
+ || (pic_offset_table_rtx != NULL
+ && (ALLOCNO_REGNO (allocno)
+ == (int) REGNO (pic_offset_table_rtx)))))
continue;
original_reg = allocno_emit_reg (allocno);
if (parent_allocno == NULL
Index: ira.c
===================================================================
--- ira.c (revision 214576)
+++ ira.c (working copy)
@@ -4887,7 +4887,7 @@
FOR_BB_INSNS (first, insn)
{
rtx dest = interesting_dest_for_shprep (insn, call_dom);
- if (!dest)
+ if (!dest || dest == pic_offset_table_rtx)
continue;
rtx newreg = NULL_RTX;
Index: lra-assigns.c
===================================================================
--- lra-assigns.c (revision 214576)
+++ lra-assigns.c (working copy)
@@ -879,11 +879,13 @@
}
/* Spill pseudos. */
EXECUTE_IF_SET_IN_BITMAP (&spill_pseudos_bitmap, 0, spill_regno, bi)
- if ((int) spill_regno >= lra_constraint_new_regno_start
- && ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
- && ! bitmap_bit_p (&lra_split_regs, spill_regno)
- && ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno)
- && ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno))
+ if ((pic_offset_table_rtx != NULL
+ && spill_regno == REGNO (pic_offset_table_rtx))
+ || ((int) spill_regno >= lra_constraint_new_regno_start
+ && ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
+ && ! bitmap_bit_p (&lra_split_regs, spill_regno)
+ && ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno)
+ && ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno)))
goto fail;
insn_pseudos_num = 0;
if (lra_dump_file != NULL)
@@ -1053,9 +1055,15 @@
return;
}
for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
- if (reg_renumber[i] >= 0 && lra_reg_info[i].nrefs > 0)
+ if ((pic_offset_table_rtx == NULL_RTX
+ || i != (int) REGNO (pic_offset_table_rtx))
+ && reg_renumber[i] >= 0 && lra_reg_info[i].nrefs > 0)
sorted_pseudos[n++] = i;
qsort (sorted_pseudos, n, sizeof (int), pseudo_compare_func);
+ if (pic_offset_table_rtx != NULL_RTX
+ && (regno = REGNO (pic_offset_table_rtx)) >= FIRST_PSEUDO_REGISTER
+ && reg_renumber[regno] >= 0 && lra_reg_info[regno].nrefs > 0)
+ sorted_pseudos[n++] = regno;
for (i = n - 1; i >= 0; i--)
{
regno = sorted_pseudos[i];
@@ -1360,6 +1368,8 @@
}
EXECUTE_IF_SET_IN_SPARSESET (live_range_hard_reg_pseudos, conflict_regno)
{
+ gcc_assert (pic_offset_table_rtx == NULL
+ || conflict_regno != REGNO (pic_offset_table_rtx));
if ((int) conflict_regno >= lra_constraint_new_regno_start)
sorted_pseudos[nfails++] = conflict_regno;
if (lra_dump_file != NULL)
Index: lra-constraints.c
===================================================================
--- lra-constraints.c (revision 214576)
+++ lra-constraints.c (working copy)
@@ -4021,7 +4021,11 @@
("Maximum number of LRA constraint passes is achieved (%d)\n",
LRA_MAX_CONSTRAINT_ITERATION_NUMBER);
changed_p = false;
- lra_risky_transformations_p = false;
+ if (pic_offset_table_rtx
+ && REGNO (pic_offset_table_rtx) >= FIRST_PSEUDO_REGISTER)
+ lra_risky_transformations_p = true;
+ else
+ lra_risky_transformations_p = false;
new_insn_uid_start = get_max_uid ();
new_regno_start = first_p ? lra_constraint_new_regno_start : max_reg_num ();
/* Mark used hard regs for target stack size calulations. */