On 09/04/2014 10:30 AM, Zamyatin, Igor wrote: > >> -----Original Message----- >> From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches- >> ow...@gcc.gnu.org] On Behalf Of Vladimir Makarov >> Sent: Thursday, September 04, 2014 12:19 AM >> To: Ilya Enkovich >> Cc: g...@gnu.org; gcc-patches; Evgeny Stupachenko; Richard Biener; Uros >> Bizjak; Jeff Law >> Subject: Re: Enable EBX for x86 in 32bits PIC code >> >> On 2014-08-29 2:47 AM, Ilya Enkovich wrote: >>> Seems your patch doesn't cover all cases. Attached is a modified >>> patch (with your changes included) and a test where double constant is >>> wrongly rematerialized. I also see in ira dump that there is still a >>> copy of PIC reg created: >>> >>> Initialization of original PIC reg: >>> (insn 23 22 24 2 (set (reg:SI 127) >>> (reg:SI 3 bx)) test.cc:42 90 {*movsi_internal} >>> (expr_list:REG_DEAD (reg:SI 3 bx) >>> (nil))) >>> ... >>> Copy is created: >>> (insn 135 37 25 3 (set (reg:SI 138 [127]) >>> (reg:SI 127)) 90 {*movsi_internal} >>> (expr_list:REG_DEAD (reg:SI 127) >>> (nil))) >>> ... >>> Copy is used: >>> (insn 119 25 122 3 (set (reg:DF 134) >>> (mem/u/c:DF (plus:SI (reg:SI 138 [127]) >>> (const:SI (unspec:SI [ >>> (symbol_ref/u:SI ("*.LC0") [flags 0x2]) >>> ] UNSPEC_GOTOFF))) [5 S8 A64])) 128 >>> {*movdf_internal} >>> (expr_list:REG_EQUIV (const_double:DF >>> 2.9999999999999997371893933895137251965934410691261292e-4 >>> [0x0.9d495182a99308p-11]) >>> (nil))) >>> >> The copy is created by a newer IRA optimization for function prologues. >> >> The patch in the attachment should solve the problem. I also added the code >> to prevent spilling the pic pseudo in LRA which could happen before >> theoretically. > Hi, Vladimir! > > I applied patch as an addition to your previous patch (was I right?) and > unfortunately got all spec2000 tests failed at the runtime (segfault) > > Looking at 164.gzip I saw following code in spec_init > > > 00004bc0 <spec_init>: > 4bc0: 55 push %ebp > 4bc1: 57 push %edi > 4bc2: 56 push %esi > 4bc3: e8 58 c6 ff ff call 1220 <__x86.get_pc_thunk.si> > 4bc8: 81 c6 38 84 00 00 add $0x8438,%esi > 4bce: 53 push %ebx > 4bcf: 8d 64 24 e4 lea -0x1c(%esp),%esp > 4bd3: 83 be a0 03 00 00 03 cmpl $0x3,0x3a0(%esi) > 4bda: 7f 67 jg 4c43 <spec_init+0x83> > 4bdc: 8d ae 40 12 05 00 lea 0x51240(%esi),%ebp > 4be2: 8d 45 30 lea 0x30(%ebp),%eax > 4be5: 89 c6 mov %eax,%esi > <---- incorrect move, GOT value is now lost (here was mov > %eax,0x1c(%esp) before this additional patch) > 4be7: 8b 7d 00 mov 0x0(%ebp),%edi > 4bea: 89 f3 mov %esi,%ebx > <---- now ebx contains incorrect value so call to malloc will be > executed wrongly > 4bec: c7 45 04 00 00 00 00 movl $0x0,0x4(%ebp) > 4bf3: c7 45 08 00 00 00 00 movl $0x0,0x8(%ebp) > 4bfa: c7 45 0c 00 00 00 00 movl $0x0,0xc(%ebp) > 4c01: 8d 87 00 90 01 00 lea 0x19000(%edi),%eax > 4c07: 89 04 24 mov %eax,(%esp) > 4c0a: e8 a1 be ff ff call ab0 <malloc@plt> > > I've investigated the wrong code generation. I did a mistake in my last patch excluding pic pseudo from live-range analysis when risky transformations are on.
Here is the right version of all IRA/LRA changes relative to trunk. I managed to compile and run successfully all 32-bit PIC SPECInt2000 programs with these changes.
Index: ira-color.c =================================================================== --- ira-color.c (revision 214576) +++ ira-color.c (working copy) @@ -3239,9 +3239,11 @@ ira_assert (ALLOCNO_CLASS (subloop_allocno) == rclass); ira_assert (bitmap_bit_p (subloop_node->all_allocnos, ALLOCNO_NUM (subloop_allocno))); - if ((flag_ira_region == IRA_REGION_MIXED) - && (loop_tree_node->reg_pressure[pclass] - <= ira_class_hard_regs_num[pclass])) + if ((flag_ira_region == IRA_REGION_MIXED + && (loop_tree_node->reg_pressure[pclass] + <= ira_class_hard_regs_num[pclass])) + || (pic_offset_table_rtx != NULL + && regno == (int) REGNO (pic_offset_table_rtx))) { if (! ALLOCNO_ASSIGNED_P (subloop_allocno)) { Index: ira-emit.c =================================================================== --- ira-emit.c (revision 214576) +++ ira-emit.c (working copy) @@ -620,7 +620,10 @@ /* don't create copies because reload can spill an allocno set by copy although the allocno will not get memory slot. */ - || ira_equiv_no_lvalue_p (regno))) + || ira_equiv_no_lvalue_p (regno) + || (pic_offset_table_rtx != NULL + && (ALLOCNO_REGNO (allocno) + == (int) REGNO (pic_offset_table_rtx))))) continue; original_reg = allocno_emit_reg (allocno); if (parent_allocno == NULL Index: ira.c =================================================================== --- ira.c (revision 214576) +++ ira.c (working copy) @@ -4887,7 +4887,7 @@ FOR_BB_INSNS (first, insn) { rtx dest = interesting_dest_for_shprep (insn, call_dom); - if (!dest) + if (!dest || dest == pic_offset_table_rtx) continue; rtx newreg = NULL_RTX; Index: lra-assigns.c =================================================================== --- lra-assigns.c (revision 214576) +++ lra-assigns.c (working copy) @@ -879,11 +879,13 @@ } /* Spill pseudos. */ EXECUTE_IF_SET_IN_BITMAP (&spill_pseudos_bitmap, 0, spill_regno, bi) - if ((int) spill_regno >= lra_constraint_new_regno_start - && ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno) - && ! bitmap_bit_p (&lra_split_regs, spill_regno) - && ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno) - && ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno)) + if ((pic_offset_table_rtx != NULL + && spill_regno == REGNO (pic_offset_table_rtx)) + || ((int) spill_regno >= lra_constraint_new_regno_start + && ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno) + && ! bitmap_bit_p (&lra_split_regs, spill_regno) + && ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno) + && ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno))) goto fail; insn_pseudos_num = 0; if (lra_dump_file != NULL) @@ -1053,9 +1055,15 @@ return; } for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) - if (reg_renumber[i] >= 0 && lra_reg_info[i].nrefs > 0) + if ((pic_offset_table_rtx == NULL_RTX + || i != (int) REGNO (pic_offset_table_rtx)) + && reg_renumber[i] >= 0 && lra_reg_info[i].nrefs > 0) sorted_pseudos[n++] = i; qsort (sorted_pseudos, n, sizeof (int), pseudo_compare_func); + if (pic_offset_table_rtx != NULL_RTX + && (regno = REGNO (pic_offset_table_rtx)) >= FIRST_PSEUDO_REGISTER + && reg_renumber[regno] >= 0 && lra_reg_info[regno].nrefs > 0) + sorted_pseudos[n++] = regno; for (i = n - 1; i >= 0; i--) { regno = sorted_pseudos[i]; @@ -1360,6 +1368,8 @@ } EXECUTE_IF_SET_IN_SPARSESET (live_range_hard_reg_pseudos, conflict_regno) { + gcc_assert (pic_offset_table_rtx == NULL + || conflict_regno != REGNO (pic_offset_table_rtx)); if ((int) conflict_regno >= lra_constraint_new_regno_start) sorted_pseudos[nfails++] = conflict_regno; if (lra_dump_file != NULL) Index: lra-constraints.c =================================================================== --- lra-constraints.c (revision 214576) +++ lra-constraints.c (working copy) @@ -4021,7 +4021,11 @@ ("Maximum number of LRA constraint passes is achieved (%d)\n", LRA_MAX_CONSTRAINT_ITERATION_NUMBER); changed_p = false; - lra_risky_transformations_p = false; + if (pic_offset_table_rtx + && REGNO (pic_offset_table_rtx) >= FIRST_PSEUDO_REGISTER) + lra_risky_transformations_p = true; + else + lra_risky_transformations_p = false; new_insn_uid_start = get_max_uid (); new_regno_start = first_p ? lra_constraint_new_regno_start : max_reg_num (); /* Mark used hard regs for target stack size calulations. */