On Mon, Mar 19, 2012 at 6:50 PM, H.J. Lu <hjl.to...@gmail.com> wrote:
>>> Please test my proposed patch. If it works OK, I will commit it to SVN. >> >> The only acceptable way is to generate ZERO_EXTEND in place, so: >> >> --cut here-- >> static rtx >> get_thread_pointer (enum machine_mode tp_mode, bool to_reg) >> { >> rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); >> >> if (GET_MODE (tp) != tp_mode) >> { >> gcc_assert (GET_MODE (tp) == SImode); >> gcc_assert (tp_mode == DImode); >> >> tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); >> } >> >> if (to_reg) >> tp = copy_to_mode_reg (tp_mode, tp); >> >> return tp; >> } >> --cut here-- > > This version works fine. Attached patch was committed to mainline SVN with the following ChangeLog: 2012-03-19 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.c (get_thread_pointer): Add tp_mode argument. Generate ZERO_EXTEND in place if GET_MODE (tp) != tp_mode. (legitimize_tls_address) <TLS_MODEL_INITIAL_EXEC>: Always generate DImode UNSPEC_GOTNTPOFF references on TARGET_64BIT. (ix86_decompose_address): Allow zero extended UNSPEC_TP references. Revert: 2012-03-13 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.h (TARGET_TLS_INDIRECT_SEG_REFS): New. * config/i386/i386.c (ix86_decompose_address): Use TARGET_TLS_INDIRECT_SEG_REFS to prevent %fs:(%reg) addresses. (legitimize_tls_address): Use TARGET_TLS_INDIRECT_SEG_REFS to load thread pointer to a register. Revert: 2012-03-10 H.J. Lu <hongjiu...@intel.com> * config/i386/i386.c (ix86_decompose_address): Disallow fs:(reg) if Pmode != word_mode. (legitimize_tls_address): Call gen_tls_initial_exec_x32 if Pmode == SImode for TARGET_X32. * config/i386/i386.md (UNSPEC_TLS_IE_X32): New. (tls_initial_exec_x32): Likewise. Tested on x86_64-pc-linux-gnu {,-m32}. Thanks, Uros.
Index: i386.md =================================================================== --- i386.md (revision 185524) +++ i386.md (working copy) @@ -96,7 +96,6 @@ UNSPEC_TLS_LD_BASE UNSPEC_TLSDESC UNSPEC_TLS_IE_SUN - UNSPEC_TLS_IE_X32 ;; Other random patterns UNSPEC_SCAS @@ -12836,28 +12835,6 @@ } [(set_attr "type" "multi")]) -;; When Pmode == SImode, there may be no REX prefix for ADD. Avoid -;; any instructions between MOV and ADD, which may interfere linker -;; IE->LE optimization, since the last byte of the previous instruction -;; before ADD may look like a REX prefix. This also avoids -;; movl x@gottpoff(%rip), %reg32 -;; movl $fs:(%reg32), %reg32 -;; Since address override works only on the (reg32) part in fs:(reg32), -;; we can't use it as memory operand. -(define_insn "tls_initial_exec_x32" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI - [(match_operand 1 "tls_symbolic_operand")] - UNSPEC_TLS_IE_X32)) - (clobber (reg:CC FLAGS_REG))] - "TARGET_X32" -{ - output_asm_insn - ("mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}", operands); - return "add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}"; -} - [(set_attr "type" "multi")]) - ;; GNU2 TLS patterns can be split. (define_expand "tls_dynamic_gnu2_32" Index: i386.c =================================================================== --- i386.c (revision 185524) +++ i386.c (working copy) @@ -11514,6 +11514,10 @@ ix86_decompose_address (rtx addr, struct ix86_addr scale = 1 << scale; break; + case ZERO_EXTEND: + op = XEXP (op, 0); + /* FALLTHRU */ + case UNSPEC: if (XINT (op, 1) == UNSPEC_TP && TARGET_TLS_DIRECT_SEG_REFS @@ -12483,15 +12487,20 @@ legitimize_pic_address (rtx orig, rtx reg) /* Load the thread pointer. If TO_REG is true, force it into a register. 
*/ static rtx -get_thread_pointer (bool to_reg) +get_thread_pointer (enum machine_mode tp_mode, bool to_reg) { rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); - if (GET_MODE (tp) != Pmode) - tp = convert_to_mode (Pmode, tp, 1); + if (GET_MODE (tp) != tp_mode) + { + gcc_assert (GET_MODE (tp) == SImode); + gcc_assert (tp_mode == DImode); + tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); + } + if (to_reg) - tp = copy_addr_to_reg (tp); + tp = copy_to_mode_reg (tp_mode, tp); return tp; } @@ -12543,6 +12552,7 @@ legitimize_tls_address (rtx x, enum tls_model mode { rtx dest, base, off; rtx pic = NULL_RTX, tp = NULL_RTX; + enum machine_mode tp_mode = Pmode; int type; switch (model) @@ -12568,7 +12578,7 @@ legitimize_tls_address (rtx x, enum tls_model mode else emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic)); - tp = get_thread_pointer (true); + tp = get_thread_pointer (Pmode, true); dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); set_unique_reg_note (get_last_insn (), REG_EQUAL, x); @@ -12618,7 +12628,7 @@ legitimize_tls_address (rtx x, enum tls_model mode else emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic)); - tp = get_thread_pointer (true); + tp = get_thread_pointer (Pmode, true); set_unique_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_MINUS (Pmode, tmp, tp)); } @@ -12674,18 +12684,10 @@ legitimize_tls_address (rtx x, enum tls_model mode emit_insn (gen_tls_initial_exec_64_sun (dest, x)); return dest; } - else if (Pmode == SImode) - { - /* Always generate - movl %fs:0, %reg32 - addl xgottpoff(%rip), %reg32 - to support linker IE->LE optimization and avoid - fs:(%reg32) as memory operand. */ - dest = gen_reg_rtx (Pmode); - emit_insn (gen_tls_initial_exec_x32 (dest, x)); - return dest; - } + /* Generate DImode references to avoid %fs:(%reg32) + problems and linker IE->LE relaxation bug. 
*/ + tp_mode = DImode; pic = NULL; type = UNSPEC_GOTNTPOFF; } @@ -12708,24 +12710,23 @@ legitimize_tls_address (rtx x, enum tls_model mode type = UNSPEC_INDNTPOFF; } - off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); - off = gen_rtx_CONST (Pmode, off); + off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type); + off = gen_rtx_CONST (tp_mode, off); if (pic) - off = gen_rtx_PLUS (Pmode, pic, off); - off = gen_const_mem (Pmode, off); + off = gen_rtx_PLUS (tp_mode, pic, off); + off = gen_const_mem (tp_mode, off); set_mem_alias_set (off, ix86_GOT_alias_set ()); if (TARGET_64BIT || TARGET_ANY_GNU_TLS) { - base = get_thread_pointer (for_mov - || !(TARGET_TLS_DIRECT_SEG_REFS - && TARGET_TLS_INDIRECT_SEG_REFS)); - off = force_reg (Pmode, off); - return gen_rtx_PLUS (Pmode, base, off); + base = get_thread_pointer (tp_mode, + for_mov || !TARGET_TLS_DIRECT_SEG_REFS); + off = force_reg (tp_mode, off); + return gen_rtx_PLUS (tp_mode, base, off); } else { - base = get_thread_pointer (true); + base = get_thread_pointer (Pmode, true); dest = gen_reg_rtx (Pmode); emit_insn (ix86_gen_sub3 (dest, base, off)); } @@ -12739,14 +12740,13 @@ legitimize_tls_address (rtx x, enum tls_model mode if (TARGET_64BIT || TARGET_ANY_GNU_TLS) { - base = get_thread_pointer (for_mov - || !(TARGET_TLS_DIRECT_SEG_REFS - && TARGET_TLS_INDIRECT_SEG_REFS)); + base = get_thread_pointer (Pmode, + for_mov || !TARGET_TLS_DIRECT_SEG_REFS); return gen_rtx_PLUS (Pmode, base, off); } else { - base = get_thread_pointer (true); + base = get_thread_pointer (Pmode, true); dest = gen_reg_rtx (Pmode); emit_insn (ix86_gen_sub3 (dest, base, off)); } @@ -13274,8 +13274,7 @@ ix86_delegitimize_tls_address (rtx orig_x) rtx x = orig_x, unspec; struct ix86_address addr; - if (!(TARGET_TLS_DIRECT_SEG_REFS - && TARGET_TLS_INDIRECT_SEG_REFS)) + if (!TARGET_TLS_DIRECT_SEG_REFS) return orig_x; if (MEM_P (x)) x = XEXP (x, 0); Index: i386.h =================================================================== --- i386.h (revision 
185524) +++ i386.h (working copy) @@ -467,9 +467,6 @@ extern int x86_prefetch_sse; #define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0 #endif -/* Address override works only on the (%reg) part of %fs:(%reg). */ -#define TARGET_TLS_INDIRECT_SEG_REFS (Pmode == word_mode) - /* Fence to use after loop using storent. */ extern tree x86_mfence;