On Mon, Aug 18, 2025 at 12:59 AM Hongtao Liu <[email protected]> wrote:
>
> On Mon, Aug 18, 2025 at 4:12 PM Hongtao Liu <[email protected]> wrote:
> >
> > On Mon, Aug 18, 2025 at 4:50 AM H.J. Lu <[email protected]> wrote:
> > >
> > > We can't place a TLS call before a conditional jump in a basic block like
> > >
> > > (code_label 13 11 14 4 2 (nil) [1 uses])
> > > (note 14 13 16 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
> > > (jump_insn 16 14 17 4 (set (pc)
> > > (if_then_else (le (reg:CCNO 17 flags)
> > > (const_int 0 [0]))
> > > (label_ref 27)
> > > (pc))) "x.c":10:21 discrim 1 1462 {*jcc}
> > > (expr_list:REG_DEAD (reg:CCNO 17 flags)
> > > (int_list:REG_BR_PROB 628353713 (nil)))
> > > -> 27)
> > >
> > > since the TLS call will clobber flags register nor place a TLS call in a
> > > basic block if any live caller-saved registers aren't dead at the end of
> > > the basic block:
> > >
> > > ;; live in 6 [bp] 7 [sp] 16 [argp] 17 [flags] 19 [frame] 104
> > > ;; live gen 0 [ax] 102 106 108 116 117 118 120
> > > ;; live kill 5 [di]
> > >
> > > Instead, we should place such call before all register setting basic
> > > blocks which dominate the current basic block.
> > >
> > > Keep track the replaced GNU and GNU2 TLS instructions. Use these info to
> > > place the __tls_get_addr call and mark FLAGS register as dead.
> > >
> > > gcc/
> > >
> > > PR target/121572
> > > * config/i386/i386-features.cc (replace_tls_call): Add a bitmap
> > > argument and put the updated TLS instruction in the bitmap.
> > > (ix86_get_dominator_for_reg): New.
> > > (ix86_place_single_tls_call): Add 2 bitmap arguments for updated
> > > GNU and GNU2 TLS instructions. Add the live flag register to the
> > > bitmap. Insert the __tls_get_addr call before INSN if it replaces
> > > a __tls_get_addr call. Mark FLAGS register as dead if INSN
> > > replaced the GNU2 TLS instruction. Clear the live register bitmap
> > > only for hard register. If there is a conditional jump in the
> > > basic block or any live caller-saved registers aren't dead at the
> > > end of the basic block, get the basic block which dominates all
> > > basic blocks which set the live registers.
> > >
> > > gcc/testsuite/
> > >
> > > PR target/121572
> > > * gcc.target/i386/pr121572-1a.c: New test.
> > > * gcc.target/i386/pr121572-1b.c: Likewise.
> > > * gcc.target/i386/pr121572-2a.c: Likewise.
> > > * gcc.target/i386/pr121572-2b.c: Likewise.
> > >
> > > Signed-off-by: H.J. Lu <[email protected]>
> > > ---
> > > gcc/config/i386/i386-features.cc | 136 ++++++++++++++++----
> > > gcc/testsuite/gcc.target/i386/pr121572-1a.c | 41 ++++++
> > > gcc/testsuite/gcc.target/i386/pr121572-1b.c | 18 +++
> > > gcc/testsuite/gcc.target/i386/pr121572-2a.c | 39 ++++++
> > > gcc/testsuite/gcc.target/i386/pr121572-2b.c | 6 +
> > > 5 files changed, 215 insertions(+), 25 deletions(-)
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1a.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1b.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2a.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2b.c
> > >
> > > diff --git a/gcc/config/i386/i386-features.cc
> > > b/gcc/config/i386/i386-features.cc
> > > index f0bdc5c1880..903f2b0b478 100644
> > > --- a/gcc/config/i386/i386-features.cc
> > > +++ b/gcc/config/i386/i386-features.cc
> > > @@ -3684,10 +3684,12 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
> > > return op;
> > > }
> > >
> > > -/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC. */
> > > +/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and
> > > + put the updated instruction in UPDATED_TLS_INSNS. */
> > >
> > > static void
> > > -replace_tls_call (rtx src, auto_bitmap &tls_call_insns)
> > > +replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
> > > + auto_bitmap &updated_tls_insns)
> > > {
> > > bitmap_iterator bi;
> > > unsigned int id;
> > > @@ -3716,6 +3718,9 @@ replace_tls_call (rtx src, auto_bitmap
> > > &tls_call_insns)
> > > if (recog_memoized (set_insn) < 0)
> > > gcc_unreachable ();
> > >
> > > + /* Put SET_INSN in UPDATED_TLS_INSNS. */
> > > + bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
> > > +
> > > if (dump_file)
> > > {
> > > fprintf (dump_file, "\nReplace:\n\n");
> > > @@ -3732,15 +3737,48 @@ replace_tls_call (rtx src, auto_bitmap
> > > &tls_call_insns)
> > > }
> > > }
> > >
> > > +/* Return the basic block which dominates all basic blocks which set
> > > + hard register REGNO used in basic block BB. */
> > > +
> > > +static basic_block
> > > +ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
> > > +{
> > > + basic_block set_bb;
> > > + auto_bitmap set_bbs;
> > > +
> > > + /* Get all BBs which set REGNO and dominate the current BB from all
> > > + DEFs of REGNO. */
> > > + for (df_ref def = DF_REG_DEF_CHAIN (regno);
> > > + def;
> > > + def = DF_REF_NEXT_REG (def))
> > > + if (!DF_REF_IS_ARTIFICIAL (def)
> > > + && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
> > > + && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
> > > + {
> > > + set_bb = DF_REF_BB (def);
> > > + if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
> > > + bitmap_set_bit (set_bbs, set_bb->index);
> > > + }
> > > +
> > > + bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
> > > + return bb;
> > > +}
> > > +
> > > /* Generate a TLS call of KIND with VAL and copy the call result to DEST,
> > > at entry of the nearest dominator for basic block map BBS, which is in
> > > the fake loop that contains the whole function, so that there is only
> > > - a single TLS CALL of KIND with VAL in the whole function. If
> > > - TLSDESC_SET isn't nullptr, insert it before the TLS call. */
> > > + a single TLS CALL of KIND with VAL in the whole function.
> > > + UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
> > > + instructions. UPDATED_GNU2_TLS_INSNS contains instructions which
> > > + replace the GNU2 TLS instructions. If TLSDESC_SET isn't nullptr,
> > > + insert it before the TLS call. */
> > >
> > > static void
> > > ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
> > > - bitmap bbs, rtx tlsdesc_set = nullptr)
> > > + auto_bitmap &bbs,
> > > + auto_bitmap &updated_gnu_tls_insns,
> > > + auto_bitmap &updated_gnu2_tls_insns,
> > > + rtx tlsdesc_set = nullptr)
> > > {
> > > basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
> > > bbs);
> > > while (bb->loop_father->latch
> > > @@ -3748,6 +3786,7 @@ ix86_place_single_tls_call (rtx dest, rtx val,
> > > x86_cse_kind kind,
> > > bb = get_immediate_dominator (CDI_DOMINATORS,
> > > bb->loop_father->header);
> > >
> > > +place_tls_call:
> > > rtx_insn *insn = BB_HEAD (bb);
> > > while (insn && !NONDEBUG_INSN_P (insn))
> > > {
> > > @@ -3824,7 +3863,8 @@ ix86_place_single_tls_call (rtx dest, rtx val,
> > > x86_cse_kind kind,
> > > auto_bitmap live_caller_saved_regs;
> > > bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
> > >
> > > - bool flags_live_p = bitmap_bit_p (in, FLAGS_REG);
> > > + if (bitmap_bit_p (in, FLAGS_REG))
> > > + bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
> > >
> > > unsigned int i;
> > >
> > > @@ -3845,13 +3885,46 @@ ix86_place_single_tls_call (rtx dest, rtx val,
> > > x86_cse_kind kind,
> > > if (!NONDEBUG_INSN_P (insn))
> > > continue;
> > >
> > > + if (JUMP_P (insn))
> > > + {
> > > + /* This must be a conditional jump. */
> > > + rtx label = JUMP_LABEL (insn);
> > > + if (label == nullptr
> > > + || ANY_RETURN_P (label)
> > > + || !(LABEL_P (label) || SYMBOL_REF_P (label)))
> > > + gcc_unreachable ();
> Whenever we can't find a place w/ flag_reg dead, we should go to
> place_tls_call, why just jump_insn only?
We can't place a TSL call before a conditional jump since it will
clobber flags register. Other instructions are OK. If they read
flag register, flag register must be alive.
> > > +
> > > + /* Place the call before all FLAGS_REG setting BBs since
> > > + we can't place a call before nor after a conditional
> > > + jump. */
> > > + bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
> > > + goto place_tls_call;
> > Can we put those codes related to find the **BEFORE/AFTER** into a
> > function and called it recursively(or iteratively to decide find the
> > final BEFORE/AFTER)
> > So we can avoid those GOTO statements.
> >
> > i.e
> >
> > switch (kind)
> > {
> > ...
> > }
> >
> > ix86_place_single_tls_call_place (....)
> >
> > if (before)
> > emit_insn_before (..);
> > else if (after)
> > emit_insn_after (...);
> > else
> > gcc_unreachable ();
> >
> > ....
> >
> > > + }
> > > +
> > > /* Check if FLAGS register is live. */
> > > set = single_set (insn);
> > > if (set)
> > > {
> > > rtx dest = SET_DEST (set);
> > > - if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
> > > - flags_live_p = true;
> > > + if (REG_P (dest))
> > > + {
> > > + if (bitmap_bit_p (updated_gnu_tls_insns,
> > > + INSN_UID (insn)))
> > > + {
> > > + /* Insert the __tls_get_addr call before INSN which
> > > + replaces a __tls_get_addr call. */
> > > + before = insn;
> > > + goto insert_before;
> > > + }
> > > + if (bitmap_bit_p (updated_gnu2_tls_insns,
> > > + INSN_UID (insn)))
> > > + /* Mark FLAGS register as dead since FLAGS register
> > > + would be clobbered by the GNU2 TLS instruction. */
> > > + bitmap_clear_bit (live_caller_saved_regs,
> > > + FLAGS_REG);
> > > + else if (REGNO (dest) == FLAGS_REG)
> > > + bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
> > > + }
> > > }
> > >
> > > rtx link;
> > > @@ -3863,29 +3936,30 @@ ix86_place_single_tls_call (rtx dest, rtx val,
> > > x86_cse_kind kind,
> > > for (i = REGNO (XEXP (link, 0));
> > > i < END_REGNO (XEXP (link, 0));
> > > i++)
> > > - bitmap_clear_bit (live_caller_saved_regs, i);
> > > -
> > > - /* Check if FLAGS register is dead. */
> > > - if (REGNO (XEXP (link, 0)) == FLAGS_REG)
> > > - flags_live_p = false;
> > > + if (i < FIRST_PSEUDO_REGISTER)
> > > + bitmap_clear_bit (live_caller_saved_regs, i);
> > >
> > > if (bitmap_empty_p (live_caller_saved_regs))
> > > {
> > > - /* All live caller-saved registers are dead after
> > > - this instruction. Since TLS instructions
> > > - clobber FLAGS register, it must be dead where
> > > - the TLS will be inserted after. */
> > > - if (flags_live_p)
> > > - gcc_unreachable ();
> > > after = insn;
> > > goto insert_after;
> > > }
> > > }
> > > }
> > >
> > > - /* All live caller-saved registers should be dead at the end
> > > - of this basic block. */
> > > - gcc_unreachable ();
> > > + /* If any live caller-saved registers aren't dead at the end
> > > + of this basic block, get the basic block which dominates all
> > > + basic blocks which set the remaining live registers. */
> > > + auto_bitmap set_bbs;
> > > + bitmap_iterator bi;
> > > + unsigned int id;
> > > + EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
> > > + {
> > > + basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
> > > + bitmap_set_bit (set_bbs, set_bb->index);
> > > + }
> > > + bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
> > > + goto place_tls_call;
> > > }
> > >
> > > /* Emit the TLS CALL insn. */
> > > @@ -3895,7 +3969,10 @@ insert_after:
> > > tls_insn = emit_insn_after (tls, after);
> > > }
> > > else
> > > - tls_insn = emit_insn_before (tls, before);
> > > + {
> > > +insert_before:
> > > + tls_insn = emit_insn_before (tls, before);
> > > + }
> > >
> > > rtx_insn *tlsdesc_insn = nullptr;
> > > if (tlsdesc_set)
> > > @@ -4213,6 +4290,8 @@ pass_x86_cse::x86_cse (void)
> > > basic_block bb;
> > > rtx_insn *insn;
> > > unsigned int i;
> > > + auto_bitmap updated_gnu_tls_insns;
> > > + auto_bitmap updated_gnu2_tls_insns;
> > >
> > > df_set_flags (DF_DEFER_INSN_RESCAN);
> > >
> > > @@ -4333,7 +4412,10 @@ pass_x86_cse::x86_cse (void)
> > > case X86_CSE_TLS_LD_BASE:
> > > case X86_CSE_TLSDESC:
> > > broadcast_reg = gen_reg_rtx (load->mode);
> > > - replace_tls_call (broadcast_reg, load->insns);
> > > + replace_tls_call (broadcast_reg, load->insns,
> > > + (load->kind == X86_CSE_TLSDESC
> > > + ? updated_gnu2_tls_insns
> > > + : updated_gnu_tls_insns));
> > > load->broadcast_reg = broadcast_reg;
> > > break;
> > >
> > > @@ -4399,6 +4481,8 @@ pass_x86_cse::x86_cse (void)
> > > load->val,
> > > load->kind,
> > > load->bbs,
> > > + updated_gnu_tls_insns,
> > > + updated_gnu2_tls_insns,
> > > PATTERN (load->def_insn));
> > > break;
> > > case X86_CSE_VEC_DUP:
> > > @@ -4442,7 +4526,9 @@ pass_x86_cse::x86_cse (void)
> > > ix86_place_single_tls_call (load->broadcast_reg,
> > > load->val,
> > > load->kind,
> > > - load->bbs);
> > > + load->bbs,
> > > + updated_gnu_tls_insns,
> > > + updated_gnu2_tls_insns);
> > > break;
> > > case X86_CSE_CONST0_VECTOR:
> > > case X86_CSE_CONSTM1_VECTOR:
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1a.c
> > > b/gcc/testsuite/gcc.target/i386/pr121572-1a.c
> > > new file mode 100644
> > > index 00000000000..270d8ff5cb6
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr121572-1a.c
> > > @@ -0,0 +1,41 @@
> > > +/* { dg-do compile { target *-*-linux* } } */
> > > +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu" } */
> > > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } }
> > > {^\t?\.} } } */
> > > +
> > > +/*
> > > +**bug:
> > > +**.LFB[0-9]+:
> > > +**...
> > > +** leaq tv_cache@tlsld\(%rip\), %rdi
> > > +** call __tls_get_addr@PLT
> > > +** movl \$-1, %edi
> > > +** mov[l|q] %[e|r]ax, %[e|r]bx
> > > +** call val@PLT
> > > +**...
> > > +*/
> > > +
> > > +extern __thread int tv_cache __attribute__ ((visibility ("hidden")));
> > > +extern void use_cache (int);
> > > +extern int val (int v);
> > > +
> > > +__attribute__ ((optimize (2)))
> > > +void
> > > +bug (void)
> > > +{
> > > + int compared = val (-1);
> > > +
> > > + if (compared == 0 || (compared > 0 && val (2) == 0))
> > > + {
> > > + __builtin_trap ();
> > > + }
> > > +
> > > + if (compared < 0)
> > > + {
> > > + use_cache (tv_cache);
> > > + return;
> > > + }
> > > +
> > > + use_cache (tv_cache);
> > > + __builtin_trap ();
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1b.c
> > > b/gcc/testsuite/gcc.target/i386/pr121572-1b.c
> > > new file mode 100644
> > > index 00000000000..8a6089109f5
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr121572-1b.c
> > > @@ -0,0 +1,18 @@
> > > +/* { dg-do compile { target *-*-linux* } } */
> > > +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu2" } */
> > > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } }
> > > {^\t?\.} } } */
> > > +
> > > +/*
> > > +**bug:
> > > +**.LFB[0-9]+:
> > > +**...
> > > +** lea[l|q] tv_cache@TLSDESC\(%rip\), %[e|r]ax
> > > +** movl \$-1, %edi
> > > +** call \*tv_cache@TLSCALL\(%[e|r]ax\)
> > > +** mov[l|q] %[e|r]ax, %[e|r]bx
> > > +** call val@PLT
> > > +**...
> > > +*/
> > > +
> > > +#include "pr121572-1a.c"
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2a.c
> > > b/gcc/testsuite/gcc.target/i386/pr121572-2a.c
> > > new file mode 100644
> > > index 00000000000..38b254657d3
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr121572-2a.c
> > > @@ -0,0 +1,39 @@
> > > +/* { dg-do compile { target *-*-linux* } } */
> > > +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
> > > +
> > > +typedef enum
> > > +{
> > > + MPFR_RNDN
> > > +} mpfr_rnd_t;
> > > +typedef int mpfr_t[1];
> > > +long __gmpfr_emin, mpfr_agm_expo_0;
> > > +_Thread_local long __gmpfr_emax;
> > > +int mpfr_agm_compare, mpfr_agm___trans_tmp_1;
> > > +mpfr_t mpfr_agm_u;
> > > +void mpfr_mul (int *, int, int, mpfr_rnd_t);
> > > +int
> > > +mpfr_agm (int op1)
> > > +{
> > > + int op2 = 0;
> > > + if (__builtin_expect (mpfr_agm_compare == 0, 0))
> > > + return 0;
> > > + if (mpfr_agm_compare > 0)
> > > + {
> > > + int t = op1;
> > > + op2 = t;
> > > + }
> > > + mpfr_agm_expo_0 = __gmpfr_emax;
> > > + for (;;)
> > > + {
> > > + retry:
> > > + mpfr_mul (mpfr_agm_u, op1, op2, MPFR_RNDN);
> > > + if (0)
> > > + goto retry;
> > > + if (__builtin_expect (mpfr_agm___trans_tmp_1, 1))
> > > + break;
> > > + }
> > > + __gmpfr_emin = __gmpfr_emax;
> > > + return 0;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 {
> > > target { ! ia32 } } } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2b.c
> > > b/gcc/testsuite/gcc.target/i386/pr121572-2b.c
> > > new file mode 100644
> > > index 00000000000..33d70024324
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr121572-2b.c
> > > @@ -0,0 +1,6 @@
> > > +/* { dg-do compile { target *-*-linux* } } */
> > > +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
> > > +
> > > +#include "pr121572-2a.c"
> > > +
> > > +/* { dg-final { scan-assembler-times "call\[
> > > \t\]\\*__gmpfr_emax@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } }
> > > */
> > > --
> > > 2.50.1
> > >
> >
> >
> > --
> > BR,
> > Hongtao
>
>
>
> --
> BR,
> Hongtao
--
H.J.