On Fri, Jan 31, 2025 at 10:09 PM Uros Bizjak <[email protected]> wrote:
>
> On Fri, Jan 31, 2025 at 2:54 PM Uros Bizjak <[email protected]> wrote:
> >
> > On Fri, Jan 31, 2025 at 2:36 PM H.J. Lu <[email protected]> wrote:
> > >
> > > -fno-plt forces external call to indirect call via GOT memory. But
> > > -mindirect-branch-register requires indirect call and jump via register.
> > > For -mindirect-branch-register, expanding indirect call via register and
> > > update call patterns and peepholes to disable indirect call via memory.
> > >
> > > gcc/
> > >
> > > PR target/118713
> > > * config/i386/i386-expand.cc (ix86_expand_call): Force indirect
> > > call via register for -mindirect-branch-register.
> > > * config/i386/i386.md (*call): Disable indirect call via memory
> > > for -mindirect-branch-register.
> > > (*call_got_x32): Likewise.
> > > (*sibcall_GOT_32): Likewise.
> > > (*sibcall): Likewise.
> > > (*sibcall_memory): Likewise.
> > > (*call_pop): Likewise.
> > > (*sibcall_pop): Likewise.
> > > (*sibcall_pop_memory): Likewise.
> > > (*call_value): Likewise.
> > > (*call_value_got_x32): Likewise.
> > > (*sibcall_value_GOT_32): Likewise.
> > > (*sibcall_value): Likewise.
> > > (*sibcall_value_memory): Likewise.
> > > (*call_value_pop): Likewise.
> > > (*sibcall_value_pop): Likewise.
> > > (*sibcall_value_pop_memory): Likewise.
> > >
> > > gcc/testsuite/
> > >
> > > PR target/118713
> > > * gcc.target/i386/pr118713-1-x32.c: New test.
> > > * gcc.target/i386/pr118713-1.c: Likewise.
> > > * gcc.target/i386/pr118713-2-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-2.c: Likewise.
> > > * gcc.target/i386/pr118713-3-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-3.c: Likewise.
> > > * gcc.target/i386/pr118713-4-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-4.c: Likewise.
> > > * gcc.target/i386/pr118713-5-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-5.c: Likewise.
> > > * gcc.target/i386/pr118713-6-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-6.c: Likewise.
> > > * gcc.target/i386/pr118713-7-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-7.c: Likewise.
> > > * gcc.target/i386/pr118713-8-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-8.c: Likewise.
> > > * gcc.target/i386/pr118713-9-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-9.c: Likewise.
> > > * gcc.target/i386/pr118713-10-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-10.c: Likewise.
> > > * gcc.target/i386/pr118713-11-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-11.c: Likewise.
> > > * gcc.target/i386/pr118713-12-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-12.c: Likewise.
> > >
> > > Co-Authored-By: Uros Bizjak <[email protected]>
> > > Signed-off-by: H.J. Lu <[email protected]>
> > > ---
> > > gcc/config/i386/i386-expand.cc | 20 ++--
> > > gcc/config/i386/i386.md | 98 +++++++++++++------
> > > .../gcc.target/i386/pr118713-1-x32.c | 8 ++
> > > gcc/testsuite/gcc.target/i386/pr118713-1.c | 14 +++
> > > .../gcc.target/i386/pr118713-10-x32.c | 8 ++
> > > gcc/testsuite/gcc.target/i386/pr118713-10.c | 15 +++
> > > .../gcc.target/i386/pr118713-11-x32.c | 8 ++
> > > gcc/testsuite/gcc.target/i386/pr118713-11.c | 14 +++
> > > .../gcc.target/i386/pr118713-12-x32.c | 8 ++
> > > gcc/testsuite/gcc.target/i386/pr118713-12.c | 14 +++
> > > .../gcc.target/i386/pr118713-2-x32.c | 8 ++
> > > gcc/testsuite/gcc.target/i386/pr118713-2.c | 15 +++
> > > .../gcc.target/i386/pr118713-3-x32.c | 8 ++
> > > gcc/testsuite/gcc.target/i386/pr118713-3.c | 14 +++
> > > .../gcc.target/i386/pr118713-4-x32.c | 8 ++
> > > gcc/testsuite/gcc.target/i386/pr118713-4.c | 14 +++
> > > .../gcc.target/i386/pr118713-5-x32.c | 8 ++
> > > gcc/testsuite/gcc.target/i386/pr118713-5.c | 13 +++
> > > .../gcc.target/i386/pr118713-6-x32.c | 15 +++
> > > gcc/testsuite/gcc.target/i386/pr118713-6.c | 14 +++
> > > .../gcc.target/i386/pr118713-7-x32.c | 8 ++
> > > gcc/testsuite/gcc.target/i386/pr118713-7.c | 13 +++
> > > .../gcc.target/i386/pr118713-8-x32.c | 8 ++
> > > gcc/testsuite/gcc.target/i386/pr118713-8.c | 13 +++
> > > .../gcc.target/i386/pr118713-9-x32.c | 8 ++
> > > gcc/testsuite/gcc.target/i386/pr118713-9.c | 14 +++
> > > 26 files changed, 353 insertions(+), 35 deletions(-)
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-11-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-11.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-12-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-12.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-2-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-2.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-3-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-3.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-4-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-4.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-5-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-5.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-6-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-6.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-7-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-7.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-8-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-8.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-9-x32.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-9.c
> > >
> > > diff --git a/gcc/config/i386/i386-expand.cc
> > > b/gcc/config/i386/i386-expand.cc
> > > index 117f6f6f7eb..1d1614d2ccc 100644
> > > --- a/gcc/config/i386/i386-expand.cc
> > > +++ b/gcc/config/i386/i386-expand.cc
> > > @@ -10223,15 +10223,21 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx
> > > callarg1,
> > > && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
> > > && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
> > > fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr,
> > > 0)));
> > > + else if (TARGET_INDIRECT_BRANCH_REGISTER && MEM_P (fnaddr))
> > > + {
> > > + fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
> > > + fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode,
> > > fnaddr));
> > > + }
> > > /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
> > > branch via x32 GOT slot is OK. */
> > > - else if (!(TARGET_X32
> > > - && MEM_P (fnaddr)
> > > - && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
> > > - && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
> > > - && (sibcall
> > > - ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
> > > - : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
> > > + if (TARGET_X32
> > > + && MEM_P (fnaddr)
> > > + && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
> > > + && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
> > > + ;
> > > + else if (sibcall
> > > + ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
> > > + : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
> > > {
> > > fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
> > > fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode,
> > > fnaddr));
> > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > > index 52c02b6351a..0c9721bdc7a 100644
> > > --- a/gcc/config/i386/i386.md
> > > +++ b/gcc/config/i386/i386.md
> > > @@ -20122,18 +20122,23 @@ (define_expand "sibcall"
> > > })
> > >
> > > (define_insn "*call"
> > > - [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>BwBz"))
> > > + [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>,BwBz"))
> > > (match_operand 1))]
> > > "!SIBLING_CALL_P (insn)"
> > > "* return ix86_output_call_insn (insn, operands[0]);"
> > > - [(set_attr "type" "call")])
> > > + [(set_attr "type" "call")
> > > + (set (attr "enabled")
> > > + (if_then_else
> > > + (eq_attr "alternative" "1")
> > > + (symbol_ref "!TARGET_INDIRECT_BRANCH_REGISTER")
> > > + (const_string "*")))])
> >
> > Did you read my previous message? You don't need to add "enabled"
> > attribute (and alternatives), because Bw and Bs already include:
> >
> > (define_constraint "Bs"
> > "@internal Sibcall memory operand."
> > (ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER"))
> > (not (match_test "TARGET_X32"))
> > (match_operand 0 "sibcall_memory_operand"))
> > (and (match_test "TARGET_X32")
> > (match_test "Pmode == DImode")
> > (match_operand 0 "GOT_memory_operand"))))
> >
> > (define_constraint "Bw"
> > "@internal Call memory operand."
> > (ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER"))
> > (not (match_test "TARGET_X32"))
> > (match_operand 0 "memory_operand"))
> > (and (match_test "TARGET_X32")
> > (match_test "Pmode == DImode")
> > (match_operand 0 "GOT_memory_operand"))))
> >
> > "Bz" constraint does not mention TARGET_INDIRECT_BRANCH_REGISTER.
> >
> > So, please investigate the condition and eventually fix Bs, Bw and Bz
> > constraints. There is no need to introduce and disable alternatives
> > when corresponding constraints are disabled.
>
> OTOH, you can use and disable alternatives, but then you don't need to
> check TARGET_INDIRECT_BRANCH_REGISTER usage in "Bs" and "Bw"
> constraint. The choice depends on the question if T_I_B_R fully
> disables "Bs" and "Bw". If "Bz" is also invalid with
> TARGET_INDIRECT_BRANCH_REGISTER, only then you should use alternatives
> to disable "BwBz" resp "BsBz" alternatives in call patterns.
The problem is with Bz. The indirect branch is generated by
ix86_output_call_insn
which calls "x86_nopic_noplt_attribute_p (call_op))" to check if indirect branch
should be generated on a symbol reference. I am testing this patch.
> AFAICS, when T_I_B_R is active, it should fully disable "Bs" as well
> as "Bw" constraint. As it is written now, it doesn't disable X32 GOT
> memory operand, which is probably wrong.
>
> Also, TLS patterns use the "Bz" constraint, so it should not be
> disabled with T_I_B_R.
>
> Uros.
--
H.J.
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 9dd280568bd..d3d6e9e66f4 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -203,21 +203,21 @@ (define_special_memory_constraint "Br"
(define_constraint "Bs"
"@internal Sibcall memory operand."
- (ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER"))
- (not (match_test "TARGET_X32"))
- (match_operand 0 "sibcall_memory_operand"))
- (and (match_test "TARGET_X32")
- (match_test "Pmode == DImode")
- (match_operand 0 "GOT_memory_operand"))))
+ (and (match_test "!TARGET_INDIRECT_BRANCH_REGISTER")
+ (ior (and (match_test "!TARGET_X32")
+ (match_operand 0 "sibcall_memory_operand"))
+ (and (match_test "TARGET_X32")
+ (match_test "Pmode == DImode")
+ (match_operand 0 "GOT_memory_operand")))))
(define_constraint "Bw"
"@internal Call memory operand."
- (ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER"))
- (not (match_test "TARGET_X32"))
- (match_operand 0 "memory_operand"))
- (and (match_test "TARGET_X32")
- (match_test "Pmode == DImode")
- (match_operand 0 "GOT_memory_operand"))))
+ (and (match_test "!TARGET_INDIRECT_BRANCH_REGISTER")
+ (ior (and (match_test "!TARGET_X32")
+ (match_operand 0 "memory_operand"))
+ (and (match_test "TARGET_X32")
+ (match_test "Pmode == DImode")
+ (match_operand 0 "GOT_memory_operand")))))
(define_constraint "Bz"
"@internal Constant call address operand."
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 117f6f6f7eb..7a2df43e7cd 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -10225,13 +10225,21 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
/* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
branch via x32 GOT slot is OK. */
- else if (!(TARGET_X32
- && MEM_P (fnaddr)
- && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
- && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
- && (sibcall
- ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
- : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
+ if (TARGET_X32
+ && MEM_P (fnaddr)
+ && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
+ && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
+ {
+ if (TARGET_INDIRECT_BRANCH_REGISTER)
+ {
+ fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
+ fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode,
+ fnaddr));
+ }
+ }
+ else if (sibcall
+ ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
+ : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
{
fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index f122fd8a0a3..bea3fd4b2e2 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -371,6 +371,7 @@ extern int asm_preferred_eh_data_format (int, int);
extern enum attr_cpu ix86_schedule;
#endif
+extern bool ix86_nopic_noplt_attribute_p (rtx call_op);
extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op);
extern const char * ix86_output_indirect_jmp (rtx call_op);
extern const char * ix86_output_function_return (bool long_p);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 11770aa8a50..f89201684a8 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -16724,7 +16724,7 @@ ix86_ifunc_ref_local_ok (void)
This is currently used only with 64-bit or 32-bit GOT32X ELF targets
to call the function marked "noplt" indirectly. */
-static bool
+bool
ix86_nopic_noplt_attribute_p (rtx call_op)
{
if (flag_pic || ix86_cmodel == CM_LARGE
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 52c02b6351a..d6ae3ee378a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -20133,7 +20133,7 @@ (define_insn "*call_got_x32"
[(call (mem:QI (zero_extend:DI
(match_operand:SI 0 "GOT_memory_operand" "Bg")))
(match_operand 1))]
- "TARGET_X32"
+ "TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
{
rtx fnaddr = gen_const_mem (DImode, XEXP (operands[0], 0));
return ix86_output_call_insn (insn, fnaddr);
@@ -20338,7 +20338,7 @@ (define_insn "*call_value_got_x32"
(zero_extend:DI
(match_operand:SI 1 "GOT_memory_operand" "Bg")))
(match_operand 2)))]
- "TARGET_X32"
+ "TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
{
rtx fnaddr = gen_const_mem (DImode, XEXP (operands[1], 0));
return ix86_output_call_insn (insn, fnaddr);
@@ -20460,7 +20460,7 @@ (define_insn "*sibcall_value_pop_memory"
(plus:SI (reg:SI SP_REG)
(match_operand:SI 3 "immediate_operand" "i")))
(unspec [(const_int 0)] UNSPEC_PEEPSIB)]
- "!TARGET_64BIT"
+ "!TARGET_64BIT && !TARGET_INDIRECT_BRANCH_REGISTER"
"* return ix86_output_call_insn (insn, operands[1]);"
[(set_attr "type" "callv")])
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index e733a474f9e..31e751ac329 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -670,7 +670,8 @@ (define_predicate "constant_call_address_operand"
(match_code "symbol_ref")
{
if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC
- || flag_force_indirect_call)
+ || flag_force_indirect_call
+ || ix86_nopic_noplt_attribute_p (op))
return false;
if (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op))
return false;