https://gcc.gnu.org/g:1eefa6e0c84e3008ed7ac44d08a8e5206038fb33
commit r16-4321-g1eefa6e0c84e3008ed7ac44d08a8e5206038fb33 Author: Takayuki 'January June' Suwa <[email protected]> Date: Fri Sep 19 21:24:27 2025 +0900 xtensa: Make large CONST_INT legitimate until the postreload pass Generally, RISC machines only have a limited bit width for integer constant immediate values, and it is common to implement TARGET_LEGITIMATE_CONSTANT_P() for their representation. However, by making bare CONST_INTs less visible in RTL, some optimizers may miss out on opportunities. - Operands with the nonmemory/immediate_operand() predicates never accept constants that TARGET_LEGITIMATE_CONSTANT_P() rejects, so templates containing their predicates may unintentionally not be used for insns containing such constants during the RTL generation or instruction combination passes - Some optimizers only accept bare CONST_INTs and may not consider their equivalents (such as literal pool entry references) at all (Unrelated to this patch, but perhaps even worse, some optimizers such as RTL ifcvt, assume that not only the constant format but also the insn format is CISC-like) As a clear example, the effect of constant-anchored optimization during the postreload pass can be seen by compiling the following with and without -mconst16 or -mauto-litpools: /* example */ void test(int a[4]) { a[0] = 0xDEADFACE; a[1] = 0xDEADFACE - 1; a[2] = 0xDEADFACE - 2; a[3] = 0xDEADFACE + 254; } ;; without -mauto-litpools .literal_position .literal .LC0, -559023410 .literal .LC1, -559023411 .literal .LC2, -559023412 .literal .LC3, -559023156 test: entry sp, 32 l32r a8, .LC0 s32i.n a8, a2, 0 l32r a8, .LC1 s32i.n a8, a2, 4 l32r a8, .LC2 s32i.n a8, a2, 8 l32r a8, .LC3 s32i.n a8, a2, 12 retw.n ;; with -mauto-litpools test: entry sp, 32 movi a8, -559023410 s32i.n a8, a2, 0 addi.n a8, a8, -1 ;; const-anchored s32i.n a8, a2, 4 addi.n a8, a8, -1 ;; const-anchored s32i.n a8, a2, 8 addmi a8, a8, 0x100 ;; const-anchored s32i.n a8, a2, 12 retw.n Therefore, we aim to overcome the above
obstacles by introducing a tweak that legitimates a full-bitwidth CONST_INT regardless of other conditions until a specific RTL pass is reached. Then, the most appropriate point to switch the behavior of TARGET_LEGITIMATE_CONSTANT_P() would probably be just before reload/LRA, but as mentioned earlier, there is an optimizer that should be utilized in postreload, so the switchover point will be just after that. This patch introduces a new target-specific pass called "xt_largeconst" to implement all of the above, which will also serve as a host for other future optimizers related to large constants, such as "constantsynth". As a result, this patch also resolves some of the issues mentioned in the previous patch notes: - B[GE/LT]U branch instructions with immediate values of 32768 or 65536 cannot be emitted - Insn combination templates matching the CLAMPS instruction cannot be matched against large upper and lower bounds gcc/ChangeLog: * config/xtensa/constraints.md (Y): Change to reference xtensa_postreload_completed_p() instead of xtensa_split1_finished_p(). * config/xtensa/predicates.md (move_operand): Ditto. * config/xtensa/t-xtensa (PASSES_EXTRA): Add xtensa-passes.def as target-specific pass description. * config/xtensa/xtensa-passes.def: New definition file that inserts pass_xtensa_largeconst after pass_postreload_cse. * config/xtensa/xtensa-protos.h (xtensa_split1_finished_p): Remove. (xtensa_postreload_completed_p, make_pass_xtensa_largeconst): New function prototypes. * config/xtensa/xtensa.cc (machine_function): Add a new member "postreload_completed". (xtensa_emit_move_sequence): Change to reference xtensa_postreload_completed_p() instead of can_create_pseudo_p(). (xtensa_split1_finished_p): Remove. (xtensa_postreload_completed_p): New function. (xtensa_legitimate_constant_p): Change to also consider xtensa_postreload_completed_p(). (litpool_set_src_1, litpool_set_src, do_largeconst, rest_of_handle_largeconst): New sub-functions for pass_xtensa_largeconst.
(pass_data_xtensa_largeconst, pass_xtensa_largeconst): New target-specific pass definition. (make_pass_xtensa_largeconst): New function called by the pass manager. * config/xtensa/xtensa.md (The auxiliary define_split for movdi_internal): Change to reference xtensa_postreload_completed_p() instead of xtensa_split1_finished_p(). (The first of three auxiliary define_splits for mov[sh]i_internal): Remove. gcc/testsuite/ChangeLog: * gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c: Disable optimizations and modify to also verify RTL dump in the "expand" pass. Diff: --- gcc/config/xtensa/constraints.md | 3 +- gcc/config/xtensa/predicates.md | 2 +- gcc/config/xtensa/t-xtensa | 2 + gcc/config/xtensa/xtensa-passes.def | 20 +++ gcc/config/xtensa/xtensa-protos.h | 3 +- gcc/config/xtensa/xtensa.cc | 189 ++++++++++++++++++++- gcc/config/xtensa/xtensa.md | 14 +- .../gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c | 3 +- 8 files changed, 212 insertions(+), 24 deletions(-) diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md index 727ec1e2c202..08fdab1c2e7f 100644 --- a/gcc/config/xtensa/constraints.md +++ b/gcc/config/xtensa/constraints.md @@ -121,7 +121,8 @@ (ior (and (match_code "const_int,const_double,const,symbol_ref,label_ref") (match_test "TARGET_AUTO_LITPOOLS")) (and (match_code "const_int") - (match_test "! xtensa_split1_finished_p ()")))) + (match_test "!TARGET_CONST16 + && ! xtensa_postreload_completed_p ()")))) ;; Memory constraints. diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md index 20160a4c4e5f..62c0f8aca208 100644 --- a/gcc/config/xtensa/predicates.md +++ b/gcc/config/xtensa/predicates.md @@ -147,7 +147,7 @@ (ior (and (match_code "const_int") (match_test "(GET_MODE_CLASS (mode) == MODE_INT && xtensa_simm12b (INTVAL (op))) - || ! xtensa_split1_finished_p ()")) + || ! 
xtensa_postreload_completed_p ()")) (and (match_code "const_int,const_double,const,symbol_ref,label_ref") (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS) && CONSTANT_P (op)"))))) diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa index 98f98e0d7361..d09d41ab37d4 100644 --- a/gcc/config/xtensa/t-xtensa +++ b/gcc/config/xtensa/t-xtensa @@ -23,3 +23,5 @@ $(out_object_file): gt-xtensa.h xtensa-dynconfig.o: $(srcdir)/config/xtensa/xtensa-dynconfig.cc $(COMPILE) $< $(POSTCOMPILE) + +PASSES_EXTRA += $(srcdir)/config/xtensa/xtensa-passes.def diff --git a/gcc/config/xtensa/xtensa-passes.def b/gcc/config/xtensa/xtensa-passes.def new file mode 100644 index 000000000000..3958957ff7fa --- /dev/null +++ b/gcc/config/xtensa/xtensa-passes.def @@ -0,0 +1,20 @@ +/* Description of target passes for Tensilica's Xtensa architecture. + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +INSERT_PASS_AFTER (pass_postreload_cse, 1, pass_xtensa_largeconst); diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h index 98e75c6a5356..c538e483cfc6 100644 --- a/gcc/config/xtensa/xtensa-protos.h +++ b/gcc/config/xtensa/xtensa-protos.h @@ -58,7 +58,7 @@ extern char *xtensa_emit_call (int, rtx *); extern char *xtensa_emit_sibcall (int, rtx *); extern bool xtensa_tls_referenced_p (rtx); extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx); -extern bool xtensa_split1_finished_p (void); +extern bool xtensa_postreload_completed_p (void); extern void xtensa_split_DI_reg_imm (rtx *); extern char *xtensa_bswapsi2_output (rtx_insn *, const char *); @@ -82,5 +82,6 @@ extern void xtensa_adjust_reg_alloc_order (void); extern enum reg_class xtensa_regno_to_class (int regno); extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); extern const char **xtensa_get_config_strings (void); +extern rtl_opt_pass *make_pass_xtensa_largeconst (gcc::context *); #endif /* !__XTENSA_PROTOS_H__ */ diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index 00c36915795a..4b44d35054b7 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -111,6 +111,7 @@ struct GTY(()) machine_function rtx last_logues_a9_content; HARD_REG_SET eliminated_callee_saved; hash_map<rtx, int> *litpool_usage; + bool postreload_completed; }; static void xtensa_option_override (void); @@ -1342,7 +1343,7 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) rtx src = operands[1]; if (CONSTANT_P (src) - && (! CONST_INT_P (src) || ! xtensa_simm12b (INTVAL (src)))) + && ! (CONST_INT_P (src) && xtensa_simm12b (INTVAL (src)))) { rtx dst = operands[0]; @@ -1366,8 +1367,8 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) return 1; } - if (! TARGET_AUTO_LITPOOLS && ! TARGET_CONST16 - && ! (CONST_INT_P (src) && can_create_pseudo_p ())) + if (!TARGET_CONST16 && !TARGET_AUTO_LITPOOLS + && (! 
CONST_INT_P (src) || xtensa_postreload_completed_p ())) { src = force_const_mem (SImode, src); operands[1] = src; @@ -2623,12 +2624,12 @@ xtensa_shlrd_which_direction (rtx op0, rtx op1) } -/* Return true after "split1" pass has been finished. */ +/* Return true after "postreload" pass has been completed. */ bool -xtensa_split1_finished_p (void) +xtensa_postreload_completed_p (void) { - return cfun && (cfun->curr_properties & PROP_rtl_split_insns); + return cfun && cfun->machine->postreload_completed; } @@ -5143,7 +5144,8 @@ static bool xtensa_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { if (CONST_INT_P (x)) - return TARGET_AUTO_LITPOOLS || TARGET_CONST16 + return TARGET_CONST16 || TARGET_AUTO_LITPOOLS + || ! xtensa_postreload_completed_p () || xtensa_simm12b (INTVAL (x)); return !xtensa_tls_referenced_p (x); @@ -5714,4 +5716,177 @@ xtensa_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED, return NULL; } +/* Machine-specific pass in order to replace all assignments of large + integer constants (i.e., that do not fit into the immediate field which + can hold signed 12 bits) with other legitimate forms, specifically, + references to literal pool entries, when neither TARGET_CONST16 nor + TARGET_AUTO_LITPOOLS is enabled. + + This pass also serves as a place to provide other optimizations, for + example, converting constants that are too large to fit into their + immediate fields into other representations that are more efficient + from a particular point of view. */ + +namespace +{ + +/* Replace the source of [SH]Imode allocation whose value does not fit + into signed 12 bits with a reference to litpool entry. */ + +static bool +litpool_set_src_1 (rtx_insn *insn, rtx set, bool in_group) +{ + rtx dest, src; + enum machine_mode mode; + + if (REG_P (dest = SET_DEST (set)) + && ((mode = GET_MODE (dest)) == SImode || mode == HImode) + && CONST_INT_P (src = SET_SRC (set)) + && ! 
xtensa_simm12b (INTVAL (src))) + { + remove_reg_equal_equiv_notes (insn); + validate_change (insn, &SET_SRC (set), + force_const_mem (mode, src), in_group); + add_reg_note (insn, REG_EQUIV, copy_rtx (src)); + return true; + } + + return false; +} + +static bool +litpool_set_src (rtx_insn *insn) +{ + rtx pat = PATTERN (insn); + int i; + bool changed; + + switch (GET_CODE (pat)) + { + case SET: + return litpool_set_src_1 (insn, pat, 0); + + /* There should be no assignments within PARALLEL in this target, + but just to be sure. */ + case PARALLEL: + changed = false; + for (i = 0; i < XVECLEN (pat, 0); ++i) + if (GET_CODE (XVECEXP (pat, 0, i)) == SET + && litpool_set_src_1 (insn, XVECEXP (pat, 0, i), 1)) + changed = true; + if (changed) + apply_change_group (); + return changed; + + default: + return false; + } +} + +/* Replace all occurrences of large immediate values in assignment sources + that were permitted for convenience with their legitimate forms, or + more efficient representations if possible. */ + +static void +do_largeconst (void) +{ + bool replacing_required = !TARGET_CONST16 && !TARGET_AUTO_LITPOOLS; + rtx_insn *insn; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (NONJUMP_INSN_P (insn)) + { + /* Replace the source of [SH]Imode allocation whose value does not + fit into signed 12 bits with a reference to litpool entry. */ + if (replacing_required) + litpool_set_src (insn); + } +} + +/* Convert assignments for large constants. */ + +static unsigned int +rest_of_handle_largeconst (void) +{ + /* Until this flag becomes true, all RTL expressions that assign integer + (not symbol nor floating-point) constants to [SH]Imode registers are + allowed regardless of the values' bit width or configurations of + TARGET_CONST16 and TARGET_AUTO_LITPOOLS. This trick avoids some of + the problems that can arise from blindly following the result of + TARGET_LEGITIMATE_CONSTANT_P() either directly or via general/ + immediate_operands(). 
+ + For example, the "cbranchsi4" MD expansion pattern in this target has + "nonmemory_operand" predicate specified for operand 2, which is + reasonable for most RISC machines where only registers or small set of + constants can be compared. Incidentally, the Xtensa ISA has branch + instructions that perform GEU/LTU comparisons with 32768 or 65536, but + such constants are previously not accepted by "nonmemory_operand" + because the predicate is internally constrained to "immediate_operand" + which is essentially TARGET_LEGITIMATE_CONSTANT_P(). It would not be + impossible to describe a peculiar predicate or condition in the pattern + to get around this, but it would be "elephant" (inelegant). + Fortunately, this issue will be salvaged at higher optimization levels + in subsequent RTL instruction combination pass, but these instructions + are suppose to be emitted properly without any optimization. + + Also, there are not a few cases where optimizers only accept bare + CONST_INTs and do not consider that references to pooled constants + are semantically equivalent to bare ones. A good example of this is + a certain constant anchoring optimization performed in the postreload + pass, which requires anchoring constants to be bare, not pooled. + + In any case, once postreload is complete, the trick described above + is no longer needed, so such assignments must now be all converted + back to references to literal pool entries (the original legitimate + form) if neither TARGET_CONST16 nor TARGET_AUTO_LITPOOLS is enabled. + See the function do_largeconst() called below. */ + cfun->machine->postreload_completed = true; + + df_set_flags (DF_DEFER_INSN_RESCAN); + df_note_add_problem (); + df_analyze (); + + /* Do the process. 
*/ + do_largeconst (); + + return 0; +} + +const pass_data pass_data_xtensa_largeconst = +{ + RTL_PASS, /* type */ + "xt_largeconst", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_xtensa_largeconst : public rtl_opt_pass +{ +public: + pass_xtensa_largeconst (gcc::context *ctxt) + : rtl_opt_pass (pass_data_xtensa_largeconst, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute (function *) final override + { + return rest_of_handle_largeconst (); + } + +}; // class pass_xtensa_largeconst + +} // anon namespace + +rtl_opt_pass * +make_pass_xtensa_largeconst (gcc::context *ctxt) +{ + return new pass_xtensa_largeconst (ctxt); +} + #include "gt-xtensa.h" diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md index 52ffb161c0f5..9a0c631dc3f4 100644 --- a/gcc/config/xtensa/xtensa.md +++ b/gcc/config/xtensa/xtensa.md @@ -1268,7 +1268,7 @@ [(set (match_operand:DI 0 "register_operand") (match_operand:DI 1 "const_int_operand"))] "!TARGET_CONST16 - && ! xtensa_split1_finished_p ()" + && ! xtensa_postreload_completed_p ()" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) @@ -1312,18 +1312,6 @@ } [(set_attr "mode" "SI")]) -(define_split - [(set (match_operand:SHI 0 "register_operand") - (match_operand:SHI 1 "const_int_operand"))] - "!TARGET_CONST16 && !TARGET_AUTO_LITPOOLS - && ! xtensa_split1_finished_p () - && ! 
xtensa_simm12b (INTVAL (operands[1]))" - [(set (match_dup 0) - (match_dup 1))] -{ - operands[1] = force_const_mem (<MODE>mode, operands[1]); -}) - (define_split [(set (match_operand:SHI 0 "register_operand") (match_operand:SHI 1 "constantpool_operand"))] diff --git a/gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c b/gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c index 05873b896896..bd6bf5f2111c 100644 --- a/gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c +++ b/gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O0 -fdump-rtl-expand" } */ extern void foo(void); @@ -15,5 +15,6 @@ void BLTUI_test(unsigned int a) foo(); } +/* { dg-final { scan-rtl-dump-times "ubtrue" 2 "expand" } } */ /* { dg-final { scan-assembler-times "bgeui" 1 } } */ /* { dg-final { scan-assembler-times "bltui" 1 } } */
