On 2023/02/16 7:18, Max Filippov wrote:
> Hi Suwa-san,

Hi!

> 
> On Thu, Jan 26, 2023 at 7:17 PM Takayuki 'January June' Suwa
> <jjsuwa_sys3...@yahoo.co.jp> wrote:
>>
>> In the case of the CALL0 ABI, values that must be retained before and
>> after function calls are placed in the callee-saved registers (A12
>> through A15) and referenced later.  However, it is often the case that
>> the save and the reference are each only once and a simple register-
>> register move (with two exceptions; i. the register saved to/restored
>> from is the stack pointer, ii. the function needs an additional stack
>> pointer adjustment to grow the stack).
>>
>> e.g. in the following example, if there are no other occurrences of
>> register A14:
>>
>> ;; before
>>         ; prologue {
>>   ...
>>         s32i.n  a14, sp, 16
>>   ...                           ;; no frame pointer needed
>>                                 ;; no additional stack growth
>>         ; } prologue
>>   ...
>>         mov.n   a14, a6         ;; A6 is not SP
>>   ...
>>         call0   foo
>>   ...
>>         mov.n   a8, a14         ;; A8 is not SP
>>   ...
>>         ; epilogue {
>>   ...
>>         l32i.n  a14, sp, 16
>>   ...
>>         ; } epilogue
>>
>> It can be possible like this:
>>
>> ;; after
>>         ; prologue {
>>   ...
>>         (no save needed)
>>   ...
>>         ; } prologue
>>   ...
>>         s32i.n  a6, sp, 16      ;; replaced with A14's slot
>>   ...
>>         call0   foo
>>   ...
>>         l32i.n  a8, sp, 16      ;; through SP
>>   ...
>>         ; epilogue {
>>   ...
>>         (no restoration needed)
>>   ...
>>         ; } epilogue
>>
>> This patch adds the abovementioned logic to the function prologue/epilogue
>> RTL expander code.
>>
>> gcc/ChangeLog:
>>
>>         * config/xtensa/xtensa.cc (machine_function): Add new member
>>         'eliminated_callee_saved_bmp'.
>>         (xtensa_can_eliminate_callee_saved_reg_p): New function to
>>         determine whether the register can be eliminated or not.
>>         (xtensa_expand_prologue): Add invoking the above function and
>>         elimination the use of callee-saved register by using its stack
>>         slot through the stack pointer (or the frame pointer if needed)
>>         directly.
>>         (xtensa_expand_prologue): Modify to not emit register restoration
>>         insn from its stack slot if the register is already eliminated.
>>
>> gcc/testsuite/ChangeLog:
>>
>>         * gcc.target/xtensa/elim_callee_saved.c: New.
>> ---
>>  gcc/config/xtensa/xtensa.cc                   | 132 ++++++++++++++----
>>  .../gcc.target/xtensa/elim_callee_saved.c     |  38 +++++
>>  2 files changed, 145 insertions(+), 25 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c
> 
> This version passes regression tests, but I still have a couple questions.
> 
>> diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
>> index 3e2e22d4cbe..ff59c933d4d 100644
>> --- a/gcc/config/xtensa/xtensa.cc
>> +++ b/gcc/config/xtensa/xtensa.cc
>> @@ -105,6 +105,7 @@ struct GTY(()) machine_function
>>    bool epilogue_done;
>>    bool inhibit_logues_a1_adjusts;
>>    rtx last_logues_a9_content;
>> +  HOST_WIDE_INT eliminated_callee_saved_bmp;
>>  };
>>
>>  static void xtensa_option_override (void);
>> @@ -3343,6 +3344,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, 
>> int flags)
>>      cfun->machine->last_logues_a9_content = GEN_INT (offset);
>>  }
>>
>> +static bool
>> +xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
>> +                                        rtx_insn **p_insnS,
>> +                                        rtx_insn **p_insnR)
>> +{
>> +  df_ref ref;
>> +  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
>> +  rtx pattern;
>> +
>> +  if (!optimize || !df || call_used_or_fixed_reg_p (regno))
>> +    return false;
>> +
>> +  for (ref = DF_REG_DEF_CHAIN (regno);
>> +       ref; ref = DF_REF_NEXT_REG (ref))
>> +    if (DF_REF_CLASS (ref) != DF_REF_REGULAR
>> +       || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
>> +      continue;
>> +    else if (GET_CODE (pattern = PATTERN (insn)) == SET
>> +            && REG_P (SET_DEST (pattern))
>> +            && REGNO (SET_DEST (pattern)) == regno
>> +            && REG_NREGS (SET_DEST (pattern)) == 1
>> +            && REG_P (SET_SRC (pattern))
>> +            && REGNO (SET_SRC (pattern)) != A1_REG)
> 
> Do I understand correctly that the check for A1 here and below is
> for the case when regno is a hard frame pointer and the function
> needs the frame pointer? If so, wouldn't it be better to check
> for it explicitly in the beginning?

I see.  But I can't be sure that the body of the function never saves and 
restores the stack pointer to another register if the function doesn't need the 
frame pointer.
Therefore, I think that the validity depends on the regtest.

> 
>> +      {
>> +       if (insnS)
>> +         return false;
>> +       insnS = insn;
>> +       continue;
>> +      }
>> +    else
>> +      return false;
>> +
>> +  for (ref = DF_REG_USE_CHAIN (regno);
>> +       ref; ref = DF_REF_NEXT_REG (ref))
>> +    if (DF_REF_CLASS (ref) != DF_REF_REGULAR
>> +       || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
>> +      continue;
>> +    else if (GET_CODE (pattern = PATTERN (insn)) == SET
>> +            && REG_P (SET_SRC (pattern))
>> +            && REGNO (SET_SRC (pattern)) == regno
>> +            && REG_NREGS (SET_SRC (pattern)) == 1
>> +            && REG_P (SET_DEST (pattern))
>> +            && REGNO (SET_DEST (pattern)) != A1_REG)
>> +      {
>> +       if (insnR)
>> +         return false;
>> +       insnR = insn;
>> +       continue;
>> +      }
>> +    else
>> +      return false;
>> +
>> +  if (!insnS || !insnR)
>> +    return false;
>> +
>> +  *p_insnS = insnS, *p_insnR = insnR;
>> +
>> +  return true;
>> +}
> 
> [...]
> 
>> diff --git a/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c 
>> b/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c
>> new file mode 100644
>> index 00000000000..cd3d6b9f249
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c
>> @@ -0,0 +1,38 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -mabi=call0" } */
>> +
>> +extern void foo(void);
>> +
>> +/* eliminated one register (the reservoir of variable 'a') by its stack 
>> slot through the stack pointer.  */
>> +int test0(int a) {
>> +  int array[252];  /* the maximum bound of non-large stack.  */
>> +  foo();
>> +  asm volatile("" : : "m"(array));
>> +  return a;
>> +}
>> +
>> +/* cannot eliminate if large stack is needed, because the offset from TOS 
>> cannot fit into single L32I/S32I instruction.  */
>> +int test1(int a) {
>> +  int array[10000];  /* requires large stack.  */
>> +  foo();
>> +  asm volatile("" : : "m"(array));
>> +  return a;
>> +}
>> +
>> +/* register A15 is the reservoir of the stack pointer and cannot be 
>> eliminated if the frame pointer is needed.
>> +   other registers still can be, but through the frame pointer rather the 
>> stack pointer.  */
>> +int test2(int a) {
>> +  int* p = __builtin_alloca(16);
>> +  foo();
>> +  asm volatile("" : : "r"(p));
>> +  return a;
>> +}
>> +
>> +/* in -O0 the composite hard registers may still remain unsplitted at 
>> pro_and_epilogue and must be excluded.  */
>> +extern double bar(void);
>> +int __attribute__((optimize(0))) test3(int a) {
>> +  return bar() + a;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times "mov\t|mov.n\t" 21 } } */
> 
> This test looks quite fragile as the number of movs would vary
> when the testsuite is run with additional options.

I totally agree with your point.

> 
>> +/* { dg-final { scan-assembler-times "a15, 8" 2 } } */
>> --
>> 2.30.2

Reply via email to