On 12/02/16 14:56, Charles Baylis wrote:
> When compiling with -mlong-calls and -pg, calls to the __gnu_mcount_nc
> function are not generated as long calls.
> 
> This is encountered when building an allyesconfig Linux kernel because
> the Linux build system generates very large sections by partial
> linking a large number of object files. This causes link failures,
> which don't go away with -mlong-calls due to this bug. (However, with
> this patch linking still fails due to calls in inline asm)
> 
> For example:
> 
>     extern void g(void);
>     int f() { g(); return 0; }
> 
> compiles to:
> 
>         push    {r4, lr}
>         push    {lr}
>         bl      __gnu_mcount_nc    ;// not a long call

Ouch!  That's left the stack misaligned.  That might be OK iff we can
assume __gnu_mcount_nc is not required to be an ABI-conforming call.

>         ldr     r3, .L2
>         blx     r3                 ;// a long call to g()
>         mov     r0, #0
>         pop     {r4, pc}
> 
> The call to __gnu_mcount_nc is generated from ARM_FUNCTION_PROFILER in
> config/arm/bpabi.h.
> 
> For targets without MOVW/MOVT, the long call sequence requires a load
> from the literal pool, and it is too late to set up a literal pool
> entry from within ARM_FUNCTION_PROFILER. My approach to fix this is to
> modify the prologue generation to load the address of __gnu_mcount_nc
> into ip, so that it is ready when the call is generated.
> 
> This patch only implements the fix for ARM and Thumb-2. A similar fix
> is possible for Thumb-1, but requires more slightly complex changes to
> the prologue generation to make sure there is a low register
> available.

Can you check this with a nested function?  Eg.

int f()
{
  int a;
  int b ()
  {
    return a+1;
  }

  a = 1;
  a += b();
  assert (a == 3);
  return a;
}

I think this will break with your patch since the closure will be passed
in IP.

R.

> 
> This feels like a bit of a hack to me, so ideas for a cleaner solution
> are welcome, if none, is this acceptable for trunk now, or should it
> wait until GCC 7?
> 
> 
> 0001-ARM-PR69770-fix-mlong-calls-with-pg.patch
> 
> 
> From 34993396a43fcfc263db5b02b2d1837c490f52ad Mon Sep 17 00:00:00 2001
> From: Charles Baylis <charles.bay...@linaro.org>
> Date: Thu, 11 Feb 2016 18:07:00 +0000
> Subject: [PATCH] [ARM] PR69770 fix -mlong-calls with -pg
> 
> gcc/ChangeLog:
> 
> 2016-02-12  Charles Baylis  <charles.bay...@linaro.org>
> 
>       * config/arm/arm.c (arm_expand_prologue): Load address of
>       __gnu_mcount_nc in r12 if profiling and long calls are enabled.
>         * config/arm/bpabi.h (ARM_FUNCTION_PROFILER): Emit long call to
>       __gnu_mcount_nc long calls are enabled.
>       (ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS): New define.
> 
> gcc/testsuite/ChangeLog:
> 
> 2016-02-12  Charles Baylis  <charles.bay...@linaro.org>
> 
>         * gcc.target/arm/pr69770.c: New test.
> 
> Change-Id: I4c639a5edf32fa8c67324d37faee1cb4ddd57a5c
> 
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 27aecf7..9ce9a58 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -21739,6 +21739,15 @@ arm_expand_prologue (void)
>        arm_load_pic_register (mask);
>      }
>  
> +  if (crtl->profile && TARGET_LONG_CALLS
> +      && ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS)
> +    {
> +      rtx tmp = gen_rtx_SET (gen_rtx_REG (SImode, IP_REGNUM),
> +                          gen_rtx_SYMBOL_REF (Pmode, "__gnu_mcount_nc"));
> +      emit_insn (tmp);
> +      emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, IP_REGNUM)));
> +    }
> +
>    /* If we are profiling, make sure no instructions are scheduled before
>       the call to mcount.  Similarly if the user has requested no
>       scheduling in the prolog.  Similarly if we want non-call exceptions
> diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
> index 82128ef..b734a24 100644
> --- a/gcc/config/arm/bpabi.h
> +++ b/gcc/config/arm/bpabi.h
> @@ -173,11 +173,21 @@
>  
>  #undef  NO_PROFILE_COUNTERS
>  #define NO_PROFILE_COUNTERS 1
> +#undef  ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS
> +#define ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS 1
>  #undef  ARM_FUNCTION_PROFILER
>  #define ARM_FUNCTION_PROFILER(STREAM, LABELNO)                       \
>  {                                                                    \
> -  fprintf (STREAM, "\tpush\t{lr}\n");                                        
> \
> -  fprintf (STREAM, "\tbl\t__gnu_mcount_nc\n");                               
> \
> +  if (TARGET_LONG_CALLS && TARGET_32BIT)                             \
> +  {                                                                  \
> +    fprintf (STREAM, "\tpush\t{lr}\n");                                      
> \
> +    /* arm_expand_prolog() has already set up ip to contain the */   \
> +    /* address of __gnu_mcount_nc.  */                                       
> \
> +    fprintf (STREAM, "\tblx\tip\n");                                 \
> +  } else {                                                           \
> +    fprintf (STREAM, "\tpush\t{lr}\n");                                      
> \
> +    fprintf (STREAM, "\tbl\t__gnu_mcount_nc\n");                     \
> +  }                                                                  \
>  }
>  
>  #undef SUBTARGET_FRAME_POINTER_REQUIRED
> diff --git a/gcc/testsuite/gcc.target/arm/pr69770.c 
> b/gcc/testsuite/gcc.target/arm/pr69770.c
> new file mode 100644
> index 0000000..61e5c6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/pr69770.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-pg -mlong-calls" } */
> +
> +extern void g(void);
> +
> +int f() { g(); return 0; }
> +
> +/* { dg-final { scan-assembler-not "bl\[ \t\]+__gnu_mcount_nc" } } */
> +/* { dg-final { scan-assembler "__gnu_mcount_nc" } } */
> 

Reply via email to