Re: [PATCH v4 1/9] ppc64 (le): prepare for -mprofile-kernel

2015-11-26 Thread Denis Kirjanov
On 11/25/15, Torsten Duwe  wrote:
> The gcc switch -mprofile-kernel, available for ppc64 on gcc > 4.8.5,
> allows _mcount to be called very early in the function, which low-level
> ASM code and code patching functions need to consider.
> In particular, the link register and the parameter registers are still
> alive and not yet saved into a new stack frame.
>
> Signed-off-by: Torsten Duwe 
> ---
>  arch/powerpc/kernel/entry_64.S  | 44 +++--
>  arch/powerpc/kernel/ftrace.c| 12 +--
>  arch/powerpc/kernel/module_64.c | 13 
>  3 files changed, 65 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index a94f155..8d56b16 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -1206,7 +1206,11 @@ _GLOBAL(enter_prom)
>  #ifdef CONFIG_DYNAMIC_FTRACE
>  _GLOBAL(mcount)
>  _GLOBAL(_mcount)
> - blr
> + mflr    r0
> + mtctr   r0
> + ld  r0,LRSAVE(r1)
> + mtlr    r0
> + bctr
>
>  _GLOBAL_TOC(ftrace_caller)
>   /* Taken from output of objdump from lib64/glibc */
> @@ -1262,13 +1266,28 @@ _GLOBAL(ftrace_stub)
>
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>  _GLOBAL(ftrace_graph_caller)
> +#ifdef CC_USING_MPROFILE_KERNEL
> + // with -mprofile-kernel, parameter regs are still alive at _mcount
> + std r10, 104(r1)
> + std r9, 96(r1)
> + std r8, 88(r1)
> + std r7, 80(r1)
> + std r6, 72(r1)
> + std r5, 64(r1)
> + std r4, 56(r1)
> + std r3, 48(r1)
> + mfctr   r4  // ftrace_caller has moved local addr here
> + std r4, 40(r1)
> + mflr    r3  // ftrace_caller has restored LR from stack
> +#else
>   /* load r4 with local address */
>   ld  r4, 128(r1)
> - subi    r4, r4, MCOUNT_INSN_SIZE
>
>   /* Grab the LR out of the caller stack frame */
>   ld  r11, 112(r1)
>   ld  r3, 16(r11)
> +#endif
> + subi    r4, r4, MCOUNT_INSN_SIZE
>
>   bl  prepare_ftrace_return
>   nop
> @@ -1277,6 +1296,26 @@ _GLOBAL(ftrace_graph_caller)
>* prepare_ftrace_return gives us the address we divert to.
>* Change the LR in the callers stack frame to this.
>*/
> +
> +#ifdef CC_USING_MPROFILE_KERNEL
> + mtlr    r3
> +
> + ld  r0, 40(r1)
> + mtctr   r0
> + ld  r10, 104(r1)
> + ld  r9, 96(r1)
> + ld  r8, 88(r1)
> + ld  r7, 80(r1)
> + ld  r6, 72(r1)
> + ld  r5, 64(r1)
> + ld  r4, 56(r1)
> + ld  r3, 48(r1)
> +
> + addi    r1, r1, 112
> + mflr    r0
> + std r0, LRSAVE(r1)
> + bctr
> +#else
>   ld  r11, 112(r1)
>   std r3, 16(r11)
>
> @@ -1284,6 +1323,7 @@ _GLOBAL(ftrace_graph_caller)
>   mtlr    r0
>   addi    r1, r1, 112
>   blr
> +#endif
>
>  _GLOBAL(return_to_handler)
>   /* need to save return values */
> diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
> index 44d4d8e..080c525 100644
> --- a/arch/powerpc/kernel/ftrace.c
> +++ b/arch/powerpc/kernel/ftrace.c
> @@ -306,11 +306,19 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>* The load offset is different depending on the ABI. For simplicity
>* just mask it out when doing the compare.
>*/
> +#ifndef CC_USING_MPROFILE_KERNEL
>   if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
> - pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
> + pr_err("Unexpected call sequence at %p: %x %x\n",
> + ip, op[0], op[1]);
>   return -EINVAL;
>   }
> -
> +#else
> + /* look for patched "NOP" on ppc64 with -mprofile-kernel */
> + if (op[0] != 0x60000000) {
> + pr_err("Unexpected call at %p: %x\n", ip, op[0]);
> + return -EINVAL;
> + }
> +#endif
>   /* If we never set up a trampoline to ftrace_caller, then bail */
>   if (!rec->arch.mod->arch.tramp) {
>   pr_err("No ftrace trampoline\n");
> diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
> index 6838451..0819ce7 100644
> --- a/arch/powerpc/kernel/module_64.c
> +++ b/arch/powerpc/kernel/module_64.c
> @@ -475,6 +475,19 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
>  static int restore_r2(u32 *instruction, struct module *me)
>  {
>   if (*instruction != PPC_INST_NOP) {
> +#ifdef CC_USING_MPROFILE_KERNEL
> + /* -mprofile_kernel sequence starting with
> +  * mflr r0; std r0, LRSAVE(r1)
> +  */
> + if (instruction[-3] == 0x7c0802a6 &&
> + instruction[-2] == 0xf8010010) {
> + /* Nothing to be done here, it's an _mcount
> +  * call location and r2 will have to be
> +  * restored in the _mcount function.

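For reference, the instruction words tested in __ftrace_make_call() and
restore_r2() above decode as follows; the symbolic names in this sketch are
illustrative only (they do not appear in the patch), while the hex encodings
and comments describe standard PowerPC instructions:

	/* Hypothetical names for the instruction words used in the checks above. */
	#define INSN_B_PLUS_8		0x48000008	/* b    +8                          */
	#define INSN_LD_R2_OFF_R1	0xe8410000	/* ld   r2,<off>(r1), offset masked */
	#define INSN_NOP		0x60000000	/* nop  (ori r0,r0,0)               */
	#define INSN_MFLR_R0		0x7c0802a6	/* mflr r0                          */
	#define INSN_STD_R0_LRSAVE	0xf8010010	/* std  r0,16(r1), the LRSAVE slot  */
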
Re: [PATCH v4 1/9] ppc64 (le): prepare for -mprofile-kernel

2015-11-26 Thread Torsten Duwe
On Thu, Nov 26, 2015 at 01:12:12PM +0300, Denis Kirjanov wrote:
> On 11/25/15, Torsten Duwe  wrote:
> > +*/
> > +   return 2;
> I didn't find where you check for this return value.

That's a pure debugging convenience. The return value is only tested
for != 0, so any non-zero value will do. I've encountered situations
where I'd really have liked to know _why_ a routine failed or succeeded,
just by looking at the registers in the debugger.

This is no big thing, I have no strong opinion about this.

Torsten
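
The check referred to here is the caller of restore_r2() in
apply_relocate_add() (module_64.c), which only tests the result for zero
versus non-zero, roughly as in this sketch (reconstructed for illustration,
not quoted from the patch):

	/* After routing an external R_PPC_REL24 branch through a stub, make
	 * sure the instruction following the branch either restores r2 or is
	 * a recognized _mcount call site; restore_r2() returns non-zero in
	 * both cases and zero on an unexpected sequence.
	 */
	if (!restore_r2((u32 *)location + 1, me))
		return -ENOEXEC;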

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 1/9] ppc64 (le): prepare for -mprofile-kernel

2015-11-25 Thread Torsten Duwe
The gcc switch -mprofile-kernel, available for ppc64 on gcc > 4.8.5,
allows _mcount to be called very early in the function, which low-level
ASM code and code patching functions need to consider.
In particular, the link register and the parameter registers are still
alive and not yet saved into a new stack frame.
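
With -mprofile-kernel the profiling call is emitted before the function sets
up its own stack frame, so a function entry is expected to look roughly like
the sketch below (for illustration only; this is not compiler output taken
from the patch):

	/*
	 * Assumed -mprofile-kernel call site, as matched by restore_r2():
	 *
	 *	mflr	r0		# 0x7c0802a6; LR still holds the return address
	 *	std	r0, 16(r1)	# 0xf8010010; save LR to the ABI LRSAVE slot
	 *	bl	_mcount		# r3-r10 still hold the function's arguments
	 *	...			# normal prologue follows
	 */

This is why the _mcount stub and ftrace_graph_caller below have to preserve
r3-r10 and recover LR via CTR/LRSAVE, and why restore_r2() must not expect a
TOC restore after such a branch.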

Signed-off-by: Torsten Duwe 
---
 arch/powerpc/kernel/entry_64.S  | 44 +++--
 arch/powerpc/kernel/ftrace.c| 12 +--
 arch/powerpc/kernel/module_64.c | 13 
 3 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index a94f155..8d56b16 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1206,7 +1206,11 @@ _GLOBAL(enter_prom)
 #ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL(mcount)
 _GLOBAL(_mcount)
-   blr
+   mflr    r0
+   mtctr   r0
+   ld  r0,LRSAVE(r1)
+   mtlr    r0
+   bctr
 
 _GLOBAL_TOC(ftrace_caller)
/* Taken from output of objdump from lib64/glibc */
@@ -1262,13 +1266,28 @@ _GLOBAL(ftrace_stub)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 _GLOBAL(ftrace_graph_caller)
+#ifdef CC_USING_MPROFILE_KERNEL
+   // with -mprofile-kernel, parameter regs are still alive at _mcount
+   std r10, 104(r1)
+   std r9, 96(r1)
+   std r8, 88(r1)
+   std r7, 80(r1)
+   std r6, 72(r1)
+   std r5, 64(r1)
+   std r4, 56(r1)
+   std r3, 48(r1)
+   mfctr   r4  // ftrace_caller has moved local addr here
+   std r4, 40(r1)
+   mflr    r3  // ftrace_caller has restored LR from stack
+#else
/* load r4 with local address */
ld  r4, 128(r1)
-   subi    r4, r4, MCOUNT_INSN_SIZE
 
/* Grab the LR out of the caller stack frame */
ld  r11, 112(r1)
ld  r3, 16(r11)
+#endif
+   subi    r4, r4, MCOUNT_INSN_SIZE
 
bl  prepare_ftrace_return
nop
@@ -1277,6 +1296,26 @@ _GLOBAL(ftrace_graph_caller)
 * prepare_ftrace_return gives us the address we divert to.
 * Change the LR in the callers stack frame to this.
 */
+
+#ifdef CC_USING_MPROFILE_KERNEL
+   mtlr    r3
+
+   ld  r0, 40(r1)
+   mtctr   r0
+   ld  r10, 104(r1)
+   ld  r9, 96(r1)
+   ld  r8, 88(r1)
+   ld  r7, 80(r1)
+   ld  r6, 72(r1)
+   ld  r5, 64(r1)
+   ld  r4, 56(r1)
+   ld  r3, 48(r1)
+
+   addi    r1, r1, 112
+   mflr    r0
+   std r0, LRSAVE(r1)
+   bctr
+#else
ld  r11, 112(r1)
std r3, 16(r11)
 
@@ -1284,6 +1323,7 @@ _GLOBAL(ftrace_graph_caller)
	mtlr	r0
	addi	r1, r1, 112
blr
+#endif
 
 _GLOBAL(return_to_handler)
/* need to save return values */
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 44d4d8e..080c525 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -306,11 +306,19 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 * The load offset is different depending on the ABI. For simplicity
 * just mask it out when doing the compare.
 */
+#ifndef CC_USING_MPROFILE_KERNEL
	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
-   pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
+   pr_err("Unexpected call sequence at %p: %x %x\n",
+   ip, op[0], op[1]);
return -EINVAL;
}
-
+#else
+   /* look for patched "NOP" on ppc64 with -mprofile-kernel */
+   if (op[0] != 0x60000000) {
+   pr_err("Unexpected call at %p: %x\n", ip, op[0]);
+   return -EINVAL;
+   }
+#endif
/* If we never set up a trampoline to ftrace_caller, then bail */
if (!rec->arch.mod->arch.tramp) {
pr_err("No ftrace trampoline\n");
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 6838451..0819ce7 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -475,6 +475,19 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
 static int restore_r2(u32 *instruction, struct module *me)
 {
if (*instruction != PPC_INST_NOP) {
+#ifdef CC_USING_MPROFILE_KERNEL
+   /* -mprofile_kernel sequence starting with
+* mflr r0; std r0, LRSAVE(r1)
+*/
+   if (instruction[-3] == 0x7c0802a6 &&
+   instruction[-2] == 0xf8010010) {
+   /* Nothing to be done here, it's an _mcount
+* call location and r2 will have to be
+* restored in the _mcount function.
+*/
+   return 2;
+   };
+#endif
pr_err("%s: Expect noop after relocate, got %08x\n",