Re: [PATCH v6 20/25] powerpc: Change system_call_exception calling convention

2022-09-23 Thread Nicholas Piggin
On Wed Sep 21, 2022 at 4:56 PM AEST, Rohan McLure wrote:
> Change system_call_exception arguments to pass a pointer to a stack frame
> containing caller state, as well as the original r0, which determines the
> number of the syscall. This has been observed to yield improved performance
> compared to passing them by registers, circumventing the need to allocate a
> stack frame.
>
> Signed-off-by: Rohan McLure 

Thanks for splitting it out, I think it does make it nicer to review.

[...]

> diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
> index 15af0ed019a7..0e9ba3efee94 100644
> --- a/arch/powerpc/kernel/syscall.c
> +++ b/arch/powerpc/kernel/syscall.c
> @@ -13,9 +13,7 @@
>  
>  
>  /* Has to run notrace because it is entered not completely "reconciled" */
> -notrace long system_call_exception(long r3, long r4, long r5,
> -long r6, long r7, long r8,
> -unsigned long r0, struct pt_regs *regs)
> +notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
>  {
>   long ret;
>   syscall_fn f;
> @@ -136,12 +134,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
>   r0 = do_syscall_trace_enter(regs);
>   if (unlikely(r0 >= NR_syscalls))
>   return regs->gpr[3];
> - r3 = regs->gpr[3];
> - r4 = regs->gpr[4];
> - r5 = regs->gpr[5];
> - r6 = regs->gpr[6];
> - r7 = regs->gpr[7];
> - r8 = regs->gpr[8];
>  
>   } else if (unlikely(r0 >= NR_syscalls)) {
>   if (unlikely(trap_is_unsupported_scv(regs))) {
> -- 
> 2.34.1

This is probably just missing the hunk

+   ret = f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
+           regs->gpr[6], regs->gpr[7], regs->gpr[8]);

which got into your next patch.

Otherwise I think it looks good.

Reviewed-by: Nicholas Piggin 


[PATCH v6 20/25] powerpc: Change system_call_exception calling convention

2022-09-21 Thread Rohan McLure
Change system_call_exception arguments to pass a pointer to a stack frame
containing caller state, as well as the original r0, which determines the
number of the syscall. This has been observed to yield improved performance
compared to passing them by registers, circumventing the need to allocate a
stack frame.

Signed-off-by: Rohan McLure 
---
V6: Split off from syscall wrapper patch.
---
 arch/powerpc/include/asm/interrupt.h |  3 +--
 arch/powerpc/kernel/entry_32.S   |  6 +++---
 arch/powerpc/kernel/interrupt_64.S   | 20 ++--
 arch/powerpc/kernel/syscall.c| 10 +-
 4 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 8069dbc4b8d1..48eec9cd1429 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -665,8 +665,7 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
local_irq_enable();
 }
 
-long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8,
-  unsigned long r0, struct pt_regs *regs);
+long system_call_exception(struct pt_regs *regs, unsigned long r0);
 notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv);
 notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
 notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e4b694cebc44..96782aa72083 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -122,9 +122,9 @@ transfer_to_syscall:
SAVE_NVGPRS(r1)
kuep_lock
 
-   /* Calling convention has r9 = orig r0, r10 = regs */
-   addi    r10,r1,STACK_FRAME_OVERHEAD
-   mr  r9,r0
+   /* Calling convention has r3 = regs, r4 = orig r0 */
+   addi    r3,r1,STACK_FRAME_OVERHEAD
+   mr  r4,r0
bl  system_call_exception
 
 ret_from_syscall:
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 7d92a7a54727..a5dd78bdbe6d 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -70,7 +70,7 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
ld  r2,PACATOC(r13)
mfcr    r12
li  r11,0
-   /* Can we avoid saving r3-r8 in common case? */
+   /* Save syscall parameters in r3-r8 */
SAVE_GPRS(3, 8, r1)
/* Zero r9-r12, this should only be required when restoring all GPRs */
std r11,GPR9(r1)
@@ -87,9 +87,11 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
std r11,_TRAP(r1)
std r12,_CCR(r1)
std r3,ORIG_GPR3(r1)
-   addi    r10,r1,STACK_FRAME_OVERHEAD
+   /* Calling convention has r3 = regs, r4 = orig r0 */
+   addi    r3,r1,STACK_FRAME_OVERHEAD
+   mr  r4,r0
ld  r11,exception_marker@toc(r2)
-   std r11,-16(r10)    /* "regshere" marker */
+   std r11,-16(r3) /* "regshere" marker */
 
 BEGIN_FTR_SECTION
HMT_MEDIUM
@@ -104,8 +106,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 * but this is the best we can do.
 */
 
-   /* Calling convention has r9 = orig r0, r10 = regs */
-   mr  r9,r0
bl  system_call_exception
 
 .Lsyscall_vectored_\name\()_exit:
@@ -237,7 +237,7 @@ END_BTB_FLUSH_SECTION
ld  r2,PACATOC(r13)
mfcr    r12
li  r11,0
-   /* Can we avoid saving r3-r8 in common case? */
+   /* Save syscall parameters in r3-r8 */
SAVE_GPRS(3, 8, r1)
/* Zero r9-r12, this should only be required when restoring all GPRs */
std r11,GPR9(r1)
@@ -260,9 +260,11 @@ END_BTB_FLUSH_SECTION
std r11,_TRAP(r1)
std r12,_CCR(r1)
std r3,ORIG_GPR3(r1)
-   addi    r10,r1,STACK_FRAME_OVERHEAD
+   /* Calling convention has r3 = regs, r4 = orig r0 */
+   addi    r3,r1,STACK_FRAME_OVERHEAD
+   mr  r4,r0
ld  r11,exception_marker@toc(r2)
-   std r11,-16(r10)    /* "regshere" marker */
+   std r11,-16(r3) /* "regshere" marker */
 
 #ifdef CONFIG_PPC_BOOK3S
li  r11,1
@@ -283,8 +285,6 @@ END_BTB_FLUSH_SECTION
wrteei  1
 #endif
 
-   /* Calling convention has r9 = orig r0, r10 = regs */
-   mr  r9,r0
bl  system_call_exception
 
 .Lsyscall_exit:
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 15af0ed019a7..0e9ba3efee94 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -13,9 +13,7 @@
 
 
 /* Has to run notrace because it is entered not completely "reconciled" */
-notrace long system_call_exception(long r3, long r4, long r5,
-  long r6, long r7, long r8,
-