Re: [RFC PATCH 27/27] powerpc/64s: system call support for scv/rfscv instructions

2019-10-02 Thread Michal Suchánek
On Sun, Sep 15, 2019 at 11:28:13AM +1000, Nicholas Piggin wrote:
> Add support for the scv instruction on POWER9 and later CPUs.
> 
> For now this implements only the zeroth scv vector, 'scv 0', which
> behaves identically to 'sc' system calls except that lr is not
> preserved and it is 64-bit only. This ABI may still change, so it is
> for testing only.
> 
> This also doesn't yet properly handle PR KVM, or the case where a guest
> is denied AIL=3 mode. I haven't added real mode entry points, so scv
> must not be used when AIL=0, but I haven't cleared the FSCR in this
> case.
> 
> This also implements a strange hack to handle the unimplemented
> vectors: it schedules a decrementer and expects it to interrupt and
> replay pending soft-masked interrupts. This is unlikely to be a good
> idea; it needs a proper handler that replays in case an interrupt
> is pending.
> 
> It may also require some errata handling before it can be safely used
> on all POWER9 CPUs; I have to look that up.
> 
> rfscv is implemented to return from scv type system calls. It cannot
> be used to return from sc system calls because those are defined to
> preserve lr.
> 
> In a comparison of getpid syscall, the test program had scv taking
> about 3 more cycles in user mode (92 vs 89 for sc), due to lr handling.
> Total cycles taken for a getpid system call on POWER9 are improved from
> 436 to 345 (26.3% faster), mostly due to reducing mtmsr and mtspr.
...
> diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
> index 034b52d3d78c..3e8aa5ae8ec8 100644
> --- a/arch/powerpc/kernel/syscall_64.c
> +++ b/arch/powerpc/kernel/syscall_64.c
> @@ -15,6 +15,77 @@ extern void __noreturn tabort_syscall(void);
>  
>  typedef long (*syscall_fn)(long, long, long, long, long, long);
>  
> +#ifdef CONFIG_PPC_BOOK3S
> +long system_call_vectored_exception(long r3, long r4, long r5, long r6, long 
> r7, long r8, unsigned long r0, struct pt_regs *regs)
> +{
> + unsigned long ti_flags;
> + syscall_fn f;
> +
> + BUG_ON(!(regs->msr & MSR_RI));
> + BUG_ON(!(regs->msr & MSR_PR));
> + BUG_ON(!FULL_REGS(regs));
> + BUG_ON(regs->softe != IRQS_ENABLED);
> +
> + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
> + unlikely(regs->msr & MSR_TS_T))
> + tabort_syscall();
> +
> + account_cpu_user_entry();
> +
> +#ifdef CONFIG_PPC_SPLPAR
> + if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
> + firmware_has_feature(FW_FEATURE_SPLPAR)) {
> + struct lppaca *lp = get_lppaca();
> +
> + if (unlikely(local_paca->dtl_ridx != lp->dtl_idx))
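
This adds another instance of the missing endian conversion issue:
lp->dtl_idx is stored big-endian in the lppaca, so it needs
be64_to_cpu() before it is compared with local_paca->dtl_ridx.
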
> + accumulate_stolen_time();
> + }
> +#endif

Thanks

Michal


[RFC PATCH 27/27] powerpc/64s: system call support for scv/rfscv instructions

2019-09-14 Thread Nicholas Piggin
Add support for the scv instruction on POWER9 and later CPUs.

For now this implements only the zeroth scv vector, 'scv 0', which
behaves identically to 'sc' system calls except that lr is not
preserved and it is 64-bit only. This ABI may still change, so it is
for testing only.

This also doesn't yet properly handle PR KVM, or the case where a guest
is denied AIL=3 mode. I haven't added real mode entry points, so scv
must not be used when AIL=0, but I haven't cleared the FSCR in this
case.

This also implements a strange hack to handle the unimplemented
vectors: it schedules a decrementer and expects it to interrupt and
replay pending soft-masked interrupts. This is unlikely to be a good
idea; it needs a proper handler that replays in case an interrupt
is pending.

It may also require some errata handling before it can be safely used
on all POWER9 CPUs; I have to look that up.

rfscv is implemented to return from scv type system calls. It cannot
be used to return from sc system calls because those are defined to
preserve lr.

In a comparison of getpid syscall, the test program had scv taking
about 3 more cycles in user mode (92 vs 89 for sc), due to lr handling.
Total cycles taken for a getpid system call on POWER9 are improved from
436 to 345 (26.3% faster), mostly due to reducing mtmsr and mtspr.
---
 arch/powerpc/include/asm/head-64.h    |  2 +-
 arch/powerpc/include/asm/ppc_asm.h    |  2 +
 arch/powerpc/include/asm/processor.h  |  2 +-
 arch/powerpc/kernel/cpu_setup_power.S |  2 +-
 arch/powerpc/kernel/dt_cpu_ftrs.c     |  1 +
 arch/powerpc/kernel/entry_64.S        | 94 +++
 arch/powerpc/kernel/exceptions-64s.S  | 77 --
 arch/powerpc/kernel/syscall_64.c      | 72 
 8 files changed, 245 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index 2dabcf668292..4cb9efa2eb21 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -128,7 +128,7 @@ end_##sname:
.if ((start) % (size) != 0);\
.error "Fixed section exception vector misalignment";   \
.endif; \
-   .if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100); \
+   .if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100) && 
((size) != 0x1000); \
.error "Fixed section exception vector bad size";   \
.endif; \
.if (start) < sname##_start;\
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 6b03dff61a05..160f3bb77ea4 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -755,6 +755,8 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, 
CPU_FTR_CELL_TB_BUG, 96)
 #define N_SLINE68
 #define N_SO   100
 
+#define RFSCV  .long 0x4ca4
+
 /*
  * Create an endian fixup trampoline
  *
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index a9993e7a443b..bda5667a74df 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -294,7 +294,7 @@ struct thread_struct {
.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
.addr_limit = KERNEL_DS, \
.fpexc_mode = 0, \
-   .fscr = FSCR_TAR | FSCR_EBB \
+   .fscr = FSCR_TAR | FSCR_EBB | FSCR_SCV \
 }
 #endif
 
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 3239a9fe6c1c..bbdcee752e67 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -178,7 +178,7 @@ __init_LPCR_ISA300:
 
 __init_FSCR:
mfspr   r3,SPRN_FSCR
-   ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB
+   ori r3,r3,FSCR_SCV|FSCR_TAR|FSCR_DSCR|FSCR_EBB
mtspr   SPRN_FSCR,r3
blr
 
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index bd95318d2202..453f1893906b 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -565,6 +565,7 @@ static struct dt_cpu_feature_match __initdata
{"little-endian", feat_enable_le, CPU_FTR_REAL_LE},
{"smt", feat_enable_smt, 0},
{"interrupt-facilities", feat_enable, 0},
+   {"system-call-vectored", feat_enable, 0},
{"timer-facilities", feat_enable, 0},
{"timer-facilities-v3", feat_enable, 0},
{"debug-facilities", feat_enable, 0},
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 00173cc904ef..360059feec1e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -62,6 +62,100 @@ exception_marker:
.section".text"
.align 7
 
+   .globl system_call_vectored_common
+system_call_vectored_common:
+