Re: [PATCH v4 4/9] kentry: Simplify the common syscall API

2021-03-19 Thread Thomas Gleixner
On Wed, Mar 17 2021 at 11:12, Andy Lutomirski wrote:
> @@ -119,31 +119,12 @@ static inline __must_check int 
> arch_syscall_enter_tracehook(struct pt_regs *regs
>  void enter_from_user_mode(struct pt_regs *regs);
>  
>  /**
> + * kentry_syscall_begin - Prepare to invoke a syscall handler
>   * @regs:Pointer to currents pt_regs
>   * @syscall: The syscall number
>   *
>   * Invoked from architecture specific syscall entry code with interrupts
> - * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
> - * architecture specific work.
> + * enabled after kentry_enter_from_usermode or a similar function.

Please write functions with () at the end. Also what the heck means
'similar function' here? I really spent quite some time to document this
stuff and it wants to stay that way.

>   *
> + * Called with IRQs on.  Returns with IRQs still on.

interrupts enabled please. This is technical documentation and not twatter.

> +void kentry_syscall_end(struct pt_regs *regs);

Thanks,

tglx


Re: [PATCH v4 4/9] kentry: Simplify the common syscall API

2021-03-19 Thread Thomas Gleixner
On Wed, Mar 17 2021 at 11:12, Andy Lutomirski wrote:
> The new common syscall API had a large and confusing API surface.  Simplify
> it.  Now there is exactly one way to use it.  It's a bit more verbose than
> the old way for the simple x86_64 native case, but it's much easier to use
> right,

and therefore much easier to get wrong...

>  __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
>  {
> - nr = syscall_enter_from_user_mode(regs, nr);
> -
> + kentry_enter_from_user_mode(regs);
> + local_irq_enable();

... That needs to be _after_ instrumentation_begin(). If you fiddle
with this then please make sure that objtool validates noinstr...

> + kentry_enter_from_user_mode(regs);
> + local_irq_enable();

Ditto

> + instrumentation_begin();
>  static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
>  {
>   unsigned int nr = syscall_32_enter(regs);
> + bool ret;
>   int res;
>  
> - /*
> -  * This cannot use syscall_enter_from_user_mode() as it has to
> -  * fetch EBP before invoking any of the syscall entry work
> -  * functions.
> -  */
> - syscall_enter_from_user_mode_prepare(regs);
> -
> + kentry_enter_from_user_mode(regs);
> + local_irq_enable();

...

>   instrumentation_begin();

Thanks,

tglx


[PATCH v4 4/9] kentry: Simplify the common syscall API

2021-03-17 Thread Andy Lutomirski
The new common syscall API had a large and confusing API surface.  Simplify
it.  Now there is exactly one way to use it.  It's a bit more verbose than
the old way for the simple x86_64 native case, but it's much easier to use
right, and the diffstat should speak for itself.

Acked-by: Mark Rutland 
Signed-off-by: Andy Lutomirski 
---
 arch/x86/entry/common.c  | 57 +++-
 include/linux/entry-common.h | 86 ++--
 kernel/entry/common.c| 57 +++-
 3 files changed, 54 insertions(+), 146 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ef1c65938a6b..8710b2300b8d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -38,9 +38,12 @@
 #ifdef CONFIG_X86_64
 __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
-   nr = syscall_enter_from_user_mode(regs, nr);
-
+   kentry_enter_from_user_mode(regs);
+   local_irq_enable();
instrumentation_begin();
+
+   nr = kentry_syscall_begin(regs, nr);
+
if (likely(nr < NR_syscalls)) {
nr = array_index_nospec(nr, NR_syscalls);
regs->ax = sys_call_table[nr](regs);
@@ -52,8 +55,12 @@ __visible noinstr void do_syscall_64(unsigned long nr, 
struct pt_regs *regs)
regs->ax = x32_sys_call_table[nr](regs);
 #endif
}
+
+   kentry_syscall_end(regs);
+
+   local_irq_disable();
instrumentation_end();
-   syscall_exit_to_user_mode(regs);
+   kentry_exit_to_user_mode(regs);
 }
 #endif
 
@@ -83,33 +90,34 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs 
*regs)
 {
unsigned int nr = syscall_32_enter(regs);
 
+   kentry_enter_from_user_mode(regs);
+   local_irq_enable();
+   instrumentation_begin();
+
/*
 * Subtlety here: if ptrace pokes something larger than 2^32-1 into
 * orig_ax, the unsigned int return value truncates it.  This may
 * or may not be necessary, but it matches the old asm behavior.
 */
-   nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
-   instrumentation_begin();
-
+   nr = (unsigned int)kentry_syscall_begin(regs, nr);
do_syscall_32_irqs_on(regs, nr);
+   kentry_syscall_end(regs);
 
+   local_irq_disable();
instrumentation_end();
-   syscall_exit_to_user_mode(regs);
+   kentry_exit_to_user_mode(regs);
 }
 
 static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 {
unsigned int nr = syscall_32_enter(regs);
+   bool ret;
int res;
 
-   /*
-* This cannot use syscall_enter_from_user_mode() as it has to
-* fetch EBP before invoking any of the syscall entry work
-* functions.
-*/
-   syscall_enter_from_user_mode_prepare(regs);
-
+   kentry_enter_from_user_mode(regs);
+   local_irq_enable();
instrumentation_begin();
+
/* Fetch EBP from where the vDSO stashed it. */
if (IS_ENABLED(CONFIG_X86_64)) {
/*
@@ -126,21 +134,23 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs 
*regs)
if (res) {
/* User code screwed up. */
regs->ax = -EFAULT;
-
-   instrumentation_end();
-   local_irq_disable();
-   irqentry_exit_to_user_mode(regs);
-   return false;
+   ret = false;
+   goto out;
}
 
/* The case truncates any ptrace induced syscall nr > 2^32 -1 */
-   nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
+   nr = (unsigned int)kentry_syscall_begin(regs, nr);
 
/* Now this is just like a normal syscall. */
do_syscall_32_irqs_on(regs, nr);
 
+   kentry_syscall_end(regs);
+   ret = true;
+
+out:
+   local_irq_disable();
instrumentation_end();
-   syscall_exit_to_user_mode(regs);
+   kentry_exit_to_user_mode(regs);
return true;
 }
 
@@ -233,8 +243,11 @@ __visible void noinstr ret_from_fork(struct task_struct 
*prev,
user_regs->ax = 0;
}
 
+   kentry_syscall_end(user_regs);
+
+   local_irq_disable();
instrumentation_end();
-   syscall_exit_to_user_mode(user_regs);
+   kentry_exit_to_user_mode(user_regs);
 }
 
 #ifdef CONFIG_XEN_PV
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index fd2d7c35670a..5287c6c15a66 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -119,31 +119,12 @@ static inline __must_check int 
arch_syscall_enter_tracehook(struct pt_regs *regs
 void enter_from_user_mode(struct pt_regs *regs);
 
 /**
- * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
- * @regs:  Pointer to currents pt_regs
- *
- * Invoked from architecture specific syscall entry code with interrupts
- * disabled. The calling code has to be non-instrumentable. When the
- *