Re: [PATCH 13/20] maccess: always use strict semantics for probe_kernel_read

2020-05-20 Thread Christoph Hellwig
On Wed, May 20, 2020 at 08:11:26PM +0900, Masami Hiramatsu wrote:
> > -   ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
> > +   if (IS_ENABLED(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) &&
> > +   (unsigned long)addr < TASK_SIZE) {
> > +   ret = probe_user_read(&c,
> > +   (__force u8 __user *)addr + len, 1);
> > +   } else {
> > +   ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
> > +   }
> > len++;
> > } while (c && ret == 0 && len < MAX_STRING_SIZE);
> 
> To avoid redundant check in the loop, we can use strnlen_user_nofault() out of
> the loop. Something like below.

Yes, I've done something very similar in response to Linus' comment (just
using an ifdef instead).


Re: [PATCH 13/20] maccess: always use strict semantics for probe_kernel_read

2020-05-20 Thread Masami Hiramatsu
On Tue, 19 May 2020 15:44:42 +0200
Christoph Hellwig  wrote:

> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 2f6737cc53e6c..82da20e712507 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -1208,7 +1208,13 @@ fetch_store_strlen(unsigned long addr)
>   u8 c;
>  
>   do {
> - ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
> + if (IS_ENABLED(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) &&
> + (unsigned long)addr < TASK_SIZE) {
> + ret = probe_user_read(&c,
> + (__force u8 __user *)addr + len, 1);
> + } else {
> + ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
> + }
>   len++;
>   } while (c && ret == 0 && len < MAX_STRING_SIZE);

To avoid redundant check in the loop, we can use strnlen_user_nofault() out of
the loop. Something like below.

...
u8 c;

if (IS_ENABLED(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) &&
(unsigned long)addr < TASK_SIZE) {
return strnlen_user_nofault((__force u8 __user *)addr, 
MAX_STRING_SIZE);

do {
ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
len++;
} while (c && ret == 0 && len < MAX_STRING_SIZE);
...

This must work because we must not have a string that continues across
kernel space and user space.

Thank you,


-- 
Masami Hiramatsu 


Re: [PATCH 13/20] maccess: always use strict semantics for probe_kernel_read

2020-05-19 Thread Linus Torvalds
On Tue, May 19, 2020 at 6:45 AM Christoph Hellwig  wrote:
>
> +
> +   if (IS_ENABLED(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) &&
> +   compat && (unsigned long)unsafe_ptr < TASK_SIZE)
> +   ret = probe_user_read(dst, user_ptr, size);
> +   else
> +   ret = probe_kernel_read(dst, unsafe_ptr, size);
...
> -   ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
> +   if (IS_ENABLED(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) 
> &&
> +   (unsigned long)addr < TASK_SIZE) {
> +   ret = probe_user_read(&c,
> +   (__force u8 __user *)addr + len, 1);
> +   } else {
> +   ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
> +   }
...
> +   if (IS_ENABLED(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) &&
> +   (unsigned long)src < TASK_SIZE) {
> +   return probe_user_read(dest, (__force const void __user *)src,
> +   size);

If you can't make the conditional legible and fit on a single line and
make it obvious _why_ you have that conditional, just use a helper
function.

Either for just the conditional itself, or for the whole operation.
And at least for the bpf case, since you want the whole operation for
that error handling and clearing of the result buffer anyway, I
suspect it would be cleaner to have that kind of
"bpf_copy_legacy_nofault()" function or whatever.

(And see previous email why I dislike that "compat" naming in the bpf case)

Linus


[PATCH 13/20] maccess: always use strict semantics for probe_kernel_read

2020-05-19 Thread Christoph Hellwig
Except for historical confusion in the kprobes/uprobes and bpf tracers,
there is no good reason to ever allow user memory accesses from
probe_kernel_read.  Switch probe_kernel_read to only read from kernel
memory itself, and try to read user memory in the tracers only if
the address is smaller than TASK_SIZE, and the architecture has
non-overlapping address ranges.

Signed-off-by: Christoph Hellwig 
---
 arch/parisc/lib/memcpy.c|  3 +--
 arch/um/kernel/maccess.c|  3 +--
 arch/x86/mm/maccess.c   |  5 +
 include/linux/uaccess.h |  4 +---
 kernel/trace/bpf_trace.c| 18 -
 kernel/trace/trace_kprobe.c | 14 -
 mm/maccess.c| 39 ++---
 7 files changed, 36 insertions(+), 50 deletions(-)

diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index 5ef648bd33119..9fe662b3b5604 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -57,8 +57,7 @@ void * memcpy(void * dst,const void *src, size_t count)
 EXPORT_SYMBOL(raw_copy_in_user);
 EXPORT_SYMBOL(memcpy);
 
-bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t size,
-   bool strict)
+bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t size)
 {
if ((unsigned long)unsafe_src < PAGE_SIZE)
return false;
diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c
index 90a1bec923158..734f3d7e57c0f 100644
--- a/arch/um/kernel/maccess.c
+++ b/arch/um/kernel/maccess.c
@@ -7,8 +7,7 @@
 #include 
 #include 
 
-bool probe_kernel_read_allowed(void *dst, const void *src, size_t size,
-   bool strict)
+bool probe_kernel_read_allowed(void *dst, const void *src, size_t size)
 {
void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE);
 
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
index 5c323ab187b27..a1bd81677aa72 100644
--- a/arch/x86/mm/maccess.c
+++ b/arch/x86/mm/maccess.c
@@ -26,10 +26,7 @@ static __always_inline bool invalid_probe_range(u64 vaddr)
 }
 #endif
 
-bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t size,
-   bool strict)
+bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t size)
 {
-   if (!strict)
-   return true;
return !invalid_probe_range((unsigned long)unsafe_src);
 }
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 28944a14e0534..78e0ff8641559 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -301,11 +301,9 @@ copy_struct_from_user(void *dst, size_t ksize, const void 
__user *src,
return 0;
 }
 
-bool probe_kernel_read_allowed(void *dst, const void *unsafe_src,
-   size_t size, bool strict);
+bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t size);
 
 extern long probe_kernel_read(void *dst, const void *src, size_t size);
-extern long probe_kernel_read_strict(void *dst, const void *src, size_t size);
 extern long probe_user_read(void *dst, const void __user *src, size_t size);
 
 extern long notrace probe_kernel_write(void *dst, const void *src, size_t 
size);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index bab9b8a175cb0..c6007d9a987d5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -180,15 +180,23 @@ static __always_inline int
 bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr,
 const bool compat)
 {
+   const void __user *user_ptr = (__force const void __user *)unsafe_ptr;
int ret = security_locked_down(LOCKDOWN_BPF_READ);
 
if (unlikely(ret < 0))
-   goto out;
-   ret = compat ? probe_kernel_read(dst, unsafe_ptr, size) :
- probe_kernel_read_strict(dst, unsafe_ptr, size);
+   goto fail;
+
+   if (IS_ENABLED(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) &&
+   compat && (unsigned long)unsafe_ptr < TASK_SIZE)
+   ret = probe_user_read(dst, user_ptr, size);
+   else
+   ret = probe_kernel_read(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
-out:
-   memset(dst, 0, size);
+   goto fail;
+
+   return 0;
+fail:
+   memset(dst, 0, size);
return ret;
 }
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2f6737cc53e6c..82da20e712507 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1208,7 +1208,13 @@ fetch_store_strlen(unsigned long addr)
u8 c;
 
do {
-   ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
+   if (IS_ENABLED(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) &&
+   (unsigned long)addr < TASK_SIZE) {
+   ret = probe_user_read(&c,
+   (__force u8 __user *)addr + len, 1);
+   } else {
+   ret = probe_kernel_read(&c, (u8 *