On 2/28/19 6:29 PM, Masami Hiramatsu wrote:
> Hi Yonghong,
> 
> On Thu, 28 Feb 2019 22:49:43 +0000
> Yonghong Song <[email protected]> wrote:
> 
>>
>>
>> On 2/28/19 8:03 AM, Masami Hiramatsu wrote:
>>> Add probe_user_read(), strncpy_from_unsafe_user() and
>>> strnlen_unsafe_user() which allows caller to access user-space
>>> in IRQ context.
>>>
>>> Current probe_kernel_read() and strncpy_from_unsafe() are
>>> not available for user-space memory, because it sets
>>> KERNEL_DS while accessing data. On some arch, user address
>>> space and kernel address space can be co-exist, but others
>>> can not. In that case, setting KERNEL_DS means given
>>
>> Just curious. Given the list of arch's currently linux supports,
>> do you know which arch's fall into "user address space and
>> kernel address space" can co-exist, and which arch's cannot?
> 
> As far as I can heard, (and based on probe_kernel_read() failure)
> sparc32 (and sparc64?), arm64, and s390 will not work.
> x86 works, but if user patch the 4G/4G, it shouldn't work.

Thanks for the info! Great to know the details.

> 
> Thank you,
> 
>>
>> Thanks!
>>
>> Yonghong
>>
>>
>>> address is treated as a kernel address space.
>>> Also strnlen_user() is only available from user context since
>>> it can sleep if pagefault is enabled.
>>>
>>> To access user-space memory without pagefault, we need
>>> these new functions which sets USER_DS while accessing
>>> the data.
>>>
>>> Signed-off-by: Masami Hiramatsu <[email protected]>
>>> ---
>>>     Changes in v5:
>>>      - Simplify probe_user_read() (Thanks, Peter!)
>>>      - Add strnlen_unsafe_user()
>>>     Changes in v3:
>>>      - Use user_access_ok() for probe_user_read().
>>>     Changes in v2:
>>>      - Simplify strncpy_from_unsafe_user() using strncpy_from_user()
>>>        according to Linus's suggestion.
>>>      - Simplify probe_user_read() not using intermediate function.
>>> ---
>>>    include/linux/uaccess.h |   14 +++++
>>>    mm/maccess.c            |  122 
>>> +++++++++++++++++++++++++++++++++++++++++++++--
>>>    2 files changed, 130 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
>>> index 1afd9dfabe67..5be7f9adb418 100644
>>> --- a/include/linux/uaccess.h
>>> +++ b/include/linux/uaccess.h
>>> @@ -258,6 +258,17 @@ extern long probe_kernel_read(void *dst, const void 
>>> *src, size_t size);
>>>    extern long __probe_kernel_read(void *dst, const void *src, size_t size);
>>>    
>>>    /*
>>> + * probe_user_read(): safely attempt to read from a location in user space
>>> + * @dst: pointer to the buffer that shall take the data
>>> + * @src: address to read from
>>> + * @size: size of the data chunk
>>> + *
>>> + * Safely read from address @src to the buffer at @dst.  If a kernel fault
>>> + * happens, handle that and return -EFAULT.
>>> + */
>>> +extern long probe_user_read(void *dst, const void __user *src, size_t 
>>> size);
>>> +
>>> +/*
>>>     * probe_kernel_write(): safely attempt to write to a location
>>>     * @dst: address to write to
>>>     * @src: pointer to the data that shall be written
>>> @@ -270,6 +281,9 @@ extern long notrace probe_kernel_write(void *dst, const 
>>> void *src, size_t size);
>>>    extern long notrace __probe_kernel_write(void *dst, const void *src, 
>>> size_t size);
>>>    
>>>    extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long 
>>> count);
>>> +extern long strncpy_from_unsafe_user(char *dst, const void __user 
>>> *unsafe_addr,
>>> +                                long count);
>>> +extern long strnlen_unsafe_user(const void __user *unsafe_addr, long 
>>> count);
>>>    
>>>    /**
>>>     * probe_kernel_address(): safely attempt to read from a location
>>> diff --git a/mm/maccess.c b/mm/maccess.c
>>> index ec00be51a24f..d1b2ec78d9ef 100644
>>> --- a/mm/maccess.c
>>> +++ b/mm/maccess.c
>>> @@ -5,8 +5,20 @@
>>>    #include <linux/mm.h>
>>>    #include <linux/uaccess.h>
>>>    
>>> +static __always_inline long
>>> +probe_read_common(void *dst, const void __user *src, size_t size)
>>> +{
>>> +   long ret;
>>> +
>>> +   pagefault_disable();
>>> +   ret = __copy_from_user_inatomic(dst, src, size);
>>> +   pagefault_enable();
>>> +
>>> +   return ret ? -EFAULT : 0;
>>> +}
>>> +
>>>    /**
>>> - * probe_kernel_read(): safely attempt to read from a location
>>> + * probe_kernel_read(): safely attempt to read from a kernel-space location
>>>     * @dst: pointer to the buffer that shall take the data
>>>     * @src: address to read from
>>>     * @size: size of the data chunk
>>> @@ -29,17 +41,45 @@ long __probe_kernel_read(void *dst, const void *src, 
>>> size_t size)
>>>     mm_segment_t old_fs = get_fs();
>>>    
>>>     set_fs(KERNEL_DS);
>>> -   pagefault_disable();
>>> -   ret = __copy_from_user_inatomic(dst,
>>> -                   (__force const void __user *)src, size);
>>> -   pagefault_enable();
>>> +   ret = probe_read_common(dst, (__force const void __user *)src, size);
>>>     set_fs(old_fs);
>>>    
>>> -   return ret ? -EFAULT : 0;
>>> +   return ret;
>>>    }
>>>    EXPORT_SYMBOL_GPL(probe_kernel_read);
>>>    
>>>    /**
>>> + * probe_user_read(): safely attempt to read from a user-space location
>>> + * @dst: pointer to the buffer that shall take the data
>>> + * @src: address to read from. This must be a user address.
>>> + * @size: size of the data chunk
>>> + *
>>> + * Safely read from user address @src to the buffer at @dst. If a kernel 
>>> fault
>>> + * happens, handle that and return -EFAULT.
>>> + */
>>> +
>>> +long __weak probe_user_read(void *dst, const void __user *src, size_t size)
>>> +    __attribute__((alias("__probe_user_read")));
>>> +
>>> +long __probe_user_read(void *dst, const void __user *src, size_t size)
>>> +{
>>> +   long ret = -EFAULT;
>>> +   mm_segment_t old_fs = get_fs();
>>> +
>>> +   /*
>>> +    * Since this can be called in IRQ context, we carefully set the
>>> +    * USER_DS and use user_access_ok() which checks segment setting
>>> +    * instead of task context.
>>> +    */
>>> +   set_fs(USER_DS);
>>> +   if (user_access_ok(src, size))
>>> +           ret = probe_read_common(dst, src, size);
>>> +   set_fs(old_fs);
>>> +   return ret;
>>> +}
>>> +EXPORT_SYMBOL_GPL(probe_user_read);
>>> +
>>> +/**
>>>     * probe_kernel_write(): safely attempt to write to a location
>>>     * @dst: address to write to
>>>     * @src: pointer to the data that shall be written
>>> @@ -66,6 +106,7 @@ long __probe_kernel_write(void *dst, const void *src, 
>>> size_t size)
>>>    }
>>>    EXPORT_SYMBOL_GPL(probe_kernel_write);
>>>    
>>> +
>>>    /**
>>>     * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe 
>>> address.
>>>     * @dst:   Destination address, in kernel space.  This buffer must be at
>>> @@ -105,3 +146,72 @@ long strncpy_from_unsafe(char *dst, const void 
>>> *unsafe_addr, long count)
>>>    
>>>     return ret ? -EFAULT : src - unsafe_addr;
>>>    }
>>> +
>>> +/**
>>> + * strncpy_from_unsafe_user: - Copy a NUL terminated string from unsafe 
>>> user
>>> + *                         address.
>>> + * @dst:   Destination address, in kernel space.  This buffer must be at
>>> + *         least @count bytes long.
>>> + * @unsafe_addr: Unsafe user address.
>>> + * @count: Maximum number of bytes to copy, including the trailing NUL.
>>> + *
>>> + * Copies a NUL-terminated string from unsafe user address to kernel 
>>> buffer.
>>> + *
>>> + * On success, returns the length of the string INCLUDING the trailing NUL.
>>> + *
>>> + * If access fails, returns -EFAULT (some data may have been copied
>>> + * and the trailing NUL added).
>>> + *
>>> + * If @count is smaller than the length of the string, copies @count-1 
>>> bytes,
>>> + * sets the last byte of @dst buffer to NUL and returns @count.
>>> + */
>>> +long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
>>> +                         long count)
>>> +{
>>> +   mm_segment_t old_fs = get_fs();
>>> +   long ret;
>>> +
>>> +   if (unlikely(count <= 0))
>>> +           return 0;
>>> +
>>> +   set_fs(USER_DS);
>>> +   pagefault_disable();
>>> +   ret = strncpy_from_user(dst, unsafe_addr, count);
>>> +   pagefault_enable();
>>> +   set_fs(old_fs);
>>> +   if (ret >= count) {
>>> +           ret = count;
>>> +           dst[ret - 1] = '\0';
>>> +   } else if (ret > 0)
>>> +           ret++;
>>> +   return ret;
>>> +}
>>> +
>>> +/**
>>> + * strnlen_unsafe_user: - Get the size of a user string INCLUDING final 
>>> NUL.
>>> + * @unsafe_addr: The string to measure.
>>> + * @count: Maximum count (including NUL character)
>>> + *
>>> + * Get the size of a NUL-terminated string in user space without pagefault.
>>> + *
>>> + * Returns the size of the string INCLUDING the terminating NUL.
>>> + *
>>> + * If the string is too long, returns a number larger than @count. User
>>> + * has to check the return value against "> count".
>>> + * On exception (or invalid count), returns 0.
>>> + *
>>> + * Unlike strnlen_user, this can be used from IRQ handler etc. because
>>> + * it disables pagefaults.
>>> + */
>>> +long strnlen_unsafe_user(const void __user *unsafe_addr, long count)
>>> +{
>>> +   mm_segment_t old_fs = get_fs();
>>> +   int ret;
>>> +
>>> +   set_fs(USER_DS);
>>> +   pagefault_disable();
>>> +   ret = strnlen_user(unsafe_addr, count);
>>> +   pagefault_enable();
>>> +   set_fs(old_fs);
>>> +   return ret;
>>> +}
>>>
> 
> 

Reply via email to