By writing the function in asm, we avoid cross-object code flow, and objtool no longer gets confused about a 'stray' CLAC.
Also; the asm version is actually _simpler_. Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org> --- arch/x86/include/asm/asm.h | 24 ------------------- arch/x86/include/asm/uaccess_64.h | 3 -- arch/x86/lib/copy_user_64.S | 48 ++++++++++++++++++++++++++++++++++++++ arch/x86/lib/usercopy_64.c | 20 --------------- 4 files changed, 48 insertions(+), 47 deletions(-) --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -148,30 +148,6 @@ _ASM_PTR (entry); \ .popsection -.macro ALIGN_DESTINATION - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jz 102f /* already aligned */ - subl $8,%ecx - negl %ecx - subl %ecx,%edx -100: movb (%rsi),%al -101: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 100b -102: - .section .fixup,"ax" -103: addl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE_UA(100b, 103b) - _ASM_EXTABLE_UA(101b, 103b) - .endm - #else # define _EXPAND_EXTABLE_HANDLE(x) #x # define _ASM_EXTABLE_HANDLE(from, to, handler) \ --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -208,9 +208,6 @@ __copy_from_user_flushcache(void *dst, c } unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len); - -unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len); #endif /* _ASM_X86_UACCESS_64_H */ --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -16,6 +16,30 @@ #include <asm/smap.h> #include <asm/export.h> +.macro ALIGN_DESTINATION + /* check for bad alignment of destination */ + movl %edi,%ecx + andl $7,%ecx + jz 102f /* already aligned */ + subl $8,%ecx + negl %ecx + subl %ecx,%edx +100: movb (%rsi),%al +101: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 100b +102: + .section .fixup,"ax" +103: addl %ecx,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous + + _ASM_EXTABLE_UA(100b, 103b) + _ASM_EXTABLE_UA(101b, 103b) + .endm + /* * copy_user_generic_unrolled 
- memory copy with exception handling. * This version is for CPUs like P4 that don't have efficient micro @@ -194,6 +218,30 @@ ENDPROC(copy_user_enhanced_fast_string) EXPORT_SYMBOL(copy_user_enhanced_fast_string) /* + * Try to copy last bytes and clear the rest if needed. + * Since protection fault in copy_from/to_user is not a normal situation, + * it is not necessary to optimize tail handling. + * + * Input: + * rdi destination + * rsi source + * rdx count + * + * Output: + * eax uncopied bytes or 0 if successful. + */ +ALIGN; +copy_user_handle_tail: + movl %edx,%ecx +1: rep movsb +2: mov %ecx,%eax + ASM_CLAC + ret + + _ASM_EXTABLE_UA(1b, 2b) +ENDPROC(copy_user_handle_tail) + +/* * copy_user_nocache - Uncached memory copy with exception handling * This will force destination out of cache for more performance. * --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -55,26 +55,6 @@ unsigned long clear_user(void __user *to EXPORT_SYMBOL(clear_user); /* - * Try to copy last bytes and clear the rest if needed. - * Since protection fault in copy_from/to_user is not a normal situation, - * it is not necessary to optimize tail handling. - */ -__visible unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len) -{ - for (; len; --len, to++) { - char c; - - if (__get_user_nocheck(c, from++, sizeof(char))) - break; - if (__put_user_nocheck(c, to, sizeof(char))) - break; - } - clac(); - return len; -} - -/* * Similar to copy_user_handle_tail, probe for the write fault point, * but reuse __memcpy_mcsafe in case a new read error is encountered. * clac() is handled in _copy_to_iter_mcsafe().