copy_from_iter_nocache() is only "nocache" for iovecs. Enhance it to also use a nocache copy for bvecs. This improves performance by 2-3X when splice()ing to a file in a DAX-mounted, pmem-backed file system.
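
For illustration only, not part of the change: a minimal user-space sketch of
the kind of workload that exercises this path, splicing pipe data into a file
on a DAX-mounted, pmem-backed file system. The mount point /mnt/pmem and the
file name are assumptions.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[65536];	/* one default pipe buffer's worth */
	int pipefd[2];
	int fd;
	ssize_t n;

	memset(buf, 'x', sizeof(buf));

	/* /mnt/pmem is an assumed DAX mount point for this sketch. */
	fd = open("/mnt/pmem/splice-test", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0 || pipe(pipefd) < 0) {
		perror("setup");
		return 1;
	}

	/* Fill the pipe, then splice its pages into the DAX-backed file. */
	if (write(pipefd[1], buf, sizeof(buf)) != sizeof(buf)) {
		perror("write");
		return 1;
	}
	n = splice(pipefd[0], NULL, fd, NULL, sizeof(buf), SPLICE_F_MOVE);
	if (n < 0) {
		perror("splice");
		return 1;
	}
	printf("spliced %zd bytes\n", n);
	close(fd);
	return 0;
}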
Cc: Ross Zwisler <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Dan Williams <[email protected]>
Signed-off-by: Brian Boylston <[email protected]>
Reviewed-by: Toshi Kani <[email protected]>
Reported-by: Oliver Moreno <[email protected]>
---
 arch/x86/include/asm/pmem.h |  6 +++---
 lib/iov_iter.c              | 11 +++++++++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 643eba4..d071f45c 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -73,12 +73,12 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size)
 }
 
 /*
- * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
- * iterators, so for other types (bvec & kvec) we must do a cache write-back.
+ * copy_from_iter_nocache() on x86 uses non-temporal stores for iovec and
+ * bvec iterators, but for kvec we must do a cache write-back.
  */
 static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
 {
-	return iter_is_iovec(i) == false;
+	return (i->type & ITER_KVEC) == ITER_KVEC;
 }
 
 /**
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 7e3138c..df4cb00 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -342,6 +342,13 @@ static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t
 	kunmap_atomic(from);
 }
 
+static void memcpy_from_page_nocache(char *to, struct page *page, size_t offset, size_t len)
+{
+	char *from = kmap_atomic(page);
+	__copy_from_user_inatomic_nocache(to, from + offset, len);
+	kunmap_atomic(from);
+}
+
 static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
 {
 	char *to = kmap_atomic(page);
@@ -392,8 +399,8 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 	iterate_and_advance(i, bytes, v,
 		__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
 					 v.iov_base, v.iov_len),
-		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
-				 v.bv_offset, v.bv_len),
+		memcpy_from_page_nocache((to += v.bv_len) - v.bv_len,
+					 v.bv_page, v.bv_offset, v.bv_len),
 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 	)
 
-- 
1.8.3.1
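
As background, a rough sketch of what a non-temporal ("nocache") copy means on
x86: the stores go straight to memory via MOVNT instructions and bypass the CPU
caches, which is why the pmem cache write-back is no longer needed for bvecs.
This is an illustration under simplifying assumptions, not the kernel helper;
the real path uses __copy_from_user_inatomic_nocache(), which handles arbitrary
alignment and lengths.

#include <emmintrin.h>
#include <stddef.h>

/*
 * Illustrative only: copy with non-temporal stores.  Assumes dst is
 * 16-byte aligned and len is a multiple of 16.
 */
static void copy_nocache(void *dst, const void *src, size_t len)
{
	__m128i *d = dst;
	const __m128i *s = src;
	size_t i;

	for (i = 0; i < len / 16; i++) {
		__m128i v = _mm_loadu_si128(&s[i]);	/* ordinary cached load */
		_mm_stream_si128(&d[i], v);		/* MOVNTDQ: bypasses the cache */
	}
	_mm_sfence();	/* order the streamed stores before later writes */
}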

