On Fri 14-04-17 19:35:41, Dan Williams wrote:
> Introduce copy_from_iter_ops() to enable passing custom sub-routines to
> iterate_and_advance(). Define pmem operations that guarantee cache
> bypass to supplement the existing usage of __copy_from_iter_nocache()
> backed by arch_wb_cache_pmem().
> 
> Cc: Jan Kara <j...@suse.cz>
> Cc: Jeff Moyer <jmo...@redhat.com>
> Cc: Christoph Hellwig <h...@lst.de>
> Cc: Toshi Kani <toshi.k...@hpe.com>
> Cc: Al Viro <v...@zeniv.linux.org.uk>
> Cc: Matthew Wilcox <mawil...@microsoft.com>
> Cc: Ross Zwisler <ross.zwis...@linux.intel.com>
> Cc: Linus Torvalds <torva...@linux-foundation.org>
> Signed-off-by: Dan Williams <dan.j.willi...@intel.com>
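For context, the consumer of this hook is the pmem copy path, which can
now route an iov_iter write through a cache-bypassing copy. A minimal
sketch of a caller, assuming only what the patch below adds
(my_pmem_write() is a made-up wrapper, not part of the patch;
arch_copy_from_iter_pmem() is the helper introduced here):

	/*
	 * Illustrative only: copy incoming data to persistent memory
	 * with non-temporal stores, so no dirty cache lines for the
	 * written range are left behind.
	 */
	static size_t my_pmem_write(void *pmem_addr, size_t bytes,
			struct iov_iter *i)
	{
		return arch_copy_from_iter_pmem(pmem_addr, bytes, i);
	}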
...

> +static int pmem_from_user(void *dst, const void __user *src, unsigned size)
> +{
> +	unsigned long flushed, dest = (unsigned long) dest;
				      ^^^ dst here?

Otherwise the patch looks good to me so feel free to add:

Reviewed-by: Jan Kara <j...@suse.cz>

after fixing this.

								Honza

> +	int rc = __copy_from_user_nocache(dst, src, size);
> +
> +	/*
> +	 * On x86_64 __copy_from_user_nocache() uses non-temporal stores
> +	 * for the bulk of the transfer, but we need to manually flush
> +	 * if the transfer is unaligned. A cached memory copy is used
> +	 * when destination or size is not naturally aligned. That is:
> +	 *   - Require 8-byte alignment when size is 8 bytes or larger.
> +	 *   - Require 4-byte alignment when size is 4 bytes.
> +	 */
> +	if (size < 8) {
> +		if (!IS_ALIGNED(dest, 4) || size != 4)
> +			arch_wb_cache_pmem(dst, 1);
> +	} else {
> +		if (!IS_ALIGNED(dest, 8)) {
> +			dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
> +			arch_wb_cache_pmem(dst, 1);
> +		}
> +
> +		flushed = dest - (unsigned long) dst;
> +		if (size > flushed && !IS_ALIGNED(size - flushed, 8))
> +			arch_wb_cache_pmem(dst + size - 1, 1);
> +	}
> +
> +	return rc;
> +}
> +
> +static void pmem_from_page(char *to, struct page *page, size_t offset,
> +		size_t len)
> +{
> +	char *from = kmap_atomic(page);
> +
> +	arch_memcpy_to_pmem(to, from + offset, len);
> +	kunmap_atomic(from);
> +}
> +
> +size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, struct iov_iter *i)
> +{
> +	return copy_from_iter_ops(addr, bytes, i, pmem_from_user,
> +			pmem_from_page, arch_memcpy_to_pmem);
> +}
> +EXPORT_SYMBOL_GPL(arch_copy_from_iter_pmem);
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 804e34c6f981..edb78f3fe2c8 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -91,6 +91,10 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
>  size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
>  bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
>  size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
> +size_t copy_from_iter_ops(void *addr, size_t bytes, struct iov_iter *i,
> +		int (*user)(void *, const void __user *, unsigned),
> +		void (*page)(char *, struct page *, size_t, size_t),
> +		void (*copy)(void *, void *, unsigned));
>  bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
>  size_t iov_iter_zero(size_t bytes, struct iov_iter *);
>  unsigned long iov_iter_alignment(const struct iov_iter *i);
> diff --git a/lib/Kconfig b/lib/Kconfig
> index 0c4aac6ef394..4d8f575e65b3 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -404,6 +404,9 @@ config DMA_VIRT_OPS
>  	depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
>  	default n
>  
> +config COPY_FROM_ITER_OPS
> +	bool
> +
>  config CHECK_SIGNATURE
>  	bool
>  
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index e68604ae3ced..85f8021504e3 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -571,6 +571,31 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(copy_from_iter);
>  
> +#ifdef CONFIG_COPY_FROM_ITER_OPS
> +size_t copy_from_iter_ops(void *addr, size_t bytes, struct iov_iter *i,
> +		int (*user)(void *, const void __user *, unsigned),
> +		void (*page)(char *, struct page *, size_t, size_t),
> +		void (*copy)(void *, void *, unsigned))
> +{
> +	char *to = addr;
> +
> +	if (unlikely(i->type & ITER_PIPE)) {
> +		WARN_ON(1);
> +		return 0;
> +	}
> +	iterate_and_advance(i, bytes, v,
> +		user((to += v.iov_len) - v.iov_len, v.iov_base,
> +			v.iov_len),
> +		page((to += v.bv_len) - v.bv_len, v.bv_page, v.bv_offset,
> +			v.bv_len),
> +		copy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
> +	)
> +
> +	return bytes;
> +}
> +EXPORT_SYMBOL_GPL(copy_from_iter_ops);
> +#endif
> +
>  bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	char *to = addr;

-- 
Jan Kara <j...@suse.com>
SUSE Labs, CR
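P.S. The flush heuristics in pmem_from_user() are easy to sanity check
outside the kernel. Below is a stand-alone user-space model of the same
decision logic, a sketch only: IS_ALIGNED()/ALIGN() are reimplemented
here, arch_wb_cache_pmem() is replaced by a printf, and a fixed 64-byte
cache line stands in for boot_cpu_data.x86_clflush_size:

	#include <stdio.h>

	#define IS_ALIGNED(x, a)  (((x) & ((a) - 1)) == 0)
	#define ALIGN(x, a)       (((x) + (a) - 1) & ~((unsigned long)(a) - 1))
	#define CLFLUSH_SIZE      64UL	/* x86_clflush_size stand-in */

	/* Mirror of the flush decisions in pmem_from_user(). */
	static void model_flush(unsigned long dst, unsigned long size)
	{
		unsigned long dest = dst, flushed;

		if (size < 8) {
			/* Small copy: cached unless it is an aligned 4-byte store. */
			if (!IS_ALIGNED(dest, 4) || size != 4)
				printf("flush head @ %#lx\n", dst);
		} else {
			if (!IS_ALIGNED(dest, 8)) {
				/* Ragged head: cached up to the next cache line. */
				dest = ALIGN(dest, CLFLUSH_SIZE);
				printf("flush head @ %#lx\n", dst);
			}
			flushed = dest - dst;
			if (size > flushed && !IS_ALIGNED(size - flushed, 8))
				/* Ragged tail: last partial word went through cache. */
				printf("flush tail @ %#lx\n", dst + size - 1);
		}
	}

	int main(void)
	{
		model_flush(0x1000, 4096);	/* aligned: nothing to flush */
		model_flush(0x1002, 64);	/* ragged head and tail */
		model_flush(0x1000, 13);	/* aligned head, ragged tail */
		return 0;
	}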