arch_memcpy_to_pmem() calls __copy_user_nocache() to copy data via
non-temporal stores. However, __copy_user_nocache() still performs a
cached copy when a request is not naturally aligned or is less than
4 bytes.
Call clflush_cache_range() to flush the destination when a request
leads to a cached copy.

Signed-off-by: Toshi Kani <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Ross Zwisler <[email protected]>
Cc: Vishal Verma <[email protected]>
---
 arch/x86/include/asm/pmem.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index c57fd1e..f135064 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -45,6 +45,17 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
 	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
 			__func__, dst, src, unwritten))
 		BUG();
+
+	/*
+	 * Flush the caches when the request is not naturally aligned.
+	 * Non-temporal stores are not used for unaligned copy.
+	 */
+	if (((n >= 8) &&
+	     (!IS_ALIGNED((unsigned long)dst, 8) || !IS_ALIGNED(n, 8))) ||
+	    ((n < 8) &&
+	     (!IS_ALIGNED((unsigned long)dst, 4) || (n != 4)))) {
+		clflush_cache_range(dst, n);
+	}
 }
 
 /**
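For illustration only (not part of the patch), the condition added above can
be exercised as a standalone userspace sketch. needs_cache_flush() below is a
hypothetical helper that mirrors the new check, under the assumption stated in
the changelog that __copy_user_nocache() uses non-temporal stores only for an
8-byte-aligned destination and length (or a 4-byte-aligned 4-byte copy):

/*
 * Userspace sketch (plain C, not kernel code) of the alignment check added
 * by this patch. needs_cache_flush() is a hypothetical helper: it returns
 * true when __copy_user_nocache() would fall back to a cached copy, i.e.
 * when the destination would need an explicit cache flush.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the kernel's IS_ALIGNED() macro. */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

static bool needs_cache_flush(const void *dst, size_t n)
{
	uintptr_t addr = (uintptr_t)dst;

	if (n >= 8)
		return !IS_ALIGNED(addr, 8) || !IS_ALIGNED(n, 8);

	return !IS_ALIGNED(addr, 4) || n != 4;
}

int main(void)
{
	/* Aligned 64-byte copy: non-temporal stores, no flush needed. */
	printf("%d\n", needs_cache_flush((void *)0x1000, 64));	/* prints 0 */
	/* Unaligned destination: cached copy, flush needed. */
	printf("%d\n", needs_cache_flush((void *)0x1001, 64));	/* prints 1 */
	/* 3-byte copy: cached copy, flush needed. */
	printf("%d\n", needs_cache_flush((void *)0x1000, 3));	/* prints 1 */
	return 0;
}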

