On 12/31/17 17:55 +0200, Michael S. Tsirkin wrote: > On Mon, Dec 25, 2017 at 09:06:10AM +0800, Haozhong Zhang wrote: > > The new function pmem_persistent() flushes the previous cached writes > > on the specified memory buffer, which ensures the write persistence if > > the buffer is in persistent memory. > > > > Signed-off-by: Haozhong Zhang <haozhong.zh...@intel.com> > > --- > > include/qemu/pmem.h | 25 ++++++++++ > > util/Makefile.objs | 1 + > > util/pmem.c | 132 > > ++++++++++++++++++++++++++++++++++++++++++++++++++++ > > 3 files changed, 158 insertions(+) > > create mode 100644 include/qemu/pmem.h > > create mode 100644 util/pmem.c > > > > diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h > > new file mode 100644 > > index 0000000000..6593ae1d5c > > --- /dev/null > > +++ b/include/qemu/pmem.h > > @@ -0,0 +1,25 @@ > > +/* > > + * Helper functions to operate on persistent memory. > > + * > > + * Copyright (c) 2017 Intel Corporation. > > + * > > + * Author: Haozhong Zhang <haozhong.zh...@intel.com> > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or > > later. > > + * See the COPYING file in the top-level directory. > > + */ > > + > > +#ifndef QEMU_PMEM_H > > +#define QEMU_PMEM_H > > + > > +/** > > + * Flush previous cached writes to the specified memory buffer. If the > > + * buffer is in persistent memory, this function will ensure the write > > + * persistence. 
> > + * > > + * @p: the pointer to the memory buffer > > + * @len: the length in bytes of the memory buffer > > + */ > > +void pmem_persistent(void *p, unsigned long len); > > + > > +#endif /* QEMU_PMEM_H */ > > diff --git a/util/Makefile.objs b/util/Makefile.objs > > index 2973b0a323..2614a84a9e 100644 > > --- a/util/Makefile.objs > > +++ b/util/Makefile.objs > > @@ -41,6 +41,7 @@ util-obj-y += timed-average.o > > util-obj-y += base64.o > > util-obj-y += log.o > > util-obj-y += pagesize.o > > +util-obj-y += pmem.o > > util-obj-y += qdist.o > > util-obj-y += qht.o > > util-obj-y += range.o > > diff --git a/util/pmem.c b/util/pmem.c > > new file mode 100644 > > index 0000000000..44be1dde58 > > --- /dev/null > > +++ b/util/pmem.c > > @@ -0,0 +1,132 @@ > > +/* > > + * Helper functions to operate on persistent memory. > > + * > > + * Copyright (c) 2017 Intel Corporation. > > + * > > + * Author: Haozhong Zhang <haozhong.zh...@intel.com> > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or > > later. > > + * See the COPYING file in the top-level directory. 
> > + */ > > + > > +#include "qemu/osdep.h" > > +#include "qemu-common.h" > > +#include "qemu/pmem.h" > > + > > +static size_t cache_line_size; > > + > > +typedef void (*cache_flush_func_t)(void *p); > > +typedef void (*store_fence_func_t)(void); > > + > > +static cache_flush_func_t cache_flush_func; > > +static store_fence_func_t store_fence_func; > > + > > +#if defined(__x86_64__) || defined(__i386__) > > + > > +#define CPUID_1_0_EBX_CLSIZE_MASK 0x0000ff00 > > +#define CPUID_1_0_EBX_CLSIZE_SHIFT 8 > > +#define CPUID_1_0_EDX_CLFLUSH (1U << 19) > > +#define CPUID_7_0_EBX_CLFLUSHOPT (1U << 23) > > +#define CPUID_7_0_EBX_CLWB (1U << 24) > > + > > +static inline void cpuid(uint32_t function, uint32_t count, > > + uint32_t *eax, uint32_t *ebx, > > + uint32_t *ecx, uint32_t *edx) > > +{ > > + uint32_t vec[4]; > > + > > +#ifdef __x86_64__ > > + asm volatile("cpuid" > > + : "=a"(vec[0]), "=b"(vec[1]), > > + "=c"(vec[2]), "=d"(vec[3]) > > + : "0"(function), "c"(count) : "cc"); > > +#else > > + asm volatile("pusha\n\t" > > + "cpuid\n\t" > > + "mov %%eax, 0(%2)\n\t" > > + "mov %%ebx, 4(%2)\n\t" > > + "mov %%ecx, 8(%2)\n\t" > > + "mov %%edx, 12(%2)\n\t" > > + "popa" > > + : : "a"(function), "c"(count), "S"(vec) > > + : "memory", "cc"); > > +#endif > > + > > + if (eax) { > > + *eax = vec[0]; > > + } > > + if (ebx) { > > + *ebx = vec[1]; > > + } > > + if (ecx) { > > + *ecx = vec[2]; > > + } > > + if (edx) { > > + *edx = vec[3]; > > + } > > +} > > + > > +static void clflush(void *p) > > +{ > > + asm volatile("clflush %0" : "+m" (*(volatile char *)p)); > > +} > > + > > +static void clflushopt(void *p) > > +{ > > + asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)p)); > > +} > > + > > +static void clwb(void *p) > > +{ > > + asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)p)); > > +} > > + > > +static void sfence(void) > > +{ > > + asm volatile("sfence" : : : "memory"); > > +} > > + > > +static void __attribute__((constructor)) init_funcs(void) > > 
+{ > > + uint32_t ebx, edx; > > + > > + cpuid(0x1, 0x0, NULL, &ebx, NULL, &edx); > > + > > + cache_line_size = ((ebx & CPUID_1_0_EBX_CLSIZE_MASK) >> > > + CPUID_1_0_EBX_CLSIZE_SHIFT) * 8; > > + assert(cache_line_size && !(cache_line_size & (cache_line_size - 1))); > > + > > + cpuid(0x7, 0x0, NULL, &ebx, NULL, NULL); > > + if (ebx & CPUID_7_0_EBX_CLWB) { > > + cache_flush_func = clwb; > > + } else if (ebx & CPUID_7_0_EBX_CLFLUSHOPT) { > > + cache_flush_func = clflushopt; > > + } else { > > + if (edx & CPUID_1_0_EDX_CLFLUSH) { > > + cache_flush_func = clflush; > > + } > > + } > > + > > + store_fence_func = sfence; > > +} > > + > > +#endif /* __x86_64__ || __i386__ */ > > + > > +void pmem_persistent(void *p, unsigned long len) > > +{ > > + uintptr_t s, e; > > + > > + if (!cache_flush_func || !store_fence_func) { > > + return; > > + } > > + > > + s = (uintptr_t)p & ~(cache_line_size - 1); > > + e = (uintptr_t)p + len; > > + > > + while (s < e) { > > + cache_flush_func((void *)s); > > + s += cache_line_size; > > + } > > + > > + store_fence_func(); > > +} > > > Shouldn't something be done for non x86 platforms?
This patch basically follows what pmdk [1] does. An initial AArch64 port of pmdk was recently published [2], and I can add the corresponding instruction changes to this patch series as well. For other platforms, pmem_persistent() currently does nothing; we can add support for them later, once it is clear what changes are needed. [1] Formerly known as nvml, http://pmem.io/pmdk/ [2] https://github.com/vvenkates27/nvml/commit/aeabe1d4bf9fc88c2b645d45ebce3de969e13a31 Haozhong