Applications in a Linux guest that use device-dax never trigger a flush
that can be trapped by KVM/QEMU. Meanwhile, if the host backend is not
device-dax, QEMU cannot guarantee the persistence of guest writes.
Until this flushing problem is solved, QEMU should warn users if the
host backend is not device-dax.

Signed-off-by: Haozhong Zhang <haozhong.zh...@intel.com>
Message-id: capcyv4hv2-zw8smcrtd0p_86kgr3dhovne+6t5sy2u7wxg3...@mail.gmail.com
---
 hw/mem/nvdimm.c      |  6 ++++++
 include/qemu/osdep.h |  9 ++++++++
 util/osdep.c         | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+)

diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index a9b0863f20..b23542fbdf 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -26,6 +26,7 @@
 #include "qapi/error.h"
 #include "qapi/visitor.h"
 #include "hw/mem/nvdimm.h"
+#include "qemu/error-report.h"
 
 static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
@@ -84,6 +85,11 @@ static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
     NVDIMMDevice *nvdimm = NVDIMM(dimm);
     uint64_t align, pmem_size, size = memory_region_size(mr);
 
+    if (!qemu_fd_is_dev_dax(memory_region_get_fd(mr))) {
+        error_report("warning: nvdimm backend does not look like a DAX device, "
+                     "unable to guarantee persistence of guest writes");
+    }
+
     align = memory_region_get_alignment(mr);
 
     pmem_size = size - nvdimm->label_size;
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 1c9f5e260c..7f26af371e 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -470,4 +470,13 @@ char *qemu_get_pid_name(pid_t pid);
  */
 pid_t qemu_fork(Error **errp);
 
+/**
+ * qemu_fd_is_dev_dax:
+ *
+ * Check whether @fd describes a DAX device.
+ *
+ * Returns true if it is; otherwise, returns false (always on non-Linux hosts).
+ */
+bool qemu_fd_is_dev_dax(int fd);
+
 #endif
diff --git a/util/osdep.c b/util/osdep.c
index a2863c8e53..02881f96bc 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -471,3 +471,64 @@ writev(int fd, const struct iovec *iov, int iov_cnt)
     return readv_writev(fd, iov, iov_cnt, true);
 }
 #endif
+
+#ifdef __linux__
+/*
+ * Read sysfs attribute @entry of the character device behind @fd into @buf
+ * (NUL-terminated, cut at the first newline).  Returns the bytes read, 0 if
+ * @fd is no character device or @entry cannot be opened, -1 if read() fails.
+ */
+static ssize_t qemu_dev_dax_sysfs_read(int fd, const char *entry,
+                                       char *buf, size_t len)
+{
+    ssize_t read_bytes;
+    struct stat st;
+    char *path;
+    int sysfs_fd;
+
+    /* /sys/dev/char only indexes character devices; @buf must fit the NUL */
+    if (len == 0 || fstat(fd, &st) || !S_ISCHR(st.st_mode)) {
+        return 0;
+    }
+    path = g_strdup_printf("/sys/dev/char/%u:%u/%s",
+                           (unsigned int)major(st.st_rdev),
+                           (unsigned int)minor(st.st_rdev), entry);
+    sysfs_fd = open(path, O_RDONLY);
+    g_free(path);
+    if (sysfs_fd == -1) {
+        return 0;
+    }
+
+    read_bytes = read(sysfs_fd, buf, len - 1);
+    close(sysfs_fd);
+    if (read_bytes > 0) {
+        buf[read_bytes] = '\0';
+        buf[strcspn(buf, "\n")] = '\0'; /* cut at the first newline, if any */
+    }
+
+    return read_bytes;
+}
+#endif /* __linux__ */
+
+/*
+ * Report whether the sysfs "device/devtype" attribute looked up for @fd reads
+ * exactly "nd_dax".  Always false on non-Linux hosts.
+ */
+bool qemu_fd_is_dev_dax(int fd)
+{
+    bool is_dax = false;
+
+#ifdef __linux__
+    /* Roomier than "nd_dax\n" so longer names are not truncated to a match */
+    char devtype[32];
+
+    if (fd >= 0 &&
+        qemu_dev_dax_sysfs_read(fd, "device/devtype",
+                                devtype, sizeof(devtype)) > 0) {
+        /* exact match: a bounded strncmp() would accept a mere prefix */
+        is_dax = !strcmp(devtype, "nd_dax");
+    }
+#endif /* __linux__ */
+
+    return is_dax;
+}
-- 
2.11.0


Reply via email to