This integrates the dax window manager with the virtio-fs file system
operations:

- Removes the lock from virtio::fs::dax_window, since this is now
  handled by the window manager.
- A manager instance has a lifetime matching a virtio-fs mount's
  lifetime: it is created at mount time and deleted at unmount time.
- The FUSE_READ fallback as well as the logic of using it when the DAX
  window is not available or a read using it fails are untouched.

Signed-off-by: Fotis Xenakis <fo...@windowslive.com>
---
 drivers/virtio-fs.hh           |   2 +-
 fs/virtiofs/virtiofs.hh        |  19 ++++-
 fs/virtiofs/virtiofs_i.hh      |   3 -
 fs/virtiofs/virtiofs_vfsops.cc |  37 ++++++--
 fs/virtiofs/virtiofs_vnops.cc  | 151 ++++++---------------------------
 5 files changed, 73 insertions(+), 139 deletions(-)

diff --git a/drivers/virtio-fs.hh b/drivers/virtio-fs.hh
index d78d7962..a2f7f748 100644
--- a/drivers/virtio-fs.hh
+++ b/drivers/virtio-fs.hh
@@ -10,6 +10,7 @@

 #include <osv/mutex.h>
 #include <osv/waitqueue.hh>
+
 #include "drivers/virtio.hh"
 #include "drivers/virtio-device.hh"
 #include "fs/virtiofs/fuse_kernel.h"
@@ -49,7 +50,6 @@ public:
     struct dax_window {
         mmioaddr_t addr;
         u64 len;
-        mutex lock;
     };

     explicit fs(virtio_device& dev);
diff --git a/fs/virtiofs/virtiofs.hh b/fs/virtiofs/virtiofs.hh
index 475d5eba..32121686 100644
--- a/fs/virtiofs/virtiofs.hh
+++ b/fs/virtiofs/virtiofs.hh
@@ -8,11 +8,11 @@
 #ifndef __INCLUDE_VIRTIOFS_H__
 #define __INCLUDE_VIRTIOFS_H__

-#include <osv/vnode.h>
+#include <osv/debug.h>
 #include <osv/mount.h>
-#include <osv/dentry.h>
-#include <osv/prex.h>
-#include <osv/buf.h>
+#include <osv/vnode.h>
+
+#include "drivers/virtio-fs.hh"
 #include "fuse_kernel.h"

 #define VIRTIOFS_DEBUG_ENABLED 1
@@ -23,6 +23,17 @@
 #define virtiofs_debug(...)
 #endif

+// Necessary pre-declaration because virtiofs::dax depends on virtiofs_inode,
+// declared below.
+namespace virtiofs {
+class dax_manager;
+}
+
+struct virtiofs_mount_data {
+    virtio::fs* drv;
+    virtiofs::dax_manager* dax;
+};
+
 struct virtiofs_inode {
     uint64_t nodeid;
     struct fuse_attr attr;
diff --git a/fs/virtiofs/virtiofs_i.hh b/fs/virtiofs/virtiofs_i.hh
index 76533d74..e879546d 100644
--- a/fs/virtiofs/virtiofs_i.hh
+++ b/fs/virtiofs/virtiofs_i.hh
@@ -8,9 +8,6 @@
 #ifndef VIRTIOFS_IO_H
 #define VIRTIOFS_IO_H

-#include "fuse_kernel.h"
-#include <osv/mutex.h>
-#include <osv/waitqueue.hh>
 #include "drivers/virtio-fs.hh"

 int fuse_req_send_and_receive_reply(virtio::fs* drv, uint32_t opcode,
diff --git a/fs/virtiofs/virtiofs_vfsops.cc b/fs/virtiofs/virtiofs_vfsops.cc
index 30daa42b..e2101200 100644
--- a/fs/virtiofs/virtiofs_vfsops.cc
+++ b/fs/virtiofs/virtiofs_vfsops.cc
@@ -5,14 +5,19 @@
  * BSD license as described in the LICENSE file in the top-level directory.
  */

+#include <atomic>
+#include <memory>
+#include <new>
 #include <sys/types.h>
-#include <osv/device.h>
+
+#include <api/assert.h>
 #include <osv/debug.h>
-#include <iomanip>
-#include <iostream>
+#include <osv/device.h>
+
+#include "drivers/virtio-fs.hh"
 #include "virtiofs.hh"
+#include "virtiofs_dax.hh"
 #include "virtiofs_i.hh"
-#include "drivers/virtio-fs.hh"

 using fuse_request = virtio::fs::fuse_request;

@@ -115,7 +120,26 @@ static int virtiofs_mount(struct mount* mp, const char* 
dev, int flags,

     virtiofs_set_vnode(mp->m_root->d_vnode, root_node);

-    mp->m_data = drv;
+    // TODO OPT: Coupling the window manager to a mount as done here means that
+    // multiple mounts of the same device will *NOT* work. Ideally, we want to
+    // couple the window manager with the device, but that would require the
+    // device driver layer to be aware of the filesystem layer, thus depending
+    // on it, creating a circular dependency.
+    auto* m_data = new (std::nothrow) virtiofs_mount_data;
+    if (!m_data) {
+        return ENOMEM;
+    }
+    m_data->drv = drv;
+    if (drv->get_dax()) {
+        m_data->dax = new (std::nothrow) virtiofs::dax_manager {*drv};
+        if (!m_data->dax) {
+            return ENOMEM;
+        }
+    } else {
+        m_data->dax = nullptr;
+    }
+
+    mp->m_data = m_data;
     mp->m_dev = device;

     return 0;
@@ -141,6 +165,9 @@ static int virtiofs_statfs(struct mount* mp, struct statfs* 
statp)

 static int virtiofs_unmount(struct mount* mp, int flags)
 {
+    auto* m_data = static_cast<virtiofs_mount_data*>(mp->m_data);
+    delete m_data->dax;
+    delete m_data;
     struct device* dev = mp->m_dev;
     return device_close(dev);
 }
diff --git a/fs/virtiofs/virtiofs_vnops.cc b/fs/virtiofs/virtiofs_vnops.cc
index 38c000ec..822780ad 100644
--- a/fs/virtiofs/virtiofs_vnops.cc
+++ b/fs/virtiofs/virtiofs_vnops.cc
@@ -5,28 +5,27 @@
  * BSD license as described in the LICENSE file in the top-level directory.
  */

-#include <sys/stat.h>
+#include <cstdlib>
+#include <cstring>
 #include <dirent.h>
-#include <sys/param.h>
-
 #include <errno.h>
-#include <string.h>
-#include <stdlib.h>
 #include <fcntl.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/types.h>

-#include <osv/prex.h>
-#include <osv/vnode.h>
-#include <osv/file.h>
-#include <osv/mount.h>
+#include <osv/contiguous_alloc.hh>
 #include <osv/debug.h>
-
-#include <sys/types.h>
 #include <osv/device.h>
-#include <osv/sched.hh>
+#include <osv/file.h>
 #include <osv/mmio.hh>
-#include <osv/contiguous_alloc.hh>
+#include <osv/mount.h>
+#include <osv/prex.h>
+#include <osv/sched.hh>
+#include <osv/vnode.h>

 #include "virtiofs.hh"
+#include "virtiofs_dax.hh"
 #include "virtiofs_i.hh"

 static constexpr uint32_t OPEN_FLAGS = O_RDONLY;
@@ -59,7 +58,8 @@ static int virtiofs_lookup(struct vnode* vnode, char* name, 
struct vnode** vpp)
     }
     strcpy(in_args.get(), name);

-    auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data);
+    auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data);
+    auto* drv = m_data->drv;
     auto error = fuse_req_send_and_receive_reply(drv, FUSE_LOOKUP,
         inode->nodeid, in_args.get(), in_args_len, out_args.get(),
         sizeof(*out_args));
@@ -110,7 +110,8 @@ static int virtiofs_open(struct file* fp)
     }
     in_args->flags = OPEN_FLAGS;

-    auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data);
+    auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data);
+    auto* drv = m_data->drv;
     auto error = fuse_req_send_and_receive_reply(drv, FUSE_OPEN,
         inode->nodeid, in_args.get(), sizeof(*in_args), out_args.get(),
         sizeof(*out_args));
@@ -145,7 +146,8 @@ static int virtiofs_close(struct vnode* vnode, struct file* 
fp)
     in_args->fh = f_data->file_handle;
     in_args->flags = OPEN_FLAGS; // need to be same as in FUSE_OPEN

-    auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data);
+    auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data);
+    auto* drv = m_data->drv;
     auto error = fuse_req_send_and_receive_reply(drv, FUSE_RELEASE,
         inode->nodeid, in_args.get(), sizeof(*in_args), nullptr, 0);
     if (error) {
@@ -172,7 +174,8 @@ static int virtiofs_readlink(struct vnode* vnode, struct 
uio* uio)
         return ENOMEM;
     }

-    auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data);
+    auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data);
+    auto* drv = m_data->drv;
     auto error = fuse_req_send_and_receive_reply(drv, FUSE_READLINK,
         inode->nodeid, nullptr, 0, link_path.get(), PATH_MAX);
     if (error) {
@@ -185,107 +188,6 @@ static int virtiofs_readlink(struct vnode* vnode, struct 
uio* uio)
     return uiomove(link_path.get(), strlen(link_path.get()), uio);
 }

-// Read @read_amt bytes from @inode, using the DAX window.
-static int virtiofs_read_direct(virtiofs_inode& inode, u64 file_handle,
-    u64 read_amt, virtio::fs& drv, struct uio& uio)
-{
-    auto* dax = drv.get_dax();
-    // Enter the critical path: setup mapping -> read -> remove mapping
-    std::lock_guard<mutex> guard {dax->lock};
-
-    // Setup mapping
-    // NOTE: There are restrictions on the arguments to FUSE_SETUPMAPPING, from
-    // the spec: "Alignment constraints for FUSE_SETUPMAPPING and
-    // FUSE_REMOVEMAPPING requests are communicated during FUSE_INIT
-    // negotiation"):
-    // - foffset: multiple of map_alignment from FUSE_INIT
-    // - len: not larger than remaining file?
-    // - moffset: multiple of map_alignment from FUSE_INIT
-    // In practice, map_alignment is the host's page size, because foffset and
-    // moffset are passed to mmap() on the host.
-    std::unique_ptr<fuse_setupmapping_in> in_args {
-        new (std::nothrow) fuse_setupmapping_in()};
-    if (!in_args) {
-        return ENOMEM;
-    }
-    in_args->fh = file_handle;
-    in_args->flags = 0;
-    uint64_t moffset = 0;
-    in_args->moffset = moffset;
-
-    auto map_align = drv.get_map_alignment();
-    if (map_align < 0) {
-        kprintf("[virtiofs] inode %lld, map alignment not set\n", 
inode.nodeid);
-        return ENOTSUP;
-    }
-    uint64_t alignment = 1ul << map_align;
-    auto foffset = align_down(static_cast<uint64_t>(uio.uio_offset), 
alignment);
-    in_args->foffset = foffset;
-
-    // The possible excess part of the file mapped due to alignment constraints
-    // NOTE: map_excess <= alignemnt
-    auto map_excess = uio.uio_offset - foffset;
-    if (moffset + map_excess >= dax->len) {
-        // No usable room in DAX window due to map_excess
-        return ENOBUFS;
-    }
-    // Actual read amount is read_amt, or what fits in the DAX window
-    auto read_amt_act = std::min<uint64_t>(read_amt,
-        dax->len - moffset - map_excess);
-    in_args->len = read_amt_act + map_excess;
-
-    virtiofs_debug("inode %lld, setting up mapping (foffset=%lld, len=%lld, "
-                   "moffset=%lld)\n", inode.nodeid, in_args->foffset,
-                   in_args->len, in_args->moffset);
-    auto error = fuse_req_send_and_receive_reply(&drv, FUSE_SETUPMAPPING,
-        inode.nodeid, in_args.get(), sizeof(*in_args), nullptr, 0);
-    if (error) {
-        kprintf("[virtiofs] inode %lld, mapping setup failed\n", inode.nodeid);
-        return error;
-    }
-
-    // Read from the DAX window
-    // NOTE: It shouldn't be necessary to use the mmio* interface (i.e. 
volatile
-    // accesses). From the spec: "Drivers map this shared memory region with
-    // writeback caching as if it were regular RAM."
-    // The location of the requested data in the DAX window
-    auto req_data = dax->addr + moffset + map_excess;
-    error = uiomove(const_cast<void*>(req_data), read_amt_act, &uio);
-    if (error) {
-        kprintf("[virtiofs] inode %lld, uiomove failed\n", inode.nodeid);
-        return error;
-    }
-
-    // Remove mapping
-    // NOTE: This is only necessary when FUSE_SETUPMAPPING fails. From the 
spec:
-    // "If the device runs out of resources the FUSE_SETUPMAPPING request fails
-    // until resources are available again following FUSE_REMOVEMAPPING."
-    auto r_in_args_size = sizeof(fuse_removemapping_in) +
-        sizeof(fuse_removemapping_one);
-    std::unique_ptr<u8> r_in_args {new (std::nothrow) u8[r_in_args_size]};
-    if (!r_in_args) {
-        return ENOMEM;
-    }
-    auto r_in = new (r_in_args.get()) fuse_removemapping_in();
-    auto r_one = new (r_in_args.get() + sizeof(fuse_removemapping_in))
-        fuse_removemapping_one();
-    r_in->count = 1;
-    r_one->moffset = in_args->moffset;
-    r_one->len = in_args->len;
-
-    virtiofs_debug("inode %lld, removing mapping (moffset=%lld, len=%lld)\n",
-        inode.nodeid, r_one->moffset, r_one->len);
-    error = fuse_req_send_and_receive_reply(&drv, FUSE_REMOVEMAPPING,
-        inode.nodeid, r_in_args.get(), r_in_args_size, nullptr, 0);
-    if (error) {
-        kprintf("[virtiofs] inode %lld, mapping removal failed\n",
-            inode.nodeid);
-        return error;
-    }
-
-    return 0;
-}
-
 // Read @read_amt bytes from @inode, using the fallback FUSE_READ mechanism.
 static int virtiofs_read_fallback(virtiofs_inode& inode, u64 file_handle,
     u32 read_amt, u32 flags, virtio::fs& drv, struct uio& uio)
@@ -314,9 +216,6 @@ static int virtiofs_read_fallback(virtiofs_inode& inode, 
u64 file_handle,
     return uiomove(buf.get(), read_amt, &uio);
 }

-// TODO: Optimize it to reduce number of exits to host (each
-// fuse_req_send_and_receive_reply()) by reading eagerly "ahead/around" just
-// like ROFS does and caching it
 static int virtiofs_read(struct vnode* vnode, struct file* fp, struct uio* uio,
     int ioflag)
 {
@@ -343,17 +242,17 @@ static int virtiofs_read(struct vnode* vnode, struct 
file* fp, struct uio* uio,

     auto* inode = static_cast<virtiofs_inode*>(vnode->v_data);
     auto* file_data = static_cast<virtiofs_file_data*>(fp->f_data);
-    auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data);
+    auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data);
+    auto* drv = m_data->drv;
+    auto* dax = m_data->dax;

     // Total read amount is what they requested, or what is left
     auto read_amt = std::min<uint64_t>(uio->uio_resid,
         inode->attr.size - uio->uio_offset);

-    if (drv->get_dax()) {
+    if (dax) {
         // Try to read from DAX
-        if (!virtiofs_read_direct(*inode, file_data->file_handle, read_amt,
-            *drv, *uio)) {
-
+        if (!dax->read(*inode, file_data->file_handle, read_amt, *uio, false)) 
{
             return 0;
         }
     }
--
2.27.0

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/AM0PR03MB629209F313FBB0BA6E3FFC70A6980%40AM0PR03MB6292.eurprd03.prod.outlook.com.

Reply via email to