Re: [RFC PATCH v2 1/5] vhost-user: Add VIRTIO Shared Memory map request

2024-07-11 Thread Stefan Hajnoczi
On Fri, Jun 28, 2024 at 04:57:06PM +0200, Albert Esteve wrote:
> Add SHMEM_MAP/UNMAP requests to vhost-user to
> handle VIRTIO Shared Memory mappings.
> 
> This request allows backends to dynamically map
> fds into a VIRTIO Shared Memory Region indentified
> by its `shmid`. Then, the fd memory is advertised
> to the driver as a base addres + offset, so it
> can be read/written (depending on the mmap flags
> requested) while its valid.
> 
> The backend can munmap the memory range
> in a given VIRTIO Shared Memory Region (again,
> identified by its `shmid`), to free it. Upon
> receiving this message, the front-end must
> mmap the regions with PROT_NONE to reserve
> the virtual memory space.
> 
> The device model needs to create MemoryRegion
> instances for the VIRTIO Shared Memory Regions
> and add them to the `VirtIODevice` instance.
> 
> Signed-off-by: Albert Esteve 
> ---
>  docs/interop/vhost-user.rst   |  27 +
>  hw/virtio/vhost-user.c| 122 ++
>  hw/virtio/virtio.c|  12 +++
>  include/hw/virtio/virtio.h|   5 +
>  subprojects/libvhost-user/libvhost-user.c |  65 
>  subprojects/libvhost-user/libvhost-user.h |  53 ++
>  6 files changed, 284 insertions(+)
> 
> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> index d8419fd2f1..d52ba719d5 100644
> --- a/docs/interop/vhost-user.rst
> +++ b/docs/interop/vhost-user.rst
> @@ -1859,6 +1859,33 @@ is sent by the front-end.
>when the operation is successful, or non-zero otherwise. Note that if the
>operation fails, no fd is sent to the backend.
>  
> +``VHOST_USER_BACKEND_SHMEM_MAP``
> +  :id: 9
> +  :equivalent ioctl: N/A
> +  :request payload: fd and ``struct VhostUserMMap``
> +  :reply payload: N/A
> +
> +  This message can be submitted by the backends to advertise a new mapping
> +  to be made in a given VIRTIO Shared Memory Region. Upon receiving the 
> message,
> +  The front-end will mmap the given fd into the VIRTIO Shared Memory Region
> +  with the requested ``shmid``. A reply is generated indicating whether 
> mapping
> +  succeeded.
> +
> +  Mapping over an already existing map is not allowed and request shall fail.
> +  Therefore, the memory range in the request must correspond with a valid,
> +  free region of the VIRTIO Shared Memory Region.
> +
> +``VHOST_USER_BACKEND_SHMEM_UNMAP``
> +  :id: 10
> +  :equivalent ioctl: N/A
> +  :request payload: ``struct VhostUserMMap``
> +  :reply payload: N/A
> +
> +  This message can be submitted by the backends so that the front-end un-mmap
> +  a given range (``offset``, ``len``) in the VIRTIO Shared Memory Region with

s/offset/shm_offset/

> +  the requested ``shmid``.

Please clarify that  must correspond to the entirety of a
valid mapped region.

By the way, the VIRTIO 1.3 gives the following behavior for the virtiofs
DAX Window:

  When a FUSE_SETUPMAPPING request perfectly overlaps a previous
  mapping, the previous mapping is replaced. When a mapping partially
  overlaps a previous mapping, the previous mapping is split into one or
  two smaller mappings. When a mapping is partially unmapped it is also
  split into one or two smaller mappings.

  Establishing new mappings or splitting existing mappings consumes
  resources. If the device runs out of resources the FUSE_SETUPMAPPING
  request fails until resources are available again following
  FUSE_REMOVEMAPPING.

I think SETUPMAPPING/REMOVMAPPING can be implemented using
SHMEM_MAP/UNMAP. SHMEM_MAP/UNMAP do not allow atomically replacing
partial ranges, but as far as I know that's not necessary for virtiofs
in practice.

It's worth mentioning that mappings consume resources and that SHMEM_MAP
can fail when there are no resources available. The process-wide limit
is vm.max_map_count on Linux although a vhost-user frontend may reduce
it further to control vhost-user resource usage.

> +  A reply is generated indicating whether unmapping succeeded.
> +
>  .. _reply_ack:
>  
>  VHOST_USER_PROTOCOL_F_REPLY_ACK
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index cdf9af4a4b..7ee8a472c6 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -115,6 +115,8 @@ typedef enum VhostUserBackendRequest {
>  VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
>  VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
>  VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
> +VHOST_USER_BACKEND_SHMEM_MAP = 9,
> +VHOST_USER_BACKEND_SHMEM_UNMAP = 10,
>  VHOST_USER_BACKEND_MAX
>  }  VhostUserBackendRequest;
>  
> @@ -192,6 +194,24 @@ typedef struct VhostUserShared {
>  unsigned char uuid[16];
>  } VhostUserShared;
>  
> +/* For the flags field of VhostUserMMap */
> +#define VHOST_USER_FLAG_MAP_R (1u << 0)
> +#define VHOST_USER_FLAG_MAP_W (1u << 1)
> +
> +typedef struct {
> +/* VIRTIO Shared Memory Region ID */
> +uint8_t shmid;
> +uint8_t padding[7];
> +/* File offset */
> +

[RFC PATCH v2 1/5] vhost-user: Add VIRTIO Shared Memory map request

2024-06-28 Thread Albert Esteve
Add SHMEM_MAP/UNMAP requests to vhost-user to
handle VIRTIO Shared Memory mappings.

This request allows backends to dynamically map
fds into a VIRTIO Shared Memory Region indentified
by its `shmid`. Then, the fd memory is advertised
to the driver as a base addres + offset, so it
can be read/written (depending on the mmap flags
requested) while its valid.

The backend can munmap the memory range
in a given VIRTIO Shared Memory Region (again,
identified by its `shmid`), to free it. Upon
receiving this message, the front-end must
mmap the regions with PROT_NONE to reserve
the virtual memory space.

The device model needs to create MemoryRegion
instances for the VIRTIO Shared Memory Regions
and add them to the `VirtIODevice` instance.

Signed-off-by: Albert Esteve 
---
 docs/interop/vhost-user.rst   |  27 +
 hw/virtio/vhost-user.c| 122 ++
 hw/virtio/virtio.c|  12 +++
 include/hw/virtio/virtio.h|   5 +
 subprojects/libvhost-user/libvhost-user.c |  65 
 subprojects/libvhost-user/libvhost-user.h |  53 ++
 6 files changed, 284 insertions(+)

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index d8419fd2f1..d52ba719d5 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -1859,6 +1859,33 @@ is sent by the front-end.
   when the operation is successful, or non-zero otherwise. Note that if the
   operation fails, no fd is sent to the backend.
 
+``VHOST_USER_BACKEND_SHMEM_MAP``
+  :id: 9
+  :equivalent ioctl: N/A
+  :request payload: fd and ``struct VhostUserMMap``
+  :reply payload: N/A
+
+  This message can be submitted by the backends to advertise a new mapping
+  to be made in a given VIRTIO Shared Memory Region. Upon receiving the 
message,
+  The front-end will mmap the given fd into the VIRTIO Shared Memory Region
+  with the requested ``shmid``. A reply is generated indicating whether mapping
+  succeeded.
+
+  Mapping over an already existing map is not allowed and request shall fail.
+  Therefore, the memory range in the request must correspond with a valid,
+  free region of the VIRTIO Shared Memory Region.
+
+``VHOST_USER_BACKEND_SHMEM_UNMAP``
+  :id: 10
+  :equivalent ioctl: N/A
+  :request payload: ``struct VhostUserMMap``
+  :reply payload: N/A
+
+  This message can be submitted by the backends so that the front-end un-mmap
+  a given range (``offset``, ``len``) in the VIRTIO Shared Memory Region with
+  the requested ``shmid``.
+  A reply is generated indicating whether unmapping succeeded.
+
 .. _reply_ack:
 
 VHOST_USER_PROTOCOL_F_REPLY_ACK
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index cdf9af4a4b..7ee8a472c6 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -115,6 +115,8 @@ typedef enum VhostUserBackendRequest {
 VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
 VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
 VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
+VHOST_USER_BACKEND_SHMEM_MAP = 9,
+VHOST_USER_BACKEND_SHMEM_UNMAP = 10,
 VHOST_USER_BACKEND_MAX
 }  VhostUserBackendRequest;
 
@@ -192,6 +194,24 @@ typedef struct VhostUserShared {
 unsigned char uuid[16];
 } VhostUserShared;
 
+/* For the flags field of VhostUserMMap */
+#define VHOST_USER_FLAG_MAP_R (1u << 0)
+#define VHOST_USER_FLAG_MAP_W (1u << 1)
+
+typedef struct {
+/* VIRTIO Shared Memory Region ID */
+uint8_t shmid;
+uint8_t padding[7];
+/* File offset */
+uint64_t fd_offset;
+/* Offset within the VIRTIO Shared Memory Region */
+uint64_t shm_offset;
+/* Size of the mapping */
+uint64_t len;
+/* Flags for the mmap operation, from VHOST_USER_FLAG_* */
+uint64_t flags;
+} VhostUserMMap;
+
 typedef struct {
 VhostUserRequest request;
 
@@ -224,6 +244,7 @@ typedef union {
 VhostUserInflight inflight;
 VhostUserShared object;
 VhostUserTransferDeviceState transfer_state;
+VhostUserMMap mmap;
 } VhostUserPayload;
 
 typedef struct VhostUserMsg {
@@ -1748,6 +1769,100 @@ vhost_user_backend_handle_shared_object_lookup(struct 
vhost_user *u,
 return 0;
 }
 
+static int
+vhost_user_backend_handle_shmem_map(struct vhost_dev *dev,
+VhostUserMMap *vu_mmap,
+int fd)
+{
+void *addr = 0;
+MemoryRegion *mr = NULL;
+
+if (fd < 0) {
+error_report("Bad fd for map");
+return -EBADF;
+}
+
+if (!dev->vdev->shmem_list ||
+dev->vdev->n_shmem_regions <= vu_mmap->shmid) {
+error_report("Device only has %d VIRTIO Shared Memory Regions. "
+ "Requested ID: %d",
+ dev->vdev->n_shmem_regions, vu_mmap->shmid);
+return -EFAULT;
+}
+
+mr = >vdev->shmem_list[vu_mmap->shmid];
+
+if (!mr) {
+error_report("VIRTIO Shared Memory Region at "
+ "ID %d unitialized",