Hi Akihiko,
> Subject: Re: [PATCH v3 9/9] virtio-gpu-dmabuf: Create dmabuf for blobs
> associated with VFIO devices
>
> On 2025/11/22 15:46, Vivek Kasireddy wrote:
> > In addition to memfd, a blob resource can also have its backing
> > storage in a VFIO device region. Therefore, we first need to figure
> > out if the blob is backed by a VFIO device region or a memfd before
> > we can call the right API to get a dmabuf fd created.
> >
> > So, we first call virtio_gpu_create_udmabuf() which further calls
> > ram_block_is_memfd_backed() to check if the blob's backing storage
> > is located in a memfd or not. If it is not, we call vfio_create_dmabuf()
> > which identifies the right VFIO device and eventually invokes the
> > API vfio_device_create_dmabuf_fd() to have a dmabuf fd created.
> >
> > Note that in virtio_gpu_remap_dmabuf(), we first try to test if
> > the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
> > use the VFIO device fd directly to create the CPU mapping.
> >
> > While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
> > from virtio_gpu_create_udmabuf().
> >
> > Cc: Marc-André Lureau <[email protected]>
> > Cc: Alex Bennée <[email protected]>
> > Cc: Akihiko Odaki <[email protected]>
> > Cc: Dmitry Osipenko <[email protected]>
> > Cc: Alex Williamson <[email protected]>
> > Cc: Cédric Le Goater <[email protected]>
> > Signed-off-by: Vivek Kasireddy <[email protected]>
> > ---
> > hw/display/Kconfig | 5 ++
> > hw/display/virtio-gpu-dmabuf.c | 122
> ++++++++++++++++++++++++++++++---
> > 2 files changed, 119 insertions(+), 8 deletions(-)
> >
> > diff --git a/hw/display/Kconfig b/hw/display/Kconfig
> > index 1e95ab28ef..0d090f25f5 100644
> > --- a/hw/display/Kconfig
> > +++ b/hw/display/Kconfig
> > @@ -106,6 +106,11 @@ config VIRTIO_VGA
> > depends on VIRTIO_PCI
> > select VGA
> >
> > +config VIRTIO_GPU_VFIO_BLOB
> > + bool
> > + default y
> > + depends on VFIO
> > +
> > config VHOST_USER_GPU
> > bool
> > default y
> > diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-
> dmabuf.c
> > index 258c48d31b..d121a2c9a7 100644
> > --- a/hw/display/virtio-gpu-dmabuf.c
> > +++ b/hw/display/virtio-gpu-dmabuf.c
> > @@ -18,6 +18,7 @@
> > #include "ui/console.h"
> > #include "hw/virtio/virtio-gpu.h"
> > #include "hw/virtio/virtio-gpu-pixman.h"
> > +#include "hw/vfio/vfio-device.h"
> > #include "trace.h"
> > #include "system/ramblock.h"
> > #include "system/hostmem.h"
> > @@ -40,10 +41,42 @@ static bool ram_block_is_memfd_backed(RAMBlock
> *rb)
> > return false;
> > }
> >
> > +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource *res)
> > +{
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > + VFIODevice *vbasedev;
> > + RAMBlock *first_rb;
> > + ram_addr_t offset;
> > +
> > + first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> &offset);
> > + if (!first_rb) {
> > + qemu_log_mask(LOG_GUEST_ERROR,
> > + "%s: Could not find ramblock\n", __func__);
> > + return;
> > + }
> > +
> > + vbasedev = vfio_device_lookup(first_rb->mr);
> > + if (!vbasedev) {
> > + qemu_log_mask(LOG_GUEST_ERROR,
> > + "%s: No VFIO device found to create dmabuf from\n",
> > + __func__);
> > + return;
> > + }
> > +
> > + res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
> > + res->iov, res->iov_cnt);
> > + if (res->dmabuf_fd < 0) {
> > + qemu_log_mask(LOG_GUEST_ERROR,
> > + "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
> > + __func__, strerror(errno));
> > + }
> > +#endif
> > +}
> > +
> > static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource
> *res)
> > {
> > struct udmabuf_create_list *list;
> > - RAMBlock *rb;
> > + RAMBlock *rb, *first_rb;
> > ram_addr_t offset;
> > int udmabuf, i;
> >
> > @@ -52,15 +85,17 @@ static void virtio_gpu_create_udmabuf(struct
> virtio_gpu_simple_resource *res)
> > return;
> > }
> >
> > + first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> &offset);
> > + if (!ram_block_is_memfd_backed(first_rb)) {
> > + return;
> > + }
> > +
>
> We had an extensive discussion but I still don't understand the benefit
> of this change while I see it adds complexity by having another call of
I'll get rid of the extra call to qemu_ram_block_from_host() in the next
version.
> qemu_ram_block_from_host() and imposing an extra restriction that all
> elements need to belong to one RAMBlock.
I thought you suggested that we need to ensure all entries (need to be
validated and) are associated with the same memory region? As
virtio_gpu_create_udmabuf() was not doing that, I thought this
change/restriction needs to be added.
And, since calling ram_block_is_memfd_backed() for each entry would
incur extra overhead (as there can be thousands of entries and fcntl needs
to check with the kernel), I figured calling it once for the first ram block
and comparing all the other ram blocks against it made sense.
Also, rethinking this whole situation again, I don't think we should try to
create
a dmabuf for a buffer that might have mixed/different memory regions or
memfds (as this is most likely an invalid scenario that could lead to undefined
behavior) so this change is meant to prevent such scenario.
Thanks,
Vivek
>
> If anyone else have some opinion on this, I'd like to hear.
>
> Regards,
> Akihiko Odaki
>
> > list = g_malloc0(sizeof(struct udmabuf_create_list) +
> > sizeof(struct udmabuf_create_item) * res->iov_cnt);
> >
> > for (i = 0; i < res->iov_cnt; i++) {
> > - rcu_read_lock();
> > rb = qemu_ram_block_from_host(res->iov[i].iov_base, false,
> > &offset);
> > - rcu_read_unlock();
> > -
> > - if (!rb || rb->fd < 0) {
> > + if (rb != first_rb) {
> > g_free(list);
> > return;
> > }
> > @@ -81,11 +116,77 @@ static void virtio_gpu_create_udmabuf(struct
> virtio_gpu_simple_resource *res)
> > g_free(list);
> > }
> >
> > +static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource *res)
> > +{
> > + struct vfio_region_info *info = NULL;
> > + VFIODevice *vbasedev = NULL;
> > + ram_addr_t offset, len = 0;
> > + RAMBlock *first_rb, *rb;
> > + void *map, *submap;
> > + int i, ret = -1;
> > +
> > + first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> &offset);
> > + if (!first_rb) {
> > + return MAP_FAILED;
> > + }
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > + vbasedev = vfio_device_lookup(first_rb->mr);
> > +#endif
> > + if (!vbasedev) {
> > + return MAP_FAILED;
> > + }
> > +
> > + /*
> > + * We first reserve a contiguous chunk of address space for the entire
> > + * dmabuf, then replace it with smaller mappings that correspond to the
> > + * individual segments of the dmabuf.
> > + */
> > + map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED,
> vbasedev->fd, 0);
> > + if (map == MAP_FAILED) {
> > + return map;
> > + }
> > +
> > + for (i = 0; i < res->iov_cnt; i++) {
> > + rb = qemu_ram_block_from_host(res->iov[i].iov_base, false,
> > &offset);
> > + if (rb != first_rb) {
> > + goto err;
> > + }
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > + ret = vfio_get_region_index_from_mr(rb->mr);
> > + if (ret < 0) {
> > + goto err;
> > + }
> > +
> > + ret = vfio_device_get_region_info(vbasedev, ret, &info);
> > +#endif
> > + if (ret < 0 || !info) {
> > + goto err;
> > + }
> > +
> > + submap = mmap(map + len, res->iov[i].iov_len, PROT_READ,
> > + MAP_SHARED | MAP_FIXED, vbasedev->fd,
> > + info->offset + offset);
> > + if (submap == MAP_FAILED) {
> > + goto err;
> > + }
> > +
> > + len += res->iov[i].iov_len;
> > + }
> > + return map;
> > +err:
> > + munmap(map, res->blob_size);
> > + return MAP_FAILED;
> > +}
> > +
> > static void virtio_gpu_remap_dmabuf(struct virtio_gpu_simple_resource
> *res)
> > {
> > res->remapped = mmap(NULL, res->blob_size, PROT_READ,
> > MAP_SHARED, res->dmabuf_fd, 0);
> > if (res->remapped == MAP_FAILED) {
> > + res->remapped = vfio_dmabuf_mmap(res);
> > + if (res->remapped != MAP_FAILED) {
> > + return;
> > + }
> > warn_report("%s: dmabuf mmap failed: %s", __func__,
> > strerror(errno));
> > res->remapped = NULL;
> > @@ -146,6 +247,13 @@ void virtio_gpu_init_dmabuf(struct
> virtio_gpu_simple_resource *res)
> > } else {
> > virtio_gpu_create_udmabuf(res);
> > if (res->dmabuf_fd < 0) {
> > + vfio_create_dmabuf(res);
> > + }
> > +
> > + if (res->dmabuf_fd < 0) {
> > + qemu_log_mask(LOG_GUEST_ERROR,
> > + "%s: memory region cannot be used to create
> > dmabuf\n",
> > + __func__);
> > return;
> > }
> > virtio_gpu_remap_dmabuf(res);
> > @@ -160,9 +268,7 @@ void virtio_gpu_init_dmabuf(struct
> virtio_gpu_simple_resource *res)
> >
> > void virtio_gpu_fini_dmabuf(struct virtio_gpu_simple_resource *res)
> > {
> > - if (res->remapped) {
> > - virtio_gpu_destroy_dmabuf(res);
> > - }
> > + virtio_gpu_destroy_dmabuf(res);
> > }
> >
> > static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf *dmabuf)