Re: [PATCH v14 04/10] virtio-gpu: Support asynchronous fencing

Yiwei Zhang Tue, 11 Nov 2025 16:45:01 -0800

On Tue, Nov 11, 2025 at 4:28 AM Dmitry Osipenko
<[email protected]> wrote:
>
> On 11/11/25 05:29, Yiwei Zhang wrote:
> > On Sat, Nov 8, 2025 at 3:56 AM Yiwei Zhang <[email protected]> wrote:
> >>
> >> On Mon, Oct 20, 2025 at 4:42 PM Dmitry Osipenko
> >> <[email protected]> wrote:
> >>>
> >>> Support asynchronous fencing feature of virglrenderer. It allows Qemu to
> >>> handle fence as soon as it's signalled instead of periodically polling
> >>> the fence status. This feature is required for enabling DRM context
> >>> support in Qemu because legacy fencing mode isn't supported for DRM
> >>> contexts in virglrenderer.
> >>>
> >>> Reviewed-by: Akihiko Odaki <[email protected]>
> >>> Acked-by: Michael S. Tsirkin <[email protected]>
> >>> Tested-by: Alex Bennée <[email protected]>
> >>> Reviewed-by: Alex Bennée <[email protected]>
> >>> Signed-off-by: Dmitry Osipenko <[email protected]>
> >>> ---
> >>>  hw/display/virtio-gpu-gl.c     |   5 ++
> >>>  hw/display/virtio-gpu-virgl.c  | 130 +++++++++++++++++++++++++++++++++
> >>>  include/hw/virtio/virtio-gpu.h |  11 +++
> >>>  meson.build                    |   2 +
> >>>  4 files changed, 148 insertions(+)
> >>>
> >>> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
> >>> index c06a078fb36a..1468c6ed1467 100644
> >>> --- a/hw/display/virtio-gpu-gl.c
> >>> +++ b/hw/display/virtio-gpu-gl.c
> >>> @@ -169,6 +169,11 @@ static void 
> >>> virtio_gpu_gl_device_unrealize(DeviceState *qdev)
> >>>      if (gl->renderer_state >= RS_INITED) {
> >>>  #if VIRGL_VERSION_MAJOR >= 1
> >>>          qemu_bh_delete(gl->cmdq_resume_bh);
> >>> +
> >>> +        if (gl->async_fence_bh) {
> >>> +            virtio_gpu_virgl_reset_async_fences(g);
> >>> +            qemu_bh_delete(gl->async_fence_bh);
> >>> +        }
> >>>  #endif
> >>>          if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
> >>>              timer_free(gl->print_stats);
> >>> diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
> >>> index cd8b367f6fa6..0320d6deca76 100644
> >>> --- a/hw/display/virtio-gpu-virgl.c
> >>> +++ b/hw/display/virtio-gpu-virgl.c
> >>> @@ -24,6 +24,23 @@
> >>>
> >>>  #include <virglrenderer.h>
> >>>
> >>> +/*
> >>> + * VIRGL_CHECK_VERSION available since libvirglrenderer 1.0.1 and was 
> >>> fixed
> >>> + * in 1.1.0. Undefine bugged version of the macro and provide our own.
> >>> + */
> >>> +#if defined(VIRGL_CHECK_VERSION) && \
> >>> +    VIRGL_VERSION_MAJOR == 1 && VIRGL_VERSION_MINOR < 1
> >>> +#undef VIRGL_CHECK_VERSION
> >>> +#endif
> >>> +
> >>> +#ifndef VIRGL_CHECK_VERSION
> >>> +#define VIRGL_CHECK_VERSION(major, minor, micro) \
> >>> +    (VIRGL_VERSION_MAJOR > (major) || \
> >>> +     VIRGL_VERSION_MAJOR == (major) && VIRGL_VERSION_MINOR > (minor) || \
> >>> +     VIRGL_VERSION_MAJOR == (major) && VIRGL_VERSION_MINOR == (minor) && 
> >>> \
> >>> +     VIRGL_VERSION_MICRO >= (micro))
> >>> +#endif
> >>> +
> >>>  struct virtio_gpu_virgl_resource {
> >>>      struct virtio_gpu_simple_resource base;
> >>>      MemoryRegion *mr;
> >>> @@ -1051,6 +1068,106 @@ static void virgl_write_context_fence(void 
> >>> *opaque, uint32_t ctx_id,
> >>>  }
> >>>  #endif
> >>>
> >>> +void virtio_gpu_virgl_reset_async_fences(VirtIOGPU *g)
> >>> +{
> >>> +    struct virtio_gpu_virgl_context_fence *f;
> >>> +    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
> >>> +
> >>> +    while (!QSLIST_EMPTY(&gl->async_fenceq)) {
> >>> +        f = QSLIST_FIRST(&gl->async_fenceq);
> >>> +
> >>> +        QSLIST_REMOVE_HEAD(&gl->async_fenceq, next);
> >>> +
> >>> +        g_free(f);
> >>> +    }
> >>> +}
> >>> +
> >>> +#if VIRGL_CHECK_VERSION(1, 1, 2)
> >>> +static void virtio_gpu_virgl_async_fence_bh(void *opaque)
> >>> +{
> >>> +    QSLIST_HEAD(, virtio_gpu_virgl_context_fence) async_fenceq;
> >>> +    struct virtio_gpu_ctrl_command *cmd, *tmp;
> >>> +    struct virtio_gpu_virgl_context_fence *f;
> >>> +    VirtIOGPU *g = opaque;
> >>> +    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
> >>> +
> >>> +    if (gl->renderer_state != RS_INITED) {
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    QSLIST_MOVE_ATOMIC(&async_fenceq, &gl->async_fenceq);
> >>> +
> >>> +    while (!QSLIST_EMPTY(&async_fenceq)) {
> >>> +        f = QSLIST_FIRST(&async_fenceq);
> >>> +
> >>> +        QSLIST_REMOVE_HEAD(&async_fenceq, next);
> >>> +
> >>> +        QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) {
> >>> +            /*
> >>> +             * the guest can end up emitting fences out of order
> >>> +             * so we should check all fenced cmds not just the first one.
> >>> +             */
> >>> +            if (cmd->cmd_hdr.fence_id > f->fence_id) {
> >>> +                continue;
> >>> +            }
> >>> +            if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) {
> >>> +                if (cmd->cmd_hdr.ring_idx != f->ring_idx) {
> >>> +                    continue;
> >>> +                }
> >>> +                if (cmd->cmd_hdr.ctx_id != f->ctx_id) {
> >>> +                    continue;
> >>> +                }
> >>> +            } else if (f->ring_idx >= 0) {
> >>> +                /* ctx0 GL-query fences don't have ring info */
> >>> +                continue;
> >>> +            }
> >>> +            virtio_gpu_ctrl_response_nodata(g, cmd, 
> >>> VIRTIO_GPU_RESP_OK_NODATA);
> >>> +            QTAILQ_REMOVE(&g->fenceq, cmd, next);
> >>> +            g_free(cmd);
> >>> +        }
> >>
> >> The conditions above are a little bit confusing. Skipping unsignaled
> >> fences first makes sense to me. Next we can use f->ctx_id == 0 to
> >> distinguish a ctx0 fence from a context fence. Then:
> >> - for f->ctx_id == 0, skip any cmd that has the RING_IDX flag set
> >> - for f->ctx_id > 0, only consider cmds that have the RING_IDX flag
> >> set, and additionally compare ctx_id and ring_idx
> >>
> >> So, if we check the RING_IDX flag first like in the existing patch,
> >> the else condition is only meaningful for the ctx0 fence, and
> >> f->ring_idx >= 0 will never evaluate to true there. Can we drop the
> >> "else if" part?
> >>
> >>> +
> >>> +        trace_virtio_gpu_fence_resp(f->fence_id);
> >>> +        g_free(f);
> >>> +        g->inflight--;
> >>> +        if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
> >>> +            trace_virtio_gpu_dec_inflight_fences(g->inflight);
> >>> +        }
> >>> +    }
> >>> +}
> >>> +
> >>> +static void
> >>> +virtio_gpu_virgl_push_async_fence(VirtIOGPU *g, uint32_t ctx_id,
> >>> +                                  int64_t ring_idx, uint64_t fence_id)
> >>> +{
> >>> +    struct virtio_gpu_virgl_context_fence *f;
> >>> +    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
> >>> +
> >>> +    f = g_new(struct virtio_gpu_virgl_context_fence, 1);
> >>> +    f->ctx_id = ctx_id;
> >>> +    f->ring_idx = ring_idx;
> >>> +    f->fence_id = fence_id;
> >>> +
> >>> +    QSLIST_INSERT_HEAD_ATOMIC(&gl->async_fenceq, f, next);
> >>> +
> >>> +    qemu_bh_schedule(gl->async_fence_bh);
> >>> +}
> >>> +
> >>> +static void virgl_write_async_fence(void *opaque, uint32_t fence)
> >>> +{
> >>> +    VirtIOGPU *g = opaque;
> >>> +
> >>> +    virtio_gpu_virgl_push_async_fence(g, 0, -1, fence);
> >>> +}
> >>> +
> >>> +static void virgl_write_async_context_fence(void *opaque, uint32_t 
> >>> ctx_id,
> >>> +                                            uint32_t ring_idx, uint64_t 
> >>> fence)
> >>> +{
> >>> +    VirtIOGPU *g = opaque;
> >>> +
> >>> +    virtio_gpu_virgl_push_async_fence(g, ctx_id, ring_idx, fence);
> >>> +}
> >>> +#endif
> >>> +
> >>>  static virgl_renderer_gl_context
> >>>  virgl_create_context(void *opaque, int scanout_idx,
> >>>                       struct virgl_renderer_gl_ctx_param *params)
> >>> @@ -1150,6 +1267,8 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g)
> >>>  void virtio_gpu_virgl_reset(VirtIOGPU *g)
> >>>  {
> >>>      virgl_renderer_reset();
> >>> +
> >>> +    virtio_gpu_virgl_reset_async_fences(g);
> >>>  }
> >>>
> >>>  int virtio_gpu_virgl_init(VirtIOGPU *g)
> >>> @@ -1162,6 +1281,12 @@ int virtio_gpu_virgl_init(VirtIOGPU *g)
> >>>      if (qemu_egl_display) {
> >>>          virtio_gpu_3d_cbs.version = 4;
> >>>          virtio_gpu_3d_cbs.get_egl_display = virgl_get_egl_display;
> >>> +#if VIRGL_CHECK_VERSION(1, 1, 2)
> >>> +        virtio_gpu_3d_cbs.write_fence         = virgl_write_async_fence;
> >>> +        virtio_gpu_3d_cbs.write_context_fence = 
> >>> virgl_write_async_context_fence;
> >>> +        flags |= VIRGL_RENDERER_ASYNC_FENCE_CB;
> >>> +        flags |= VIRGL_RENDERER_THREAD_SYNC;
> >>> +#endif
> >>>      }
> >>>  #endif
> >>>  #ifdef VIRGL_RENDERER_D3D11_SHARE_TEXTURE
> >>> @@ -1195,6 +1320,11 @@ int virtio_gpu_virgl_init(VirtIOGPU *g)
> >>>      gl->cmdq_resume_bh = aio_bh_new(qemu_get_aio_context(),
> >>>                                      virtio_gpu_virgl_resume_cmdq_bh,
> >>>                                      g);
> >>> +#if VIRGL_CHECK_VERSION(1, 1, 2)
> >>> +    gl->async_fence_bh = aio_bh_new(qemu_get_aio_context(),
> >>> +                                    virtio_gpu_virgl_async_fence_bh,
> >>> +                                    g);
> >>> +#endif
> >>>  #endif
> >>>
> >>>      return 0;
> >>> diff --git a/include/hw/virtio/virtio-gpu.h 
> >>> b/include/hw/virtio/virtio-gpu.h
> >>> index 9f16f89a36d2..e15c16aa5945 100644
> >>> --- a/include/hw/virtio/virtio-gpu.h
> >>> +++ b/include/hw/virtio/virtio-gpu.h
> >>> @@ -233,6 +233,13 @@ struct VirtIOGPUClass {
> >>>                               Error **errp);
> >>>  };
> >>>
> >>> +struct virtio_gpu_virgl_context_fence {
> >>> +    uint32_t ctx_id;
> >>> +    int64_t ring_idx;
> >>
> >> If I didn't miss anything above, we don't need -1 as a sentinel to
> >> distinguish anything. Then the ring_idx here can be a uint32_t, and
> >> virgl_write_async_fence can just pass 0.
> >>
> >>> +    uint64_t fence_id;
> >>> +    QSLIST_ENTRY(virtio_gpu_virgl_context_fence) next;
> >>> +};
> >>> +
> >>>  /* VirtIOGPUGL renderer states */
> >>>  typedef enum {
> >>>      RS_START,       /* starting state */
> >>> @@ -250,6 +257,9 @@ struct VirtIOGPUGL {
> >>>      QEMUTimer *print_stats;
> >>>
> >>>      QEMUBH *cmdq_resume_bh;
> >>> +
> >>> +    QEMUBH *async_fence_bh;
> >>> +    QSLIST_HEAD(, virtio_gpu_virgl_context_fence) async_fenceq;
> >>>  };
> >>>
> >>>  struct VhostUserGPU {
> >>> @@ -379,5 +389,6 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
> >>>  void virtio_gpu_virgl_reset(VirtIOGPU *g);
> >>>  int virtio_gpu_virgl_init(VirtIOGPU *g);
> >>>  GArray *virtio_gpu_virgl_get_capsets(VirtIOGPU *g);
> >>> +void virtio_gpu_virgl_reset_async_fences(VirtIOGPU *g);
> >>>
> >>>  #endif
> >>> diff --git a/meson.build b/meson.build
> >>> index e96c28da09b6..e3d48150483e 100644
> >>> --- a/meson.build
> >>> +++ b/meson.build
> >>> @@ -2597,6 +2597,8 @@ config_host_data.set('CONFIG_VNC_JPEG', 
> >>> jpeg.found())
> >>>  config_host_data.set('CONFIG_VNC_SASL', sasl.found())
> >>>  if virgl.found()
> >>>    config_host_data.set('VIRGL_VERSION_MAJOR', 
> >>> virgl.version().split('.')[0])
> >>> +  config_host_data.set('VIRGL_VERSION_MINOR', 
> >>> virgl.version().split('.')[1])
> >>> +  config_host_data.set('VIRGL_VERSION_MICRO', 
> >>> virgl.version().split('.')[2])
> >>>  endif
> >>>  config_host_data.set('CONFIG_VIRTFS', have_virtfs)
> >>>  config_host_data.set('CONFIG_VTE', vte.found())
> >>> --
> >>> 2.51.0
> >>>
> >
> > Friendly ping. My earlier comments here might have been missed ; )
>
> I indeed missed that first part of the comment, thanks.
>
> Could you please give a code sample of your suggestion?
>
> The `if (f->ring_idx >= 0)` is a sanity check that we're not getting
> a fence that has a ring_idx without the RING_IDX flag being set. All
> other fences are processed by that fence-handling loop.
>
> --
> Best regards,
> Dmitry


QEMU creates ctx0 fences without seeing the RING_IDX flag, and I think
that sanity check is a bit redundant. So I was suggesting the change
below for simplicity:

diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
index ccba1d8ee4..12c1e1764c 100644
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -1117,9 +1117,6 @@ static void virtio_gpu_virgl_async_fence_bh(void *opaque)
                 if (cmd->cmd_hdr.ctx_id != f->ctx_id) {
                     continue;
                 }
-            } else if (f->ring_idx >= 0) {
-                /* ctx0 GL-query fences don't have ring info */
-                continue;
             }
             virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
             QTAILQ_REMOVE(&g->fenceq, cmd, next);
@@ -1137,7 +1134,7 @@ static void virtio_gpu_virgl_async_fence_bh(void *opaque)

 static void
 virtio_gpu_virgl_push_async_fence(VirtIOGPU *g, uint32_t ctx_id,
-                                  int64_t ring_idx, uint64_t fence_id)
+                                  uint32_t ring_idx, uint64_t fence_id)
 {
     struct virtio_gpu_virgl_context_fence *f;
     VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
@@ -1156,7 +1153,7 @@ static void virgl_write_async_fence(void *opaque, uint32_t fence)
 {
     VirtIOGPU *g = opaque;

-    virtio_gpu_virgl_push_async_fence(g, 0, -1, fence);
+    virtio_gpu_virgl_push_async_fence(g, 0, UINT32_MAX, fence);
 }

 static void virgl_write_async_context_fence(void *opaque, uint32_t ctx_id,
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index a4963508a4..cd576c9e0d 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -238,7 +238,7 @@ struct VirtIOGPUClass {

 struct virtio_gpu_virgl_context_fence {
     uint32_t ctx_id;
-    int64_t ring_idx;
+    uint32_t ring_idx;
     uint64_t fence_id;
     QSLIST_ENTRY(virtio_gpu_virgl_context_fence) next;
 };

Re: [PATCH v14 04/10] virtio-gpu: Support asynchronous fencing

Reply via email to