The issue with the old mechanism is that we had to introduce a new
API each time we needed a new queue family, and all the queue families
were functionally fixed to a given purpose.

Nvidia's GPUs are able to handle video encoding and compute on the
same queue, which results in a speedup when pre-processing is required.

Also, this enables us to expose optical flow queues for frame interpolation.
---
 libavutil/hwcontext_vulkan.c | 57 ++++++++++++++++++++++++++++++++----
 libavutil/hwcontext_vulkan.h | 25 ++++++++++++++++
 2 files changed, 77 insertions(+), 5 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index da377aa1a4..af60ab29d2 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1428,7 +1428,8 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
     VulkanDevicePriv *p = ctx->hwctx;
     AVVulkanDeviceContext *hwctx = &p->p;
     FFVulkanFunctions *vk = &p->vkctx.vkfn;
-    VkQueueFamilyProperties *qf;
+    VkQueueFamilyProperties2 *qf;
+    VkQueueFamilyVideoPropertiesKHR *qf_vid;
     int graph_index, comp_index, tx_index, enc_index, dec_index;
 
     /* Set device extension flags */
@@ -1474,11 +1475,27 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
         return AVERROR_EXTERNAL;
     }
 
-    qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
+    qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties2));
     if (!qf)
         return AVERROR(ENOMEM);
 
-    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
+    qf_vid = av_malloc_array(qf_num, sizeof(VkQueueFamilyVideoPropertiesKHR));
+    if (!qf_vid) {
+        av_free(qf);
+        return AVERROR(ENOMEM);
+    }
+
+    for (uint32_t i = 0; i < qf_num; i++) {
+        qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
+            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
+        };
+        qf[i] = (VkQueueFamilyProperties2) {
+            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
+            .pNext = &qf_vid[i],
+        };
+    }
+
+    vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &qf_num, qf);
 
     p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex));
     if (!p->qf_mutex) {
@@ -1488,12 +1505,12 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
     p->nb_tot_qfs = qf_num;
 
     for (uint32_t i = 0; i < qf_num; i++) {
-        p->qf_mutex[i] = av_calloc(qf[i].queueCount, sizeof(**p->qf_mutex));
+        p->qf_mutex[i] = av_calloc(qf[i].queueFamilyProperties.queueCount, 
sizeof(**p->qf_mutex));
         if (!p->qf_mutex[i]) {
             av_free(qf);
             return AVERROR(ENOMEM);
         }
-        for (uint32_t j = 0; j < qf[i].queueCount; j++) {
+        for (uint32_t j = 0; j < qf[i].queueFamilyProperties.queueCount; j++) {
             err = pthread_mutex_init(&p->qf_mutex[i][j], NULL);
             if (err != 0) {
                 av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n",
@@ -1550,6 +1567,36 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
 
 #undef CHECK_QUEUE
 
+    /* Update the new queue family fields. If non-zero already,
+     * it means API users have set it. */
+    if (!hwctx->nb_qf) {
+#define ADD_QUEUE(ctx_qf, qc, flag)                                    \
+    do {                                                               \
+        if (ctx_qf != -1) {                                            \
+            hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) {  \
+                .idx = ctx_qf,                                         \
+                .num = qc,                                             \
+                .flags = flag,                                         \
+            };                                                         \
+        }                                                              \
+    } while (0)
+
+        ADD_QUEUE(hwctx->queue_family_index, hwctx->nb_graphics_queues, 
VK_QUEUE_GRAPHICS_BIT);
+        ADD_QUEUE(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, 
VK_QUEUE_COMPUTE_BIT);
+        ADD_QUEUE(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, 
VK_QUEUE_TRANSFER_BIT);
+        ADD_QUEUE(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, 
VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+        ADD_QUEUE(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, 
VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
+#undef ADD_QUEUE
+    }
+
+    for (int i = 0; i < hwctx->nb_qf; i++) {
+        if (!hwctx->qf[i].video_caps &&
+            hwctx->qf[i].flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR |
+                                  VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) {
+            hwctx->qf[i].video_caps = 
qf_vid[hwctx->qf[i].idx].videoCodecOperations;
+        }
+    }
+
     if (!hwctx->lock_queue)
         hwctx->lock_queue = lock_queue;
     if (!hwctx->unlock_queue)
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index cbbd2390c1..394af46649 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -30,6 +30,20 @@
 
 typedef struct AVVkFrame AVVkFrame;
 
+typedef struct AVVulkanDeviceQueueFamily {
+    /* Index of the queue family on the physical device */
+    int idx;
+    /* Number of queues of this queue family that are in use */
+    int num;
+    /* Queue family capabilities. Must be non-zero.
+     * Flags may be cleared to indicate that the queue family is not to be
+     * used for a given purpose. */
+    VkQueueFlagBits flags;
+    /* Supported video codec operations. Vulkan implementations may list
+     * multiple video queues which differ in what they can encode or decode. */
+    VkVideoCodecOperationFlagBitsKHR video_caps;
+} AVVulkanDeviceQueueFamily;
+
 /**
  * @file
  * API-specific header for AV_HWDEVICE_TYPE_VULKAN.
@@ -151,6 +165,17 @@ typedef struct AVVulkanDeviceContext {
      * Similar to lock_queue(), unlocks a queue. Must only be called after locking.
      */
     void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, 
uint32_t index);
+
+    /**
+     * Queue families used. Must be preferentially ordered. List may contain
+     * duplicates.
+     *
+     * For compatibility reasons, all the enabled queue families listed above
+     * (queue_family_(tx/comp/encode/decode)_index) must also be included in
+     * this list until they're removed after deprecation.
+     */
+    AVVulkanDeviceQueueFamily qf[16];
+    int nb_qf;
 } AVVulkanDeviceContext;
 
 /**
-- 
2.45.1.288.g0e0cd299f1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Reply via email to