Posting this as a single email so as not to spam the ML too much. I'm going to push this sometime over the weekend, alongside the other 3 patches, to finish all Vulkan work before the next release. I'm satisfied with the code, though the last patch is slightly ugly.

Performance improvements from the async code:
 - 78% on a toy GPU with 1 queue, for an upload+scale;
 - probably closer to 200% on a discrete GPU with 4 transfer/compute queues.
From cbf54e68dd253581515d76f930c7e7e8d77809b7 Mon Sep 17 00:00:00 2001
From: Lynne <d...@lynne.ee>
Date: Thu, 14 May 2020 00:37:21 +0100
Subject: [PATCH 10/10] lavfi/vulkan: use all enabled queues in the queue
 family

This should significantly improve the performance with certain
filterchains.
---
 libavfilter/vf_avgblur_vulkan.c   |  39 ++--
 libavfilter/vf_chromaber_vulkan.c |  30 +--
 libavfilter/vf_overlay_vulkan.c   |  37 ++--
 libavfilter/vf_scale_vulkan.c     |  30 +--
 libavfilter/vulkan.c              | 296 +++++++++++++++++++++++-------
 libavfilter/vulkan.h              |  74 ++++++--
 6 files changed, 371 insertions(+), 135 deletions(-)

diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c
index 105d753f73..12d57e0875 100644
--- a/libavfilter/vf_avgblur_vulkan.c
+++ b/libavfilter/vf_avgblur_vulkan.c
@@ -97,6 +97,10 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     if (!sampler)
         return AVERROR_EXTERNAL;
 
+    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
+    s->vkctx.queue_count = GET_QUEUE_COUNT(s->vkctx.hwctx, 0, 1, 0);
+    s->vkctx.cur_queue_idx = rand() % s->vkctx.queue_count;
+
     { /* Create shader for the horizontal pass */
         desc_i[0].updater = s->input_images;
         desc_i[1].updater = s->tmp_images;
@@ -184,8 +188,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     }
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
-                              s->vkctx.hwctx->queue_family_comp_index));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
 
     s->initialized = 1;
 
@@ -198,22 +201,30 @@ fail:
 static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f,
                           AVFrame *in_f)
 {
     int err;
+    VkCommandBuffer cmd_buf;
     AvgBlurVulkanContext *s = avctx->priv;
     AVVkFrame *in = (AVVkFrame *)in_f->data[0];
     AVVkFrame *tmp = (AVVkFrame *)tmp_f->data[0];
     AVVkFrame *out = (AVVkFrame *)out_f->data[0];
     int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
 
+    /* Update descriptors and init the exec context */
+    ff_vk_start_exec_recording(avctx, s->exec);
+    cmd_buf = ff_vk_get_exec_buf(avctx, s->exec);
+
     for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->input_images[i].imageView,
+                                   in->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.input_format)[i],
                                    ff_comp_identity_map));
 
-        RET(ff_vk_create_imageview(avctx, &s->tmp_images[i].imageView, tmp->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->tmp_images[i].imageView,
+                                   tmp->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.output_format)[i],
                                    ff_comp_identity_map));
 
-        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->output_images[i].imageView,
+                                   out->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.output_format)[i],
                                    ff_comp_identity_map));
 
@@ -225,8 +236,6 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f
     ff_vk_update_descriptor_set(avctx, s->pl_hor, 0);
     ff_vk_update_descriptor_set(avctx, s->pl_ver, 0);
 
-    ff_vk_start_exec_recording(avctx, s->exec);
-
     for (int i = 0; i < planes; i++) {
         VkImageMemoryBarrier bar[] = {
             {
@@ -270,7 +279,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f
             },
         };
 
-        vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+        vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                              VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
                              0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
 
@@ -286,12 +295,12 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f
 
     ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl_hor);
 
-    vkCmdDispatch(s->exec->buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
+    vkCmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
                   s->vkctx.output_height, 1);
 
     ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl_ver);
 
-    vkCmdDispatch(s->exec->buf, s->vkctx.output_width,
+    vkCmdDispatch(cmd_buf, s->vkctx.output_width,
                   FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
 
     ff_vk_add_exec_dep(avctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
@@ -301,14 +310,10 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f
     if (err)
         return err;
 
-fail:
-
-    for (int i = 0; i < planes; i++) {
-        ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView);
-        ff_vk_destroy_imageview(avctx, &s->tmp_images[i].imageView);
-        ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView);
-    }
+    return err;
 
+fail:
+    ff_vk_discard_exec_deps(avctx, s->exec);
     return err;
 }
 
diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c
index 673b3a7a68..1bee5e10f8 100644
--- a/libavfilter/vf_chromaber_vulkan.c
+++ b/libavfilter/vf_chromaber_vulkan.c
@@ -73,6 +73,10 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     if (!sampler)
         return AVERROR_EXTERNAL;
 
+    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
+    s->vkctx.queue_count = GET_QUEUE_COUNT(s->vkctx.hwctx, 0, 1, 0);
+    s->vkctx.cur_queue_idx = rand() % s->vkctx.queue_count;
+
     s->pl = ff_vk_create_pipeline(ctx);
     if (!s->pl)
         return AVERROR(ENOMEM);
@@ -154,8 +158,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     RET(ff_vk_init_compute_pipeline(ctx, s->pl));
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
-                              s->vkctx.hwctx->queue_family_comp_index));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
 
     s->initialized = 1;
 
@@ -168,17 +171,24 @@ fail:
 static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
 {
     int err = 0;
+    VkCommandBuffer cmd_buf;
     ChromaticAberrationVulkanContext *s = avctx->priv;
     AVVkFrame *in = (AVVkFrame *)in_f->data[0];
     AVVkFrame *out = (AVVkFrame *)out_f->data[0];
     int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
 
+    /* Update descriptors and init the exec context */
+    ff_vk_start_exec_recording(avctx, s->exec);
+    cmd_buf = ff_vk_get_exec_buf(avctx, s->exec);
+
     for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->input_images[i].imageView,
+                                   in->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.input_format)[i],
                                    ff_comp_identity_map));
 
-        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->output_images[i].imageView,
+                                   out->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.output_format)[i],
                                    ff_comp_identity_map));
 
@@ -188,8 +198,6 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
 
     ff_vk_update_descriptor_set(avctx, s->pl, 0);
 
-    ff_vk_start_exec_recording(avctx, s->exec);
-
     for (int i = 0; i < planes; i++) {
         VkImageMemoryBarrier bar[2] = {
             {
@@ -220,7 +228,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
             },
         };
 
-        vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+        vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                              VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
                              0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
 
@@ -236,7 +244,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
     ff_vk_update_push_exec(avctx, s->exec, VK_SHADER_STAGE_COMPUTE_BIT,
                            0, sizeof(s->opts), &s->opts);
 
-    vkCmdDispatch(s->exec->buf,
+    vkCmdDispatch(cmd_buf,
                   FFALIGN(s->vkctx.output_width,  CGROUPS[0])/CGROUPS[0],
                   FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
 
@@ -247,12 +255,10 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
     if (err)
         return err;
 
-    for (int i = 0; i < planes; i++) {
-        ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView);
-        ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView);
-    }
+    return err;
 
 fail:
+    ff_vk_discard_exec_deps(avctx, s->exec);
     return err;
 }
 
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
index 83cfae40e2..60a7356456 100644
--- a/libavfilter/vf_overlay_vulkan.c
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -87,6 +87,10 @@ static av_cold int init_filter(AVFilterContext *ctx)
     if (!s->pl)
         return AVERROR(ENOMEM);
 
+    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
+    s->vkctx.queue_count = GET_QUEUE_COUNT(s->vkctx.hwctx, 0, 1, 0);
+    s->vkctx.cur_queue_idx = rand() % s->vkctx.queue_count;
+
     { /* Create the shader */
         const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
         const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
@@ -211,8 +215,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
     }
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
-                              s->vkctx.hwctx->queue_family_comp_index));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
 
     s->initialized = 1;
 
@@ -226,6 +229,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
                           AVFrame *main_f, AVFrame *overlay_f)
 {
     int err;
+    VkCommandBuffer cmd_buf;
     OverlayVulkanContext *s = avctx->priv;
     int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
 
@@ -236,16 +240,23 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
     AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data;
     AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data;
 
+    /* Update descriptors and init the exec context */
+    ff_vk_start_exec_recording(avctx, s->exec);
+    cmd_buf = ff_vk_get_exec_buf(avctx, s->exec);
+
     for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(avctx, &s->main_images[i].imageView, main->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->main_images[i].imageView,
+                                   main->img[i],
                                    av_vkfmt_from_pixfmt(main_fc->sw_format)[i],
                                    ff_comp_identity_map));
 
-        RET(ff_vk_create_imageview(avctx, &s->overlay_images[i].imageView, overlay->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->overlay_images[i].imageView,
+                                   overlay->img[i],
                                    av_vkfmt_from_pixfmt(overlay_fc->sw_format)[i],
                                    ff_comp_identity_map));
 
-        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->output_images[i].imageView,
+                                   out->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.output_format)[i],
                                    ff_comp_identity_map));
 
@@ -256,8 +267,6 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
 
     ff_vk_update_descriptor_set(avctx, s->pl, 0);
 
-    ff_vk_start_exec_recording(avctx, s->exec);
-
     for (int i = 0; i < planes; i++) {
         VkImageMemoryBarrier bar[3] = {
             {
@@ -301,7 +310,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
             },
         };
 
-        vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+        vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                              VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
                              0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
 
@@ -317,7 +326,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
 
     ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl);
 
-    vkCmdDispatch(s->exec->buf,
+    vkCmdDispatch(cmd_buf,
                   FFALIGN(s->vkctx.output_width,  CGROUPS[0])/CGROUPS[0],
                   FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
 
@@ -329,14 +338,10 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
     if (err)
         return err;
 
-fail:
-
-    for (int i = 0; i < planes; i++) {
-        ff_vk_destroy_imageview(avctx, &s->main_images[i].imageView);
-        ff_vk_destroy_imageview(avctx, &s->overlay_images[i].imageView);
-        ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView);
-    }
+    return err;
 
+fail:
+    ff_vk_discard_exec_deps(avctx, s->exec);
     return err;
 }
 
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 328e6bcce5..9b2e5b92f6 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -115,6 +115,10 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     int crop_w = in->width - (in->crop_left + in->crop_right);
     int crop_h = in->height - (in->crop_top + in->crop_bottom);
 
+    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
+    s->vkctx.queue_count = GET_QUEUE_COUNT(s->vkctx.hwctx, 0, 1, 0);
+    s->vkctx.cur_queue_idx = rand() % s->vkctx.queue_count;
+
     switch (s->scaler) {
     case F_NEAREST:
         sampler_mode = VK_FILTER_NEAREST;
@@ -276,8 +280,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     }
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
-                              s->vkctx.hwctx->queue_family_comp_index));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
 
     s->initialized = 1;
 
@@ -290,14 +293,20 @@ fail:
 static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
 {
     int err = 0;
+    VkCommandBuffer cmd_buf;
     ScaleVulkanContext *s = avctx->priv;
     AVVkFrame *in = (AVVkFrame *)in_f->data[0];
     AVVkFrame *out = (AVVkFrame *)out_f->data[0];
     VkImageMemoryBarrier barriers[AV_NUM_DATA_POINTERS*2];
     int barrier_count = 0;
 
+    /* Update descriptors and init the exec context */
+    ff_vk_start_exec_recording(avctx, s->exec);
+    cmd_buf = ff_vk_get_exec_buf(avctx, s->exec);
+
     for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.input_format); i++) {
-        RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->input_images[i].imageView,
+                                   in->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.input_format)[i],
                                    ff_comp_identity_map));
 
@@ -305,7 +314,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
     }
 
     for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) {
-        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i],
+        RET(ff_vk_create_imageview(avctx, s->exec, &s->output_images[i].imageView,
+                                   out->img[i],
                                    av_vkfmt_from_pixfmt(s->vkctx.output_format)[i],
                                    ff_comp_identity_map));
 
@@ -314,8 +324,6 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
 
     ff_vk_update_descriptor_set(avctx, s->pl, 0);
 
-    ff_vk_start_exec_recording(avctx, s->exec);
-
     for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.input_format); i++) {
         VkImageMemoryBarrier bar = {
             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
@@ -358,13 +366,13 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
         out->access[i] = bar.dstAccessMask;
     }
 
-    vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+    vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                          VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
                          0, NULL, 0, NULL, barrier_count, barriers);
 
     ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl);
 
-    vkCmdDispatch(s->exec->buf,
+    vkCmdDispatch(cmd_buf,
                   FFALIGN(s->vkctx.output_width,  CGROUPS[0])/CGROUPS[0],
                   FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
 
@@ -375,12 +383,10 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
     if (err)
         return err;
 
-    for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.input_format); i++)
-        ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView);
-    for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++)
-        ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView);
+    return err;
 
 fail:
+    ff_vk_discard_exec_deps(avctx, s->exec);
    return err;
 }
 
diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c
index ccf71cb7cd..301ee4354f 100644
--- a/libavfilter/vulkan.c
+++ b/libavfilter/vulkan.c
@@ -311,72 +311,116 @@ int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
 }
 
 FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
-int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue)
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
 {
     VkResult ret;
     FFVkExecContext *e;
     VulkanFilterContext *s = avctx->priv;
 
+    int queue_family = s->queue_family_idx;
+    int nb_queues = s->queue_count;
+
     VkCommandPoolCreateInfo cqueue_create = {
         .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
         .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
-        .queueFamilyIndex   = queue,
+        .queueFamilyIndex   = queue_family,
     };
     VkCommandBufferAllocateInfo cbuf_create = {
         .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
         .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
-        .commandBufferCount = 1,
+        .commandBufferCount = nb_queues,
     };
 
-    VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
-
     e = create_exec_ctx(s);
     if (!e)
         return AVERROR(ENOMEM);
 
+    e->queues = av_mallocz(nb_queues * sizeof(*e->queues));
+    if (!e->queues)
+        return AVERROR(ENOMEM);
+
+    e->bufs = av_mallocz(nb_queues * sizeof(*e->bufs));
+    if (!e->bufs)
+        return AVERROR(ENOMEM);
+
+    /* Create command pool */
     ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create,
                               s->hwctx->alloc, &e->pool);
     if (ret != VK_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
                ff_vk_ret2str(ret));
-        return 1;
+        return AVERROR_EXTERNAL;
     }
 
     cbuf_create.commandPool = e->pool;
 
-    ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, &e->buf);
+    /* Allocate command buffer */
+    ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
     if (ret != VK_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
                ff_vk_ret2str(ret));
-        return 1;
+        return AVERROR_EXTERNAL;
     }
 
-    ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn,
-                        s->hwctx->alloc, &e->fence);
-    if (ret != VK_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
-               ff_vk_ret2str(ret));
-        return 1;
+    for (int i = 0; i < nb_queues; i++) {
+        FFVkQueueCtx *q = &e->queues[i];
+        vkGetDeviceQueue(s->hwctx->act_dev, queue_family, i, &q->queue);
     }
 
-    vkGetDeviceQueue(s->hwctx->act_dev, queue, 0, &e->queue);
-
     *ctx = e;
 
     return 0;
 }
 
+void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e)
+{
+    VulkanFilterContext *s = avctx->priv;
+    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
+
+    for (int j = 0; j < q->nb_buf_deps; j++)
+        av_buffer_unref(&q->buf_deps[j]);
+    q->nb_buf_deps = 0;
+
+    for (int j = 0; j < q->nb_frame_deps; j++)
+        av_frame_free(&q->frame_deps[j]);
+    q->nb_frame_deps = 0;
+
+    e->sem_wait_cnt = 0;
+    e->sem_sig_cnt = 0;
+}
+
 int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
 {
     VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
+
     VkCommandBufferBeginInfo cmd_start = {
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
         .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
     };
 
-    e->sem_wait_cnt = 0;
-    e->sem_sig_cnt = 0;
+    /* Create the fence and don't wait for it initially */
+    if (!q->fence) {
+        VkFenceCreateInfo fence_spawn = {
+            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+        };
+        ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
+                            &q->fence);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    } else {
+        vkWaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+        vkResetFences(s->hwctx->act_dev, 1, &q->fence);
+    }
 
-    ret = vkBeginCommandBuffer(e->buf, &cmd_start);
+    /* Discard queue dependencies */
+    ff_vk_discard_exec_deps(avctx, e);
+
+    ret = vkBeginCommandBuffer(e->bufs[s->cur_queue_idx], &cmd_start);
     if (ret != VK_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Failed to start command recording: %s\n",
                ff_vk_ret2str(ret));
@@ -386,28 +430,43 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
     return 0;
 }
 
+VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e)
+{
+    VulkanFilterContext *s = avctx->priv;
+    return e->bufs[s->cur_queue_idx];
+}
+
 int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
                        AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
 {
+    AVFrame **dst;
+    VulkanFilterContext *s = avctx->priv;
     AVVkFrame *f = (AVVkFrame *)frame->data[0];
+    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
     AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
     int planes = av_pix_fmt_count_planes(fc->sw_format);
 
     for (int i = 0; i < planes; i++) {
         e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
                                       (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
-        if (!e->sem_wait)
+        if (!e->sem_wait) {
+            ff_vk_discard_exec_deps(avctx, e);
             return AVERROR(ENOMEM);
+        }
 
         e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
                                           (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
-        if (!e->sem_wait_dst)
+        if (!e->sem_wait_dst) {
+            ff_vk_discard_exec_deps(avctx, e);
             return AVERROR(ENOMEM);
+        }
 
         e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
                                      (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
-        if (!e->sem_sig)
+        if (!e->sem_sig) {
+            ff_vk_discard_exec_deps(avctx, e);
            return AVERROR(ENOMEM);
+        }
 
         e->sem_wait[e->sem_wait_cnt] = f->sem[i];
         e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
@@ -417,6 +476,21 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
         e->sem_sig_cnt++;
     }
 
+    dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
+                          (q->nb_frame_deps + 1) * sizeof(*dst));
+    if (!dst) {
+        ff_vk_discard_exec_deps(avctx, e);
+        return AVERROR(ENOMEM);
+    }
+
+    q->frame_deps = dst;
+    q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
+    if (!q->frame_deps[q->nb_frame_deps]) {
+        ff_vk_discard_exec_deps(avctx, e);
+        return AVERROR(ENOMEM);
+    }
+    q->nb_frame_deps++;
+
     return 0;
 }
 
@@ -424,11 +498,12 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
 {
     VkResult ret;
     VulkanFilterContext *s = avctx->priv;
+    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
 
     VkSubmitInfo s_info = {
         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
         .commandBufferCount   = 1,
-        .pCommandBuffers      = &e->buf,
+        .pCommandBuffers      = &e->bufs[s->cur_queue_idx],
 
         .pWaitSemaphores      = e->sem_wait,
         .pWaitDstStageMask    = e->sem_wait_dst,
@@ -438,21 +513,57 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
         .signalSemaphoreCount = e->sem_sig_cnt,
     };
 
-    vkEndCommandBuffer(e->buf);
+    ret = vkEndCommandBuffer(e->bufs[s->cur_queue_idx]);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
 
-    ret = vkQueueSubmit(e->queue, 1, &s_info, e->fence);
+    ret = vkQueueSubmit(q->queue, 1, &s_info, q->fence);
     if (ret != VK_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
                ff_vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
 
-    vkWaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
-    vkResetFences(s->hwctx->act_dev, 1, &e->fence);
+    /* Rotate queues */
+    s->cur_queue_idx = (s->cur_queue_idx + 1) % s->queue_count;
 
     return 0;
 }
 
+int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e,
+                           AVBufferRef **deps, int nb_deps)
+{
+    AVBufferRef **dst;
+    VulkanFilterContext *s = avctx->priv;
+    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
+
+    if (!deps || !nb_deps)
+        return 0;
+
+    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
+                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
+    if (!dst)
+        goto err;
+
+    q->buf_deps = dst;
+
+    for (int i = 0; i < nb_deps; i++) {
+        q->buf_deps[q->nb_buf_deps] = deps[i];
+        if (!q->buf_deps[q->nb_buf_deps])
+            goto err;
+        q->nb_buf_deps++;
+    }
+
+    return 0;
+
+err:
+    ff_vk_discard_exec_deps(avctx, e);
+    return AVERROR(ENOMEM);
+}
+
 int ff_vk_filter_query_formats(AVFilterContext *avctx)
 {
     static const enum AVPixelFormat pixel_formats[] = {
@@ -685,9 +796,24 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
     return high ? "rgba16f" : "rgba8";
 }
 
-int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img,
-                           VkFormat fmt, const VkComponentMapping map)
+typedef struct ImageViewCtx {
+    VkImageView view;
+} ImageViewCtx;
+
+static void destroy_imageview(void *opaque, uint8_t *data)
+{
+    VulkanFilterContext *s = opaque;
+    ImageViewCtx *iv = (ImageViewCtx *)data;
+    vkDestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
+    av_free(iv);
+}
+
+int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
+                           VkImageView *v, VkImage img, VkFormat fmt,
+                           const VkComponentMapping map)
 {
+    int err;
+    AVBufferRef *buf;
     VulkanFilterContext *s = avctx->priv;
     VkImageViewCreateInfo imgview_spawn = {
         .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -705,24 +831,32 @@ int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img,
         },
     };
 
+    ImageViewCtx *iv = av_mallocz(sizeof(*iv));
+
     VkResult ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn,
-                                     s->hwctx->alloc, v);
+                                     s->hwctx->alloc, &iv->view);
     if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+        av_log(avctx, AV_LOG_ERROR, "Failed to create imageview: %s\n",
                ff_vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
 
-    return 0;
-}
+    buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
+    if (!buf) {
+        destroy_imageview(s, (uint8_t *)iv);
+        return AVERROR(ENOMEM);
+    }
 
-void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v)
-{
-    VulkanFilterContext *s = avctx->priv;
-    if (v && *v) {
-        vkDestroyImageView(s->hwctx->act_dev, *v, s->hwctx->alloc);
-        *v = NULL;
+    /* Add to queue dependencies */
+    err = ff_vk_add_dep_exec_ctx(avctx, e, &buf, 1);
+    if (err) {
+        av_buffer_unref(&buf);
+        return err;
     }
+
+    *v = iv->view;
+
+    return 0;
 }
 
 FN_CREATING(VulkanPipeline, SPIRVShader, shader, shaders, shaders_num)
@@ -870,11 +1004,11 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
         goto print;
 
     pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
-                                       pl->descriptor_sets_num + 1);
+                                       pl->desc_layout_num + 1);
     if (!pl->desc_layout)
         return AVERROR(ENOMEM);
 
-    layout = &pl->desc_layout[pl->descriptor_sets_num];
+    layout = &pl->desc_layout[pl->desc_layout_num];
     memset(layout, 0, sizeof(*layout));
 
     { /* Create descriptor set layout descriptions */
@@ -946,11 +1080,11 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
 
         pl->desc_template_info = av_realloc_array(pl->desc_template_info,
                                                   sizeof(*pl->desc_template_info),
-                                                  pl->descriptor_sets_num + 1);
+                                                  pl->desc_layout_num + 1);
         if (!pl->desc_template_info)
            return AVERROR(ENOMEM);
 
-        dt = &pl->desc_template_info[pl->descriptor_sets_num];
+        dt = &pl->desc_template_info[pl->desc_layout_num];
         memset(dt, 0, sizeof(*dt));
 
         dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
@@ -960,13 +1094,13 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
         dt->descriptorUpdateEntryCount = num;
     }
 
-    pl->descriptor_sets_num++;
+    pl->desc_layout_num++;
 
 print:
     /* Write shader info */
     for (int i = 0; i < num; i++) {
         const struct descriptor_props *prop = &descriptor_props[desc[i].type];
-        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
+        GLSLA("layout (set = %i, binding = %i", pl->desc_layout_num - 1, i);
 
         if (desc[i].mem_layout)
             GLSLA(", %s", desc[i].mem_layout);
@@ -1004,15 +1138,17 @@ void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
     VulkanFilterContext *s = avctx->priv;
 
     vkUpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
-                                      pl->desc_set[set_id],
-                                      pl->desc_template[set_id], s);
+                                      pl->desc_set[set_id * s->cur_queue_idx],
+                                      pl->desc_template[set_id],
+                                      s);
 }
 
 void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
                             VkShaderStageFlagBits stage, int offset,
                             size_t size, void *src)
 {
-    vkCmdPushConstants(e->buf, e->bound_pl->pipeline_layout,
+    VulkanFilterContext *s = avctx->priv;
+    vkCmdPushConstants(e->bufs[s->cur_queue_idx], e->bound_pl->pipeline_layout,
                        stage, offset, size, src);
 }
 
@@ -1021,6 +1157,10 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
     VkResult ret;
     VulkanFilterContext *s = avctx->priv;
 
+    int queues_count = 1;
+
+    pl->descriptor_sets_num = pl->desc_layout_num * queues_count;
+
     { /* Init descriptor set pool */
         VkDescriptorPoolCreateInfo pool_create_info = {
             .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
@@ -1063,7 +1203,7 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
     { /* Finally create the pipeline layout */
         VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
             .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
-            .setLayoutCount         = pl->descriptor_sets_num,
+            .setLayoutCount         = pl->desc_layout_num,
             .pSetLayouts            = pl->desc_layout,
             .pushConstantRangeCount = pl->push_consts_num,
             .pPushConstantRanges    = pl->push_consts,
@@ -1089,7 +1229,7 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
 
     /* Create update templates for the descriptor sets */
     for (int i = 0; i < pl->descriptor_sets_num; i++) {
-        desc_template_info = &pl->desc_template_info[i];
+        desc_template_info = &pl->desc_template_info[i % pl->desc_layout_num];
         desc_template_info->pipelineLayout = pl->pipeline_layout;
         ret = vkCreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                desc_template_info,
@@ -1153,27 +1293,53 @@ int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
 void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
                               VulkanPipeline *pl)
 {
-    vkCmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
+    VulkanFilterContext *s = avctx->priv;
+
+    vkCmdBindPipeline(e->bufs[s->cur_queue_idx], pl->bind_point, pl->pipeline);
 
-    vkCmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout, 0,
-                            pl->descriptor_sets_num, pl->desc_set, 0, 0);
+    vkCmdBindDescriptorSets(e->bufs[s->cur_queue_idx], pl->bind_point,
+                            pl->pipeline_layout, 0, pl->descriptor_sets_num,
+                            pl->desc_set, 0, 0);
 
     e->bound_pl = pl;
 }
 
 static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
 {
-    vkDestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);
+    /* Make sure all queues have finished executing */
+    for (int i = 0; i < s->queue_count; i++) {
+        FFVkQueueCtx *q = &e->queues[i];
 
-    if (e->buf   != VK_NULL_HANDLE)
-        vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, 1, &e->buf);
-    if (e->pool != VK_NULL_HANDLE)
-        vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
+        if (q->fence) {
+            vkWaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+            vkResetFences(s->hwctx->act_dev, 1, &q->fence);
+        }
+
+        /* Free the fence */
+        if (q->fence)
+            vkDestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
 
-    av_free(e->sem_wait);
-    av_free(e->sem_wait_dst);
-    av_free(e->sem_sig);
+        /* Free buffer dependencies */
+        for (int j = 0; j < q->nb_buf_deps; j++)
+            av_buffer_unref(&q->buf_deps[j]);
+        av_free(q->buf_deps);
+
+        /* Free frame dependencies */
+        for (int j = 0; j < q->nb_frame_deps; j++)
+            av_frame_free(&q->frame_deps[j]);
+        av_free(q->frame_deps);
+    }
+
+    if (e->bufs)
+        vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, s->queue_count, e->bufs);
+    if (e->pool)
+        vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
+
+    av_freep(&e->bufs);
+    av_freep(&e->queues);
+    av_freep(&e->sem_sig);
+    av_freep(&e->sem_wait);
+    av_freep(&e->sem_wait_dst);
     av_free(e);
 }
 
@@ -1191,7 +1357,7 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
     vkDestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
                             s->hwctx->alloc);
 
-    for (int i = 0; i < pl->descriptor_sets_num; i++) {
+    for (int i = 0; i < pl->desc_layout_num; i++) {
         if (pl->desc_template && pl->desc_template[i])
            vkDestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
                                              s->hwctx->alloc);
@@ -1229,6 +1395,10 @@ void ff_vk_filter_uninit(AVFilterContext *avctx)
 
     glslang_uninit();
 
+    for (int i = 0; i < s->exec_ctx_num; i++)
+        free_exec_ctx(s, s->exec_ctx[i]);
+    av_freep(&s->exec_ctx);
+
     for (int i = 0; i < s->samplers_num; i++) {
         vkDestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc);
         av_free(s->samplers[i]);
@@ -1239,10 +1409,6 @@ void ff_vk_filter_uninit(AVFilterContext *avctx)
         free_pipeline(s, s->pipelines[i]);
     av_freep(&s->pipelines);
 
-    for (int i = 0; i < s->exec_ctx_num; i++)
-        free_exec_ctx(s, s->exec_ctx[i]);
-    av_freep(&s->exec_ctx);
-
     av_freep(&s->scratch);
     s->scratch_size = 0;
 
diff --git a/libavfilter/vulkan.h b/libavfilter/vulkan.h
index 30a64ce933..f9a4dc5839 100644
--- a/libavfilter/vulkan.h
+++ b/libavfilter/vulkan.h
@@ -49,6 +49,17 @@
         goto fail;                                                             \
     } while (0)
 
+/* Gets the queues count for a single queue family */
+#define GET_QUEUE_COUNT(hwctx, graph, comp, tx) (                   \
+    graph ?  hwctx->nb_graphics_queues :                            \
+    comp  ? (hwctx->nb_comp_queues ?                                \
+             hwctx->nb_comp_queues : hwctx->nb_graphics_queues) :   \
+    tx    ? (hwctx->nb_tx_queues ? hwctx->nb_tx_queues :            \
+             (hwctx->nb_comp_queues ?                               \
+              hwctx->nb_comp_queues : hwctx->nb_graphics_queues)) : \
+    0                                                               \
+)
+
 /* Useful for attaching immutable samplers to arrays */
 #define DUP_SAMPLER_ARRAY4(x) (VkSampler []){ x, x, x, x, }
 
@@ -98,6 +109,7 @@ typedef struct VulkanPipeline {
     VkDescriptorPool desc_pool;
     VkDescriptorSet *desc_set;
     VkDescriptorUpdateTemplate *desc_template;
+    int desc_layout_num;
     int descriptor_sets_num;
     int pool_size_desc_num;
 
@@ -106,11 +118,29 @@ typedef struct VulkanPipeline {
     VkDescriptorPoolSize *pool_size_desc;
 } VulkanPipeline;
 
+typedef struct FFVkQueueCtx {
+    VkFence fence;
+    VkQueue queue;
+
+    /* Buffer dependencies */
+    AVBufferRef **buf_deps;
+    int nb_buf_deps;
+    int buf_deps_alloc_size;
+
+    /* Frame dependencies */
+    AVFrame **frame_deps;
+    int nb_frame_deps;
+    int frame_deps_alloc_size;
+} FFVkQueueCtx;
+
 typedef struct FFVkExecContext {
     VkCommandPool pool;
-    VkCommandBuffer buf;
-    VkQueue queue;
-    VkFence fence;
+    VkCommandBuffer *bufs;
+    FFVkQueueCtx *queues;
+
+    AVBufferRef ***deps;
+    int *nb_deps;
+    int *dep_alloc_size;
 
     VulkanPipeline *bound_pl;
 
@@ -134,6 +164,11 @@ typedef struct VulkanFilterContext {
     AVHWDeviceContext     *device;
     AVVulkanDeviceContext *hwctx;
 
+    /* State - mirrored with the exec ctx */
+    int cur_queue_idx;
+    int queue_family_idx;
+    int queue_count;
+
     /* Properties */
     int output_width;
     int output_height;
@@ -192,15 +227,12 @@ VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
 
 /**
  * Create an imageview.
+ * Guaranteed to remain alive until the queue submission has finished executing,
+ * and will be destroyed after that.
  */
-int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img,
-                           VkFormat fmt, const VkComponentMapping map);
-
-/**
- * Destroy an imageview. Command buffer must have completed executing, which
- * ff_vk_submit_exec_queue() will ensure
- */
-void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v);
+int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
+                           VkImageView *v, VkImage img, VkFormat fmt,
+                           const VkComponentMapping map);
 
 /**
  * Define a push constant for a given stage into a pipeline.
@@ -264,7 +296,7 @@ void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
  * Init an execution context for command recording and queue submission.
  * Will be auto-freed on uninit.
  */
-int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue);
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx);
 
 /**
 * Begin recording to the command buffer. Previous execution must have been
@@ -288,7 +320,23 @@ void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
                             size_t size, void *src);
 
 /**
- * Adds a frame as a queue dependency. This manages semaphore signalling.
+ * Gets the command buffer to use for this submission from the exec context.
+ */
+VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e);
+
+/**
+ * Adds a generic AVBufferRef as a queue dependency.
+ */
+int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e,
+                           AVBufferRef **deps, int nb_deps);
+
+/**
+ * Discards all queue dependencies
+ */
+void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e);
+
+/**
+ * Adds a frame as a queue dependency. This also manages semaphore signalling.
 * Must be called before submission.
 */
int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
-- 
2.26.2
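For reviewers, the per-filter pattern after this patch boils down to the following (a condensed sketch with error handling trimmed, not a verbatim excerpt; all names come from the patch itself):

    /* init_filter(): pick a queue family, count its queues, and start on a
     * random one so multiple filter instances spread across queues */
    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
    s->vkctx.queue_count      = GET_QUEUE_COUNT(s->vkctx.hwctx, 0, 1, 0);
    s->vkctx.cur_queue_idx    = rand() % s->vkctx.queue_count;
    RET(ff_vk_create_exec_ctx(ctx, &s->exec));

    /* process_frames(): recording now starts before the image views are
     * created, since the views get registered as submission dependencies */
    ff_vk_start_exec_recording(avctx, s->exec);
    cmd_buf = ff_vk_get_exec_buf(avctx, s->exec);
    RET(ff_vk_create_imageview(avctx, s->exec, &view, img, fmt,
                               ff_comp_identity_map));

    /* record barriers and dispatches into cmd_buf as before, then submit;
     * the submit no longer blocks on the fence and rotates cur_queue_idx */
    err = ff_vk_submit_exec_queue(avctx, s->exec);
    if (err < 0)
        ff_vk_discard_exec_deps(avctx, s->exec); /* the fail: path */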
From 7efe44ca5b5fc3baf8ca9c5eae37746eb4f09d3e Mon Sep 17 00:00:00 2001
From: Lynne <d...@lynne.ee>
Date: Thu, 14 May 2020 21:52:53 +0100
Subject: [PATCH 09/10] lavfi/vulkan: fix 2 minor memory leaks

---
 libavfilter/vulkan.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c
index ff76ab15e9..ccf71cb7cd 100644
--- a/libavfilter/vulkan.c
+++ b/libavfilter/vulkan.c
@@ -822,6 +822,7 @@ int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
                                 &shd->shader.module);
 
     /* Free the GLSlangResult struct */
+    av_free(res->data);
     av_free(res);
 
     if (ret != VK_SUCCESS) {
@@ -1228,8 +1229,10 @@ void ff_vk_filter_uninit(AVFilterContext *avctx)
 
     glslang_uninit();
 
-    for (int i = 0; i < s->samplers_num; i++)
+    for (int i = 0; i < s->samplers_num; i++) {
         vkDestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc);
+        av_free(s->samplers[i]);
+    }
     av_freep(&s->samplers);
 
     for (int i = 0; i < s->pipelines_num; i++)
-- 
2.26.2
From cb805554705fb9bbea8c1f48cfb043da19ccb5cb Mon Sep 17 00:00:00 2001
From: Lynne <d...@lynne.ee>
Date: Fri, 15 May 2020 13:21:10 +0100
Subject: [PATCH 08/10] hwcontext_vulkan: move physical device feature
 discovery to device_init

Otherwise custom Vulkan device contexts won't work.

---
 libavutil/hwcontext_vulkan.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c05edbf1d4..8cde0278a6 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -523,7 +523,6 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
     VkPhysicalDevice *devices = NULL;
     VkPhysicalDeviceIDProperties *idp = NULL;
     VkPhysicalDeviceProperties2 *prop = NULL;
-    VulkanDevicePriv *p = ctx->internal->priv;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
 
     ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
@@ -627,10 +626,9 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
     }
 
 end:
-    if (choice > -1) {
-        p->dev_is_nvidia = (prop[choice].properties.vendorID == 0x10de);
+    if (choice > -1)
         hwctx->phys_dev = devices[choice];
-    }
+
     av_free(devices);
     av_free(prop);
     av_free(idp);
@@ -998,16 +996,6 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
     if ((err = find_device(ctx, dev_select)))
         goto end;
 
-    vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
-    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
-    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
-    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyOffsetAlignment:   %li\n",
-           p->props.limits.optimalBufferCopyOffsetAlignment);
-    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %li\n",
-           p->props.limits.optimalBufferCopyRowPitchAlignment);
-    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %li\n",
-           p->props.limits.minMemoryMapAlignment);
-
     vkGetPhysicalDeviceFeatures(hwctx->phys_dev, &dev_features);
 #define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.NAME;
     COPY_FEATURE(hwctx->device_features, shaderImageGatherExtended)
@@ -1063,17 +1051,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     VulkanDevicePriv *p = ctx->internal->priv;
 
+    vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
+    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
+    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
+    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyOffsetAlignment:   %li\n",
+           p->props.limits.optimalBufferCopyOffsetAlignment);
+    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %li\n",
+           p->props.limits.optimalBufferCopyRowPitchAlignment);
+    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %li\n",
+           p->props.limits.minMemoryMapAlignment);
+
     /* Set device extension flags */
     for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
         for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
             if (!strcmp(hwctx->enabled_dev_extensions[i],
                         optional_device_exts[j].name)) {
+                av_log(ctx, AV_LOG_VERBOSE, "Using device extension %s\n",
+                       hwctx->enabled_dev_extensions[i]);
                 p->extensions |= optional_device_exts[j].flag;
                 break;
             }
         }
     }
 
+    p->dev_is_nvidia = (p->props.vendorID == 0x10de);
+
     vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
     if (!queue_num) {
         av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
-- 
2.26.2
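The use case this unbreaks is a caller allocating the device context manually instead of via av_hwdevice_ctx_create() (hypothetical sketch; my_instance/my_physical_device/my_device stand in for the caller's own Vulkan objects):

    AVBufferRef *dev_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN);
    AVHWDeviceContext *dev_ctx = (AVHWDeviceContext *)dev_ref->data;
    AVVulkanDeviceContext *vk = dev_ctx->hwctx;

    /* The caller fills in its own handles, so find_device() and the rest
     * of device_create() never run... */
    vk->inst     = my_instance;
    vk->phys_dev = my_physical_device;
    vk->act_dev  = my_device;

    /* ...and only device_init() does, which is why property, feature and
     * extension discovery has to live there. */
    int err = av_hwdevice_ctx_init(dev_ref);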
From aed28c39ef16e7fe60873755f55f86f42a254f20 Mon Sep 17 00:00:00 2001
From: Lynne <d...@lynne.ee>
Date: Fri, 15 May 2020 00:01:08 +0100
Subject: [PATCH 07/10] hwcontext_vulkan: split uploading and downloading
 contexts

This allows us to speed up only-uploading or only-downloading use cases.

---
 libavutil/hwcontext_vulkan.c | 123 +++++++++++++++++++----------------
 1 file changed, 66 insertions(+), 57 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 1412ca9f83..c05edbf1d4 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -72,10 +72,6 @@ typedef struct VulkanDevicePriv {
     /* Debug callback */
     VkDebugUtilsMessengerEXT debug_ctx;
 
-    /* Image transfers */
-    VulkanExecCtx upload_ctx;
-    VulkanExecCtx download_ctx;
-
     /* Extensions */
     uint64_t extensions;
 
@@ -89,6 +85,10 @@ typedef struct VulkanDevicePriv {
 typedef struct VulkanFramesPriv {
     /* Image conversions */
     VulkanExecCtx conv_ctx;
+
+    /* Image transfers */
+    VulkanExecCtx upload_ctx;
+    VulkanExecCtx download_ctx;
 } VulkanFramesPriv;
 
 typedef struct AVVkFrameInternal {
@@ -732,11 +732,11 @@ fail:
     return AVERROR(ENOMEM);
 }
 
-static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
+static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                            int queue_family_index, int num_queues)
 {
     VkResult ret;
-    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
 
     VkCommandPoolCreateInfo cqueue_create = {
         .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
@@ -763,7 +763,7 @@ static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
     ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
                               hwctx->alloc, &cmd->pool);
     if (ret != VK_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+        av_log(hwfc, AV_LOG_ERROR, "Command pool creation failure: %s\n",
                vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
@@ -773,7 +773,7 @@ static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
     /* Allocate command buffer */
     ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
     if (ret != VK_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+        av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
                vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
@@ -786,9 +786,9 @@ static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
     return 0;
 }
 
-static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
+static void free_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
 {
-    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
 
     /* Make sure all queues have finished executing */
     for (int i = 0; i < cmd->nb_queues; i++) {
@@ -818,12 +818,12 @@ static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
     av_freep(&cmd->queues);
 }
 
-static VkCommandBuffer get_buf_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
+static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
 {
     return cmd->bufs[cmd->cur_queue_idx];
 }
 
-static void unref_exec_ctx_deps(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
+static void unref_exec_ctx_deps(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
 {
     VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
 
@@ -832,10 +832,10 @@ static void unref_exec_ctx_deps(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
     q->nb_buf_deps = 0;
 }
 
-static int wait_start_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
+static int wait_start_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
 {
     VkResult ret;
-    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
     VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
 
     VkCommandBufferBeginInfo cmd_start = {
@@ -851,7 +851,7 @@ static int wait_start_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
         ret = vkCreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc,
                             &q->fence);
         if (ret != VK_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
+            av_log(hwfc, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
                    vk_ret2str(ret));
             return AVERROR_EXTERNAL;
         }
@@ -861,11 +861,11 @@ static int wait_start_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
     }
 
     /* Discard queue dependencies */
-    unref_exec_ctx_deps(ctx, cmd);
+    unref_exec_ctx_deps(hwfc, cmd);
 
     ret = vkBeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start);
     if (ret != VK_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
+        av_log(hwfc, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
                vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
@@ -873,7 +873,7 @@ static int wait_start_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
     return 0;
 }
 
-static int add_buf_dep_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
+static int add_buf_dep_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                                 AVBufferRef * const *deps, int nb_deps)
 {
     AVBufferRef **dst;
@@ -899,11 +899,11 @@ static int add_buf_dep_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
     return 0;
 
 err:
-    unref_exec_ctx_deps(ctx, cmd);
+    unref_exec_ctx_deps(hwfc, cmd);
     return AVERROR(ENOMEM);
 }
 
-static int submit_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
+static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                            VkSubmitInfo *s_info, int synchronous)
 {
     VkResult ret;
@@ -911,9 +911,9 @@ static int submit_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
 
     ret = vkEndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
     if (ret != VK_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
+        av_log(hwfc, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
                vk_ret2str(ret));
-        unref_exec_ctx_deps(ctx, cmd);
+        unref_exec_ctx_deps(hwfc, cmd);
         return AVERROR_EXTERNAL;
     }
 
@@ -922,17 +922,17 @@ static int submit_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
 
     ret = vkQueueSubmit(q->queue, 1, s_info, q->fence);
     if (ret != VK_SUCCESS) {
-        unref_exec_ctx_deps(ctx, cmd);
+        unref_exec_ctx_deps(hwfc, cmd);
         return AVERROR_EXTERNAL;
     }
 
     q->was_synchronous = synchronous;
 
     if (synchronous) {
-        AVVulkanDeviceContext *hwctx = ctx->hwctx;
+        AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
         vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
         vkResetFences(hwctx->act_dev, 1, &q->fence);
-        unref_exec_ctx_deps(ctx, cmd);
+        unref_exec_ctx_deps(hwfc, cmd);
     } else { /* Rotate queues */
         cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues;
     }
@@ -945,8 +945,6 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
     VulkanDevicePriv *p = ctx->internal->priv;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
 
-    free_exec_ctx(ctx, &p->cmd);
-
     vkDestroyDevice(hwctx->act_dev, hwctx->alloc);
 
     if (p->debug_ctx) {
@@ -1061,7 +1059,6 @@ end:
 
 static int vulkan_device_init(AVHWDeviceContext *ctx)
 {
-    int err;
     uint32_t queue_num;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     VulkanDevicePriv *p = ctx->internal->priv;
@@ -1104,12 +1101,6 @@ if (n >= queue_num) {
         (hwctx->queue_family_comp_index != hwctx->queue_family_tx_index))
         p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index;
 
-    /* Create exec context - if there's something invalid this will error out */
-    err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index,
-                          GET_QUEUE_COUNT(hwctx, 0, 0, 1));
-    if (err)
-        return err;
-
     /* Get device capabilities */
     vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
 
@@ -1429,7 +1420,6 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     uint32_t dst_qf;
     VkImageLayout new_layout;
     VkAccessFlags new_access;
-    AVHWDeviceContext *ctx = hwfc->device_ctx;
     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
 
@@ -1465,7 +1455,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         break;
     }
 
-    if ((err = wait_start_exec_ctx(ctx, ectx)))
+    if ((err = wait_start_exec_ctx(hwfc, ectx)))
         return err;
 
     /* Change the image layout to something more optimal for writes.
@@ -1488,12 +1478,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         frame->access[i] = img_bar[i].dstAccessMask;
     }
 
-    vkCmdPipelineBarrier(get_buf_exec_ctx(ctx, ectx),
+    vkCmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
                          VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                          VK_PIPELINE_STAGE_TRANSFER_BIT,
                          0, 0, NULL, 0, NULL, planes, img_bar);
 
-    return submit_exec_ctx(ctx, ectx, &s_info, 0);
+    return submit_exec_ctx(hwfc, ectx, &s_info, 0);
 }
 
 static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
@@ -1685,7 +1675,9 @@ static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
 {
     VulkanFramesPriv *fp = hwfc->internal->priv;
 
-    free_exec_ctx(hwfc->device_ctx, &fp->conv_ctx);
+    free_exec_ctx(hwfc, &fp->conv_ctx);
+    free_exec_ctx(hwfc, &fp->upload_ctx);
+    free_exec_ctx(hwfc, &fp->download_ctx);
 }
 
 static int vulkan_frames_init(AVHWFramesContext *hwfc)
@@ -1704,19 +1696,28 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
     if (!hwctx->usage)
         hwctx->usage = DEFAULT_USAGE_FLAGS;
 
-    err = create_exec_ctx(hwfc->device_ctx, &fp->conv_ctx,
+    err = create_exec_ctx(hwfc, &fp->conv_ctx,
                           dev_hwctx->queue_family_comp_index,
                           GET_QUEUE_COUNT(dev_hwctx, 0, 1, 0));
     if (err)
-        return err;
+        goto fail;
+
+    err = create_exec_ctx(hwfc, &fp->upload_ctx,
+                          dev_hwctx->queue_family_tx_index,
+                          GET_QUEUE_COUNT(dev_hwctx, 0, 0, 1));
+    if (err)
+        goto fail;
+
+    err = create_exec_ctx(hwfc, &fp->download_ctx,
+                          dev_hwctx->queue_family_tx_index, 1);
+    if (err)
+        goto fail;
 
     /* Test to see if allocation will fail */
     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
                        hwctx->create_pnext);
-    if (err) {
-        free_exec_ctx(hwfc->device_ctx, &fp->conv_ctx);
-        return err;
-    }
+    if (err)
+        goto fail;
 
     vulkan_frame_free(hwfc, (uint8_t *)f);
 
@@ -1727,12 +1728,19 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
                                                          hwfc, vulkan_pool_alloc,
                                                          NULL);
         if (!hwfc->internal->pool_internal) {
-            free_exec_ctx(hwfc->device_ctx, &fp->conv_ctx);
-            return AVERROR(ENOMEM);
+            err = AVERROR(ENOMEM);
+            goto fail;
         }
     }
 
     return 0;
+
+fail:
+    free_exec_ctx(hwfc, &fp->conv_ctx);
+    free_exec_ctx(hwfc, &fp->upload_ctx);
+    free_exec_ctx(hwfc, &fp->download_ctx);
+
+    return err;
 }
 
 static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
@@ -2774,13 +2782,13 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
     return err;
 }
 
-static int transfer_image_buf(AVHWDeviceContext *ctx, const AVFrame *f,
+static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
                               AVBufferRef **bufs, const int *buf_stride, int w,
                               int h, enum AVPixelFormat pix_fmt, int to_buf)
 {
     int err;
     AVVkFrame *frame = (AVVkFrame *)f->data[0];
-    VulkanDevicePriv *s = ctx->internal->priv;
+    VulkanFramesPriv *fp = hwfc->internal->priv;
 
     int bar_num = 0;
     VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
@@ -2789,7 +2797,8 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, const AVFrame *f,
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
 
-    VkCommandBuffer cmd_buf = get_buf_exec_ctx(ctx, &s->cmd);
+    VulkanExecCtx *ectx = to_buf ? &fp->download_ctx : &fp->upload_ctx;
+    VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx);
 
     VkSubmitInfo s_info = {
         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
@@ -2800,7 +2809,7 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, const AVFrame *f,
         .waitSemaphoreCount   = planes,
     };
 
-    if ((err = wait_start_exec_ctx(ctx, &s->cmd)))
+    if ((err = wait_start_exec_ctx(hwfc, ectx)))
         return err;
 
     /* Change the image layout to something more optimal for transfers */
@@ -2877,14 +2886,14 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, const AVFrame *f,
         for (int ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) {
             if (!f->buf[ref])
                 break;
-            if ((err = add_buf_dep_exec_ctx(hwfc, &s->cmd, &f->buf[ref], 1)))
+            if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
                 return err;
         }
-        if (ref && (err = add_buf_dep_exec_ctx(hwfc, &s->cmd, bufs, planes)))
+        if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
             return err;
-        return submit_exec_ctx(hwfc, &s->cmd, &s_info, !ref);
+        return submit_exec_ctx(hwfc, ectx, &s_info, !ref);
     } else {
-        return submit_exec_ctx(hwfc, &s->cmd, &s_info, 1);
+        return submit_exec_ctx(hwfc, ectx, &s_info, 1);
     }
 }
 
@@ -2953,7 +2962,7 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
         goto end;
 
     /* Copy buffers to image */
-    err = transfer_image_buf(dev_ctx, dst, bufs, tmp.linesize,
+    err = transfer_image_buf(hwfc, dst, bufs, tmp.linesize,
                              src->width, src->height, src->format, 0);
 
 end:
@@ -3098,7 +3107,7 @@ static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     }
 
     /* Copy image to buffer */
-    if ((err = transfer_image_buf(dev_ctx, src, bufs, tmp.linesize,
+    if ((err = transfer_image_buf(hwfc, src, bufs, tmp.linesize,
                                   dst->width, dst->height, dst->format, 1)))
         goto end;
 
-- 
2.26.2
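Both directions still enter through the same public entry point; the split only changes which internal exec context (and therefore which queues) services the copy. Sketch, assuming hw_frame is a frame from a Vulkan frames context and sw_frame a matching software frame:

    /* software -> GPU: now served by fp->upload_ctx, asynchronously */
    err = av_hwframe_transfer_data(hw_frame, sw_frame, 0);

    /* GPU -> software: served by fp->download_ctx, still synchronous */
    err = av_hwframe_transfer_data(sw_frame, hw_frame, 0);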
From bb1d5170fc6b7ecc6bcb845541c1428d96c4e0e8 Mon Sep 17 00:00:00 2001
From: Lynne <d...@lynne.ee>
Date: Fri, 15 May 2020 00:21:51 +0100
Subject: [PATCH 06/10] hwcontext_vulkan: set usage for DRM imports to the
 frames context usage

They're nothing special, and there's no reason they should always use
the default flags.

---
 libavutil/hwcontext_vulkan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 8d7efae070..1412ca9f83 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1942,6 +1942,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     VulkanDevicePriv *p = ctx->internal->priv;
     VulkanFramesPriv *fp = hwfc->internal->priv;
+    AVVulkanFramesContext *frames_hwctx = hwfc->hwctx;
     const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
     const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
     VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
@@ -2041,7 +2042,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
         .flags         = VK_IMAGE_CREATE_ALIAS_BIT,
         .tiling        = f->tiling,
         .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
-        .usage         = DEFAULT_USAGE_FLAGS,
+        .usage         = frames_hwctx->usage,
         .samples       = VK_SAMPLE_COUNT_1_BIT,
         .pQueueFamilyIndices   = p->qfs,
         .queueFamilyIndexCount = p->num_qfs,
-- 
2.26.2
From 10938b3fdc0951f0314a64a61a588dd87fbcfe04 Mon Sep 17 00:00:00 2001
From: Lynne <d...@lynne.ee>
Date: Fri, 15 May 2020 00:16:58 +0100
Subject: [PATCH 05/10] hwcontext_vulkan: do not OR the user-specified usage
 with our default flags

Some users may need special formats that aren't available when the
STORAGE flag bit is set, which would result in allocations failing.

---
 libavutil/hwcontext_vulkan.c | 3 ++-
 libavutil/hwcontext_vulkan.h | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 1587a7faba..8d7efae070 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1701,7 +1701,8 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
     hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
                     VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
 
-    hwctx->usage |= DEFAULT_USAGE_FLAGS;
+    if (!hwctx->usage)
+        hwctx->usage = DEFAULT_USAGE_FLAGS;
 
     err = create_exec_ctx(hwfc->device_ctx, &fp->conv_ctx,
                           dev_hwctx->queue_family_comp_index,
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index bf564fa04b..14b7e548a6 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -112,8 +112,8 @@ typedef struct AVVulkanFramesContext {
      */
     VkImageTiling tiling;
     /**
-     * Defines extra usage of output frames. This is bitwise OR'd with the
-     * standard usage flags (SAMPLED, STORAGE, TRANSFER_SRC and TRANSFER_DST).
+     * Defines extra usage of output frames. If left as 0, the following bits
+     * are set: TRANSFER_SRC, TRANSFER_DST, SAMPLED and STORAGE.
      */
     VkImageUsageFlagBits usage;
     /**
-- 
2.26.2
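With this change a caller can pin the usage exactly (illustrative sketch; frames_ref allocation and the remaining field setup elided):

    AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
    AVVulkanFramesContext *vkfc = frames->hwctx;

    /* Previously STORAGE (and the rest of DEFAULT_USAGE_FLAGS) was always
     * OR'd in, which can make image creation fail for formats that lack
     * storage support; now 0 means "use the defaults" and anything else
     * is taken verbatim. */
    vkfc->usage = VK_IMAGE_USAGE_SAMPLED_BIT |
                  VK_IMAGE_USAGE_TRANSFER_DST_BIT;

    err = av_hwframe_ctx_init(frames_ref);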
>From 129238df128c949b01334ab6f72421933ce257b0 Mon Sep 17 00:00:00 2001
From: Lynne <d...@lynne.ee>
Date: Thu, 14 May 2020 23:37:14 +0100
Subject: [PATCH 04/10] hwcontext_vulkan: actually use the frames exec context
 for prep/import/export

This was never actually used, likely due to confusion, as the device
context also had one used for uploads and downloads.
Also, since we're only using it for very quick image barriers (which
are practically free on all hardware), use the compute queue instead
of the transfer queue.
---
 libavutil/hwcontext_vulkan.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index d4cbcbee05..1587a7faba 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -87,7 +87,8 @@ typedef struct VulkanDevicePriv {
 } VulkanDevicePriv;
 
 typedef struct VulkanFramesPriv {
-    VulkanExecCtx cmd;
+    /* Image conversions */
+    VulkanExecCtx conv_ctx;
 } VulkanFramesPriv;
 
 typedef struct AVVkFrameInternal {
@@ -1632,6 +1633,7 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
     AVHWFramesContext *hwfc = opaque;
     AVVulkanFramesContext *hwctx = hwfc->hwctx;
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+    VulkanFramesPriv *fp = hwfc->internal->priv;
     VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
     VkExternalMemoryHandleTypeFlags e = 0x0;
 
@@ -1663,7 +1665,7 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
     if (err)
         goto fail;
 
-    err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_WRITE);
+    err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
     if (err)
         goto fail;
 
@@ -1683,7 +1685,7 @@ static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
 {
     VulkanFramesPriv *fp = hwfc->internal->priv;
 
-    free_exec_ctx(hwfc->device_ctx, &fp->cmd);
+    free_exec_ctx(hwfc->device_ctx, &fp->conv_ctx);
 }
 
 static int vulkan_frames_init(AVHWFramesContext *hwfc)
@@ -1701,9 +1703,9 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
 
     hwctx->usage |= DEFAULT_USAGE_FLAGS;
 
-    err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
-                          dev_hwctx->queue_family_tx_index,
-                          GET_QUEUE_COUNT(dev_hwctx, 0, 0, 1));
+    err = create_exec_ctx(hwfc->device_ctx, &fp->conv_ctx,
+                          dev_hwctx->queue_family_comp_index,
+                          GET_QUEUE_COUNT(dev_hwctx, 0, 1, 0));
     if (err)
         return err;
 
@@ -1711,7 +1713,7 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
         err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
                            hwctx->create_pnext);
         if (err) {
-            free_exec_ctx(hwfc->device_ctx, &p->cmd);
+            free_exec_ctx(hwfc->device_ctx, &fp->conv_ctx);
             return err;
         }
 
@@ -1724,7 +1726,7 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
                                                              hwfc, vulkan_pool_alloc,
                                                              NULL);
         if (!hwfc->internal->pool_internal) {
-            free_exec_ctx(hwfc->device_ctx, &p->cmd);
+            free_exec_ctx(hwfc->device_ctx, &fp->conv_ctx);
             return AVERROR(ENOMEM);
         }
     }
@@ -1938,6 +1940,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
     AVHWDeviceContext *ctx = hwfc->device_ctx;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     VulkanDevicePriv *p = ctx->internal->priv;
+    VulkanFramesPriv *fp = hwfc->internal->priv;
     const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
     const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
     VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
@@ -2107,7 +2110,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
     /* NOTE: This is completely uneccesary and unneeded once we can import
      * semaphores from DRM. Otherwise we have to activate the semaphores.
      * We're reusing the exec context that's also used for uploads/downloads. */
-    err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_RO_SHADER);
+    err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_RO_SHADER);
     if (err)
         goto fail;
 
@@ -2467,6 +2470,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
     VkResult ret;
     AVVkFrame *f = (AVVkFrame *)src->data[0];
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+    VulkanFramesPriv *fp = hwfc->internal->priv;
     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
@@ -2478,7 +2482,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
     if (!drm_desc)
        return AVERROR(ENOMEM);
 
-    err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_EXTERNAL_EXPORT);
+    err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_EXPORT);
     if (err < 0)
         goto end;
 
-- 
2.26.2
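For context on the queue choice (an illustrative sketch, not code from this patch): everything the frames exec context records for prep/import/export is a handful of image layout transitions, i.e. something like the barrier below, which any compute or transfer queue executes near-instantly.

#include <vulkan/vulkan.h>

/* Record a bare layout transition; roughly the kind of work
 * prepare_frame() queues up. "cmd_buf" must be in the recording state,
 * "img" is the target image; both are assumed to exist. */
static void record_layout_transition(VkCommandBuffer cmd_buf, VkImage img)
{
    VkImageMemoryBarrier bar = {
        .sType               = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .srcAccessMask       = 0,
        .dstAccessMask       = VK_ACCESS_MEMORY_WRITE_BIT,
        .oldLayout           = VK_IMAGE_LAYOUT_UNDEFINED,
        .newLayout           = VK_IMAGE_LAYOUT_GENERAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image               = img,
        .subresourceRange    = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .levelCount = 1,
            .layerCount = 1,
        },
    };

    vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
                         0, NULL, 0, NULL, 1, &bar);
}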
>From 93b1b731fcbc1ed7c69cf2c7690f968df6766bad Mon Sep 17 00:00:00 2001
From: Lynne <d...@lynne.ee>
Date: Thu, 14 May 2020 23:59:22 +0100
Subject: [PATCH 03/10] hwcontext_vulkan: support user-provided pools

If an external pool was provided, we skipped all of frames init,
including the exec context.
---
 libavutil/hwcontext_vulkan.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index dd46ab3d8f..d4cbcbee05 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1695,9 +1695,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
     AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
 
-    if (hwfc->pool)
-        return 0;
-
     /* Default pool flags */
     hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
                     VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
@@ -1720,12 +1717,16 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
 
     vulkan_frame_free(hwfc, (uint8_t *)f);
 
-    hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
-                                                         hwfc, vulkan_pool_alloc,
-                                                         NULL);
-    if (!hwfc->internal->pool_internal) {
-        free_exec_ctx(hwfc->device_ctx, &p->cmd);
-        return AVERROR(ENOMEM);
+    /* If user did not specify a pool, hwfc->pool will be set to the internal one
+     * in hwcontext.c just after this gets called */
+    if (!hwfc->pool) {
+        hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
+                                                             hwfc, vulkan_pool_alloc,
+                                                             NULL);
+        if (!hwfc->internal->pool_internal) {
+            free_exec_ctx(hwfc->device_ctx, &p->cmd);
+            return AVERROR(ENOMEM);
+        }
     }
 
     return 0;
-- 
2.26.2
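For reference, this is the sort of caller the change enables (a minimal sketch, not from the patchset; my_vk_frame_alloc is a hypothetical allocator that must return buffers wrapping an AVVkFrame, the same contract as the internal vulkan_pool_alloc()):

#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_vulkan.h>

/* Hypothetical caller-owned allocator */
extern AVBufferRef *my_vk_frame_alloc(void *opaque, int size);

static int init_with_external_pool(AVBufferRef *frames_ref)
{
    AVHWFramesContext *frames_ctx = (AVHWFramesContext *)frames_ref->data;

    /* Attach a caller-owned pool before init */
    frames_ctx->pool = av_buffer_pool_init2(sizeof(AVVkFrame), frames_ctx,
                                            my_vk_frame_alloc, NULL);
    if (!frames_ctx->pool)
        return AVERROR(ENOMEM);

    /* With this patch, init still sets up tiling/usage and the exec
     * context even though the pool is external. */
    return av_hwframe_ctx_init(frames_ref);
}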
>From a5e14f05382d3553bd81009cc484a36b6401f3c3 Mon Sep 17 00:00:00 2001
From: Lynne <d...@lynne.ee>
Date: Thu, 14 May 2020 00:28:00 +0100
Subject: [PATCH 02/10] hwcontext_vulkan: use all enabled queues for
 transfers, make uploads async

This commit makes full use of the enabled queues to provide
asynchronous uploads of images (downloads remain synchronous).

For pure upload use cases, the performance gains can be significant.
---
 libavutil/hwcontext_vulkan.c | 332 +++++++++++++++++++++++++----------
 1 file changed, 236 insertions(+), 96 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 516897aa89..dd46ab3d8f 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -41,11 +41,23 @@
 #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
 #endif
 
+typedef struct VulkanQueueCtx {
+    VkFence fence;
+    VkQueue queue;
+    int was_synchronous;
+
+    /* Buffer dependencies */
+    AVBufferRef **buf_deps;
+    int nb_buf_deps;
+    int buf_deps_alloc_size;
+} VulkanQueueCtx;
+
 typedef struct VulkanExecCtx {
     VkCommandPool pool;
-    VkCommandBuffer buf;
-    VkQueue queue;
-    VkFence fence;
+    VkCommandBuffer *bufs;
+    VulkanQueueCtx *queues;
+    int nb_queues;
+    int cur_queue_idx;
 } VulkanExecCtx;
 
 typedef struct VulkanDevicePriv {
@@ -60,8 +72,8 @@ typedef struct VulkanDevicePriv {
     /* Debug callback */
     VkDebugUtilsMessengerEXT debug_ctx;
 
-    /* Image uploading */
+    /* Image transfers */
     VulkanExecCtx cmd;
 
     /* Extensions */
     uint64_t extensions;
@@ -89,6 +102,16 @@ typedef struct AVVkFrameInternal {
 #endif
 } AVVkFrameInternal;
 
+#define GET_QUEUE_COUNT(hwctx, graph, comp, tx) (                   \
+    graph ? hwctx->nb_graphics_queues :                             \
+    comp  ? (hwctx->nb_comp_queues ?                                \
+             hwctx->nb_comp_queues : hwctx->nb_graphics_queues) :   \
+    tx    ? (hwctx->nb_tx_queues ? hwctx->nb_tx_queues :            \
+             (hwctx->nb_comp_queues ?                               \
+              hwctx->nb_comp_queues : hwctx->nb_graphics_queues)) : \
+    0                                                               \
+)
+
 #define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
                                 vkGetInstanceProcAddr(inst, #name)
 
@@ -709,7 +732,7 @@ fail:
 }
 
 static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
-                           int queue_family_index)
+                           int queue_family_index, int num_queues)
 {
     VkResult ret;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
@@ -722,21 +745,20 @@ static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
     VkCommandBufferAllocateInfo cbuf_create = {
         .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
         .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
-        .commandBufferCount = 1,
+        .commandBufferCount = num_queues,
     };
 
-    VkFenceCreateInfo fence_spawn = {
-        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
-    };
+    cmd->nb_queues = num_queues;
 
-    ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
-                        hwctx->alloc, &cmd->fence);
-    if (ret != VK_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
-               vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
-    }
+    cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues));
+    if (!cmd->queues)
+        return AVERROR(ENOMEM);
 
+    cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs));
+    if (!cmd->bufs)
+        return AVERROR(ENOMEM);
+
+    /* Create command pool */
     ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
                               hwctx->alloc, &cmd->pool);
     if (ret != VK_SUCCESS) {
@@ -747,15 +769,18 @@ static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
 
     cbuf_create.commandPool = cmd->pool;
 
-    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &cmd->buf);
+    /* Allocate command buffer */
+    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
     if (ret != VK_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
                vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
 
-    vkGetDeviceQueue(hwctx->act_dev, cqueue_create.queueFamilyIndex, 0,
-                     &cmd->queue);
+    for (int i = 0; i < num_queues; i++) {
+        VulkanQueueCtx *q = &cmd->queues[i];
+        vkGetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
+    }
 
     return 0;
 }
@@ -764,12 +789,154 @@ static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
 {
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
 
-    if (cmd->fence)
-        vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc);
-    if (cmd->buf)
-        vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf);
+    /* Make sure all queues have finished executing */
+    for (int i = 0; i < cmd->nb_queues; i++) {
+        VulkanQueueCtx *q = &cmd->queues[i];
+
+        if (q->fence && !q->was_synchronous) {
+            vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+            vkResetFences(hwctx->act_dev, 1, &q->fence);
+        }
+
+        /* Free the fence */
+        if (q->fence)
+            vkDestroyFence(hwctx->act_dev, q->fence, hwctx->alloc);
+
+        /* Free buffer dependencies */
+        for (int j = 0; j < q->nb_buf_deps; j++)
+            av_buffer_unref(&q->buf_deps[j]);
+        av_free(q->buf_deps);
+    }
+
+    if (cmd->bufs)
+        vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, cmd->nb_queues, cmd->bufs);
     if (cmd->pool)
         vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
+
+    av_freep(&cmd->bufs);
+    av_freep(&cmd->queues);
+}
+
+static VkCommandBuffer get_buf_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
+{
+    return cmd->bufs[cmd->cur_queue_idx];
+}
+
+static void unref_exec_ctx_deps(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
+{
+    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
+
+    for (int j = 0; j < q->nb_buf_deps; j++)
+        av_buffer_unref(&q->buf_deps[j]);
+    q->nb_buf_deps = 0;
+}
+
+static int wait_start_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
+{
+    VkResult ret;
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
+
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    };
+
+    /* Create the fence and don't wait for it initially */
+    if (!q->fence) {
+        VkFenceCreateInfo fence_spawn = {
+            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+        };
+        ret = vkCreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc,
+                            &q->fence);
+        if (ret != VK_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
+                   vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    } else if (!q->was_synchronous) {
+        vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+        vkResetFences(hwctx->act_dev, 1, &q->fence);
+    }
+
+    /* Discard queue dependencies */
+    unref_exec_ctx_deps(ctx, cmd);
+
+    ret = vkBeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+static int add_buf_dep_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
+                                AVBufferRef * const *deps, int nb_deps)
+{
+    AVBufferRef **dst;
+    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
+
+    if (!deps || !nb_deps)
+        return 0;
+
+    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
+                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
+    if (!dst)
+        goto err;
+
+    q->buf_deps = dst;
+
+    for (int i = 0; i < nb_deps; i++) {
+        q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]);
+        if (!q->buf_deps[q->nb_buf_deps])
+            goto err;
+        q->nb_buf_deps++;
+    }
+
+    return 0;
+
+err:
+    unref_exec_ctx_deps(ctx, cmd);
+    return AVERROR(ENOMEM);
+}
+
+static int submit_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
+                           VkSubmitInfo *s_info, int synchronous)
+{
+    VkResult ret;
+    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
+
+    ret = vkEndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
+               vk_ret2str(ret));
+        unref_exec_ctx_deps(ctx, cmd);
+        return AVERROR_EXTERNAL;
+    }
+
+    s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
+    s_info->commandBufferCount = 1;
+
+    ret = vkQueueSubmit(q->queue, 1, s_info, q->fence);
+    if (ret != VK_SUCCESS) {
+        unref_exec_ctx_deps(ctx, cmd);
+        return AVERROR_EXTERNAL;
+    }
+
+    q->was_synchronous = synchronous;
+
+    if (synchronous) {
+        AVVulkanDeviceContext *hwctx = ctx->hwctx;
+        vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+        vkResetFences(hwctx->act_dev, 1, &q->fence);
+        unref_exec_ctx_deps(ctx, cmd);
+    } else { /* Rotate queues */
+        cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues;
+    }
+
+    return 0;
 }
 
 static void vulkan_device_free(AVHWDeviceContext *ctx)
@@ -937,7 +1104,8 @@ if (n >= queue_num) {
     p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index;
 
     /* Create exec context - if there's something invalid this will error out */
-    err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
+    err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index,
+                          GET_QUEUE_COUNT(hwctx, 0, 0, 1));
     if (err)
         return err;
 
@@ -1256,26 +1424,17 @@ enum PrepMode {
 static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                          AVVkFrame *frame, enum PrepMode pmode)
 {
-    VkResult ret;
+    int err;
     uint32_t dst_qf;
     VkImageLayout new_layout;
     VkAccessFlags new_access;
     AVHWDeviceContext *ctx = hwfc->device_ctx;
-    AVVulkanDeviceContext *hwctx = ctx->hwctx;
     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
 
-    VkCommandBufferBeginInfo cmd_start = {
-        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
-        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
-    };
-
     VkSubmitInfo s_info = {
         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
-        .commandBufferCount   = 1,
-        .pCommandBuffers      = &ectx->buf,
-
         .pSignalSemaphores    = frame->sem,
         .signalSemaphoreCount = planes,
     };
@@ -1305,9 +1464,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         break;
     }
 
-    ret = vkBeginCommandBuffer(ectx->buf, &cmd_start);
-    if (ret != VK_SUCCESS)
-        return AVERROR_EXTERNAL;
+    if ((err = wait_start_exec_ctx(ctx, ectx)))
+        return err;
 
     /* Change the image layout to something more optimal for writes.
      * This also signals the newly created semaphore, making it usable
@@ -1329,23 +1487,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         frame->access[i] = img_bar[i].dstAccessMask;
     }
 
-    vkCmdPipelineBarrier(ectx->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
-                         0, NULL, 0, NULL, planes, img_bar);
-
-    ret = vkEndCommandBuffer(ectx->buf);
-    if (ret != VK_SUCCESS)
-        return AVERROR_EXTERNAL;
-
-    ret = vkQueueSubmit(ectx->queue, 1, &s_info, ectx->fence);
-    if (ret != VK_SUCCESS) {
-        return AVERROR_EXTERNAL;
-    } else {
-        vkWaitForFences(hwctx->act_dev, 1, &ectx->fence, VK_TRUE, UINT64_MAX);
-        vkResetFences(hwctx->act_dev, 1, &ectx->fence);
-    }
+    vkCmdPipelineBarrier(get_buf_exec_ctx(ctx, ectx),
+                         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                         VK_PIPELINE_STAGE_TRANSFER_BIT,
+                         0, 0, NULL, 0, NULL, planes, img_bar);
 
-    return 0;
+    return submit_exec_ctx(ctx, ectx, &s_info, 0);
 }
 
 static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
@@ -1558,7 +1705,8 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
     hwctx->usage |= DEFAULT_USAGE_FLAGS;
 
     err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
-                          dev_hwctx->queue_family_tx_index);
+                          dev_hwctx->queue_family_tx_index,
+                          GET_QUEUE_COUNT(dev_hwctx, 0, 0, 1));
     if (err)
         return err;
 
@@ -2619,12 +2767,12 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
     return err;
 }
 
-static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
+static int transfer_image_buf(AVHWDeviceContext *ctx, const AVFrame *f,
                               AVBufferRef **bufs, const int *buf_stride, int w,
                               int h, enum AVPixelFormat pix_fmt, int to_buf)
 {
-    VkResult ret;
-    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    int err;
+    AVVkFrame *frame = (AVVkFrame *)f->data[0];
     VulkanDevicePriv *s = ctx->internal->priv;
 
     int bar_num = 0;
@@ -2633,17 +2781,11 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, const AVFrame *f,
     const int planes = av_pix_fmt_count_planes(pix_fmt);
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 
-    VkCommandBufferBeginInfo cmd_start = {
-        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
-        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
-    };
-
     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+    VkCommandBuffer cmd_buf = get_buf_exec_ctx(ctx, &s->cmd);
 
     VkSubmitInfo s_info = {
         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
-        .commandBufferCount   = 1,
-        .pCommandBuffers      = &s->cmd.buf,
         .pSignalSemaphores    = frame->sem,
         .pWaitSemaphores      = frame->sem,
         .pWaitDstStageMask    = sem_wait_dst,
@@ -2651,12 +2793,8 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
         .waitSemaphoreCount   = planes,
     };
 
-    ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start);
-    if (ret != VK_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
-               vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
-    }
+    if ((err = wait_start_exec_ctx(ctx, &s->cmd)))
+        return err;
 
     /* Change the image layout to something more optimal for transfers */
     for (int i = 0; i < planes; i++) {
@@ -2690,7 +2828,7 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
     }
 
     if (bar_num)
-        vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+        vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                              VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                              0, NULL, 0, NULL, bar_num, img_bar);
 
@@ -2714,33 +2852,33 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
         };
 
         if (to_buf)
-            vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i],
+            vkCmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
                                    vkbuf->buf, 1, &buf_reg);
         else
-            vkCmdCopyBufferToImage(s->cmd.buf, vkbuf->buf, frame->img[i],
+            vkCmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
                                    frame->layout[i], 1, &buf_reg);
     }
 
-    ret = vkEndCommandBuffer(s->cmd.buf);
-    if (ret != VK_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
-               vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
-    }
-
-    /* Wait for the download/upload to finish if uploading, otherwise the
-     * semaphore will take care of synchronization when uploading */
-    ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence);
-    if (ret != VK_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
-               vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
+    /* When uploading, do this asynchronously if the source is refcounted by
+     * keeping the buffers as a submission dependency.
+     * The hwcontext is guaranteed to not be freed until all frames are freed
+     * in the frames_uninit function.
+     * When downloading to buffer, do this synchronously and wait for the
+     * queue submission to finish executing */
+    if (!to_buf) {
+        int ref;
+        for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) {
+            if (!f->buf[ref])
+                break;
+            if ((err = add_buf_dep_exec_ctx(ctx, &s->cmd, &f->buf[ref], 1)))
+                return err;
+        }
+        if (ref && (err = add_buf_dep_exec_ctx(ctx, &s->cmd, bufs, planes)))
+            return err;
+        return submit_exec_ctx(ctx, &s->cmd, &s_info, !ref);
     } else {
-        vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX);
-        vkResetFences(hwctx->act_dev, 1, &s->cmd.fence);
+        return submit_exec_ctx(ctx, &s->cmd, &s_info, 1);
     }
-
-    return 0;
 }
 
 /* Technically we can use VK_EXT_external_memory_host to upload and download,
@@ -2777,11 +2915,11 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
 
         err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
         if (err)
-            goto end;
+            return err;
 
         err = av_frame_copy(map, src);
         av_frame_free(&map);
-        goto end;
+        return err;
     }
 
     /* Create buffers */
@@ -2808,7 +2946,7 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
         goto end;
 
     /* Copy buffers to image */
-    err = transfer_image_buf(dev_ctx, f, bufs, tmp.linesize,
+    err = transfer_image_buf(dev_ctx, dst, bufs, tmp.linesize,
                              src->width, src->height, src->format, 0);
 
 end:
@@ -2948,10 +3086,12 @@ static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
         err = create_buf(dev_ctx, &bufs[i], p_height,
                          &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
+        if (err)
+            goto end;
     }
 
     /* Copy image to buffer */
-    if ((err = transfer_image_buf(dev_ctx, f, bufs, tmp.linesize,
+    if ((err = transfer_image_buf(dev_ctx, src, bufs, tmp.linesize,
                                   dst->width, dst->height, dst->format, 1)))
         goto end;
 
-- 
2.26.2
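The core of the async scheme, reduced to a standalone sketch (plain Vulkan, no FFmpeg types, error handling trimmed; not code from the patch): one fence per queue, submissions rotate round-robin, and a queue's fence is only waited on lazily when that queue comes around again or at teardown.

#include <vulkan/vulkan.h>

typedef struct QueueSlot {
    VkQueue queue;
    VkFence fence;     /* created unsignaled, one per queue */
    int     in_flight; /* set after an async submit */
} QueueSlot;

static void submit_round_robin(VkDevice dev, QueueSlot *slots, int nb_slots,
                               int *cur, const VkSubmitInfo *s_info)
{
    QueueSlot *q = &slots[*cur];

    /* Reclaim this slot if a previous async submission is still pending */
    if (q->in_flight) {
        vkWaitForFences(dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(dev, 1, &q->fence);
    }

    vkQueueSubmit(q->queue, 1, s_info, q->fence);
    q->in_flight = 1;

    /* Rotate so the next submission lands on the least recently used queue */
    *cur = (*cur + 1) % nb_slots;
}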
>From 1629fde5eafc5c7a35ee47654f6ef6e1c91b4c4a Mon Sep 17 00:00:00 2001
From: Lynne <d...@lynne.ee>
Date: Thu, 14 May 2020 21:53:22 +0100
Subject: [PATCH 01/10] hwcontext_vulkan: wrap ImageBufs into AVBufferRefs

Makes it easier to support multiple queues
---
 libavutil/hwcontext_vulkan.c | 92 ++++++++++++++++++++++--------------
 1 file changed, 56 insertions(+), 36 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 071c541600..516897aa89 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -2465,18 +2465,22 @@ typedef struct ImageBuffer {
     VkMemoryPropertyFlagBits flags;
 } ImageBuffer;
 
-static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf)
+static void free_buf(void *opaque, uint8_t *data)
 {
+    AVHWDeviceContext *ctx = opaque;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
 
-    if (!buf)
-        return;
+    ImageBuffer *vkbuf = (ImageBuffer *)data;
+
+    if (vkbuf->buf)
+        vkDestroyBuffer(hwctx->act_dev, vkbuf->buf, hwctx->alloc);
+    if (vkbuf->mem)
+        vkFreeMemory(hwctx->act_dev, vkbuf->mem, hwctx->alloc);
 
-    vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc);
-    vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc);
+    av_free(data);
 }
 
-static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, int height,
-                      int *stride, VkBufferUsageFlags usage,
+static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf,
+                      int height, int *stride, VkBufferUsageFlags usage,
                       VkMemoryPropertyFlagBits flags, void *create_pnext,
                       void *alloc_pnext)
 {
@@ -2493,34 +2497,44 @@ static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
     };
 
+    ImageBuffer *vkbuf = av_mallocz(sizeof(*vkbuf));
+    if (!vkbuf)
+        return AVERROR(ENOMEM);
+
     *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment);
     buf_spawn.size = height*(*stride);
 
-    ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
+    ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf);
     if (ret != VK_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
                vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
 
-    vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req);
+    vkGetBufferMemoryRequirements(hwctx->act_dev, vkbuf->buf, &req);
 
-    err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem);
+    err = alloc_mem(ctx, &req, flags, alloc_pnext, &vkbuf->flags, &vkbuf->mem);
     if (err)
         return err;
 
-    ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0);
+    ret = vkBindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0);
     if (ret != VK_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
                vk_ret2str(ret));
-        free_buf(ctx, buf);
+        free_buf(ctx, (uint8_t *)vkbuf);
         return AVERROR_EXTERNAL;
     }
 
+    *buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0);
+    if (!(*buf)) {
+        free_buf(ctx, (uint8_t *)vkbuf);
+        return AVERROR(ENOMEM);
+    }
+
     return 0;
 }
 
-static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
+static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[],
                        int nb_buffers, int invalidate)
 {
     VkResult ret;
@@ -2529,7 +2543,8 @@ static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
     int invalidate_count = 0;
 
     for (int i = 0; i < nb_buffers; i++) {
-        ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0,
+        ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
+        ret = vkMapMemory(hwctx->act_dev, vkbuf->mem, 0,
                           VK_WHOLE_SIZE, 0, (void **)&mem[i]);
         if (ret != VK_SUCCESS) {
             av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
@@ -2542,12 +2557,13 @@ static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
         return 0;
 
     for (int i = 0; i < nb_buffers; i++) {
+        ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
         const VkMappedMemoryRange ival_buf = {
             .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
-            .memory = buf[i].mem,
+            .memory = vkbuf->mem,
             .size   = VK_WHOLE_SIZE,
         };
-        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+        if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
             continue;
         invalidate_ctx[invalidate_count++] = ival_buf;
     }
@@ -2563,7 +2579,7 @@ static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
     return 0;
 }
 
-static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
+static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
                          int nb_buffers, int flush)
 {
     int err = 0;
@@ -2574,12 +2590,13 @@ static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
 
     if (flush) {
         for (int i = 0; i < nb_buffers; i++) {
+            ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
             const VkMappedMemoryRange flush_buf = {
                 .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
-                .memory = buf[i].mem,
+                .memory = vkbuf->mem,
                 .size   = VK_WHOLE_SIZE,
             };
-            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+            if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                 continue;
             flush_ctx[flush_count++] = flush_buf;
         }
@@ -2594,14 +2611,16 @@ static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
         }
     }
 
-    for (int i = 0; i < nb_buffers; i++)
-        vkUnmapMemory(hwctx->act_dev, buf[i].mem);
+    for (int i = 0; i < nb_buffers; i++) {
+        ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
+        vkUnmapMemory(hwctx->act_dev, vkbuf->mem);
+    }
 
     return err;
 }
 
 static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
-                              ImageBuffer *buffer, const int *buf_stride, int w,
+                              AVBufferRef **bufs, const int *buf_stride, int w,
                               int h, enum AVPixelFormat pix_fmt, int to_buf)
 {
     VkResult ret;
@@ -2677,6 +2696,7 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
 
     /* Schedule a copy for each plane */
     for (int i = 0; i < planes; i++) {
+        ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
        const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
        const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
        VkBufferImageCopy buf_reg = {
@@ -2695,9 +2715,9 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
 
         if (to_buf)
             vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i],
-                                   buffer[i].buf, 1, &buf_reg);
+                                   vkbuf->buf, 1, &buf_reg);
         else
-            vkCmdCopyBufferToImage(s->cmd.buf, buffer[i].buf, frame->img[i],
+            vkCmdCopyBufferToImage(s->cmd.buf, vkbuf->buf, frame->img[i],
                                    frame->layout[i], 1, &buf_reg);
     }
 
@@ -2735,7 +2755,7 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     AVFrame tmp;
     AVVkFrame *f = (AVVkFrame *)dst->data[0];
     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
-    ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
+    AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
     const int planes = av_pix_fmt_count_planes(src->format);
     int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h;
 
@@ -2770,7 +2790,7 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
         int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
 
         tmp.linesize[i] = FFABS(src->linesize[i]);
-        err = create_buf(dev_ctx, &buf[i], p_height,
+        err = create_buf(dev_ctx, &bufs[i], p_height,
                          &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
         if (err)
@@ -2778,22 +2798,22 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     }
 
     /* Map, copy image to buffer, unmap */
-    if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 0)))
+    if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
         goto end;
 
     av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data,
                   src->linesize, src->format, src->width, src->height);
 
-    if ((err = unmap_buffers(dev_ctx, buf, planes, 1)))
+    if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
         goto end;
 
     /* Copy buffers to image */
-    err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
+    err = transfer_image_buf(dev_ctx, f, bufs, tmp.linesize,
                              src->width, src->height, src->format, 0);
 
 end:
     for (int i = 0; i < planes; i++)
-        free_buf(dev_ctx, &buf[i]);
+        av_buffer_unref(&bufs[i]);
 
     return err;
 }
@@ -2895,7 +2915,7 @@ static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     AVFrame tmp;
     AVVkFrame *f = (AVVkFrame *)src->data[0];
     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
-    ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
+    AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
     const int planes = av_pix_fmt_count_planes(dst->format);
     int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
 
@@ -2925,28 +2945,28 @@ static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
         int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
 
         tmp.linesize[i] = FFABS(dst->linesize[i]);
-        err = create_buf(dev_ctx, &buf[i], p_height,
+        err = create_buf(dev_ctx, &bufs[i], p_height,
                          &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
     }
 
     /* Copy image to buffer */
-    if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
+    if ((err = transfer_image_buf(dev_ctx, f, bufs, tmp.linesize,
                                   dst->width, dst->height, dst->format, 1)))
         goto end;
 
     /* Map, copy buffer to frame, unmap */
-    if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1)))
+    if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 1)))
         goto end;
 
     av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
                   tmp.linesize, dst->format, dst->width, dst->height);
 
-    err = unmap_buffers(dev_ctx, buf, planes, 0);
+    err = unmap_buffers(dev_ctx, bufs, planes, 0);
 
 end:
     for (int i = 0; i < planes; i++)
-        free_buf(dev_ctx, &buf[i]);
+        av_buffer_unref(&bufs[i]);
 
     return err;
 }
-- 
2.26.2
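The wrapping pattern itself, as a self-contained sketch (the MyRes type and names are illustrative, not from the patch): av_buffer_create() turns any heap-allocated struct into a refcounted AVBufferRef with a custom destructor, which is what later lets a queue submission keep buffers alive until it finishes executing.

#include <libavutil/buffer.h>
#include <libavutil/mem.h>

typedef struct MyRes {
    int handle; /* stands in for VkBuffer/VkDeviceMemory */
} MyRes;

static void my_res_free(void *opaque, uint8_t *data)
{
    /* release ((MyRes *)data)->handle here, then free the struct itself */
    av_free(data);
}

static AVBufferRef *my_res_create(void)
{
    MyRes *res = av_mallocz(sizeof(*res));
    AVBufferRef *ref;

    if (!res)
        return NULL;

    /* Wrap the struct; unreffing the last reference calls my_res_free() */
    ref = av_buffer_create((uint8_t *)res, sizeof(*res),
                           my_res_free, NULL, 0);
    if (!ref)
        my_res_free(NULL, (uint8_t *)res);

    return ref;
}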