PR #23104 opened by Lynne. URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23104 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23104.patch
This completes the port of every single filter to compile-time SPIR-V generation. scale_vulkan's GLSL will be fully replaced by swscale, and swscale will soon drop its glslang dependency. From fe16edf5aafd6fe59e56d812d4cd690aa9918cd7 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Thu, 14 May 2026 17:09:50 +0900 Subject: [PATCH 1/2] vf_nlmeans_vulkan: port to compile-time SPIR-V generation --- configure | 2 +- libavfilter/vf_nlmeans_vulkan.c | 534 +++--------------- libavfilter/vulkan/Makefile | 4 + libavfilter/vulkan/nlmeans_denoise.comp.glsl | 86 +++ .../vulkan/nlmeans_horizontal.comp.glsl | 104 ++++ libavfilter/vulkan/nlmeans_vertical.comp.glsl | 122 ++++ libavfilter/vulkan/nlmeans_weights.comp.glsl | 144 +++++ 7 files changed, 550 insertions(+), 446 deletions(-) create mode 100644 libavfilter/vulkan/nlmeans_denoise.comp.glsl create mode 100644 libavfilter/vulkan/nlmeans_horizontal.comp.glsl create mode 100644 libavfilter/vulkan/nlmeans_vertical.comp.glsl create mode 100644 libavfilter/vulkan/nlmeans_weights.comp.glsl diff --git a/configure b/configure index 39a522e7e8..d953074c89 100755 --- a/configure +++ b/configure @@ -4222,7 +4222,7 @@ mptestsrc_filter_deps="gpl" msad_filter_select="scene_sad" negate_filter_deps="lut_filter" nlmeans_opencl_filter_deps="opencl" -nlmeans_vulkan_filter_deps="vulkan spirv_library" +nlmeans_vulkan_filter_deps="vulkan spirv_compiler" nnedi_filter_deps="gpl" ocr_filter_deps="libtesseract" ocv_filter_deps="libopencv" diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c index c1430707b7..902c072669 100644 --- a/libavfilter/vf_nlmeans_vulkan.c +++ b/libavfilter/vf_nlmeans_vulkan.c @@ -19,19 +19,24 @@ */ #include "libavutil/mem.h" -#include "libavutil/random_seed.h" -#include "libavutil/vulkan_spirv.h" #include "libavutil/opt.h" #include "vulkan_filter.h" #include "filters.h" #include "video.h" -#define TYPE_NAME "vec4" +extern const unsigned char ff_nlmeans_horizontal_comp_spv_data[]; +extern const unsigned int 
ff_nlmeans_horizontal_comp_spv_len; +extern const unsigned char ff_nlmeans_vertical_comp_spv_data[]; +extern const unsigned int ff_nlmeans_vertical_comp_spv_len; +extern const unsigned char ff_nlmeans_weights_comp_spv_data[]; +extern const unsigned int ff_nlmeans_weights_comp_spv_len; +extern const unsigned char ff_nlmeans_denoise_comp_spv_data[]; +extern const unsigned int ff_nlmeans_denoise_comp_spv_len; + +/* Must be kept in sync with the definitions in the nlmeans_* shaders */ #define TYPE_ELEMS 4 #define TYPE_SIZE (TYPE_ELEMS*4) -#define TYPE_BLOCK_ELEMS 16 -#define TYPE_BLOCK_SIZE (TYPE_SIZE * TYPE_BLOCK_ELEMS) #define WG_SIZE 32 typedef struct NLMeansVulkanContext { @@ -80,210 +85,60 @@ typedef struct IntegralPushData { uint32_t nb_components; } IntegralPushData; -static void shared_shd_def(FFVulkanShader *shd) { - GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require ); - GLSLC(0, ); - GLSLF(0, #define DTYPE %s ,TYPE_NAME); - GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE); - GLSLF(0, #define T_BLOCK_ELEMS %i ,TYPE_BLOCK_ELEMS); - GLSLF(0, #define T_BLOCK_ALIGN %i ,TYPE_BLOCK_SIZE); - GLSLC(0, ); - GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { ); - GLSLC(1, DTYPE v[]; ); - GLSLC(0, }; ); - GLSLC(0, struct Block { ); - GLSLC(1, DTYPE data[T_BLOCK_ELEMS]; ); - GLSLC(0, }; ); - GLSLC(0, layout(buffer_reference, buffer_reference_align = T_BLOCK_ALIGN) buffer BlockBuffer { ); - GLSLC(1, Block v[]; ); - GLSLC(0, }; ); - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, uvec4 width; ); - GLSLC(1, uvec4 height; ); - GLSLC(1, vec4 strength; ); - GLSLC(1, uvec4 comp_off; ); - GLSLC(1, uvec4 comp_plane; ); - GLSLC(1, DataBuffer integral_base; ); - GLSLC(1, uint64_t integral_size; ); - GLSLC(1, uint64_t int_stride; ); - GLSLC(1, uint xyoffs_start; ); - GLSLC(1, uint nb_components; ); - GLSLC(0, }; ); - GLSLC(0, ); - - ff_vk_shader_add_push_const(shd, 0, sizeof(IntegralPushData), - 
VK_SHADER_STAGE_COMPUTE_BIT); -} - static av_cold int init_integral_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, FFVulkanShader *shd_horizontal, FFVulkanShader *shd_vertical, - FFVkSPIRVCompiler *spv, - const AVPixFmtDescriptor *desc, int planes) + int planes) { int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; FFVulkanShader *shd; - FFVulkanDescriptorSetBinding *desc_set; + /* Horizontal pass */ shd = shd_horizontal; - RET(ff_vk_shader_init(vkctx, shd, "nlmeans_horizontal", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - WG_SIZE, 1, 1, - 0)); - shared_shd_def(shd); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL, + (uint32_t []) { WG_SIZE, 1, 1 }, 0); - GLSLC(0, ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, uint64_t offset; ); - GLSLC(1, DataBuffer dst; ); - GLSLC(1, BlockBuffer b_dst; ); - GLSLC(1, Block block; ); - GLSLC(1, DTYPE s2; ); - GLSLC(1, DTYPE prefix_sum; ); - GLSLC(1, ivec2 pos; ); - GLSLC(1, int k; ); - GLSLC(1, int o; ); - GLSLC(0, ); - GLSLC(1, DataBuffer integral_data; ); - GLSLC(0, ); - GLSLC(1, uint c_plane; ); - GLSLC(0, ); - GLSLC(1, uint comp_idx = uint(gl_WorkGroupID.y); ); - GLSLC(1, uint invoc_idx = uint(gl_WorkGroupID.z); ); - GLSLC(0, ); - GLSLC(1, if (strength[comp_idx] == 0.0) ); - GLSLC(2, return; ); - GLSLC(0, ); - GLSLC(1, offset = integral_size * (invoc_idx * nb_components + comp_idx); ); - GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); ); - GLSLC(0, ); - GLSLC(1, c_plane = comp_plane[comp_idx]; ); - GLSLC(0, ); - GLSLC(1, pos.y = int(gl_GlobalInvocationID.x); ); - GLSLC(1, if (pos.y < height[c_plane]) { ); - GLSLC(2, prefix_sum = DTYPE(0); ); - GLSLC(2, offset = int_stride * uint64_t(pos.y); ); - GLSLC(2, b_dst = BlockBuffer(uint64_t(integral_data) + offset); ); - GLSLC(0, ); - GLSLC(2, for (k = 0; k * T_BLOCK_ELEMS < width[c_plane]; k++) { ); - GLSLC(3, block = b_dst.v[k]; ); - GLSLC(3, 
for (o = 0; o < T_BLOCK_ELEMS; o++) { ); - GLSLC(4, s2 = block.data[o]; ); - GLSLC(4, block.data[o] = s2 + prefix_sum; ); - GLSLC(4, prefix_sum += s2; ); - GLSLC(3, } ); - GLSLC(3, b_dst.v[k] = block; ); - GLSLC(2, } ); - GLSLC(1, } ); - GLSLC(0, } ); + ff_vk_shader_add_push_const(shd, 0, sizeof(IntegralPushData), + VK_SHADER_STAGE_COMPUTE_BIT); - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(vkctx, shd, + ff_nlmeans_horizontal_comp_spv_data, + ff_nlmeans_horizontal_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(vkctx, exec, shd)); + /* Vertical pass */ shd = shd_vertical; - RET(ff_vk_shader_init(vkctx, shd, "nlmeans_vertical", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - WG_SIZE, 1, 1, - 0)); - shared_shd_def(shd); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL, + (uint32_t []) { WG_SIZE, 1, 1 }, 0); - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, + ff_vk_shader_add_push_const(shd, 0, sizeof(IntegralPushData), + VK_SHADER_STAGE_COMPUTE_BIT); + + const FFVulkanDescriptorSetBinding desc_set_img[] = { + { /* input_img */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = planes, }, }; - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 0, 0)); + ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set_img, 1, 0, 0); - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "xyoffsets_buffer", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "ivec2 xyoffsets[];", + 
const FFVulkanDescriptorSetBinding desc_set_xyoffsets[] = { + { /* xyoffsets_buffer */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0)); + ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set_xyoffsets, 1, 1, 0); - GLSLC(0, ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, uint64_t offset; ); - GLSLC(1, DataBuffer dst; ); - GLSLC(1, float s1; ); - GLSLC(1, DTYPE s2; ); - GLSLC(1, DTYPE prefix_sum; ); - GLSLC(1, uvec2 size; ); - GLSLC(1, ivec2 pos; ); - GLSLC(1, ivec2 pos_off; ); - GLSLC(0, ); - GLSLC(1, DataBuffer integral_data; ); - GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS); - GLSLC(0, ); - GLSLC(1, uint c_off; ); - GLSLC(1, uint c_plane; ); - GLSLC(0, ); - GLSLC(1, uint comp_idx = uint(gl_WorkGroupID.y); ); - GLSLC(1, uint invoc_idx = uint(gl_WorkGroupID.z); ); - GLSLC(0, ); - GLSLC(1, if (strength[comp_idx] == 0.0) ); - GLSLC(2, return; ); - GLSLC(0, ); - GLSLC(1, offset = integral_size * (invoc_idx * nb_components + comp_idx); ); - GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); ); - for (int i = 0; i < TYPE_ELEMS; i++) - GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i); - GLSLC(0, ); - GLSLC(1, c_off = comp_off[comp_idx]; ); - GLSLC(1, c_plane = comp_plane[comp_idx]; ); - GLSLC(1, size = imageSize(input_img[c_plane]); ); - GLSLC(0, ); - GLSLC(1, pos.x = int(gl_GlobalInvocationID.x); ); - GLSLC(1, if (pos.x < width[c_plane]) { ); - GLSLC(2, prefix_sum = DTYPE(0); ); - GLSLC(2, for (pos.y = 0; pos.y < height[c_plane]; pos.y++) { ); - GLSLC(3, offset = int_stride * uint64_t(pos.y); ); - GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); ); - GLSLC(4, s1 = imageLoad(input_img[c_plane], pos)[c_off]; ); - for (int i = 0; i < TYPE_ELEMS; i++) { - GLSLF(4, pos_off = pos + offs[%i]; ,i); - GLSLC(4, if (!IS_WITHIN(uvec2(pos_off), size)) ); - GLSLF(5, s2[%i] = s1; ,i); - GLSLC(4, else ); - 
GLSLF(5, s2[%i] = imageLoad(input_img[c_plane], pos_off)[c_off]; ,i); - } - GLSLC(4, s2 = (s1 - s2) * (s1 - s2); ); - GLSLC(3, dst.v[pos.x] = s2 + prefix_sum; ); - GLSLC(3, prefix_sum += s2; ); - GLSLC(2, } ); - GLSLC(1, } ); - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(vkctx, shd, + ff_nlmeans_vertical_comp_spv_data, + ff_nlmeans_vertical_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(vkctx, exec, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return err; } @@ -305,172 +160,48 @@ typedef struct WeightsPushData { } WeightsPushData; static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, - FFVulkanShader *shd, - FFVkSPIRVCompiler *spv, - const AVPixFmtDescriptor *desc, - int planes) + FFVulkanShader *shd, int planes) { int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - FFVulkanDescriptorSetBinding *desc_set; - RET(ff_vk_shader_init(vkctx, shd, "nlmeans_weights", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - WG_SIZE, WG_SIZE, 1, - 0)); - - GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require ); - GLSLC(0, ); - GLSLF(0, #define DTYPE %s ,TYPE_NAME); - GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE); - GLSLC(0, ); - GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { ); - GLSLC(1, DTYPE v[]; ); - GLSLC(0, }; ); - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, uvec4 width; ); - GLSLC(1, uvec4 height; ); - GLSLC(1, uvec4 ws_offset; ); - GLSLC(1, uvec4 ws_stride; ); - GLSLC(1, ivec4 patch_size; ); - GLSLC(1, vec4 strength; ); - GLSLC(1, uvec4 comp_off; ); - GLSLC(1, uvec4 comp_plane; ); - GLSLC(1, DataBuffer integral_base; ); - GLSLC(1, uint64_t integral_size; ); - GLSLC(1, uint64_t int_stride; ); - GLSLC(1, uint 
xyoffs_start; ); - GLSLC(1, uint ws_count; ); - GLSLC(1, uint nb_components; ); - GLSLC(0, }; ); - GLSLC(0, ); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL, + (uint32_t []) { WG_SIZE, WG_SIZE, 1 }, 0); ff_vk_shader_add_push_const(shd, 0, sizeof(WeightsPushData), VK_SHADER_STAGE_COMPUTE_BIT); - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, + const FFVulkanDescriptorSetBinding desc_set[] = { + { /* input_img */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = planes, }, - { - .name = "weights_buffer", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float weights[];", + { /* weights_buffer */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, - { - .name = "sums_buffer", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float sums[];", + { /* sums_buffer */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 3, 0, 0)); + ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 3, 0, 0); - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "xyoffsets_buffer", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "ivec2 xyoffsets[];", + const FFVulkanDescriptorSetBinding desc_set_xyoffsets[] = { + { /* xyoffsets_buffer */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0)); + ff_vk_shader_add_descriptor_set(vkctx, shd, 
desc_set_xyoffsets, 1, 1, 0); - GLSLC(0, ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, uint64_t offset; ); - GLSLC(1, DataBuffer dst; ); - GLSLC(1, uvec2 size; ); - GLSLC(1, ivec2 pos; ); - GLSLC(1, ivec2 pos_off; ); - GLSLC(1, int p; ); - GLSLC(1, float s; ); - GLSLC(0, ); - GLSLC(1, DataBuffer integral_data; ); - GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS); - GLSLC(0, ); - GLSLC(1, uint c_off; ); - GLSLC(1, uint c_plane; ); - GLSLC(1, uint ws_off; ); - GLSLC(0, ); - GLSLC(1, pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLC(1, uint comp_idx = uint(gl_WorkGroupID.z) %% nb_components; ); - GLSLC(1, uint invoc_idx = uint(gl_WorkGroupID.z) / nb_components; ); - GLSLC(0, ); - GLSLC(1, c_off = comp_off[comp_idx]; ); - GLSLC(1, c_plane = comp_plane[comp_idx]; ); - GLSLC(1, p = patch_size[comp_idx]; ); - GLSLC(1, s = strength[comp_idx]; ); - GLSLC(1, if (s == 0.0 || pos.x < p || pos.y < p || pos.x >= width[c_plane] - p || pos.y >= height[c_plane] - p) ); - GLSLC(2, return; ); - GLSLC(0, ); - GLSLC(1, offset = integral_size * (invoc_idx * nb_components + comp_idx); ); - GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); ); - for (int i = 0; i < TYPE_ELEMS; i++) - GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i); - GLSLC(0, ); - GLSLC(1, ws_off = ws_count * invoc_idx + ws_offset[comp_idx] + pos.y * ws_stride[comp_idx] + pos.x; ); - GLSLC(1, size = imageSize(input_img[c_plane]); ); - GLSLC(0, ); - GLSLC(1, DTYPE a; ); - GLSLC(1, DTYPE b; ); - GLSLC(1, DTYPE c; ); - GLSLC(1, DTYPE d; ); - GLSLC(0, ); - GLSLC(1, DTYPE patch_diff; ); - GLSLC(1, vec4 src; ); - GLSLC(1, vec4 w; ); - GLSLC(1, float w_sum; ); - GLSLC(1, float sum; ); - GLSLC(0, ); - for (int i = 0; i < 4; i++) { - GLSLF(1, pos_off = pos + offs[%i]; ,i); - GLSLC(1, if (!IS_WITHIN(uvec2(pos_off), size)) ); - GLSLF(2, src[%i] = imageLoad(input_img[c_plane], pos)[c_off]; ,i); - GLSLC(1, else ); - GLSLF(2, src[%i] = imageLoad(input_img[c_plane], pos_off)[c_off]; 
,i); - } - GLSLC(0, ); - GLSLC(1, offset = int_stride * uint64_t(pos.y - p); ); - GLSLC(1, dst = DataBuffer(uint64_t(integral_data) + offset); ); - GLSLC(1, a = dst.v[pos.x - p]; ); - GLSLC(1, c = dst.v[pos.x + p]; ); - GLSLC(1, offset = int_stride * uint64_t(pos.y + p); ); - GLSLC(1, dst = DataBuffer(uint64_t(integral_data) + offset); ); - GLSLC(1, b = dst.v[pos.x - p]; ); - GLSLC(1, d = dst.v[pos.x + p]; ); - GLSLC(0, ); - GLSLC(1, patch_diff = d + a - b - c; ); - GLSLC(1, w = exp(patch_diff * s); ); - GLSLC(1, w_sum = w[0] + w[1] + w[2] + w[3]; ); - GLSLC(1, sum = dot(w, src * 255); ); - GLSLC(0, ); - GLSLC(1, weights[ws_off] += w_sum; ); - GLSLC(1, sums[ws_off] += sum; ); - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(vkctx, shd, + ff_nlmeans_weights_comp_spv_data, + ff_nlmeans_weights_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(vkctx, exec, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return err; } @@ -485,121 +216,49 @@ typedef struct DenoisePushData { } DenoisePushData; static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, - FFVulkanShader *shd, FFVkSPIRVCompiler *spv, - const AVPixFmtDescriptor *desc, int planes) + FFVulkanShader *shd, int planes) { int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - FFVulkanDescriptorSetBinding *desc_set; - RET(ff_vk_shader_init(vkctx, shd, "nlmeans_denoise", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - WG_SIZE, WG_SIZE, 1, - 0)); - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, uvec4 comp_off; ); - GLSLC(1, uvec4 comp_plane; ); - GLSLC(1, uvec4 ws_offset; ); - GLSLC(1, uvec4 ws_stride; ); - GLSLC(1, uint32_t ws_count; ); - GLSLC(1, uint32_t t; ); - GLSLC(1, uint32_t nb_components; ); - 
GLSLC(0, }; ); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL, + (uint32_t []) { WG_SIZE, WG_SIZE, 1 }, 0); ff_vk_shader_add_push_const(shd, 0, sizeof(DenoisePushData), VK_SHADER_STAGE_COMPUTE_BIT); - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, + const FFVulkanDescriptorSetBinding desc_set_img[] = { + { /* input_img */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = planes, }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, + { /* output_img */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = planes, }, }; - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0)); + ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set_img, 2, 0, 0); - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "weights_buffer", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float weights[];", + const FFVulkanDescriptorSetBinding desc_set_ws[] = { + { /* weights_buffer */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, - { - .name = "sums_buffer", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float sums[];", + { /* sums_buffer */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; + ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set_ws, 2, 0, 0); - 
RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0)); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLC(1, const uint plane = uint(gl_WorkGroupID.z); ); - GLSLC(1, const uvec2 size = imageSize(output_img[plane]); ); - GLSLC(0, ); - GLSLC(1, uint c_off; ); - GLSLC(1, uint c_plane; ); - GLSLC(1, uint ws_off; ); - GLSLC(0, ); - GLSLC(1, float w_sum; ); - GLSLC(1, float sum; ); - GLSLC(1, vec4 src; ); - GLSLC(1, vec4 r; ); - GLSLC(1, uint invoc_idx; ); - GLSLC(1, uint comp_idx; ); - GLSLC(0, ); - GLSLC(1, if (!IS_WITHIN(pos, size)) ); - GLSLC(2, return; ); - GLSLC(0, ); - GLSLC(1, src = imageLoad(input_img[plane], pos); ); - GLSLC(1, for (comp_idx = 0; comp_idx < nb_components; comp_idx++) { ); - GLSLC(2, if (plane == comp_plane[comp_idx]) { ); - GLSLC(3, w_sum = 0.0; ); - GLSLC(3, sum = 0.0; ); - GLSLC(3, for (invoc_idx = 0; invoc_idx < t; invoc_idx++) { ); - GLSLC(4, ws_off = ws_count * invoc_idx + ws_offset[comp_idx] + pos.y * ws_stride[comp_idx] + pos.x; ); - GLSLC(4, w_sum += weights[ws_off]; ); - GLSLC(4, sum += sums[ws_off]; ); - GLSLC(3, } ); - GLSLC(3, c_off = comp_off[comp_idx]; ); - GLSLC(3, r[c_off] = (sum + src[c_off] * 255) / (1.0 + w_sum) / 255; ); - GLSLC(2, } ); - GLSLC(1, } ); - GLSLC(1, imageStore(output_img[plane], pos, r); ); - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(vkctx, shd, + ff_nlmeans_denoise_comp_spv_data, + ff_nlmeans_denoise_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(vkctx, exec, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return err; } @@ -610,15 +269,9 @@ static av_cold int init_filter(AVFilterContext *ctx) NLMeansVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVkSPIRVCompiler 
*spv = NULL; int *offsets_buf; int offsets_dispatched = 0, nb_dispatches = 0; - const AVPixFmtDescriptor *desc; - desc = av_pix_fmt_desc_get(vkctx->output_format); - if (!desc) - return AVERROR(EINVAL); - if (!(s->opts.r & 1)) { s->opts.r |= 1; av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i", @@ -682,12 +335,6 @@ static av_cold int init_filter(AVFilterContext *ctx) s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS)); - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); if (!s->qf) { av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); @@ -698,11 +345,11 @@ static av_cold int init_filter(AVFilterContext *ctx) RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, 1, 0, 0, 0, NULL)); RET(init_integral_pipeline(vkctx, &s->e, &s->shd_horizontal, &s->shd_vertical, - spv, desc, planes)); + planes)); - RET(init_weights_pipeline(vkctx, &s->e, &s->shd_weights, spv, desc, planes)); + RET(init_weights_pipeline(vkctx, &s->e, &s->shd_weights, planes)); - RET(init_denoise_pipeline(vkctx, &s->e, &s->shd_denoise, spv, desc, planes)); + RET(init_denoise_pipeline(vkctx, &s->e, &s->shd_denoise, planes)); RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_vertical, 1, 0, 0, @@ -726,9 +373,6 @@ static av_cold int init_filter(AVFilterContext *ctx) s->initialized = 1; fail: - if (spv) - spv->uninit(&spv); - return err; } diff --git a/libavfilter/vulkan/Makefile b/libavfilter/vulkan/Makefile index 6d25cf8a50..cd303e535e 100644 --- a/libavfilter/vulkan/Makefile +++ b/libavfilter/vulkan/Makefile @@ -15,3 +15,7 @@ OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vulkan/transpose.comp.spv.o OBJS-$(CONFIG_V360_VULKAN_FILTER) += vulkan/v360.comp.spv.o OBJS-$(CONFIG_INTERLACE_VULKAN_FILTER) += vulkan/interlace.comp.spv.o OBJS-$(CONFIG_XFADE_VULKAN_FILTER) += vulkan/xfade.comp.spv.o 
+OBJS-$(CONFIG_NLMEANS_VULKAN_FILTER) += vulkan/nlmeans_horizontal.comp.spv.o \ + vulkan/nlmeans_vertical.comp.spv.o \ + vulkan/nlmeans_weights.comp.spv.o \ + vulkan/nlmeans_denoise.comp.spv.o diff --git a/libavfilter/vulkan/nlmeans_denoise.comp.glsl b/libavfilter/vulkan/nlmeans_denoise.comp.glsl new file mode 100644 index 0000000000..974c09318f --- /dev/null +++ b/libavfilter/vulkan/nlmeans_denoise.comp.glsl @@ -0,0 +1,86 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma shader_stage(compute) + +#extension GL_EXT_shader_image_load_formatted : require +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_shader_explicit_arithmetic_types : require + +layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) in; + +layout (push_constant, scalar) uniform pushConstants { + uvec4 comp_off; + uvec4 comp_plane; + uvec4 ws_offset; + uvec4 ws_stride; + uint32_t ws_count; + uint32_t t; + uint32_t nb_components; +}; + +layout (set = 0, binding = 0) uniform readonly image2D input_img[]; +layout (set = 0, binding = 1) uniform writeonly image2D output_img[]; + +layout (set = 1, binding = 0, scalar) readonly buffer weights_buffer { + float weights[]; +}; + +layout (set = 1, binding = 1, scalar) readonly buffer sums_buffer { + float sums[]; +}; + +void main() +{ + const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + const uint plane = uint(gl_WorkGroupID.z); + const ivec2 size = imageSize(output_img[plane]); + + uint c_off; + uint c_plane; + uint ws_off; + + float w_sum; + float sum; + vec4 src; + vec4 r; + uint invoc_idx; + uint comp_idx; + + if (any(greaterThanEqual(pos, size))) + return; + + src = imageLoad(input_img[plane], pos); + for (comp_idx = 0; comp_idx < nb_components; comp_idx++) { + if (plane == comp_plane[comp_idx]) { + w_sum = 0.0; + sum = 0.0; + for (invoc_idx = 0; invoc_idx < t; invoc_idx++) { + ws_off = ws_count * invoc_idx + ws_offset[comp_idx] + pos.y * ws_stride[comp_idx] + pos.x; + w_sum += weights[ws_off]; + sum += sums[ws_off]; + } + c_off = comp_off[comp_idx]; + r[c_off] = (sum + src[c_off] * 255) / (1.0 + w_sum) / 255; + } + } + imageStore(output_img[plane], pos, r); +} diff --git 
a/libavfilter/vulkan/nlmeans_horizontal.comp.glsl b/libavfilter/vulkan/nlmeans_horizontal.comp.glsl new file mode 100644 index 0000000000..d1bd62ccb1 --- /dev/null +++ b/libavfilter/vulkan/nlmeans_horizontal.comp.glsl @@ -0,0 +1,104 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma shader_stage(compute) + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require +#extension GL_EXT_shader_explicit_arithmetic_types : require +#extension GL_ARB_gpu_shader_int64 : require + +/* Must be kept in sync with the definitions in vf_nlmeans_vulkan.c */ +#define DTYPE vec4 +#define T_ALIGN 16 +#define T_BLOCK_ELEMS 16 +#define T_BLOCK_ALIGN 256 + +layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) in; + +layout (buffer_reference, buffer_reference_align = T_ALIGN, scalar) buffer DataBuffer { + DTYPE v[]; +}; + +struct Block { + DTYPE data[T_BLOCK_ELEMS]; +}; + +layout (buffer_reference, buffer_reference_align = T_BLOCK_ALIGN, scalar) buffer BlockBuffer { + Block v[]; +}; + +layout (push_constant, scalar) uniform pushConstants { + uvec4 width; + uvec4 height; + vec4 strength; + uvec4 comp_off; + uvec4 comp_plane; + 
DataBuffer integral_base; + uint64_t integral_size; + uint64_t int_stride; + uint xyoffs_start; + uint nb_components; +}; + +void main() +{ + uint64_t offset; + BlockBuffer b_dst; + Block block; + DTYPE s2; + DTYPE prefix_sum; + ivec2 pos; + int k; + int o; + + DataBuffer integral_data; + + uint c_plane; + + uint comp_idx = uint(gl_WorkGroupID.y); + uint invoc_idx = uint(gl_WorkGroupID.z); + + if (strength[comp_idx] == 0.0) + return; + + offset = integral_size * (invoc_idx * nb_components + comp_idx); + integral_data = DataBuffer(uint64_t(integral_base) + offset); + + c_plane = comp_plane[comp_idx]; + + pos.y = int(gl_GlobalInvocationID.x); + if (pos.y < height[c_plane]) { + prefix_sum = DTYPE(0); + offset = int_stride * uint64_t(pos.y); + b_dst = BlockBuffer(uint64_t(integral_data) + offset); + + for (k = 0; k * T_BLOCK_ELEMS < width[c_plane]; k++) { + block = b_dst.v[k]; + for (o = 0; o < T_BLOCK_ELEMS; o++) { + s2 = block.data[o]; + block.data[o] = s2 + prefix_sum; + prefix_sum += s2; + } + b_dst.v[k] = block; + } + } +} diff --git a/libavfilter/vulkan/nlmeans_vertical.comp.glsl b/libavfilter/vulkan/nlmeans_vertical.comp.glsl new file mode 100644 index 0000000000..d5842f4a16 --- /dev/null +++ b/libavfilter/vulkan/nlmeans_vertical.comp.glsl @@ -0,0 +1,122 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma shader_stage(compute) + +#extension GL_EXT_shader_image_load_formatted : require +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require +#extension GL_EXT_shader_explicit_arithmetic_types : require +#extension GL_ARB_gpu_shader_int64 : require + +/* Must be kept in sync with the definitions in vf_nlmeans_vulkan.c */ +#define DTYPE vec4 +#define T_ALIGN 16 +#define T_BLOCK_ELEMS 16 +#define T_BLOCK_ALIGN 256 +#define TYPE_ELEMS 4 + +layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) in; + +layout (buffer_reference, buffer_reference_align = T_ALIGN, scalar) buffer DataBuffer { + DTYPE v[]; +}; + +struct Block { + DTYPE data[T_BLOCK_ELEMS]; +}; + +layout (buffer_reference, buffer_reference_align = T_BLOCK_ALIGN, scalar) buffer BlockBuffer { + Block v[]; +}; + +layout (push_constant, scalar) uniform pushConstants { + uvec4 width; + uvec4 height; + vec4 strength; + uvec4 comp_off; + uvec4 comp_plane; + DataBuffer integral_base; + uint64_t integral_size; + uint64_t int_stride; + uint xyoffs_start; + uint nb_components; +}; + +layout (set = 0, binding = 0) uniform readonly image2D input_img[]; + +layout (set = 1, binding = 0, scalar) readonly buffer xyoffsets_buffer { + ivec2 xyoffsets[]; +}; +/* Vertical pass: each invocation walks one column top to bottom and stores the running sum of squared differences between each pixel and its TYPE_ELEMS offset neighbours. */ +void main() +{ + uint64_t offset; + DataBuffer dst; + float s1; + DTYPE s2; + DTYPE prefix_sum; + uvec2 size; + ivec2 pos; + ivec2 pos_off; + + DataBuffer integral_data; + ivec2 offs[TYPE_ELEMS]; + + uint c_off; + uint c_plane; + + uint comp_idx = uint(gl_WorkGroupID.y); + uint invoc_idx = uint(gl_WorkGroupID.z); + + if (strength[comp_idx] == 0.0) /* zero strength: leave this component untouched */ + return; + + offset = integral_size * 
(invoc_idx * nb_components + comp_idx); /* address offset of the integral image for this (invoc_idx, comp_idx) pair */ + integral_data = DataBuffer(uint64_t(integral_base) + offset); + for (uint i = 0; i < TYPE_ELEMS; i++) + offs[i] = xyoffsets[xyoffs_start + TYPE_ELEMS*invoc_idx + i]; /* TYPE_ELEMS offsets per invocation, one per DTYPE lane */ + + c_off = comp_off[comp_idx]; + c_plane = comp_plane[comp_idx]; + size = imageSize(input_img[c_plane]); + + pos.x = int(gl_GlobalInvocationID.x); /* the x dispatch dimension selects the column */ + if (pos.x < width[c_plane]) { + prefix_sum = DTYPE(0); + for (pos.y = 0; pos.y < height[c_plane]; pos.y++) { + offset = int_stride * uint64_t(pos.y); + dst = DataBuffer(uint64_t(integral_data) + offset); + s1 = imageLoad(input_img[c_plane], pos)[c_off]; + for (int i = 0; i < TYPE_ELEMS; i++) { + pos_off = pos + offs[i]; + if (any(greaterThanEqual(uvec2(pos_off), size))) /* the unsigned cast makes negative coordinates fail this test too */ + s2[i] = s1; /* out of bounds: use the centre pixel, giving a zero difference */ + else + s2[i] = imageLoad(input_img[c_plane], pos_off)[c_off]; + } + s2 = (s1 - s2) * (s1 - s2); /* per-lane squared difference */ + dst.v[pos.x] = s2 + prefix_sum; /* inclusive running sum down the column */ + prefix_sum += s2; + } + } +} diff --git a/libavfilter/vulkan/nlmeans_weights.comp.glsl b/libavfilter/vulkan/nlmeans_weights.comp.glsl new file mode 100644 index 0000000000..24c918bd0a --- /dev/null +++ b/libavfilter/vulkan/nlmeans_weights.comp.glsl @@ -0,0 +1,144 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma shader_stage(compute) + +#extension GL_EXT_shader_image_load_formatted : require +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require +#extension GL_EXT_shader_explicit_arithmetic_types : require +#extension GL_ARB_gpu_shader_int64 : require + +/* Must be kept in sync with the definitions in vf_nlmeans_vulkan.c */ +#define DTYPE vec4 +#define T_ALIGN 16 +#define TYPE_ELEMS 4 + +layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) in; + +layout (buffer_reference, buffer_reference_align = T_ALIGN, scalar) buffer DataBuffer { + DTYPE v[]; +}; + +layout (push_constant, scalar) uniform pushConstants { + uvec4 width; + uvec4 height; + uvec4 ws_offset; + uvec4 ws_stride; + ivec4 patch_size; + vec4 strength; + uvec4 comp_off; + uvec4 comp_plane; + DataBuffer integral_base; + uint64_t integral_size; + uint64_t int_stride; + uint xyoffs_start; + uint ws_count; + uint nb_components; +}; + +layout (set = 0, binding = 0) uniform readonly image2D input_img[]; + +layout (set = 0, binding = 1, scalar) buffer weights_buffer { + float weights[]; +}; + +layout (set = 0, binding = 2, scalar) buffer sums_buffer { + float sums[]; +}; + +layout (set = 1, binding = 0, scalar) readonly buffer xyoffsets_buffer { + ivec2 xyoffsets[]; +}; +/* Weights pass: for each pixel at least p away from every plane edge, reads the patch difference from the integral image and accumulates the resulting weights and weighted pixel values. */ +void main() +{ + uint64_t offset; + DataBuffer dst; + uvec2 size; + ivec2 pos; + ivec2 pos_off; + int p; + float s; + + DataBuffer integral_data; + ivec2 offs[TYPE_ELEMS]; + + uint c_off; + uint c_plane; + uint ws_off; + + pos = ivec2(gl_GlobalInvocationID.xy); + uint comp_idx = uint(gl_WorkGroupID.z) % nb_components; + uint invoc_idx = uint(gl_WorkGroupID.z) / 
nb_components; + + c_off = comp_off[comp_idx]; + c_plane = comp_plane[comp_idx]; + p = patch_size[comp_idx]; + s = strength[comp_idx]; + if (s == 0.0 || pos.x < p || pos.y < p || pos.x + p >= width[c_plane] || pos.y + p >= height[c_plane]) /* written as pos + p so that a plane smaller than p cannot wrap an unsigned subtraction and let out-of-range pixels through */ + return; + + offset = integral_size * (invoc_idx * nb_components + comp_idx); /* address offset of the integral image for this (invoc_idx, comp_idx) pair */ + integral_data = DataBuffer(uint64_t(integral_base) + offset); + for (uint i = 0; i < TYPE_ELEMS; i++) + offs[i] = xyoffsets[xyoffs_start + TYPE_ELEMS*invoc_idx + i]; + + ws_off = ws_count * invoc_idx + ws_offset[comp_idx] + pos.y * ws_stride[comp_idx] + pos.x; + size = imageSize(input_img[c_plane]); + + DTYPE a; + DTYPE b; + DTYPE c; + DTYPE d; + + DTYPE patch_diff; + vec4 src; + vec4 w; + float w_sum; + float sum; + + for (int i = 0; i < TYPE_ELEMS; i++) { + pos_off = pos + offs[i]; + if (any(greaterThanEqual(uvec2(pos_off), size))) /* the unsigned cast makes negative coordinates fail this test too */ + src[i] = imageLoad(input_img[c_plane], pos)[c_off]; + else + src[i] = imageLoad(input_img[c_plane], pos_off)[c_off]; + } + + offset = int_stride * uint64_t(pos.y - p); + dst = DataBuffer(uint64_t(integral_data) + offset); + a = dst.v[pos.x - p]; + c = dst.v[pos.x + p]; + offset = int_stride * uint64_t(pos.y + p); + dst = DataBuffer(uint64_t(integral_data) + offset); + b = dst.v[pos.x - p]; + d = dst.v[pos.x + p]; + + patch_diff = d + a - b - c; /* box sum over the patch from the four integral-image corners */ + w = exp(patch_diff * s); + w_sum = w[0] + w[1] + w[2] + w[3]; + sum = dot(w, src * 255); + + weights[ws_off] += w_sum; /* NOTE(review): not atomic; relies on ws_off being unique per invocation - confirm the ws_* layout on the host side */ + sums[ws_off] += sum; +} -- 2.52.0 >From f48c81e5fea86531322ec95cfaaedd610ba57805 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Tue, 21 Apr 2026 10:00:32 +0200 Subject: [PATCH 2/2] vf_blackdetect_vulkan: port to compile-time SPIR-V generation --- configure | 2 +- libavfilter/vf_blackdetect_vulkan.c | 119 ++++++++--------------- libavfilter/vulkan/Makefile | 1 + libavfilter/vulkan/blackdetect.comp.glsl | 64 ++++++++++++ 4 files changed, 109 insertions(+), 77 deletions(-) create mode 100644 libavfilter/vulkan/blackdetect.comp.glsl diff --git a/configure 
b/configure index d953074c89..32c9aacc62 100755 --- a/configure +++ b/configure @@ -4149,7 +4149,7 @@ ass_filter_deps="libass" avgblur_opencl_filter_deps="opencl" avgblur_vulkan_filter_deps="vulkan spirv_compiler" azmq_filter_deps="libzmq" -blackdetect_vulkan_filter_deps="vulkan spirv_library" +blackdetect_vulkan_filter_deps="vulkan spirv_compiler" blackframe_filter_deps="gpl" blend_vulkan_filter_deps="vulkan spirv_compiler" boxblur_filter_deps="gpl" diff --git a/libavfilter/vf_blackdetect_vulkan.c b/libavfilter/vf_blackdetect_vulkan.c index 279b057148..3abe2f9fb3 100644 --- a/libavfilter/vf_blackdetect_vulkan.c +++ b/libavfilter/vf_blackdetect_vulkan.c @@ -19,13 +19,14 @@ */ #include <float.h> -#include "libavutil/vulkan_spirv.h" #include "libavutil/opt.h" #include "libavutil/timestamp.h" #include "vulkan_filter.h" #include "filters.h" -#include "video.h" + +extern const unsigned char ff_blackdetect_comp_spv_data[]; +extern const unsigned int ff_blackdetect_comp_spv_len; typedef struct BlackDetectVulkanContext { FFVulkanContext vkctx; @@ -36,12 +37,16 @@ typedef struct BlackDetectVulkanContext { FFVulkanShader shd; AVBufferPool *sum_buf_pool; - double black_min_duration_time; - double picture_black_ratio_th; - double pixel_black_th; - int alpha; + double picture_black_ratio_th; + double pixel_black_th; + int alpha; - int64_t black_start; + int black_started; + int64_t black_start; ///< pts start time of the first black picture + int64_t black_end; ///< pts end time of the last black picture + double black_min_duration_time; ///< minimum duration of detected black, in seconds + int64_t black_min_duration; ///< minimum duration of detected black, expressed in timebase units + AVRational time_base; } BlackDetectVulkanContext; typedef struct BlackDetectPushData { @@ -56,14 +61,9 @@ typedef struct BlackDetectBuf { static av_cold int init_filter(AVFilterContext *ctx) { int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; BlackDetectVulkanContext *s 
= ctx->priv; FFVulkanContext *vkctx = &s->vkctx; - FFVulkanShader *shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; + const AVFilterLink *inlink = ctx->inputs[0]; const int plane = s->alpha ? 3 : 0; const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->vkctx.input_format); @@ -72,12 +72,6 @@ static av_cold int init_filter(AVFilterContext *ctx) return AVERROR(ENOTSUP); } - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); if (!s->qf) { av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); @@ -86,89 +80,58 @@ static av_cold int init_filter(AVFilterContext *ctx) } RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_shader_init(vkctx, &s->shd, "blackdetect", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_KHR_shader_subgroup_ballot" }, 1, - 32, 32, 1, - 0)); - shd = &s->shd; - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, float threshold; ); - GLSLC(0, }; ); + SPEC_LIST_CREATE(sl, 2, 2*sizeof(uint32_t)) + SPEC_LIST_ADD(sl, 0, 32, plane); + SPEC_LIST_ADD(sl, 1, 32, SLICES); - ff_vk_shader_add_push_const(shd, 0, sizeof(BlackDetectPushData), + ff_vk_shader_load(&s->shd, VK_SHADER_STAGE_COMPUTE_BIT, sl, + (int []) { 32, 32, 1 }, 0); + + ff_vk_shader_add_push_const(&s->shd, 0, sizeof(BlackDetectPushData), VK_SHADER_STAGE_COMPUTE_BIT); - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_img", + const FFVulkanDescriptorSetBinding desc[] = { + { /* input_img */ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = av_pix_fmt_count_planes(s->vkctx.input_format), .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, { - .name = "sum_buffer", + .elems = av_pix_fmt_count_planes(s->vkctx.input_format), + 
}, + { /* sum_buffer */ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "uint slice_sum[];", } }; + ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0); - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); - - GLSLC(0, shared uint wg_sum; ); - GLSLC(0, ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, wg_sum = 0u; ); - GLSLC(1, barrier(); ); - GLSLC(0, ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLF(1, if (!IS_WITHIN(pos, imageSize(input_img[%d]))) ,plane); - GLSLC(2, return; ); - GLSLF(1, float value = imageLoad(input_img[%d], pos).x; ,plane); - GLSLC(1, uvec4 isblack = subgroupBallot(value <= threshold); ); - GLSLC(1, if (subgroupElect()) ); - GLSLC(2, atomicAdd(wg_sum, subgroupBallotBitCount(isblack)); ); - GLSLC(1, barrier(); ); - GLSLC(1, if (gl_LocalInvocationIndex == 0u) ); - GLSLF(2, atomicAdd(slice_sum[gl_WorkGroupID.x %% %du], wg_sum); ,SLICES); - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(vkctx, &s->shd, + ff_blackdetect_comp_spv_data, + ff_blackdetect_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + s->time_base = inlink->time_base; + s->black_min_duration = s->black_min_duration_time / av_q2d(s->time_base); s->black_start = AV_NOPTS_VALUE; s->initialized = 1; fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - return err; } static void report_black_region(AVFilterContext *ctx, int64_t black_end) { BlackDetectVulkanContext *s = ctx->priv; - const AVFilterLink *inlink = ctx->inputs[0]; + if (s->black_start == AV_NOPTS_VALUE) return; - if ((black_end - s->black_start) >= s->black_min_duration_time / av_q2d(inlink->time_base)) { + if ((black_end - s->black_start) >= s->black_min_duration) { av_log(ctx, 
AV_LOG_INFO, "black_start:%s black_end:%s black_duration:%s\n", - av_ts2timestr(s->black_start, &inlink->time_base), - av_ts2timestr(black_end, &inlink->time_base), - av_ts2timestr(black_end - s->black_start, &inlink->time_base)); + av_ts2timestr(s->black_start, &s->time_base), + av_ts2timestr(black_end, &s->time_base), + av_ts2timestr(black_end - s->black_start, &s->time_base)); } } @@ -359,11 +322,15 @@ fail: static void blackdetect_vulkan_uninit(AVFilterContext *avctx) { BlackDetectVulkanContext *s = avctx->priv; - AVFilterLink *inlink = avctx->inputs[0]; - FilterLink *inl = ff_filter_link(inlink); FFVulkanContext *vkctx = &s->vkctx; - report_black_region(avctx, inl->current_pts); + /* avctx->inputs[0] is NULL if the filter is freed before its input was + * ever linked (e.g. invalid options abort filter creation). s->initialized + * guarantees a frame was processed, so the input link is valid. */ + if (s->initialized) { + FilterLink *inl = ff_filter_link(avctx->inputs[0]); + report_black_region(avctx, inl->current_pts); + } ff_vk_exec_pool_free(vkctx, &s->e); ff_vk_shader_free(vkctx, &s->shd); diff --git a/libavfilter/vulkan/Makefile b/libavfilter/vulkan/Makefile index cd303e535e..2cfe9cfa93 100644 --- a/libavfilter/vulkan/Makefile +++ b/libavfilter/vulkan/Makefile @@ -2,6 +2,7 @@ clean:: $(RM) $(CLEANSUFFIXES:%=libavfilter/vulkan/%) OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vulkan/avgblur.comp.spv.o +OBJS-$(CONFIG_BLACKDETECT_VULKAN_FILTER) += vulkan/blackdetect.comp.spv.o OBJS-$(CONFIG_BLEND_VULKAN_FILTER) += vulkan/blend.comp.spv.o OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.comp.spv.o OBJS-$(CONFIG_CHROMABER_VULKAN_FILTER) += vulkan/chromaber.comp.spv.o diff --git a/libavfilter/vulkan/blackdetect.comp.glsl b/libavfilter/vulkan/blackdetect.comp.glsl new file mode 100644 index 0000000000..21e7601060 --- /dev/null +++ b/libavfilter/vulkan/blackdetect.comp.glsl @@ -0,0 +1,64 @@ +/* + * Copyright 2025 (c) Niklas Haas + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma shader_stage(compute) + +#extension GL_EXT_shader_image_load_formatted : require +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_EXT_null_initializer : require + +layout (constant_id = 0) const uint plane = 0; /* index into input_img[] of the plane to scan */ +layout (constant_id = 1) const uint slices = 0; /* NOTE(review): used as a modulus in main(), so the host must always specialize it to a non-zero value */ + +layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) in; + +layout (set = 0, binding = 0) uniform readonly image2D input_img[]; +layout (set = 0, binding = 1, scalar) buffer sum_buffer { + uint slice_sum[]; +}; + +layout (push_constant, scalar) uniform pushConstants { + float threshold; +}; + +shared uint wg_sum = { }; /* per-workgroup count of black pixels; null initializer from GL_EXT_null_initializer */ +/* Counts pixels with value <= threshold and adds each workgroup's count to one slice_sum[] entry. */ +void main() +{ + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + + /* Out-of-bounds invocations must still reach the barrier, but mustn't + * be counted: threshold is positive, so the placeholder value of 0.0 would + * otherwise be counted as black. 
*/ + bool in_bounds = all(lessThan(pos, imageSize(input_img[plane]))); + float value = 0.0f; + if (in_bounds) + value = imageLoad(input_img[plane], pos).x; + + uvec4 isblack = subgroupBallot(in_bounds && value <= threshold); + if (subgroupElect()) /* one invocation per subgroup adds the whole subgroup's count */ + atomicAdd(wg_sum, subgroupBallotBitCount(isblack)); + + barrier(); + if (gl_LocalInvocationIndex == 0) /* one invocation per workgroup publishes the total */ + atomicAdd(slice_sum[gl_WorkGroupID.x % slices], wg_sum); +} -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
