This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit e27b510da88cfed9c46161473d5f987eaf599983 Author: Lynne <[email protected]> AuthorDate: Fri Jan 2 13:41:27 2026 +0100 Commit: Lynne <[email protected]> CommitDate: Mon Jan 12 17:28:42 2026 +0100 vulkan_prores: generate SPIR-V at compile-time --- configure | 2 +- libavcodec/vulkan/Makefile | 5 +- libavcodec/vulkan/dct.comp | 12 +- .../{prores_idct.comp => prores_idct.comp.glsl} | 50 +++++-- .../{prores_vld.comp => prores_vld.comp.glsl} | 98 +++++++++---- libavcodec/vulkan_prores.c | 157 +++++---------------- 6 files changed, 147 insertions(+), 177 deletions(-) diff --git a/configure b/configure index 0086769956..e893d8aa04 100755 --- a/configure +++ b/configure @@ -3398,7 +3398,7 @@ prores_videotoolbox_hwaccel_deps="videotoolbox" prores_videotoolbox_hwaccel_select="prores_decoder" prores_raw_vulkan_hwaccel_deps="vulkan" prores_raw_vulkan_hwaccel_select="prores_raw_decoder" -prores_vulkan_hwaccel_deps="vulkan spirv_library" +prores_vulkan_hwaccel_deps="vulkan" prores_vulkan_hwaccel_select="prores_decoder" vc1_d3d11va_hwaccel_deps="d3d11va" vc1_d3d11va_hwaccel_select="vc1_decoder" diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index 465a8579fd..c0432e75cc 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -15,9 +15,8 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \ OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/prores_raw_decode.comp.spv.o \ vulkan/prores_raw_idct.comp.spv.o -OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o vulkan/dct.o \ - vulkan/prores_vld.o \ - vulkan/prores_idct.o +OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/prores_vld.comp.spv.o \ + vulkan/prores_idct.comp.spv.o OBJS-$(CONFIG_DPX_VULKAN_HWACCEL) += vulkan/common.o \ vulkan/dpx_unpack.o \ diff --git a/libavcodec/vulkan/dct.comp b/libavcodec/vulkan/dct.comp index 8e887c2023..177663320a 100644 --- a/libavcodec/vulkan/dct.comp +++ b/libavcodec/vulkan/dct.comp @@ -34,16 +34,8 @@ #ifndef VULKAN_DCT_H #define VULKAN_DCT_H -#ifndef NB_BLOCKS -#define NB_BLOCKS 1 -#endif - -#ifndef NB_COMPONENTS -#define NB_COMPONENTS 1 -#endif - -layout (constant_id = 16) const uint32_t nb_blocks = NB_BLOCKS; -layout (constant_id = 17) const uint32_t nb_components = NB_COMPONENTS; +layout (constant_id = 16) const uint32_t nb_blocks = 1; +layout (constant_id = 17) const uint32_t nb_components = 1; /* Padded by 1 row to avoid bank conflicts */ shared float blocks[nb_blocks][nb_components*8*(8 + 1)]; diff --git a/libavcodec/vulkan/prores_idct.comp b/libavcodec/vulkan/prores_idct.comp.glsl similarity index 73% rename from libavcodec/vulkan/prores_idct.comp rename to libavcodec/vulkan/prores_idct.comp.glsl index 25431d61c1..8d0e246025 100644 --- a/libavcodec/vulkan/prores_idct.comp +++ b/libavcodec/vulkan/prores_idct.comp.glsl @@ -16,22 +16,52 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#version 460 +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require + +#include "common.comp" +#include "dct.comp" + +layout (constant_id = 0) const bool interlaced = false; + +layout (set = 0, binding = 0) readonly buffer quant_idx_buf { + uint8_t quant_idx[]; +}; +layout (set = 0, binding = 1) uniform uimage2D dst[]; + +layout (push_constant, scalar) uniform pushConstants { + u8buf slice_data; + uint bitstream_size; + + uint16_t width; + uint16_t height; + uint16_t mb_width; + uint16_t mb_height; + uint16_t slice_width; + uint16_t slice_height; + uint8_t log2_slice_width; + uint8_t log2_chroma_w; + uint8_t depth; + uint8_t alpha_info; + uint8_t bottom_field; + + uint8_t qmat_luma [8*8]; + uint8_t qmat_chroma[8*8]; +}; + uint get_px(uint tex_idx, ivec2 pos) { -#ifndef INTERLACED - return imageLoad(dst[tex_idx], pos).x; -#else - return imageLoad(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field)).x; -#endif + if (interlaced) + pos = ivec2(pos.x, (pos.y << 1) + bottom_field); + return uint(imageLoad(dst[nonuniformEXT(tex_idx)], pos).x); } void put_px(uint tex_idx, ivec2 pos, uint v) { -#ifndef INTERLACED - imageStore(dst[tex_idx], pos, uvec4(v)); -#else - imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), uvec4(v)); -#endif + if (interlaced) + pos = ivec2(pos.x, (pos.y << 1) + bottom_field); + imageStore(dst[nonuniformEXT(tex_idx)], pos, uvec4(v)); } void main(void) diff --git a/libavcodec/vulkan/prores_vld.comp b/libavcodec/vulkan/prores_vld.comp.glsl similarity index 79% rename from libavcodec/vulkan/prores_vld.comp rename to libavcodec/vulkan/prores_vld.comp.glsl index ab0dbf0116..a22f7fed77 100644 --- a/libavcodec/vulkan/prores_vld.comp +++ b/libavcodec/vulkan/prores_vld.comp.glsl @@ -16,6 +16,43 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#version 460 +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require + +#define GET_BITS_SMEM 4 +#include "common.comp" + +layout (constant_id = 0) const bool interlaced = false; + +layout (set = 0, binding = 0) readonly buffer slice_offsets_buf { + uint32_t slice_offsets[]; +}; +layout (set = 0, binding = 1) writeonly buffer quant_idx_buf { + uint8_t quant_idx[]; +}; +layout (set = 0, binding = 2) uniform writeonly uimage2D dst[]; + +layout (push_constant, scalar) uniform pushConstants { + u8buf slice_data; + uint bitstream_size; + + uint16_t width; + uint16_t height; + uint16_t mb_width; + uint16_t mb_height; + uint16_t slice_width; + uint16_t slice_height; + uint8_t log2_slice_width; + uint8_t log2_chroma_w; + uint8_t depth; + uint8_t alpha_info; + uint8_t bottom_field; + + uint8_t qmat_luma [8*8]; + uint8_t qmat_chroma[8*8]; +}; + /** * Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) | ((kexp or kexp + 1) << 8) * According to the SMPTE document, abs(prev_dc_diff) should be used @@ -36,31 +73,29 @@ const uint16_t k_ac_level_codebook[] = { U16(0x202), U16(0x101), U16(0x102), U16 U16(0x210), U16(0x210), U16(0x210), U16(0x210), U16(0x320) }; -#ifndef INTERLACED - /* Figure 4, encoded as (x << 0) | (y << 4) */ - const uint8_t k_scan_tbl[] = { - U8(0x00), U8(0x01), U8(0x10), U8(0x11), U8(0x02), U8(0x03), U8(0x12), U8(0x13), - U8(0x20), U8(0x21), U8(0x30), U8(0x31), U8(0x22), U8(0x23), U8(0x32), U8(0x33), - U8(0x04), U8(0x05), U8(0x14), U8(0x24), U8(0x15), U8(0x06), U8(0x07), U8(0x16), - U8(0x25), U8(0x34), U8(0x35), U8(0x26), U8(0x17), U8(0x27), U8(0x36), U8(0x37), - U8(0x40), U8(0x41), U8(0x50), U8(0x60), U8(0x51), U8(0x42), U8(0x43), U8(0x52), - U8(0x61), U8(0x70), U8(0x71), U8(0x62), U8(0x53), U8(0x44), U8(0x45), U8(0x54), - U8(0x63), U8(0x72), U8(0x73), U8(0x64), U8(0x55), U8(0x46), U8(0x47), U8(0x56), - U8(0x65), U8(0x74), U8(0x75), U8(0x66), U8(0x57), U8(0x67), U8(0x76), U8(0x77), - }; -#else - /* Figure 5 */ - const uint8_t k_scan_tbl[] = { - U8(0x00), U8(0x10), U8(0x01), U8(0x11), U8(0x20), U8(0x30), U8(0x21), U8(0x31), - U8(0x02), U8(0x12), U8(0x03), U8(0x13), U8(0x22), U8(0x32), U8(0x23), U8(0x33), - U8(0x40), U8(0x50), U8(0x41), U8(0x42), U8(0x51), U8(0x60), U8(0x70), U8(0x61), - U8(0x52), U8(0x43), U8(0x53), U8(0x62), U8(0x71), U8(0x72), U8(0x63), U8(0x73), - U8(0x04), U8(0x14), U8(0x05), U8(0x06), U8(0x15), U8(0x24), U8(0x34), U8(0x25), - U8(0x16), U8(0x07), U8(0x17), U8(0x26), U8(0x35), U8(0x44), U8(0x54), U8(0x45), - U8(0x36), U8(0x27), U8(0x37), U8(0x46), U8(0x55), U8(0x64), U8(0x74), U8(0x65), - U8(0x56), U8(0x47), U8(0x57), U8(0x66), U8(0x75), U8(0x76), U8(0x67), U8(0x77), - }; -#endif +/* Figure 4, encoded as (x << 0) | (y << 4) */ +const uint8_t k_scan_tbl[] = { + U8(0x00), U8(0x01), U8(0x10), U8(0x11), U8(0x02), U8(0x03), U8(0x12), U8(0x13), + U8(0x20), U8(0x21), U8(0x30), U8(0x31), U8(0x22), U8(0x23), U8(0x32), U8(0x33), + U8(0x04), U8(0x05), U8(0x14), U8(0x24), U8(0x15), U8(0x06), U8(0x07), U8(0x16), + U8(0x25), U8(0x34), U8(0x35), U8(0x26), U8(0x17), U8(0x27), U8(0x36), U8(0x37), + U8(0x40), U8(0x41), U8(0x50), U8(0x60), U8(0x51), U8(0x42), U8(0x43), U8(0x52), + U8(0x61), U8(0x70), U8(0x71), U8(0x62), U8(0x53), U8(0x44), U8(0x45), U8(0x54), + U8(0x63), U8(0x72), U8(0x73), U8(0x64), U8(0x55), U8(0x46), U8(0x47), U8(0x56), + U8(0x65), U8(0x74), U8(0x75), U8(0x66), U8(0x57), U8(0x67), U8(0x76), U8(0x77), +}; + +/* Figure 5 */ +const uint8_t k_scan_tbl_interlaced[] = { + U8(0x00), U8(0x10), U8(0x01), U8(0x11), U8(0x20), U8(0x30), U8(0x21), U8(0x31), + U8(0x02), U8(0x12), U8(0x03), U8(0x13), U8(0x22), U8(0x32), U8(0x23), U8(0x33), + U8(0x40), U8(0x50), U8(0x41), U8(0x42), U8(0x51), U8(0x60), U8(0x70), U8(0x61), + U8(0x52), U8(0x43), U8(0x53), U8(0x62), U8(0x71), U8(0x72), U8(0x63), U8(0x73), + U8(0x04), U8(0x14), U8(0x05), U8(0x06), U8(0x15), U8(0x24), U8(0x34), U8(0x25), + U8(0x16), U8(0x07), U8(0x17), U8(0x26), U8(0x35), U8(0x44), U8(0x54), U8(0x45), + U8(0x36), U8(0x27), U8(0x37), U8(0x46), U8(0x55), U8(0x64), U8(0x74), U8(0x65), + U8(0x56), U8(0x47), U8(0x57), U8(0x66), U8(0x75), U8(0x76), U8(0x67), U8(0x77), +}; shared uint16_t dc_codebook [k_dc_codebook .length()], ac_run_codebook [k_ac_run_codebook .length()], @@ -70,11 +105,9 @@ shared uint8_t scan_tbl[k_scan_tbl.length()]; void put_px(uint tex_idx, ivec2 pos, uint v) { -#ifndef INTERLACED - imageStore(dst[tex_idx], pos, uvec4(uint16_t(v))); -#else - imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), uvec4(uint16_t(v))); -#endif + if (interlaced) + pos = ivec2(pos.x, (pos.y << 1) + bottom_field); + imageStore(dst[nonuniformEXT(tex_idx)], pos, uvec4(uint16_t(v))); } /* 7.5.3 Pixel Arrangement */ @@ -287,7 +320,10 @@ void main(void) ac_run_codebook = k_ac_run_codebook; ac_level_codebook = k_ac_level_codebook; - scan_tbl = k_scan_tbl; + if (!interlaced) + scan_tbl = k_scan_tbl; + else + scan_tbl = k_scan_tbl_interlaced; /** * 4 ProRes Frame Structure diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c index 7e7c2ace9c..08ef206395 100644 --- a/libavcodec/vulkan_prores.c +++ b/libavcodec/vulkan_prores.c @@ -21,12 +21,12 @@ #include "hwaccel_internal.h" #include "libavutil/mem.h" #include "libavutil/vulkan.h" -#include "libavutil/vulkan_spirv.h" -extern const char *ff_source_common_comp; -extern const char *ff_source_dct_comp; -extern const char *ff_source_prores_vld_comp; -extern const char *ff_source_prores_idct_comp; +extern const unsigned char ff_prores_vld_comp_spv_data[]; +extern const unsigned int ff_prores_vld_comp_spv_len; + +extern const unsigned char ff_prores_idct_comp_spv_data[]; +extern const unsigned int ff_prores_idct_comp_spv_len; const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc = { .codec_id = AV_CODEC_ID_PRORES, @@ -342,171 +342,95 @@ fail: return err; } -static int add_push_data(FFVulkanShader *shd) -{ - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, u8buf slice_data; ); - GLSLC(1, uint bitstream_size; ); - GLSLC(0, ); - GLSLC(1, uint16_t width; ); - GLSLC(1, uint16_t height; ); - GLSLC(1, uint16_t mb_width; ); - GLSLC(1, uint16_t mb_height; ); - GLSLC(1, uint16_t slice_width; ); - GLSLC(1, uint16_t slice_height; ); - GLSLC(1, uint8_t log2_slice_width; ); - GLSLC(1, uint8_t log2_chroma_w; ); - GLSLC(1, uint8_t depth; ); - GLSLC(1, uint8_t alpha_info; ); - GLSLC(1, uint8_t bottom_field; ); - GLSLC(0, ); - GLSLC(1, uint8_t qmat_luma [8*8]; ); - GLSLC(1, uint8_t qmat_chroma[8*8]; ); - GLSLC(0, }; ); - - return ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters), - VK_SHADER_STAGE_COMPUTE_BIT); -} - static int init_decode_shader(AVCodecContext *avctx, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, int max_num_mbs, - int interlaced) + FFVkExecPool *pool, FFVulkanShader *shd, + int max_num_mbs, int interlaced) { int err; AVHWFramesContext *dec_frames_ctx; dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - - RET(ff_vk_shader_init(s, shd, "prores_vld", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - 8, 8, 1, - 0)); - - av_bprintf(&shd->src, "#define GET_BITS_SMEM %d\n", 4); + SPEC_LIST_CREATE(sl, 1, 1*sizeof(uint32_t)) + SPEC_LIST_ADD(sl, 0, 32, interlaced); - if (interlaced) - av_bprintf(&shd->src, "#define INTERLACED\n"); + ff_vk_shader_load(shd, + VK_SHADER_STAGE_COMPUTE_BIT, sl, + (uint32_t []) { 8, 8, 1 }, 0); - /* Common codec header */ - GLSLD(ff_source_common_comp); - - RET(add_push_data(shd)); + ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters), + VK_SHADER_STAGE_COMPUTE_BIT); FFVulkanDescriptorSetBinding desc_set[] = { { .name = "slice_offsets_buf", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "readonly", - .buf_content = "uint32_t slice_offsets", - .buf_elems = max_num_mbs + 1, }, { .name = "quant_idx_buf", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "writeonly", - .buf_content = "uint8_t quant_idx", - .buf_elems = max_num_mbs, }, { .name = "dst", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .mem_quali = "writeonly", - .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), }, }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0)); + ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0); - GLSLD(ff_source_prores_vld_comp); - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(s, shd, + ff_prores_vld_comp_spv_data, + ff_prores_vld_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(s, pool, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return 0; } static int init_idct_shader(AVCodecContext *avctx, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, int max_num_mbs, - int interlaced) + FFVkExecPool *pool, FFVulkanShader *shd, + int max_num_mbs, int interlaced) { int err; AVHWFramesContext *dec_frames_ctx; dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; + SPEC_LIST_CREATE(sl, 2, 2*sizeof(uint32_t)) + SPEC_LIST_ADD(sl, 0, 32, interlaced); + SPEC_LIST_ADD(sl, 16, 32, 4*2); /* nb_blocks */ - RET(ff_vk_shader_init(s, shd, "prores_idct", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - 32, 2, 1, - 0)); + ff_vk_shader_load(shd, + VK_SHADER_STAGE_COMPUTE_BIT, sl, + (uint32_t []) { 32, 2, 1 }, 0); - if (interlaced) - av_bprintf(&shd->src, "#define INTERLACED\n"); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - RET(add_push_data(shd)); + ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters), + VK_SHADER_STAGE_COMPUTE_BIT); FFVulkanDescriptorSetBinding desc_set[] = { { .name = "quant_idx_buf", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "readonly", - .buf_content = "uint8_t quant_idx", - .buf_elems = max_num_mbs, }, { .name = "dst", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), }, }; RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0)); - GLSLC(0, #define NB_BLOCKS 4*2); - GLSLD(ff_source_dct_comp); - - GLSLD(ff_source_prores_idct_comp); - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(s, shd, + ff_prores_idct_comp_spv_data, + ff_prores_idct_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(s, pool, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return 0; } @@ -529,17 +453,10 @@ static int vk_decode_prores_init(AVCodecContext *avctx) ProresContext *pr = avctx->priv_data; ProresVulkanDecodeContext *pv; - FFVkSPIRVCompiler *spv; int max_num_mbs, err; max_num_mbs = (avctx->coded_width >> 4) * (avctx->coded_height >> 4); - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - err = ff_vk_decode_init(avctx); if (err < 0) return err; @@ -554,15 +471,11 @@ static int vk_decode_prores_init(AVCodecContext *avctx) ctx->sd_ctx_free = vk_decode_prores_uninit; RET(init_decode_shader(avctx, &ctx->s, &ctx->exec_pool, - spv, &pv->vld, max_num_mbs, pr->frame_type != 0)); + &pv->vld, max_num_mbs, pr->frame_type != 0)); RET(init_idct_shader(avctx, &ctx->s, &ctx->exec_pool, - spv, &pv->idct, max_num_mbs, pr->frame_type != 0)); - - err = 0; + &pv->idct, max_num_mbs, pr->frame_type != 0)); fail: - spv->uninit(&spv); - return err; } _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
