This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 23ab1b1a66ed775bd4aee0396bc0eadd4daef077 Author: Lynne <[email protected]> AuthorDate: Sat Jan 3 15:44:26 2026 +0100 Commit: Lynne <[email protected]> CommitDate: Mon Jan 12 17:28:42 2026 +0100 vulkan/dct: embed DCT scaling values during SPIR-V generation Instead of relying on rounded-off values, use specialization constants to bake the DCT values into the shader when it's compiled. --- libavcodec/vulkan/{dct.comp => dct.glsl} | 34 ++++++++++++++--------------- libavcodec/vulkan/prores_idct.comp.glsl | 2 +- libavcodec/vulkan/prores_raw_idct.comp.glsl | 2 +- libavcodec/vulkan_prores.c | 12 +++++++++- libavcodec/vulkan_prores_raw.c | 12 +++++++++- 5 files changed, 40 insertions(+), 22 deletions(-) diff --git a/libavcodec/vulkan/dct.comp b/libavcodec/vulkan/dct.glsl similarity index 71% rename from libavcodec/vulkan/dct.comp rename to libavcodec/vulkan/dct.glsl index 177663320a..159d4873ad 100644 --- a/libavcodec/vulkan/dct.comp +++ b/libavcodec/vulkan/dct.glsl @@ -34,31 +34,29 @@ #ifndef VULKAN_DCT_H #define VULKAN_DCT_H +#extension GL_EXT_spec_constant_composites : require + layout (constant_id = 16) const uint32_t nb_blocks = 1; layout (constant_id = 17) const uint32_t nb_components = 1; -/* Padded by 1 row to avoid bank conflicts */ -shared float blocks[nb_blocks][nb_components*8*(8 + 1)]; +#define V(I) layout(constant_id = (18 + I)) const float sv##I = I; +V( 0) V( 1) V( 2) V( 3) V( 4) V( 5) V( 6) V( 7) V( 8) V( 9) V(10) V(11) V(12) +V(13) V(14) V(15) V(16) V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) V(25) +V(26) V(27) V(28) V(29) V(30) V(31) V(32) V(33) V(34) V(35) V(36) V(37) V(38) +V(39) V(40) V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) V(49) V(50) V(51) +V(52) V(53) V(54) V(55) V(56) V(57) V(58) V(59) V(60) V(61) V(62) V(63) const float idct_scale[64] = { - 0.1250000000000000, 0.1733799806652684, 0.1633203706095471, 0.1469844503024199, - 0.1250000000000000, 0.0982118697983878, 0.0676495125182746, 0.0344874224103679, - 0.1733799806652684, 
0.2404849415639108, 0.2265318615882219, 0.2038732892122293, - 0.1733799806652684, 0.1362237766939547, 0.0938325693794663, 0.0478354290456362, - 0.1633203706095471, 0.2265318615882219, 0.2133883476483184, 0.1920444391778541, - 0.1633203706095471, 0.1283199917898342, 0.0883883476483185, 0.0450599888754343, - 0.1469844503024199, 0.2038732892122293, 0.1920444391778541, 0.1728354290456362, - 0.1469844503024199, 0.1154849415639109, 0.0795474112858021, 0.0405529186026822, - 0.1250000000000000, 0.1733799806652684, 0.1633203706095471, 0.1469844503024199, - 0.1250000000000000, 0.0982118697983878, 0.0676495125182746, 0.0344874224103679, - 0.0982118697983878, 0.1362237766939547, 0.1283199917898342, 0.1154849415639109, - 0.0982118697983878, 0.0771645709543638, 0.0531518809229535, 0.0270965939155924, - 0.0676495125182746, 0.0938325693794663, 0.0883883476483185, 0.0795474112858021, - 0.0676495125182746, 0.0531518809229535, 0.0366116523516816, 0.0186644585125857, - 0.0344874224103679, 0.0478354290456362, 0.0450599888754343, 0.0405529186026822, - 0.0344874224103679, 0.0270965939155924, 0.0186644585125857, 0.0095150584360892, + sv0, sv1, sv2, sv3, sv4, sv5, sv6, sv7, sv8, sv9, sv10, sv11, sv12, + sv13, sv14, sv15, sv16, sv17, sv18, sv19, sv20, sv21, sv22, sv23, sv24, sv25, + sv26, sv27, sv28, sv29, sv30, sv31, sv32, sv33, sv34, sv35, sv36, sv37, sv38, + sv39, sv40, sv41, sv42, sv43, sv44, sv45, sv46, sv47, sv48, sv49, sv50, sv51, + sv52, sv53, sv54, sv55, sv56, sv57, sv58, sv59, sv60, sv61, sv62, sv63 }; +/* Padded by 1 row to avoid bank conflicts */ +shared float blocks[nb_blocks][nb_components*8*(8 + 1)]; + void idct8(uint block, uint offset, uint stride) { float t0, t1, t2, t3, t4, t5, t6, t7, u8; diff --git a/libavcodec/vulkan/prores_idct.comp.glsl b/libavcodec/vulkan/prores_idct.comp.glsl index 8d0e246025..ee9eddf19d 100644 --- a/libavcodec/vulkan/prores_idct.comp.glsl +++ b/libavcodec/vulkan/prores_idct.comp.glsl @@ -21,7 +21,7 @@ #extension GL_GOOGLE_include_directive : 
require #include "common.comp" -#include "dct.comp" +#include "dct.glsl" layout (constant_id = 0) const bool interlaced = false; diff --git a/libavcodec/vulkan/prores_raw_idct.comp.glsl b/libavcodec/vulkan/prores_raw_idct.comp.glsl index 52014d035b..d009876fca 100644 --- a/libavcodec/vulkan/prores_raw_idct.comp.glsl +++ b/libavcodec/vulkan/prores_raw_idct.comp.glsl @@ -25,7 +25,7 @@ #extension GL_GOOGLE_include_directive : require #include "common.comp" -#include "dct.comp" +#include "dct.glsl" struct TileData { ivec2 pos; diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c index 08ef206395..417a6b3bec 100644 --- a/libavcodec/vulkan_prores.c +++ b/libavcodec/vulkan_prores.c @@ -398,10 +398,20 @@ static int init_idct_shader(AVCodecContext *avctx, FFVulkanContext *s, AVHWFramesContext *dec_frames_ctx; dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data; - SPEC_LIST_CREATE(sl, 2, 2*sizeof(uint32_t)) + SPEC_LIST_CREATE(sl, 2 + 64, (2 + 64)*sizeof(uint32_t)) SPEC_LIST_ADD(sl, 0, 32, interlaced); SPEC_LIST_ADD(sl, 16, 32, 4*2); /* nb_blocks */ + const double idct_8_scales[8] = { + cos(4.0*M_PI/16.0) / 2.0, cos(1.0*M_PI/16.0) / 2.0, + cos(2.0*M_PI/16.0) / 2.0, cos(3.0*M_PI/16.0) / 2.0, + cos(4.0*M_PI/16.0) / 2.0, cos(5.0*M_PI/16.0) / 2.0, + cos(6.0*M_PI/16.0) / 2.0, cos(7.0*M_PI/16.0) / 2.0, + }; + for (int i = 0; i < 64; i++) + SPEC_LIST_ADD(sl, 18 + i, 32, + av_float2int(idct_8_scales[i >> 3]*idct_8_scales[i & 7])); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl, (uint32_t []) { 32, 2, 1 }, 0); diff --git a/libavcodec/vulkan_prores_raw.c b/libavcodec/vulkan_prores_raw.c index b8bcbb9178..42625ad59a 100644 --- a/libavcodec/vulkan_prores_raw.c +++ b/libavcodec/vulkan_prores_raw.c @@ -332,12 +332,22 @@ static int init_idct_shader(AVCodecContext *avctx, FFVulkanContext *s, int version) { int err; - SPEC_LIST_CREATE(sl, 2, 2*sizeof(uint32_t)) + SPEC_LIST_CREATE(sl, 2 + 64, (2 + 64)*sizeof(uint32_t)) int nb_blocks = version == 0 ? 
8 : 16; SPEC_LIST_ADD(sl, 16, 32, nb_blocks); SPEC_LIST_ADD(sl, 17, 32, 4); /* nb_components */ + const double idct_8_scales[8] = { + cos(4.0*M_PI/16.0) / 2.0, cos(1.0*M_PI/16.0) / 2.0, + cos(2.0*M_PI/16.0) / 2.0, cos(3.0*M_PI/16.0) / 2.0, + cos(4.0*M_PI/16.0) / 2.0, cos(5.0*M_PI/16.0) / 2.0, + cos(6.0*M_PI/16.0) / 2.0, cos(7.0*M_PI/16.0) / 2.0, + }; + for (int i = 0; i < 64; i++) + SPEC_LIST_ADD(sl, 18 + i, 32, + av_float2int(idct_8_scales[i >> 3]*idct_8_scales[i & 7])); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl, (uint32_t []) { 8, nb_blocks, 4 }, 0); _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
