This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 23ab1b1a66ed775bd4aee0396bc0eadd4daef077
Author:     Lynne <[email protected]>
AuthorDate: Sat Jan 3 15:44:26 2026 +0100
Commit:     Lynne <[email protected]>
CommitDate: Mon Jan 12 17:28:42 2026 +0100

    vulkan/dct: embed DCT scaling values during SPIR-V generation
    
    Instead of relying on rounded-off values, use specialization constants
    to bake the DCT values into the shader when it's compiled.
---
 libavcodec/vulkan/{dct.comp => dct.glsl}    | 34 ++++++++++++++---------------
 libavcodec/vulkan/prores_idct.comp.glsl     |  2 +-
 libavcodec/vulkan/prores_raw_idct.comp.glsl |  2 +-
 libavcodec/vulkan_prores.c                  | 12 +++++++++-
 libavcodec/vulkan_prores_raw.c              | 12 +++++++++-
 5 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/libavcodec/vulkan/dct.comp b/libavcodec/vulkan/dct.glsl
similarity index 71%
rename from libavcodec/vulkan/dct.comp
rename to libavcodec/vulkan/dct.glsl
index 177663320a..159d4873ad 100644
--- a/libavcodec/vulkan/dct.comp
+++ b/libavcodec/vulkan/dct.glsl
@@ -34,31 +34,29 @@
 #ifndef VULKAN_DCT_H
 #define VULKAN_DCT_H
 
+#extension GL_EXT_spec_constant_composites : require
+
 layout (constant_id = 16) const uint32_t nb_blocks = 1;
 layout (constant_id = 17) const uint32_t nb_components = 1;
 
-/* Padded by 1 row to avoid bank conflicts */
-shared float blocks[nb_blocks][nb_components*8*(8 + 1)];
+#define V(I) layout(constant_id = (18 + I)) const float sv##I = I;
+V( 0) V( 1) V( 2) V( 3) V( 4) V( 5) V( 6) V( 7) V( 8) V( 9) V(10) V(11) V(12)
+V(13) V(14) V(15) V(16) V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) V(25)
+V(26) V(27) V(28) V(29) V(30) V(31) V(32) V(33) V(34) V(35) V(36) V(37) V(38)
+V(39) V(40) V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) V(49) V(50) V(51)
+V(52) V(53) V(54) V(55) V(56) V(57) V(58) V(59) V(60) V(61) V(62) V(63)
 
 const float idct_scale[64] = {
-    0.1250000000000000, 0.1733799806652684, 0.1633203706095471, 0.1469844503024199,
-    0.1250000000000000, 0.0982118697983878, 0.0676495125182746, 0.0344874224103679,
-    0.1733799806652684, 0.2404849415639108, 0.2265318615882219, 0.2038732892122293,
-    0.1733799806652684, 0.1362237766939547, 0.0938325693794663, 0.0478354290456362,
-    0.1633203706095471, 0.2265318615882219, 0.2133883476483184, 0.1920444391778541,
-    0.1633203706095471, 0.1283199917898342, 0.0883883476483185, 0.0450599888754343,
-    0.1469844503024199, 0.2038732892122293, 0.1920444391778541, 0.1728354290456362,
-    0.1469844503024199, 0.1154849415639109, 0.0795474112858021, 0.0405529186026822,
-    0.1250000000000000, 0.1733799806652684, 0.1633203706095471, 0.1469844503024199,
-    0.1250000000000000, 0.0982118697983878, 0.0676495125182746, 0.0344874224103679,
-    0.0982118697983878, 0.1362237766939547, 0.1283199917898342, 0.1154849415639109,
-    0.0982118697983878, 0.0771645709543638, 0.0531518809229535, 0.0270965939155924,
-    0.0676495125182746, 0.0938325693794663, 0.0883883476483185, 0.0795474112858021,
-    0.0676495125182746, 0.0531518809229535, 0.0366116523516816, 0.0186644585125857,
-    0.0344874224103679, 0.0478354290456362, 0.0450599888754343, 0.0405529186026822,
-    0.0344874224103679, 0.0270965939155924, 0.0186644585125857, 0.0095150584360892,
+     sv0,  sv1,  sv2,  sv3,  sv4,  sv5,  sv6,  sv7,  sv8,  sv9, sv10, sv11, sv12,
+    sv13, sv14, sv15, sv16, sv17, sv18, sv19, sv20, sv21, sv22, sv23, sv24, sv25,
+    sv26, sv27, sv28, sv29, sv30, sv31, sv32, sv33, sv34, sv35, sv36, sv37, sv38,
+    sv39, sv40, sv41, sv42, sv43, sv44, sv45, sv46, sv47, sv48, sv49, sv50, sv51,
+    sv52, sv53, sv54, sv55, sv56, sv57, sv58, sv59, sv60, sv61, sv62, sv63
 };
 
+/* Padded by 1 row to avoid bank conflicts */
+shared float blocks[nb_blocks][nb_components*8*(8 + 1)];
+
 void idct8(uint block, uint offset, uint stride)
 {
     float t0, t1, t2, t3, t4, t5, t6, t7, u8;
diff --git a/libavcodec/vulkan/prores_idct.comp.glsl b/libavcodec/vulkan/prores_idct.comp.glsl
index 8d0e246025..ee9eddf19d 100644
--- a/libavcodec/vulkan/prores_idct.comp.glsl
+++ b/libavcodec/vulkan/prores_idct.comp.glsl
@@ -21,7 +21,7 @@
 #extension GL_GOOGLE_include_directive : require
 
 #include "common.comp"
-#include "dct.comp"
+#include "dct.glsl"
 
 layout (constant_id = 0) const bool interlaced = false;
 
diff --git a/libavcodec/vulkan/prores_raw_idct.comp.glsl b/libavcodec/vulkan/prores_raw_idct.comp.glsl
index 52014d035b..d009876fca 100644
--- a/libavcodec/vulkan/prores_raw_idct.comp.glsl
+++ b/libavcodec/vulkan/prores_raw_idct.comp.glsl
@@ -25,7 +25,7 @@
 #extension GL_GOOGLE_include_directive : require
 
 #include "common.comp"
-#include "dct.comp"
+#include "dct.glsl"
 
 struct TileData {
    ivec2 pos;
diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c
index 08ef206395..417a6b3bec 100644
--- a/libavcodec/vulkan_prores.c
+++ b/libavcodec/vulkan_prores.c
@@ -398,10 +398,20 @@ static int init_idct_shader(AVCodecContext *avctx, FFVulkanContext *s,
     AVHWFramesContext *dec_frames_ctx;
     dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
 
-    SPEC_LIST_CREATE(sl, 2, 2*sizeof(uint32_t))
+    SPEC_LIST_CREATE(sl, 2 + 64, (2 + 64)*sizeof(uint32_t))
     SPEC_LIST_ADD(sl,  0, 32, interlaced);
     SPEC_LIST_ADD(sl, 16, 32, 4*2); /* nb_blocks */
 
+    const double idct_8_scales[8] = {
+        cos(4.0*M_PI/16.0) / 2.0, cos(1.0*M_PI/16.0) / 2.0,
+        cos(2.0*M_PI/16.0) / 2.0, cos(3.0*M_PI/16.0) / 2.0,
+        cos(4.0*M_PI/16.0) / 2.0, cos(5.0*M_PI/16.0) / 2.0,
+        cos(6.0*M_PI/16.0) / 2.0, cos(7.0*M_PI/16.0) / 2.0,
+    };
+    for (int i = 0; i < 64; i++)
+        SPEC_LIST_ADD(sl, 18 + i, 32,
+                      av_float2int(idct_8_scales[i >> 3]*idct_8_scales[i & 7]));
+
     ff_vk_shader_load(shd,
                       VK_SHADER_STAGE_COMPUTE_BIT, sl,
                       (uint32_t []) { 32, 2, 1 }, 0);
diff --git a/libavcodec/vulkan_prores_raw.c b/libavcodec/vulkan_prores_raw.c
index b8bcbb9178..42625ad59a 100644
--- a/libavcodec/vulkan_prores_raw.c
+++ b/libavcodec/vulkan_prores_raw.c
@@ -332,12 +332,22 @@ static int init_idct_shader(AVCodecContext *avctx, FFVulkanContext *s,
                             int version)
 {
     int err;
-    SPEC_LIST_CREATE(sl, 2, 2*sizeof(uint32_t))
+    SPEC_LIST_CREATE(sl, 2 + 64, (2 + 64)*sizeof(uint32_t))
 
     int nb_blocks = version == 0 ? 8 : 16;
     SPEC_LIST_ADD(sl, 16, 32, nb_blocks);
     SPEC_LIST_ADD(sl, 17, 32, 4); /* nb_components */
 
+    const double idct_8_scales[8] = {
+        cos(4.0*M_PI/16.0) / 2.0, cos(1.0*M_PI/16.0) / 2.0,
+        cos(2.0*M_PI/16.0) / 2.0, cos(3.0*M_PI/16.0) / 2.0,
+        cos(4.0*M_PI/16.0) / 2.0, cos(5.0*M_PI/16.0) / 2.0,
+        cos(6.0*M_PI/16.0) / 2.0, cos(7.0*M_PI/16.0) / 2.0,
+    };
+    for (int i = 0; i < 64; i++)
+        SPEC_LIST_ADD(sl, 18 + i, 32,
+                      av_float2int(idct_8_scales[i >> 3]*idct_8_scales[i & 7]));
+
     ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
                       (uint32_t []) { 8, nb_blocks, 4 }, 0);
 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to