[FFmpeg-devel] [PR] scale_vulkan: apply camera raw data (PR #23475)

Lynne via ffmpeg-devel Sat, 13 Jun 2026 22:20:21 -0700

PR #23475 opened by Lynne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23475
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23475.patch


This implements a full camera RAW to linear XYZ conversion in scale_vulkan. The 
output is properly exposed, has white balance applied, and allows downstream 
users to tone-map the ridiculous dynamic range into whatever is necessary.
    
A BT.2020 HLG output could also be easily integrated, since the gain value is 
coded as "brightness over SDR", but I think it's fine to let libplacebo handle it.


From b93725cfa3500a39eebacf98de6fdb59555a4b4f Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Sat, 16 May 2026 16:42:04 +0900
Subject: [PATCH] scale_vulkan: apply camera raw data

This implements a full camera RAW to linear XYZ conversion in scale_vulkan.
The output is properly exposed, has white balance applied, and allows downstream
users to tone-map the ridiculous dynamic range into whatever is necessary.

A BT.2020 HLG output could also be easily integrated, since the gain value is
coded as "brightness over SDR", but I think it's fine to let libplacebo handle it.
---
 libavfilter/vf_scale_vulkan.c        | 94 +++++++++++++++++++++++++--
 libavfilter/vulkan/debayer.comp.glsl | 95 +++++++++++++++++++---------
 2 files changed, 155 insertions(+), 34 deletions(-)

diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 19b4e5e5ac..bcd9772a89 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -18,8 +18,12 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <math.h>
+
+#include "libavutil/mastering_display_metadata.h"
 #include "libavutil/random_seed.h"
 #include "libavutil/opt.h"
+#include "libavutil/raw_color_params.h"
 #include "libavutil/vulkan_spirv.h"
 #include "vulkan_filter.h"
 #include "scale_eval.h"
@@ -55,13 +59,19 @@ typedef struct ScaleVulkanContext {
     FFVulkanShader shd;
     VkSampler sampler;
 
-    /* Push constants / options */
     struct {
-        float yuv_matrix[4][4];
+        float matrix[4][4];
         int crop_x;
         int crop_y;
         int crop_w;
         int crop_h;
+
+        /* RAW camera color processing (debayering only) */
+        float wb_red;
+        float wb_blue;
+        float gain;
+        float black_level_norm;
+        float inv_range;
     } opts;
 
     char *out_format_string;
@@ -71,6 +81,7 @@ typedef struct ScaleVulkanContext {
     enum ScalerFunc scaler;
     enum AVColorRange out_range;
     enum DebayerFunc debayer;
+    float exposure;
 } ScaleVulkanContext;
 
 static const char scale_bilinear[] = {
@@ -191,8 +202,8 @@ static int init_scale_shader(AVFilterContext *ctx, 
FFVulkanShader *shd,
 
         for (int y = 0; y < 3; y++)
             for (int x = 0; x < 3; x++)
-                s->opts.yuv_matrix[x][y] = tmp_mat[x][y];
-        s->opts.yuv_matrix[3][3] = 1.0;
+                s->opts.matrix[x][y] = tmp_mat[x][y];
+        s->opts.matrix[3][3] = 1.0;
     }
 
     return 0;
@@ -268,6 +279,11 @@ static av_cold int init_filter(AVFilterContext *ctx, 
AVFrame *in)
     GLSLC(1,    int crop_y;                                               );
     GLSLC(1,    int crop_w;                                               );
     GLSLC(1,    int crop_h;                                               );
+    GLSLC(1,    float wb_red;                                             );
+    GLSLC(1,    float wb_blue;                                            );
+    GLSLC(1,    float gain;                                               );
+    GLSLC(1,    float black_level_norm;                                   );
+    GLSLC(1,    float inv_range;                                          );
     GLSLC(0, };                                                           );
     GLSLC(0,                                                              );
 
@@ -363,6 +379,73 @@ static int scale_vulkan_filter_frame(AVFilterLink *link, 
AVFrame *in)
     if (err < 0)
         goto fail;
 
+    if (s->vkctx.input_format == AV_PIX_FMT_BAYER_RGGB16) {
+        memset(s->opts.matrix, 0, sizeof(s->opts.matrix));
+        s->opts.matrix[0][0]     = 1.0f;
+        s->opts.matrix[1][1]     = 1.0f;
+        s->opts.matrix[2][2]     = 1.0f;
+        s->opts.matrix[3][3]     = 1.0f;
+        s->opts.wb_red           = 1.0f;
+        s->opts.wb_blue          = 1.0f;
+        s->opts.gain             = 1.0f;
+        s->opts.black_level_norm = 0.0f;
+        s->opts.inv_range        = 1.0f;
+
+        AVFrameSideData *sd = av_frame_get_side_data(in, 
AV_FRAME_DATA_RAW_COLOR_PARAMS);
+        if (sd) {
+            const AVRawColorParams *rcp = (const AVRawColorParams *)sd->data;
+            if (rcp->type == AV_RAW_COLOR_PARAMS_PRORES_RAW) {
+                const AVProResRawColorParams *pr = &rcp->codec.prores_raw;
+
+                for (int r = 0; r < 3; r++)
+                    for (int c = 0; c < 3; c++)
+                        s->opts.matrix[r][c] = av_q2d(pr->color_matrix[r][c]);
+                s->opts.wb_red           = av_q2d(pr->wb_red);
+                s->opts.wb_blue          = av_q2d(pr->wb_blue);
+
+                /* The PRR gain val places a metered mid-grey exposure at
+                 * 0.18, but the actual shot may be exposed off that point
+                 * (ETTR is common practice for RAW), so use the user-provided
+                 * value to adjust the brightness */
+                s->opts.gain             = av_q2d(pr->gain) * 
exp2f(s->exposure);
+                float black              = av_q2d(rcp->black_level);
+                float white              = av_q2d(rcp->white_level);
+                s->opts.black_level_norm = black;
+                s->opts.inv_range        = 1.0 / (white - black);
+
+                /* The camera matrix converts to linear-light CIE 1931 XYZ
+                 * relative to D65 */
+                out->color_primaries = AVCOL_PRI_SMPTE428;
+                out->color_trc       = AVCOL_TRC_LINEAR;
+                out->colorspace      = AVCOL_SPC_UNSPECIFIED;
+                out->color_range     = AVCOL_RANGE_JPEG;
+
+                av_frame_side_data_remove(&out->side_data, &out->nb_side_data,
+                                          AV_FRAME_DATA_RAW_COLOR_PARAMS);
+
+                /* The output is scene-linear, after white balance and the
+                 * matrix, a properly exposed diffuse white sits at Y=1.0,
+                 * which by BT.2408 convention corresponds to the 203 nit
+                 * reference white.
+                 * The gain restores the highlight headroom the encoder 
reserved,
+                 * so the brightest encodable value is exactly Y = gain * 1.0.
+                 * Tag that as the mastering peak so tone mapping knows the
+                 * actual headroom, use a float format to carry
+                 * the > 1.0 range without clipping. */
+                AVMasteringDisplayMetadata *mdm =
+                    av_mastering_display_metadata_create_side_data(out);
+                if (!mdm) {
+                    err = AVERROR(ENOMEM);
+                    goto fail;
+                }
+
+                mdm->max_luminance = av_d2q(s->opts.gain * 203.0, INT_MAX);
+                mdm->min_luminance = av_make_q(1, 10000);
+                mdm->has_luminance = 1;
+            }
+        }
+    }
+
     if (out->width != in->width || out->height != in->height) {
         av_frame_side_data_remove_by_props(&out->side_data, &out->nb_side_data,
                                            AV_SIDE_DATA_PROP_SIZE_DEPENDENT);
@@ -440,7 +523,7 @@ static int scale_vulkan_config_output(AVFilterLink *outlink)
 
     if (s->vkctx.input_format == AV_PIX_FMT_BAYER_RGGB16) {
         if (s->vkctx.output_format == s->vkctx.input_format) {
-            s->vkctx.output_format = AV_PIX_FMT_RGBA64;
+            s->vkctx.output_format = AV_PIX_FMT_RGBAF16;
         } else if (!ff_vk_mt_is_np_rgb(s->vkctx.output_format)) {
             av_log(avctx, AV_LOG_ERROR, "Unsupported output format for 
debayer\n");
             return AVERROR(EINVAL);
@@ -504,6 +587,7 @@ static const AVOption scale_vulkan_options[] = {
     { "debayer", "Debayer algorithm to use", OFFSET(debayer), AV_OPT_TYPE_INT, 
{.i64 = DB_BILINEAR_HQ}, 0, DB_NB, .flags = FLAGS, .unit = "debayer" },
         { "bilinear", "Bilinear debayering (fastest)", 0, AV_OPT_TYPE_CONST, 
{.i64 = DB_BILINEAR}, 0, 0, .flags = FLAGS, .unit = "debayer" },
         { "bilinear_hq", "Bilinear debayering (high quality)", 0, 
AV_OPT_TYPE_CONST, {.i64 = DB_BILINEAR_HQ}, 0, 0, .flags = FLAGS, .unit = 
"debayer" },
+    { "exposure", "Exposure bias in stops, applied in scene-linear (RAW camera 
inputs only)", OFFSET(exposure), AV_OPT_TYPE_FLOAT, {.dbl = 0.0}, -16.0, 16.0, 
.flags = FLAGS },
     { "format", "Output video format (software format of hardware frames)", 
OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
     { "out_range", "Output colour range (from 0 to 2) (default 0)", 
OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED}, 
AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, .flags = FLAGS, .unit = "range" },
         { "full", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = 
AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" },
diff --git a/libavfilter/vulkan/debayer.comp.glsl 
b/libavfilter/vulkan/debayer.comp.glsl
index 0a4e22de99..a1adc5610a 100644
--- a/libavfilter/vulkan/debayer.comp.glsl
+++ b/libavfilter/vulkan/debayer.comp.glsl
@@ -22,6 +22,7 @@
 #pragma shader_stage(compute)
 
 #extension GL_EXT_shader_image_load_formatted : require
+#extension GL_EXT_scalar_block_layout : require
 
 layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) 
in;
 
@@ -30,90 +31,126 @@ layout (set = 0, binding = 1) uniform writeonly image2D 
dst;
 
 layout (constant_id = 0) const int debayer_mode = 0;
 
-layout(push_constant, std430) uniform pushConstants {
-   mat4 yuv_matrix;
+layout(push_constant, scalar) uniform pushConstants {
+   mat4 rgb_matrix;
    int crop_x;
    int crop_y;
    int crop_w;
    int crop_h;
+   float wb_red;
+   float wb_blue;
+   float gain;
+   float black_level_norm;
+   float inv_range;
 };
 
-#define LD(xo, yo) \
-    (imageLoad(src, pos + ivec2((xo), (yo))).r)
+/* Read a sample, apply black-level, normalize, and apply white balance.
+ * Mirrored coords. */
+float ld_at(ivec2 base, int xo, int yo)
+{
+    ivec2 p = base + ivec2(xo, yo);
+    ivec2 cmin = ivec2(crop_x, crop_y);
+    ivec2 cmax = ivec2(crop_x + crop_w - 1, crop_y + crop_h - 1);
+    p = mix(p, 2*cmin - p, lessThan(p, cmin));
+    p = mix(p, 2*cmax - p, greaterThan(p, cmax));
+
+    float v = imageLoad(src, p).r;
+    v = max(v - black_level_norm, 0.0) * inv_range;
+
+    bool x_odd = (p.x & 1) != 0;
+    bool y_odd = (p.y & 1) != 0;
+    float wb = x_odd == y_odd ? (x_odd ? wb_blue : wb_red) : 1.0;
+
+    /* Clamp at the white-balanced clip point: once the least sensitive
+     * channel saturates, the others keep rising and would skew clipped
+     * areas towards magenta; clipping every channel at the same level
+     * keeps blown highlights neutral. */
+    return min(v * wb, 1.0);
+}
+
+#define LD(xo, yo) ld_at(pos, (xo), (yo))
+
+void write(ivec2 pos, vec4 c)
+{
+    /* Apply the color matrix and restore the scene-linear scale. Negative
+     * values (interpolation undershoot, out-of-gamut camera response) have
+     * no physical meaning in XYZ, so floor at zero. */
+    c.rgb = max((c.rgb * mat3(rgb_matrix)) * gain, vec3(0.0));
+    imageStore(dst, pos, c);
+}
 
 void debayer_bilinear(ivec2 pos)
 {
     /* R basis */
-    vec4 tl = vec4(LD(0, 0),
+    write(pos,
+              vec4(LD(0, 0),
                    (LD(1, 0) + LD(-1, 0) + LD(0, 1) + LD(0, -1)) / 4.0f,
                    (LD(-1, -1) + LD(1, 1) + LD(-1, 1) + LD(1, -1)) / 4.0f,
-                   1.0f);
-    imageStore(dst, pos, tl);
+                   1.0f));
 
     /* G1 basis */
-    vec4 tr = vec4((LD(2, 0) + LD(0, 0)) / 2.0f,
+    write(pos + ivec2(1, 0),
+              vec4((LD(2, 0) + LD(0, 0)) / 2.0f,
                    LD(1, 0),
                    (LD(1, 1) + LD(1, -1)) / 2.0f,
-                   1.0f);
-    imageStore(dst, pos + ivec2(1, 0), tr);
+                   1.0f));
 
     /* G2 basis */
-    vec4 bl = vec4((LD(0, 2) + LD(0, 0)) / 2.0f,
+    write(pos + ivec2(0, 1),
+              vec4((LD(0, 2) + LD(0, 0)) / 2.0f,
                    LD(0, 1),
                    (LD(1, 1) + LD(-1, 1)) / 2.0f,
-                   1.0f);
-    imageStore(dst, pos + ivec2(0, 1), bl);
+                   1.0f));
 
     /* B basis */
-    vec4 br = vec4((LD(0, 0) + LD(2, 2) + LD(0, 2) + LD(2, 0)) / 4.0f,
+    write(pos + ivec2(1, 1),
+              vec4((LD(0, 0) + LD(2, 2) + LD(0, 2) + LD(2, 0)) / 4.0f,
                    (LD(2, 1) + LD(0, 1) + LD(1, 2) + LD(1, 0)) / 4.0f,
                    LD(1, 1),
-                   1.0f);
-    imageStore(dst, pos + ivec2(1, 1), br);
+                   1.0f));
 }
 
 void debayer_bilinear_hq(ivec2 pos)
 {
     /* R basis */
-    vec4 tl = vec4(LD(0, 0),
+    write(pos,
+              vec4(LD(0, 0),
                    (4.0f*LD(0, 0) + 2.0f*(LD(0, -1) + LD(0, 1) + LD(-1, 0) + 
LD(1, 0)) -
                     (LD(0, -2) + LD(0, 2) + LD(-2, 0) + LD(2, 0))) / 8.0f,
                    (12.0f*LD(0, 0) + 4.0f*(LD(-1, -1) + LD(-1, 1) + LD(1, -1) 
+ LD(1, 1)) -
                     3.0f*(LD(0, -2) + LD(0, 2) + LD(-2, 0) + LD(2, 0))) / 
16.0f,
-                   1.0f);
-    imageStore(dst, pos, tl);
+                   1.0f));
 
     /* G1 basis */
-    vec4 tr = vec4((10.0f*LD(1, 0) + 8.0f*(LD(0, 0) + LD(2, 0)) -
+    write(pos + ivec2(1, 0),
+              vec4((10.0f*LD(1, 0) + 8.0f*(LD(0, 0) + LD(2, 0)) -
                     2.0f*(LD(0, -1) + LD(2, 1) + LD(0, 1) + LD(2, -1) + LD(-1, 
0) + LD(3, 0)) +
                     LD(1, -2) + LD(1, 2)) / 16.0f,
                    LD(1, 0),
                    (10.0f*LD(1, 0) + 8.0f*(LD(1, -1) + LD(1, 1)) -
                     2.0f*(LD(0, -1) + LD(0, 1) + LD(2, -1) + LD(2, 1) + LD(1, 
-2) + LD(1, 2)) +
                     LD(-1, 0) + LD(3, 0)) / 16.0f,
-                   1.0f);
-    imageStore(dst, pos + ivec2(1, 0), tr);
-
+                   1.0f));
 
     /* G2 basis */
-    vec4 bl = vec4((10.0f*LD(0, 1) + 8.0f*(LD(0, 0) + LD(0, 2)) -
+    write(pos + ivec2(0, 1),
+              vec4((10.0f*LD(0, 1) + 8.0f*(LD(0, 0) + LD(0, 2)) -
                    2.0f*(LD(-1, 0) + LD(-1, 2) + LD(1, 0) + LD(1, 2) + LD(0, 
-1) + LD(0, 3)) +
                    LD(-2, 1) + LD(2, 1)) / 16.0f,
                    LD(0, 1),
                    (10.0f*LD(0, 1) + 8.0f*(LD(-1, 1) + LD(1, 1)) -
                    2.0f*(LD(-1, 0) + LD(1, 2) + LD(-1, 2) + LD(1, 0) + LD(-2, 
1) + LD(2, 1)) +
                     LD(0, -1) + LD(0, 3)) / 16.0f,
-                   1.0f);
-    imageStore(dst, pos + ivec2(0, 1), bl);
+                   1.0f));
 
     /* B basis */
-    vec4 br = vec4((12.0f*LD(1, 1) + 4.0f*(LD(0, 0) + LD(0, 2) + LD(2, 0) + 
LD(2, 2)) -
+    write(pos + ivec2(1, 1),
+              vec4((12.0f*LD(1, 1) + 4.0f*(LD(0, 0) + LD(0, 2) + LD(2, 0) + 
LD(2, 2)) -
                     3.0f*(LD(1, -1) + LD(1, 3) + LD(-1, 1) + LD(3, 1))) / 
16.0f,
                    (4.0f*LD(1, 1) + 2.0f*(LD(1, 0) + LD(1, 2) + LD(0, 1) + 
LD(2, 1)) -
                     (LD(1, -1) + LD(1, 3) + LD(-1, 1) + LD(3, 1))) / 8.0f,
                    LD(1, 1),
-                   1.0f);
-    imageStore(dst, pos + ivec2(1, 1), br);
+                   1.0f));
 }
 
 void main(void)
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PR] scale_vulkan: apply camera raw data (PR #23475)

Reply via email to