From f563ee1ad93511dfe7dd252578d7801e0cbbe968 Mon Sep 17 00:00:00 2001
From: Sarthak Indurkhya <sarthak@Sarthaks-MacBook-Pro.local>
Date: Sat, 5 Jul 2025 22:33:46 +0530
Subject: [PATCH] avfilter: add inversetonemap filter

This filter performs inverse tone mapping from SDR to HDR using local adaptation and PQ mapping.

- Added inversetonemap video filter for SDR to HDR conversion.
---
 Changelog                       |  1 +
 libavfilter/Makefile            |  1 +
 libavfilter/allfilters.c        |  1 +
 libavfilter/vf_inversetonemap.c | 98 ++++++++++++++++-----------------
 4 files changed, 51 insertions(+), 50 deletions(-)

diff --git a/Changelog b/Changelog
index 81e2cc813f..0aecf6dbf1 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 version <next>:
+- inversetonemap video filter (SDR to HDR conversion)
 - yasm support dropped, users need to use nasm
 - VVC VAAPI decoder
 - RealVideo 6.0 decoder
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 97f8f17272..e715a3a5e4 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -191,6 +191,7 @@ OBJS-$(CONFIG_SINE_FILTER)                   += asrc_sine.o
 OBJS-$(CONFIG_ANULLSINK_FILTER)              += asink_anullsink.o
 
 # video filters
+OBJS-$(CONFIG_INVERSETONEMAP_FILTER)         += vf_inversetonemap.o
 OBJS-$(CONFIG_ADDROI_FILTER)                 += vf_addroi.o
 OBJS-$(CONFIG_ALPHAEXTRACT_FILTER)           += vf_extractplanes.o
 OBJS-$(CONFIG_ALPHAMERGE_FILTER)             += vf_alphamerge.o framesync.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 3bc045b28f..2f67300ca1 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -176,6 +176,7 @@ extern const FFFilter ff_asrc_sine;
 
 extern const FFFilter ff_asink_anullsink;
 
+extern const FFFilter ff_vf_inversetonemap;
 extern const FFFilter ff_vf_addroi;
 extern const FFFilter ff_vf_alphaextract;
 extern const FFFilter ff_vf_alphamerge;
diff --git a/libavfilter/vf_inversetonemap.c b/libavfilter/vf_inversetonemap.c
index 28ea1ef29e..d8d8920151 100644
--- a/libavfilter/vf_inversetonemap.c
+++ b/libavfilter/vf_inversetonemap.c
@@ -1,3 +1,22 @@
+/*
+ * Copyright (c) 2025 Sarthak Indurkhya
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
 /**
  * @file
  * @brief SDR to HDR inverse tone mapping filter for FFmpeg
@@ -38,7 +57,7 @@ typedef struct FilterContext {
     float HDR_max;
 } FilterContext;
 
-//declaring lookup table
+
 static float bt709_gamma_lut[256];
 
 static void init_bt709_gamma_lut(void) {
@@ -51,14 +70,13 @@ static void init_bt709_gamma_lut(void) {
     }
 }
 
-// Rec.709 to Rec.2020 matrix
+
 static const float bt709_to_bt2020[3][3] = {
     {0.6274f, 0.3293f, 0.0433f},
     {0.0691f, 0.9195f, 0.0114f},
     {0.0164f, 0.0880f, 0.8956f}
 };
 
-// PQ transfer function
 #define PQ_M1 0.1593017578125f
 #define PQ_M2 78.84375f
 #define PQ_C1 0.8359375f
@@ -80,12 +98,10 @@ static void compute_local_adaptation(const float *R_full, float *sigma, int widt
     float *temp_blur = av_mallocz(npix * sizeof(float)); 
     float *spatial_weights = av_malloc((2 * radius + 1) * sizeof(float));
 
-    // 1D Gaussian kernel for separable blur
     for (int i = -radius; i <= radius; i++) {
         spatial_weights[i + radius] = expf(-(i * i) / (2 * sigma_spatial * sigma_spatial));
     }
 
-    // Scene max luminance
     float scene_max = 1e-6f;
     for (int i = 0; i < npix; i++) {
         if (R_full[i] > scene_max)
@@ -93,7 +109,6 @@ static void compute_local_adaptation(const float *R_full, float *sigma, int widt
     }
     float hdr_scale = HDR_max / scene_max;
 
-    // ---- Horizontal blur pass ----
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++) {
             float sum = 0.0f, sum_weights = 0.0f;
@@ -113,7 +128,6 @@ static void compute_local_adaptation(const float *R_full, float *sigma, int widt
         }
     }
 
-    // ---- Vertical blur pass ----
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++) {
             float sum = 0.0f, sum_weights = 0.0f;
@@ -137,18 +151,15 @@ static void compute_local_adaptation(const float *R_full, float *sigma, int widt
     av_free(spatial_weights);
 }
 static void compute_local_adaptation_fast(const float *R_full, float *sigma, int width, int height,
-    float sigma_spatial, float sigma_range, float HDR_max)
-{
+    float sigma_spatial, float sigma_range, float HDR_max){
     const int scale = 4;
     int down_w = width / scale;
     int down_h = height / scale;
     int npix_small = down_w * down_h;
     
-    // Allocating downsampled and result buffers
     float *R_small = av_malloc(npix_small * sizeof(float));
     float *sigma_small = av_malloc(npix_small * sizeof(float));
     
-    // Simple box downsampling
     for (int y = 0; y < down_h; y++) {
         for (int x = 0; x < down_w; x++) {
             float sum = 0.0f;
@@ -167,7 +178,6 @@ static void compute_local_adaptation_fast(const float *R_full, float *sigma, int
 
     compute_local_adaptation(R_small, sigma_small, down_w, down_h, sigma_spatial, sigma_range, HDR_max);
 
-    // Upsampling sigma_small -> sigma
     struct SwsContext *sws_ctx = sws_getContext(down_w, down_h, AV_PIX_FMT_GRAYF32,
               width, height, AV_PIX_FMT_GRAYF32,
               SWS_BILINEAR, NULL, NULL, NULL);
@@ -199,14 +209,12 @@ static void compute_hdr_intensity(const float *R_full, const float *sigma, float
 static void inverse_tone_map_linear_rgb(
     const float *R, const float *G, const float *B,
     float *R_hdr, float *G_hdr, float *B_hdr,
-    int width, int height, FilterContext *s)
-{
+    int width, int height, FilterContext *s){
     int npix = width * height;
     float *Y = av_malloc(npix * sizeof(float));
     float *Y_sigma = av_malloc(npix * sizeof(float));
     float *Y_hdr = av_malloc(npix * sizeof(float));
 
-    // Computing luminance (BT.2020)
     for (int i = 0; i < npix; i++)
         Y[i] = 0.2627f * R[i] + 0.6780f * G[i] + 0.0593f * B[i];
 
@@ -215,7 +223,6 @@ static void inverse_tone_map_linear_rgb(
     
     compute_hdr_intensity(Y, Y_sigma, Y_hdr, width, height, s->n, 1.0f);
 
-    // Scaling RGB channels
     for (int i = 0; i < npix; i++) {
         float scale = Y_hdr[i] / (Y[i] + 1e-6f);
         R_hdr[i] = R[i] * scale;
@@ -249,11 +256,20 @@ static void dither_pq_to_10bit(const float *pq, uint16_t *y_temp, int width, int
 }
 
 static int fil_func(AVFilterLink *inlink, AVFrame *in) {
+    const float exposure = 3.5f;
+    const float contrast = 1.02f;
+    const float black = 0.03f;
+    const float white = 8.0f;
+    const float s_curve_pow = 1.00f;
+    const float white_balance_r = 1.0f;
+    const float white_balance_g = 1.0f;
+    const float white_balance_b = 1.1f;
+
     AVFilterContext *ctx = inlink->dst;
     FilterContext *s = ctx->priv;
     int width = in->width, height = in->height, npix = width * height;
 
-    // Converting to RGB24
+
     struct SwsContext *sws_ctx = sws_getContext(
         width, height, in->format,
         width, height, AV_PIX_FMT_RGB24,
@@ -272,9 +288,6 @@ static int fil_func(AVFilterLink *inlink, AVFrame *in) {
               rgb_frame->data, rgb_frame->linesize);
     sws_freeContext(sws_ctx);
 
-    //Calling gamma lookup table initialization
-    init_bt709_gamma_lut();
-    // Gamma linearization & gamut mapping
     float *R = av_malloc(npix * sizeof(float));
     float *G = av_malloc(npix * sizeof(float));
     float *B = av_malloc(npix * sizeof(float));
@@ -290,20 +303,21 @@ static int fil_func(AVFilterLink *inlink, AVFrame *in) {
         }
     }
 
-    // Inverse Tone Mapping
     float *R_hdr = av_malloc(npix * sizeof(float));
     float *G_hdr = av_malloc(npix * sizeof(float));
     float *B_hdr = av_malloc(npix * sizeof(float));
  
     inverse_tone_map_linear_rgb(R, G, B, R_hdr, G_hdr, B_hdr, width, height, s);
 
-    float exposure = 3.5f;   //4.0
-    float contrast = 1.02f;  //1.10
-    float black = 0.03f, white = 8.0f; //0.04 //12.0
-    float s_curve_pow = 1.00f; //0.70
+    /*
+                float exposure = 3.5f;   //4.0
+                float contrast = 1.02f;  //1.10
+                float black = 0.03f, white = 8.0f; //0.04 //12.0
+                float s_curve_pow = 1.00f; //0.70
+    */
 
     for (int i = 0; i < npix; i++) {
-        float r = R[i] * exposure, g = G[i] * exposure, b = B[i] * exposure;
+        float r = R_hdr[i] * exposure, g = G_hdr[i] * exposure, b = B_hdr[i] * exposure;
         float lum = 0.2627f * r + 0.6780f * g + 0.0593f * b;
 
 
@@ -316,21 +330,14 @@ static int fil_func(AVFilterLink *inlink, AVFrame *in) {
         g *= scale;
         b *= scale;
 
-        // Contrast
         r = (r - 0.5f) * contrast + 0.5f;
         g = (g - 0.5f) * contrast + 0.5f;
         b = (b - 0.5f) * contrast + 0.5f;
 
-        // White balance correction
-        float white_balance_r = 1.0f;
-        float white_balance_g = 1.0f;
-        float white_balance_b = 1.1f; 
-
         r *= white_balance_r;
         g *= white_balance_g;
         b *= white_balance_b;
 
-        // Final clamp
         r = fminf(fmaxf(r, 0.0f), 1.0f);
         g = fminf(fmaxf(g, 0.0f), 1.0f);
         b = fminf(fmaxf(b, 0.0f), 1.0f);
@@ -340,18 +347,16 @@ static int fil_func(AVFilterLink *inlink, AVFrame *in) {
         B_hdr[i] = b;
     }
 
-    //Computing linear luminance (BT.2020)
     float *Y = av_malloc(npix * sizeof(float));
     for (int i = 0; i < npix; i++)
         Y[i] = 0.2627f * R_hdr[i] + 0.6780f * G_hdr[i] + 0.0593f * B_hdr[i];
 
-    // Applying PQ transfer
     float *pq = av_malloc(npix * sizeof(float));
     for (int i = 0; i < npix; i++) {
         pq[i] = linear_to_pq(Y[i] * (10000.0f / s->HDR_max));
         pq[i] = fminf(fmaxf(pq[i], 0.0f), 1.0f);
     }
-    //Allocating output frame (YUV420P10LE)
+
     AVFrame *out = av_frame_alloc();
     out->format = AV_PIX_FMT_YUV420P10LE;
     out->width = width;
@@ -359,25 +364,21 @@ static int fil_func(AVFilterLink *inlink, AVFrame *in) {
     av_frame_get_buffer(out, 32);
     av_frame_copy_props(out, in);
 
-    // Setting HDR metadata
+
     out->color_primaries = AVCOL_PRI_BT2020;
-    out->color_trc = AVCOL_TRC_SMPTE2084;  // PQ
+    out->color_trc = AVCOL_TRC_SMPTE2084;
     out->colorspace = AVCOL_SPC_BT2020_NCL;
-    out->color_range = AVCOL_RANGE_MPEG;   // Full range for HDR
+    out->color_range = AVCOL_RANGE_MPEG;
     
-    // Setting mastering display metadata if available
     if (out->metadata) {
-        // Mastering display primaries (BT.2020)
         av_dict_set(&out->metadata, "mastering_display_primaries", 
                    "0.708,0.292,0.170,0.797,0.131,0.046,0.3127,0.3290", 0);
         
-        // Mastering display luminance (in nits)
         char luminance_str[64];
         snprintf(luminance_str, sizeof(luminance_str), "%.1f,%.1f", 
-                 s->HDR_max, 0.001f);  // Max luminance, min luminance
+                 s->HDR_max, 0.001f);
         av_dict_set(&out->metadata, "mastering_display_luminance", luminance_str, 0);
         
-        // Content light level
         char content_light_str[64];
         float max_content_light = 0.0f;
         for (int i = 0; i < npix; i++) {
@@ -389,12 +390,10 @@ static int fil_func(AVFilterLink *inlink, AVFrame *in) {
         av_dict_set(&out->metadata, "content_light_level", content_light_str, 0);
     }
 
-    // Clearing Y, U, V planes
     av_frame_make_writable(out);
     for (int y = 0; y < height; y++)
         memset(out->data[0] + y * out->linesize[0], 0, out->linesize[0]);
     
-    // Dithering PQ to 10-bit Y plane
     uint16_t *y_temp = av_malloc(npix * sizeof(uint16_t));
     dither_pq_to_10bit(pq, y_temp, width, height);
     for (int y = 0; y < height; y++) {
@@ -402,7 +401,6 @@ static int fil_func(AVFilterLink *inlink, AVFrame *in) {
         memcpy(row, y_temp + y * width, width * sizeof(uint16_t));
     }
 
-    // Computing and encoding U, V chroma planes from original SDR RGB
     for (int y = 0; y < height / 2; y++) {
         uint16_t *u_row = (uint16_t *)(out->data[1] + y * out->linesize[1]);
         uint16_t *v_row = (uint16_t *)(out->data[2] + y * out->linesize[2]);
@@ -425,8 +423,8 @@ static int fil_func(AVFilterLink *inlink, AVFrame *in) {
             float U = (b - Y_sdr) / 1.8814f;
             float V = (r - Y_sdr) / 1.4746f;
             float chroma_blend = 1.0f - fminf(Y_sdr, 1.0f);
-            float chroma_boost = 0.85f * chroma_blend + 0.85f * (1.0f - chroma_blend); //0.8
-            // float chroma_boost = 1.08f;
+            float chroma_boost = 0.85f * chroma_blend + 0.85f * (1.0f - chroma_blend);
+
             U *= chroma_boost;
             V *= chroma_boost;
             U = fmaxf(fminf(U, 0.45f), -0.45f);
@@ -471,7 +469,7 @@ static const AVFilterPad fil_outputs[] = {
 };
 
 static int ff_filter_init(AVFilterContext *avctx) {
-    av_log_set_level(AV_LOG_DEBUG);
+    init_bt709_gamma_lut();
     av_log(avctx, AV_LOG_INFO, "Initializing filter with 1 input and 1 output\n");
     return 0;
 }
-- 
2.49.0

