PR #23401 opened by michaelni URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23401 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23401.patch
The convolution filter accumulates pixel samples multiplied by user-supplied matrix coefficients. With large coefficients the int product and accumulator overflow before the result is clipped to the output range. Accumulate in float and cast the coefficient so the per-sample product is computed in float as well. This matches the float based sobel/scharr paths and the subsequent rdiv/bias scaling Found-by: Kery (Qi Kery <[email protected]>) Signed-off-by: Michael Niedermayer <[email protected]> >From e4e6af495afd6606d8d4b2c3bb5bd3e95c79f114 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer <[email protected]> Date: Sat, 6 Jun 2026 21:02:34 +0200 Subject: [PATCH] avfilter/convolution: accumulate user matrix products in float The convolution filter accumulates pixel samples multiplied by user-supplied matrix coefficients. With large coefficients the int product and accumulator overflow before the result is clipped to the output range. Accumulate in float and cast the coefficient so the per-sample product is computed in float as well. This matches the float based sobel/scharr paths and the subsequent rdiv/bias scaling Found-by: Kery (Qi Kery <[email protected]>) Signed-off-by: Michael Niedermayer <[email protected]> --- libavfilter/vf_convolution.c | 82 +++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index 7a27bdba20..7ec4d04287 100644 --- a/libavfilter/vf_convolution.c +++ b/libavfilter/vf_convolution.c @@ -322,16 +322,16 @@ static void filter16_3x3(uint8_t *dstp, int width, int x; for (x = 0; x < width; x++) { - int sum = AV_RN16A(&c[0][2 * x]) * matrix[0] + - AV_RN16A(&c[1][2 * x]) * matrix[1] + - AV_RN16A(&c[2][2 * x]) * matrix[2] + - AV_RN16A(&c[3][2 * x]) * matrix[3] + - AV_RN16A(&c[4][2 * x]) * matrix[4] + - AV_RN16A(&c[5][2 * x]) * matrix[5] + - AV_RN16A(&c[6][2 * x]) * matrix[6] + - AV_RN16A(&c[7][2 * x]) * matrix[7] + - AV_RN16A(&c[8][2 * x]) * matrix[8]; - sum = (int)(sum * rdiv + bias + 0.5f); + float sum = AV_RN16A(&c[0][2 * x]) * (float)matrix[0] + + AV_RN16A(&c[1][2 * x]) * (float)matrix[1] + + AV_RN16A(&c[2][2 * x]) * (float)matrix[2] + + AV_RN16A(&c[3][2 * x]) * (float)matrix[3] + + AV_RN16A(&c[4][2 * x]) * (float)matrix[4] + + AV_RN16A(&c[5][2 * x]) * (float)matrix[5] + + AV_RN16A(&c[6][2 * x]) * (float)matrix[6] + + AV_RN16A(&c[7][2 * x]) * (float)matrix[7] + + AV_RN16A(&c[8][2 * x]) * (float)matrix[8]; + sum = sum * rdiv + bias + 0.5f; dst[x] = av_clip(sum, 0, peak); } } @@ -345,12 +345,13 @@ static void filter16_5x5(uint8_t *dstp, int width, int x; for (x = 0; x < width; x++) { - int i, sum = 0; + int i; + float sum = 0; for (i = 0; i < 25; i++) - sum += AV_RN16A(&c[i][2 * x]) * matrix[i]; + sum += AV_RN16A(&c[i][2 * x]) * (float)matrix[i]; - sum = (int)(sum * rdiv + bias + 0.5f); + sum = sum * rdiv + bias + 0.5f; dst[x] = av_clip(sum, 0, peak); } } @@ -364,12 +365,13 @@ static void filter16_7x7(uint8_t *dstp, int width, int x; for (x = 0; x < width; x++) { - int i, sum = 0; + int i; + float sum = 0; for (i = 0; i < 49; i++) - sum += AV_RN16A(&c[i][2 * x]) * matrix[i]; + sum += AV_RN16A(&c[i][2 * x]) * (float)matrix[i]; - sum = (int)(sum * rdiv + bias + 0.5f); + sum = sum * rdiv + bias + 0.5f; dst[x] = av_clip(sum, 0, peak); } } @@ -383,12 +385,13 @@ static void filter16_row(uint8_t *dstp, int width, int x; for (x = 0; x < width; x++) { - int i, sum = 0; + int i; + float sum = 0; for (i = 0; i < 2 * radius + 1; i++) - sum += AV_RN16A(&c[i][2 * x]) * matrix[i]; + sum += AV_RN16A(&c[i][2 * x]) * (float)matrix[i]; - sum = (int)(sum * rdiv + bias + 0.5f); + sum = sum * rdiv + bias + 0.5f; dst[x] = av_clip(sum, 0, peak); } } @@ -398,7 +401,7 @@ static void filter16_column(uint8_t *dstp, int height, const uint8_t *c[], int peak, int radius, int dstride, int stride, int size) { - DECLARE_ALIGNED(64, int, sum)[16]; + DECLARE_ALIGNED(64, float, sum)[16]; uint16_t *dst = (uint16_t *)dstp; const int width = FFMIN(16, size); @@ -407,11 +410,11 @@ static void filter16_column(uint8_t *dstp, int height, memset(sum, 0, sizeof(sum)); for (int i = 0; i < 2 * radius + 1; i++) { for (int off16 = 0; off16 < width; off16++) - sum[off16] += AV_RN16A(&c[i][0 + y * stride + off16 * 2]) * matrix[i]; + sum[off16] += AV_RN16A(&c[i][0 + y * stride + off16 * 2]) * (float)matrix[i]; } for (int off16 = 0; off16 < width; off16++) { - sum[off16] = (int)(sum[off16] * rdiv + bias + 0.5f); + sum[off16] = sum[off16] * rdiv + bias + 0.5f; dst[off16] = av_clip(sum[off16], 0, peak); } dst += dstride / 2; @@ -426,12 +429,13 @@ static void filter_7x7(uint8_t *dst, int width, int x; for (x = 0; x < width; x++) { - int i, sum = 0; + int i; + float sum = 0; for (i = 0; i < 49; i++) - sum += c[i][x] * matrix[i]; + sum += c[i][x] * (float)matrix[i]; - sum = (int)(sum * rdiv + bias + 0.5f); + sum = sum * rdiv + bias + 0.5f; dst[x] = av_clip_uint8(sum); } } @@ -444,12 +448,13 @@ static void filter_5x5(uint8_t *dst, int width, int x; for (x = 0; x < width; x++) { - int i, sum = 0; + int i; + float sum = 0; for (i = 0; i < 25; i++) - sum += c[i][x] * matrix[i]; + sum += c[i][x] * (float)matrix[i]; - sum = (int)(sum * rdiv + bias + 0.5f); + sum = sum * rdiv + bias + 0.5f; dst[x] = av_clip_uint8(sum); } } @@ -465,10 +470,10 @@ static void filter_3x3(uint8_t *dst, int width, int x; for (x = 0; x < width; x++) { - int sum = c0[x] * matrix[0] + c1[x] * matrix[1] + c2[x] * matrix[2] + - c3[x] * matrix[3] + c4[x] * matrix[4] + c5[x] * matrix[5] + - c6[x] * matrix[6] + c7[x] * matrix[7] + c8[x] * matrix[8]; - sum = (int)(sum * rdiv + bias + 0.5f); + float sum = c0[x] * (float)matrix[0] + c1[x] * (float)matrix[1] + c2[x] * (float)matrix[2] + + c3[x] * (float)matrix[3] + c4[x] * (float)matrix[4] + c5[x] * (float)matrix[5] + + c6[x] * (float)matrix[6] + c7[x] * (float)matrix[7] + c8[x] * (float)matrix[8]; + sum = sum * rdiv + bias + 0.5f; dst[x] = av_clip_uint8(sum); } } @@ -481,12 +486,13 @@ static void filter_row(uint8_t *dst, int width, int x; for (x = 0; x < width; x++) { - int i, sum = 0; + int i; + float sum = 0; for (i = 0; i < 2 * radius + 1; i++) - sum += c[i][x] * matrix[i]; + sum += c[i][x] * (float)matrix[i]; - sum = (int)(sum * rdiv + bias + 0.5f); + sum = sum * rdiv + bias + 0.5f; dst[x] = av_clip_uint8(sum); } } @@ -496,18 +502,18 @@ static void filter_column(uint8_t *dst, int height, const uint8_t *c[], int peak, int radius, int dstride, int stride, int size) { - DECLARE_ALIGNED(64, int, sum)[16]; + DECLARE_ALIGNED(64, float, sum)[16]; for (int y = 0; y < height; y++) { memset(sum, 0, sizeof(sum)); for (int i = 0; i < 2 * radius + 1; i++) { for (int off16 = 0; off16 < 16; off16++) - sum[off16] += c[i][0 + y * stride + off16] * matrix[i]; + sum[off16] += c[i][0 + y * stride + off16] * (float)matrix[i]; } for (int off16 = 0; off16 < 16; off16++) { - sum[off16] = (int)(sum[off16] * rdiv + bias + 0.5f); + sum[off16] = sum[off16] * rdiv + bias + 0.5f; dst[off16] = av_clip_uint8(sum[off16]); } dst += dstride; -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
