PR #21229 opened by Niklas Haas (haasn)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21229
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21229.patch

Split off from #21208


>From 154421323d50ae0584bd8ea4878912aebcd3866a Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 15 Dec 2025 18:15:24 +0100
Subject: [PATCH 1/9] swscale/ops_optimizer: don't commute clear with itself

These would normally be merged, not swapped.
---
 libswscale/ops_optimizer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c
index f317a84f9a..c4a75820d3 100644
--- a/libswscale/ops_optimizer.c
+++ b/libswscale/ops_optimizer.c
@@ -50,7 +50,6 @@ static bool op_commute_clear(SwsOp *op, SwsOp *next)
     case SWS_OP_MIN:
     case SWS_OP_MAX:
     case SWS_OP_SCALE:
-    case SWS_OP_CLEAR:
     case SWS_OP_READ:
     case SWS_OP_SWIZZLE:
         ff_sws_apply_op_q(next, op->c.q4);
@@ -61,6 +60,7 @@ static bool op_commute_clear(SwsOp *op, SwsOp *next)
     case SWS_OP_LINEAR:
     case SWS_OP_PACK:
     case SWS_OP_UNPACK:
+    case SWS_OP_CLEAR:
         return false;
     case SWS_OP_TYPE_NB:
         break;
-- 
2.49.1


>From 48591e0395e9efba402aec5c1f62877109dd420a Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 15 Dec 2025 17:36:54 +0100
Subject: [PATCH 2/9] swscale/ops_optimizer: apply optimizations in a more
 predictable order

Instead of blindly interleaving re-ordering and minimizing opptimizations,
separate this loop into several passes - the first pass will minimize the
operation list in-place as much as possible, and the second pass will apply any
desired re-orderings. (We also want to try pushing clear back before any other
re-orderings, as this can trigger more phase 1 optimizations)

This restructuring leads to significantly more predictable and stable behavior,
especially when introducing more operation types going forwards. Does not
actually affect the current results, but matters with some upcoming changes
I have planned.
---
 libswscale/ops_optimizer.c | 102 +++++++++++++++++++++++--------------
 1 file changed, 65 insertions(+), 37 deletions(-)

diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c
index c4a75820d3..3b57c76130 100644
--- a/libswscale/ops_optimizer.c
+++ b/libswscale/ops_optimizer.c
@@ -500,6 +500,7 @@ int ff_sws_op_list_optimize(SwsOpList *ops)
 retry:
     ff_sws_op_list_update_comps(ops);
 
+    /* Apply all in-place optimizations (that do not re-order the list) */
     for (int n = 0; n < ops->num_ops;) {
         SwsOp dummy = {0};
         SwsOp *op = &ops->ops[n];
@@ -604,25 +605,14 @@ retry:
                 ff_sws_op_list_remove_at(ops, n + 1, 1);
                 goto retry;
             }
-
-            /* Prefer to clear as late as possible, to avoid doing
-             * redundant work */
-            if (op_commute_clear(op, next)) {
-                FFSWAP(SwsOp, *op, *next);
-                goto retry;
-            }
             break;
 
-        case SWS_OP_SWIZZLE: {
-            bool seen[4] = {0};
-            bool has_duplicates = false;
+        case SWS_OP_SWIZZLE:
             for (int i = 0; i < 4; i++) {
                 if (next->comps.unused[i])
                     continue;
                 if (op->swizzle.in[i] != i)
                     noop = false;
-                has_duplicates |= seen[op->swizzle.in[i]];
-                seen[op->swizzle.in[i]] = true;
             }
 
             /* Identity swizzle */
@@ -639,22 +629,7 @@ retry:
                 ff_sws_op_list_remove_at(ops, n + 1, 1);
                 goto retry;
             }
-
-            /* Try to push swizzles with duplicates towards the output */
-            if (has_duplicates && op_commute_swizzle(op, next)) {
-                FFSWAP(SwsOp, *op, *next);
-                goto retry;
-            }
-
-            /* Move swizzle out of the way between two converts so that
-             * they may be merged */
-            if (prev->op == SWS_OP_CONVERT && next->op == SWS_OP_CONVERT) {
-                op->type = next->convert.to;
-                FFSWAP(SwsOp, *op, *next);
-                goto retry;
-            }
             break;
-        }
 
         case SWS_OP_CONVERT:
             /* No-op conversion */
@@ -813,16 +788,6 @@ retry:
                 goto retry;
             }
 
-            /* Scaling by integer before conversion to int */
-            if (op->c.q.den == 1 &&
-                next->op == SWS_OP_CONVERT &&
-                ff_sws_pixel_type_is_int(next->convert.to))
-            {
-                op->type = next->convert.to;
-                FFSWAP(SwsOp, *op, *next);
-                goto retry;
-            }
-
             /* Scaling by exact power of two */
             if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
                 op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
@@ -837,6 +802,69 @@ retry:
         n++;
     }
 
+    /* Push clears to the back to void any unused components */
+    for (int n = 1; n < ops->num_ops - 1; n++) { /* exclude READ/WRITE */
+        SwsOp *op = &ops->ops[n];
+        SwsOp *next = &ops->ops[n + 1];
+
+        switch (op->op) {
+        case SWS_OP_CLEAR:
+            if (op_commute_clear(op, next)) {
+                FFSWAP(SwsOp, *op, *next);
+                goto retry;
+            }
+            break;
+        }
+    }
+
+    /* Apply any remaining preferential re-ordering optimizations; do these
+     * last because they are more likely to block other optimizations if done
+     * too aggressively */
+    for (int n = 1; n < ops->num_ops - 1; n++) { /* exclude READ/WRITE */
+        SwsOp *op = &ops->ops[n];
+        SwsOp *prev = &ops->ops[n - 1];
+        SwsOp *next = &ops->ops[n + 1];
+
+        switch (op->op) {
+        case SWS_OP_SWIZZLE: {
+            bool seen[4] = {0};
+            bool has_duplicates = false;
+            for (int i = 0; i < 4; i++) {
+                if (next->comps.unused[i])
+                    continue;
+                has_duplicates |= seen[op->swizzle.in[i]];
+                seen[op->swizzle.in[i]] = true;
+            }
+
+            /* Try to push swizzles with duplicates towards the output */
+            if (has_duplicates && op_commute_swizzle(op, next)) {
+                FFSWAP(SwsOp, *op, *next);
+                goto retry;
+            }
+
+            /* Move swizzle out of the way between two converts so that
+             * they may be merged */
+            if (prev->op == SWS_OP_CONVERT && next->op == SWS_OP_CONVERT) {
+                op->type = next->convert.to;
+                FFSWAP(SwsOp, *op, *next);
+                goto retry;
+            }
+            break;
+        }
+
+        case SWS_OP_SCALE:
+            /* Scaling by integer before conversion to int */
+            if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT &&
+                ff_sws_pixel_type_is_int(next->convert.to))
+            {
+                op->type = next->convert.to;
+                FFSWAP(SwsOp, *op, *next);
+                goto retry;
+            }
+            break;
+        }
+    }
+
     return 0;
 }
 
-- 
2.49.1


>From 76a77914dbe22ddba9e87c49ccf28fb9057600c5 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 15 Dec 2025 17:57:22 +0100
Subject: [PATCH 3/9] swscale/ops_optimizer: simplify loop slightly (cosmetic)

We always `goto retry` whenever an optimization case is hit, so we don't
need to defer the increment of `n`.
---
 libswscale/ops_optimizer.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c
index 3b57c76130..9d668fee74 100644
--- a/libswscale/ops_optimizer.c
+++ b/libswscale/ops_optimizer.c
@@ -501,7 +501,7 @@ retry:
     ff_sws_op_list_update_comps(ops);
 
     /* Apply all in-place optimizations (that do not re-order the list) */
-    for (int n = 0; n < ops->num_ops;) {
+    for (int n = 0; n < ops->num_ops; n++) {
         SwsOp dummy = {0};
         SwsOp *op = &ops->ops[n];
         SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
@@ -797,9 +797,6 @@ retry:
             break;
         }
         }
-
-        /* No optimization triggered, move on to next operation */
-        n++;
     }
 
     /* Push clears to the back to void any unused components */
-- 
2.49.1


>From dbe10c82f65305110c4c14371dcf687c2176842d Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Tue, 16 Dec 2025 13:12:46 +0100
Subject: [PATCH 4/9] swscale/format: only generate SHIFT ops when needed

Otherwise, we may spuriously generate illegal combinations like
SWS_OP_LSHIFT on SWS_PIXEL_F32.
---
 libswscale/format.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/libswscale/format.c b/libswscale/format.c
index be03adcdca..9e1121f484 100644
--- a/libswscale/format.c
+++ b/libswscale/format.c
@@ -935,11 +935,13 @@ int ff_sws_decode_pixfmt(SwsOpList *ops, enum 
AVPixelFormat fmt)
         .swizzle = swizzle_inv(swizzle),
     }));
 
-    RET(ff_sws_op_list_append(ops, &(SwsOp) {
-        .op   = SWS_OP_RSHIFT,
-        .type = pixel_type,
-        .c.u  = shift,
-    }));
+    if (shift) {
+        RET(ff_sws_op_list_append(ops, &(SwsOp) {
+            .op   = SWS_OP_RSHIFT,
+            .type = pixel_type,
+            .c.u  = shift,
+        }));
+    }
 
     RET(ff_sws_op_list_append(ops, &(SwsOp) {
         .op   = SWS_OP_CLEAR,
@@ -962,11 +964,13 @@ int ff_sws_encode_pixfmt(SwsOpList *ops, enum 
AVPixelFormat fmt)
     RET(fmt_analyze(fmt, &rw_op, &pack, &swizzle, &shift,
                     &pixel_type, &raw_type));
 
-    RET(ff_sws_op_list_append(ops, &(SwsOp) {
-        .op   = SWS_OP_LSHIFT,
-        .type = pixel_type,
-        .c.u  = shift,
-    }));
+    if (shift) {
+        RET(ff_sws_op_list_append(ops, &(SwsOp) {
+            .op   = SWS_OP_LSHIFT,
+            .type = pixel_type,
+            .c.u  = shift,
+        }));
+    }
 
     if (rw_op.elems > desc->nb_components) {
         /* Format writes unused alpha channel, clear it explicitly for sanity 
*/
-- 
2.49.1


>From d28b2007b20aac6745618f5310d6768352b53b4b Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Tue, 16 Dec 2025 00:33:30 +0100
Subject: [PATCH 5/9] swscale/ops: add type assertions to ff_sws_apply_op_q()

---
 libswscale/ops.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libswscale/ops.c b/libswscale/ops.c
index f33dd02a37..9bbbfc50ca 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -114,21 +114,24 @@ void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
     case SWS_OP_WRITE:
         return;
     case SWS_OP_UNPACK: {
-        unsigned val = x[0].num;
+        av_assert1(ff_sws_pixel_type_is_int(op->type));
         ff_sws_pack_op_decode(op, mask, shift);
+        unsigned val = x[0].num;
         for (int i = 0; i < 4; i++)
             x[i] = Q((val >> shift[i]) & mask[i]);
         return;
     }
     case SWS_OP_PACK: {
-        unsigned val = 0;
+        av_assert1(ff_sws_pixel_type_is_int(op->type));
         ff_sws_pack_op_decode(op, mask, shift);
+        unsigned val = 0;
         for (int i = 0; i < 4; i++)
             val |= (x[i].num & mask[i]) << shift[i];
         x[0] = Q(val);
         return;
     }
     case SWS_OP_SWAP_BYTES:
+        av_assert1(ff_sws_pixel_type_is_int(op->type));
         switch (ff_sws_pixel_type_size(op->type)) {
         case 2:
             for (int i = 0; i < 4; i++)
@@ -147,12 +150,14 @@ void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
         }
         return;
     case SWS_OP_LSHIFT: {
+        av_assert1(ff_sws_pixel_type_is_int(op->type));
         AVRational mult = Q(1 << op->c.u);
         for (int i = 0; i < 4; i++)
             x[i] = x[i].den ? av_mul_q(x[i], mult) : x[i];
         return;
     }
     case SWS_OP_RSHIFT: {
+        av_assert1(ff_sws_pixel_type_is_int(op->type));
         AVRational mult = Q(1 << op->c.u);
         for (int i = 0; i < 4; i++)
             x[i] = x[i].den ? av_div_q(x[i], mult) : x[i];
@@ -175,6 +180,7 @@ void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
         }
         return;
     case SWS_OP_DITHER:
+        av_assert1(!ff_sws_pixel_type_is_int(op->type));
         for (int i = 0; i < 4; i++)
             x[i] = x[i].den ? av_add_q(x[i], av_make_q(1, 2)) : x[i];
         return;
@@ -187,6 +193,7 @@ void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
             x[i] = av_max_q(x[i], op->c.q4[i]);
         return;
     case SWS_OP_LINEAR: {
+        av_assert1(!ff_sws_pixel_type_is_int(op->type));
         const AVRational orig[4] = { x[0], x[1], x[2], x[3] };
         for (int i = 0; i < 4; i++) {
             AVRational sum = op->lin.m[i][4];
-- 
2.49.1


>From 4723780bfba752354c3ec822a837b3bf8c1a25a9 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Tue, 16 Dec 2025 00:36:55 +0100
Subject: [PATCH 6/9] swscale/ops: correctly truncate on
 ff_sws_apply_op_q(SWS_OP_RSHIFT)

Instead of using a "precise" division, simulate the actual truncation.

Note that the division by `den` is unneeded in principle because the
denominator *should* always be 1 for an integer, but this way we don't
explode if the user should happen to pass `4/2` or something.

Fixes a lot of unnecessary clamps w.r.t. xv36, e.g.:

 xv36be -> yuv444p12be:
   [u16 XXXX -> ++++] SWS_OP_READ         : 4 elem(s) packed >> 0
   [u16 ...X -> ++++] SWS_OP_SWAP_BYTES
   [u16 ...X -> ++++] SWS_OP_SWIZZLE      : 1023
   [u16 ...X -> ++++] SWS_OP_RSHIFT       : >> 4
-  [u16 ...X -> ++++] SWS_OP_CONVERT      : u16 -> f32
-  [f32 ...X -> ++++] SWS_OP_MIN          : x <= {4095 4095 4095 _}
-  [f32 ...X -> ++++] SWS_OP_CONVERT      : f32 -> u16
   [u16 ...X -> ++++] SWS_OP_SWAP_BYTES
   [u16 ...X -> ++++] SWS_OP_WRITE        : 3 elem(s) planar >> 0
     (X = unused, + = exact, 0 = zero)
---
 libswscale/ops.c            | 3 +--
 tests/ref/fate/sws-ops-list | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/libswscale/ops.c b/libswscale/ops.c
index 9bbbfc50ca..f84416feed 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -158,9 +158,8 @@ void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
     }
     case SWS_OP_RSHIFT: {
         av_assert1(ff_sws_pixel_type_is_int(op->type));
-        AVRational mult = Q(1 << op->c.u);
         for (int i = 0; i < 4; i++)
-            x[i] = x[i].den ? av_div_q(x[i], mult) : x[i];
+            x[i] = x[i].den ? Q((x[i].num / x[i].den) >> op->c.u) : x[i];
         return;
     }
     case SWS_OP_SWIZZLE: {
diff --git a/tests/ref/fate/sws-ops-list b/tests/ref/fate/sws-ops-list
index dbcad4052b..b49f944794 100644
--- a/tests/ref/fate/sws-ops-list
+++ b/tests/ref/fate/sws-ops-list
@@ -1 +1 @@
-708f10e4de79450729ab2120e4e44ec9
+e910ff7ceaeb64bfdbac3f652b67403f
-- 
2.49.1


>From 002aa4fe1c9f0ffcfa01c3fd4c674aabda4dd211 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Tue, 16 Dec 2025 13:35:24 +0100
Subject: [PATCH 7/9] swscale/ops_chain: fix comment

---
 libswscale/ops_chain.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h
index 7f436869b1..2f5a31793e 100644
--- a/libswscale/ops_chain.h
+++ b/libswscale/ops_chain.h
@@ -127,7 +127,7 @@ typedef struct SwsOpTable {
 
 /**
  * "Compile" a single op by looking it up in a list of fixed size op tables.
- * See `op_match` in `ops.c` for details on how the matching works.
+ * See `op_match` in `ops_chain.c` for details on how the matching works.
  *
  * Returns 0, AVERROR(EAGAIN), or a negative error code.
  */
-- 
2.49.1


>From d28e7949e0148e0f8a45c1f2dea0ad2187f391d0 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Tue, 16 Dec 2025 13:59:04 +0100
Subject: [PATCH 8/9] swscale/ops: categorize ops by type compatibility

This is a more useful grouping than the previous, somewhat arbitrary one.
---
 libswscale/ops.h | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/libswscale/ops.h b/libswscale/ops.h
index d1a15294d1..fe072c1fe7 100644
--- a/libswscale/ops.h
+++ b/libswscale/ops.h
@@ -43,26 +43,28 @@ bool ff_sws_pixel_type_is_int(SwsPixelType type) av_const;
 typedef enum SwsOpType {
     SWS_OP_INVALID = 0,
 
-    /* Input/output handling */
+    /* Defined for all types; but implemented for integers only */
     SWS_OP_READ,            /* gather raw pixels from planes */
     SWS_OP_WRITE,           /* write raw pixels to planes */
     SWS_OP_SWAP_BYTES,      /* swap byte order (for differing endianness) */
+    SWS_OP_SWIZZLE,         /* rearrange channel order, or duplicate channels 
*/
+
+    /* Bit manipulation operations. Defined for integers only. */
     SWS_OP_UNPACK,          /* split tightly packed data into components */
     SWS_OP_PACK,            /* compress components into tightly packed data */
-
-    /* Pixel manipulation */
-    SWS_OP_CLEAR,           /* clear pixel values */
     SWS_OP_LSHIFT,          /* logical left shift of raw pixel values by (u8) 
*/
     SWS_OP_RSHIFT,          /* right shift of raw pixel values by (u8) */
-    SWS_OP_SWIZZLE,         /* rearrange channel order, or duplicate channels 
*/
-    SWS_OP_CONVERT,         /* convert (cast) between formats */
-    SWS_OP_DITHER,          /* add dithering noise */
 
-    /* Arithmetic operations */
-    SWS_OP_LINEAR,          /* generalized linear affine transform */
-    SWS_OP_SCALE,           /* multiplication by scalar (q) */
+    /* Generic arithmetic. Defined and implemented for all types */
+    SWS_OP_CLEAR,           /* clear pixel values */
+    SWS_OP_CONVERT,         /* convert (cast) between formats */
     SWS_OP_MIN,             /* numeric minimum (q4) */
     SWS_OP_MAX,             /* numeric maximum (q4) */
+    SWS_OP_SCALE,           /* multiplication by scalar (q) */
+
+    /* Floating-point only arithmetic operations. */
+    SWS_OP_LINEAR,          /* generalized linear affine transform */
+    SWS_OP_DITHER,          /* add dithering noise */
 
     SWS_OP_TYPE_NB,
 } SwsOpType;
-- 
2.49.1


>From 61ef852a7d3f2c01cc7ed42f401510179e9bef4f Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Tue, 16 Dec 2025 13:59:46 +0100
Subject: [PATCH 9/9] swscale/ops: update comment on SWS_COMP_EXACT

That the integer is "in-range" is implied by the min/max range tracking,
not the flag itself.
---
 libswscale/ops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/ops.h b/libswscale/ops.h
index fe072c1fe7..6392d0ffdf 100644
--- a/libswscale/ops.h
+++ b/libswscale/ops.h
@@ -71,7 +71,7 @@ typedef enum SwsOpType {
 
 enum SwsCompFlags {
     SWS_COMP_GARBAGE = 1 << 0, /* contents are undefined / garbage data */
-    SWS_COMP_EXACT   = 1 << 1, /* value is an in-range, exact, integer */
+    SWS_COMP_EXACT   = 1 << 1, /* value is an exact integer */
     SWS_COMP_ZERO    = 1 << 2, /* known to be a constant zero */
 };
 
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to