ops_entries: misc fixes (PR #23524)

Ramiro Polla via ffmpeg-devel Wed, 17 Jun 2026 18:16:13 -0700

PR #23524 opened by Ramiro Polla (ramiro)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23524
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23524.patch


`libswscale/aarch64/ops_entries.c` has diverged a bit since !22678 stalled.
This PR fixes the file; I'll create another PR to make it run with FATE.


From dea830187b189e9c9fe3c9034964c00bcde6b82d Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Tue, 16 Jun 2026 13:49:03 +0200
Subject: [PATCH 1/2] swscale/aarch64/ops: convert single-component packed
 read/write to planar

Commit b488ee55 relaxed the check and stopped taking the number of
components into account. This led to the generation of duplicate
read/write functions for single-component packed/planar.

ops_entries.c was not regenerated to take this into account; as a
result, some conversions were no longer supported by the NEON backend.
---
 libswscale/aarch64/ops_impl_conv.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libswscale/aarch64/ops_impl_conv.c b/libswscale/aarch64/ops_impl_conv.c
index 98cb89edbc..479afbb3ab 100644
--- a/libswscale/aarch64/ops_impl_conv.c
+++ b/libswscale/aarch64/ops_impl_conv.c
@@ -88,9 +88,9 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n,
             out->op = AARCH64_SWS_OP_READ_NIBBLE;
         else if (op->rw.frac == 3)
             out->op = AARCH64_SWS_OP_READ_BIT;
-        else if (op->rw.mode == SWS_RW_PACKED)
+        else if (op->rw.mode == SWS_RW_PACKED && op->rw.elems > 1)
             out->op = AARCH64_SWS_OP_READ_PACKED;
-        else if (op->rw.mode == SWS_RW_PLANAR)
+        else if (op->rw.mode == SWS_RW_PACKED || op->rw.mode == SWS_RW_PLANAR)
             out->op = AARCH64_SWS_OP_READ_PLANAR;
         else
             return AVERROR(ENOTSUP);
@@ -106,9 +106,9 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n,
             out->op = AARCH64_SWS_OP_WRITE_NIBBLE;
         else if (op->rw.frac == 3)
             out->op = AARCH64_SWS_OP_WRITE_BIT;
-        else if (op->rw.mode == SWS_RW_PACKED)
+        else if (op->rw.mode == SWS_RW_PACKED && op->rw.elems > 1)
             out->op = AARCH64_SWS_OP_WRITE_PACKED;
-        else if (op->rw.mode == SWS_RW_PLANAR)
+        else if (op->rw.mode == SWS_RW_PACKED || op->rw.mode == SWS_RW_PLANAR)
             out->op = AARCH64_SWS_OP_WRITE_PLANAR;
         else
             return AVERROR(ENOTSUP);
-- 
2.52.0


From 7c50b687cbfb3368bc32ef1a0863ca2065152ce8 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Tue, 16 Jun 2026 14:24:50 +0200
Subject: [PATCH 2/2] swscale/aarch64/ops: skip filtering ops lists when
 generating ops_entries.c

When an ops list contained filtering ops (which are unsupported),
sws_ops_aarch64 would still register the operations that came before
them.
---
 libswscale/aarch64/ops_entries.c   |  7 -------
 libswscale/tests/sws_ops_aarch64.c | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c
index 04a665a9f1..4b8e4bbae1 100644
--- a/libswscale/aarch64/ops_entries.c
+++ b/libswscale/aarch64/ops_entries.c
@@ -108,7 +108,6 @@
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1001 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1001 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0231, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1011 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1101 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1101 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1101 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 },
@@ -247,7 +246,6 @@
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0010 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1000 },
-{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 },
 { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
@@ -256,8 +254,6 @@
 { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0010 },
-{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0100 },
 { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1110 },
 { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
@@ -312,7 +308,6 @@
 { .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 },
 { .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 },
-{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 },
 { .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 },
@@ -372,11 +367,9 @@
 { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x3ff0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 },
 { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5023, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5032, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5203, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5230, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5ff0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 },
 { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5fff, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1000 },
-{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf000, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf023, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf032, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf203, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
diff --git a/libswscale/tests/sws_ops_aarch64.c b/libswscale/tests/sws_ops_aarch64.c
index 4fa10c7bb0..3319af0536 100644
--- a/libswscale/tests/sws_ops_aarch64.c
+++ b/libswscale/tests/sws_ops_aarch64.c
@@ -77,6 +77,22 @@ static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops)
     struct AVTreeNode **root = (struct AVTreeNode **) opaque;
     int ret;
 
+    /* Skip ops lists which include filtering, since this is still not
+     * supported. */
+    for (int i = 0; i < ops->num_ops; i++) {
+        const SwsOp *op = &ops->ops[i];
+        switch (op->op) {
+        case SWS_OP_READ:
+        case SWS_OP_WRITE:
+            if (op->rw.filter.op)
+                return 0;
+            break;
+        case SWS_OP_FILTER_H:
+        case SWS_OP_FILTER_V:
+            return 0;
+        }
+    }
+
     /* Make on-stack copy of `ops` to iterate over */
     SwsOpList rest = *ops;
     /* Use at most two full vregs during the widest precision section */
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PR] swscale/aarch64/ops_entries: misc fixes (PR #23524)

Reply via email to