PR #23530 opened by Ramiro Polla (ramiro)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23530
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23530.patch

Introduces more specific swizzle operations, and removes some duplicates for 
read/write/clear/linear.


>From 6bd6059aecbae45670d0ebfee44a21423a719ed6 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Tue, 16 Jun 2026 13:56:20 +0200
Subject: [PATCH 1/4] swscale/aarch64/ops: remove redundant single-component
 packed read/write

These functions are essentially the same as the single-component planar
read/write functions, and are never actually instantiated. They were
left over from the initial implementation.
---
 libswscale/aarch64/ops_asmgen.c | 56 +++++----------------------------
 1 file changed, 8 insertions(+), 48 deletions(-)

diff --git a/libswscale/aarch64/ops_asmgen.c b/libswscale/aarch64/ops_asmgen.c
index c03e0832ee..53501f52ea 100644
--- a/libswscale/aarch64/ops_asmgen.c
+++ b/libswscale/aarch64/ops_asmgen.c
@@ -449,23 +449,6 @@ static void asmgen_op_read_nibble(SwsAArch64Context *s, 
const SwsAArch64OpImplPa
     }
 }
 
-static void asmgen_op_read_packed_1(SwsAArch64Context *s, const 
SwsAArch64OpImplParams *p)
-{
-    RasmContext *r = s->rctx;
-    AArch64VecViews vl[1];
-    AArch64VecViews vh[1];
-
-    a64op_vec_views(s->vl[0], &vl[0]);
-    a64op_vec_views(s->vh[0], &vh[0]);
-
-    switch ((s->use_vh ? 0x100 : 0) | s->vec_size) {
-    case 0x008: i_ldr(r, vl[0].d,          a64op_post(s->in[0], s->vec_size * 
1)); break;
-    case 0x010: i_ldr(r, vl[0].q,          a64op_post(s->in[0], s->vec_size * 
1)); break;
-    case 0x108: i_ldp(r, vl[0].d, vh[0].d, a64op_post(s->in[0], s->vec_size * 
2)); break;
-    case 0x110: i_ldp(r, vl[0].q, vh[0].q, a64op_post(s->in[0], s->vec_size * 
2)); break;
-    }
-}
-
 static void asmgen_op_read_packed_n(SwsAArch64Context *s, const 
SwsAArch64OpImplParams *p, RasmOp *vx)
 {
     RasmContext *r = s->rctx;
@@ -479,13 +462,10 @@ static void asmgen_op_read_packed_n(SwsAArch64Context *s, 
const SwsAArch64OpImpl
 
 static void asmgen_op_read_packed(SwsAArch64Context *s, const 
SwsAArch64OpImplParams *p)
 {
-    if (p->mask == 0x0001) {
-        asmgen_op_read_packed_1(s, p);
-    } else {
-        asmgen_op_read_packed_n(s, p, s->vl);
-        if (s->use_vh)
-            asmgen_op_read_packed_n(s, p, s->vh);
-    }
+    av_assert0(p->mask != 0x0001);
+    asmgen_op_read_packed_n(s, p, s->vl);
+    if (s->use_vh)
+        asmgen_op_read_packed_n(s, p, s->vh);
 }
 
 static void asmgen_op_read_planar(SwsAArch64Context *s, const 
SwsAArch64OpImplParams *p)
@@ -574,23 +554,6 @@ static void asmgen_op_write_nibble(SwsAArch64Context *s, 
const SwsAArch64OpImplP
     }
 }
 
-static void asmgen_op_write_packed_1(SwsAArch64Context *s, const 
SwsAArch64OpImplParams *p)
-{
-    RasmContext *r = s->rctx;
-    AArch64VecViews vl[1];
-    AArch64VecViews vh[1];
-
-    a64op_vec_views(s->vl[0], &vl[0]);
-    a64op_vec_views(s->vh[0], &vh[0]);
-
-    switch ((s->use_vh ? 0x100 : 0) | s->vec_size) {
-    case 0x008: i_str(r, vl[0].d,          a64op_post(s->out[0], s->vec_size * 
1)); break;
-    case 0x010: i_str(r, vl[0].q,          a64op_post(s->out[0], s->vec_size * 
1)); break;
-    case 0x108: i_stp(r, vl[0].d, vh[0].d, a64op_post(s->out[0], s->vec_size * 
2)); break;
-    case 0x110: i_stp(r, vl[0].q, vh[0].q, a64op_post(s->out[0], s->vec_size * 
2)); break;
-    }
-}
-
 static void asmgen_op_write_packed_n(SwsAArch64Context *s, const 
SwsAArch64OpImplParams *p, RasmOp *vx)
 {
     RasmContext *r = s->rctx;
@@ -604,13 +567,10 @@ static void asmgen_op_write_packed_n(SwsAArch64Context 
*s, const SwsAArch64OpImp
 
 static void asmgen_op_write_packed(SwsAArch64Context *s, const 
SwsAArch64OpImplParams *p)
 {
-    if (p->mask == 0x0001) {
-        asmgen_op_write_packed_1(s, p);
-    } else {
-        asmgen_op_write_packed_n(s, p, s->vl);
-        if (s->use_vh)
-            asmgen_op_write_packed_n(s, p, s->vh);
-    }
+    av_assert0(p->mask != 0x0001);
+    asmgen_op_write_packed_n(s, p, s->vl);
+    if (s->use_vh)
+        asmgen_op_write_packed_n(s, p, s->vh);
 }
 
 static void asmgen_op_write_planar(SwsAArch64Context *s, const 
SwsAArch64OpImplParams *p)
-- 
2.52.0


>From 462be68c6eebb1ffba087585aa20cf21887e1010 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Tue, 16 Jun 2026 14:17:16 +0200
Subject: [PATCH 2/4] swscale/aarch64/ops: fix mask for swizzle ops

The mask for swizzle ops was derived solely from whether each component
was assigned to itself, on the assumption that this was enough to tell
whether the swizzle was needed for that component, but that wasn't
correct. We should also take into account whether the component is
needed for the next operation or not.

Additionally, prevent duplicate functions from being generated by
clearing the swizzle index for unused components.
---
 libswscale/aarch64/ops_entries.c   | 123 ++++++++++++++++++-----------
 libswscale/aarch64/ops_impl_conv.c |  17 ++--
 2 files changed, 85 insertions(+), 55 deletions(-)

diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c
index 4b8e4bbae1..3cd59f9c3d 100644
--- a/libswscale/aarch64/ops_entries.c
+++ b/libswscale/aarch64/ops_entries.c
@@ -95,63 +95,92 @@
 { .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = 
AARCH64_PIXEL_U16, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0001, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0001, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x000f, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x000f, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x000f, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0132, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0132, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0132, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1001 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1001 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1001 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0231, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1011 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1101 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1101 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x012f, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x012f, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x012f, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x03f2, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1101 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0ff1, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1001 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0fff, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1000 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0fff, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1000 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0fff, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1000 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x100f, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x100f, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x100f, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1023, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1023, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1203, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1011 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1010 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1010 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1010 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1101 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1101 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1101 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x102f, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x102f, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x132f, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x132f, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1f0f, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1010 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1f3f, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1010 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1f3f, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1010 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1f3f, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1010 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1fff, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1000 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x20f3, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1101 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x20f3, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1101 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x20ff, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1100 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x20ff, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1100 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0101 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0101 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0101 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0011 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0011 },
-{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0011 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x210f, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x210f, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x210f, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x1110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf00f, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf00f, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf00f, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf021, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf021, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf021, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0f2, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0101 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0f2, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0101 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0f2, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0101 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0f3, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0101 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0ff, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0100 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0ff, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0100 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf102, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf102, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf102, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf123, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf123, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf123, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf12f, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf12f, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf12f, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0110 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf132, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf132, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf132, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf321, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf321, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf321, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf3f2, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0101 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf3f2, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0101 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff01, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0011 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff01, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0011 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff01, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0011 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff03, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0011 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff03, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0011 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff0f, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0010 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff0f, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0010 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff0f, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0010 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff31, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0011 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff3f, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0010 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff3f, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0010 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff3f, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0010 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xfff1, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0001 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xfff2, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0001 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xfff3, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0001 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xfff3, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0001 },
+{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xfff3, .block_size = 32, .type = 
AARCH64_PIXEL_U8, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0121, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0121, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0233, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
diff --git a/libswscale/aarch64/ops_impl_conv.c 
b/libswscale/aarch64/ops_impl_conv.c
index 479afbb3ab..b0a286edb6 100644
--- a/libswscale/aarch64/ops_impl_conv.c
+++ b/libswscale/aarch64/ops_impl_conv.c
@@ -155,15 +155,16 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const 
SwsOpList *ops, int n,
             out->type = AARCH64_PIXEL_U32;
         break;
     case AARCH64_SWS_OP_SWIZZLE:
+        /* Recompute mask taking identity swizzle into account */
         out->mask = 0;
-        MASK_SET(out->mask, 0, op->swizzle.in[0] != 0);
-        MASK_SET(out->mask, 1, op->swizzle.in[1] != 1);
-        MASK_SET(out->mask, 2, op->swizzle.in[2] != 2);
-        MASK_SET(out->mask, 3, op->swizzle.in[3] != 3);
-        MASK_SET(out->swizzle, 0, op->swizzle.in[0]);
-        MASK_SET(out->swizzle, 1, op->swizzle.in[1]);
-        MASK_SET(out->swizzle, 2, op->swizzle.in[2]);
-        MASK_SET(out->swizzle, 3, op->swizzle.in[3]);
+        for (int i = 0; i < 4; i++) {
+            if (SWS_OP_NEEDED(op, i) && op->swizzle.in[i] != i) {
+                MASK_SET(out->mask, i, 1);
+                MASK_SET(out->swizzle, i, op->swizzle.in[i]);
+            } else {
+                MASK_SET(out->swizzle, i, 0xf);
+            }
+        }
         /* The element size and type don't matter. */
         out->block_size = block_size * ff_sws_pixel_type_size(op->type);
         out->type = AARCH64_PIXEL_U8;
-- 
2.52.0


>From 0960d2fb1c04405c76be1b29e4292d7ac7fe581f Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Sat, 13 Jun 2026 02:19:48 +0200
Subject: [PATCH 3/4] swscale/aarch64/ops: remove redundant linear combinations

There is no easy optimization that can be triggered by knowing that the
offset is exactly 1. This led to identical functions being instantiated
for different params.

Also simplified the AVRational comparisons a bit.
---
 libswscale/aarch64/ops_entries.c   | 10 ++--------
 libswscale/aarch64/ops_impl_conv.c |  5 +++--
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c
index 3cd59f9c3d..3f6bd92b1a 100644
--- a/libswscale/aarch64/ops_entries.c
+++ b/libswscale/aarch64/ops_entries.c
@@ -349,14 +349,12 @@
 { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U8, 
.mask = 0x0111 },
 { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, 
.mask = 0x0001 },
 { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, 
.mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
+{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000ffULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
+{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000ffULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000373dcc7ULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
@@ -365,10 +363,6 @@
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x0003f3fccfULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10cc0dULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10cc0dULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcfcULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
diff --git a/libswscale/aarch64/ops_impl_conv.c 
b/libswscale/aarch64/ops_impl_conv.c
index b0a286edb6..4e401527cd 100644
--- a/libswscale/aarch64/ops_impl_conv.c
+++ b/libswscale/aarch64/ops_impl_conv.c
@@ -214,10 +214,11 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const 
SwsOpList *ops, int n,
                 continue;
             MASK_SET(out->mask, i, 1);
             for (int j = 0; j < 5; j++) {
+                const AVRational k = op->lin.m[i][j];
                 int jj = linear_index_from_sws_op(j);
-                if (!av_cmp_q(op->lin.m[i][j], av_make_q(1, 1)))
+                if (j < 4 && k.num == k.den)
                     LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_1);
-                else if (av_cmp_q(op->lin.m[i][j], av_make_q(0, 1)))
+                else if (k.num != 0)
                     LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_X);
             }
         }
-- 
2.52.0


>From 753079b80290d0f7482b41d5692cc044e6338d41 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Tue, 16 Jun 2026 16:34:52 +0200
Subject: [PATCH 4/4] swscale/aarch64/ops: mark more operations as
 type-invariant

This prevents the generation of a few more duplicate functions (where
there would be both f32 and u32 functions).
---
 libswscale/aarch64/ops_entries.c   | 25 +++++++++++--------------
 libswscale/aarch64/ops_impl_conv.c | 22 +++++++++++++++++-----
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c
index 3f6bd92b1a..e340d0086d 100644
--- a/libswscale/aarch64/ops_entries.c
+++ b/libswscale/aarch64/ops_entries.c
@@ -13,9 +13,9 @@
 { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U16, .mask = 0x0011 },
 { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U16, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U16, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_F32, .mask = 0x0011 },
-{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_F32, .mask = 0x1111 },
+{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U32, .mask = 0x0011 },
+{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U32, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U32, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0011 },
 { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
@@ -31,9 +31,8 @@
 { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = 
AARCH64_PIXEL_U16, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = 
AARCH64_PIXEL_U16, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = 
AARCH64_PIXEL_U32, .mask = 0x0001 },
-{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = 
AARCH64_PIXEL_F32, .mask = 0x0001 },
-{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = 
AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = 
AARCH64_PIXEL_F32, .mask = 0x1111 },
+{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = 
AARCH64_PIXEL_U32, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = 
AARCH64_PIXEL_U32, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
@@ -50,9 +49,9 @@
 { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U16, .mask = 0x0011 },
 { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U16, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U16, .mask = 0x1111 },
-{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_F32, .mask = 0x0011 },
-{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_F32, .mask = 0x1111 },
+{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U32, .mask = 0x0011 },
+{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U32, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = 
AARCH64_PIXEL_U32, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0011 },
 { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 },
@@ -66,9 +65,8 @@
 { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 },
-{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
-{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 },
+{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 },
+{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 },
@@ -261,13 +259,12 @@
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1101 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 },
+{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0010 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0101 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1000 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1010 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1011 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1101 },
-{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 },
-{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1000 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0010 },
 { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 },
diff --git a/libswscale/aarch64/ops_impl_conv.c b/libswscale/aarch64/ops_impl_conv.c
index 4e401527cd..075569b3b9 100644
--- a/libswscale/aarch64/ops_impl_conv.c
+++ b/libswscale/aarch64/ops_impl_conv.c
@@ -149,11 +149,6 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n,
         case 4: out->mask = 0x1111; break;
         };
         break;
-    case AARCH64_SWS_OP_SWAP_BYTES:
-        /* Only the element size matters, not the type. */
-        if (out->type == AARCH64_PIXEL_F32)
-            out->type = AARCH64_PIXEL_U32;
-        break;
     case AARCH64_SWS_OP_SWIZZLE:
         /* Recompute mask taking identity swizzle into account */
         out->mask = 0;
@@ -238,5 +233,22 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n,
         break;
     }
 
+    switch (out->op) {
+    case AARCH64_SWS_OP_READ_BIT:
+    case AARCH64_SWS_OP_READ_NIBBLE:
+    case AARCH64_SWS_OP_READ_PACKED:
+    case AARCH64_SWS_OP_READ_PLANAR:
+    case AARCH64_SWS_OP_WRITE_BIT:
+    case AARCH64_SWS_OP_WRITE_NIBBLE:
+    case AARCH64_SWS_OP_WRITE_PACKED:
+    case AARCH64_SWS_OP_WRITE_PLANAR:
+    case AARCH64_SWS_OP_SWAP_BYTES:
+    case AARCH64_SWS_OP_CLEAR:
+        /* Only the element size matters, not the type. */
+        if (out->type == AARCH64_PIXEL_F32)
+            out->type = AARCH64_PIXEL_U32;
+        break;
+    }
+
     return 0;
 }
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to