Delete the setup code for the SAD Neon intrinsics primitives, since we
now have optimized Neon assembly implementations for all block sizes
and bit depths.

The sad_pp_neon function itself is retained because it is still used by
psyCost_pp_neon for both 4x4 and 8x8 blocks.
---
source/common/aarch64/pixel-prim.cpp | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp
index 1ceec869d..c57057f5d 100644
--- a/source/common/aarch64/pixel-prim.cpp
+++ b/source/common/aarch64/pixel-prim.cpp
@@ -1641,7 +1641,6 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
p.pu[LUMA_ ## W ## x ## H].copy_pp = blockcopy_pp_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].addAvg[NONALIGNED] = addAvg_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].addAvg[ALIGNED] = addAvg_neon<W, H>; \
- p.pu[LUMA_ ## W ## x ## H].sad = sad_pp_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].sad_x3 = sad_x3_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].sad_x4 = sad_x4_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[NONALIGNED] = pixelavg_pp_neon<W, H>; \
@@ -1702,16 +1701,6 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
LUMA_PU(48, 64);
LUMA_PU(64, 16);
LUMA_PU(16, 64);
-
-#if defined(__APPLE__)
- p.pu[LUMA_4x4].sad = sad_pp_neon<4, 4>;
- p.pu[LUMA_4x8].sad = sad_pp_neon<4, 8>;
- p.pu[LUMA_4x16].sad = sad_pp_neon<4, 16>;
-#endif // defined(__APPLE__)
- p.pu[LUMA_8x4].sad = sad_pp_neon<8, 4>;
- p.pu[LUMA_8x8].sad = sad_pp_neon<8, 8>;
- p.pu[LUMA_8x16].sad = sad_pp_neon<8, 16>;
- p.pu[LUMA_8x32].sad = sad_pp_neon<8, 32>;
#if !(HIGH_BIT_DEPTH)
p.pu[LUMA_4x4].sad_x3 = sad_x3_neon<4, 4>;
--
2.39.3 (Apple Git-146)
From 0db3ecb6d9eb6ea4ad186cc11fd19bedc5c0df3a Mon Sep 17 00:00:00 2001
Message-Id:
<0db3ecb6d9eb6ea4ad186cc11fd19bedc5c0df3a.1736263010.git.jonathan.wri...@arm.com>
In-Reply-To: <[email protected]>
References: <[email protected]>
From: Jonathan Wright <[email protected]>
Date: Mon, 9 Dec 2024 11:36:52 +0000
Subject: [PATCH 1/3] AArch64: Delete redundant SAD Neon intrinsics primitives
Delete the setup code for the SAD Neon intrinsics primitives, since we
now have optimized Neon assembly implementations for all block sizes
and bit depths.

The sad_pp_neon function itself is retained because it is still used by
psyCost_pp_neon for both 4x4 and 8x8 blocks.
---
source/common/aarch64/pixel-prim.cpp | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp
index 1ceec869d..c57057f5d 100644
--- a/source/common/aarch64/pixel-prim.cpp
+++ b/source/common/aarch64/pixel-prim.cpp
@@ -1641,7 +1641,6 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
p.pu[LUMA_ ## W ## x ## H].copy_pp = blockcopy_pp_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].addAvg[NONALIGNED] = addAvg_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].addAvg[ALIGNED] = addAvg_neon<W, H>; \
- p.pu[LUMA_ ## W ## x ## H].sad = sad_pp_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].sad_x3 = sad_x3_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].sad_x4 = sad_x4_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[NONALIGNED] = pixelavg_pp_neon<W, H>; \
@@ -1702,16 +1701,6 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
LUMA_PU(48, 64);
LUMA_PU(64, 16);
LUMA_PU(16, 64);
-
-#if defined(__APPLE__)
- p.pu[LUMA_4x4].sad = sad_pp_neon<4, 4>;
- p.pu[LUMA_4x8].sad = sad_pp_neon<4, 8>;
- p.pu[LUMA_4x16].sad = sad_pp_neon<4, 16>;
-#endif // defined(__APPLE__)
- p.pu[LUMA_8x4].sad = sad_pp_neon<8, 4>;
- p.pu[LUMA_8x8].sad = sad_pp_neon<8, 8>;
- p.pu[LUMA_8x16].sad = sad_pp_neon<8, 16>;
- p.pu[LUMA_8x32].sad = sad_pp_neon<8, 32>;
#if !(HIGH_BIT_DEPTH)
p.pu[LUMA_4x4].sad_x3 = sad_x3_neon<4, 4>;
--
2.39.3 (Apple Git-146)
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel