Remove the __APPLE__ guards around Neon kernel assignments. These guards
enabled some Neon kernels only on Apple platforms and disabled another
only on Apple platforms, even though the remaining platforms could also
benefit from them.
Affected kernels:
- calcresidual (BLOCK_32x32)
- var (BLOCK_32x32 and BLOCK_64x64)
- scanPosLast
With the guards removed, the same Neon kernels are used on both Apple and
non-Apple platforms.
---
source/common/aarch64/asm-primitives.cpp | 3 +--
source/common/aarch64/pixel-prim.cpp | 7 +------
2 files changed, 2 insertions(+), 8 deletions(-)
diff --git a/source/common/aarch64/asm-primitives.cpp b/source/common/aarch64/asm-primitives.cpp
index e16150d4f..2b6d3f812 100644
--- a/source/common/aarch64/asm-primitives.cpp
+++ b/source/common/aarch64/asm-primitives.cpp
@@ -671,9 +671,8 @@ void setupNeonPrimitives(EncoderPrimitives &p)
p.cu[BLOCK_32x32].normFact = PFX(normFact32_neon);
p.cu[BLOCK_64x64].normFact = PFX(normFact64_neon);
-#if !defined(__APPLE__)
p.scanPosLast = PFX(scanPosLast_neon);
-#endif
+
p.costCoeffNxN = PFX(costCoeffNxN_neon);
#endif
diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp
index b08f98457..4ba0ad1eb 100644
--- a/source/common/aarch64/pixel-prim.cpp
+++ b/source/common/aarch64/pixel-prim.cpp
@@ -1812,10 +1812,8 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
#if !(HIGH_BIT_DEPTH)
p.cu[BLOCK_8x8].var = pixel_var_neon<8>;
p.cu[BLOCK_16x16].var = pixel_var_neon<16>;
-#if defined(__APPLE__)
- p.cu[BLOCK_32x32].var = pixel_var_neon<32>;
+ p.cu[BLOCK_32x32].var = pixel_var_neon<32>;
p.cu[BLOCK_64x64].var = pixel_var_neon<64>;
-#endif // defined(__APPLE__)
#endif // !(HIGH_BIT_DEPTH)
p.cu[BLOCK_16x16].blockfill_s[NONALIGNED] = blockfill_s_neon<16>;
@@ -1832,11 +1830,8 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
p.cu[BLOCK_8x8].calcresidual[ALIGNED] = getResidual_neon<8>;
p.cu[BLOCK_16x16].calcresidual[NONALIGNED] = getResidual_neon<16>;
p.cu[BLOCK_16x16].calcresidual[ALIGNED] = getResidual_neon<16>;
-
-#if defined(__APPLE__)
p.cu[BLOCK_32x32].calcresidual[NONALIGNED] = getResidual_neon<32>;
p.cu[BLOCK_32x32].calcresidual[ALIGNED] = getResidual_neon<32>;
-#endif // defined(__APPLE__)
p.cu[BLOCK_4x4].sa8d = satd4_neon<4, 4>;
p.cu[BLOCK_8x8].sa8d = sa8d8_neon<8, 8>;
--
2.39.5 (Apple Git-154)
>From a233fd321bf61b9761f8636d8b99d471ab3c700c Mon Sep 17 00:00:00 2001
Message-Id:
<a233fd321bf61b9761f8636d8b99d471ab3c700c.1746034801.git.li.zha...@arm.com>
In-Reply-To: <[email protected]>
References: <[email protected]>
From: Li Zhang <[email protected]>
Date: Wed, 30 Apr 2025 19:29:01 +0200
Subject: [PATCH 3/3] AArch64: Remove redundant __APPLE__ guards
Remove the __APPLE__ guards around Neon kernel assignments. These guards
enabled some Neon kernels only on Apple platforms and disabled another
only on Apple platforms, even though the remaining platforms could also
benefit from them.
Affected kernels:
- calcresidual (BLOCK_32x32)
- var (BLOCK_32x32 and BLOCK_64x64)
- scanPosLast
With the guards removed, the same Neon kernels are used on both Apple and
non-Apple platforms.
---
source/common/aarch64/asm-primitives.cpp | 3 +--
source/common/aarch64/pixel-prim.cpp | 7 +------
2 files changed, 2 insertions(+), 8 deletions(-)
diff --git a/source/common/aarch64/asm-primitives.cpp b/source/common/aarch64/asm-primitives.cpp
index e16150d4f..2b6d3f812 100644
--- a/source/common/aarch64/asm-primitives.cpp
+++ b/source/common/aarch64/asm-primitives.cpp
@@ -671,9 +671,8 @@ void setupNeonPrimitives(EncoderPrimitives &p)
p.cu[BLOCK_32x32].normFact = PFX(normFact32_neon);
p.cu[BLOCK_64x64].normFact = PFX(normFact64_neon);
-#if !defined(__APPLE__)
p.scanPosLast = PFX(scanPosLast_neon);
-#endif
+
p.costCoeffNxN = PFX(costCoeffNxN_neon);
#endif
diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp
index b08f98457..4ba0ad1eb 100644
--- a/source/common/aarch64/pixel-prim.cpp
+++ b/source/common/aarch64/pixel-prim.cpp
@@ -1812,10 +1812,8 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
#if !(HIGH_BIT_DEPTH)
p.cu[BLOCK_8x8].var = pixel_var_neon<8>;
p.cu[BLOCK_16x16].var = pixel_var_neon<16>;
-#if defined(__APPLE__)
- p.cu[BLOCK_32x32].var = pixel_var_neon<32>;
+ p.cu[BLOCK_32x32].var = pixel_var_neon<32>;
p.cu[BLOCK_64x64].var = pixel_var_neon<64>;
-#endif // defined(__APPLE__)
#endif // !(HIGH_BIT_DEPTH)
p.cu[BLOCK_16x16].blockfill_s[NONALIGNED] = blockfill_s_neon<16>;
@@ -1832,11 +1830,8 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
p.cu[BLOCK_8x8].calcresidual[ALIGNED] = getResidual_neon<8>;
p.cu[BLOCK_16x16].calcresidual[NONALIGNED] = getResidual_neon<16>;
p.cu[BLOCK_16x16].calcresidual[ALIGNED] = getResidual_neon<16>;
-
-#if defined(__APPLE__)
p.cu[BLOCK_32x32].calcresidual[NONALIGNED] = getResidual_neon<32>;
p.cu[BLOCK_32x32].calcresidual[ALIGNED] = getResidual_neon<32>;
-#endif // defined(__APPLE__)
p.cu[BLOCK_4x4].sa8d = satd4_neon<4, 4>;
p.cu[BLOCK_8x8].sa8d = sa8d8_neon<8, 8>;
--
2.39.5 (Apple Git-154)
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel