The branch target for longer vector lengths was mistakenly written as
.vl_gt_48_cpy2Dto1D_shr_32x32 (note 2Dto1D rather than 1Dto2D). Fix the
branch to target .vl_gt_48_cpy1Dto2D_shr_32x32 so that the code works as
intended.
---
source/common/aarch64/blockcopy8-sve.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/source/common/aarch64/blockcopy8-sve.S b/source/common/aarch64/blockcopy8-sve.S
index d5664af58..1d742a64c 100644
--- a/source/common/aarch64/blockcopy8-sve.S
+++ b/source/common/aarch64/blockcopy8-sve.S
@@ -1275,7 +1275,7 @@ function PFX(cpy1Dto2D_shr_32x32_sve)
sub w4, w3, #1
dup z1.h, w4
cmp x9, #48
- bgt .vl_gt_48_cpy2Dto1D_shr_32x32
+ bgt .vl_gt_48_cpy1Dto2D_shr_32x32
ptrue p0.h, vl16
mov z2.h, #1
lsl z2.h, p0/m, z2.h, z1.h
--
2.34.1
From 635737e6196dd7d0646d16b9bb5fd3e3cbe6ed9c Mon Sep 17 00:00:00 2001
Message-Id: <635737e6196dd7d0646d16b9bb5fd3e3cbe6ed9c.1736179734.git.george.st...@arm.com>
In-Reply-To: <[email protected]>
References: <[email protected]>
From: George Steed <[email protected]>
Date: Mon, 23 Dec 2024 10:48:28 +0000
Subject: [PATCH 5/6] blockcopy8-sve.S: Fix branch target in cpy1Dto2D_shr_32x32_sve
The branch target for longer vector lengths was mistakenly written as
.vl_gt_48_cpy2Dto1D_shr_32x32 (note 2Dto1D rather than 1Dto2D). Fix the
branch to target .vl_gt_48_cpy1Dto2D_shr_32x32 so that the code works as
intended.
---
source/common/aarch64/blockcopy8-sve.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/source/common/aarch64/blockcopy8-sve.S b/source/common/aarch64/blockcopy8-sve.S
index d5664af58..1d742a64c 100644
--- a/source/common/aarch64/blockcopy8-sve.S
+++ b/source/common/aarch64/blockcopy8-sve.S
@@ -1275,7 +1275,7 @@ function PFX(cpy1Dto2D_shr_32x32_sve)
sub w4, w3, #1
dup z1.h, w4
cmp x9, #48
- bgt .vl_gt_48_cpy2Dto1D_shr_32x32
+ bgt .vl_gt_48_cpy1Dto2D_shr_32x32
ptrue p0.h, vl16
mov z2.h, #1
lsl z2.h, p0/m, z2.h, z1.h
--
2.34.1
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel