This accounts for variations in the number of bytes copied to the
destination buffer that can result from STP instructions being
substituted with two unprivileged STTR variants when UAO is
supported and enabled.
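
For illustration, the arithmetic behind the changed masks amounts to
the minimal C sketch below (bytes_not_copied() and its uao parameter
are hypothetical names used only for this example, not part of the
patch): a fault part-way through a store only guarantees that whole
16-byte units were written for STP, or whole 8-byte units once each
STP is replaced by two STTRs, so the fault offset is rounded down to
the corresponding store width.

  #include <stddef.h>
  #include <stdint.h>

  /*
   * Sketch of the "32 < count < 128" fixup: how many bytes to report
   * as not copied, given the faulting address, the destination start
   * and the copy length.  The rounding width matches the store that
   * is known to have completed: 16 bytes (STP) or 8 bytes (STTR).
   */
  size_t bytes_not_copied(uintptr_t fault_addr, uintptr_t dst,
                          size_t count, int uao)
  {
          size_t fault_off = fault_addr - dst;        /* relative fault offset */
          size_t mask = uao ? 7 : 15;                 /* bic ..., 7 vs bic ..., 15 */
          size_t copied = fault_off & ~mask;          /* bytes already copied */

          return count > copied ? count - copied : 0; /* bytes yet to copy */
  }

For example, with dst = 0x1000, a fault at 0x1019 and count = 100, the
STP path reports 100 - 16 = 84 bytes remaining, while the STTR path
can report 100 - 24 = 76, since the first of the two 8-byte stores is
known to have completed.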

Rather than duplicating the store fixups and applying the
modifications to each copy, the relevant alternatives are inserted
in-line.

Signed-off-by: Oliver Swede <oli.sw...@arm.com>
---
 arch/arm64/lib/copy_user_fixup.S | 47 ++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/lib/copy_user_fixup.S b/arch/arm64/lib/copy_user_fixup.S
index 37ca3d99a02a..2d413f9ba5d3 100644
--- a/arch/arm64/lib/copy_user_fixup.S
+++ b/arch/arm64/lib/copy_user_fixup.S
@@ -205,7 +205,12 @@ addr       .req    x15
        /* 32 < count < 128 -> count - ((addr-dst)&15) */
        cmp     count, 128
        sub     x0, addr, dst // relative fault offset
+       /* fault offset within dest. buffer */
+       alternative_if ARM64_HAS_UAO
+       bic     x0, x0, 7 // stp subst. for 2x sttr
+       alternative_else
        bic     x0, x0, 15 // bytes already copied (steps of 16B stores)
+       alternative_endif
        sub     x0, count, x0 // bytes yet to copy
        b.le    L(end_fixup)
        /* 128 < count -> count */
@@ -265,7 +270,12 @@ addr       .req    x15
        sub     tmp1, count, tmp1 // remaining bytes after non-overlapping section
        sub     x0, dstend, 64
        sub     x0, addr, x0
-       bic     x0, x0, 15 // fault offset within dest. buffer
+       /* fault offset within dest. buffer */
+       alternative_if ARM64_HAS_UAO
+       bic     x0, x0, 7 // stp subst. for 2x sttr
+       alternative_else
+       bic     x0, x0, 15 // bytes already copied (steps of 16B stores)
+       alternative_endif
        add     x0, dstend, x0
        sub     x0, x0, 64
        sub     x0, dstend, x0 // remaining bytes in final (overlapping) 64B
@@ -295,7 +305,12 @@ addr       .req    x15
         */
        sub     tmp1, dstend, 32
        sub     tmp1, addr, tmp1
-       bic     tmp1, tmp1, 15
+       /* fault offset */
+       alternative_if ARM64_HAS_UAO
+       bic     tmp1, tmp1, 7 // stp subst. for 2x sttr
+       alternative_else
+       bic     tmp1, tmp1, 15 // bytes already copied (steps of 16B stores)
+       alternative_endif
        mov     x0, 32
        sub     tmp1, x0, tmp1
        sub     x0, count, 32
@@ -309,7 +324,12 @@ addr       .req    x15
         */
        sub     tmp1, dstend, 32
        sub     tmp1, addr, tmp1
-       bic     tmp1, tmp1, 15
+       /* fault offset */
+       alternative_if ARM64_HAS_UAO
+       bic     tmp1, tmp1, 7 // stp subst. for 2x sttr
+       alternative_else
+       bic     tmp1, tmp1, 15 // bytes already copied (steps of 16B stores)
+       alternative_endif
        mov     x0, 32
        sub     tmp1, x0, tmp1
        sub     x0, count, 64
@@ -324,7 +344,12 @@ addr       .req    x15
         */
        sub     tmp1, dstend, 64
        sub     tmp1, addr, tmp1
-       bic     tmp1, tmp1, 15
+       /* fault offset */
+       alternative_if ARM64_HAS_UAO
+       bic     tmp1, tmp1, 7 // stp subst. for 2x sttr
+       alternative_else
+       bic     tmp1, tmp1, 15 // bytes already copied (steps of 16B stores)
+       alternative_endif
        mov     x0, 64
        sub     tmp1, x0, tmp1
        cmp     count, 128
@@ -378,10 +403,20 @@ addr      .req    x15
        /* Take the min from {16,(fault_addr&15)-(dst&15)}
         * and subtract from count to obtain the return value */
        bic     tmp1, dst, 15 // aligned dst
-       bic     x0, addr, 15
+       /* fault offset */
+       alternative_if ARM64_HAS_UAO
+       bic     x0, addr, 7 // stp subst. for 2x sttr
+       alternative_else
+       bic     x0, addr, 15 // bytes already copied (steps of 16B stores)
+       alternative_endif
        sub     x0, x0, tmp1 // relative fault offset
        cmp     x0, 16
-       bic     x0, addr, 15
+       /* fault offset */
+       alternative_if ARM64_HAS_UAO
+       bic     x0, addr, 7 // stp subst. for 2x sttr
+       alternative_else
+       bic     x0, addr, 15 // bytes already copied (steps of 16B stores)
+       alternative_endif
        sub     x0, x0, dst
        sub     x0, count, x0
        b.gt    L(end_fixup)
-- 
2.17.1
