Author: mjg
Date: Fri Nov 30 00:45:10 2018
New Revision: 341272
URL: https://svnweb.freebsd.org/changeset/base/341272

Log:
  amd64: tidy up copying backwards in memmove
  
  For the non-ERMS case the code used to handle possible trailing bytes
  with movsb first and then followed it up with movsq. This also
  happened to alter how calculations were done for other cases.
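
  For reference, a condensed restatement of the removed sequence (the
  "-" lines in the diff below), with comments added here for
  illustration:

        std
        andq    $7,%rcx         /* any fractional bytes? */
        je      3f
        rep
        movsb                   /* copy the 0..7 tail bytes first */
  3:
        movq    %rdx,%rcx       /* rdx holds the original length */
        shrq    $3,%rcx         /* quadword count */
        subq    $7,%rsi         /* re-bias pointers for movsq */
        subq    $7,%rdi
        rep
        movsq                   /* then the bulk copy, backwards (DF=1) */
        cld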
  
  Handle the tail with regular movs, just like when copying forward.
  Use leaq to calculate the right offset from the get-go, instead of
  doing a separate add and sub.
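
  A minimal sketch of the leaq point (register names as in the diff;
  in the old code the -8 bias was split across a decq and a subq):

        addq    %rcx,%rdi               /* rdi += len */
        subq    $8,%rdi                 /* back up to the last quadword */
        /* is equivalent to the single instruction */
        leaq    -8(%rdi,%rcx),%rdi      /* rdi = rdi + len - 8 */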
  
  This adjusts the offsets for the non-rep cases so that they can be
  reused to handle the tail.
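
  Concretely, the new non-ERMS epilogue (the "+" lines in the diff
  below, with comments added here) recomputes the 0..7 byte remainder
  after the bulk movsq and jumps back into the regular mov tail:

        leaq    -8(%rdi,%rcx),%rdi      /* point at the last quadword */
        leaq    -8(%rsi,%rcx),%rsi
        shrq    $3,%rcx                 /* quadword count */
        rep
        movsq                           /* bulk copy, backwards (DF=1) */
        cld
        movq    %rdx,%rcx               /* rdx holds the original length */
        andb    $7,%cl                  /* 0..7 trailing bytes? */
        jne     2004b                   /* reuse the regular mov tail */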
  
  The routine is still a work in progress.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/lib/libc/amd64/string/memmove.S
  head/sys/amd64/amd64/support.S

Modified: head/lib/libc/amd64/string/memmove.S
==============================================================================
--- head/lib/libc/amd64/string/memmove.S        Fri Nov 30 00:00:51 2018        (r341271)
+++ head/lib/libc/amd64/string/memmove.S        Fri Nov 30 00:45:10 2018        (r341272)
@@ -150,24 +150,24 @@ __FBSDID("$FreeBSD$");
         */
         ALIGN_TEXT
 2:
-       addq    %rcx,%rdi
-       addq    %rcx,%rsi
+       cmpq    $256,%rcx
+       ja      2256f
 
+       leaq    -8(%rdi,%rcx),%rdi
+       leaq    -8(%rsi,%rcx),%rsi
+
        cmpq    $32,%rcx
        jb      2016f
 
-       cmpq    $256,%rcx
-       ja      2256f
-
 2032:
+       movq    (%rsi),%rdx
+       movq    %rdx,(%rdi)
        movq    -8(%rsi),%rdx
        movq    %rdx,-8(%rdi)
        movq    -16(%rsi),%rdx
        movq    %rdx,-16(%rdi)
        movq    -24(%rsi),%rdx
        movq    %rdx,-24(%rdi)
-       movq    -32(%rsi),%rdx
-       movq    %rdx,-32(%rdi)
        leaq    -32(%rsi),%rsi
        leaq    -32(%rdi),%rdi
        subq    $32,%rcx
@@ -181,10 +181,10 @@ __FBSDID("$FreeBSD$");
 2016:
        cmpb    $16,%cl
        jl      2008f
+       movq    (%rsi),%rdx
+       movq    %rdx,(%rdi)
        movq    -8(%rsi),%rdx
        movq    %rdx,-8(%rdi)
-       movq    -16(%rsi),%rdx
-       movq    %rdx,-16(%rdi)
        subb    $16,%cl
        jz      2000f
        leaq    -16(%rsi),%rsi
@@ -192,8 +192,8 @@ __FBSDID("$FreeBSD$");
 2008:
        cmpb    $8,%cl
        jl      2004f
-       movq    -8(%rsi),%rdx
-       movq    %rdx,-8(%rdi)
+       movq    (%rsi),%rdx
+       movq    %rdx,(%rdi)
        subb    $8,%cl
        jz      2000f
        leaq    -8(%rsi),%rsi
@@ -201,8 +201,8 @@ __FBSDID("$FreeBSD$");
 2004:
        cmpb    $4,%cl
        jl      2002f
-       movl    -4(%rsi),%edx
-       movl    %edx,-4(%rdi)
+       movl    4(%rsi),%edx
+       movl    %edx,4(%rdi)
        subb    $4,%cl
        jz      2000f
        leaq    -4(%rsi),%rsi
@@ -210,8 +210,8 @@ __FBSDID("$FreeBSD$");
 2002:
        cmpb    $2,%cl
        jl      2001f
-       movw    -2(%rsi),%dx
-       movw    %dx,-2(%rdi)
+       movw    6(%rsi),%dx
+       movw    %dx,6(%rdi)
        subb    $2,%cl
        jz      2000f
        leaq    -2(%rsi),%rsi
@@ -219,33 +219,31 @@ __FBSDID("$FreeBSD$");
 2001:
        cmpb    $1,%cl
        jl      2000f
-       movb    -1(%rsi),%dl
-       movb    %dl,-1(%rdi)
+       movb    7(%rsi),%dl
+       movb    %dl,7(%rdi)
 2000:
        \end
        ret
        ALIGN_TEXT
 2256:
-       decq    %rdi
-       decq    %rsi
        std
 .if \erms == 1
+       leaq    -1(%rdi,%rcx),%rdi
+       leaq    -1(%rsi,%rcx),%rsi
        rep
        movsb
+       cld
 .else
-       andq    $7,%rcx                         /* any fractional bytes? */
-       je      3f
-       rep
-       movsb
-3:
-       movq    %rdx,%rcx                       /* copy remainder by 32-bit words */
+       leaq    -8(%rdi,%rcx),%rdi
+       leaq    -8(%rsi,%rcx),%rsi
        shrq    $3,%rcx
-       subq    $7,%rsi
-       subq    $7,%rdi
        rep
        movsq
-.endif
        cld
+       movq    %rdx,%rcx
+       andb    $7,%cl
+       jne     2004b
+.endif
        \end
        ret
 .endif

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S      Fri Nov 30 00:00:51 2018        (r341271)
+++ head/sys/amd64/amd64/support.S      Fri Nov 30 00:45:10 2018        (r341272)
@@ -313,24 +313,24 @@ END(memcmp)
         */
         ALIGN_TEXT
 2:
-       addq    %rcx,%rdi
-       addq    %rcx,%rsi
+       cmpq    $256,%rcx
+       ja      2256f
 
+       leaq    -8(%rdi,%rcx),%rdi
+       leaq    -8(%rsi,%rcx),%rsi
+
        cmpq    $32,%rcx
        jb      2016f
 
-       cmpq    $256,%rcx
-       ja      2256f
-
 2032:
+       movq    (%rsi),%rdx
+       movq    %rdx,(%rdi)
        movq    -8(%rsi),%rdx
        movq    %rdx,-8(%rdi)
        movq    -16(%rsi),%rdx
        movq    %rdx,-16(%rdi)
        movq    -24(%rsi),%rdx
        movq    %rdx,-24(%rdi)
-       movq    -32(%rsi),%rdx
-       movq    %rdx,-32(%rdi)
        leaq    -32(%rsi),%rsi
        leaq    -32(%rdi),%rdi
        subq    $32,%rcx
@@ -344,10 +344,10 @@ END(memcmp)
 2016:
        cmpb    $16,%cl
        jl      2008f
+       movq    (%rsi),%rdx
+       movq    %rdx,(%rdi)
        movq    -8(%rsi),%rdx
        movq    %rdx,-8(%rdi)
-       movq    -16(%rsi),%rdx
-       movq    %rdx,-16(%rdi)
        subb    $16,%cl
        jz      2000f
        leaq    -16(%rsi),%rsi
@@ -355,8 +355,8 @@ END(memcmp)
 2008:
        cmpb    $8,%cl
        jl      2004f
-       movq    -8(%rsi),%rdx
-       movq    %rdx,-8(%rdi)
+       movq    (%rsi),%rdx
+       movq    %rdx,(%rdi)
        subb    $8,%cl
        jz      2000f
        leaq    -8(%rsi),%rsi
@@ -364,8 +364,8 @@ END(memcmp)
 2004:
        cmpb    $4,%cl
        jl      2002f
-       movl    -4(%rsi),%edx
-       movl    %edx,-4(%rdi)
+       movl    4(%rsi),%edx
+       movl    %edx,4(%rdi)
        subb    $4,%cl
        jz      2000f
        leaq    -4(%rsi),%rsi
@@ -373,8 +373,8 @@ END(memcmp)
 2002:
        cmpb    $2,%cl
        jl      2001f
-       movw    -2(%rsi),%dx
-       movw    %dx,-2(%rdi)
+       movw    6(%rsi),%dx
+       movw    %dx,6(%rdi)
        subb    $2,%cl
        jz      2000f
        leaq    -2(%rsi),%rsi
@@ -382,33 +382,31 @@ END(memcmp)
 2001:
        cmpb    $1,%cl
        jl      2000f
-       movb    -1(%rsi),%dl
-       movb    %dl,-1(%rdi)
+       movb    7(%rsi),%dl
+       movb    %dl,7(%rdi)
 2000:
        \end
        ret
        ALIGN_TEXT
 2256:
-       decq    %rdi
-       decq    %rsi
        std
 .if \erms == 1
+       leaq    -1(%rdi,%rcx),%rdi
+       leaq    -1(%rsi,%rcx),%rsi
        rep
        movsb
+       cld
 .else
-       andq    $7,%rcx                         /* any fractional bytes? */
-       je      3f
-       rep
-       movsb
-3:
-       movq    %rdx,%rcx                       /* copy remainder by 32-bit words */
+       leaq    -8(%rdi,%rcx),%rdi
+       leaq    -8(%rsi,%rcx),%rsi
        shrq    $3,%rcx
-       subq    $7,%rsi
-       subq    $7,%rdi
        rep
        movsq
-.endif
        cld
+       movq    %rdx,%rcx
+       andb    $7,%cl
+       jne     2004b
+.endif
        \end
        ret
 .endif