Author: mjg
Date: Mon Oct 22 06:44:20 2018
New Revision: 339579
URL: https://svnweb.freebsd.org/changeset/base/339579

Log:
  amd64: finish the tail in memset with an overlapping store
  
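  Instead of finding the exact store sizes needed to fit the tail, we can
  just issue a single 8-byte store with the target shifted by -8 + tail, so
  that it overlaps the area already written. Doing a blind write to a
  previously rep stosq'ed area comes with a penalty, so do it conditionally
  (only when the tail is non-zero).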
  
  Sample win on EPYC when zeroing a 257-byte buffer (tail = 1) aligned to
  16 bytes:
  before: 44782846 ops/s
  after:  46118614 ops/s
  
  Idea stolen from NetBSD.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/amd64/amd64/support.S

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S      Mon Oct 22 04:12:51 2018        (r339578)
+++ head/sys/amd64/amd64/support.S      Mon Oct 22 06:44:20 2018        (r339579)
@@ -524,9 +524,12 @@ END(memcpy_erms)
        rep
        stosq
        movq    %r9,%rax
-       movq    %rdx,%rcx
-       andb    $7,%cl
-       jne     1004b
+       andl    $7,%edx
+       jnz     1f
+       POP_FRAME_POINTER
+       ret
+1:
+       movq    %r10,-8(%rdi,%rdx)
 .endif
        POP_FRAME_POINTER
        ret
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to