Author: mjg
Date: Thu Sep 27 14:05:44 2018
New Revision: 338963
URL: https://svnweb.freebsd.org/changeset/base/338963

Log:
  amd64: implement memcmp in assembly
  
  Both the in-kernel C variant and libc asm variant have very poor performance.
  The former compiles to a single byte comparison loop, which breaks down even
  for small sizes. The latter uses rep cmpsq/b which turn out to have very poor
  throughput and are slower than a hand-coded 32-byte comparison loop.
  
  Depending on size this is about 3-4 times faster than the current routines.
  
  Reviewed by:  kib
  Approved by:  re (gjb)
  Differential Revision:        https://reviews.freebsd.org/D17328

Modified:
  head/sys/amd64/amd64/support.S
  head/sys/conf/files
  head/sys/conf/files.arm
  head/sys/conf/files.arm64
  head/sys/conf/files.i386
  head/sys/conf/files.mips
  head/sys/conf/files.powerpc
  head/sys/conf/files.riscv
  head/sys/conf/files.sparc64

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S      Thu Sep 27 13:54:09 2018        (r338962)
+++ head/sys/amd64/amd64/support.S      Thu Sep 27 14:05:44 2018        (r338963)
@@ -101,6 +101,100 @@ ENTRY(sse2_pagezero)
 END(sse2_pagezero)
 
 /*
+ * memcmp(b1, b2, len)
+ *        rdi,rsi,rdx
+ */
+ENTRY(memcmp)
+       PUSH_FRAME_POINTER
+       /* Returns in %eax: 0 if equal, else b1 byte minus b2 byte at first difference. */
+       cmpq    $16,%rdx                /* under 16 bytes: byte loop only */
+       jae     5f
+       /* Byte loop, unrolled 4x; the wide loops also jump here to locate a mismatch. */
+1:
+       testq   %rdx,%rdx               /* nothing left to compare: equal */
+       je      3f
+       xorl    %ecx,%ecx               /* rcx = byte index, rdx = bytes left */
+2:
+       movzbl  (%rdi,%rcx,1),%eax
+       movzbl  (%rsi,%rcx,1),%r8d
+       cmpb    %r8b,%al
+       jne     4f
+       addq    $1,%rcx
+       cmpq    %rcx,%rdx
+       jz      3f
+       movzbl  (%rdi,%rcx,1),%eax
+       movzbl  (%rsi,%rcx,1),%r8d
+       cmpb    %r8b,%al
+       jne     4f
+       addq    $1,%rcx
+       cmpq    %rcx,%rdx
+       jz      3f
+       movzbl  (%rdi,%rcx,1),%eax
+       movzbl  (%rsi,%rcx,1),%r8d
+       cmpb    %r8b,%al
+       jne     4f
+       addq    $1,%rcx
+       cmpq    %rcx,%rdx
+       jz      3f
+       movzbl  (%rdi,%rcx,1),%eax
+       movzbl  (%rsi,%rcx,1),%r8d
+       cmpb    %r8b,%al
+       jne     4f
+       addq    $1,%rcx
+       cmpq    %rcx,%rdx
+       jne     2b
+3:
+       xorl    %eax,%eax               /* all bytes matched */
+       POP_FRAME_POINTER
+       ret
+4:
+       subl    %r8d,%eax               /* both operands are zero-extended bytes */
+       POP_FRAME_POINTER
+       ret
+5:
+       cmpq    $32,%rdx
+       jae     7f
+6:
+       /*
+        * 8 bytes per iteration; first entered with 16..31 bytes left.
+        */
+       movq    (%rdi),%r8
+       movq    (%rsi),%r9
+       cmpq    %r8,%r9
+       jne     1b                      /* pointers not advanced: byte loop rescans */
+       leaq    8(%rdi),%rdi
+       leaq    8(%rsi),%rsi
+       subq    $8,%rdx
+       cmpq    $8,%rdx
+       jae     6b
+       jmp     1b                      /* 0..7 bytes left; label 1 handles zero */
+7:
+       /*
+        * 32 bytes per iteration, checked as two 16-byte halves.
+        */
+       movq    (%rsi),%r8
+       movq    8(%rsi),%r9
+       subq    (%rdi),%r8
+       subq    8(%rdi),%r9
+       orq     %r8,%r9                 /* nonzero iff either quadword differs */
+       jnz     1b
+
+       movq    16(%rsi),%r8
+       movq    24(%rsi),%r9
+       subq    16(%rdi),%r8
+       subq    24(%rdi),%r9
+       orq     %r8,%r9
+       jnz     1b
+
+       leaq    32(%rdi),%rdi
+       leaq    32(%rsi),%rsi
+       subq    $32,%rdx
+       cmpq    $32,%rdx
+       jae     7b
+       jmp     1b                      /* 0..31 bytes left; label 1 handles zero */
+END(memcmp)
+
+/*
  * memmove(dst, src, cnt)
  *         rdi, rsi, rdx
  * Adapted from bcopy written by:

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files Thu Sep 27 13:54:09 2018        (r338962)
+++ head/sys/conf/files Thu Sep 27 14:05:44 2018        (r338963)
@@ -4041,7 +4041,6 @@ libkern/murmur3_32.c              standard
 libkern/mcount.c               optional profiling-routine
 libkern/memcchr.c              standard
 libkern/memchr.c               standard
-libkern/memcmp.c               standard
 libkern/memmem.c               optional gdb
 libkern/qsort.c                        standard
 libkern/qsort_r.c              standard

Modified: head/sys/conf/files.arm
==============================================================================
--- head/sys/conf/files.arm     Thu Sep 27 13:54:09 2018        (r338962)
+++ head/sys/conf/files.arm     Thu Sep 27 14:05:44 2018        (r338963)
@@ -163,6 +163,7 @@ libkern/fls.c                       standard
 libkern/flsl.c                 standard
 libkern/flsll.c                        standard
 libkern/lshrdi3.c              standard
+libkern/memcmp.c               standard
 libkern/moddi3.c               standard
 libkern/qdivrem.c              standard
 libkern/ucmpdi2.c              standard

Modified: head/sys/conf/files.arm64
==============================================================================
--- head/sys/conf/files.arm64   Thu Sep 27 13:54:09 2018        (r338962)
+++ head/sys/conf/files.arm64   Thu Sep 27 14:05:44 2018        (r338963)
@@ -244,6 +244,7 @@ libkern/ffsll.c                     standard
 libkern/fls.c                  standard
 libkern/flsl.c                 standard
 libkern/flsll.c                        standard
+libkern/memcmp.c               standard
 libkern/memset.c               standard
 libkern/arm64/crc32c_armv8.S   standard
 cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S    optional zfs | dtrace compile-with "${CDDL_C}"

Modified: head/sys/conf/files.i386
==============================================================================
--- head/sys/conf/files.i386    Thu Sep 27 13:54:09 2018        (r338962)
+++ head/sys/conf/files.i386    Thu Sep 27 14:05:44 2018        (r338963)
@@ -548,6 +548,7 @@ kern/subr_sfbuf.c           standard
 libkern/divdi3.c               standard
 libkern/ffsll.c                        standard
 libkern/flsll.c                        standard
+libkern/memcmp.c               standard
 libkern/memset.c               standard
 libkern/moddi3.c               standard
 libkern/qdivrem.c              standard

Modified: head/sys/conf/files.mips
==============================================================================
--- head/sys/conf/files.mips    Thu Sep 27 13:54:09 2018        (r338962)
+++ head/sys/conf/files.mips    Thu Sep 27 14:05:44 2018        (r338963)
@@ -65,6 +65,7 @@ libkern/cmpdi2.c                      optional        mips | mipshf | mipsel | m
 libkern/ucmpdi2.c                      optional        mips | mipshf | mipsel | mipselhf
 libkern/ashldi3.c                      standard
 libkern/ashrdi3.c                      standard
+libkern/memcmp.c                       standard
 
 # cfe support
 dev/cfe/cfe_api.c                      optional        cfe

Modified: head/sys/conf/files.powerpc
==============================================================================
--- head/sys/conf/files.powerpc Thu Sep 27 13:54:09 2018        (r338962)
+++ head/sys/conf/files.powerpc Thu Sep 27 14:05:44 2018        (r338963)
@@ -98,6 +98,7 @@ libkern/fls.c                 standard
 libkern/flsl.c                 standard
 libkern/flsll.c                        standard
 libkern/lshrdi3.c              optional        powerpc | powerpcspe
+libkern/memcmp.c               standard
 libkern/memset.c               standard
 libkern/moddi3.c               optional        powerpc | powerpcspe
 libkern/qdivrem.c              optional        powerpc | powerpcspe

Modified: head/sys/conf/files.riscv
==============================================================================
--- head/sys/conf/files.riscv   Thu Sep 27 13:54:09 2018        (r338962)
+++ head/sys/conf/files.riscv   Thu Sep 27 14:05:44 2018        (r338963)
@@ -22,6 +22,7 @@ libkern/ffsll.c                       standard
 libkern/fls.c                  standard
 libkern/flsl.c                 standard
 libkern/flsll.c                        standard
+libkern/memcmp.c               standard
 libkern/memset.c               standard
 riscv/riscv/autoconf.c         standard
 riscv/riscv/bus_machdep.c      standard

Modified: head/sys/conf/files.sparc64
==============================================================================
--- head/sys/conf/files.sparc64 Thu Sep 27 13:54:09 2018        (r338962)
+++ head/sys/conf/files.sparc64 Thu Sep 27 14:05:44 2018        (r338963)
@@ -71,6 +71,7 @@ libkern/ffsll.c                       standard
 libkern/fls.c                  standard
 libkern/flsl.c                 standard
 libkern/flsll.c                        standard
+libkern/memcmp.c               standard
 sparc64/central/central.c      optional        central
 sparc64/ebus/ebus.c            optional        ebus
 sparc64/ebus/epic.c            optional        epic ebus
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to