Module Name: src
Committed By: dsl
Date: Sat Aug 1 20:35:46 UTC 2009
Modified Files:
src/common/lib/libc/arch/x86_64/string: bzero.S memset.S
Log Message:
Remove some long dependent instruction sequences (i.e. allow parallel code).
Since 'rep stos' will have a long setup time, avoid doing it more than once.
For a misaligned fill (start address or length), write an unaligned word at
both ends of the buffer, then fill the middle with an aligned 'rep stosq'.
Use the same code for bzero().
bzero.S is left in place (empty) and still compiled for a while, to avoid
issues with duplicate symbols in libc.a after update builds.
To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/common/lib/libc/arch/x86_64/string/bzero.S \
src/common/lib/libc/arch/x86_64/string/memset.S
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/common/lib/libc/arch/x86_64/string/bzero.S
diff -u src/common/lib/libc/arch/x86_64/string/bzero.S:1.2 src/common/lib/libc/arch/x86_64/string/bzero.S:1.3
--- src/common/lib/libc/arch/x86_64/string/bzero.S:1.2 Mon Nov 12 18:41:59 2007
+++ src/common/lib/libc/arch/x86_64/string/bzero.S Sat Aug 1 20:35:45 2009
@@ -1,43 +1,8 @@
/*
- * Written by J.T. Conklin <[email protected]>.
- * Public domain.
- * Adapted for NetBSD/x86_64 by Frank van der Linden <[email protected]>
+ * Code for bzero() is in memset.S
+ *
+ * This file is left in place so that libc.a doesn't get a duplicate
+ * symbol error on update builds.
+ *
+ * If you are reading this in 2010, delete the file!
*/
-
-#include <machine/asm.h>
-
-#if defined(LIBC_SCCS)
- RCSID("$NetBSD: bzero.S,v 1.2 2007/11/12 18:41:59 ad Exp $")
-#endif
-
-ENTRY(bzero)
- movq %rsi,%rdx
-
- xorq %rax,%rax /* set fill data to 0 */
-
- /*
- * if the string is too short, it's really not worth the overhead
- * of aligning to word boundries, etc. So we jump to a plain
- * unaligned set.
- */
- cmpq $16,%rdx
- jb L1
-
- movq %rdi,%rcx /* compute misalignment */
- negq %rcx
- andq $7,%rcx
- subq %rcx,%rdx
- rep /* zero until word aligned */
- stosb
-
- movq %rdx,%rcx /* zero by words */
- shrq $3,%rcx
- andq $7,%rdx
- rep
- stosq
-
-L1: movq %rdx,%rcx /* zero remainder by bytes */
- rep
- stosb
-
- ret
Index: src/common/lib/libc/arch/x86_64/string/memset.S
diff -u src/common/lib/libc/arch/x86_64/string/memset.S:1.2 src/common/lib/libc/arch/x86_64/string/memset.S:1.3
--- src/common/lib/libc/arch/x86_64/string/memset.S:1.2 Mon Nov 12 18:41:59 2007
+++ src/common/lib/libc/arch/x86_64/string/memset.S Sat Aug 1 20:35:45 2009
@@ -1,57 +1,91 @@
-/*
- * Written by J.T. Conklin <[email protected]>.
- * Public domain.
- * Adapted for NetBSD/x86_64 by Frank van der Linden <[email protected]>
+/* $NetBSD: memset.S,v 1.3 2009/08/01 20:35:45 dsl Exp $ */
+
+/*-
+ * Copyright (c) 2009 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by David Laight.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
#if defined(LIBC_SCCS)
- RCSID("$NetBSD: memset.S,v 1.2 2007/11/12 18:41:59 ad Exp $")
+ RCSID("$NetBSD: memset.S,v 1.3 2009/08/01 20:35:45 dsl Exp $")
+#endif
+
+#ifndef _KERNEL
+/*
+ * bzero, %rdi is buffer, %rsi length
+ *
+ * Only assembled when _KERNEL is not defined.
+ * This is just an alternate entry to the fill code at label 1: inside
+ * memset below.  That code expects the length in both %rsi and %rdx and
+ * the 8-byte fill pattern in %rax, so set those up and jump there.
+ */
+
+ENTRY(bzero)
+ mov %rsi,%rdx /* length, duplicated into %rdx for the shared code */
+ xor %eax,%eax /* value to write: zero (32-bit write clears all of %rax) */
+ jmp 1f /* join memset after its pattern setup */
#endif /* !_KERNEL */
+/* memset, %rdi is buffer, %rsi char to fill, %rdx length */
+
ENTRY(memset)
- movq %rsi,%rax
- andq $0xff,%rax
- movq %rdx,%rcx
- movq %rdi,%r11
-
- /*
- * if the string is too short, it's really not worth the overhead
- * of aligning to word boundries, etc. So we jump to a plain
- * unaligned set.
- */
- cmpq $0x0f,%rcx
- jle L1
-
- movb %al,%ah /* copy char to all bytes in word */
- movl %eax,%edx
- sall $16,%eax
- orl %edx,%eax
-
- movl %eax,%edx
- salq $32,%rax
- orq %rdx,%rax
-
- movq %rdi,%rdx /* compute misalignment */
- negq %rdx
- andq $7,%rdx
- movq %rcx,%r8
- subq %rdx,%r8
-
- movq %rdx,%rcx /* set until word aligned */
- rep
- stosb
-
- movq %r8,%rcx
- shrq $3,%rcx /* set by words */
- rep
- stosq
-
- movq %r8,%rcx /* set remainder by bytes */
- andq $7,%rcx
-L1: rep
- stosb
- movq %r11,%rax
+ /*
+  * Register use from here on:
+  *   %rax  fill byte replicated into all 8 bytes
+  *   %rsi  length (copy, used for the short-fill compare)
+  *   %rdx  length, then low bits of (address | length)
+  *   %rcx  count for the rep stos instructions
+  *   %r9   multiplier constant first, then the saved buffer address
+  */
+ movzbq %sil,%rax /* byte value to fill */
+ mov %rdx,%rsi /* copy of length */
+ mov $0x0101010101010101,%r9 /* multiplier: one set bit per byte */
+ imul %r9,%rax /* fill value in all bytes */
+
+/* Common code; bzero jumps in here with %rax = 0 and %rsi = %rdx = length */
+1:
+ mov %rdi,%r9 /* Need to return buffer address */
+ or %edi,%edx /* address | length */
+ mov %rsi,%rcx /* length is the count for rep stos */
+ cmp $7,%rsi /* fewer than 8 bytes? (unsigned compare) */
+ jbe 10f /* jump if short fill */
+ test $7,%dl /* check for misaligned fill */
+ jnz 20f /* jump if misaligned */
+
+/*
+ * Target aligned and length multiple of 8.
+ * The misaligned path (20:) also jumps back here with an adjusted
+ * pointer and count; the shift below then drops any remainder of less
+ * than 8 bytes, which that path has already written.
+ */
+2:
+ shr $3,%rcx /* byte count -> 8-byte word count */
+ rep stosq
+ mov %r9,%rax /* return the original buffer address */
+ ret
+/*
+ * Short transfer, any faffing here will generate mispredicted branches.
+ * So we keep it simple: %rcx already holds the byte count (0 to 7).
+ */
+10: rep stosb
+ mov %r9,%rax /* return the original buffer address */
ret
+
+/*
+ * Buffer or length misaligned.
+ * Write pattern to first and last word of buffer, then fill middle.
+ * (This writes to some bytes more than once - possibly three times!)
+ *
+ * On entry: %rax = fill pattern, %rdi = buffer, %rcx = length (at least 8,
+ * so both 8-byte stores stay inside the buffer).
+ *
+ * The middle fill must start on an 8-byte boundary, so the pointer is
+ * advanced by (-address & 7), the distance UP to the next boundary.
+ * Using (address & 7) would only land on a boundary when that value
+ * is 0 or 4.  The advance is at most 7 bytes, so the skipped bytes are
+ * covered by the first-word store, and whatever 'shr $3' drops at 2:
+ * (at most 7 bytes) is covered by the last-word store.
+ */
+20:
+ mov %rax,(%rdi) /* first 8 bytes, possibly unaligned */
+ mov %edi,%edx /* low address bits for alignment */
+ mov %rax,-8(%rcx,%rdi) /* last 8 bytes, possibly unaligned */
+ neg %edx
+ and $7,%edx /* bytes up to the next 8-byte boundary (0..7) */
+ sub %rdx,%rcx /* adjust length ... */
+ add %rdx,%rdi /* ... and target (now 8-byte aligned) */
+ jmp 2b