Module Name: src
Committed By: dsl
Date: Sat Jul 18 16:40:31 UTC 2009
Modified Files:
src/common/lib/libc/arch/x86_64/string: strchr.S
Log Message:
Shorten a dependency chain by using 'sbb, xor' (at a time when carry is set)
instead of 'mov, neg, dec'.
('mov, not' can't be used because 'not' doesn't set the flags, which the following 'je' tests.)
To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/common/lib/libc/arch/x86_64/string/strchr.S
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/common/lib/libc/arch/x86_64/string/strchr.S
diff -u src/common/lib/libc/arch/x86_64/string/strchr.S:1.3 src/common/lib/libc/arch/x86_64/string/strchr.S:1.4
--- src/common/lib/libc/arch/x86_64/string/strchr.S:1.3 Sat Jul 18 11:41:23 2009
+++ src/common/lib/libc/arch/x86_64/string/strchr.S Sat Jul 18 16:40:31 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: strchr.S,v 1.3 2009/07/18 11:41:23 dsl Exp $ */
+/* $NetBSD: strchr.S,v 1.4 2009/07/18 16:40:31 dsl Exp $ */
/*-
* Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
#include <machine/asm.h>
#if defined(LIBC_SCCS)
- RCSID("$NetBSD: strchr.S,v 1.3 2009/07/18 11:41:23 dsl Exp $")
+ RCSID("$NetBSD: strchr.S,v 1.4 2009/07/18 16:40:31 dsl Exp $")
#endif
/*
@@ -56,6 +56,7 @@
movabsq $0x0101010101010101,%r8
movzbq %sil,%rdx /* value to search for (c) */
+ /* These imul are 'directpath' on athlons, so are fast */
imul $0x80,%r8,%r9 /* 0x8080808080808080 */
imul %r8,%rdx /* (c) copied to all bytes */
test $7,%dil
@@ -111,14 +112,13 @@
/* I (dsl) think a _ALIGN_TEXT here will slow things down! */
20:
xor %rcx,%rcx
- mov %rdx,%rsi /* repeated char pattern (c) */
- sub %dil,%cl /* Convert low address values 1..7 */
- and $7,%cl /* to 7..1 */
+ sub %dil,%cl /* Convert low address values 1..7 ... */
+ sbb %rsi,%rsi /* carry was set, so %rsi now ~0u! */
+ and $7,%cl /* ... to 7..1 */
and $~7,%dil /* move address to start of word */
shl $3,%cl /* now 56, 48 ... 16, 8 */
movq (%rdi),%rax /* aligned word containing first data */
- neg %rsi /* generate ~c (not doesn't set flags) */
- dec %rsi
+ xor %rdx,%rsi /* invert of search pattern (~c) */
je 22f /* searching for 0xff */
21: shr %cl,%rsi /* ~c in low bytes */
or %rsi,%rax /* set some bits making low bytes invalid */