Module Name:    src
Committed By:   dsl
Date:           Sat Jul 18 16:40:31 UTC 2009

Modified Files:
        src/common/lib/libc/arch/x86_64/string: strchr.S

Log Message:
Shorten a dependency chain by using 'sbb, xor' (at a point where the carry
flag is known to be set) instead of 'mov, neg, dec'.
('mov, not' can't be used because 'not' doesn't set the flags, and the
following 'je' needs them.)


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/common/lib/libc/arch/x86_64/string/strchr.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/common/lib/libc/arch/x86_64/string/strchr.S
diff -u src/common/lib/libc/arch/x86_64/string/strchr.S:1.3 src/common/lib/libc/arch/x86_64/string/strchr.S:1.4
--- src/common/lib/libc/arch/x86_64/string/strchr.S:1.3	Sat Jul 18 11:41:23 2009
+++ src/common/lib/libc/arch/x86_64/string/strchr.S	Sat Jul 18 16:40:31 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: strchr.S,v 1.3 2009/07/18 11:41:23 dsl Exp $	*/
+/*	$NetBSD: strchr.S,v 1.4 2009/07/18 16:40:31 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
 #include <machine/asm.h>
 
 #if defined(LIBC_SCCS)
-	RCSID("$NetBSD: strchr.S,v 1.3 2009/07/18 11:41:23 dsl Exp $")
+	RCSID("$NetBSD: strchr.S,v 1.4 2009/07/18 16:40:31 dsl Exp $")
 #endif
 
 /*
@@ -56,6 +56,7 @@
 	movabsq	$0x0101010101010101,%r8
 
 	movzbq	%sil,%rdx	/* value to search for (c) */
+	/* These imul are 'directpath' on athlons, so are fast */
 	imul	$0x80,%r8,%r9	/* 0x8080808080808080 */
 	imul	%r8,%rdx	/* (c) copied to all bytes */
 	test	$7,%dil
@@ -111,14 +112,13 @@
 /* I (dsl) think a _ALIGN_TEXT here will slow things down! */
 20:
 	xor	%rcx,%rcx
-	mov	%rdx,%rsi	/* repeated char pattern (c) */
-	sub	%dil,%cl	/* Convert low address values 1..7 */
-	and	$7,%cl		/* to 7..1 */
+	sub	%dil,%cl	/* Convert low address values 1..7 ... */
+	sbb	%rsi,%rsi	/* carry was set, so %rsi now ~0u! */
+	and	$7,%cl		/* ... to 7..1 */
 	and	$~7,%dil	/* move address to start of word */
 	shl	$3,%cl		/* now 56, 48 ... 16, 8 */
 	movq	(%rdi),%rax	/* aligned word containing first data */
-	neg	%rsi		/* generate ~c (not doesn't set flags) */
-	dec	%rsi
+	xor	%rdx,%rsi	/* invert of search pattern (~c) */
 	je	22f		/* searching for 0xff */
 21:	shr	%cl,%rsi	/* ~c in low bytes */
 	or	%rsi,%rax	/* set some bits making low bytes invalid */

Reply via email to