Module Name:    src
Committed By:   matt
Date:           Tue Dec 18 14:08:25 UTC 2012

Modified Files:
        src/sys/arch/arm/arm: cpu_in_cksum_v4hdr.S

Log Message:
Add ldrd support (one more instruction than the 5 ldrs, but two of the
conditional loads won't be executed, resulting in 2 fewer loads, which should
save a few cycles).


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/arch/arm/arm/cpu_in_cksum_v4hdr.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/arm/arm/cpu_in_cksum_v4hdr.S
diff -u src/sys/arch/arm/arm/cpu_in_cksum_v4hdr.S:1.1 src/sys/arch/arm/arm/cpu_in_cksum_v4hdr.S:1.2
--- src/sys/arch/arm/arm/cpu_in_cksum_v4hdr.S:1.1	Tue Dec 18 13:41:42 2012
+++ src/sys/arch/arm/arm/cpu_in_cksum_v4hdr.S	Tue Dec 18 14:08:25 2012
@@ -29,18 +29,30 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: cpu_in_cksum_v4hdr.S,v 1.1 2012/12/18 13:41:42 matt Exp $")
+RCSID("$NetBSD: cpu_in_cksum_v4hdr.S,v 1.2 2012/12/18 14:08:25 matt Exp $")
 
 ENTRY(cpu_in_cksum_v4hdr)
+#ifdef _ARM_ARCH_DWORD_OK
+	tst	r0, #4			/* 64-bit aligned? */
+	ldreqd	r2, [r0], #8		/* load 1st/2nd words */
+	ldrne	ip, [r0], #4		/* load 1st word */
+	ldreq	ip, [r0, #8]		/* load 5th word */
+	ldrned	r2, [r0, #8]		/* load 4th/5th words */
+#else
 	ldr	ip, [r0]		/* load 1st word */
 	ldr	r3, [r0, #4]		/* load 2nd word */
-	adds	r3, r3, ip		/* add 1st to 2nd */
 	ldr	r2, [r0, #8]		/* load 3rd word */
-	adcs	r2, r2, r3		/* add sum to 3rd */
+#endif
+	adds	r3, r3, ip		/* accumulate */
+	adcs	r2, r2, r3		/* accumulate */
+#ifdef _ARM_ARCH_DWORD_OK
+	ldrd	r0, [r0]		/* load remaining words */
+#else
 	ldr	r1, [r0, #12]		/* load 4th word */
-	adcs	r1, r1, r2		/* add sum to 4th */
 	ldr	r0, [r0, #16]		/* load 5th word */
-	adcs	r0, r0, r1		/* add sum to 5th */
+#endif
+	adcs	r1, r1, r2		/* accumulate */
+	adcs	r0, r0, r1		/* accumulate */
 	/*
 	 * We now have a 33-bit (r0 + carry) sum which needs to resolved to a
 	 * 16-bit sum.
@@ -62,7 +74,7 @@ ENTRY(cpu_in_cksum_v4hdr)
 	movw	r1, #0xffff		/* load 0xffff */
 #else
 	mov	r1, #0x10000		/* load 0x10000 */
-	sub	r1, r1, #1		/* subtract 1 to get 0xffff */
+	sub	r1, r1, #1		/* subtract by 1 to get 0xffff */
 #endif
 	subge	r0, r0, r1		/* subtract 0xffff */
 	eor	r0, r0, r1		/* complement lower halfword */

Reply via email to