Module Name:    src
Committed By:   ragge
Date:           Wed Apr 25 11:06:49 UTC 2018

Added Files:
        src/sys/arch/vax/vax: cpu_in_cksum.S

Log Message:
VAX version of cpu_in_cksum().  Increases network performance significantly.


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 src/sys/arch/vax/vax/cpu_in_cksum.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Added files:

Index: src/sys/arch/vax/vax/cpu_in_cksum.S
diff -u /dev/null src/sys/arch/vax/vax/cpu_in_cksum.S:1.1
--- /dev/null	Wed Apr 25 11:06:49 2018
+++ src/sys/arch/vax/vax/cpu_in_cksum.S	Wed Apr 25 11:06:49 2018
@@ -0,0 +1,222 @@
+/*	$NetBSD: cpu_in_cksum.S,v 1.1 2018/04/25 11:06:49 ragge Exp $	*/
+
+/*-
+ * Copyright (c) 2017 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Anders Magnusson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 1988, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_cksum.c	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Assembly version of cpu_in_cksum() for vax, following the structure
+ * of the C version of the file but using vax instructions for speed.
+ * Increases network throughput by almost 50% (NFS tests).
+ */
+
+#include <machine/asm.h>
+__KERNEL_RCSID(0, "$NetBSD: cpu_in_cksum.S,v 1.1 2018/04/25 11:06:49 ragge Exp $");
+
+#include "assym.h"
+
+#define off	%r0
+#define mlen	%r1
+#define m	%r2
+#define data	%r3
+#define sum	%r4
+#define len	%r5
+#define byte_swapped	%r6
+#define tmp	%r7
+# Ones-complement checksum of len bytes, starting off bytes into the mbuf chain m.
+# int cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
+# Returns the folded 16-bit sum ^ 0xffff, or -1 (after a printf) when the chain ends early.
+ENTRY(cpu_in_cksum, R7|R6)
+# NOTE(review): R7|R6 is presumably the register save mask (byte_swapped, tmp); confirm in asm.h
+	subl2 $4,%sp			# reserve a stack longword; never referenced below
+
+	movl 4(%ap),m			# m = arg 1
+	movl 8(%ap),len			# len = arg 2
+	movl 12(%ap),off		# off = arg 3
+	movl 16(%ap),sum		# sum = arg 4 (initial_sum)
+
+	clrl byte_swapped		# byte_swapped = 0;
+
+.Lfirstloop:				# for (;;) {	(skip the first off bytes)
+	tstl m				# if (__predict_false(m == NULL))
+	jeql .Lout_of_data		#	goto out_of_data;
+
+	movl M_LEN(m),mlen		# mlen = m->m_len;
+	cmpl off,mlen			# if (mlen > off) {
+	jgeq 1f				#	(branch skips the body when off >= mlen)
+	subl2 off,mlen			#	mlen -= off;
+	addl3 M_DATA(m),off,data	#	data = mtod(m, uint8_t *) + off;
+	jbr .Lpost_initial_offset	#	goto post_initial_offset;
+1:					# }
+	subl2 mlen,off			# off -= mlen;
+	tstl len			# if (len == 0)
+	jeql .Lsecondloop		#	break;
+	movl M_NEXT(m),m		# m = m->m_next;
+	jbr .Lfirstloop			# }
+
+.Lthirdstmt:				# increment step of the second for loop:
+	movl M_NEXT(m),m		# m = m->m_next) {
+.Lsecondloop:				# for (;
+	tstl len			# len > 0; (implemented as a len != 0 test)
+	jeql .Lendsecond		# leave the loop once len reaches 0
+	tstl m				# if (__predict_false(m == NULL))
+	jeql .Lout_of_data		#	goto out_of_data;
+
+	movl M_LEN(m),mlen		# mlen = m->m_len;
+	movl M_DATA(m),data		# data = mtod(m, uint8_t *);
+.Lpost_initial_offset:			# entered from the first loop with data/mlen adjusted by off
+	tstl mlen			# if (mlen == 0)
+	jeql .Lthirdstmt		#	continue;
+	cmpl len,mlen			# if (mlen > len)
+	jgeq 1f				#	(branch skips the body when len >= mlen)
+	movl len,mlen			#	mlen = len;
+1:	subl2 mlen,len			# len -= mlen
+	cmpl mlen,$16			# if (mlen < 16)
+	jlss .Lshort_mbuf		#	goto short_mbuf;
+# Reached with mlen >= 16.
+#	Align on longword boundary: consume a leading byte and/or word first
+# so the unrolled longword adds below start on a 4-byte boundary.
+	blbc data,1f			# if ((uintptr_t)data & 1) {
+	movzbl (data)+,tmp		#	tmp = *data++;
+	addl2 tmp,sum			#	sum += tmp;
+	adwc $0,sum			#	sum += carry; (end-around carry)
+	rotl $8,sum,sum			#	sum = (sum << 8 | sum >> 24);
+	xorl2 $1,byte_swapped		#	byte_swapped ^= 1;
+	decl mlen			#	mlen--;
+1:					# }
+	bbc $1,data,1f			# if ((uintptr_t)data & 2) {
+	movzwl (data)+,tmp		#	tmp = *(uint16_t *)data; data += 2;
+	addl2 tmp,sum			#	sum += tmp;
+	adwc $0,sum			#	sum += carry; (end-around carry)
+	subl2 $2,mlen			#	mlen -= 2;
+1:					# }
+#
+# Add 16 words (8 longwords) per iteration, chaining the carry through adwc
+#
+2:	subl2 $32,mlen			# while ((mlen -= 32) >= 0) {
+	jlss 1f				#	(exit as soon as the subtract goes negative)
+	addl2 (data)+,sum		#	sum += *(uint32_t *)data; data += 4;
+	adwc (data)+,sum		#	sum += *(uint32_t *)data + carry; data += 4;
+	adwc (data)+,sum		#	sum += *(uint32_t *)data + carry; data += 4;
+	adwc (data)+,sum		#	sum += *(uint32_t *)data + carry; data += 4;
+	adwc (data)+,sum		#	sum += *(uint32_t *)data + carry; data += 4;
+	adwc (data)+,sum		#	sum += *(uint32_t *)data + carry; data += 4;
+	adwc (data)+,sum		#	sum += *(uint32_t *)data + carry; data += 4;
+	adwc (data)+,sum		#	sum += *(uint32_t *)data + carry; data += 4;
+	adwc $0,sum			#	sum += carry; (fold in the last carry)
+	jbr 2b				# }
+
+1:	addl2 $32,mlen			# mlen += 32; (undo the failed subtract, 0 <= mlen < 32)
+	bbc $4,mlen,1f			# if (mlen & 16) {	(same as mlen >= 16 here)
+	addl2 (data)+,sum		#	sum += *(uint32_t *)data; data += 4;
+	adwc (data)+,sum		#	sum += *(uint32_t *)data + carry; data += 4;
+	adwc (data)+,sum		#	sum += *(uint32_t *)data + carry; data += 4;
+	adwc (data)+,sum		#	sum += *(uint32_t *)data + carry; data += 4;
+	adwc $0,sum			#	sum += carry;
+	subl2 $16,mlen			#	mlen -= 16;
+1:					# }
+
+.Lshort_mbuf:				# short_mbuf: (mlen < 16 on every path here)
+	bbc $3,mlen,1f			# if (mlen & 8) {
+	addl2 (data)+,sum		#	sum += *(uint32_t *)data; data += 4;
+	adwc (data)+,sum		#	sum += *(uint32_t *)data + carry; data += 4;
+	adwc $0,sum			#	sum += carry;
+	subl2 $8,mlen			#	mlen -= 8;
+1:					# }
+	bbc $2,mlen,1f			# if (mlen & 4) {
+	addl2 (data)+,sum		#	sum += *(uint32_t *)data; data += 4;
+	adwc $0,sum			#	sum += carry;
+	subl2 $4,mlen			#	mlen -= 4;
+1:					# }
+
+	bbc $1,mlen,1f			# if (mlen & 2) {
+	movzwl (data)+,tmp		#	tmp = *(uint16_t *)data; data += 2;
+	addl2 tmp,sum			#	sum += tmp;
+	adwc $0,sum			#	sum += carry;
+1:					# }
+	blbc mlen,1f			# if (mlen & 1) {
+	movzbl (data)+,tmp		#	tmp = *data++;
+	addl2 tmp,sum			#	sum += tmp;
+	adwc $0,sum			#	sum += carry;
+	rotl $8,sum,sum			#	sum = (sum << 8 | sum >> 24);
+	xorl2 $1,byte_swapped		#	byte_swapped ^= 1;
+1:					# }
+	jbr .Lthirdstmt			# } (go to the loop increment, next mbuf)
+
+.Lendsecond:
+	tstl len			# if (len != 0)	(len is 0 on the only branch to this label)
+	jneq .Lout_of_data		#	goto out_of_data;
+	tstl byte_swapped		# if (byte_swapped) {
+	jeql 1f
+	rotl $8,sum,sum			# sum = (sum << 8 | sum >> 24);
+1:	rotl $16,sum,tmp		# } tmp = sum with halves swapped; low word = sum >> 16
+	addw2 tmp,sum			# low word of sum += low word of tmp; (sets carry)
+	bicl2 $0xffff0000,sum		# sum &= ~0xffff0000;
+	adwc $0,sum			# sum += carry; relies on bicl2 leaving the addw2 carry intact
+	xorl3 $0xffff,sum,%r0		# return (sum ^ 0xffff);
+	ret
+.Lout_of_data:
+	pushab .Lin_cksum		# push the address of the message string
+	calls $1,printf			# printf(message);
+	mnegl $1,%r0			# return -1;
+	ret
+
+	.section	.rodata
+.Lin_cksum:
+	.asciz "in_cksum: out of data\n"
+

Reply via email to