Module Name:    src
Committed By:   thorpej
Date:           Sat Jul 10 20:22:38 UTC 2021

Modified Files:
        src/sys/arch/alpha/alpha: pmap.c
Added Files:
        src/sys/arch/alpha/alpha: pmap_subr.s

Log Message:
Move the optimized pmap_zero_page() from pmap.c to a new pmap_subr.s,
and optimize it a little bit more.  Provide an optimized (for 21164,
anyway) pmap_copy_page() as well.
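
For context, neither routine needs a temporary mapping: on alpha every
physical page is reachable through the direct-mapped K0SEG window, so the
page is simply addressed in place.  A minimal C sketch of that idea (the
sketch_* names are illustration only; ALPHA_PHYS_TO_K0SEG() and PAGE_SIZE
are the existing alpha definitions, and this is roughly what the old C code
in pmap.c did -- the new pmap_subr.s does the same work with hand-scheduled
assembly):

	void
	sketch_zero_page(paddr_t phys)
	{
		/* Address the physical page directly through K0SEG. */
		memset((void *)ALPHA_PHYS_TO_K0SEG(phys), 0, PAGE_SIZE);
	}

	void
	sketch_copy_page(paddr_t src, paddr_t dst)
	{
		memcpy((void *)ALPHA_PHYS_TO_K0SEG(dst),
		    (const void *)ALPHA_PHYS_TO_K0SEG(src), PAGE_SIZE);
	}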


To generate a diff of this commit:
cvs rdiff -u -r1.296 -r1.297 src/sys/arch/alpha/alpha/pmap.c
cvs rdiff -u -r0 -r1.1 src/sys/arch/alpha/alpha/pmap_subr.s

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/alpha/alpha/pmap.c
diff -u src/sys/arch/alpha/alpha/pmap.c:1.296 src/sys/arch/alpha/alpha/pmap.c:1.297
--- src/sys/arch/alpha/alpha/pmap.c:1.296	Mon Jul  5 15:12:00 2021
+++ src/sys/arch/alpha/alpha/pmap.c	Sat Jul 10 20:22:37 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.296 2021/07/05 15:12:00 thorpej Exp $ */
+/* $NetBSD: pmap.c,v 1.297 2021/07/10 20:22:37 thorpej Exp $ */
 
 /*-
  * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008, 2020
@@ -135,7 +135,7 @@
 
 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
 
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.296 2021/07/05 15:12:00 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.297 2021/07/10 20:22:37 thorpej Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -2758,85 +2758,9 @@ pmap_deactivate(struct lwp *l)
 	pmap_destroy(pmap);
 }
 
-/*
- * pmap_zero_page:		[ INTERFACE ]
- *
- *	Zero the specified (machine independent) page by mapping the page
- *	into virtual memory and clear its contents, one machine dependent
- *	page at a time.
- *
- *	Note: no locking is necessary in this function.
- */
-void
-pmap_zero_page(paddr_t phys)
-{
-	u_long *p0, *p1, *pend;
-
-#ifdef DEBUG
-	if (pmapdebug & PDB_FOLLOW)
-		printf("pmap_zero_page(%lx)\n", phys);
-#endif
-
-	p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
-	p1 = NULL;
-	pend = (u_long *)((u_long)p0 + PAGE_SIZE);
-
-	/*
-	 * Unroll the loop a bit, doing 16 quadwords per iteration.
-	 * Do only 8 back-to-back stores, and alternate registers.
-	 */
-	do {
-		__asm volatile(
-		"# BEGIN loop body\n"
-		"	addq	%2, (8 * 8), %1		\n"
-		"	stq	$31, (0 * 8)(%0)	\n"
-		"	stq	$31, (1 * 8)(%0)	\n"
-		"	stq	$31, (2 * 8)(%0)	\n"
-		"	stq	$31, (3 * 8)(%0)	\n"
-		"	stq	$31, (4 * 8)(%0)	\n"
-		"	stq	$31, (5 * 8)(%0)	\n"
-		"	stq	$31, (6 * 8)(%0)	\n"
-		"	stq	$31, (7 * 8)(%0)	\n"
-		"					\n"
-		"	addq	%3, (8 * 8), %0		\n"
-		"	stq	$31, (0 * 8)(%1)	\n"
-		"	stq	$31, (1 * 8)(%1)	\n"
-		"	stq	$31, (2 * 8)(%1)	\n"
-		"	stq	$31, (3 * 8)(%1)	\n"
-		"	stq	$31, (4 * 8)(%1)	\n"
-		"	stq	$31, (5 * 8)(%1)	\n"
-		"	stq	$31, (6 * 8)(%1)	\n"
-		"	stq	$31, (7 * 8)(%1)	\n"
-		"	# END loop body"
-		: "=r" (p0), "=r" (p1)
-		: "0" (p0), "1" (p1)
-		: "memory");
-	} while (p0 < pend);
-}
-
-/*
- * pmap_copy_page:		[ INTERFACE ]
- *
- *	Copy the specified (machine independent) page by mapping the page
- *	into virtual memory and using memcpy to copy the page, one machine
- *	dependent page at a time.
- *
- *	Note: no locking is necessary in this function.
- */
-void
-pmap_copy_page(paddr_t src, paddr_t dst)
-{
-	const void *s;
-	void *d;
+/* pmap_zero_page() is in pmap_subr.s */
 
-#ifdef DEBUG
-	if (pmapdebug & PDB_FOLLOW)
-		printf("pmap_copy_page(%lx, %lx)\n", src, dst);
-#endif
-	s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
-	d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
-	memcpy(d, s, PAGE_SIZE);
-}
+/* pmap_copy_page() is in pmap_subr.s */
 
 /*
  * pmap_pageidlezero:		[ INTERFACE ]

Added files:

Index: src/sys/arch/alpha/alpha/pmap_subr.s
diff -u /dev/null src/sys/arch/alpha/alpha/pmap_subr.s:1.1
--- /dev/null	Sat Jul 10 20:22:38 2021
+++ src/sys/arch/alpha/alpha/pmap_subr.s	Sat Jul 10 20:22:37 2021
@@ -0,0 +1,165 @@
+/* $NetBSD: pmap_subr.s,v 1.1 2021/07/10 20:22:37 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2021 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+__KERNEL_RCSID(7, "$NetBSD: pmap_subr.s,v 1.1 2021/07/10 20:22:37 thorpej Exp $")
+
+/*
+ * Optimized pmap subroutines.
+ */
+
+	.text
+inc7:	.stabs	__FILE__,132,0,0,inc7;	.loc	1 __LINE__
+
+/*
+ * pmap_zero_page:		[ INTERFACE ]
+ *
+ *	Zero the specified (machine independent) page by mapping the page
+ *	into virtual memory and clear its contents, one machine dependent
+ *	page at a time.
+ *
+ *	Note: no locking is necessary in this function.
+ */
+	.p2align 4
+LEAF(pmap_zero_page, 1)
+	/* No global references - skip LDGP() */
+
+	/*
+	 * Code here is arranged to keep branch targets on 16-byte
+	 * boundaries, minimize result latencies in the loop, unroll
+	 * the loop to at least 20 insns, and to dual-issue when
+	 * feasible.
+	 *
+	 * In the setup, we use nop and unop to minimize pipeline stalls
+	 * on dependent instruction pairs.
+	 */
+
+	/* ---- */
+	lda	t0, -1
+	nop
+	sll	t0, 42, t0		/* t0 = ALPHA_K0SEG_BASE */
+	/*
+	 * Loop counter:
+	 * PAGE_SIZE / 8 bytes per store / 16 stores per iteration
+	 */
+	lda	v0, ((ALPHA_PGBYTES / 8) / 16)
+	/* ---- */
+	or	a0, t0, a0		/* a0 = ALPHA_PHYS_TO_K0SEG(a0) */
+	nop
+	addq	a0, (8*8), a2		/* a2 = a0 + 8-quads */
+	unop
+	/* ---- */
+1:	stq	zero, (0*8)(a0)		/* 0 */
+	stq	zero, (1*8)(a0)		/* 1 */
+	stq	zero, (2*8)(a0)		/* 2 */
+	stq	zero, (3*8)(a0)		/* 3 */
+	/* ---- */
+	stq	zero, (4*8)(a0)		/* 4 */
+	stq	zero, (5*8)(a0)		/* 5 */
+	stq	zero, (6*8)(a0)		/* 6 */
+	stq	zero, (7*8)(a0)		/* 7 */
+	/* ---- */
+	addq	a2, (8*8), a0		/* a0 = a2 + 8-quads */
+	stq	zero, (0*8)(a2)		/* 8 */
+	stq	zero, (1*8)(a2)		/* 9 */
+	stq	zero, (2*8)(a2)		/* 10 */
+	/* ---- */
+	subq	v0, 1, v0		/* count-- */
+	stq	zero, (3*8)(a2)		/* 11 */
+	stq	zero, (4*8)(a2)		/* 12 */
+	stq	zero, (5*8)(a2)		/* 13 */
+	/* ---- */
+	stq	zero, (6*8)(a2)		/* 14 */
+	stq	zero, (7*8)(a2)		/* 15 */
+	addq	a0, (8*8), a2		/* a2 = a0 + 8-quads */
+	bne	v0, 1b			/* loop around if count != 0 */
+	/* ---- */
+
+	RET
+	END(pmap_zero_page)
+
+/*
+ * pmap_copy_page:		[ INTERFACE ]
+ *
+ *	Copy the specified (machine independent) page by mapping the page
+ *	into virtual memory and copying the page, one machine dependent
+ *	page at a time.
+ *
+ *	Note: no locking is necessary in this function.
+ */
+	.p2align 4
+LEAF(pmap_copy_page, 2)
+	/* No global references - skip LDGP() */
+
+	/* See above. */
+
+	/* ---- */
+	lda	t0, -1
+	nop
+	sll	t0, 42, t0		/* t0 = ALPHA_K0SEG_BASE */
+	/*
+	 * Loop counter:
+	 * PAGE_SIZE / 8 bytes per store / 8 stores per iteration
+	 */
+	lda	v0, ((ALPHA_PGBYTES / 8) / 8)
+	/* ---- */
+	or	a0, t0, a0		/* a0 = ALPHA_PHYS_TO_K0SEG(a0) */
+	unop
+	or	a1, t0, a1		/* a1 = ALPHA_PHYS_TO_K0SEG(a1) */
+	unop
+	/* ---- */
+1:	ldq	t0, (0*8)(a0)		/* load 0 */
+	ldq	t1, (1*8)(a0)		/* load 1 */
+	ldq	t2, (2*8)(a0)		/* load 2 */
+	ldq	t3, (3*8)(a0)		/* load 3 */
+	/* ---- */
+	ldq	t4, (4*8)(a0)		/* load 4 */
+	ldq	t5, (5*8)(a0)		/* load 5 */
+	ldq	t6, (6*8)(a0)		/* load 6 */
+	ldq	t7, (7*8)(a0)		/* load 7 */
+	/* ---- */
+	addq	a0, (8*8), a0		/* a0 = a0 + 8-quads */
+	stq	t0, (0*8)(a1)		/* store 0 */
+	stq	t1, (1*8)(a1)		/* store 1 */
+	stq	t2, (2*8)(a1)		/* store 2 */
+	/* ---- */
+	subq	v0, 1, v0		/* count-- */
+	stq	t3, (3*8)(a1)		/* store 3 */
+	stq	t4, (4*8)(a1)		/* store 4 */
+	stq	t5, (5*8)(a1)		/* store 5 */
+	/* ---- */
+	stq	t6, (6*8)(a1)		/* store 6 */
+	stq	t7, (7*8)(a1)		/* store 7 */
+	addq	a1, (8*8), a1		/* a1 = a1 + 8-quads */
+	bne	v0, 1b			/* loop around if count != 0 */
+	/* ---- */
+
+	RET
+	END(pmap_copy_page)
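
For reference, here is a plain C rendering of the new unrolled copy loop (a
sketch only, not part of the commit; the sketch_copy_page name is made up
for illustration).  Assuming the usual alpha constants (ALPHA_PGBYTES is
8192 and ALPHA_K0SEG_BASE is -1UL << 42, which is what the lda/sll pair
above computes), the copy loop runs 8192 / 8 / 8 = 128 iterations of 8
quadword loads and stores, and the zero loop runs 8192 / 8 / 16 = 64
iterations of 16 quadword stores:

	#include <stdint.h>

	void
	sketch_copy_page(uint64_t src_pa, uint64_t dst_pa)
	{
		const uint64_t k0seg = (uint64_t)-1 << 42; /* ALPHA_K0SEG_BASE */
		const uint64_t *s = (const uint64_t *)(src_pa | k0seg);
		uint64_t *d = (uint64_t *)(dst_pa | k0seg);
		long count = (8192 / 8) / 8;		/* 128 iterations */

		do {
			/* 8 loads, then 8 stores, mirroring t0..t7 above. */
			uint64_t t0 = s[0], t1 = s[1], t2 = s[2], t3 = s[3];
			uint64_t t4 = s[4], t5 = s[5], t6 = s[6], t7 = s[7];
			d[0] = t0; d[1] = t1; d[2] = t2; d[3] = t3;
			d[4] = t4; d[5] = t5; d[6] = t6; d[7] = t7;
			s += 8;
			d += 8;
		} while (--count != 0);
	}

The scheduling details that make the assembly version worth having (16-byte
aligned branch targets, grouping independent loads and stores so the 21164
can dual-issue them, and moving the pointer updates away from their uses)
do not survive a C rendering, which is the point of the new file.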
