Module Name:    src
Committed By:   rmind
Date:           Mon Apr 26 02:43:35 UTC 2010

Modified Files:
        src/sys/arch/i386/i386 [rmind-uvmplock]: genassym.cf vector.S
        src/sys/arch/x86/include [rmind-uvmplock]: cpu.h i82489var.h pmap.h
        src/sys/arch/x86/x86 [rmind-uvmplock]: cpu.c lapic.c pmap.c

Log Message:
Apply a renovated patch to significantly reduce TLB shootdowns in the x86
pmap; also provide a TLBSTATS option to measure and track them.  Details:

http://mail-index.netbsd.org/port-i386/2009/01/11/msg001018.html

Patch from Andrew Doran, proposed on port-i386 in January 2009.

XXX: amd64 and xen are not yet converted; work in progress.
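
In short, callers move from a range-based shootdown plus an explicit wait
to a per-page, reason-tagged shootdown whose processing is batched per CPU.
A minimal before/after sketch (illustrative only; the real call sites and
the full set of TLBSHOOT_* reasons are in the diffs below):

	/* Before: invalidate a range and wait for remote CPUs. */
	pmap_tlb_shootdown(pmap_kernel(), sva, eva, opte);
	pmap_tlb_shootwait();

	/* After: queue single pages with a reason code; one IPI covers the
	 * whole batch when pmap_tlb_shootnow() (or pmap_update()) runs. */
	pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KREMOVE);
	pmap_tlb_shootnow();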


To generate a diff of this commit:
cvs rdiff -u -r1.85 -r1.85.2.1 src/sys/arch/i386/i386/genassym.cf
cvs rdiff -u -r1.53 -r1.53.2.1 src/sys/arch/i386/i386/vector.S
cvs rdiff -u -r1.20 -r1.20.4.1 src/sys/arch/x86/include/cpu.h
cvs rdiff -u -r1.12 -r1.12.22.1 src/sys/arch/x86/include/i82489var.h
cvs rdiff -u -r1.29.2.1 -r1.29.2.2 src/sys/arch/x86/include/pmap.h
cvs rdiff -u -r1.69 -r1.69.2.1 src/sys/arch/x86/x86/cpu.c
cvs rdiff -u -r1.44 -r1.44.4.1 src/sys/arch/x86/x86/lapic.c
cvs rdiff -u -r1.105.2.3 -r1.105.2.4 src/sys/arch/x86/x86/pmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/i386/i386/genassym.cf
diff -u src/sys/arch/i386/i386/genassym.cf:1.85 src/sys/arch/i386/i386/genassym.cf:1.85.2.1
--- src/sys/arch/i386/i386/genassym.cf:1.85	Mon Feb 22 23:52:17 2010
+++ src/sys/arch/i386/i386/genassym.cf	Mon Apr 26 02:43:34 2010
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.85 2010/02/22 23:52:17 jym Exp $
+#	$NetBSD: genassym.cf,v 1.85.2.1 2010/04/26 02:43:34 rmind Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -160,6 +160,7 @@
 define	PG_V			PG_V
 define	PG_KW			PG_KW
 define	PG_KR			PG_KR
+define	PG_G			PG_G
 define	PGEX_U			PGEX_U
 
 define	L2_SLOT_KERNBASE	pl2_pi(KERNBASE)
@@ -288,7 +289,6 @@
 define	CPU_INFO_SELF		offsetof(struct cpu_info, ci_self)
 define	CPU_INFO_RESCHED	offsetof(struct cpu_info, ci_want_resched)
 define	CPU_INFO_WANT_PMAPLOAD	offsetof(struct cpu_info, ci_want_pmapload)
-define	CPU_INFO_PMAP_CPU	offsetof(struct cpu_info, ci_pmap_cpu)
 define	CPU_INFO_TLBSTATE	offsetof(struct cpu_info, ci_tlbstate)
 define	TLBSTATE_VALID		TLBSTATE_VALID
 define	TLBSTATE_LAZY		TLBSTATE_LAZY
@@ -426,12 +426,11 @@
 define	RW_READER		RW_READER
 define	RW_WRITER		RW_WRITER
 
-define	MB_POINTER		offsetof(struct pmap_mbox, mb_pointer)
-define	MB_GLOBAL		offsetof(struct pmap_mbox, mb_global)
-define	MB_ADDR1		offsetof(struct pmap_mbox, mb_addr1)
-define	MB_ADDR2		offsetof(struct pmap_mbox, mb_addr2)
-define	MB_HEAD			offsetof(struct pmap_mbox, mb_head)
-define	MB_TAIL			offsetof(struct pmap_mbox, mb_tail)
+define	TM_PENDING		offsetof(struct pmap_tlb_mailbox, tm_pending)
+define	TP_COUNT		offsetof(struct pmap_tlb_packet, tp_count)
+define	TP_VA			offsetof(struct pmap_tlb_packet, tp_va)
+define	TP_USERMASK		offsetof(struct pmap_tlb_packet, tp_usermask)
+define	TP_PTE			offsetof(struct pmap_tlb_packet, tp_pte)
 
 define	PM_CPUS			offsetof(struct pmap, pm_cpus)
 

Index: src/sys/arch/i386/i386/vector.S
diff -u src/sys/arch/i386/i386/vector.S:1.53 src/sys/arch/i386/i386/vector.S:1.53.2.1
--- src/sys/arch/i386/i386/vector.S:1.53	Mon Feb 22 06:42:14 2010
+++ src/sys/arch/i386/i386/vector.S	Mon Apr 26 02:43:34 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: vector.S,v 1.53 2010/02/22 06:42:14 darran Exp $	*/
+/*	$NetBSD: vector.S,v 1.53.2.1 2010/04/26 02:43:34 rmind Exp $	*/
 
 /*
  * Copyright 2002 (c) Wasabi Systems, Inc.
@@ -65,7 +65,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: vector.S,v 1.53 2010/02/22 06:42:14 darran Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vector.S,v 1.53.2.1 2010/04/26 02:43:34 rmind Exp $");
 
 #include "opt_ddb.h"
 #include "opt_multiprocessor.h"
@@ -186,10 +186,10 @@
 IDTVEC_END(resume_lapic_ipi)
 
 /*
- * Multicast TLB shootdown handler for !kernel_pmap.
+ * TLB shootdown handler.
  */
-IDTVEC(intr_lapic_tlb_mcast)
-	/* Save state. */
+IDTVEC(intr_lapic_tlb)
+	/* Save state and ack the interrupt. */
 	pushl	%eax
 	pushl	%ebx
 	pushl	%ecx
@@ -198,45 +198,44 @@
 	pushl	%fs
 	movl	$GSEL(GDATA_SEL, SEL_KPL), %eax
 	movl	$GSEL(GCPU_SEL, SEL_KPL), %edx
-	movl	%eax, %ds
-	movl	%edx, %fs
-	/* Count it. */
-	addl	$1, CPUVAR(TLB_EVCNT)+EV_COUNT
-	adcl	$0, CPUVAR(TLB_EVCNT)+EV_COUNT+4
-	/* Find out what we need to invalidate. */
-	movl	CPUVAR(PMAP_CPU), %ecx
-	movl	MB_ADDR1(%ecx), %eax
-	movl	MB_ADDR2(%ecx), %edx
-	xorl	%ebx, %ebx
-	xchgl	MB_POINTER(%ecx), %ebx
+	mov	%ax, %ds
+	mov	%dx, %fs
 	movl	$0, _C_LABEL(local_apic)+LAPIC_EOI
-	cmpl	$-1, %eax
+
+	/* Find out what we need to invalidate. */
+	leal	_C_LABEL(pmap_tlb_packet), %ebx
+	movswl	TP_COUNT(%ebx), %ecx
+	cmpl	$-1, %ecx
 	je	4f
+	leal	TP_VA(%ebx), %edx
 1:
 	/* Invalidate a single page or a range of pages. */
+	movl	(%edx), %eax
 	invlpg	(%eax)
-	addl	$PAGE_SIZE, %eax
-	cmpl	%edx, %eax
-	jb	1b
+	addl	$4, %edx
+	decl	%ecx
+	jg	1b
 2:
-	/* Ack the request. */
-	lock
-	incl	(%ebx)
 	/*
-	 * Check the current TLB state.  If we don't want further
+	 * Check the current TLB state.  If we do not want further
 	 * invalidations for this pmap, then take the CPU out of
 	 * the pmap's bitmask.
 	 */
+	movl	CPUVAR(CPUMASK), %eax
 	cmpl	$TLBSTATE_LAZY, CPUVAR(TLBSTATE)
 	jne	3f
+	testl	%eax, TP_USERMASK(%ebx)
+	jz	3f
 	movl	CPUVAR(PMAP), %edx
-	movl	CPUVAR(CPUMASK), %ecx
+	movl	%eax, %ecx
 	notl	%ecx
 	lock
 	andl	%ecx, PM_CPUS(%edx)
 	movl	$TLBSTATE_STALE, CPUVAR(TLBSTATE)
 3:
-	/* Restore state and return. */
+	/* Ack the request, restore state & return. */
+	lock
+	xorl	%eax, _C_LABEL(pmap_tlb_mailbox)+TM_PENDING
 	popl	%fs
 	popl	%ds
 	popl	%edx
@@ -245,11 +244,18 @@
 	popl	%eax
 	iret
 4:
+	/* Invalidate whole address space: */
+	testw	$PG_G, TP_PTE(%ebx)
+	jnz	5f
 	/*
-	 * Get the emap generation number.  Invalidate user TLB entries.
-	 * Perform emap update, pass the generation number.  Note that
-	 * caller-save registers might be modified (all saved in the
-	 * beginning).  Only %ebx value is used by 2b context.
+	 * a) Invalidating user TLB entries only.
+	 *
+	 * - Get the emap generation number.
+	 * - Invalidate TLB entries.
+	 * - Perform emap update, pass the generation number.
+	 *
+	 * Note that caller-save registers might be modified (all saved in the
+	 * beginning).  Only %ebx value must be preserved for the 2b context.
 	 */
 	call	_C_LABEL(uvm_emap_gen_return)
 	movl	%eax, %edx
@@ -259,83 +265,24 @@
 	call	_C_LABEL(uvm_emap_update)
 	addl	$4, %esp
 	jmp	2b
-IDTVEC_END(intr_lapic_tlb_mcast)
-
-/*
- * Broadcast TLB shootdown handler for kernel_pmap.
- */
-IDTVEC(intr_lapic_tlb_bcast)
-	/* Save state and ack the interrupt. */
-	pushl	%eax
-	pushl	%ebx
-	pushl	%ecx
-	pushl	%edx
-	pushl	%ds
-	pushl	%fs
-	movl	$GSEL(GDATA_SEL, SEL_KPL), %eax
-	movl	$GSEL(GCPU_SEL, SEL_KPL), %edx
-	movl	%eax, %ds
-	movl	%edx, %fs
-	/* Find out what we need to invalidate. */
-	movl	%ss:_C_LABEL(pmap_mbox)+MB_ADDR1, %eax
-	movl	%ss:_C_LABEL(pmap_mbox)+MB_ADDR2, %edx
-	movl	%ss:_C_LABEL(pmap_mbox)+MB_GLOBAL, %ebx
-	movl	$0, %ss:_C_LABEL(local_apic)+LAPIC_EOI
-	cmpl	$-1, %eax
-	je,pn	3f
-1:
-	/* Invalidate a single page or a range of pages. */
-	invlpg	%ss:(%eax)
-	addl	$PAGE_SIZE, %eax
-	cmpl	%edx, %eax
-	jb	1b
-2:
-	/* Ack the request, restore state & return. */
-	lock
-	incl	%ss:_C_LABEL(pmap_mbox)+MB_TAIL
-	popl	%fs
-	popl	%ds
-	popl	%edx
-	popl	%ecx
-	popl	%ebx
-	popl	%eax
-	iret
-3:
-	testl	%ebx, %ebx
-	jz	4f
+5:
 	/*
-	 * If we have been asked to invalidate the entire TLB we arrive here.
-	 * Get the emap generation before flush, and use it after for update.
-	 * Note that caller-save registers might be modified, though no
-	 * registers need to be preserved for 2b context.
+	 * b) Invalidating user and kernel TLB entries.
+	 *
+	 * See notes above.
 	 */
 	call	_C_LABEL(uvm_emap_gen_return)
-	movl	%eax, %ebx
+	movl	%eax, %ecx
 	movl	%cr4, %eax
 	movl	%eax, %edx
 	andl	$~CR4_PGE, %edx
 	movl	%edx, %cr4
 	movl	%eax, %cr4
-	pushl	%ebx
-	call	_C_LABEL(uvm_emap_update)
-	addl	$4, %esp
-	jmp	2b
-4:
-	/*
-	 * Get the emap generation number.  Invalidate user TLB entries.
-	 * Perform emap update, pass the generation number.  Note that
-	 * caller-save registers might be modified, though no registers
-	 * need to be preserved for 2b context.
-	 */
-	call	_C_LABEL(uvm_emap_gen_return)
-	movl	%eax, %ebx
-	movl	%cr3, %eax
-	movl	%eax, %cr3
-	pushl	%ebx
+	pushl	%ecx
 	call	_C_LABEL(uvm_emap_update)
 	addl	$4, %esp
 	jmp	2b
-IDTVEC_END(intr_lapic_tlb_bcast)
+IDTVEC_END(intr_lapic_tlb)
 
 #if defined(DDB)
 IDTVEC(intrddbipi)
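
The invalidation choice made by the new Xintr_lapic_tlb handler mirrors the
local-CPU path in pmap_tlb_shootnow() (see the pmap.c diff below); roughly,
as a C sketch that omits the emap-generation and tlbstate bookkeeping:

	/* Sketch of the handler's invalidation decision (simplified). */
	int i;

	if (pmap_tlb_packet.tp_count != (uint16_t)-1) {
		/* A short list of pages: invalidate each one. */
		for (i = 0; i < pmap_tlb_packet.tp_count; i++)
			pmap_update_pg(pmap_tlb_packet.tp_va[i]);
	} else if ((pmap_tlb_packet.tp_pte & PG_G) != 0) {
		/* Kernel mappings involved: drop global entries too. */
		tlbflushg();
	} else {
		/* User mappings only: a non-global flush suffices. */
		tlbflush();
	}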

Index: src/sys/arch/x86/include/cpu.h
diff -u src/sys/arch/x86/include/cpu.h:1.20 src/sys/arch/x86/include/cpu.h:1.20.4.1
--- src/sys/arch/x86/include/cpu.h:1.20	Mon Jan 18 16:40:17 2010
+++ src/sys/arch/x86/include/cpu.h	Mon Apr 26 02:43:34 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.20 2010/01/18 16:40:17 rmind Exp $	*/
+/*	$NetBSD: cpu.h,v 1.20.4.1 2010/04/26 02:43:34 rmind Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -88,7 +88,6 @@
 	 */
 	struct cpu_info *ci_next;	/* next cpu */
 	struct lwp *ci_curlwp;		/* current owner of the processor */
-	struct pmap_cpu *ci_pmap_cpu;	/* per-CPU pmap data */
 	struct lwp *ci_fpcurlwp;	/* current owner of the FPU */
 	int	ci_fpsaving;		/* save in progress */
 	int	ci_fpused;		/* XEN: FPU was used by curlwp */
@@ -114,6 +113,7 @@
 	int ci_curldt;		/* current LDT descriptor */
 	int ci_nintrhand;	/* number of H/W interrupt handlers */
 	uint64_t ci_scratch;
+	uintptr_t ci_pmap_data[128 / sizeof(uintptr_t)];
 
 #ifdef XEN
 	struct iplsource  *ci_isources[NIPL];

Index: src/sys/arch/x86/include/i82489var.h
diff -u src/sys/arch/x86/include/i82489var.h:1.12 src/sys/arch/x86/include/i82489var.h:1.12.22.1
--- src/sys/arch/x86/include/i82489var.h:1.12	Mon Apr 28 20:23:40 2008
+++ src/sys/arch/x86/include/i82489var.h	Mon Apr 26 02:43:34 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: i82489var.h,v 1.12 2008/04/28 20:23:40 martin Exp $	*/
+/*	$NetBSD: i82489var.h,v 1.12.22.1 2010/04/26 02:43:34 rmind Exp $	*/
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -76,11 +76,8 @@
 extern void Xresume_lapic_ipi(void);
 #define LAPIC_IPI_VECTOR			0xe0
 
-extern void Xintr_lapic_tlb_bcast(void);
-#define LAPIC_TLB_BCAST_VECTOR			0xe1
-
-extern void Xintr_lapic_tlb_mcast(void);
-#define LAPIC_TLB_MCAST_VECTOR			0xe2
+extern void Xintr_lapic_tlb(void);
+#define LAPIC_TLB_VECTOR			0xe1
 
 /*
  * Vector used for local apic timer interrupts.

Index: src/sys/arch/x86/include/pmap.h
diff -u src/sys/arch/x86/include/pmap.h:1.29.2.1 src/sys/arch/x86/include/pmap.h:1.29.2.2
--- src/sys/arch/x86/include/pmap.h:1.29.2.1	Tue Mar 16 15:38:04 2010
+++ src/sys/arch/x86/include/pmap.h	Mon Apr 26 02:43:34 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.h,v 1.29.2.1 2010/03/16 15:38:04 rmind Exp $	*/
+/*	$NetBSD: pmap.h,v 1.29.2.2 2010/04/26 02:43:34 rmind Exp $	*/
 
 /*
  *
@@ -165,6 +165,8 @@
 	uint32_t pm_cpus;		/* mask of CPUs using pmap */
 	uint32_t pm_kernel_cpus;	/* mask of CPUs using kernel part
 					 of pmap */
+	uint64_t pm_ncsw;		/* for assertions */
+	struct vm_page *pm_gc_ptp;	/* pages from pmap g/c */
 };
 
 /* macro to access pm_pdirpa */
@@ -231,8 +233,26 @@
 
 vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
 
-void	pmap_tlb_shootdown(pmap_t, vaddr_t, vaddr_t, pt_entry_t);
-void	pmap_tlb_shootwait(void);
+typedef enum tlbwhy {
+	TLBSHOOT_APTE,
+	TLBSHOOT_KENTER,
+	TLBSHOOT_KREMOVE,
+	TLBSHOOT_FREE_PTP1,
+	TLBSHOOT_FREE_PTP2,
+	TLBSHOOT_REMOVE_PTE,
+	TLBSHOOT_REMOVE_PTES,
+	TLBSHOOT_SYNC_PV1,
+	TLBSHOOT_SYNC_PV2,
+	TLBSHOOT_WRITE_PROTECT,
+	TLBSHOOT_ENTER,
+	TLBSHOOT_UPDATE,
+	TLBSHOOT_BUS_DMA,
+	TLBSHOOT_BUS_SPACE,
+	TLBSHOOT__MAX,
+} tlbwhy_t;
+
+void	pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t, tlbwhy_t);
+void	pmap_tlb_shootnow(void);
 
 #define	__HAVE_PMAP_EMAP
 
@@ -348,8 +368,6 @@
 
 paddr_t vtophys(vaddr_t);
 vaddr_t	pmap_map(vaddr_t, paddr_t, paddr_t, vm_prot_t);
-void	pmap_cpu_init_early(struct cpu_info *);
-void	pmap_cpu_init_late(struct cpu_info *);
 bool	sse2_idlezero_page(void *);
 
 
@@ -419,16 +437,28 @@
 #define	POOL_VTOPHYS(va)	vtophys((vaddr_t) (va))
 
 /*
- * TLB shootdown mailbox.
+ * TLB shootdown structures.
  */
 
-struct pmap_mbox {
-	volatile void		*mb_pointer;
-	volatile uintptr_t	mb_addr1;
-	volatile uintptr_t	mb_addr2;
-	volatile uintptr_t	mb_head;
-	volatile uintptr_t	mb_tail;
-	volatile uintptr_t	mb_global;
+struct pmap_tlb_packet {
+#ifdef _LP64
+	uintptr_t	tp_va[14];	/* whole struct: 128 bytes */
+#else
+	uintptr_t	tp_va[13];	/* whole struct: 64 bytes */
+#endif
+	uint16_t	tp_count;
+	uint16_t	tp_pte;
+	uint32_t	tp_cpumask;
+	uint32_t	tp_usermask;
+};
+#define	TP_MAXVA	6		/* no more than N seperate invlpg */
+
+struct pmap_tlb_mailbox {
+	uintptr_t	tm_pending;
+	uintptr_t	tm_gen;
+	uintptr_t	tm_usergen;
+	uintptr_t	tm_globalgen;
+	char		tm_pad[64 - sizeof(uintptr_t) * 4];
 };
 
 #endif /* _KERNEL */
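
One sizing note on the structures above: each CPU accumulates its pending
shootdowns in a private struct pmap_tlb_packet, kept in the new 128-byte
ci_pmap_data[] scratch area added to struct cpu_info (see the cpu.h diff).
A compile-time check along these lines would express that constraint (a
sketch only, not part of this change):

	/* Sketch: the packet must fit cpu_info::ci_pmap_data[] (128 bytes). */
	typedef char pmap_tlb_packet_fits_scratch_area
	    [sizeof(struct pmap_tlb_packet) <= 128 ? 1 : -1];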

Index: src/sys/arch/x86/x86/cpu.c
diff -u src/sys/arch/x86/x86/cpu.c:1.69 src/sys/arch/x86/x86/cpu.c:1.69.2.1
--- src/sys/arch/x86/x86/cpu.c:1.69	Wed Feb 24 22:37:55 2010
+++ src/sys/arch/x86/x86/cpu.c	Mon Apr 26 02:43:35 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.69 2010/02/24 22:37:55 dyoung Exp $	*/
+/*	$NetBSD: cpu.c,v 1.69.2.1 2010/04/26 02:43:35 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2000, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.69 2010/02/24 22:37:55 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.69.2.1 2010/04/26 02:43:35 rmind Exp $");
 
 #include "opt_ddb.h"
 #include "opt_mpbios.h"		/* for MPDEBUG */
@@ -152,6 +152,7 @@
 	.ci_idepth = -1,
 	.ci_curlwp = &lwp0,
 	.ci_curldt = -1,
+	.ci_cpumask = 1,
 #ifdef TRAPLOG
 	.ci_tlog_base = &tlog_primary,
 #endif /* !TRAPLOG */
@@ -168,7 +169,7 @@
 static void	cpu_init_idle_lwp(struct cpu_info *);
 
 uint32_t cpus_attached = 0;
-uint32_t cpus_running = 0;
+uint32_t cpus_running = 1;
 
 extern char x86_64_doubleflt_stack[];
 
@@ -342,6 +343,7 @@
 		cpu_init_tss(ci);
 	} else {
 		KASSERT(ci->ci_data.cpu_idlelwp != NULL);
+		cpus_running = (1 << cpu_index(ci));
 	}
 
 	ci->ci_cpumask = (1 << cpu_index(ci));
@@ -360,7 +362,6 @@
 		cpu_get_tsc_freq(ci);
 		cpu_init(ci);
 		cpu_set_tss_gates(ci);
-		pmap_cpu_init_late(ci);
 		if (caa->cpu_role != CPU_ROLE_SP) {
 			/* Enable lapic. */
 			lapic_enable();
@@ -396,8 +397,6 @@
 		cpu_intr_init(ci);
 		gdt_alloc_cpu(ci);
 		cpu_set_tss_gates(ci);
-		pmap_cpu_init_early(ci);
-		pmap_cpu_init_late(ci);
 		cpu_start_secondary(ci);
 		if (ci->ci_flags & CPUF_PRESENT) {
 			struct cpu_info *tmp;
@@ -708,6 +707,7 @@
 	/* Because the text may have been patched in x86_patch(). */
 	wbinvd();
 	x86_flush();
+	tlbflushg();
 
 	KASSERT((ci->ci_flags & CPUF_RUNNING) == 0);
 

Index: src/sys/arch/x86/x86/lapic.c
diff -u src/sys/arch/x86/x86/lapic.c:1.44 src/sys/arch/x86/x86/lapic.c:1.44.4.1
--- src/sys/arch/x86/x86/lapic.c:1.44	Sat Nov 21 03:11:01 2009
+++ src/sys/arch/x86/x86/lapic.c	Mon Apr 26 02:43:35 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: lapic.c,v 1.44 2009/11/21 03:11:01 rmind Exp $	*/
+/*	$NetBSD: lapic.c,v 1.44.4.1 2010/04/26 02:43:35 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2000, 2008 The NetBSD Foundation, Inc.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lapic.c,v 1.44 2009/11/21 03:11:01 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lapic.c,v 1.44.4.1 2010/04/26 02:43:35 rmind Exp $");
 
 #include "opt_ddb.h"
 #include "opt_mpbios.h"		/* for MPDEBUG */
@@ -226,10 +226,8 @@
 #ifdef MULTIPROCESSOR
 	idt_vec_reserve(LAPIC_IPI_VECTOR);
 	idt_vec_set(LAPIC_IPI_VECTOR, Xintr_lapic_ipi);
-	idt_vec_reserve(LAPIC_TLB_MCAST_VECTOR);
-	idt_vec_set(LAPIC_TLB_MCAST_VECTOR, Xintr_lapic_tlb_mcast);
-	idt_vec_reserve(LAPIC_TLB_BCAST_VECTOR);
-	idt_vec_set(LAPIC_TLB_BCAST_VECTOR, Xintr_lapic_tlb_bcast);
+	idt_vec_reserve(LAPIC_TLB_VECTOR);
+	idt_vec_set(LAPIC_TLB_VECTOR, Xintr_lapic_tlb);
 #endif
 	idt_vec_reserve(LAPIC_SPURIOUS_VECTOR);
 	idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.105.2.3 src/sys/arch/x86/x86/pmap.c:1.105.2.4
--- src/sys/arch/x86/x86/pmap.c:1.105.2.3	Sun Apr 25 21:08:43 2010
+++ src/sys/arch/x86/x86/pmap.c	Mon Apr 26 02:43:35 2010
@@ -1,4 +1,33 @@
-/*	$NetBSD: pmap.c,v 1.105.2.3 2010/04/25 21:08:43 rmind Exp $	*/
+/*	$NetBSD: pmap.c,v 1.105.2.4 2010/04/26 02:43:35 rmind Exp $	*/
+
+/*-
+ * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Andrew Doran.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /*
  * Copyright (c) 2007 Manuel Bouyer.
@@ -42,7 +71,6 @@
  */
 
 /*
- *
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
  * All rights reserved.
  *
@@ -149,7 +177,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.105.2.3 2010/04/25 21:08:43 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.105.2.4 2010/04/26 02:43:35 rmind Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -298,21 +326,10 @@
  *
  * tlb shootdowns are hard interrupts that operate outside the spl
  * framework: they don't need to be blocked provided that the pmap module
- * gets the order of events correct.  the calls are made by talking directly
- * to the lapic.  the stubs to handle the interrupts are quite short and do
- * one of the following: invalidate a single page, a range of pages, all
- * user tlb entries or the entire tlb.
- * 
- * the cpus synchronize with each other using pmap_mbox structures which are
- * aligned on 64-byte cache lines.  tlb shootdowns against the kernel pmap
- * use a global mailbox and are generated using a broadcast ipi (broadcast
- * to all but the sending cpu).  shootdowns against regular pmaps use
- * per-cpu mailboxes and are multicast.  kernel and user shootdowns can
- * execute simultaneously, as can shootdowns within different multithreaded
- * processes.  TODO:
- * 
- *   1. figure out which waitpoints can be deferered to pmap_update().
- *   2. see if there is a cheap way to batch some updates.
+ * gets the order of events correct.  the calls are made by poking the
+ * lapic directly.  the stub to handle the interrupts is short and does
+ * one of the following: invalidate a set of pages, all user tlb entries
+ * or the entire tlb.
  */
 
 const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER;
@@ -355,24 +372,39 @@
 struct evcnt pmap_ldt_evcnt;
 
 /*
- * Global TLB shootdown mailbox.
+ * TLB shootdown state.
  */
 struct evcnt pmap_tlb_evcnt __aligned(64);
-struct pmap_mbox pmap_mbox __aligned(64);
+struct pmap_tlb_packet pmap_tlb_packet __aligned(64);
+struct pmap_tlb_mailbox pmap_tlb_mailbox __aligned(64);
 
 /*
- * Per-CPU data.  The pmap mailbox is cache intensive so gets its
- * own line.  Note that the mailbox must be the first item.
+ * TLB shootdown statistics.
  */
-struct pmap_cpu {
-	/* TLB shootdown */
-	struct pmap_mbox pc_mbox;
-};
 
-union {
-	struct pmap_cpu pc;
-	uint8_t padding[64];
-} pmap_cpu[MAXCPUS] __aligned(64);
+#ifdef TLBSTATS
+static struct evcnt tlbstat_local[TLBSHOOT__MAX];
+static struct evcnt tlbstat_remote[TLBSHOOT__MAX];
+static struct evcnt tlbstat_kernel[TLBSHOOT__MAX];
+static struct evcnt tlbstat_single_req;
+static struct evcnt tlbstat_single_issue;
+static const char *tlbstat_name[] = {
+	"APTE",
+	"KENTER",
+	"KREMOVE",
+	"FREE_PTP1",
+	"FREE_PTP2",
+	"REMOVE_PTE",
+	"REMOVE_PTES",
+	"SYNC_PV1",
+	"SYNC_PV2",
+	"WRITE_PROTECT",
+	"ENTER",
+	"UPDATE",
+	"BUS_DMA",
+	"BUS_SPACE",
+};
+#endif
 
 /*
  * global data structures
@@ -579,12 +611,14 @@
 static bool		 pmap_remove_pte(struct pmap *, struct vm_page *,
 					 pt_entry_t *, vaddr_t,
 					 struct pv_entry **);
-static pt_entry_t	 pmap_remove_ptes(struct pmap *, struct vm_page *,
+static void		 pmap_remove_ptes(struct pmap *, struct vm_page *,
 					  vaddr_t, vaddr_t, vaddr_t,
 					  struct pv_entry **);
 
 static void		 pmap_unmap_ptes(struct pmap *, struct pmap *);
+#ifdef XEN
 static void		 pmap_unmap_apdp(void);
+#endif
 static bool		 pmap_get_physpage(vaddr_t, int, paddr_t *);
 static int		 pmap_pdes_invalid(vaddr_t, pd_entry_t * const *,
 					   pd_entry_t *);
@@ -721,6 +755,7 @@
 	    (kernel && (pmap->pm_kernel_cpus & ci->ci_cpumask) != 0));
 }
 
+#ifdef XEN
 static void
 pmap_apte_flush(struct pmap *pmap)
 {
@@ -734,8 +769,8 @@
 	 *
 	 * XXXthorpej -- find a way to defer the IPI.
 	 */
-	pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, 0);
-	pmap_tlb_shootwait();
+	pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_APTE);
+	pmap_tlb_shootnow();
 }
 
 /*
@@ -747,13 +782,15 @@
 
 	for (i = 0; i < PDP_SIZE; i++) {
 		pmap_pte_set(APDP_PDE+i, 0);
-#if defined (XEN) && defined (PAE)
+#if defined (PAE)
 		/* clear shadow entries too */
 		pmap_pte_set(APDP_PDE_SHADOW+i, 0);
 #endif
 	}
 }
 
+#endif /* XEN */
+
 /*
  *	Add a reference to the specified pmap.
  */
@@ -776,15 +813,14 @@
 pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2,
     pd_entry_t **ptepp, pd_entry_t * const **pdeppp)
 {
+#ifdef XEN
 	pd_entry_t opde, npde;
 	struct pmap *ourpmap;
 	struct cpu_info *ci;
 	struct lwp *l;
 	bool iscurrent;
 	uint64_t ncsw;
-#ifdef XEN
 	int s;
-#endif
 
 	/* the kernel's pmap is always accessible */
 	if (pmap == pmap_kernel()) {
@@ -800,14 +836,14 @@
 	ncsw = l->l_ncsw;
  	ourpmap = NULL;
 	ci = curcpu();
-#if defined(XEN) && defined(__x86_64__)
+#if defined(__x86_64__)
 	/*
 	 * curmap can only be pmap_kernel so at this point
 	 * pmap_is_curpmap is always false
 	 */
 	iscurrent = 0;
 	ourpmap = pmap_kernel();
-#else /* XEN && __x86_64__*/
+#else /* __x86_64__*/
 	if (ci->ci_want_pmapload &&
 	    vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) {
 		pmap_load();
@@ -824,7 +860,7 @@
 		goto out;
 	}
 	ourpmap = ci->ci_pmap;
-#endif /* XEN && __x86_64__ */
+#endif /* __x86_64__ */
 
 	/* need to lock both curpmap and pmap: use ordered locking */
 	pmap_reference(ourpmap);
@@ -844,7 +880,6 @@
 	opde = *APDP_PDE;
 	if (!pmap_valid_entry(opde) ||
 	    pmap_pte2pa(opde) != pmap_pdirpa(pmap, 0)) {
-#ifdef XEN
 		int i;
 		s = splvm();
 		/* Make recursive entry usable in user PGD */
@@ -867,23 +902,12 @@
 		if (pmap_valid_entry(opde))
 			pmap_apte_flush(ourpmap);
 		splx(s);
-#else /* XEN */
-		int i;
-		for (i = 0; i < PDP_SIZE; i++) {
-			npde = pmap_pa2pte(
-			    pmap_pdirpa(pmap, i * NPDPG)) | PG_RW | PG_V;
-			pmap_pte_set(APDP_PDE+i, npde);
-		}
-		pmap_pte_flush();
-		if (pmap_valid_entry(opde))
-			pmap_apte_flush(ourpmap);
-#endif /* XEN */
 	}
 	*pmap2 = ourpmap;
 	*ptepp = APTE_BASE;
 	*pdeppp = alternate_pdes;
 	KASSERT(l->l_ncsw == ncsw);
-#if !defined(XEN) || !defined(__x86_64__)
+#if !defined(__x86_64__)
  out:
 #endif
  	/*
@@ -898,8 +922,62 @@
 		mutex_exit(pmap->pm_lock);
 		goto retry;
 	}
+#else /* XEN */
+	struct pmap *curpmap;
+	struct cpu_info *ci;
+	uint32_t cpumask;
+	lwp_t *l;
 
-	return;
+	/* The kernel's pmap is always accessible. */
+	if (pmap == pmap_kernel()) {
+		*pmap2 = NULL;
+		*ptepp = PTE_BASE;
+		*pdeppp = normal_pdes;
+		return;
+	}
+	KASSERT(kpreempt_disabled());
+
+	l = curlwp;
+ retry:
+	ci = curcpu();
+	mutex_enter(pmap->pm_lock);
+	curpmap = ci->ci_pmap;
+	if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) {
+		/* Our own pmap so just load it: easy. */
+		if (ci->ci_want_pmapload) {
+			mutex_exit(pmap->pm_lock);
+			pmap_load();
+			goto retry;
+		}
+		KASSERT(pmap == curpmap);
+	} else if (pmap == curpmap) {
+		/*
+		 * Already on the CPU: make it valid.  This is very
+		 * often the case during exit(), when we have switched
+		 * to the kernel pmap in order to destroy a user pmap.
+		 */
+		if (!pmap_reactivate(pmap)) {
+			tlbflush();
+		}
+	} else {
+		/*
+		 * Toss current pmap from CPU, but keep ref to it.
+		 * Can happen if we block during exit().
+		 */
+		cpumask = ci->ci_cpumask;
+		atomic_and_32(&curpmap->pm_cpus, ~cpumask);
+		atomic_and_32(&curpmap->pm_kernel_cpus, ~cpumask);
+		ci->ci_pmap = pmap;
+		ci->ci_tlbstate = TLBSTATE_VALID;
+		atomic_or_32(&pmap->pm_cpus, cpumask);
+		atomic_or_32(&pmap->pm_kernel_cpus, cpumask);
+		lcr3(pmap->pm_pdirpa);
+	}
+	pmap->pm_ncsw = l->l_ncsw;
+	*pmap2 = curpmap;
+	*ptepp = PTE_BASE;
+	*pdeppp = normal_pdes;
+#endif /* XEN */
 }
 
 /*
@@ -909,6 +987,7 @@
 static void
 pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2)
 {
+#ifdef XEN 
 
 	if (pmap == pmap_kernel()) {
 		return;
@@ -917,7 +996,7 @@
 	if (pmap2 == NULL) {
 		mutex_exit(pmap->pm_lock);
 	} else {
-#if defined(XEN) && defined(__x86_64__)
+#if defined(__x86_64__)
 		KASSERT(pmap2 == pmap_kernel());
 #else
 		KASSERT(curcpu()->ci_pmap == pmap2);
@@ -926,12 +1005,53 @@
 		pmap_unmap_apdp();
 		pmap_pte_flush();
 		pmap_apte_flush(pmap2);
-#endif
+#endif /* MULTIPROCESSOR */
 		COUNT(apdp_pde_unmap);
 		mutex_exit(pmap->pm_lock);
 		mutex_exit(pmap2->pm_lock);
 		pmap_destroy(pmap2);
 	}
+#else /* XEN */
+	struct cpu_info *ci;
+	struct pmap *mypmap;
+
+	KASSERT(kpreempt_disabled());
+
+	/* The kernel's pmap is always accessible. */
+	if (pmap == pmap_kernel()) {
+		return;
+	}
+
+	/*
+	 * We cannot tolerate context switches while mapped in.
+	 * if it's our own pmap all we have to do is unlock.
+	 */
+	KASSERT(pmap->pm_ncsw == curlwp->l_ncsw);
+	mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map);
+	if (pmap == mypmap) {
+		mutex_exit(pmap->pm_lock);
+		return;
+	}
+
+	/*
+	 * Mark whatever's on the cpu now as lazy and unlock.
+	 * If the pmap was already installed, we are done.
+	 */
+	ci = curcpu();
+	ci->ci_tlbstate = TLBSTATE_LAZY;
+	ci->ci_want_pmapload = (mypmap != pmap_kernel());
+	mutex_exit(pmap->pm_lock);
+	if (pmap == pmap2) {
+		return;
+	}
+
+	/*
+	 * We installed another pmap on the CPU.  Grab a reference to
+	 * it and leave in place.  Toss the evicted pmap (can block).
+	 */
+	pmap_reference(pmap);
+	pmap_destroy(pmap2);
+#endif /* XEN */
 }
 
 inline static void
@@ -1047,9 +1167,12 @@
 		panic("pmap_kenter_pa: PG_PS");
 #endif
 	if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
-		/* This should not happen, so no need to batch updates. */
+#if defined(DIAGNOSTIC)
+		printf("pmap_kenter_pa: mapping already present\n");
+#endif
+		/* This should not happen. */
 		kpreempt_disable();
-		pmap_tlb_shootdown(pmap_kernel(), va, 0, opte);
+		pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
 		kpreempt_enable();
 	}
 }
@@ -1147,7 +1270,7 @@
 	if (pmap_valid_entry(opte)) {
 #if defined(MULTIPROCESSOR)
 		kpreempt_disable();
-		pmap_tlb_shootdown(pmap_kernel(), va, 0, opte);
+		pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
 		kpreempt_enable();
 #else
 		/* Don't bother deferring in the single CPU case. */
@@ -1206,32 +1329,26 @@
 void
 pmap_kremove(vaddr_t sva, vsize_t len)
 {
-	pt_entry_t *pte, xpte;
+	pt_entry_t *pte, opte;
 	vaddr_t va, eva;
 
 	eva = sva + len;
-	xpte = 0;
 
+	kpreempt_disable();
 	for (va = sva; va < eva; va += PAGE_SIZE) {
 		if (va < VM_MIN_KERNEL_ADDRESS)
 			pte = vtopte(va);
 		else
 			pte = kvtopte(va);
-		xpte |= pmap_pte_testset(pte, 0); /* zap! */
-#if defined(DIAGNOSTIC)
-		/* XXX For now... */
-		if (xpte & PG_PS)
-			panic("pmap_kremove: PG_PS");
-		if (xpte & PG_PVLIST)
-			panic("pmap_kremove: PG_PVLIST mapping for 0x%lx",
-			      va);
-#endif
-	}
-	if ((xpte & (PG_V | PG_U)) == (PG_V | PG_U)) {
-		kpreempt_disable();
-		pmap_tlb_shootdown(pmap_kernel(), sva, eva, xpte);
-		kpreempt_enable();
+		opte = pmap_pte_testset(pte, 0); /* zap! */
+		if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
+			pmap_tlb_shootdown(pmap_kernel(), va, opte,
+			    TLBSHOOT_KREMOVE);
+		}
+		KASSERT((opte & PG_PS) == 0);
+		KASSERT((opte & PG_PVLIST) == 0);
 	}
+	kpreempt_enable();
 }
 
 /*
@@ -1373,7 +1490,7 @@
 		 * "Intel Architecture Software Developer's Manual,
 		 * Volume 3: System Programming".
 		 */
-		tlbflush();
+		tlbflushg();
 
 		/*
 		 * now, remap the kernel text using large pages.  we
@@ -1386,7 +1503,7 @@
 			pde = &L2_BASE[pl2_i(kva)];
 			*pde = pa | pmap_pg_g | PG_PS |
 			    PG_KR | PG_V;	/* zap! */
-			tlbflush();
+			tlbflushg();
 		}
 #if defined(DEBUG)
 		aprint_normal("kernel text is mapped with %" PRIuPSIZE " large "
@@ -1532,7 +1649,6 @@
 
 	mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
 	LIST_INIT(&pmaps);
-	pmap_cpu_init_early(curcpu());
 
 	/*
 	 * initialize caches.
@@ -1556,7 +1672,7 @@
 	 * ensure the TLB is sync'd with reality by flushing it...
 	 */
 
-	tlbflush();
+	tlbflushg();
 
 	/*
 	 * calculate pmap_maxkvaddr from nkptp[].
@@ -1645,6 +1761,32 @@
 		mutex_init(&pv_hash_locks[i].lock, MUTEX_NODEBUG, IPL_VM);
 	}
 
+#ifdef TLBSTATS
+	for (i = 0; i < TLBSHOOT__MAX; i++) {
+		evcnt_attach_dynamic(&tlbstat_local[i], EVCNT_TYPE_MISC,
+		    NULL, "tlbshoot local", tlbstat_name[i]);
+	}
+	for (i = 0; i < TLBSHOOT__MAX; i++) {
+		evcnt_attach_dynamic(&tlbstat_remote[i], EVCNT_TYPE_MISC,
+		    NULL, "tlbshoot remote", tlbstat_name[i]);
+	}
+	for (i = 0; i < TLBSHOOT__MAX; i++) {
+		evcnt_attach_dynamic(&tlbstat_kernel[i], EVCNT_TYPE_MISC,
+		    NULL, "tlbshoot kernel", tlbstat_name[i]);
+	}
+	evcnt_attach_dynamic(&tlbstat_single_req, EVCNT_TYPE_MISC,
+	    NULL, "tlbshoot single page", "requests");
+	evcnt_attach_dynamic(&tlbstat_single_issue, EVCNT_TYPE_MISC,
+	    NULL, "tlbshoot single page", "issues");
+#endif
+
+	evcnt_attach_dynamic(&pmap_tlb_evcnt, EVCNT_TYPE_INTR,
+	    NULL, "TLB", "shootdown");
+	evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC,
+	    NULL, "x86", "io bitmap copy");
+	evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC,
+	    NULL, "x86", "ldt sync");
+
 	/*
 	 * done: pmap module is up (and ready for business)
 	 */
@@ -1653,41 +1795,6 @@
 }
 
 /*
- * pmap_cpu_init_early: perform early per-CPU initialization.
- */
-
-void
-pmap_cpu_init_early(struct cpu_info *ci)
-{
-	struct pmap_cpu *pc;
-	static uint8_t pmap_cpu_alloc;
-
-	pc = &pmap_cpu[pmap_cpu_alloc++].pc;
-	ci->ci_pmap_cpu = pc;
-}
-
-/*
- * pmap_cpu_init_late: perform late per-CPU initialization.
- */
-
-void
-pmap_cpu_init_late(struct cpu_info *ci)
-{
-
-	if (ci == &cpu_info_primary) {
-		evcnt_attach_dynamic(&pmap_tlb_evcnt, EVCNT_TYPE_INTR,
-		    NULL, "global", "TLB IPI");
-		evcnt_attach_dynamic(&pmap_iobmp_evcnt, EVCNT_TYPE_MISC,
-		    NULL, "x86", "io bitmap copy");
-		evcnt_attach_dynamic(&pmap_ldt_evcnt, EVCNT_TYPE_MISC,
-		    NULL, "x86", "ldt sync");
-	}
-
-	evcnt_attach_dynamic(&ci->ci_tlb_evcnt, EVCNT_TYPE_MISC,
-	    NULL, device_xname(ci->ci_dev), "TLB IPI");
-}
-
-/*
  * p v _ e n t r y   f u n c t i o n s
  */
 
@@ -1876,11 +1983,13 @@
 	unsigned long index;
 	int level;
 	vaddr_t invaladdr;
+	pd_entry_t opde;
+#ifdef XEN
+	struct pmap *curpmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map);
 #ifdef MULTIPROCESSOR
 	vaddr_t invaladdr2;
 #endif
-	pd_entry_t opde;
-	struct pmap *curpmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map);
+#endif
 
 	KASSERT(pmap != pmap_kernel());
 	KASSERT(mutex_owned(pmap->pm_lock));
@@ -1890,7 +1999,8 @@
 	do {
 		index = pl_i(va, level + 1);
 		opde = pmap_pte_testset(&pdes[level - 1][index], 0);
-#if defined(XEN) && defined(__x86_64__)
+#if defined(XEN)
+#  if defined(__x86_64__)
 		/*
 		 * If ptp is a L3 currently mapped in kernel space,
 		 * clear it before freeing
@@ -1898,20 +2008,26 @@
 		if (pmap->pm_pdirpa == xen_current_user_pgd
 		    && level == PTP_LEVELS - 1)
 			pmap_pte_set(&pmap_kernel()->pm_pdir[index], 0);
-#endif /* XEN && __x86_64__ */
-		pmap_freepage(pmap, ptp, level);
+#  endif /*__x86_64__ */
 		invaladdr = level == 1 ? (vaddr_t)ptes :
 		    (vaddr_t)pdes[level - 2];
 		pmap_tlb_shootdown(curpmap, invaladdr + index * PAGE_SIZE,
-		    0, opde);
-#if defined(MULTIPROCESSOR)
+		    opde, TLBSHOOT_FREE_PTP1);
+#  if defined(MULTIPROCESSOR)
 		invaladdr2 = level == 1 ? (vaddr_t)PTE_BASE :
 		    (vaddr_t)normal_pdes[level - 2];
 		if (pmap != curpmap || invaladdr != invaladdr2) {
 			pmap_tlb_shootdown(pmap, invaladdr2 + index * PAGE_SIZE,
-			    0, opde);
+			    opde, TLBSHOOT_FREE_PTP2);
 		}
-#endif
+#  endif /* MULTIPROCESSOR */
+#else	/* XEN */
+		invaladdr = level == 1 ? (vaddr_t)ptes :
+		    (vaddr_t)pdes[level - 2];
+		pmap_tlb_shootdown(pmap, invaladdr + index * PAGE_SIZE,
+		    opde, TLBSHOOT_FREE_PTP1);
+#endif	/* XEN */
+		pmap_freepage(pmap, ptp, level);
 		if (level < PTP_LEVELS - 1) {
 			ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
 			ptp->wire_count--;
@@ -2220,6 +2336,7 @@
 	pmap->pm_flags = 0;
 	pmap->pm_cpus = 0;
 	pmap->pm_kernel_cpus = 0;
+	pmap->pm_gc_ptp = NULL;
 
 	/* init the LDT */
 	pmap->pm_ldt = NULL;
@@ -2254,6 +2371,24 @@
 }
 
 /*
+ * pmap_free_ptps: put a list of ptps back to the freelist.
+ */
+
+static void
+pmap_free_ptps(struct vm_page *empty_ptps)
+{
+	struct vm_page *ptp;
+	struct pmap_page *pp;
+
+	while ((ptp = empty_ptps) != NULL) {
+		pp = VM_PAGE_TO_PP(ptp);
+		empty_ptps = pp->pp_link;
+		LIST_INIT(&pp->pp_head.pvh_list);
+		uvm_pagefree(ptp);
+	}
+}
+
+/*
  * pmap_destroy: drop reference count on pmap.   free pmap if
  *	reference count goes to zero.
  */
@@ -2266,13 +2401,24 @@
 	struct cpu_info *ci;
 	CPU_INFO_ITERATOR cii;
 #endif /* DIAGNOSTIC */
+	lwp_t *l;
 
 	/*
-	 * if we have torn down this pmap, process deferred frees and
-	 * invalidations now.
+	 * If we have torn down this pmap, process deferred frees and
+	 * invalidations.  Free now if the system is low on memory.
+	 * Otherwise, free when the pmap is destroyed thus avoiding a
+	 * TLB shootdown.
 	 */
-	if (__predict_false(curlwp->l_md.md_gc_pmap == pmap)) {
-		pmap_update(pmap);
+	l = curlwp;
+	if (__predict_false(l->l_md.md_gc_pmap == pmap)) {
+		if (uvmexp.free < uvmexp.freetarg) {
+			pmap_update(pmap);
+		} else {
+			KASSERT(pmap->pm_gc_ptp == NULL);
+			pmap->pm_gc_ptp = l->l_md.md_gc_ptp;
+			l->l_md.md_gc_ptp = NULL;
+			l->l_md.md_gc_pmap = NULL;
+		}
 	}
 
 	/*
@@ -2315,6 +2461,13 @@
 	mutex_exit(&pmaps_lock);
 
 	/*
+	 * Process deferred PTP frees.  No TLB shootdown required, as the
+	 * PTP pages are no longer visible to any CPU.
+	 */
+
+	pmap_free_ptps(pmap->pm_gc_ptp);
+
+	/*
 	 * destroyed pmap shouldn't have remaining PTPs
 	 */
 
@@ -2816,7 +2969,7 @@
 	 * be coming off the CPU before it has a chance to call
 	 * pmap_update().
 	 */
-	pmap_tlb_shootwait();
+	pmap_tlb_shootnow();
 
 	ci = curcpu();
 
@@ -3230,13 +3383,13 @@
  * => returns composite pte if at least one page should be shot down
  */
 
-static pt_entry_t
+static void
 pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
 		 vaddr_t startva, vaddr_t endva, struct pv_entry **pv_tofree)
 {
 	struct pv_entry *pve;
 	pt_entry_t *pte = (pt_entry_t *) ptpva;
-	pt_entry_t opte, xpte = 0;
+	pt_entry_t opte;
 
 	KASSERT(pmap == pmap_kernel() || mutex_owned(pmap->pm_lock));
 	KASSERT(kpreempt_disabled());
@@ -3266,13 +3419,17 @@
 
 		pmap_exec_account(pmap, startva, opte, 0);
 		pmap_stats_update_bypte(pmap, 0, opte);
-		xpte |= opte;
 
 		if (ptp) {
 			ptp->wire_count--;		/* dropping a PTE */
 			/* Make sure that the PDE is flushed */
 			if (ptp->wire_count <= 1)
-				xpte |= PG_U;
+				opte |= PG_U;
+		}
+
+		if ((opte & PG_U) != 0) {
+			pmap_tlb_shootdown(pmap, startva, opte,
+			    TLBSHOOT_REMOVE_PTES);
 		}
 
 		/*
@@ -3308,8 +3465,6 @@
 
 		/* end of "for" loop: time for next pte */
 	}
-
-	return xpte;
 }
 
 
@@ -3347,14 +3502,15 @@
 	pmap_exec_account(pmap, va, opte, 0);
 	pmap_stats_update_bypte(pmap, 0, opte);
 
-	if (opte & PG_U)
-		pmap_tlb_shootdown(pmap, va, 0, opte);
-
 	if (ptp) {
 		ptp->wire_count--;		/* dropping a PTE */
 		/* Make sure that the PDE is flushed */
-		if ((ptp->wire_count <= 1) && !(opte & PG_U))
-			pmap_tlb_shootdown(pmap, va, 0, opte);
+		if (ptp->wire_count <= 1)
+			opte |= PG_U;
+	}
+
+	if ((opte & PG_U) != 0) {
+		pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_REMOVE_PTE);
 	}
 
 	/*
@@ -3400,7 +3556,7 @@
 void
 pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
 {
-	pt_entry_t *ptes, xpte = 0;
+	pt_entry_t *ptes;
 	pd_entry_t pde;
 	pd_entry_t * const *pdes;
 	struct pv_entry *pv_tofree = NULL;
@@ -3498,15 +3654,13 @@
 				      "detected");
 #endif
 		}
-		xpte |= pmap_remove_ptes(pmap, ptp,
-		    (vaddr_t)&ptes[pl1_i(va)], va, blkendva, &pv_tofree);
+		pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[pl1_i(va)], va,
+		    blkendva, &pv_tofree);
 
 		/* if PTP is no longer being used, free it! */
 		if (ptp && ptp->wire_count <= 1) {
 			pmap_free_ptp(pmap, ptp, va, ptes, pdes);
 		}
-		if ((xpte & PG_U) != 0)
-			pmap_tlb_shootdown(pmap, sva, eva, xpte);
 	}
 	pmap_unmap_ptes(pmap, pmap2);		/* unlock pmap */
 	kpreempt_enable();
@@ -3565,8 +3719,9 @@
 
 			pmap_unmap_pte();
 			if (clearbits != 0) {
-				pmap_tlb_shootdown(pmap, va, 0,
-				    (pmap == pmap_kernel() ? PG_G : 0));
+				pmap_tlb_shootdown(pmap, va,
+				    (pmap == pmap_kernel() ? PG_G : 0),
+				    TLBSHOOT_SYNC_PV1);
 			}
 			return EAGAIN;
 		}
@@ -3605,7 +3760,7 @@
 	} while (pmap_pte_cas(ptep, opte, npte) != opte);
 
 	if (need_shootdown) {
-		pmap_tlb_shootdown(pmap, va, 0, opte);
+		pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV2);
 	}
 	pmap_unmap_pte();
 
@@ -3680,7 +3835,7 @@
 
 			KASSERT(pmap != pmap_kernel());
 
-			pmap_tlb_shootwait();
+			pmap_tlb_shootnow();
 			pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
 			pmap_stats_update_bypte(pmap, 0, opte);
 			ptp->wire_count--;
@@ -3889,7 +4044,8 @@
 				vaddr_t tva;
 
 				tva = x86_ptob(spte - ptes);
-				pmap_tlb_shootdown(pmap, tva, 0, opte);
+				pmap_tlb_shootdown(pmap, tva, opte,
+				    TLBSHOOT_WRITE_PROTECT);
 			}
 next:;
 		}
@@ -4164,7 +4320,7 @@
 
 	if ((~opte & (PG_V | PG_U)) == 0 &&
 	    ((opte ^ npte) & (PG_FRAME | PG_RW)) != 0) {
-		pmap_tlb_shootdown(pmap, va, 0, opte);
+		pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER);
 	}
 
 	error = 0;
@@ -4461,259 +4617,253 @@
 }
 #endif
 
+static inline void
+pmap_tlbstat_count(struct pmap *pm, vaddr_t va, tlbwhy_t why)
+{
+#ifdef TLBSTATS
+	uint32_t mask;
+
+	if (va != (vaddr_t)-1LL) {
+		atomic_inc_64(&tlbstat_single_req.ev_count);
+	}
+	if (pm == pmap_kernel()) {
+		atomic_inc_64(&tlbstat_kernel[why].ev_count);
+		return;
+	
+	}
+	if (va >= VM_MAXUSER_ADDRESS) {
+		mask = pm->pm_cpus | pm->pm_kernel_cpus;
+	} else {
+		mask = pm->pm_cpus;
+	}
+	if ((mask & curcpu()->ci_cpumask) != 0) {
+		atomic_inc_64(&tlbstat_local[why].ev_count);
+	}
+	if ((mask & ~curcpu()->ci_cpumask) != 0) {
+		atomic_inc_64(&tlbstat_remote[why].ev_count);
+	}
+#endif
+}
+
 /*
- * pmap_tlb_shootdown: invalidate pages on all CPUs using pmap 'pm'
- *
- * => always invalidates locally before returning
- * => returns before remote CPUs have invalidated
- * => must be called with preemption disabled
+ * pmap_tlb_shootdown: invalidate a page on all CPUs using pmap 'pm'
  */
 
-void
-pmap_tlb_shootdown(struct pmap *pm, vaddr_t sva, vaddr_t eva, pt_entry_t pte)
+__noinline void
+pmap_tlb_shootdown(struct pmap *pm, vaddr_t va, pt_entry_t pte, tlbwhy_t why)
 {
-#ifdef MULTIPROCESSOR
-	extern bool x86_mp_online;
-	struct cpu_info *ci;
-	struct pmap_mbox *mb, *selfmb;
-	CPU_INFO_ITERATOR cii;
-	uintptr_t head;
-	u_int count;
+	struct pmap_tlb_packet *tp;
 	int s;
-#endif	/* MULTIPROCESSOR */
-	struct cpu_info *self;
-	bool kernel;
 
-	KASSERT(eva == 0 || eva >= sva);
-	KASSERT(kpreempt_disabled());
-
-	if (pte & PG_PS)
-		sva &= PG_LGFRAME;
-	pte &= PG_G;
-	self = curcpu();
-
-	if (sva == (vaddr_t)-1LL) {
-		kernel = true;
-	} else {
-		if (eva == 0)
-			eva = sva + PAGE_SIZE;
-		kernel = sva >= VM_MAXUSER_ADDRESS;
-		KASSERT(kernel == (eva > VM_MAXUSER_ADDRESS));
-	}
+	KASSERT((pte & PG_G) == 0 || pm == pmap_kernel());
 
 	/*
-	 * if tearing down the pmap, do nothing.  we'll flush later
-	 * when we're ready to recycle/destroy it.
+	 * If tearing down the pmap, do nothing.  We will flush later
+	 * when we are ready to recycle/destroy it.
 	 */
 	if (__predict_false(curlwp->l_md.md_gc_pmap == pm)) {
 		return;
 	}
 
+	if ((pte & PG_PS) != 0) {
+		va &= PG_LGFRAME;
+	}
+
 	/*
-	 * If the range is larger than 32 pages, then invalidate
-	 * everything.
-	 */
-	if (sva != (vaddr_t)-1LL && eva - sva > (32 * PAGE_SIZE)) {
-		sva = (vaddr_t)-1LL;
-		eva = sva;
+	 * Add the shootdown operation to our pending set.
+	 */ 
+	s = splvm();
+	tp = (struct pmap_tlb_packet *)curcpu()->ci_pmap_data;
+	tp->tp_pte |= (uint16_t)pte;
+	if (tp->tp_count < TP_MAXVA && va != (vaddr_t)-1LL) {
+		/* Flush a single page. */
+		tp->tp_va[tp->tp_count++] = va;
+	} else {
+		/* Flush everything. */
+		tp->tp_count = (uint16_t)-1;
 	}
 
-#ifdef MULTIPROCESSOR
-	if (ncpu > 1 && x86_mp_online) {
-		selfmb = &self->ci_pmap_cpu->pc_mbox;
+	if (pm == pmap_kernel()) {
+		tp->tp_cpumask = cpus_running;
+	} else if (va >= VM_MAXUSER_ADDRESS) {
+		tp->tp_cpumask |= (pm->pm_cpus | pm->pm_kernel_cpus);
+		tp->tp_usermask |= (pm->pm_cpus | pm->pm_kernel_cpus);;
+	} else {
+		tp->tp_cpumask |= pm->pm_cpus;
+		tp->tp_usermask |= pm->pm_cpus;
+	}
+	pmap_tlbstat_count(pm, va, why);
+	splx(s);
+}
 
+/*
+ * pmap_tlb_shootnow: process pending TLB shootdowns queued on current CPU.
+ *
+ * => Must be called with preemption disabled.
+ */
+
+__noinline void
+pmap_tlb_shootnow(void)
+{
+	struct pmap_tlb_packet *tp;
+	struct pmap_tlb_mailbox *tm;
+	struct cpu_info *ci, *lci;
+	CPU_INFO_ITERATOR cii;
+	uint32_t remote;
+	uintptr_t gen;
+	int s, err, i, count;
+
+	KASSERT(kpreempt_disabled());
+
+	s = splvm();
+	ci = curcpu();
+	tp = (struct pmap_tlb_packet *)ci->ci_pmap_data;
+	if (tp->tp_count == 0) {
+		splx(s);
+		return;
+	}
+	tm = &pmap_tlb_mailbox;
+	remote = tp->tp_cpumask & ~ci->ci_cpumask;
+	gen = 0;	/* XXXgcc */
+	if (remote != 0) {
 		/*
-		 * If the CPUs have no notion of global pages then
-		 * reload of %cr3 is sufficient.
+		 * Gain ownership of the shootdown mailbox.  We must stay
+		 * at splvm once we own it or could deadlock against an
+		 * interrupt on this cpu trying to do the same.
 		 */
-		if (pte != 0 && (cpu_feature & CPUID_PGE) == 0)
-			pte = 0;
-
-		if (pm == pmap_kernel()) {
-			/*
-			 * Mapped on all CPUs: use the broadcast mechanism.
-			 * Once we have the lock, increment the counter.
-			 */
-			s = splvm();
-			mb = &pmap_mbox;
+		while (atomic_cas_32(&tm->tm_pending, 0, remote) != 0) {
+			splx(s);
 			count = SPINLOCK_BACKOFF_MIN;
-			do {
-				if ((head = mb->mb_head) != mb->mb_tail) {
-					splx(s);
-					while ((head = mb->mb_head) !=
-					    mb->mb_tail)
-						SPINLOCK_BACKOFF(count);
-					s = splvm();
-				}
-			} while (atomic_cas_ulong(
-			    (volatile u_long *)&mb->mb_head,
-			    head, head + ncpu - 1) != head);
+			while (tm->tm_pending != 0) {
+				SPINLOCK_BACKOFF(count);
+			}
+			s = splvm();
+			/* An interrupt might have done it for us. */
+			if (tp->tp_count == 0) {
+				splx(s);
+				return;
+			}
+		}
 
-			/*
-			 * Once underway we must stay at IPL_VM until the
-			 * IPI is dispatched.  Otherwise interrupt handlers
-			 * on this CPU can deadlock against us.
-			 */
-			pmap_tlb_evcnt.ev_count++;
-			mb->mb_pointer = self;
-			mb->mb_addr1 = sva;
-			mb->mb_addr2 = eva;
-			mb->mb_global = pte;
-			x86_ipi(LAPIC_TLB_BCAST_VECTOR, LAPIC_DEST_ALLEXCL,
-			    LAPIC_DLMODE_FIXED);
-			self->ci_need_tlbwait = 1;
-			splx(s);
-		} else if ((pm->pm_cpus & ~self->ci_cpumask) != 0 ||
-		    (kernel && (pm->pm_kernel_cpus & ~self->ci_cpumask) != 0)) {
-			/*
-			 * We don't bother traversing the CPU list if only
-			 * used by this CPU.
-			 *
-			 * We can't do global flushes with the multicast
-			 * mechanism.
-			 */
-			KASSERT(pte == 0);
+		/*
+		 * Start a new generation of updates.  Copy our shootdown
+		 * requests into the global buffer.
+		 */
+		gen = ++tm->tm_gen;
+		memcpy(&pmap_tlb_packet, tp, sizeof(*tp));
+		pmap_tlb_evcnt.ev_count++;
 
-			/*
-			 * Take ownership of the shootdown mailbox on each
-			 * CPU, fill the details and fire it off.
-			 */
-			s = splvm();
-			for (CPU_INFO_FOREACH(cii, ci)) {
-				if (ci == self ||
-				    !pmap_is_active(pm, ci, kernel) ||
-				    !(ci->ci_flags & CPUF_RUNNING))
+		/*
+		 * Initiate shootdowns on remote CPUs.
+		 */
+		if (tp->tp_cpumask == cpus_running) {
+			err = x86_ipi(LAPIC_TLB_VECTOR, LAPIC_DEST_ALLEXCL,
+			    LAPIC_DLMODE_FIXED);
+		} else {
+			err = 0;
+			for (CPU_INFO_FOREACH(cii, lci)) {
+				if ((lci->ci_cpumask & remote) == 0) {
+					continue;
+				}
+				if ((lci->ci_flags & CPUF_RUNNING) == 0) {
+					remote &= ~lci->ci_cpumask;
+					atomic_and_32(&tm->tm_pending, remote);
 					continue;
-				selfmb->mb_head++;
-				mb = &ci->ci_pmap_cpu->pc_mbox;
-				count = SPINLOCK_BACKOFF_MIN;
-				while (atomic_cas_ulong(
-				    (u_long *)&mb->mb_pointer,
-				    0, (u_long)&selfmb->mb_tail) != 0) {
-				    	splx(s);
-					while (mb->mb_pointer != 0)
-						SPINLOCK_BACKOFF(count);
-					s = splvm();
 				}
-				mb->mb_addr1 = sva;
-				mb->mb_addr2 = eva;
-				mb->mb_global = pte;
-				if (x86_ipi(LAPIC_TLB_MCAST_VECTOR,
-				    ci->ci_cpuid, LAPIC_DLMODE_FIXED))
-					panic("pmap_tlb_shootdown: ipi failed");
+				err |= x86_ipi(LAPIC_TLB_VECTOR,
+				lci->ci_cpuid, LAPIC_DLMODE_FIXED);
 			}
-			self->ci_need_tlbwait = 1;
-			splx(s);
+		}
+		if (__predict_false(err != 0)) {
+			panic("pmap_tlb_shootdown: IPI failed");
 		}
 	}
-#endif	/* MULTIPROCESSOR */
 
-	/* Update the current CPU before waiting for others. */
-	if (!pmap_is_active(pm, self, kernel))
-		return;
-
-	if (sva == (vaddr_t)-1LL) {
-		u_int gen = uvm_emap_gen_return();
-		if (pte != 0) {
-			tlbflushg();
+	/*
+	 * Shootdowns on remote CPUs are now in flight.  In the meantime,
+	 * perform local shootdowns and do not forget to update emap gen.
+	 */
+	if ((tp->tp_cpumask & ci->ci_cpumask) != 0) {
+		if (tp->tp_count == (uint16_t)-1) {
+			u_int gen = uvm_emap_gen_return();
+			if ((tp->tp_pte & PG_G) != 0) {
+				tlbflushg();
+			} else {
+				tlbflush();
+			}
+			uvm_emap_update(gen);
 		} else {
-			tlbflush();
+			for (i = tp->tp_count - 1; i >= 0; i--) {
+				pmap_update_pg(tp->tp_va[i]);
+			}
 		}
-		uvm_emap_update(gen);
-	} else {
-		do {
-			pmap_update_pg(sva);
-			sva += PAGE_SIZE;
-		} while (sva < eva);
 	}
-}
-
-/*
- * pmap_tlb_shootwait: wait for pending TLB shootdowns to complete
- *
- * => only waits for operations generated by the current CPU
- * => must be called with preemption disabled
- */
-
-void
-pmap_tlb_shootwait(void)
-{
-	struct cpu_info *self;
-	struct pmap_mbox *mb;
-
-	KASSERT(kpreempt_disabled());
 
 	/*
-	 * Anything to do?  XXX Really we want to avoid touching the cache
-	 * lines of the two mailboxes, but the processor may read ahead.
+	 * Clear out our local buffer.
 	 */
-	self = curcpu();
-	if (!self->ci_need_tlbwait)
-		return;
-	self->ci_need_tlbwait = 0;
-
-	/* If we own the global mailbox, wait for it to drain. */
-	mb = &pmap_mbox;
-	while (mb->mb_pointer == self && mb->mb_head != mb->mb_tail)
-		x86_pause();
+#ifdef TLBSTATS
+	if (tp->tp_count != (uint16_t)-1) {
+		atomic_add_64(&tlbstat_single_issue.ev_count, tp->tp_count);
+	}
+#endif
+	tp->tp_count = 0;
+	tp->tp_pte = 0;
+	tp->tp_cpumask = 0;
+	tp->tp_usermask = 0;
+	splx(s);
 
-	/* If we own other CPU's mailboxes, wait for them to drain. */
-	mb = &self->ci_pmap_cpu->pc_mbox;
-	KASSERT(mb->mb_pointer != &mb->mb_tail);
-	while (mb->mb_head != mb->mb_tail)
-		x86_pause();
+	/*
+	 * Now wait for the current generation of updates to be
+	 * processed by remote CPUs.
+	 */
+	if (remote != 0 && tm->tm_pending != 0) {
+		count = SPINLOCK_BACKOFF_MIN;
+		while (tm->tm_pending != 0 && tm->tm_gen == gen) {
+			SPINLOCK_BACKOFF(count);
+		}
+	}
 }
 
 /*
- * pmap_update: process deferred invalidations
+ * pmap_update: process deferred invalidations and frees.
  */
 
 void
 pmap_update(struct pmap *pmap)
 {
-	struct vm_page *ptp, *empty_ptps;
-	struct pmap_page *pp;
-	lwp_t *l;
+	struct vm_page *empty_ptps;
+	lwp_t *l = curlwp;
 
 	/*
-	 * if we have torn down this pmap, invalidate non-global TLB
+	 * If we have torn down this pmap, invalidate non-global TLB
 	 * entries on any processors using it.
 	 */
-	l = curlwp;
+	KPREEMPT_DISABLE(l);
 	if (__predict_false(l->l_md.md_gc_pmap == pmap)) {
 		l->l_md.md_gc_pmap = NULL;
-		KPREEMPT_DISABLE(l);
-		pmap_tlb_shootdown(pmap, -1, -1, 0);
-		KPREEMPT_ENABLE(l);
+		pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_UPDATE);
 	}
-
 	/*
-	 * wait for tlb shootdowns to complete before returning control
-	 * to the caller.
+	 * Initiate any pending TLB shootdowns.  Wait for them to
+	 * complete before returning control to the caller.
 	 */
-	kpreempt_disable();
-	pmap_tlb_shootwait();
-	kpreempt_enable();
+	if (((struct pmap_tlb_packet *)curcpu()->ci_pmap_data)->tp_count) {
+		pmap_tlb_shootnow();
+	}
+	KPREEMPT_ENABLE(l);
 
 	/*
-	 * now that shootdowns are complete, process deferred frees,
+	 * Now that shootdowns are complete, process deferred frees,
 	 * but not from interrupt context.
 	 */
 	if (l->l_md.md_gc_ptp != NULL) {
 		if (cpu_intr_p() || (l->l_pflag & LP_INTR) != 0) {
 			return;
 		}
-
 		empty_ptps = l->l_md.md_gc_ptp;
 		l->l_md.md_gc_ptp = NULL;
-
-		while ((ptp = empty_ptps) != NULL) {
-			ptp->flags |= PG_ZERO;
-			pp = VM_PAGE_TO_PP(ptp);
-			empty_ptps = pp->pp_link;
-			LIST_INIT(&pp->pp_head.pvh_list);
-			uvm_pagefree(ptp);
-		}
+		pmap_free_ptps(empty_ptps);
 	}
 }
 

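The tlbwhy_t values TLBSHOOT_BUS_DMA and TLBSHOOT_BUS_SPACE have no callers
in this diff; a hypothetical caller outside pmap.c would follow the same
pattern as the pmap code above (a sketch under that assumption, not code
from this commit):

	/* Hypothetical caller: retire a kernel mapping before reusing the VA. */
	kpreempt_disable();
	pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_BUS_DMA);
	pmap_tlb_shootnow();	/* wait for remote CPUs to invalidate */
	kpreempt_enable();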