Module Name:    src
Committed By:   matt
Date:           Thu Feb 25 05:53:23 UTC 2010

Modified Files:
        src/sys/arch/mips/include [matt-nb5-mips64]: pmap.h
        src/sys/arch/mips/mips [matt-nb5-mips64]: pmap.c pmap_tlb.c

Log Message:
Make the UP and MP ASID allocation algorithm common.  Significantly improve
the algorithm.  Now when we exhaust the ASIDs, interrogate the TLB for active
ASIDs and release all the others for future allocation.  This leaves the
TLB entries with those ASIDs still valid, avoiding the need to re-incur TLB
misses for them.
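
For illustration, here is a minimal user-space sketch of the reclaim-on-exhaustion
idea described above: keep any ASID the TLB still references, free the rest, and
only flush the user TLB entries when too few ASIDs would remain free.  The
constants, helper names, and the array standing in for the TLB contents are
hypothetical simplifications; the real logic lives in pmap_tlb_asid_reinitialize()
in the pmap_tlb.c diff below.

/*
 * Minimal user-space model of the reinitialization step; not the kernel code.
 */
#include <stdio.h>
#include <string.h>

#define	NUM_ASIDS	256			/* 256 here for the sketch */
#define	NBPW		(8 * sizeof(unsigned long))

static unsigned long asid_bitmap[NUM_ASIDS / NBPW];
static unsigned int asids_free;

static void
mark_used(unsigned int asid)
{
	asid_bitmap[asid / NBPW] |= 1UL << (asid % NBPW);
}

/*
 * On exhaustion: clear the bitmap (ASID 0 stays reserved for the kernel),
 * re-mark every ASID still present in the TLB, and only flush the user
 * entries if they would consume half or more of the ASID space.
 */
static void
asid_reinitialize(const unsigned int *tlb_asids, size_t n)
{
	unsigned int found = 0;

	memset(asid_bitmap, 0, sizeof(asid_bitmap));
	mark_used(0);				/* ASID 0 = kernel */
	asids_free = NUM_ASIDS - 1;

	for (size_t i = 0; i < n; i++) {
		unsigned long *w = &asid_bitmap[tlb_asids[i] / NBPW];
		unsigned long b = 1UL << (tlb_asids[i] % NBPW);
		if ((*w & b) == 0) {		/* count each ASID once */
			*w |= b;
			found++;
		}
	}
	if (found >= (NUM_ASIDS - 1) / 2) {
		/* would leave too few free: flush user entries instead */
		memset(asid_bitmap, 0, sizeof(asid_bitmap));
		mark_used(0);
	} else {
		asids_free -= found;		/* their TLB entries stay valid */
	}
}

int
main(void)
{
	unsigned int in_tlb[] = { 3, 7, 7, 42 };	/* ASIDs seen in the TLB */

	asid_reinitialize(in_tlb, 4);
	printf("%u ASIDs free after reinit\n", asids_free);
	return 0;
}

With the sample TLB contents above, only 3 of the 255 user ASIDs stay allocated,
so no TLB entries need to be flushed.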


To generate a diff of this commit:
cvs rdiff -u -r1.54.26.9 -r1.54.26.10 src/sys/arch/mips/include/pmap.h
cvs rdiff -u -r1.179.16.17 -r1.179.16.18 src/sys/arch/mips/mips/pmap.c
cvs rdiff -u -r1.1.2.3 -r1.1.2.4 src/sys/arch/mips/mips/pmap_tlb.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/mips/include/pmap.h
diff -u src/sys/arch/mips/include/pmap.h:1.54.26.9 src/sys/arch/mips/include/pmap.h:1.54.26.10
--- src/sys/arch/mips/include/pmap.h:1.54.26.9	Tue Feb 23 20:33:47 2010
+++ src/sys/arch/mips/include/pmap.h	Thu Feb 25 05:53:23 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.h,v 1.54.26.9 2010/02/23 20:33:47 matt Exp $	*/
+/*	$NetBSD: pmap.h,v 1.54.26.10 2010/02/25 05:53:23 matt Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993
@@ -143,27 +143,14 @@
  * Per TLB (normally same as CPU) asid info
  */
 struct pmap_asid_info {
-#ifdef MULTIPROCESSOR
 	LIST_ENTRY(pmap_asid_info) pai_link;
-#else
-	uint32_t	pai_asid_generation; /* its generation number */
-#endif
 	uint32_t	pai_asid;	/* TLB address space tag */
 };
 
-#ifdef MULTIPROCESSOR
-#define	TLBINFO_LOCK(ti)	mutex_spin_enter((ti)->ti_lock)
-#define	TLBINFO_UNLOCK(ti)	mutex_spin_exit((ti)->ti_lock)
-#define	PMAP_PAI_ASIDVALID_P(pai, ti)	\
-	((pai)->pai_asid != 0)
-#else
-#define	TLBINFO_LOCK(ti)	do { } while (/*CONSTCOND*/0)
-#define	TLBINFO_UNLOCK(ti)	do { } while (/*CONSTCOND*/0)
-#define	PMAP_PAI_ASIDVALID_P(pai, ti)	\
-	((pai)->pai_asid != 0 \
-	 && (pai)->pai_asid_generation == (ti)->ti_asid_generation)
-#endif
-#define	PMAP_PAI(pmap, ti)	(&(pmap)->pm_pai[tlbinfo_index(ti)])
+#define	TLBINFO_LOCK(ti)		mutex_spin_enter((ti)->ti_lock)
+#define	TLBINFO_UNLOCK(ti)		mutex_spin_exit((ti)->ti_lock)
+#define	PMAP_PAI_ASIDVALID_P(pai, ti)	((pai)->pai_asid != 0)
+#define	PMAP_PAI(pmap, ti)		(&(pmap)->pm_pai[tlbinfo_index(ti)])
 #define	PAI_PMAP(pai, ti)	\
 	((pmap_t)((intptr_t)(pai) \
 	    - offsetof(struct pmap, pm_pai[tlbinfo_index(ti)])))
@@ -193,23 +180,23 @@
 
 struct pmap_tlb_info {
 	uint32_t ti_asid_hint;		/* probable next ASID to use */
-#ifdef MULTIPROCESSOR
 	uint32_t ti_asids_free;		/* # of ASIDs free */
-	u_int ti_asid_bitmap[MIPS_TLB_NUM_PIDS / (sizeof(u_int) * 8)];
+#define	tlbinfo_noasids_p(ti)	((ti)->ti_asids_free == 0)
+	u_long ti_asid_bitmap[MIPS_TLB_NUM_PIDS / (sizeof(u_long) * 8)];
 	kmutex_t *ti_lock;
+	u_int ti_wired;			/* # of wired TLB entries */
+	uint32_t ti_asid_mask;
+	uint32_t ti_asid_max;
 	LIST_HEAD(, pmap_asid_info) ti_pais; /* list of active ASIDs */
+#ifdef MULTIPROCESSOR
 	pmap_t ti_victim;
 	uint32_t ti_cpu_mask;		/* bitmask of CPUs sharing this TLB */
 	enum tlb_invalidate_op ti_tlbinvop;
 	u_int ti_index;
 #define tlbinfo_index(ti)	((ti)->ti_index)
-#define	tlbinfo_noasids_p(ti)	((ti)->ti_asids_free == 0)
 #else
 #define tlbinfo_index(ti)	(0)
-#define	tlbinfo_noasids_p(ti)	((ti)->ti_asid_hint == 0)
-	uint32_t ti_asid_generation;
-#endif /* MULTIPROCESSOR */
-	u_int ti_wired;			/* # of wired TLB entries */
+#endif
 };
 
 
@@ -253,8 +240,8 @@
 void	pmap_tlb_shootdown_process(void);
 bool	pmap_tlb_shootdown_bystanders(pmap_t pmap);
 void	pmap_tlb_info_attach(struct pmap_tlb_info *, struct cpu_info *);
-void	pmap_tlb_info_init(struct pmap_tlb_info *);
 #endif
+void	pmap_tlb_info_init(struct pmap_tlb_info *);
 void	pmap_tlb_asid_acquire(pmap_t pmap, struct lwp *l);
 void	pmap_tlb_asid_deactivate(pmap_t pmap);
 void	pmap_tlb_asid_release_all(pmap_t pmap);

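The PMAP_PAI()/PAI_PMAP() pair above embeds the per-TLB ASID state directly in
struct pmap and recovers the owning pmap from a pmap_asid_info pointer with
offsetof().  A stripped-down, standalone sketch of that round trip follows; the
structures here are stand-ins, not the kernel's.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct pmap_asid_info {
	uint32_t pai_asid;			/* TLB address space tag */
};

struct pmap {
	int pm_dummy;				/* stand-in for the real fields */
	struct pmap_asid_info pm_pai[1];	/* one entry per TLB */
};

#define	PMAP_PAI(pmap, idx)	(&(pmap)->pm_pai[idx])
#define	PAI_PMAP(pai, idx) \
	((struct pmap *)((intptr_t)(pai) \
	    - offsetof(struct pmap, pm_pai[idx])))

int
main(void)
{
	struct pmap pm = { .pm_dummy = 0 };
	struct pmap_asid_info *pai = PMAP_PAI(&pm, 0);

	assert(PAI_PMAP(pai, 0) == &pm);	/* round-trips to the owner */
	return 0;
}

Because pm_pai[] sits at a fixed offset inside struct pmap, no back pointer from
the ASID info to its pmap is needed.
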
Index: src/sys/arch/mips/mips/pmap.c
diff -u src/sys/arch/mips/mips/pmap.c:1.179.16.17 src/sys/arch/mips/mips/pmap.c:1.179.16.18
--- src/sys/arch/mips/mips/pmap.c:1.179.16.17	Wed Feb 24 00:09:04 2010
+++ src/sys/arch/mips/mips/pmap.c	Thu Feb 25 05:53:23 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.179.16.17 2010/02/24 00:09:04 matt Exp $	*/
+/*	$NetBSD: pmap.c,v 1.179.16.18 2010/02/25 05:53:23 matt Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2001 The NetBSD Foundation, Inc.
@@ -67,7 +67,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.179.16.17 2010/02/24 00:09:04 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.179.16.18 2010/02/25 05:53:23 matt Exp $");
 
 /*
  *	Manages physical address maps.
@@ -464,9 +464,7 @@
 {
 	vsize_t bufsz;
 
-#ifdef MULTIPROCESSOR
-	pmap_tlb_info_init(&pmap_tlb_info);
-#endif
+	pmap_tlb_info_init(&pmap_tlb_info);	/* init the lock */
 
 	/*
 	 * Compute the number of pages kmem_map will have.

Index: src/sys/arch/mips/mips/pmap_tlb.c
diff -u src/sys/arch/mips/mips/pmap_tlb.c:1.1.2.3 src/sys/arch/mips/mips/pmap_tlb.c:1.1.2.4
--- src/sys/arch/mips/mips/pmap_tlb.c:1.1.2.3	Wed Feb 24 00:30:21 2010
+++ src/sys/arch/mips/mips/pmap_tlb.c	Thu Feb 25 05:53:23 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap_tlb.c,v 1.1.2.3 2010/02/24 00:30:21 matt Exp $	*/
+/*	$NetBSD: pmap_tlb.c,v 1.1.2.4 2010/02/25 05:53:23 matt Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -31,42 +31,49 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.1.2.3 2010/02/24 00:30:21 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.1.2.4 2010/02/25 05:53:23 matt Exp $");
 
 /*
  * Manages address spaces in a TLB.
  *
- * Normally there is a 1:1 mapping between a TLB and a CPU.  However some
+ * Normally there is a 1:1 mapping between a TLB and a CPU.  However, some
  * implementations may share a TLB between multiple CPUs (really CPU thread
  * contexts).  This requires the TLB abstraction to be separated from the
- * CPU abstraction.  It also requires that the TLB be locked while doing 
+ * CPU abstraction.  It also requires that the TLB be locked while doing
  * TLB activities.
- * 
- * For uniprocessors, since we know we can never have asynchronous 
- * reinitializations of the ASID space, we use a simple generational method
- * for ASIDs.  Simply allocate an ASID starting from MAX ASID - 1 and when
- * 0 (which is reserved for the kernel) is reached, increment the ASID
- * generation, flush the TLB of entries belonging to user ASIDs, and start
- * again from MAX ASID - 1.  At this point, all user pmaps have an invalid ASID.
- *
- * For multiprocessors, it just isn't that easy since we need to preserve the
- * ASIDs of any active LWPs.  We do use an algorithm similar to one used by
- * uniprocessors.  We allocate ASIDs until we have exhausted the supply, then
- * reinitialize the ASID space, and start allocating again.  However we need to
- * to preserve the ASIDs of any user pmaps assigned to a processor.  We do this
- * by keeping a bitmap of allocated ASIDs.  Whenever we reinitialize the TLB's
- * ASID space, we leave allocated any ASID in use by an "onproc" pmap.  When
- * allocating from the bitmap, we skip any ASID who has a corresponding bit set
- * in the bitmap.  Eventually this cause the bitmap to fill and cause a 
- * reinitialization of the ASID space.
+ *
+ * For each TLB, we track the ASIDs in use in a bitmap and a list of pmaps
+ * that have a valid ASID.
+ *
+ * We allocate ASIDs in increasing order until we have exhausted the supply,
+ * then reinitialize the ASID space, and start allocating again at 1.  When
+ * allocating from the ASID bitmap, we skip any ASID whose corresponding bit
+ * is set in the ASID bitmap.  Eventually this causes the ASID bitmap to fill
+ * and, when completely filled, a reinitialization of the ASID space.
+ *
+ * To reinitialize the ASID space, the ASID bitmap is reset and then the ASIDs
+ * of non-kernel TLB entries get recorded in the ASID bitmap.  If the entries
+ * in the TLB consume more than half of the ASID space, all ASIDs are
+ * invalidated, the ASID bitmap is recleared, and the list of pmaps is emptied.
+ * Otherwise (the normal case), any ASID present in the TLB (even those no
+ * longer used by a pmap) will remain active (allocated) and all other ASIDs
+ * will be freed.  If the size of the TLB is much smaller than the ASID space,
+ * this algorithm completely avoids TLB invalidation.
+ *
+ * For multiprocessors, we also have to deal with TLB invalidation requests
+ * from other CPUs, some of which are handled by reinitializing the ASID
+ * space.  Whereas above we keep the ASIDs of those pmaps which have active
+ * TLB entries, this type of reinitialization preserves the ASIDs of any
+ * "onproc" user pmap and all other ASIDs will be freed.  We must do this
+ * since we can't change the current ASID.
  *
  * Each pmap has two bitmaps: pm_active and pm_onproc.  Each bit in pm_active
  * indicates whether that pmap has an allocated ASID for a CPU.  Each bit in
  * pm_onproc indicates that pmap's ASID is active (equal to the ASID in COP 0
  * register EntryHi) on a CPU.  The bit number comes from the CPU's cpu_index().
  * Even though these bitmaps contain the bits for all CPUs, the bits that
- * correspond to the bits beloning to the CPUs sharing a TLB can only be
- * manipulated while holding that TLB's lock.  Atomic ops must be used to 
+ * correspond to the bits belonging to the CPUs sharing a TLB can only be
+ * manipulated while holding that TLB's lock.  Atomic ops must be used to
  * update them since multiple CPUs may be changing different sets of bits at
  * same time but these sets never overlap.
  *
@@ -85,11 +92,11 @@
  * will allocate a new ASID and any existing TLB entries will be orphaned.
  * Only in the case that pmap has an "onproc" ASID do we actually have to send
  * an IPI.
- * 
+ *
  * Once we determined we must send an IPI to shootdown a TLB, we need to send
  * it to one of CPUs that share that TLB.  We choose the lowest numbered CPU
  * that has one of the pmap's ASID "onproc".  In reality, any CPU sharing that
- * TLB would do but interrupting an active CPU seems best.
+ * TLB would do, but interrupting an active CPU seems best.
  *
  * A TLB might have multiple shootdowns active concurrently.  The shootdown
  * logic compresses these into a few cases:
@@ -134,8 +141,8 @@
 #include <mips/locore.h>
 #include <mips/pte.h>
 
-#ifdef MULTIPROCESSOR
 static kmutex_t pmap_tlb0_mutex __aligned(32);
+#ifdef MULTIPROCESSOR
 static struct pmap_tlb_info *pmap_tlbs[MAXCPUS] = {
 	[0] = &pmap_tlb_info,
 };
@@ -143,32 +150,33 @@
 #endif
 
 struct pmap_tlb_info pmap_tlb_info = {
-#ifdef MULTIPROCESSOR
 	.ti_asid_hint = 1,
+	.ti_asid_mask = MIPS_TLB_NUM_PIDS - 1,
+	.ti_asid_max = MIPS_TLB_NUM_PIDS - 1,
 	.ti_asids_free = MIPS_TLB_NUM_PIDS - 1,
+	.ti_asid_bitmap[0] = 1,
+	.ti_wired = MIPS3_TLB_WIRED_UPAGES,
 	.ti_lock = &pmap_tlb0_mutex,
+	.ti_pais = LIST_HEAD_INITIALIZER(pmap_tlb_info.ti_pais),
+#ifdef MULTIPROCESSOR
 	.ti_cpu_mask = 1,
 	.ti_tlbinvop = TLBINV_NOBODY,
-	.ti_pais = LIST_HEAD_INITIALIZER(pmap_tlb_info.ti_pais),
-#else
-	.ti_asid_hint = MIPS_TLB_NUM_PIDS - 1,
 #endif
-	.ti_wired = MIPS3_TLB_WIRED_UPAGES,
 };
 
-#ifdef MULTIPROCESSOR
 #define	__BITMAP_SET(bm, n) \
 	((bm)[(n) / (8*sizeof(bm[0]))] |= 1LU << ((n) % (8*sizeof(bm[0]))))
 #define	__BITMAP_CLR(bm, n) \
 	((bm)[(n) / (8*sizeof(bm[0]))] &= ~(1LU << ((n) % (8*sizeof(bm[0])))))
 #define	__BITMAP_ISSET_P(bm, n) \
-	((bm)[(n) / (8*sizeof(bm[0]))] & (1LU << ((n) % (8*sizeof(bm[0])))))
+	(((bm)[(n) / (8*sizeof(bm[0]))] & (1LU << ((n) % (8*sizeof(bm[0]))))) != 0)
 
 #define	TLBINFO_ASID_MARK_USED(ti, asid) \
 	__BITMAP_SET((ti)->ti_asid_bitmap, (asid))
 #define	TLBINFO_ASID_INUSE_P(ti, asid) \
 	__BITMAP_ISSET_P((ti)->ti_asid_bitmap, (asid))
 
+
 static inline void
 pmap_pai_reset(struct pmap_tlb_info *ti, struct pmap_asid_info *pai,
 	struct pmap *pm)
@@ -177,31 +185,36 @@
 	 * We must have an ASID but it must not be onproc (on a processor).
 	 */
 	KASSERT(pai->pai_asid);
+#ifdef MULTIPROCESSOR
 	KASSERT((pm->pm_onproc & ti->ti_cpu_mask) == 0);
+#endif
 	LIST_REMOVE(pai, pai_link);
 #ifdef DIAGNOSTIC
 	pai->pai_link.le_prev = NULL;	/* tagged as unlinked */
 #endif
 	/*
 	 * Note that we don't mark the ASID as not in use in the TLB's ASID
-	 * bitmap (thus it can't allocated until the ASID space is exhausted
+	 * bitmap (thus it can't be allocated until the ASID space is exhausted
 	 * and therefore reinitialized).  We don't want to flush the TLB for
 	 * entries belonging to this ASID so we will let natural TLB entry
 	 * replacement flush them out of the TLB.  Any new entries for this
 	 * pmap will need a new ASID allocated.
 	 */
-	pai->pai_asid = 0; 
+	pai->pai_asid = 0;
 
+#ifdef MULTIPROCESSOR
 	/*
 	 * The bits in pm_active belonging to this TLB can only be changed
 	 * while this TLB's lock is held.
 	 */
 	atomic_and_32(&pm->pm_active, ~ti->ti_cpu_mask);
+#endif /* MULTIPROCESSOR */
 }
 
 void
 pmap_tlb_info_init(struct pmap_tlb_info *ti)
 {
+#ifdef MULTIPROCESSOR
 	if (ti == &pmap_tlb_info) {
 		mutex_init(ti->ti_lock, MUTEX_DEFAULT, IPL_SCHED);
 		return;
@@ -212,15 +225,33 @@
 	ti->ti_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
 	ti->ti_asid_bitmap[0] = 1;
 	ti->ti_asid_hint = 1;
-	ti->ti_asids_free = MIPS_TLB_NUM_PIDS - 1;
+	ti->ti_asid_max = pmap_tlbs[0]->ti_asid_max;
+	ti->ti_asid_mask = pmap_tlbs[0]->ti_asid_mask;
+	ti->ti_asids_free = ti->ti_asid_max;
 	ti->ti_tlbinvop = TLBINV_NOBODY,
 	ti->ti_victim = NULL;
 	ti->ti_cpu_mask = 0;
 	ti->ti_index = pmap_ntlbs++;
 	ti->ti_wired = 0;
 	pmap_tlbs[ti->ti_index] = ti;
+#else
+	KASSERT(ti == &pmap_tlb_info);
+	mutex_init(ti->ti_lock, MUTEX_DEFAULT, IPL_SCHED);
+	if (!CPUISMIPSNN) {
+		ti->ti_asid_max = mips_options.mips_num_tlb_entries - 1;
+		ti->ti_asids_free = ti->ti_asid_max;
+		ti->ti_asid_mask = ti->ti_asid_max;
+		/*
+		 * Now figure out the mask we need to cover asid_max.
+		 */
+		while ((ti->ti_asid_mask + 1) & ti->ti_asid_mask) {
+			ti->ti_asid_mask |= ti->ti_asid_mask >> 1;
+		}
+	}
+#endif /* MULTIPROCESSOR */
 }
 
+#ifdef MULTIPROCESSOR
 void
 pmap_tlb_info_attach(struct pmap_tlb_info *ti, struct cpu_info *ci)
 {
@@ -242,49 +273,98 @@
 }
 #endif /* MULTIPROCESSOR */
 
-static void
-pmap_tlb_asid_reinitialize(struct pmap_tlb_info *ti, bool all)
+#ifdef DIAGNOSTIC
+static size_t
+pmap_tlb_asid_count(struct pmap_tlb_info *ti)
 {
-	if (all)
-		tlb_invalidate_all();
-	else
-		tlb_invalidate_asids(1, MIPS_TLB_NUM_PIDS);
-
-#ifdef MULTIPROCESSOR
-	struct pmap_asid_info *pai, *next;
+	size_t count = 0;
+	for (uint32_t asid = 1; asid <= ti->ti_asid_max; asid++) {
+		count += TLBINFO_ASID_INUSE_P(ti, asid);
+	}
+	return count;
+}
+#endif
 
+static void
+pmap_tlb_asid_reinitialize(struct pmap_tlb_info *ti, enum tlb_invalidate_op op)
+{
 	/*
 	 * First, clear the ASID bitmap (except for ASID 0 which belongs
 	 * to the kernel).
 	 */
-	ti->ti_asids_free = MIPS_TLB_NUM_PIDS - 1;
-	ti->ti_asid_bitmap[0] = 1;
+	ti->ti_asids_free = ti->ti_asid_max;
 	ti->ti_asid_hint = 1;
+	ti->ti_asid_bitmap[0] = 1;
 	for (size_t i = 1; i < __arraycount(ti->ti_asid_bitmap); i++)
 		ti->ti_asid_bitmap[i] = 0;
 
+	switch (op) {
+	case TLBINV_ALL:
+		tlb_invalidate_all();
+		break;
+	case TLBINV_ALLUSER:
+		tlb_invalidate_asids(1, ti->ti_asid_mask);
+		break;
+	case TLBINV_NOBODY: {
+		/*
+		 * If we are just reclaiming ASIDs in the TLB, let's go find
+		 * what ASIDs are in use in the TLB.  Since this is a
+		 * semi-expensive operation, we don't want to do it too often.
+		 * If more than half of the ASIDs are in use, we don't have
+		 * enough free ASIDs, so we invalidate the TLB entries with ASIDs
+		 * and clear the ASID bitmap.  That will force everyone to
+		 * allocate a new ASID.
+		 */
+		const u_int asids_found = tlb_record_asids(ti->ti_asid_bitmap,
+		    ti->ti_asid_mask);
+		KASSERT(asids_found == pmap_tlb_asid_count(ti));
+		if (__predict_false(asids_found >= ti->ti_asid_max / 2)) {
+			tlb_invalidate_asids(1, ti->ti_asid_mask);
+			ti->ti_asid_bitmap[0] = 1;
+			for (size_t i = 1;
+			     i < __arraycount(ti->ti_asid_bitmap);
+			     i++) {
+				ti->ti_asid_bitmap[i] = 0;
+			}
+		} else {
+			ti->ti_asids_free -= asids_found;
+		}
+		break;
+	}
+	default:
+		panic("%s: unexpected op %d", __func__, op);
+	}
+
 	/*
-	 * Now go through the active ASIDs and release them unless they
-	 * are currently on a processor.  If they are on a process, then 
-	 * mark them as in use.
+	 * Now go through the active ASIDs.  If the ASID is on a processor or
+	 * we aren't invalidating all ASIDs and the TLB has an entry owned by
+	 * that ASID, mark it as in use.  Otherwise release the ASID.
 	 */
+	struct pmap_asid_info *pai, *next;
 	for (pai = LIST_FIRST(&ti->ti_pais); pai != NULL; pai = next) {
 		struct pmap * const pm = PAI_PMAP(pai, ti);
 		next = LIST_NEXT(pai, pai_link);
+		KASSERT(pai->pai_asid != 0);
+#ifdef MULTIPROCESSOR
 		if (pm->pm_onproc & ti->ti_cpu_mask) {
-			KASSERT(pai->pai_asid != 0);
-			TLBINFO_ASID_MARK_USED(ti, pai->pai_asid);
-			ti->ti_asids_free--;
+			if (!TLBINFO_ASID_INUSE_P(ti, pai->pai_asid)) {
+				TLBINFO_ASID_MARK_USED(ti, pai->pai_asid);
+				ti->ti_asids_free--;
+			}
+			continue;
+		}
+#endif /* MULTIPROCESSOR */
+		if (TLBINFO_ASID_INUSE_P(ti, pai->pai_asid)) {
+			KASSERT(op == TLBINV_NOBODY);
 		} else {
 			pmap_pai_reset(ti, pai, pm);
 		}
 	}
-
-	ti->ti_tlbinvop = TLBINV_NOBODY;
-	ti->ti_victim = NULL;
-#else /* !MULTIPROCESSOR */
-	ti->ti_asid_generation++;	/* ok to wrap to 0 */
-	ti->ti_asid_hint = MIPS_TLB_NUM_PIDS - 1;
+#ifdef DIAGNOSTIC
+	size_t free_count = ti->ti_asid_max - pmap_tlb_asid_count(ti);
+	if (free_count != ti->ti_asids_free)
+		panic("%s: bitmap error: %zu != %u",
+		    __func__, free_count, ti->ti_asids_free);
 #endif
 }
 
@@ -328,7 +408,7 @@
 		/*
 		 * Flush all user TLB entries.
 		 */
-		pmap_tlb_asid_reinitialize(ti, false);
+		pmap_tlb_asid_reinitialize(ti, TLBINV_ALLUSER);
 		break;
 	case TLBINV_ALLKERNEL:
 		/*
@@ -340,7 +420,7 @@
 		/*
 		 * Flush all the TLB entries (user and kernel).
 		 */
-		pmap_tlb_asid_reinitialize(ti, true);
+		pmap_tlb_asid_reinitialize(ti, TLBINV_ALL);
 		break;
 	case TLBINV_NOBODY:
 		/*
@@ -503,22 +583,24 @@
 pmap_tlb_asid_alloc(struct pmap_tlb_info *ti, pmap_t pm,
 	struct pmap_asid_info *pai)
 {
-#ifdef MULTIPROCESSOR
 	/*
 	 * We shouldn't have an ASID assigned, and thusly must not be onproc
 	 * nor active.
 	 */
 	KASSERT(pai->pai_asid == 0);
 	KASSERT(pai->pai_link.le_prev == NULL);
+#ifdef MULTIPROCESSOR
 	KASSERT((pm->pm_onproc & ti->ti_cpu_mask) == 0);
 	KASSERT((pm->pm_active & ti->ti_cpu_mask) == 0);
-	KASSERT(ti->ti_asid_hint < MIPS_TLB_NUM_PIDS);
+#endif
+	KASSERT(ti->ti_asids_free > 0);
+	KASSERT(ti->ti_asid_hint <= ti->ti_asid_max);
 
 	/*
 	 * Let's see if the hinted ASID is free.  If not search for
 	 * a new one.
 	 */
-	if (TLBINFO_ASID_INUSE_P(ti, ti->ti_asid_hint)) {
+	if (__predict_false(TLBINFO_ASID_INUSE_P(ti, ti->ti_asid_hint))) {
 		const size_t words = __arraycount(ti->ti_asid_bitmap);
 		const size_t nbpw = 8 * sizeof(ti->ti_asid_bitmap[0]);
 		for (size_t i = 0; i < ti->ti_asid_hint / nbpw; i++) {
@@ -530,15 +612,22 @@
 			 * ffs was to find the first bit set while we want the
 			 * to find the first bit cleared.
 			 */
-			const u_int bits = ~ti->ti_asid_bitmap[i]; 
-			if (bits) {
-				u_int n = ffs(bits) - 1;
-				KASSERT(n < 32);
+			u_long bits = ~ti->ti_asid_bitmap[i];
+			if (__predict_true(bits)) {
+				u_int n = 0;
+				if ((bits & 0xffffffff) == 0)  {
+					bits = (bits >> 31) >> 1;
+					KASSERT(bits);
+					n += 32;
+				}
+				n += ffs(bits) - 1;
+				KASSERT(n < nbpw);
 				ti->ti_asid_hint = n + i * nbpw;
 				break;
 			}
 		}
 		KASSERT(ti->ti_asid_hint != 0);
+		KASSERT(TLBINFO_ASID_INUSE_P(ti, ti->ti_asid_hint-1));
 		KASSERT(!TLBINFO_ASID_INUSE_P(ti, ti->ti_asid_hint));
 	}
 
@@ -552,34 +641,19 @@
 	LIST_INSERT_HEAD(&ti->ti_pais, pai, pai_link);
 	ti->ti_asids_free--;
 
+#ifdef MULTIPROCESSOR
 	/*
 	 * Mark that we now an active ASID for all CPUs sharing this TLB.
 	 * The bits in pm_active belonging to this TLB can only be changed
 	 * while this TLBs lock is held.
 	 */
 	atomic_or_32(&pm->pm_active, ti->ti_cpu_mask);
-#else
-	/*
-	 * Just grab the next ASID and remember the current generation.
-	 */
-	KASSERT(ti->ti_asid_hint < MIPS_TLB_NUM_PIDS);
-	KASSERT(ti->ti_asid_hint > 0);
-	pai->pai_asid = ti->ti_asid_hint--;
-	pai->pai_asid_generation = ti->ti_asid_generation;
 #endif
 }
 
 /*
  * Acquire a TLB address space tag (called ASID or TLBPID) and return it.
- *
- * Since all hw-threads share the TLB, we can't invalidate all non-global TLB
- * entries.  Instead we need to make sure they match the proper range of ASIDs
- * reserved for that CPU.
- *
- * Therefore, when we allocate a new ASID, we just take the next number. When
- * we run out of numbers, we flush the ASIDs the TLB, increment the generation
- * count and start over. The low ASID of the range is reserved for kernel use
- * (even though we only use 0 for that purpose).
+ * An ASID might already have been acquired.
  */
 void
 pmap_tlb_asid_acquire(pmap_t pm, struct lwp *l)
@@ -595,13 +669,13 @@
 		return;
 
 	TLBINFO_LOCK(ti);
-	if (!PMAP_PAI_ASIDVALID_P(pai, ti)) {
+	if (__predict_false(!PMAP_PAI_ASIDVALID_P(pai, ti))) {
 		/*
 		 * If we've run out ASIDs, reinitialize the ASID space.
 		 */
 		if (__predict_false(tlbinfo_noasids_p(ti))) {
 			KASSERT(l == curlwp);
-			pmap_tlb_asid_reinitialize(ti, false);
+			pmap_tlb_asid_reinitialize(ti, TLBINV_NOBODY);
 		}
 
 		/*
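
A side note on the pmap_tlb_asid_alloc() hunk above: the ASID bitmap words are
now u_long, but ffs() only examines an int, so when the low 32 bits of a word
are all allocated the code shifts the upper half down (with a split shift, which
also keeps the expression valid in builds where u_long is only 32 bits) before
searching.  Below is a small standalone illustration of that first-clear-bit
search; the function name and test values are made up, and 64-bit words are
assumed as on mips64.

#include <stdint.h>
#include <stdio.h>
#include <strings.h>		/* ffs() */

/*
 * Find the first clear bit in one 64-bit bitmap word, mirroring the hunk
 * above.  Illustrative only: the caller must know at least one bit is clear.
 */
static unsigned int
first_clear_bit(uint64_t word)
{
	uint64_t bits = ~word;		/* ffs() finds set bits, so invert */
	unsigned int n = 0;

	if ((uint32_t)bits == 0) {
		/*
		 * Low half fully allocated; move the high half down.  The
		 * split shift avoids an out-of-range shift by 32.
		 */
		bits = (bits >> 31) >> 1;
		n += 32;
	}
	/* ffs() takes an int, i.e. only the low 32 bits are examined. */
	return n + (unsigned int)ffs((int)(uint32_t)bits) - 1;
}

int
main(void)
{
	printf("%u\n", first_clear_bit(0x00000000ffffffffULL));	/* 32 */
	printf("%u\n", first_clear_bit(0x0000000000000007ULL));	/*  3 */
	return 0;
}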

Reply via email to