Author: alc
Date: Sat May 26 06:10:25 2012
New Revision: 236045
URL: http://svn.freebsd.org/changeset/base/236045

Log:
  Rename pmap_collect() to pmap_pv_reclaim() and rewrite it such that it no
  longer uses the active and inactive paging queues.  Instead, the pmap now
  maintains an LRU-ordered list of pv entry pages, and pmap_pv_reclaim() uses
  this list to select pv entries for reclamation.
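
  As an illustration, the LRU discipline on the new pv_chunks list comes
  down to three operations.  The following is a minimal userland sketch
  using the same <sys/queue.h> TAILQ macros; the chunk layout is
  simplified and all locking is omitted:

      #include <sys/queue.h>

      /* Simplified stand-in for the kernel's struct pv_chunk. */
      struct pv_chunk {
              TAILQ_ENTRY(pv_chunk) pc_lru;   /* global LRU linkage */
      };

      static TAILQ_HEAD(pch, pv_chunk) pv_chunks =
          TAILQ_HEAD_INITIALIZER(pv_chunks);

      /* A newly created chunk is most recently used: insert at the tail. */
      static void
      chunk_created(struct pv_chunk *pc)
      {

              TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
      }

      /* Allocating a pv entry from a chunk refreshes its position. */
      static void
      chunk_used(struct pv_chunk *pc)
      {

              if (pc != TAILQ_LAST(&pv_chunks, pch)) {
                      TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
                      TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
              }
      }

      /* Reclamation scans from the head, the least recently used chunk. */
      static struct pv_chunk *
      chunk_reclaim_candidate(void)
      {

              return (TAILQ_FIRST(&pv_chunks));
      }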
  
  Note: The old pmap_collect() tried to avoid reclaiming mappings for pages
  with a non-zero hold_count or busy field.  However, this isn't necessary
  for correctness, and the locking in pmap_collect() was insufficient to
  guarantee that such mappings weren't reclaimed.  The new pmap_pv_reclaim()
  doesn't even try.
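
  The new pmap_pv_reclaim() avoids deadlock among pmap locks by taking
  them in address order: a pmap lock that sorts above the already-held
  one is acquired unconditionally, while a lower-sorting one is only
  trylocked and the chunk is skipped on failure.  Below is a hedged
  pthreads analogue of that rule, not the kernel code itself:

      #include <pthread.h>

      /*
       * With "held" already owned, blocking on a lower-addressed lock
       * could deadlock against a thread that owns it and is waiting on
       * "held", so such locks are only trylocked.  Returns non-zero if
       * "want" was acquired (or is the lock already held).
       */
      static int
      lock_in_order(pthread_mutex_t *held, pthread_mutex_t *want)
      {

              if (want == held)
                      return (1);
              if (want > held) {
                      pthread_mutex_lock(want);
                      return (1);
              }
              return (pthread_mutex_trylock(want) == 0);
      }

  On trylock failure the chunk is simply requeued and revisited on a
  later pass.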
  
  MFC after:    5 weeks
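
  For reference, within each chunk taken off the LRU list, the in-use pv
  entries are found by scanning the chunk's inverted free bitmaps, as the
  inner loop of the new pmap_pv_reclaim() does.  A standalone analogue of
  that scan, with ffs() standing in for the kernel's bsfl() and _NPCM set
  to the i386 value:

      #include <stdint.h>
      #include <stdio.h>
      #include <strings.h>

      #define _NPCM   11      /* 32-bit map words per pv chunk on i386 */

      /*
       * pc_map has a 1 bit per free slot, so ~pc_map[field] masked with
       * pc_freemask[field] is the set of allocated slots in that word.
       * Clearing the lowest set bit after each visit walks them all.
       */
      static void
      visit_inuse(const uint32_t pc_map[], const uint32_t pc_freemask[])
      {
              uint32_t inuse;
              int bit, field;

              for (field = 0; field < _NPCM; field++)
                      for (inuse = ~pc_map[field] & pc_freemask[field];
                          inuse != 0; inuse &= ~(1UL << bit)) {
                              bit = ffs(inuse) - 1;
                              printf("pv entry %d is allocated\n",
                                  field * 32 + bit);
                      }
      }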

Modified:
  head/sys/i386/i386/pmap.c
  head/sys/i386/include/pmap.h

Modified: head/sys/i386/i386/pmap.c
==============================================================================
--- head/sys/i386/i386/pmap.c   Sat May 26 05:29:53 2012        (r236044)
+++ head/sys/i386/i386/pmap.c   Sat May 26 06:10:25 2012        (r236045)
@@ -233,6 +233,7 @@ static int pat_index[PAT_INDEX_SIZE];       /*
 /*
  * Data for the pv entry allocation mechanism
  */
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 static struct md_page *pv_table;
 static int shpgperproc = PMAP_SHPGPERPROC;
@@ -2187,69 +2188,144 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry
        "Current number of pv entry allocs");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
        "Current number of spare pv entries");
-
-static int pmap_collect_inactive, pmap_collect_active;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
-       "Current number times pmap_collect called on inactive queue");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
-       "Current number times pmap_collect called on active queue");
 #endif
 
 /*
  * We are in a serious low memory condition.  Resort to
  * drastic measures to free some pages so we can allocate
- * another pv entry chunk.  This is normally called to
- * unmap inactive pages, and if necessary, active pages.
+ * another pv entry chunk.
  */
-static void
-pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
+static vm_page_t
+pmap_pv_reclaim(pmap_t locked_pmap)
 {
+       struct pch newtail;
+       struct pv_chunk *pc;
+       struct md_page *pvh;
        pd_entry_t *pde;
        pmap_t pmap;
        pt_entry_t *pte, tpte;
-       pv_entry_t next_pv, pv;
+       pv_entry_t pv;
        vm_offset_t va;
-       vm_page_t m, free;
-
+       vm_page_t free, m, m_pc;
+       uint32_t inuse, freemask;
+       int bit, field, freed;
+
+       PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+       pmap = NULL;
+       free = m_pc = NULL;
+       TAILQ_INIT(&newtail);
        sched_pin();
-       TAILQ_FOREACH(m, &vpq->pl, pageq) {
-               if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
-                       continue;
-               TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
-                       va = pv->pv_va;
-                       pmap = PV_PMAP(pv);
+       while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
+           free == NULL)) {
+               TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+               if (pmap != pc->pc_pmap) {
+                       if (pmap != NULL) {
+                               pmap_invalidate_all(pmap);
+                               if (pmap != locked_pmap)
+                                       PMAP_UNLOCK(pmap);
+                       }
+                       pmap = pc->pc_pmap;
                        /* Avoid deadlock and lock recursion. */
                        if (pmap > locked_pmap)
                                PMAP_LOCK(pmap);
-                       else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
+                       else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
+                               pmap = NULL;
+                               TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
                                continue;
-                       pmap->pm_stats.resident_count--;
-                       pde = pmap_pde(pmap, va);
-                       KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
-                           " a 4mpage in page %p's pv list", m));
-                       pte = pmap_pte_quick(pmap, va);
-                       tpte = pte_load_clear(pte);
-                       KASSERT((tpte & PG_W) == 0,
-                           ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
-                       if (tpte & PG_A)
-                               vm_page_aflag_set(m, PGA_REFERENCED);
-                       if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
-                               vm_page_dirty(m);
-                       free = NULL;
-                       pmap_unuse_pt(pmap, va, &free);
-                       pmap_invalidate_page(pmap, va);
-                       pmap_free_zero_pages(free);
-                       TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-                       free_pv_entry(pmap, pv);
-                       if (pmap != locked_pmap)
-                               PMAP_UNLOCK(pmap);
+                       }
+               }
+
+               /*
+                * Destroy every non-wired, 4 KB page mapping in the chunk.
+                */
+               freed = 0;
+               for (field = 0; field < _NPCM; field++) {
+                       freemask = 0;
+                       for (inuse = ~pc->pc_map[field] & pc_freemask[field];
+                           inuse != 0; inuse &= ~(1UL << bit)) {
+                               bit = bsfl(inuse);
+                               pv = &pc->pc_pventry[field * 32 + bit];
+                               va = pv->pv_va;
+                               pde = pmap_pde(pmap, va);
+                               if ((*pde & PG_PS) != 0)
+                                       continue;
+                               pte = pmap_pte_quick(pmap, va);
+                               if ((*pte & PG_W) != 0)
+                                       continue;
+                               tpte = pte_load_clear(pte);
+                               if ((tpte & PG_G) != 0)
+                                       pmap_invalidate_page(pmap, va);
+                               m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+                               if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+                                       vm_page_dirty(m);
+                               if ((tpte & PG_A) != 0)
+                                       vm_page_aflag_set(m, PGA_REFERENCED);
+                               TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+                               if (TAILQ_EMPTY(&m->md.pv_list) &&
+                                   (m->flags & PG_FICTITIOUS) == 0) {
+                                       pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+                                       if (TAILQ_EMPTY(&pvh->pv_list)) {
+                                               vm_page_aflag_clear(m,
+                                                   PGA_WRITEABLE);
+                                       }
+                               }
+                               pmap_unuse_pt(pmap, va, &free);
+                               freemask |= 1UL << bit;
+                               freed++;
+                       }
+                       pc->pc_map[field] |= freemask;
+               }
+               if (freed == 0) {
+                       TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+                       continue;
+               }
+               pmap->pm_stats.resident_count -= freed;
+               PV_STAT(pv_entry_frees += freed);
+               PV_STAT(pv_entry_spare += freed);
+               pv_entry_count -= freed;
+               TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+               for (field = 0; field < _NPCM; field++)
+                       if (pc->pc_map[field] != pc_freemask[field]) {
+                               TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
+                                   pc_list);
+                               TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+
+                               /*
+                                * One freed pv entry in locked_pmap is
+                                * sufficient.
+                                */
+                               if (pmap == locked_pmap)
+                                       goto out;
+                               break;
+                       }
+               if (field == _NPCM) {
+                       PV_STAT(pv_entry_spare -= _NPCPV);
+                       PV_STAT(pc_chunk_count--);
+                       PV_STAT(pc_chunk_frees++);
+                       /* Entire chunk is free; return it. */
+                       m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
+                       pmap_qremove((vm_offset_t)pc, 1);
+                       pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
+                       break;
                }
-               if (TAILQ_EMPTY(&m->md.pv_list) &&
-                   TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
-                       vm_page_aflag_clear(m, PGA_WRITEABLE);
        }
+out:
        sched_unpin();
+       TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
+       if (pmap != NULL) {
+               pmap_invalidate_all(pmap);
+               if (pmap != locked_pmap)
+                       PMAP_UNLOCK(pmap);
+       }
+       if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
+               m_pc = free;
+               free = m_pc->right;
+               /* Recycle a freed page table page. */
+               m_pc->wire_count = 1;
+               atomic_add_int(&cnt.v_wire_count, 1);
+       }
+       pmap_free_zero_pages(free);
+       return (m_pc);
 }
 
 
@@ -2280,6 +2356,7 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
                        TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
                        return;
                }
+       TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
        PV_STAT(pv_entry_spare -= _NPCPV);
        PV_STAT(pc_chunk_count--);
        PV_STAT(pc_chunk_frees++);
@@ -2300,7 +2377,6 @@ get_pv_entry(pmap_t pmap, int try)
 {
        static const struct timeval printinterval = { 60, 0 };
        static struct timeval lastprint;
-       struct vpgqueues *pq;
        int bit, field;
        pv_entry_t pv;
        struct pv_chunk *pc;
@@ -2315,7 +2391,6 @@ get_pv_entry(pmap_t pmap, int try)
                        printf("Approaching the limit on PV entries, consider "
                            "increasing either the vm.pmap.shpgperproc or the "
                            "vm.pmap.pv_entry_max tunable.\n");
-       pq = NULL;
 retry:
        pc = TAILQ_FIRST(&pmap->pm_pvchunk);
        if (pc != NULL) {
@@ -2336,6 +2411,10 @@ retry:
                                }
                        TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
                        TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+                       if (pc != TAILQ_LAST(&pv_chunks, pch)) {
+                               TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+                               TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+                       }
                        PV_STAT(pv_entry_spare--);
                        return (pv);
                }
@@ -2345,29 +2424,16 @@ retry:
         * queues lock.  If "pv_vafree" is currently non-empty, it will
         * remain non-empty until pmap_ptelist_alloc() completes.
         */
-       if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, (pq ==
-           &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
+       if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
            VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
                if (try) {
                        pv_entry_count--;
                        PV_STAT(pc_chunk_tryfail++);
                        return (NULL);
                }
-               /*
-                * Reclaim pv entries: At first, destroy mappings to
-                * inactive pages.  After that, if a pv chunk entry
-                * is still needed, destroy mappings to active pages.
-                */
-               if (pq == NULL) {
-                       PV_STAT(pmap_collect_inactive++);
-                       pq = &vm_page_queues[PQ_INACTIVE];
-               } else if (pq == &vm_page_queues[PQ_INACTIVE]) {
-                       PV_STAT(pmap_collect_active++);
-                       pq = &vm_page_queues[PQ_ACTIVE];
-               } else
-                       panic("get_pv_entry: increase vm.pmap.shpgperproc");
-               pmap_collect(pmap, pq);
-               goto retry;
+               m = pmap_pv_reclaim(pmap);
+               if (m == NULL)
+                       goto retry;
        }
        PV_STAT(pc_chunk_count++);
        PV_STAT(pc_chunk_allocs++);
@@ -2377,6 +2443,7 @@ retry:
        pc->pc_map[0] = pc_freemask[0] & ~1ul;  /* preallocated bit 0 */
        for (field = 1; field < _NPCM; field++)
                pc->pc_map[field] = pc_freemask[field];
+       TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
        pv = &pc->pc_pventry[0];
        TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
        PV_STAT(pv_entry_spare += _NPCPV - 1);
@@ -4374,6 +4441,7 @@ pmap_remove_pages(pmap_t pmap)
                        PV_STAT(pc_chunk_count--);
                        PV_STAT(pc_chunk_frees++);
                        TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+                       TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
                        m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
                        pmap_qremove((vm_offset_t)pc, 1);
                        vm_page_unwire(m, 0);

Modified: head/sys/i386/include/pmap.h
==============================================================================
--- head/sys/i386/include/pmap.h        Sat May 26 05:29:53 2012        (r236044)
+++ head/sys/i386/include/pmap.h        Sat May 26 06:10:25 2012        (r236045)
@@ -481,7 +481,7 @@ struct pv_chunk {
        pmap_t                  pc_pmap;
        TAILQ_ENTRY(pv_chunk)   pc_list;
        uint32_t                pc_map[_NPCM];  /* bitmap; 1 = free */
-       uint32_t                pc_spare[2];
+       TAILQ_ENTRY(pv_chunk)   pc_lru;
        struct pv_entry         pc_pventry[_NPCPV];
 };
 