On 12/23/2011 16:25, Sean Bruno wrote:
> On Wed, 2011-12-21 at 12:47 -0800, Alan Cox wrote:
>> Can you please try the attached patch?  I'm trying to reduce the number
>> of differences between the native and Xen pmap implementations.
>>
>> Alan
>
> Without really looking at the output, I note that this didn't apply
> cleanly against -head ... can you regenerate it?

My bad.  I gave you the wrong patch.  Try this instead.

Alan
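
For anyone skimming the diff: one of the few real behavioral changes on
the Xen side is in pmap_invalidate_cache_range(), which now falls back to
a global cache invalidation once the range reaches 2MB
(PMAP_CLFLUSH_THRESHOLD) instead of issuing clflush per line, and the new
helpers pmap_invalidate_cache_pages()/pmap_flush_page() apply the same
policy per page.  Below is a minimal user-space sketch of that policy,
not the kernel code itself; flush_line(), flush_all_caches(), and
memory_fence() are placeholder stubs for the kernel's clflush,
pmap_invalidate_cache(), and mfence().

#include <stddef.h>
#include <stdint.h>

#define PMAP_CLFLUSH_THRESHOLD  (2 * 1024 * 1024)       /* 2MB, as in the patch */

static int    has_clflush;              /* stub for cpu_feature & CPUID_CLFSH */
static int    has_self_snoop;           /* stub for cpu_feature & CPUID_SS */
static size_t clflush_line_size = 64;   /* stub for cpu_clflush_line_size */

static void flush_line(uintptr_t va) { (void)va; }      /* clflush(va) */
static void flush_all_caches(void) { }                  /* pmap_invalidate_cache() */
static void memory_fence(void) { }                      /* mfence() */

static void
invalidate_cache_range(uintptr_t sva, uintptr_t eva)
{
        uintptr_t va;

        if (has_self_snoop)
                return;         /* Self Snoop: no flush is needed. */
        if (has_clflush && eva - sva < PMAP_CLFLUSH_THRESHOLD) {
                /*
                 * Per-cache-line flush.  Fence on both sides because
                 * clflush is not ordered with other instructions.
                 */
                memory_fence();
                for (va = sva; va < eva; va += clflush_line_size)
                        flush_line(va);
                memory_fence();
        } else {
                /* No CLFLUSH, or the range is 2MB or larger. */
                flush_all_caches();
        }
}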

Index: i386/i386/pmap.c
===================================================================
--- i386/i386/pmap.c    (revision 228570)
+++ i386/i386/pmap.c    (working copy)
@@ -330,7 +330,7 @@ static void pmap_update_pde_invalidate(vm_offset_t
 
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
 
-static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
+static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags);
 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
 static void pmap_pte_release(pt_entry_t *pte);
@@ -340,6 +340,8 @@ static void *pmap_pdpt_allocf(uma_zone_t zone, int
 #endif
 static void pmap_set_pg(void);
 
+static __inline void pagezero(void *page);
+
 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
 
@@ -1216,7 +1218,7 @@ pmap_is_current(pmap_t pmap)
 {
 
        return (pmap == kernel_pmap ||
-               (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
+           (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
            (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
 }
 
@@ -1759,7 +1761,6 @@ pmap_pinit(pmap_t pmap)
        if (pmap->pm_pdir == NULL) {
                pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
                    NBPTD);
-
                if (pmap->pm_pdir == NULL) {
                        PMAP_LOCK_DESTROY(pmap);
                        return (0);
@@ -1792,10 +1793,9 @@ pmap_pinit(pmap_t pmap)
 
        pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
 
-       for (i = 0; i < NPGPTD; i++) {
+       for (i = 0; i < NPGPTD; i++)
                if ((ptdpg[i]->flags & PG_ZERO) == 0)
-                       bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
-       }
+                       pagezero(pmap->pm_pdir + (i * NPDEPG));
 
        mtx_lock_spin(&allpmaps_lock);
        LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
@@ -1824,7 +1824,7 @@ pmap_pinit(pmap_t pmap)
  * mapped correctly.
  */
 static vm_page_t
-_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags)
 {
        vm_paddr_t ptepa;
        vm_page_t m;
@@ -1872,7 +1872,7 @@ static vm_page_t
 static vm_page_t
 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 {
-       unsigned ptepindex;
+       u_int ptepindex;
        pd_entry_t ptepa;
        vm_page_t m;
 
@@ -2020,7 +2020,7 @@ pmap_lazyfix(pmap_t pmap)
        cr3 = vtophys(pmap->pm_pdir);
        if (cr3 == rcr3()) {
                load_cr3(PCPU_GET(curpcb)->pcb_cr3);
-               CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); 
+               CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
        }
 }
 #endif /* SMP */
@@ -2849,7 +2849,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse
        }
 
        for (; sva < eva; sva = pdnxt) {
-               unsigned pdirindex;
+               u_int pdirindex;
 
                /*
                 * Calculate index for next page table.
@@ -3070,7 +3070,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offs
        PMAP_LOCK(pmap);
        for (; sva < eva; sva = pdnxt) {
                pt_entry_t obits, pbits;
-               unsigned pdirindex;
+               u_int pdirindex;
 
                pdnxt = (sva + NBPDR) & ~PDRMASK;
                if (pdnxt < sva)
@@ -3596,7 +3596,7 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start,
                m = TAILQ_NEXT(m, listq);
        }
        vm_page_unlock_queues();
-       PMAP_UNLOCK(pmap);
+       PMAP_UNLOCK(pmap);
 }
 
 /*
@@ -3638,7 +3638,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t v
         * resident, we are creating it here.
         */
        if (va < VM_MAXUSER_ADDRESS) {
-               unsigned ptepindex;
+               u_int ptepindex;
                pd_entry_t ptepa;
 
                /*
@@ -3904,7 +3904,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off
                pt_entry_t *src_pte, *dst_pte;
                vm_page_t dstmpte, srcmpte;
                pd_entry_t srcptepaddr;
-               unsigned ptepindex;
+               u_int ptepindex;
 
                KASSERT(addr < UPT_MIN_ADDRESS,
                    ("pmap_copy: invalid to pmap_copy page tables"));
@@ -5244,7 +5244,7 @@ pmap_pid_dump(int pid)
 #if defined(DEBUG)
 
 static void    pads(pmap_t pm);
-void           pmap_pvdump(vm_offset_t pa);
+void           pmap_pvdump(vm_paddr_t pa);
 
 /* print address space of pmap*/
 static void
Index: i386/xen/pmap.c
===================================================================
--- i386/xen/pmap.c     (revision 228746)
+++ i386/xen/pmap.c     (working copy)
@@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #ifdef SMP
 #include <sys/smp.h>
+#else
+#include <sys/cpuset.h>
 #endif
 
 #include <vm/vm.h>
@@ -221,6 +223,8 @@ extern u_int32_t KERNend;
 pt_entry_t pg_nx;
 #endif
 
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
+
 static int pat_works;                  /* Is page attribute table sane? */
 
 /*
@@ -273,19 +277,6 @@ SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFL
           "Number of times pmap_pte_quick didn't change PMAP1");
 static struct mtx PMAP2mutex;
 
-static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
-       "Max number of PV entries");
-SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
-       "Page share factor per proc");
-static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
-    "2/4MB page mapping counters");
-
-static u_long pmap_pde_mappings;
-SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
-    &pmap_pde_mappings, 0, "2/4MB page mappings");
-
 static void    free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
 static void    pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
@@ -294,6 +285,8 @@ static pv_entry_t pmap_pvh_remove(struct md_page *
 
 static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
+static void pmap_flush_page(vm_page_t m);
+static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
     vm_page_t *free);
 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
@@ -305,14 +298,12 @@ static boolean_t pmap_try_insert_pv_entry(pmap_t p
 
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
 
-static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
+static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags);
 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
 static void pmap_pte_release(pt_entry_t *pte);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
-static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
 static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);
-static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 
 static __inline void pagezero(void *page);
 
@@ -326,8 +317,6 @@ CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
  */
 CTASSERT(KERNBASE % (1 << 24) == 0);
 
-
-
 void 
 pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
 {
@@ -359,24 +348,6 @@ pd_set(struct pmap *pmap, int ptepindex, vm_paddr_
 }
 
 /*
- * Move the kernel virtual free pointer to the next
- * 4MB.  This is used to help improve performance
- * by using a large (4MB) page for much of the kernel
- * (.text, .data, .bss)
- */
-static vm_offset_t
-pmap_kmem_choose(vm_offset_t addr)
-{
-       vm_offset_t newaddr = addr;
-
-#ifndef DISABLE_PSE
-       if (cpu_feature & CPUID_PSE)
-               newaddr = (addr + PDRMASK) & ~PDRMASK;
-#endif
-       return newaddr;
-}
-
-/*
  *     Bootstrap the system enough to run with virtual memory.
  *
  *     On the i386 this is called after mapping has already been enabled
@@ -395,15 +366,13 @@ pmap_bootstrap(vm_paddr_t firstaddr)
        int i;
 
        /*
-        * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
-        * large. It should instead be correctly calculated in locore.s and
-        * not based on 'first' (which is a physical address, not a virtual
-        * address, for the start of unused physical memory). The kernel
-        * page tables are NOT double mapped and thus should not be included
-        * in this calculation.
+        * Initialize the first available kernel virtual address.  However,
+        * using "firstaddr" may waste a few pages of the kernel virtual
+        * address space, because locore may not have mapped every physical
+        * page that it allocated.  Preferably, locore would provide a first
+        * unused virtual address in addition to "firstaddr".
         */
        virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
-       virtual_avail = pmap_kmem_choose(virtual_avail);
 
        virtual_end = VM_MAX_KERNEL_ADDRESS;
 
@@ -468,8 +437,8 @@ pmap_bootstrap(vm_paddr_t firstaddr)
        /*
         * ptemap is used for pmap_pte_quick
         */
-       SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
-       SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);
+       SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
+       SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)
 
        mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
 
@@ -650,6 +619,18 @@ pmap_init(void)
 }
 
 
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
+       "Max number of PV entries");
+SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
+       "Page share factor per proc");
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
+    "2/4MB page mapping counters");
+
+static u_long pmap_pde_mappings;
+SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
+    &pmap_pde_mappings, 0, "2/4MB page mappings");
+
 /***************************************************
  * Low level helper routines.....
  ***************************************************/
@@ -896,6 +877,8 @@ pmap_invalidate_cache(void)
 }
 #endif /* !SMP */
 
+#define        PMAP_CLFLUSH_THRESHOLD  (2 * 1024 * 1024)
+
 void
 pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
 {
@@ -907,7 +890,8 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_of
 
        if (cpu_feature & CPUID_SS)
                ; /* If "Self Snoop" is supported, do nothing. */
-       else if (cpu_feature & CPUID_CLFSH) {
+       else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+           eva - sva < PMAP_CLFLUSH_THRESHOLD) {
 
                /*
                 * Otherwise, do per-cache line flush.  Use the mfence
@@ -924,12 +908,27 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_of
 
                /*
                 * No targeted cache flush methods are supported by CPU,
-                * globally invalidate cache as a last resort.
+                * or the supplied range is bigger than 2MB.
+                * Globally invalidate cache.
                 */
                pmap_invalidate_cache();
        }
 }
 
+void
+pmap_invalidate_cache_pages(vm_page_t *pages, int count)
+{
+       int i;
+
+       if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
+           (cpu_feature & CPUID_CLFSH) == 0) {
+               pmap_invalidate_cache();
+       } else {
+               for (i = 0; i < count; i++)
+                       pmap_flush_page(pages[i]);
+       }
+}
+
 /*
  * Are we current address space or kernel?  N.B. We return FALSE when
  * a pmap's page table is in use because a kernel thread is borrowing
@@ -942,7 +941,7 @@ pmap_is_current(pmap_t pmap)
 
        return (pmap == kernel_pmap ||
            (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
-               (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
+           (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
 }
 
 /*
@@ -971,10 +970,9 @@ pmap_pte(pmap_t pmap, vm_offset_t va)
                        CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
                            pmap, va, (*PMAP2 & 0xffffffff));
                }
-               
                return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
        }
-       return (0);
+       return (NULL);
 }
 
 /*
@@ -1065,7 +1063,7 @@ pmap_extract(pmap_t pmap, vm_offset_t va)
        pt_entry_t *pte;
        pd_entry_t pde;
        pt_entry_t pteval;
-       
+
        rtval = 0;
        PMAP_LOCK(pmap);
        pde = pmap->pm_pdir[va >> PDRSHIFT];
@@ -1170,10 +1168,13 @@ retry:
 /*
  * Add a wired page to the kva.
  * Note: not SMP coherent.
+ *
+ * This function may be used before pmap_bootstrap() is called.
  */
 void 
 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 {
+
        PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag);
 }
 
@@ -1186,16 +1187,18 @@ pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
        pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
 }
 
-
-static __inline void 
+static __inline void
 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
 {
+
        PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
 }
 
 /*
  * Remove a page from the kernel pagetables.
  * Note: not SMP coherent.
+ *
+ * This function may be used before pmap_bootstrap() is called.
  */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
@@ -1292,7 +1295,6 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int co
 #endif 
 }
 
-
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
@@ -1342,9 +1344,9 @@ pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_
 
        --m->wire_count;
        if (m->wire_count == 0)
-               return _pmap_unwire_pte_hold(pmap, m, free);
+               return (_pmap_unwire_pte_hold(pmap, m, free));
        else
-               return 0;
+               return (0);
 }
 
 static int 
@@ -1385,7 +1387,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm
        m->right = *free;
        *free = m;
 
-       return 1;
+       return (1);
 }
 
 /*
@@ -1399,17 +1401,25 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page
        vm_page_t mpte;
 
        if (va >= VM_MAXUSER_ADDRESS)
-               return 0;
+               return (0);
        ptepde = PT_GET(pmap_pde(pmap, va));
        mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
-       return pmap_unwire_pte_hold(pmap, mpte, free);
+       return (pmap_unwire_pte_hold(pmap, mpte, free));
 }
 
+/*
+ * Initialize the pmap for the swapper process.
+ */
 void
 pmap_pinit0(pmap_t pmap)
 {
 
        PMAP_LOCK_INIT(pmap);
+       /*
+        * Since the page table directory is shared with the kernel pmap,
+        * which is already included in the list "allpmaps", this pmap does
+        * not need to be inserted into that list.
+        */
        pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
 #ifdef PAE
        pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
@@ -1418,9 +1428,6 @@ pmap_pinit0(pmap_t pmap)
        PCPU_SET(curpmap, pmap);
        TAILQ_INIT(&pmap->pm_pvchunk);
        bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
-       mtx_lock_spin(&allpmaps_lock);
-       LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
-       mtx_unlock_spin(&allpmaps_lock);
 }
 
 /*
@@ -1471,18 +1478,19 @@ pmap_pinit(pmap_t pmap)
                        ptdpg[i++] = m;
                }
        }
+
        pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
-       for (i = 0; i < NPGPTD; i++) {
+
+       for (i = 0; i < NPGPTD; i++)
                if ((ptdpg[i]->flags & PG_ZERO) == 0)
-                       pagezero(&pmap->pm_pdir[i*NPTEPG]);
-       }
+                       pagezero(pmap->pm_pdir + (i * NPDEPG));
 
        mtx_lock_spin(&allpmaps_lock);
        LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
+       /* Copy the kernel page table directory entries. */
+       bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
        mtx_unlock_spin(&allpmaps_lock);
-       /* Wire in kernel global address entries. */
 
-       bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
 #ifdef PAE
        pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
        if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
@@ -1534,7 +1542,7 @@ pmap_pinit(pmap_t pmap)
  * mapped correctly.
  */
 static vm_page_t
-_pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags)
 {
        vm_paddr_t ptema;
        vm_page_t m;
@@ -1569,6 +1577,7 @@ static vm_page_t
         * Map the pagetable page into the process address space, if
         * it isn't already there.
         */
+
        pmap->pm_stats.resident_count++;
 
        ptema = VM_PAGE_TO_MACH(m);
@@ -1584,7 +1593,7 @@ static vm_page_t
 static vm_page_t
 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 {
-       unsigned ptepindex;
+       u_int ptepindex;
        pd_entry_t ptema;
        vm_page_t m;
 
@@ -1762,6 +1771,7 @@ pmap_release(pmap_t pmap)
 #else
        int npgptd = NPGPTD;
 #endif
+
        KASSERT(pmap->pm_stats.resident_count == 0,
            ("pmap_release: pmap resident count %ld != 0",
            pmap->pm_stats.resident_count));
@@ -1817,7 +1827,7 @@ kvm_size(SYSCTL_HANDLER_ARGS)
 {
        unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 
-       return sysctl_handle_long(oidp, &ksize, 0, req);
+       return (sysctl_handle_long(oidp, &ksize, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_size, "IU", "Size of KVM");
@@ -1827,7 +1837,7 @@ kvm_free(SYSCTL_HANDLER_ARGS)
 {
        unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 
-       return sysctl_handle_long(oidp, &kfree, 0, req);
+       return (sysctl_handle_long(oidp, &kfree, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_free, "IU", "Amount of KVM free");
@@ -1856,12 +1866,12 @@ pmap_growkernel(vm_offset_t addr)
                        }
                }
        }
-       addr = roundup2(addr, PAGE_SIZE * NPTEPG);
+       addr = roundup2(addr, NBPDR);
        if (addr - 1 >= kernel_map->max_offset)
                addr = kernel_map->max_offset;
        while (kernel_vm_end < addr) {
                if (pdir_pde(PTD, kernel_vm_end)) {
-                       kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+                       kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
                        if (kernel_vm_end - 1 >= kernel_map->max_offset) {
                                kernel_vm_end = kernel_map->max_offset;
                                break;
@@ -1869,17 +1879,16 @@ pmap_growkernel(vm_offset_t addr)
                        continue;
                }
 
-               /*
-                * This index is bogus, but out of the way
-                */
-               nkpg = vm_page_alloc(NULL, nkpt,
-                   VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
-               if (!nkpg)
+               nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT,
+                   VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+                   VM_ALLOC_ZERO);
+               if (nkpg == NULL)
                        panic("pmap_growkernel: no memory to grow kernel");
 
                nkpt++;
 
-               pmap_zero_page(nkpg);
+               if ((nkpg->flags & PG_ZERO) == 0)
+                       pmap_zero_page(nkpg);
                ptppaddr = VM_PAGE_TO_PHYS(nkpg);
                newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
                vm_page_lock_queues();
@@ -1891,7 +1900,7 @@ pmap_growkernel(vm_offset_t addr)
                mtx_unlock_spin(&allpmaps_lock);
                vm_page_unlock_queues();
 
-               kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+               kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
                if (kernel_vm_end - 1 >= kernel_map->max_offset) {
                        kernel_vm_end = kernel_map->max_offset;
                        break;
@@ -1911,7 +1920,7 @@ static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
 {
 
-       return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK);
+       return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
 }
 
 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
@@ -2033,15 +2042,15 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
        pc->pc_map[field] |= 1ul << bit;
        /* move to head of list */
        TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
-       TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
        for (idx = 0; idx < _NPCM; idx++)
-               if (pc->pc_map[idx] != pc_freemask[idx])
+               if (pc->pc_map[idx] != pc_freemask[idx]) {
+                       TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
                        return;
+               }
        PV_STAT(pv_entry_spare -= _NPCPV);
        PV_STAT(pc_chunk_count--);
        PV_STAT(pc_chunk_frees++);
        /* entire chunk is free, return it */
-       TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
        m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
        pmap_qremove((vm_offset_t)pc, 1);
        vm_page_unwire(m, 0);
@@ -2270,10 +2279,10 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse
        pt_entry_t *pte;
        vm_page_t free = NULL;
        int anyvalid;
-       
+
        CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
            pmap, sva, eva);
-       
+
        /*
         * Perform an unsynchronized read.  This is, however, safe.
         */
@@ -2298,7 +2307,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse
        }
 
        for (; sva < eva; sva = pdnxt) {
-               unsigned pdirindex;
+               u_int pdirindex;
 
                /*
                 * Calculate index for next page table.
@@ -2393,7 +2402,6 @@ pmap_remove_all(vm_page_t m)
                PMAP_LOCK(pmap);
                pmap->pm_stats.resident_count--;
                pte = pmap_pte_quick(pmap, pv->pv_va);
-
                tpte = *pte;
                PT_SET_VA_MA(pte, 0, TRUE);
                if (tpte & PG_W)
@@ -2457,7 +2465,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offs
        PMAP_LOCK(pmap);
        for (; sva < eva; sva = pdnxt) {
                pt_entry_t obits, pbits;
-               unsigned pdirindex;
+               u_int pdirindex;
 
                pdnxt = (sva + NBPDR) & ~PDRMASK;
 
@@ -2569,7 +2577,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t
        KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
            ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
            va));
-       KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
+       KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
+           VM_OBJECT_LOCKED(m->object),
            ("pmap_enter: page %p is not busy", m));
 
        mpte = NULL;
@@ -2772,10 +2781,9 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start,
        multicall_entry_t mcl[16];
        multicall_entry_t *mclp = mcl;
        int error, count = 0;
-       
+
        VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
        psize = atop(end - start);
-           
        mpte = NULL;
        m = m_start;
        vm_page_lock_queues();
@@ -2814,7 +2822,7 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_p
        multicall_entry_t mcl, *mclp;
        int count = 0;
        mclp = &mcl;
-       
+
        CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x",
            pmap, va, m, prot);
        
@@ -2865,7 +2873,7 @@ pmap_enter_quick_locked(multicall_entry_t **mclpp,
        vm_paddr_t pa;
        vm_page_t free;
        multicall_entry_t *mcl = *mclpp;
-       
+
        KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
            (m->oflags & VPO_UNMANAGED) != 0,
            ("pmap_enter_quick_locked: managed mapping within the clean submap"));
@@ -2877,7 +2885,7 @@ pmap_enter_quick_locked(multicall_entry_t **mclpp,
         * resident, we are creating it here.
         */
        if (va < VM_MAXUSER_ADDRESS) {
-               unsigned ptepindex;
+               u_int ptepindex;
                pd_entry_t ptema;
 
                /*
@@ -2981,7 +2989,7 @@ pmap_enter_quick_locked(multicall_entry_t **mclpp,
        *mclpp = mcl + 1;
        *count = *count + 1;
 #endif 
-       return mpte;
+       return (mpte);
 }
 
 /*
@@ -3006,9 +3014,8 @@ pmap_kenter_temporary(vm_paddr_t pa, int i)
  * are taken, but the code works.
  */
 void
-pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
-                   vm_object_t object, vm_pindex_t pindex,
-                   vm_size_t size)
+pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
+    vm_pindex_t pindex, vm_size_t size)
 {
        pd_entry_t *pde;
        vm_paddr_t pa, ptepa;
@@ -3026,6 +3033,7 @@ void
                KASSERT(p->valid == VM_PAGE_BITS_ALL,
                    ("pmap_object_init_pt: invalid page %p", p));
                pat_mode = p->md.pat_mode;
+
                /*
                 * Abort the mapping if the first page is not physically
                 * aligned to a 2/4MB page boundary.
@@ -3033,6 +3041,7 @@ void
                ptepa = VM_PAGE_TO_PHYS(p);
                if (ptepa & (NBPDR - 1))
                        return;
+
                /*
                 * Skip the first page.  Abort the mapping if the rest of
                 * the pages are not physically contiguous or have differing
@@ -3048,7 +3057,12 @@ void
                                return;
                        p = TAILQ_NEXT(p, listq);
                }
-               /* Map using 2/4MB pages. */
+
+               /*
+                * Map using 2/4MB pages.  Since "ptepa" is 2/4M aligned and
+                * "size" is a multiple of 2/4M, adding the PAT setting to
+                * "pa" will not affect the termination of this loop.
+                */
                PMAP_LOCK(pmap);
                for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
                    size; pa += NBPDR) {
@@ -3112,7 +3126,7 @@ pmap_change_wiring(pmap_t pmap, vm_offset_t va, bo
 
 void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
-         vm_offset_t src_addr)
+    vm_offset_t src_addr)
 {
        vm_page_t   free;
        vm_offset_t addr;
@@ -3149,7 +3163,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off
                pt_entry_t *src_pte, *dst_pte;
                vm_page_t dstmpte, srcmpte;
                pd_entry_t srcptepaddr;
-               unsigned ptepindex;
+               u_int ptepindex;
 
                KASSERT(addr < UPT_MIN_ADDRESS,
                    ("pmap_copy: invalid to pmap_copy page tables"));
@@ -3188,7 +3202,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off
                                dstmpte = pmap_allocpte(dst_pmap, addr,
                                    M_NOWAIT);
                                if (dstmpte == NULL)
-                                       break;
+                                       goto out;
                                dst_pte = pmap_pte_quick(dst_pmap, addr);
                                if (*dst_pte == 0 &&
                                    pmap_try_insert_pv_entry(dst_pmap, addr,
@@ -3212,6 +3226,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off
                                                    addr);
                                                pmap_free_zero_pages(free);
                                        }
+                                       goto out;
                                }
                                if (dstmpte->wire_count >= srcmpte->wire_count)
                                        break;
@@ -3220,6 +3235,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_off
                        src_pte++;
                }
        }
+out:
        PT_UPDATES_FLUSH();
        sched_unpin();
        vm_page_unlock_queues();
@@ -3282,7 +3298,7 @@ pmap_zero_page_area(vm_page_t m, int off, int size
        sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
        mtx_lock(&sysmaps->lock);
        if (*sysmaps->CMAP2)
-               panic("pmap_zero_page: CMAP2 busy");
+               panic("pmap_zero_page_area: CMAP2 busy");
        sched_pin();
        PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
 
@@ -3306,7 +3322,7 @@ pmap_zero_page_idle(vm_page_t m)
 {
 
        if (*CMAP3)
-               panic("pmap_zero_page: CMAP3 busy");
+               panic("pmap_zero_page_idle: CMAP3 busy");
        sched_pin();
        PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
        pagezero(CADDR3);
@@ -3770,7 +3786,6 @@ pmap_ts_referenced(vm_page_t m)
        PT_UPDATES_FLUSH();
        if (*PMAP1)
                PT_SET_MA(PADDR1, 0);
-
        sched_unpin();
        vm_page_unlock_queues();
        return (rtval);
@@ -3805,7 +3820,7 @@ pmap_clear_modify(vm_page_t m)
                pmap = PV_PMAP(pv);
                PMAP_LOCK(pmap);
                pte = pmap_pte_quick(pmap, pv->pv_va);
-               if ((*pte & PG_M) != 0) {
+               if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
                        /*
                         * Regardless of whether a pte is 32 or 64 bits
                         * in size, PG_M is among the least significant
@@ -3927,8 +3942,6 @@ pmap_unmapdev(vm_offset_t va, vm_size_t size)
 void
 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 {
-       struct sysmaps *sysmaps;
-       vm_offset_t sva, eva;
 
        m->md.pat_mode = ma;
        if ((m->flags & PG_FICTITIOUS) != 0)
@@ -3951,11 +3964,21 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma
         * invalidation. In the worst case, whole cache is flushed by
         * pmap_invalidate_cache_range().
         */
-       if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) {
+       if ((cpu_feature & CPUID_SS) == 0)
+               pmap_flush_page(m);
+}
+
+static void
+pmap_flush_page(vm_page_t m)
+{
+       struct sysmaps *sysmaps;
+       vm_offset_t sva, eva;
+
+       if ((cpu_feature & CPUID_CLFSH) != 0) {
                sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
                mtx_lock(&sysmaps->lock);
                if (*sysmaps->CMAP2)
-                       panic("pmap_page_set_memattr: CMAP2 busy");
+                       panic("pmap_flush_page: CMAP2 busy");
                sched_pin();
                PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
                    VM_PAGE_TO_MACH(m) | PG_A | PG_M |
@@ -3963,21 +3986,35 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma
                invlcaddr(sysmaps->CADDR2);
                sva = (vm_offset_t)sysmaps->CADDR2;
                eva = sva + PAGE_SIZE;
-       } else
-               sva = eva = 0; /* gcc */
-       pmap_invalidate_cache_range(sva, eva);
-       if (sva != 0) {
+
+               /*
+                * Use mfence despite the ordering implied by
+                * mtx_{un,}lock() because clflush is not guaranteed
+                * to be ordered by any other instruction.
+                */
+               mfence();
+               for (; sva < eva; sva += cpu_clflush_line_size)
+                       clflush(sva);
+               mfence();
                PT_SET_MA(sysmaps->CADDR2, 0);
                sched_unpin();
                mtx_unlock(&sysmaps->lock);
-       }
+       } else
+               pmap_invalidate_cache();
 }
 
+/*
+ * Changes the specified virtual address range's memory type to that given by
+ * the parameter "mode".  The specified virtual address range must be
+ * completely contained within the kernel map.
+ *
+ * Returns zero if the change completed successfully, and either EINVAL or
+ * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
+ * of the virtual address range was not mapped, and ENOMEM is returned if
+ * there was insufficient memory available to complete the change.
+ */
 int
-pmap_change_attr(va, size, mode)
-       vm_offset_t va;
-       vm_size_t size;
-       int mode;
+pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 {
        vm_offset_t base, offset, tmpva;
        pt_entry_t *pte;
@@ -4031,8 +4068,8 @@ int
        }
 
        /*
-        * Flush CPU caches to make sure any data isn't cached that shouldn't
-        * be, etc.
+        * Flush CPU caches to make sure any data isn't cached that
+        * shouldn't be, etc.
         */
        if (changed) {
                pmap_invalidate_range(kernel_pmap, base, tmpva);
@@ -4050,7 +4087,7 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_pad
        pt_entry_t *ptep, pte;
        vm_paddr_t pa;
        int val;
-       
+
        PMAP_LOCK(pmap);
 retry:
        ptep = pmap_pte(pmap, addr);
@@ -4233,7 +4270,7 @@ pmap_pid_dump(int pid)
                                                                printf("\n");
                                                        }
                                                        sx_sunlock(&allproc_lock);
-                                                       return npte;
+                                                       return (npte);
                                                }
                                                pte = pmap_pte(pmap, va);
                                                if (pte && pmap_pte_v(pte)) {
@@ -4258,7 +4295,7 @@ pmap_pid_dump(int pid)
                }
        }
        sx_sunlock(&allproc_lock);
-       return npte;
+       return (npte);
 }
 #endif
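
A cleanup in the diff that is easy to misread is the free_pv_entry()
reordering: the pv chunk used to be taken off the per-pmap list and
immediately reinserted at the head, only to be removed a second time when
it turned out to be entirely free.  After the patch it is removed once and
reinserted only while it still holds live entries, matching the native
i386 version.  Here is a compilable sketch of the resulting shape, with
the chunk layout simplified (NPCM and the freemask here do not match the
kernel's real _NPCM/pc_freemask sizes) and free() standing in for the
kernel's unmap/unwire/free sequence:

#include <sys/queue.h>
#include <stdint.h>
#include <stdlib.h>

#define NPCM    3                       /* simplified bitmap size */

static const uint32_t pc_freemask[NPCM] = { ~0u, ~0u, ~0u };

struct pv_chunk {
        uint32_t                pc_map[NPCM];   /* bit set => pv entry free */
        TAILQ_ENTRY(pv_chunk)   pc_list;
};

TAILQ_HEAD(pch, pv_chunk);

static void
free_pv_entry_sketch(struct pch *pvchunk, struct pv_chunk *pc, int field,
    int bit)
{
        int idx;

        pc->pc_map[field] |= 1u << bit;         /* mark the entry free */
        TAILQ_REMOVE(pvchunk, pc, pc_list);
        for (idx = 0; idx < NPCM; idx++)
                if (pc->pc_map[idx] != pc_freemask[idx]) {
                        /* Chunk still holds live entries: keep it, at the head. */
                        TAILQ_INSERT_HEAD(pvchunk, pc, pc_list);
                        return;
                }
        /* Entire chunk is free; the kernel unmaps and unwires its page here. */
        free(pc);
}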
 