git: 5ee5c40402c9 - main - arm64 pmap: Defer bti lookup

2024-06-08 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=5ee5c40402c92a498ed8d6eeb6cf0b5c1680817b

commit 5ee5c40402c92a498ed8d6eeb6cf0b5c1680817b
Author: Alan Cox 
AuthorDate: 2024-06-07 05:23:59 +
Commit: Alan Cox 
CommitDate: 2024-06-08 07:26:55 +

arm64 pmap: Defer bti lookup

Defer the bti lookup until after page table page allocation is complete.
We sometimes release the pmap lock and sleep during page table page
allocation.  Consequently, the result of a bti lookup from before
page table page allocation could be stale when we finally create the
mapping based on it.

Modify pmap_bti_same() to update the prototype PTE at the same time as
checking the address range.  This eliminates the need for calling
pmap_pte_bti() in addition to pmap_bti_same().  pmap_bti_same() was
already doing most of the work of pmap_pte_bti().

Reviewed by:    markj
Differential Revision:  https://reviews.freebsd.org/D45502
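
The race described above and the combined check-and-update fix can be modeled
outside the kernel.  The sketch below is illustrative only (the rangeset,
names, and attribute bit are stand-ins, not the pmap code): a single call both
verifies that the whole range is uniformly covered or uniformly uncovered by
BTI ranges and, when covered, sets the guard-page attribute on the prototype
PTE, so the attribute is always derived after any sleep rather than before.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define ATTR_GP	(1ULL << 50)		/* stand-in for the BTI guarded-page bit */

struct range { uint64_t start, end; };	/* [start, end), disjoint */

static bool
range_same_attr(const struct range *rs, size_t n, uint64_t sva, uint64_t eva,
    uint64_t *pte)
{
	for (size_t i = 0; i < n; i++) {
		if (rs[i].end <= sva || rs[i].start >= eva)
			continue;		/* no overlap with this range */
		if (rs[i].start <= sva && rs[i].end >= eva) {
			*pte |= ATTR_GP;	/* fully covered: tag the prototype PTE */
			return (true);
		}
		return (false);			/* partial overlap: refuse the mapping */
	}
	return (true);				/* fully uncovered: PTE left unchanged */
}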
---
 sys/arm64/arm64/pmap.c | 73 ++
 1 file changed, 44 insertions(+), 29 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 92c1c824ba4e..7b30b2a6ae37 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -508,7 +508,8 @@ static void pmap_update_entry(pmap_t pmap, pd_entry_t *pte, 
pd_entry_t newpte,
 static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
 
 static uma_zone_t pmap_bti_ranges_zone;
-static bool pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+static bool pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+pt_entry_t *pte);
 static pt_entry_t pmap_pte_bti(pmap_t pmap, vm_offset_t va);
 static void pmap_bti_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
 static void *bti_dup_range(void *ctx, void *data);
@@ -4955,21 +4956,22 @@ set_l3:
 #endif /* VM_NRESERVLEVEL > 0 */
 
 static int
-pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags,
+pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t pte, int flags,
 int psind)
 {
-   pd_entry_t *l0p, *l1p, *l2p, origpte;
+   pd_entry_t *l0p, *l1p, *l2p, newpte, origpte;
vm_page_t mp;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(psind > 0 && psind < MAXPAGESIZES,
("psind %d unexpected", psind));
-   KASSERT((PTE_TO_PHYS(newpte) & (pagesizes[psind] - 1)) == 0,
-   ("unaligned phys address %#lx newpte %#lx psind %d",
-   PTE_TO_PHYS(newpte), newpte, psind));
+   KASSERT((PTE_TO_PHYS(pte) & (pagesizes[psind] - 1)) == 0,
+   ("unaligned phys address %#lx pte %#lx psind %d",
+   PTE_TO_PHYS(pte), pte, psind));
 
 restart:
-   if (!pmap_bti_same(pmap, va, va + pagesizes[psind]))
+   newpte = pte;
+   if (!pmap_bti_same(pmap, va, va + pagesizes[psind], &newpte))
return (KERN_PROTECTION_FAILURE);
if (psind == 2) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
@@ -5123,9 +5125,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, 
vm_prot_t prot,
 
lock = NULL;
PMAP_LOCK(pmap);
-   /* Wait until we lock the pmap to protect the bti rangeset */
-   new_l3 |= pmap_pte_bti(pmap, va);
-
if ((flags & PMAP_ENTER_LARGEPAGE) != 0) {
KASSERT((m->oflags & VPO_UNMANAGED) != 0,
("managed largepage va %#lx flags %#x", va, flags));
@@ -5197,6 +5196,7 @@ havel3:
orig_l3 = pmap_load(l3);
opa = PTE_TO_PHYS(orig_l3);
pv = NULL;
+   new_l3 |= pmap_pte_bti(pmap, va);
 
/*
 * Is the specified virtual address already mapped?
@@ -5405,7 +5405,6 @@ pmap_enter_l2_rx(pmap_t pmap, vm_offset_t va, vm_page_t 
m, vm_prot_t prot,
new_l2 = (pd_entry_t)(VM_PAGE_TO_PTE(m) | ATTR_DEFAULT |
ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) |
L2_BLOCK);
-   new_l2 |= pmap_pte_bti(pmap, va);
if ((m->oflags & VPO_UNMANAGED) == 0) {
new_l2 |= ATTR_SW_MANAGED;
new_l2 &= ~ATTR_AF;
@@ -5478,7 +5477,7 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
new_l2, u_int flags,
 * and let vm_fault() cope.  Check after l2 allocation, since
 * it could sleep.
 */
-   if (!pmap_bti_same(pmap, va, va + L2_SIZE)) {
+   if (!pmap_bti_same(pmap, va, va + L2_SIZE, &new_l2)) {
KASSERT(l2pg != NULL, ("pmap_enter_l2: missing L2 PTP"));
pmap_abort_ptp(pmap, va, l2pg);
return (KERN_PROTECTION_FAILURE);
@@ -5633,7 +5632,6 @@ pmap_enter_l3c_rx(pmap_t pmap, vm_offset_t va, vm_page_t 
m, vm_page_t *ml3p,
l3e = VM_PAGE_TO_PTE(m) | ATTR_DEFAULT |
ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) |
ATTR_CO

git: 60847070f908 - main - vm: Eliminate a redundant call to vm_reserv_break_all()

2024-06-05 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=60847070f908c7c5ebb2ea4c851f8b98680fd01a

commit 60847070f908c7c5ebb2ea4c851f8b98680fd01a
Author: Alan Cox 
AuthorDate: 2024-06-05 06:40:20 +
Commit: Alan Cox 
CommitDate: 2024-06-05 17:39:47 +

vm: Eliminate a redundant call to vm_reserv_break_all()

When vm_object_collapse() was changed in commit 98087a0 to call
vm_object_terminate(), rather than destroying the object directly, its
call to vm_reserv_break_all() should have been removed, as
vm_object_terminate() calls vm_reserv_break_all().

Reviewed by:    kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D45495
---
 sys/vm/vm_object.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 905df5454355..0af4402938ba 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1953,14 +1953,6 @@ vm_object_collapse(vm_object_t object)
 */
vm_object_collapse_scan(object);
 
-#if VM_NRESERVLEVEL > 0
-   /*
-* Break any reservations from backing_object.
-*/
-   if (__predict_false(!LIST_EMPTY(&backing_object->rvq)))
-   vm_reserv_break_all(backing_object);
-#endif
-
/*
 * Move the pager from backing_object to object.
 *



git: 41dfea24eec2 - main - arm64 pmap: Enable L3C promotions by pmap_enter_quick()

2024-06-04 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=41dfea24eec242e1e083e2a879483a7c05c7e2ff

commit 41dfea24eec242e1e083e2a879483a7c05c7e2ff
Author: Alan Cox 
AuthorDate: 2024-06-01 18:17:52 +
Commit: Alan Cox 
CommitDate: 2024-06-05 04:25:51 +

arm64 pmap: Enable L3C promotions by pmap_enter_quick()

More precisely, implement L3C (64KB/2MB, depending on base page size)
promotion in pmap_enter_quick()'s helper function,
pmap_enter_quick_locked().  At the same time, use the recently
introduced flag VM_PROT_NO_PROMOTE from pmap_enter_object() to
pmap_enter_quick_locked() to avoid L3C promotion attempts that will
fail.

Reviewed by:    kib
Differential Revision:  https://reviews.freebsd.org/D45445
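
A minimal model of the gating order this change relies on (illustrative names,
not the kernel code): because && short-circuits, the caller's no-promote hint
and the cheap virtual/physical alignment test run before the expensive
promotion attempt, so mappings that cannot possibly be promoted never pay for
the page-table scan.

#include <stdbool.h>
#include <stdint.h>

#define NO_PROMOTE	0x1u		/* stand-in for VM_PROT_NO_PROMOTE */
#define L3C_OFFSET	0xffffu		/* 64 KB - 1 with 4 KB base pages */

/* Stand-in for pmap_promote_l3c(): assumed to be the expensive step. */
static bool
attempt_l3c_promotion(uint64_t va)
{
	(void)va;
	return (true);
}

static bool
maybe_promote(unsigned prot, uint64_t va, uint64_t pa)
{
	return ((prot & NO_PROMOTE) == 0 &&		/* caller says don't bother */
	    (va & L3C_OFFSET) == (pa & L3C_OFFSET) &&	/* cheap alignment filter */
	    attempt_l3c_promotion(va));			/* expensive scan runs last */
}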
---
 sys/arm64/arm64/pmap.c | 29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 03d0a1cc6676..8ac7b8f6a135 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -5883,9 +5883,19 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, 
vm_offset_t end,
((rv = pmap_enter_l3c_rx(pmap, va, m, &mpte, prot,
&lock)) == KERN_SUCCESS || rv == KERN_NO_SPACE))
m = &m[L3C_ENTRIES - 1];
-   else
-   mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte,
-   &lock);
+   else {
+   /*
+* In general, if a superpage mapping were possible,
+* it would have been created above.  That said, if
+* start and end are not superpage aligned, then
+* promotion might be possible at the ends of [start,
+* end).  However, in practice, those promotion
+* attempts are so unlikely to succeed that they are
+* not worth trying.
+*/
+   mpte = pmap_enter_quick_locked(pmap, va, m, prot |
+   VM_PROT_NO_PROMOTE, mpte, &lock);
+   }
m = TAILQ_NEXT(m, listq);
}
if (lock != NULL)
@@ -6048,12 +6058,19 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 
vm_page_t m,
 
 #if VM_NRESERVLEVEL > 0
/*
-* If both the PTP and the reservation are fully populated, then
-* attempt promotion.
+* First, attempt L3C promotion, if the virtual and physical addresses
+* are aligned with each other and an underlying reservation has the
+* neighboring L3 pages allocated.  The first condition is simply an
+* optimization that recognizes some eventual promotion failures early
+* at a lower run-time cost.  Then, attempt L2 promotion, if both the
+* PTP and the reservation are fully populated.
 */
if ((prot & VM_PROT_NO_PROMOTE) == 0 &&
-   (mpte == NULL || mpte->ref_count == NL3PG) &&
+   (va & L3C_OFFSET) == (pa & L3C_OFFSET) &&
(m->flags & PG_FICTITIOUS) == 0 &&
+   vm_reserv_is_populated(m, L3C_ENTRIES) &&
+   pmap_promote_l3c(pmap, l3, va) &&
+   (mpte == NULL || mpte->ref_count == NL3PG) &&
vm_reserv_level_iffullpop(m) == 0) {
if (l2 == NULL)
l2 = pmap_pde(pmap, va, &lvl);



git: f1d73aacdc47 - main - pmap: Skip some superpage promotion attempts that will fail

2024-06-03 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=f1d73aacdc47529310e2302094685295c032e28f

commit f1d73aacdc47529310e2302094685295c032e28f
Author: Alan Cox 
AuthorDate: 2024-06-02 08:56:47 +
Commit: Alan Cox 
CommitDate: 2024-06-04 05:38:05 +

pmap: Skip some superpage promotion attempts that will fail

Implement a simple heuristic to skip pointless promotion attempts by
pmap_enter_quick_locked() and moea64_enter().  Specifically, when
vm_fault() calls pmap_enter_quick() to map neighboring pages at the end
of a copy-on-write fault, there is no point in attempting promotion in
pmap_enter_quick_locked() and moea64_enter().  Promotion will fail
because the base pages have differing protection.

Reviewed by:    kib
Differential Revision:  https://reviews.freebsd.org/D45431
MFC after:  1 week
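
The vm_fault() side of the change is not shown in the excerpt below, but the
heuristic it enables can be stated as a small standalone model (illustrative,
not the kernel code): a superpage promotion can only succeed when every base
page in the candidate run carries identical protection, so a caller that just
made one page writable while its neighbors remain read-only may set the
no-promote hint and skip the attempt entirely.

#include <stdbool.h>
#include <stddef.h>

/*
 * Model of the futility test: promotion requires uniform protection across
 * the whole run of base pages, so any mismatch makes the attempt pointless.
 */
static bool
promotion_can_succeed(const unsigned char *prot, size_t npages)
{
	for (size_t i = 1; i < npages; i++)
		if (prot[i] != prot[0])
			return (false);
	return (true);
}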
---
 sys/amd64/amd64/pmap.c  |  3 ++-
 sys/arm64/arm64/pmap.c  |  3 ++-
 sys/i386/i386/pmap.c|  3 ++-
 sys/powerpc/aim/mmu_oea64.c |  9 +++--
 sys/riscv/riscv/pmap.c  |  3 ++-
 sys/vm/vm.h |  1 +
 sys/vm/vm_fault.c   | 11 ++-
 7 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 8105c9d92478..2f3119aede67 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -7818,7 +7818,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 
vm_page_t m,
 * If both the PTP and the reservation are fully populated, then
 * attempt promotion.
 */
-   if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
+   if ((prot & VM_PROT_NO_PROMOTE) == 0 &&
+   (mpte == NULL || mpte->ref_count == NPTEPG) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
if (pde == NULL)
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index aaba6ca189a1..b6bc113ba8a4 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -6052,7 +6052,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 
vm_page_t m,
 * If both the PTP and the reservation are fully populated, then
 * attempt promotion.
 */
-   if ((mpte == NULL || mpte->ref_count == NL3PG) &&
+   if ((prot & VM_PROT_NO_PROMOTE) == 0 &&
+   (mpte == NULL || mpte->ref_count == NL3PG) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
if (l2 == NULL)
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 40d8ceaf42b9..5808c31a99af 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -4250,7 +4250,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 
vm_page_t m,
 * If both the PTP and the reservation are fully populated, then
 * attempt promotion.
 */
-   if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
+   if ((prot & VM_PROT_NO_PROMOTE) == 0 &&
+   (mpte == NULL || mpte->ref_count == NPTEPG) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
if (pde == NULL)
diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c
index 391f90bb04eb..273dc38214e2 100644
--- a/sys/powerpc/aim/mmu_oea64.c
+++ b/sys/powerpc/aim/mmu_oea64.c
@@ -1755,10 +1755,14 @@ out:
 * If the VA of the entered page is not aligned with its PA,
 * don't try page promotion as it is not possible.
 * This reduces the number of promotion failures dramatically.
+*
+* Ignore VM_PROT_NO_PROMOTE unless PMAP_ENTER_QUICK_LOCKED.
 */
if (moea64_ps_enabled(pmap) && pmap != kernel_pmap && pvo != NULL &&
(pvo->pvo_vaddr & PVO_MANAGED) != 0 &&
(va & HPT_SP_MASK) == (pa & HPT_SP_MASK) &&
+   ((prot & VM_PROT_NO_PROMOTE) == 0 ||
+   (flags & PMAP_ENTER_QUICK_LOCKED) == 0) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0)
moea64_sp_promote(pmap, va, m);
@@ -1850,8 +1854,9 @@ moea64_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m,
 vm_prot_t prot)
 {
 
-   moea64_enter(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
-   PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0);
+   moea64_enter(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE |
+   VM_PROT_NO_PROMOTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED,
+   0);
 }
 
 vm_paddr_t
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 1e507f62696e..e8504bcb0f59 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -3519,7 +3519,8 @@

git: 3dc2a8848986 - main - arm64 pmap: Convert panic()s to KASSERT()s

2024-05-31 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3dc2a8848986df2c10ae7df4ce87a1538f549a85

commit 3dc2a8848986df2c10ae7df4ce87a1538f549a85
Author: Alan Cox 
AuthorDate: 2024-05-31 17:22:14 +
Commit: Alan Cox 
CommitDate: 2024-05-31 21:54:27 +

arm64 pmap: Convert panic()s to KASSERT()s

There is no reason for the ATTR_SW_NO_PROMOTE checks in
pmap_update_{entry,strided}() to be panic()s instead of KASSERT()s.

Requested by:   markj
Reviewed by:    markj
Differential Revision:  https://reviews.freebsd.org/D45424
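
For context, a KASSERT() only costs anything in kernels built with INVARIANTS;
in a production build the check and its message disappear entirely, which is
why it is preferred over an unconditional panic() for conditions that should
be impossible.  A simplified sketch of the expansion follows (the real macro
lives in sys/sys/systm.h and has a few more options):

#ifdef INVARIANTS
#define	KASSERT(exp, msg)	do {			\
	if (__predict_false(!(exp)))			\
		panic msg;				\
} while (0)
#else
#define	KASSERT(exp, msg)	do {			\
} while (0)
#endif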
---
 sys/arm64/arm64/pmap.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index cd7837e58380..aaba6ca189a1 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -4565,9 +4565,8 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *ptep, 
pd_entry_t newpte,
register_t intr;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
-   if ((newpte & ATTR_SW_NO_PROMOTE) != 0)
-   panic("%s: Updating non-promote pte", __func__);
+   KASSERT((newpte & ATTR_SW_NO_PROMOTE) == 0,
+   ("%s: Updating non-promote pte", __func__));
 
/*
 * Ensure we don't get switched out with the page table in an
@@ -4608,9 +4607,8 @@ pmap_update_strided(pmap_t pmap, pd_entry_t *ptep, 
pd_entry_t *ptep_end,
register_t intr;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
-   if ((newpte & ATTR_SW_NO_PROMOTE) != 0)
-   panic("%s: Updating non-promote pte", __func__);
+   KASSERT((newpte & ATTR_SW_NO_PROMOTE) == 0,
+   ("%s: Updating non-promote pte", __func__));
 
/*
 * Ensure we don't get switched out with the page table in an



git: 9fc5e3fb39ca - main - arm64: set ATTR_CONTIGUOUS on the DMAP's L2 blocks

2024-05-22 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=9fc5e3fb39ca5b2239066b750bea2ce5775bd79b

commit 9fc5e3fb39ca5b2239066b750bea2ce5775bd79b
Author: Alan Cox 
AuthorDate: 2024-05-13 06:39:28 +
Commit: Alan Cox 
CommitDate: 2024-05-23 03:09:43 +

arm64: set ATTR_CONTIGUOUS on the DMAP's L2 blocks

On systems configured with 16KB pages, this change creates 1GB page
mappings in the direct map where possible.  Previously, the largest page
size that was used to implement the direct map was 32MB.  Similarly, on
systems configured with 4KB pages, this change creates 32MB page
mappings, instead of 2MB, in the direct map where 1GB is too large.

Implement demotion on L2C (32MB/1GB) page mappings within the DMAP.

Update sysctl vm.pmap.kernel_maps to report on L2C page mappings.

Reviewed by:    markj
Tested by:  gallatin, Eliot Solomon 
Differential Revision:  https://reviews.freebsd.org/D45224
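
The core of the change is the eligibility test inside the bootstrap loop: at
each L2C-aligned physical address the contiguous hint is turned on only if a
full L2C-sized run still fits in both the physical segment and the direct map,
and it is turned off otherwise; the blocks in between simply reuse the last
decision.  A standalone model of the boundary decision (illustrative constants
for 4 KB base pages, where an L2C run is 16 x 2 MB = 32 MB):

#include <stdbool.h>
#include <stdint.h>

#define L2C_SIZE	(16 * 2 * 1024 * 1024ULL)	/* 32 MB: 16 L2 blocks */
#define L2C_OFFSET	(L2C_SIZE - 1)

static bool
l2c_run_fits(uint64_t pa, uint64_t seg_end, uint64_t va, uint64_t dmap_max)
{
	if ((pa & L2C_OFFSET) != 0)
		return (false);		/* not at an L2C boundary */
	/* The whole aligned run must fit in the segment and in the DMAP. */
	return (va + L2C_SIZE < dmap_max && seg_end - pa >= L2C_SIZE);
}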
---
 sys/arm64/arm64/pmap.c  | 264 ++--
 sys/arm64/include/pte.h |   5 +
 2 files changed, 237 insertions(+), 32 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 269513589d78..2ce313de36cf 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -429,7 +429,6 @@ void (*pmap_stage2_invalidate_all)(uint64_t);
 #defineTLBI_VA_SHIFT   12
 #defineTLBI_VA_MASK((1ul << 44) - 1)
 #defineTLBI_VA(addr)   (((addr) >> TLBI_VA_SHIFT) & 
TLBI_VA_MASK)
-#defineTLBI_VA_L3_INCR (L3_SIZE >> TLBI_VA_SHIFT)
 
 static int __read_frequently superpages_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
@@ -470,6 +469,7 @@ static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t 
*l1, vm_offset_t va);
 static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
 vm_offset_t va, struct rwlock **lockp);
 static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
+static bool pmap_demote_l2c(pmap_t pmap, pt_entry_t *l2p, vm_offset_t va);
 static bool pmap_demote_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
@@ -1108,6 +1108,7 @@ pmap_bootstrap_l2_table(struct pmap_bootstrap_state 
*state)
 static void
 pmap_bootstrap_l2_block(struct pmap_bootstrap_state *state, int i)
 {
+   pt_entry_t contig;
u_int l2_slot;
bool first;
 
@@ -1118,7 +1119,7 @@ pmap_bootstrap_l2_block(struct pmap_bootstrap_state 
*state, int i)
pmap_bootstrap_l1_table(state);
 
MPASS((state->va & L2_OFFSET) == 0);
-   for (first = true;
+   for (first = true, contig = 0;
state->va < DMAP_MAX_ADDRESS &&
(physmap[i + 1] - state->pa) >= L2_SIZE;
state->va += L2_SIZE, state->pa += L2_SIZE) {
@@ -1129,13 +1130,27 @@ pmap_bootstrap_l2_block(struct pmap_bootstrap_state 
*state, int i)
if (!first && (state->pa & L1_OFFSET) == 0)
break;
 
+   /*
+* If we have an aligned, contiguous chunk of L2C_ENTRIES
+* L2 blocks, set the contiguous bit within each PTE so that
+* the chunk can be cached using only one TLB entry.
+*/
+   if ((state->pa & L2C_OFFSET) == 0) {
+   if (state->va + L2C_SIZE < DMAP_MAX_ADDRESS &&
+   physmap[i + 1] - state->pa >= L2C_SIZE) {
+   contig = ATTR_CONTIGUOUS;
+   } else {
+   contig = 0;
+   }
+   }
+
first = false;
l2_slot = pmap_l2_index(state->va);
MPASS((state->pa & L2_OFFSET) == 0);
MPASS(state->l2[l2_slot] == 0);
pmap_store(&state->l2[l2_slot], PHYS_TO_PTE(state->pa) |
ATTR_DEFAULT | ATTR_S1_XN | ATTR_KERN_GP |
-   ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | L2_BLOCK);
+   ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | contig | L2_BLOCK);
}
MPASS(state->va == (state->pa - dmap_phys_base + DMAP_MIN_ADDRESS));
 }
@@ -1667,6 +1682,20 @@ pmap_init(void)
vm_initialized = 1;
 }
 
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, l1, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+"L1 (1GB/64GB) page mapping counters");
+
+static COUNTER_U64_DEFINE_EARLY(pmap_l1_demotions);
+SYSCTL_COUNTER_U64(_vm_pmap_l1, OID_AUTO, demotions, CTLFLAG_RD,
+    &pmap_l1_demotions, "L1 (1GB/64GB) page demotions");
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2c, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ 

git: 4f77144279f2 - main - arm64 pmap: eliminate a redundant variable

2024-05-19 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=4f77144279f210ce65d77c13470c6363c3ce3c57

commit 4f77144279f210ce65d77c13470c6363c3ce3c57
Author: Alan Cox 
AuthorDate: 2024-05-19 19:22:53 +
Commit: Alan Cox 
CommitDate: 2024-05-19 19:33:19 +

arm64 pmap: eliminate a redundant variable

Moreover, if we attempt an L2 promotion on the kernel pmap from
pmap_enter_quick_locked(), this change eliminates the recomputation of
the L2 entry's address.

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index a6056a5edfc2..269513589d78 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -5848,7 +5848,6 @@ static vm_page_t
 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
 vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
 {
-   pd_entry_t *pde;
pt_entry_t *l1, *l2, *l3, l3_val;
vm_paddr_t pa;
int lvl;
@@ -5913,13 +5912,13 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 
vm_page_t m,
l3 = &l2[pmap_l3_index(va)];
} else {
mpte = NULL;
-   pde = pmap_pde(kernel_pmap, va, &lvl);
-   KASSERT(pde != NULL,
+   l2 = pmap_pde(kernel_pmap, va, &lvl);
+   KASSERT(l2 != NULL,
("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
 va));
KASSERT(lvl == 2,
("pmap_enter_quick_locked: Invalid level %d", lvl));
-   l3 = pmap_l2_to_l3(pde, va);
+   l3 = pmap_l2_to_l3(l2, va);
}
 
/*



git: 94b09d388b81 - main - arm64: map kernel using large pages when page size is 16K

2024-05-12 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=94b09d388b81eb724769e506cdf0f51bba9b73fb

commit 94b09d388b81eb724769e506cdf0f51bba9b73fb
Author: Alan Cox 
AuthorDate: 2024-05-11 06:09:39 +
Commit: Alan Cox 
CommitDate: 2024-05-12 23:22:38 +

arm64: map kernel using large pages when page size is 16K

When the page size is 16K, use ATTR_CONTIGUOUS to map the kernel code
and data sections using 2M pages.  Previously, they were mapped using
16K pages.

Reviewed by:    markj
Tested by:  markj
Differential Revision:  https://reviews.freebsd.org/D45162
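
The decision encoded in the build_l3_page_pagetable assembly below can be read
as follows, rendered here in C purely for illustration: the ATTR_CONTIGUOUS
bit is set when the current table slot sits on an L3C boundary and at least a
full L3C run of entries remains, cleared when the slot is on a boundary but
the run would fall short, and left unchanged for the slots in between.  With
16 KB base pages an L3C run is 128 entries, i.e. the 2 MB mappings the commit
message describes.

#include <stdint.h>

#define L3C_ENTRIES	128u		/* 16 KB base pages: 128 x 16 KB = 2 MB */
#define ATTR_CONTIGUOUS	(1ULL << 52)

/* Returns the attribute word to use for the entry at "slot". */
static uint64_t
update_contig(uint64_t attr, uint32_t slot, uint32_t remaining)
{
	if ((slot & (L3C_ENTRIES - 1)) != 0)
		return (attr);			/* mid-run: keep the last decision */
	if (remaining >= L3C_ENTRIES)
		return (attr | ATTR_CONTIGUOUS);
	return (attr & ~ATTR_CONTIGUOUS);
}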
---
 sys/arm64/arm64/locore.S | 26 +++---
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index f53cd365de55..fffebe8f2b02 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -516,11 +516,10 @@ booti_no_fdt:
 common:
 #if PAGE_SIZE != PAGE_SIZE_4K
/*
-* Create L3 pages. The kernel will be loaded at a 2M aligned
-* address, however L2 blocks are too large when the page size is
-* not 4k to map the kernel with such an aligned address. However,
-* when the page size is larger than 4k, L2 blocks are too large to
-* map the kernel with such an alignment.
+* Create L3 and L3C pages. The kernel will be loaded at a 2M aligned
+* address, enabling the creation of L3C pages. However, when the page
+* size is larger than 4k, L2 blocks are too large to map the kernel
+* with 2M alignment.
 */
 #definePTE_SHIFT   L3_SHIFT
 #defineBUILD_PTE_FUNC  build_l3_page_pagetable
@@ -784,13 +783,17 @@ LENTRY(link_l2_pagetable)
 LEND(link_l2_pagetable)
 
 /*
- * Builds count level 3 page table entries
+ * Builds count level 3 page table entries. Uses ATTR_CONTIGUOUS to create
+ * large page (L3C) mappings when the current VA and remaining count allow
+ * it.
  *  x6  = L3 table
  *  x7  = Block attributes
  *  x8  = VA start
  *  x9  = PA start (trashed)
  *  x10 = Entry count (trashed)
  *  x11, x12 and x13 are trashed
+ *
+ * VA start (x8) modulo L3C_SIZE must equal PA start (x9) modulo L3C_SIZE.
  */
 LENTRY(build_l3_page_pagetable)
/*
@@ -811,8 +814,17 @@ LENTRY(build_l3_page_pagetable)
/* Only use the output address bits */
lsr x9, x9, #L3_SHIFT
 
+   /* Check if an ATTR_CONTIGUOUS mapping is possible */
+1: tst x11, #(L3C_ENTRIES - 1)
+   b.ne2f
+   cmp x10, #L3C_ENTRIES
+   b.lo3f
+   orr x12, x12, #(ATTR_CONTIGUOUS)
+   b   2f
+3: and x12, x12, #(~ATTR_CONTIGUOUS)
+
/* Set the physical address for this virtual address */
-1: orr x13, x12, x9, lsl #L3_SHIFT
+2: orr x13, x12, x9, lsl #L3_SHIFT
 
/* Store the entry */
str x13, [x6, x11, lsl #3]



git: a803837cec6e - main - arm64 pmap: Add ATTR_CONTIGUOUS support [Part 3]

2024-05-07 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=a803837cec6e17e04849d59afac7b6431c70cb93

commit a803837cec6e17e04849d59afac7b6431c70cb93
Author: Alan Cox 
AuthorDate: 2024-04-17 16:39:46 +
Commit: Alan Cox 
CommitDate: 2024-05-08 02:31:14 +

arm64 pmap: Add ATTR_CONTIGUOUS support [Part 3]

Introduce L3C promotion of base page mappings.  When the base page size
is 4KB, use ATTR_CONTIGUOUS to promote 16 aligned, contiguous base page
mappings to a 64KB mapping.  Alternatively, when the base page size is
16KB, use ATTR_CONTIGUOUS to promote 128 aligned, contiguous base page
mappings to a 2MB mapping.

Given the frequency of L3C counter updates, switch to per-CPU counters
to avoid cache line ping ponging.

Revise the L3C counter descriptions to reflect the fact that the size
of an L3C mapping varies depending on the base page size.

Co-authored-by: Eliot Solomon 
Reviewed by:    markj
Differential Revision:  https://reviews.freebsd.org/D44983
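
The counter switch uses the kernel's counter(9) facility, which the diff below
adopts.  In isolation the pattern looks like the following sketch (hypothetical
counter name, headers approximate): counter_u64_add() increments a per-CPU
slot, so frequent updates from many CPUs do not bounce a shared cache line the
way the old u_long counters did, and the sysctl handler sums the slots on read.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/sysctl.h>

/* Hypothetical example counter; the real ones are the pmap_l3c_* counters. */
static COUNTER_U64_DEFINE_EARLY(example_events);
SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, example_events, CTLFLAG_RD,
    &example_events, "example of a per-CPU event counter");

static void
example_event_happened(void)
{
	counter_u64_add(example_events, 1);	/* lockless per-CPU increment */
}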
---
 sys/arm64/arm64/pmap.c | 168 -
 1 file changed, 154 insertions(+), 14 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index ea7ff18971e4..b1a85befa4e1 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1684,15 +1684,23 @@ SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, 
CTLFLAG_RD,
&pmap_l2_promotions, 0, "2MB page promotions");
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, l3c, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
-"64KB page mapping counters");
+"L3C (64KB/2MB) page mapping counters");
 
-static u_long pmap_l3c_demotions;
-SYSCTL_ULONG(_vm_pmap_l3c, OID_AUTO, demotions, CTLFLAG_RD,
-    &pmap_l3c_demotions, 0, "64KB page demotions");
+static COUNTER_U64_DEFINE_EARLY(pmap_l3c_demotions);
+SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, demotions, CTLFLAG_RD,
+    &pmap_l3c_demotions, "L3C (64KB/2MB) page demotions");
 
-static u_long pmap_l3c_mappings;
-SYSCTL_ULONG(_vm_pmap_l3c, OID_AUTO, mappings, CTLFLAG_RD,
-    &pmap_l3c_mappings, 0, "64KB page mappings");
+static COUNTER_U64_DEFINE_EARLY(pmap_l3c_mappings);
+SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, mappings, CTLFLAG_RD,
+    &pmap_l3c_mappings, "L3C (64KB/2MB) page mappings");
+
+static COUNTER_U64_DEFINE_EARLY(pmap_l3c_p_failures);
+SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, p_failures, CTLFLAG_RD,
+    &pmap_l3c_p_failures, "L3C (64KB/2MB) page promotion failures");
+
+static COUNTER_U64_DEFINE_EARLY(pmap_l3c_promotions);
+SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, promotions, CTLFLAG_RD,
+    &pmap_l3c_promotions, "L3C (64KB/2MB) page promotions");
 
 /*
  * If the given value for "final_only" is false, then any cached intermediate-
@@ -4547,7 +4555,7 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *ptep, 
pd_entry_t newpte,
 * be cached, so we invalidate intermediate entries as well as final
 * entries.
 */
-   pmap_s1_invalidate_range(pmap, va, va + size, false);
+   pmap_s1_invalidate_range(pmap, va, va + size, size == L3C_SIZE);
 
/* Create the new mapping */
for (lip = ptep; lip < ptep_end; lip++) {
@@ -4749,6 +4757,131 @@ setl3:
pmap);
return (true);
 }
+
+/*
+ * Tries to promote an aligned, contiguous set of base page mappings to a
+ * single L3C page mapping.  For promotion to occur, two conditions must be
+ * met: (1) the base page mappings must map aligned, contiguous physical
+ * memory and (2) the base page mappings must have identical characteristics
+ * except for the accessed flag.
+ */
+static bool
+pmap_promote_l3c(pmap_t pmap, pd_entry_t *l3p, vm_offset_t va)
+{
+   pd_entry_t all_l3e_AF, firstl3c, *l3, oldl3, pa;
+
+   PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+   /*
+* Currently, this function only supports promotion on stage 1 pmaps
+* because it tests stage 1 specific fields and performs a break-
+* before-make sequence that is incorrect for stage 2 pmaps.
+*/
+   if (pmap->pm_stage != PM_STAGE1 || !pmap_ps_enabled(pmap))
+   return (false);
+
+   /*
+* Compute the address of the first L3 entry in the superpage
+* candidate.
+*/
+   l3p = (pt_entry_t *)((uintptr_t)l3p & ~((L3C_ENTRIES *
+   sizeof(pt_entry_t)) - 1));
+
+   firstl3c = pmap_load(l3p);
+
+   /*
+* Examine the first L3 entry. Abort if this L3E is ineligible for
+* promotion...
+*/
+   if ((firstl3c & ATTR_SW_NO_PROMOTE) != 0)
+   return (false);
+   /* ...is not properly aligned... */
+   if ((PTE_TO_PHYS(firstl3c) & L3C_OFFSET) != 0 ||
+   (firstl3c & ATTR_DESCR_MASK) != L3_PAGE) { /* ...or is invalid. */
+   counter_u64_add(pmap_l3c_p_failures, 1);
+   CTR2(KTR

git: 841cf52595b6 - main - arm64 pmap: Add ATTR_CONTIGUOUS support [Part 2]

2024-04-09 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=841cf52595b6a6b98e266b63e54a7cf6fb6ca73e

commit 841cf52595b6a6b98e266b63e54a7cf6fb6ca73e
Author: Alan Cox 
AuthorDate: 2024-04-08 05:05:54 +
Commit: Alan Cox 
CommitDate: 2024-04-09 16:21:08 +

arm64 pmap: Add ATTR_CONTIGUOUS support [Part 2]

Create ATTR_CONTIGUOUS mappings in pmap_enter_object().  As a result,
when the base page size is 4 KB, the read-only data and text sections
of large (2 MB+) executables, e.g., clang, can be mapped using 64 KB
pages.  Similarly, when the base page size is 16 KB, the read-only
data section of large executables can be mapped using 2 MB pages.

Rename pmap_enter_2mpage().  Given that we have grown support for 16 KB
base pages, we should no longer include page sizes that may vary, e.g.,
2mpage, in pmap function names.  Requested by: andrew

Co-authored-by: Eliot Solomon 
Differential Revision:  https://reviews.freebsd.org/D44575
---
 sys/arm64/arm64/pmap.c | 252 +++--
 1 file changed, 245 insertions(+), 7 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 258aa141653b..ea7ff18971e4 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -472,6 +472,8 @@ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, 
vm_offset_t va,
 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
 static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2,
 u_int flags, vm_page_t m, struct rwlock **lockp);
+static int pmap_enter_l3c(pmap_t pmap, vm_offset_t va, pt_entry_t l3e, u_int 
flags,
+vm_page_t m, vm_page_t *ml3p, struct rwlock **lockp);
 static bool pmap_every_pte_zero(vm_paddr_t pa);
 static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
 bool all_l3e_AF_set);
@@ -5177,13 +5179,13 @@ out:
 }
 
 /*
- * Tries to create a read- and/or execute-only 2MB page mapping.  Returns
+ * Tries to create a read- and/or execute-only L2 page mapping.  Returns
  * KERN_SUCCESS if the mapping was created.  Otherwise, returns an error
  * value.  See pmap_enter_l2() for the possible error values when "no sleep",
  * "no replace", and "no reclaim" are specified.
  */
 static int
-pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+pmap_enter_l2_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 struct rwlock **lockp)
 {
pd_entry_t new_l2;
@@ -5233,13 +5235,13 @@ pmap_every_pte_zero(vm_paddr_t pa)
 }
 
 /*
- * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if
+ * Tries to create the specified L2 page mapping.  Returns KERN_SUCCESS if
  * the mapping was created, and one of KERN_FAILURE, KERN_NO_SPACE, or
  * KERN_RESOURCE_SHORTAGE otherwise.  Returns KERN_FAILURE if
- * PMAP_ENTER_NOREPLACE was specified and a 4KB page mapping already exists
- * within the 2MB virtual address range starting at the specified virtual
+ * PMAP_ENTER_NOREPLACE was specified and a base page mapping already exists
+ * within the L2 virtual address range starting at the specified virtual
  * address.  Returns KERN_NO_SPACE if PMAP_ENTER_NOREPLACE was specified and a
- * 2MB page mapping already exists at the specified virtual address.  Returns
+ * L2 page mapping already exists at the specified virtual address.  Returns
  * KERN_RESOURCE_SHORTAGE if either (1) PMAP_ENTER_NOSLEEP was specified and a
  * page table page allocation failed or (2) PMAP_ENTER_NORECLAIM was specified
  * and a PV entry allocation failed.
@@ -5405,6 +5407,235 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
new_l2, u_int flags,
return (KERN_SUCCESS);
 }
 
+/*
+ * Tries to create a read- and/or execute-only L3C page mapping.  Returns
+ * KERN_SUCCESS if the mapping was created.  Otherwise, returns an error
+ * value.
+ */
+static int
+pmap_enter_l3c_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *ml3p,
+vm_prot_t prot, struct rwlock **lockp)
+{
+   pt_entry_t l3e;
+
+   PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+   PMAP_ASSERT_STAGE1(pmap);
+   KASSERT(ADDR_IS_CANONICAL(va),
+   ("%s: Address not in canonical form: %lx", __func__, va));
+
+   l3e = PHYS_TO_PTE(VM_PAGE_TO_PHYS(m)) | ATTR_DEFAULT |
+   ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) |
+   ATTR_CONTIGUOUS | L3_PAGE;
+   l3e |= pmap_pte_bti(pmap, va);
+   if ((m->oflags & VPO_UNMANAGED) == 0) {
+   l3e |= ATTR_SW_MANAGED;
+   l3e &= ~ATTR_AF;
+   }
+   if ((prot & VM_PROT_EXECUTE) == 0 ||
+   m->md.pv_memattr == VM_MEMATTR_DEVICE)
+   l3e |= ATTR_S1_XN;
+   if (!ADDR_IS_KERNEL(va))
+   l3e |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
+   else
+   l3e |= ATTR_S1_UXN;
+   if (pmap != k
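
The eligibility test behind such a contiguous mapping reduces to an
alignment check on both addresses plus a size check.  A minimal sketch
follows; the constants assume 4 KB base pages and l3c_candidate() is an
illustrative helper, not the kernel's.

#include <stdbool.h>
#include <stdint.h>

#define L3C_ENTRIES     16                      /* assumed: 4 KB base pages */
#define L3C_SIZE        (L3C_ENTRIES * 4096UL)  /* 64 KB */
#define L3C_OFFSET      (L3C_SIZE - 1)

/*
 * A contiguous (L3C) mapping is only possible when the virtual and the
 * physical address are both 64 KB aligned and at least 64 KB remains.
 */
static bool
l3c_candidate(uint64_t va, uint64_t pa, uint64_t resid)
{
        return ((va & L3C_OFFSET) == 0 && (pa & L3C_OFFSET) == 0 &&
            resid >= L3C_SIZE);
}

int
main(void)
{
        return (l3c_candidate(0x10000, 0x200000, 0x200000) ? 0 : 1);
}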

git: 22c098843127 - main - arm64: correctly handle a failed BTI check in pmap_enter_l2()

2024-04-03 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=22c098843127f6a31e25e94b07b35677f038f6d6

commit 22c098843127f6a31e25e94b07b35677f038f6d6
Author: Alan Cox 
AuthorDate: 2024-04-03 05:21:08 +
Commit: Alan Cox 
CommitDate: 2024-04-03 16:19:30 +

arm64: correctly handle a failed BTI check in pmap_enter_l2()

If pmap_enter_l2() does not create a mapping because the BTI check
fails, then we should release the reference on the page table page
acquired from pmap_alloc_l2().  Otherwise, the page table page will
never be reclaimed.
---
 sys/arm64/arm64/pmap.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 12e5e1d73b38..258aa141653b 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -5269,8 +5269,11 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
new_l2, u_int flags,
 * and let vm_fault() cope.  Check after l2 allocation, since
 * it could sleep.
 */
-   if (!pmap_bti_same(pmap, va, va + L2_SIZE))
+   if (!pmap_bti_same(pmap, va, va + L2_SIZE)) {
+   KASSERT(l2pg != NULL, ("pmap_enter_l2: missing L2 PTP"));
+   pmap_abort_ptp(pmap, va, l2pg);
return (KERN_PROTECTION_FAILURE);
+   }
 
/*
 * If there are existing mappings, either abort or remove them.
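
The shape of that fix, reduced to a generic allocate/check/undo pattern;
the helper names and return values below are illustrative stand-ins, not
the kernel's.

#include <stdbool.h>
#include <stdlib.h>

static bool
check_ok(void)
{
        return (false);                 /* stand-in for the failing BTI check */
}

static int
enter_sketch(void)
{
        void *ptp;

        ptp = malloc(4096);             /* stand-in for pmap_alloc_l2() */
        if (ptp == NULL)
                return (1);
        if (!check_ok()) {
                free(ptp);              /* the fix: undo the allocation */
                return (2);             /* KERN_PROTECTION_FAILURE analogue */
        }
        /* ... create the mapping ... */
        free(ptp);
        return (0);
}

int
main(void)
{
        return (enter_sketch() == 2 ? 0 : 1);
}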



git: e0388a906ca7 - main - arm64: enable superpage mappings by pmap_mapdev{,_attr}()

2024-03-30 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e0388a906ca77d07c99e8762d47dccaaaefd8bab

commit e0388a906ca77d07c99e8762d47dccaaaefd8bab
Author: Alan Cox 
AuthorDate: 2024-03-30 20:35:32 +
Commit: Alan Cox 
CommitDate: 2024-03-30 20:41:30 +

arm64: enable superpage mappings by pmap_mapdev{,_attr}()

In order for pmap_kenter{,_device}() to create superpage mappings,
either 64 KB or 2 MB, pmap_mapdev{,_attr}() must request appropriately
aligned virtual addresses.

Reviewed by:markj
Tested by:  gallatin
Differential Revision:  https://reviews.freebsd.org/D42737
---
 sys/kern/subr_devmap.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/sys/kern/subr_devmap.c b/sys/kern/subr_devmap.c
index 5976f16c7577..441ffeb1270a 100644
--- a/sys/kern/subr_devmap.c
+++ b/sys/kern/subr_devmap.c
@@ -273,6 +273,13 @@ pmap_mapdev(vm_paddr_t pa, vm_size_t size)
KASSERT(va >= VM_MAX_KERNEL_ADDRESS - PMAP_MAPDEV_EARLY_SIZE,
("Too many early devmap mappings"));
} else
+#endif
+#ifdef __aarch64__
+   if (size >= L2_SIZE && (pa & L2_OFFSET) == 0)
+   va = kva_alloc_aligned(size, L2_SIZE);
+   else if (size >= L3C_SIZE && (pa & L3C_OFFSET) == 0)
+   va = kva_alloc_aligned(size, L3C_SIZE);
+   else
 #endif
va = kva_alloc(size);
if (!va)
@@ -304,6 +311,13 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, 
vm_memattr_t ma)
KASSERT(va >= (VM_MAX_KERNEL_ADDRESS - 
(PMAP_MAPDEV_EARLY_SIZE)),
("Too many early devmap mappings 2"));
} else
+#ifdef __aarch64__
+   if (size >= L2_SIZE && (pa & L2_OFFSET) == 0)
+   va = kva_alloc_aligned(size, L2_SIZE);
+   else if (size >= L3C_SIZE && (pa & L3C_OFFSET) == 0)
+   va = kva_alloc_aligned(size, L3C_SIZE);
+   else
+#endif
va = kva_alloc(size);
if (!va)
panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
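
The virtual-address alignment request described above amounts to picking
the largest granule that both the size and the physical address can use.
A userland sketch with assumed 4 KB base-page constants; devmap_alignment()
is illustrative, not a kernel function.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define L3C_SIZE        0x10000UL       /* 64 KB, assumed 4 KB base pages */
#define L3C_OFFSET      (L3C_SIZE - 1)
#define L2_SIZE         0x200000UL      /* 2 MB */
#define L2_OFFSET       (L2_SIZE - 1)

/* Pick the strongest usable alignment: L2 block, else L3C run, else a page. */
static uint64_t
devmap_alignment(uint64_t pa, uint64_t size)
{
        if (size >= L2_SIZE && (pa & L2_OFFSET) == 0)
                return (L2_SIZE);
        if (size >= L3C_SIZE && (pa & L3C_OFFSET) == 0)
                return (L3C_SIZE);
        return (4096);
}

int
main(void)
{
        printf("%#" PRIx64 "\n", devmap_alignment(0x40000000, 0x300000));
        printf("%#" PRIx64 "\n", devmap_alignment(0x40010000, 0x300000));
        return (0);
}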



git: fd6cb031f577 - main - arm64 pmap: Add ATTR_CONTIGUOUS support [Part 1]

2024-03-30 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=fd6cb031f577a449894e73daa8f6bd309ba27c73

commit fd6cb031f577a449894e73daa8f6bd309ba27c73
Author: Eliot Solomon 
AuthorDate: 2024-03-24 19:01:47 +
Commit: Alan Cox 
CommitDate: 2024-03-30 18:37:17 +

arm64 pmap: Add ATTR_CONTIGUOUS support [Part 1]

The ATTR_CONTIGUOUS bit within an L3 page table entry designates that
L3 page as being part of an aligned, physically contiguous collection
of L3 pages.  For example, 16 aligned, physically contiguous 4 KB pages
can form a 64 KB superpage, occupying a single TLB entry.  While this
change only creates ATTR_CONTIGUOUS mappings in a few places,
specifically, the direct map and pmap_kenter{,_device}(), it adds all
of the necessary code for handling them once they exist, including
demotion, protection, and removal.  Consequently, new ATTR_CONTIGUOUS
usage can be added (and tested) incrementally.

Modify the implementation of sysctl vm.pmap.kernel_maps so that it
correctly reports the number of ATTR_CONTIGUOUS mappings on machines
configured to use a 16 KB base page size, where an ATTR_CONTIGUOUS
mapping consists of 128 base pages.

Additionally, this change adds support for creating L2 superpage
mappings to pmap_kenter{,_device}().

Reviewed by:markj
Tested by:  gallatin
Differential Revision:  https://reviews.freebsd.org/D42737
---
 sys/arm64/arm64/pmap.c  | 767 +---
 sys/arm64/include/pte.h |  21 ++
 2 files changed, 740 insertions(+), 48 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index ba72f1dac8d0..12e5e1d73b38 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -461,18 +461,33 @@ static bool pmap_activate_int(pmap_t pmap);
 static void pmap_alloc_asid(pmap_t pmap);
 static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
 vm_prot_t prot, int mode, bool skip_unmapped);
+static bool pmap_copy_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va,
+pt_entry_t l3e, vm_page_t ml3, struct rwlock **lockp);
 static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
 static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
 vm_offset_t va, struct rwlock **lockp);
 static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
+static bool pmap_demote_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
 static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2,
 u_int flags, vm_page_t m, struct rwlock **lockp);
+static bool pmap_every_pte_zero(vm_paddr_t pa);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool all_l3e_AF_set);
+static pt_entry_t pmap_load_l3c(pt_entry_t *l3p);
+static void pmap_mask_set_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va,
+vm_offset_t *vap, vm_offset_t va_next, pt_entry_t mask, pt_entry_t nbits);
+static bool pmap_pv_insert_l3c(pmap_t pmap, vm_offset_t va, vm_page_t m,
+struct rwlock **lockp);
+static void pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
 static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
 pd_entry_t l1e, struct spglist *free, struct rwlock **lockp);
 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
 pd_entry_t l2e, struct spglist *free, struct rwlock **lockp);
+static bool pmap_remove_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va,
+vm_offset_t *vap, vm_offset_t va_next, vm_page_t ml3, struct spglist *free,
+struct rwlock **lockp);
 static void pmap_reset_asid_set(pmap_t pmap);
 static bool pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
 vm_page_t m, struct rwlock **lockp);
@@ -483,6 +498,8 @@ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t 
ptepindex,
 static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
 struct spglist *free);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
+static void pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
+vm_offset_t va, vm_size_t size);
 static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
 
 static uma_zone_t pmap_bti_ranges_zone;
@@ -1121,19 +1138,20 @@ pmap_bootstrap_l2_block(struct pmap_bootstrap_state 
*state, int i)
 static void
 pmap_bootstrap_l3_page(struct pmap_bootstrap_state *state, int i)
 {
+   pt_entry_t contig;
u_int l3_slot;
bool first;
 
-   if ((physmap[i + 1] - state->pa) < L3_SIZE)
+   if (physmap[i + 1] - state->pa < L3_SIZE)
return;
 
/* Make sure there is a valid L2 table */
pmap_bootstrap_l2_table(state);
 
MPASS((state->va &a
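
The run lengths given in the commit message above (16 base pages at 4 KB,
128 at 16 KB) work out to the stated superpage sizes; a throwaway check of
the arithmetic:

#include <assert.h>

int
main(void)
{
        assert(16 * 4 == 64);           /* 4 KB pages: 16 entries -> 64 KB run */
        assert(128 * 16 == 2048);       /* 16 KB pages: 128 entries -> 2 MB run */
        return (0);
}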

git: 9fabf97682ce - main - arm64: fix free queue and reservation configuration for 16KB pages

2024-03-24 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=9fabf97682ce494865c8b26c218f2d00a36c99ea

commit 9fabf97682ce494865c8b26c218f2d00a36c99ea
Author: Eliot Solomon 
AuthorDate: 2023-11-18 21:13:21 +
Commit: Alan Cox 
CommitDate: 2024-03-24 17:22:20 +

arm64: fix free queue and reservation configuration for 16KB pages

Correctly configure the free page queues and the reservation size when
the base page size is 16KB.  In particular, the reservation size was
less than the L2 Block size, making L2 promotions and mappings all but
impossible.

Reviewed by:markj
Tested by:  gallatin
Differential Revision:  https://reviews.freebsd.org/D42737
---
 sys/arm64/arm64/copyinout.S |  1 +
 sys/arm64/include/vmparam.h | 18 +-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/sys/arm64/arm64/copyinout.S b/sys/arm64/arm64/copyinout.S
index 005fa61bfe82..23f56ae85daa 100644
--- a/sys/arm64/arm64/copyinout.S
+++ b/sys/arm64/arm64/copyinout.S
@@ -30,6 +30,7 @@
 #include 
 #include 
 
+#include 
 #include 
 
 #include "assym.inc"
diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h
index 0967d3c0aedf..d5d4a5691f37 100644
--- a/sys/arm64/include/vmparam.h
+++ b/sys/arm64/include/vmparam.h
@@ -99,8 +99,17 @@
  * are used by UMA, the physical memory allocator reduces the likelihood of
  * both 2MB page TLB misses and cache misses during the page table walk when
  * a 2MB page TLB miss does occur.
+ *
+ * When PAGE_SIZE is 16KB, an allocation size of 32MB is supported.  This
+ * size is used by level 0 reservations and L2 BLOCK mappings.
  */
+#if PAGE_SIZE == PAGE_SIZE_4K
 #defineVM_NFREEORDER   13
+#elif PAGE_SIZE == PAGE_SIZE_16K
+#defineVM_NFREEORDER   12
+#else
+#error Unsupported page size
+#endif
 
 /*
  * Enable superpage reservations: 1 level.
@@ -110,10 +119,17 @@
 #endif
 
 /*
- * Level 0 reservations consist of 512 pages.
+ * Level 0 reservations consist of 512 pages when PAGE_SIZE is 4KB, and
+ * 2048 pages when PAGE_SIZE is 16KB.
  */
 #ifndefVM_LEVEL_0_ORDER
+#if PAGE_SIZE == PAGE_SIZE_4K
 #defineVM_LEVEL_0_ORDER9
+#elif PAGE_SIZE == PAGE_SIZE_16K
+#defineVM_LEVEL_0_ORDER11
+#else
+#error Unsupported page size
+#endif
 #endif
 
 /**
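
The new orders translate into these reservation sizes, which is what makes
the reservation at least as large as an L2 block again; a quick check of the
arithmetic from the commit message:

#include <assert.h>

int
main(void)
{
        /* VM_LEVEL_0_ORDER 9 with 4 KB pages: 512 pages = 2 MB. */
        assert((1 << 9) * 4096 == 2 * 1024 * 1024);
        /* VM_LEVEL_0_ORDER 11 with 16 KB pages: 2048 pages = 32 MB. */
        assert((1 << 11) * 16384 == 32 * 1024 * 1024);
        return (0);
}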



git: 902ed64fecbe - main - i386 pmap: Adapt recent amd64/arm64 superpage improvements

2023-09-26 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=902ed64fecbe078e1cdd527b97af3958b413da11

commit 902ed64fecbe078e1cdd527b97af3958b413da11
Author: Alan Cox 
AuthorDate: 2023-09-24 18:21:36 +
Commit: Alan Cox 
CommitDate: 2023-09-26 17:41:20 +

i386 pmap: Adapt recent amd64/arm64 superpage improvements

Don't recompute mpte during promotion.

Optimize MADV_WILLNEED on existing superpages.

Standardize promotion conditions across amd64, arm64, and i386.

Stop requiring the accessed bit for superpage promotion.

Tidy up pmap_promote_pde() calls.

Retire PMAP_INLINE.  It's no longer used.

Note: Some of these changes are a prerequisite to fixing a panic that
arises when attempting to create a wired superpage mapping by
pmap_enter(psind=1) (as opposed to promotion).

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D41944
---
 sys/i386/i386/pmap.c| 200 
 sys/i386/include/pmap.h |   2 +-
 2 files changed, 137 insertions(+), 65 deletions(-)

diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 4198849b1a5a..2d19fc51dd53 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -159,16 +159,6 @@
 #endif
 #include 
 
-#if !defined(DIAGNOSTIC)
-#ifdef __GNUC_GNU_INLINE__
-#define PMAP_INLINE__attribute__((__gnu_inline__)) inline
-#else
-#define PMAP_INLINEextern inline
-#endif
-#else
-#define PMAP_INLINE
-#endif
-
 #ifdef PV_STATS
 #define PV_STAT(x) do { x ; } while (0)
 #else
@@ -311,13 +301,14 @@ static intpmap_pvh_wired_mappings(struct md_page 
*pvh, int count);
 
 static voidpmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
 static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
-static boolpmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
+static int pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot);
 static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
u_int flags, vm_page_t m);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
 vm_page_t m, vm_prot_t prot, vm_page_t mpte);
-static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool allpte_PG_A_set);
 static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
@@ -327,7 +318,8 @@ static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, 
int mode);
 static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
 static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 #if VM_NRESERVLEVEL > 0
-static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static bool pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+vm_page_t mpte);
 #endif
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t 
sva,
 vm_prot_t prot);
@@ -993,7 +985,7 @@ __CONCAT(PMTYPE, init)(void)
 */
if (pseflag != 0 &&
KERNBASE <= i << PDRSHIFT && i << PDRSHIFT < KERNend &&
-   pmap_insert_pt_page(kernel_pmap, mpte, true))
+   pmap_insert_pt_page(kernel_pmap, mpte, true, true))
panic("pmap_init: pmap_insert_pt_page failed");
}
PMAP_UNLOCK(kernel_pmap);
@@ -1928,14 +1920,26 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist 
*free,
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
  *
- * If "promoted" is false, then the page table page "mpte" must be zero filled.
+ * If "promoted" is false, then the page table page "mpte" must be zero filled;
+ * "mpte"'s valid field will be set to 0.
+ *
+ * If "promoted" is true and "allpte_PG_A_set" is false, then "mpte" must
+ * contain valid mappings with identical attributes except for PG_A; "mpte"'s
+ * valid field will be set to 1.
+ *
+ * If "promoted" and "allpte_PG_A_set" are both true, then "mpte" must contain
+ * valid mappings with identical attributes including PG_A; "mpte"'s valid
+ * field will be set to VM_PAGE_BITS_ALL.
  */
 static __inline int
-pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool allpte_PG_A_set)
 {
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-   mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
+   KASSERT(promoted || !allpte_P
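
The three cases in that comment map onto the page's valid field like so; a
sketch where VM_PAGE_BITS_ALL is assumed to be 0xff and pt_page_valid() is
an illustrative helper, not the pmap's code.

#include <assert.h>

#define VM_PAGE_BITS_ALL        0xff    /* assumption: 8 valid bits per page */

/*
 * 0 = zero-filled page table page, 1 = valid PTEs that may lack PG_A,
 * all bits = valid PTEs that all have PG_A set.
 */
static int
pt_page_valid(int promoted, int allpte_PG_A_set)
{
        if (!promoted)
                return (0);
        return (allpte_PG_A_set ? VM_PAGE_BITS_ALL : 1);
}

int
main(void)
{
        assert(pt_page_valid(0, 0) == 0);
        assert(pt_page_valid(1, 0) == 1);
        assert(pt_page_valid(1, 1) == VM_PAGE_BITS_ALL);
        return (0);
}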

Re: [Sdcc-user] Assembler variable overflow

2023-08-17 Thread Alan Cox
> How do you check over/underflow in C without wasting memory?

If your compiler is smart enough then for unsigned maths

r = a + b;
if (r < a)

will be optimized nicely. I've not checked if sdcc knows that and will
turn it into a carry check at least for uint8 and uint16.
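
A compilable version of that carry-check idiom, here for uint16_t (an
arbitrary choice):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Returns false when the addition wrapped around. */
static bool
add_u16_checked(uint16_t a, uint16_t b, uint16_t *r)
{
        *r = a + b;
        return (*r >= a);
}

int
main(void)
{
        uint16_t r;

        assert(add_u16_checked(1000, 2000, &r) && r == 3000);
        assert(!add_u16_checked(65000, 1000, &r));
        return (0);
}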

For signed maths there is no nice portable approach because signed maths
overflow is undefined in C and indeed there are even processors where you
get a maths exception for your trouble.

Alan


___
Sdcc-user mailing list
Sdcc-user@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sdcc-user


git: 37e5d49e1e5e - main - vm: Fix address hints of 0 with MAP_32BIT

2023-08-12 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=37e5d49e1e5e750bf2a200ef2e117d14c4e9a578

commit 37e5d49e1e5e750bf2a200ef2e117d14c4e9a578
Author: Alan Cox 
AuthorDate: 2023-08-03 07:07:14 +
Commit: Alan Cox 
CommitDate: 2023-08-12 07:35:21 +

vm: Fix address hints of 0 with MAP_32BIT

Also, rename min_addr to default_addr, which better reflects what it
represents.  The min_addr is not a minimum address in the same way that
max_addr is actually a maximum address that can be allocated.  For
example, a non-zero hint can be less than min_addr and be allocated.

Reported by:dchagin
Reviewed by:dchagin, kib, markj
Fixes:  d8e6f4946cec0 "vm: Fix anonymous memory clustering under ASLR"
Differential Revision:  https://reviews.freebsd.org/D41397
---
 sys/vm/vm_map.c  | 16 
 sys/vm/vm_mmap.c | 14 ++
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 8d98af7709cd..c77c00b8b5c6 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2255,19 +2255,19 @@ done:
 
 /*
  * vm_map_find_min() is a variant of vm_map_find() that takes an
- * additional parameter (min_addr) and treats the given address
- * (*addr) differently.  Specifically, it treats *addr as a hint
+ * additional parameter ("default_addr") and treats the given address
+ * ("*addr") differently.  Specifically, it treats "*addr" as a hint
  * and not as the minimum address where the mapping is created.
  *
  * This function works in two phases.  First, it tries to
  * allocate above the hint.  If that fails and the hint is
- * greater than min_addr, it performs a second pass, replacing
- * the hint with min_addr as the minimum address for the
+ * greater than "default_addr", it performs a second pass, replacing
+ * the hint with "default_addr" as the minimum address for the
  * allocation.
  */
 int
 vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
-vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr,
+vm_offset_t *addr, vm_size_t length, vm_offset_t default_addr,
 vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max,
 int cow)
 {
@@ -2277,14 +2277,14 @@ vm_map_find_min(vm_map_t map, vm_object_t object, 
vm_ooffset_t offset,
hint = *addr;
if (hint == 0) {
cow |= MAP_NO_HINT;
-   *addr = hint = min_addr;
+   *addr = hint = default_addr;
}
for (;;) {
rv = vm_map_find(map, object, offset, addr, length, max_addr,
find_space, prot, max, cow);
-   if (rv == KERN_SUCCESS || min_addr >= hint)
+   if (rv == KERN_SUCCESS || default_addr >= hint)
return (rv);
-   *addr = hint = min_addr;
+   *addr = hint = default_addr;
}
 }
 
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 7876a055ca91..d904c4f38e40 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1555,7 +1555,7 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t 
size, vm_prot_t prot,
 vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
 boolean_t writecounted, struct thread *td)
 {
-   vm_offset_t max_addr;
+   vm_offset_t default_addr, max_addr;
int docow, error, findspace, rv;
bool curmap, fitit;
 
@@ -1630,10 +1630,16 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, 
vm_size_t size, vm_prot_t prot,
max_addr = MAP_32BIT_MAX_ADDR;
 #endif
if (curmap) {
-   rv = vm_map_find_min(map, object, foff, addr, size,
+   default_addr =
round_page((vm_offset_t)td->td_proc->p_vmspace->
-   vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
-   findspace, prot, maxprot, docow);
+   vm_daddr + lim_max(td, RLIMIT_DATA));
+#ifdef MAP_32BIT
+   if ((flags & MAP_32BIT) != 0)
+   default_addr = 0;
+#endif
+   rv = vm_map_find_min(map, object, foff, addr, size,
+   default_addr, max_addr, findspace, prot, maxprot,
+   docow);
} else {
rv = vm_map_find(map, object, foff, addr, size,
max_addr, findspace, prot, maxprot, docow);
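
A userland sketch of the two-phase search that the message describes, with
a toy allocator standing in for vm_map_find(); toy_find(), the addresses,
and find_min_sketch() are all illustrative assumptions.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy allocator: succeeds at or above 0x4000, fails for hints above 0x8000. */
static bool
toy_find(uint64_t start, uint64_t *addr)
{
        if (start > 0x8000)
                return (false);
        *addr = start < 0x4000 ? 0x4000 : start;
        return (true);
}

/* First pass from the hint; second pass from default_addr if the hint was higher. */
static bool
find_min_sketch(uint64_t hint, uint64_t default_addr, uint64_t *addr)
{
        bool ok;

        if (hint == 0)
                hint = default_addr;
        for (;;) {
                ok = toy_find(hint, addr);
                if (ok || default_addr >= hint)
                        return (ok);
                hint = default_addr;
        }
}

int
main(void)
{
        uint64_t addr = 0;
        bool ok = find_min_sketch(0x9000, 0x1000, &addr);

        printf("%d %#" PRIx64 "\n", ok, addr);  /* prints "1 0x4000" */
        return (0);
}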



Re: git: 50d663b14b31 - main - vm: Fix vm_map_find_min()

2023-07-30 Thread Alan Cox
I see.  That change fixed the case where the address hint is non-zero, 
e.g., 0x10, but not zero.


On 7/30/23 05:58, Dmitry Chagin wrote:

On Sun, Jul 30, 2023 at 01:30:37PM +0300, Dmitry Chagin wrote:

On Wed, Jul 26, 2023 at 05:25:37AM +, Alan Cox wrote:

The branch main has been updated by alc:

URL: 
https://urldefense.com/v3/__https://cgit.FreeBSD.org/src/commit/?id=50d663b14b310d6020b4b6cc92d4fae985f086f2__;!!BuQPrrmRaQ!mJGmkdRJ06TT6ocFnVe7lPS7hSNIIhrCQH9IKMbB6XZVJuiUc2_wPJ55o1zzD6AhClmQwgQKHvKnW4rs75yVrtax$

commit 50d663b14b310d6020b4b6cc92d4fae985f086f2
Author: Alan Cox 
AuthorDate: 2023-07-25 07:24:19 +
Commit: Alan Cox 
CommitDate: 2023-07-26 05:24:50 +

 vm: Fix vm_map_find_min()
 
 Fix the handling of address hints that are less than min_addr by

 vm_map_find_min().
 

Thank you for fixing that, however it still fails under Linuxulator.


#include <sys/types.h>
#include <sys/stat.h>

#include <assert.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
        struct stat sb;
        void *s32;
        int f, r;

        f = open(argv[0], O_RDONLY);
        assert(f > 0);

        r = fstat(f, &sb);
        assert(r == 0);

        s32 = mmap(NULL, sb.st_size, PROT_READ,
            MAP_32BIT|MAP_PRIVATE, f, 0);
        assert(s32 != MAP_FAILED);
        assert((uintptr_t)s32 < 0x80000000);

        close(f);
        munmap(s32, sb.st_size);
        return (0);
}


hmm, it also fails natively with disable aslr





 Reported by:dchagin
 Reviewed by:kib
 Fixes:  d8e6f4946cec0 "vm: Fix anonymous memory clustering under ASLR"
 Differential Revision:  
https://urldefense.com/v3/__https://reviews.freebsd.org/D41159__;!!BuQPrrmRaQ!mJGmkdRJ06TT6ocFnVe7lPS7hSNIIhrCQH9IKMbB6XZVJuiUc2_wPJ55o1zzD6AhClmQwgQKHvKnW4rs70ygLqzX$
---
  sys/vm/vm_map.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 444e09986d4e..eb607d519247 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2255,10 +2255,10 @@ vm_map_find_min(vm_map_t map, vm_object_t object, 
vm_ooffset_t offset,
int rv;
  
  	hint = *addr;

-   if (hint == 0)
+   if (hint == 0) {
cow |= MAP_NO_HINT;
-   if (hint < min_addr)
*addr = hint = min_addr;
+   }
for (;;) {
rv = vm_map_find(map, object, offset, addr, length, max_addr,
find_space, prot, max, cow);




git: 3d7c37425ee0 - main - amd64 pmap: Catch up with pctrie changes

2023-07-28 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3d7c37425ee07186c65d424306c1b295c30fa592

commit 3d7c37425ee07186c65d424306c1b295c30fa592
Author: Alan Cox 
AuthorDate: 2023-07-28 20:13:13 +
Commit: Alan Cox 
CommitDate: 2023-07-28 20:13:13 +

amd64 pmap: Catch up with pctrie changes

Recent changes to the pctrie code make it necessary to initialize the
kernel pmap's rangeset for PKU.
---
 sys/amd64/amd64/pmap.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index a4b8c6dc4c06..c1968fc11844 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1995,6 +1995,10 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
kernel_pmap->pm_stats.resident_count = res;
vm_radix_init(&kernel_pmap->pm_root);
kernel_pmap->pm_flags = pmap_flags;
+   if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
+   rangeset_init(&kernel_pmap->pm_pkru, pkru_dup_range,
+   pkru_free_range, kernel_pmap, M_NOWAIT);
+   }
 
/*
 * The kernel pmap is always active on all CPUs.  Once CPUs are



git: 5ec2d94ade51 - main - vm_mmap_object: Update the spelling of true/false

2023-07-26 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=5ec2d94ade51b2f2f129cf0c7f695582c7dccb81

commit 5ec2d94ade51b2f2f129cf0c7f695582c7dccb81
Author: Alan Cox 
AuthorDate: 2023-07-26 05:58:51 +
Commit: Alan Cox 
CommitDate: 2023-07-27 05:25:53 +

vm_mmap_object: Update the spelling of true/false

Since fitit is already a bool, use true/false instead of TRUE/FALSE.

MFC after:  2 weeks
---
 sys/vm/vm_mmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 408e077476dd..328fef007b1e 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1577,12 +1577,12 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, 
vm_size_t size, vm_prot_t prot,
return (EINVAL);
 
if ((flags & MAP_FIXED) == 0) {
-   fitit = TRUE;
+   fitit = true;
*addr = round_page(*addr);
} else {
if (*addr != trunc_page(*addr))
return (EINVAL);
-   fitit = FALSE;
+   fitit = false;
}
 
if (flags & MAP_ANON) {



git: a98a0090b2ba - main - arm64 pmap: Eliminate unnecessary TLB invalidations

2023-07-25 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=a98a0090b2ba64ff0bc3cf71a00fb5f9e31fc1d3

commit a98a0090b2ba64ff0bc3cf71a00fb5f9e31fc1d3
Author: Alan Cox 
AuthorDate: 2023-07-23 07:11:43 +
Commit: Alan Cox 
CommitDate: 2023-07-26 05:37:13 +

arm64 pmap: Eliminate unnecessary TLB invalidations

Eliminate unnecessary TLB invalidations by pmap_kenter(),
pmap_qenter(), and pmap_mapbios() when the old page table entries
were invalid.

While I'm here, correct some nearby whitespace issues.

MFC after:  2 weeks
---
 sys/arm64/arm64/pmap.c | 49 ++---
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 379296f375ae..fa09d2026550 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1972,19 +1972,20 @@ void
 pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
 {
pd_entry_t *pde;
-   pt_entry_t *pte, attr;
+   pt_entry_t attr, old_l3e, *pte;
vm_offset_t va;
int lvl;
 
KASSERT((pa & L3_OFFSET) == 0,
-  ("pmap_kenter: Invalid physical address"));
+   ("pmap_kenter: Invalid physical address"));
KASSERT((sva & L3_OFFSET) == 0,
-  ("pmap_kenter: Invalid virtual address"));
+   ("pmap_kenter: Invalid virtual address"));
KASSERT((size & PAGE_MASK) == 0,
("pmap_kenter: Mapping is not page-sized"));
 
attr = ATTR_DEFAULT | ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN |
ATTR_S1_IDX(mode) | L3_PAGE;
+   old_l3e = 0;
va = sva;
while (size != 0) {
pde = pmap_pde(kernel_pmap, va, &lvl);
@@ -1993,13 +1994,21 @@ pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t 
pa, int mode)
KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));
 
pte = pmap_l2_to_l3(pde, va);
-   pmap_load_store(pte, PHYS_TO_PTE(pa) | attr);
+   old_l3e |= pmap_load_store(pte, PHYS_TO_PTE(pa) | attr);
 
va += PAGE_SIZE;
pa += PAGE_SIZE;
size -= PAGE_SIZE;
}
-   pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
+   if ((old_l3e & ATTR_DESCR_VALID) != 0)
+   pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
+   else {
+   /*
+* Because the old entries were invalid and the new mappings
+* are not executable, an isb is not required.
+*/
+   dsb(ishst);
+   }
 }
 
 void
@@ -2082,11 +2091,12 @@ void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
pd_entry_t *pde;
-   pt_entry_t *pte, pa, attr;
+   pt_entry_t attr, old_l3e, pa, *pte;
vm_offset_t va;
vm_page_t m;
int i, lvl;
 
+   old_l3e = 0;
va = sva;
for (i = 0; i < count; i++) {
pde = pmap_pde(kernel_pmap, va, &lvl);
@@ -2100,11 +2110,19 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
attr = ATTR_DEFAULT | ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN |
ATTR_S1_IDX(m->md.pv_memattr) | L3_PAGE;
pte = pmap_l2_to_l3(pde, va);
-   pmap_load_store(pte, PHYS_TO_PTE(pa) | attr);
+   old_l3e |= pmap_load_store(pte, PHYS_TO_PTE(pa) | attr);
 
va += L3_SIZE;
}
-   pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
+   if ((old_l3e & ATTR_DESCR_VALID) != 0)
+   pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
+   else {
+   /*
+* Because the old entries were invalid and the new mappings
+* are not executable, an isb is not required.
+*/
+   dsb(ishst);
+   }
 }
 
 /*
@@ -6441,7 +6459,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 {
struct pmap_preinit_mapping *ppim;
vm_offset_t va, offset;
-   pd_entry_t *pde;
+   pd_entry_t old_l2e, *pde;
pt_entry_t *l2;
int i, lvl, l2_blocks, free_l2_count, start_idx;
 
@@ -6501,6 +6519,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 
/* Map L2 blocks */
pa = rounddown2(pa, L2_SIZE);
+   old_l2e = 0;
for (i = 0; i < l2_blocks; i++) {
pde = pmap_pde(kernel_pmap, va, &lvl);
KASSERT(pde != NULL,
@@ -6511,14 +6530,22 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 
/* Insert L2_BLOCK */
l2 = pmap_l1_to_l2(pde, va);
-   pmap_load_store(l2,
+   old_l2e |= pmap_load_store(l2,
PHYS_TO_PTE(pa) | ATTR_DEFAULT | ATTR_S1_XN |
  

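The commit above applies the same "skip the TLB invalidation when nothing valid was replaced" pattern in pmap_kenter(), pmap_qenter(), and pmap_mapbios().  Distilled into a single sketch for reference (illustrative only -- the helper name and the eva/attr parameters are assumptions of this note, not part of the commit):

static void
sketch_kenter_range(vm_offset_t sva, vm_offset_t eva, vm_paddr_t pa,
    pt_entry_t attr)
{
	pd_entry_t *pde;
	pt_entry_t old_l3e, *pte;
	vm_offset_t va;
	int lvl;

	old_l3e = 0;
	for (va = sva; va < eva; va += PAGE_SIZE, pa += PAGE_SIZE) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(lvl == 2, ("unexpected level %d", lvl));
		pte = pmap_l2_to_l3(pde, va);
		/* Accumulate the descriptor bits of every replaced entry. */
		old_l3e |= pmap_load_store(pte, PHYS_TO_PTE(pa) | attr);
	}
	if ((old_l3e & ATTR_DESCR_VALID) != 0) {
		/* Some old entry was valid, so a stale translation may be
		 * cached: invalidate the range. */
		pmap_s1_invalidate_range(kernel_pmap, sva, eva, true);
	} else {
		/* Every old entry was invalid and the new mappings are not
		 * executable, so ordering the PTE stores is sufficient. */
		dsb(ishst);
	}
}
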
git: 50d663b14b31 - main - vm: Fix vm_map_find_min()

2023-07-25 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=50d663b14b310d6020b4b6cc92d4fae985f086f2

commit 50d663b14b310d6020b4b6cc92d4fae985f086f2
Author: Alan Cox 
AuthorDate: 2023-07-25 07:24:19 +
Commit: Alan Cox 
CommitDate: 2023-07-26 05:24:50 +

vm: Fix vm_map_find_min()

Fix the handling of address hints that are less than min_addr by
vm_map_find_min().

Reported by:dchagin
Reviewed by:kib
Fixes:  d8e6f4946cec0 "vm: Fix anonymous memory clustering under ASLR"
Differential Revision:  https://reviews.freebsd.org/D41159
---
 sys/vm/vm_map.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 444e09986d4e..eb607d519247 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2255,10 +2255,10 @@ vm_map_find_min(vm_map_t map, vm_object_t object, 
vm_ooffset_t offset,
int rv;
 
hint = *addr;
-   if (hint == 0)
+   if (hint == 0) {
cow |= MAP_NO_HINT;
-   if (hint < min_addr)
*addr = hint = min_addr;
+   }
for (;;) {
rv = vm_map_find(map, object, offset, addr, length, max_addr,
find_space, prot, max, cow);



Re: [Sdcc-user] Interest in an r800 port to better support R800 and Z280?

2023-07-24 Thread Alan Cox
On Sun, 23 Jul 2023 10:19:32 +0200
Philipp Klaus Krause  wrote:

> I wonder if SDCC users would be interested in an r800 port (and maybe 
> help a bit with the work).
> This port would target the R800 (its instruction set is a superset of 
> the Z80 and subset of the Z280), and thus be useful to both R800 and 
> Z280 users.
> 
> Feature request: https://sourceforge.net/p/sdcc/feature-requests/882/
> 
> The effort required would be substantially less than for a full z280 port.

Some Z280 support would be useful (the full Z280 assembler changes were
already done but never merged upstream, it seems). If it's just the R800
mul and the use of ixl/ixh, it would still have some use - and, if it can
be done without the multiply, also for Z80-only code, where those
instructions are undocumented but always work.

The big stuff in Z280 proper, though, is the stack-relative and
index-relative word operations, which don't seem much different from the
existing paths for rabbit and ez80?

Alan



___
Sdcc-user mailing list
Sdcc-user@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sdcc-user


git: 7b1e606c7222 - main - arm64 pmap: Retire PMAP_INLINE

2023-07-22 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7b1e606c7acdaea613924f566ffe9b65c068

commit 7b1e606c7acdaea613924f566ffe9b65c068
Author: Alan Cox 
AuthorDate: 2023-07-22 17:55:43 +
Commit: Alan Cox 
CommitDate: 2023-07-23 05:34:17 +

arm64 pmap: Retire PMAP_INLINE

Neither of the remaining callers to pmap_kremove() warrant inlining.
Those calls rarely occur.  In other words, we were optimizing for the
uncommon case.

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index dfed0142f273..379296f375ae 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -170,16 +170,6 @@ __FBSDID("$FreeBSD$");
#define NUL1E   (NUL0E * NL1PG)
#define NUL2E   (NUL1E * NL2PG)
 
-#if !defined(DIAGNOSTIC)
-#ifdef __GNUC_GNU_INLINE__
-#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
-#else
-#define PMAP_INLINE extern inline
-#endif
-#else
-#define PMAP_INLINE
-#endif
-
 #ifdef PV_STATS
 #define PV_STAT(x) do { x ; } while (0)
 #define __pvused
@@ -2022,7 +2012,7 @@ pmap_kenter_device(vm_offset_t sva, vm_size_t size, 
vm_paddr_t pa)
 /*
  * Remove a page from the kernel pagetables.
  */
-PMAP_INLINE void
+void
 pmap_kremove(vm_offset_t va)
 {
pt_entry_t *pte;



git: 0aebcfc9f4d6 - main - arm64 pmap: Eliminate some duplication of code

2023-07-22 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=0aebcfc9f4d642a8bef95504dc928fab78af33bf

commit 0aebcfc9f4d642a8bef95504dc928fab78af33bf
Author: Alan Cox 
AuthorDate: 2023-07-22 17:41:49 +
Commit: Alan Cox 
CommitDate: 2023-07-23 05:34:17 +

arm64 pmap: Eliminate some duplication of code

pmap_unmapbios() can simply call pmap_kremove_device() rather than
duplicating its code.

While I'm here, add a comment to pmap_kremove_device() explaining its
proper use, and fix a whitespace issue.

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index b2591437b3b3..dfed0142f273 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -2032,6 +2032,13 @@ pmap_kremove(vm_offset_t va)
pmap_s1_invalidate_page(kernel_pmap, va, true);
 }
 
+/*
+ * Remove the specified range of mappings from the kernel address space.
+ *
+ * Should only be applied to mappings that were created by pmap_kenter() or
+ * pmap_kenter_device().  Nothing about this function is actually specific
+ * to device mappings.
+ */
 void
 pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 {
@@ -2039,7 +2046,7 @@ pmap_kremove_device(vm_offset_t sva, vm_size_t size)
vm_offset_t va;
 
KASSERT((sva & L3_OFFSET) == 0,
-  ("pmap_kremove_device: Invalid virtual address"));
+   ("pmap_kremove_device: Invalid virtual address"));
KASSERT((size & PAGE_MASK) == 0,
("pmap_kremove_device: Mapping is not page-sized"));
 
@@ -6550,7 +6557,7 @@ void
 pmap_unmapbios(void *p, vm_size_t size)
 {
struct pmap_preinit_mapping *ppim;
-   vm_offset_t offset, tmpsize, va, va_trunc;
+   vm_offset_t offset, va, va_trunc;
pd_entry_t *pde;
pt_entry_t *l2;
int i, lvl, l2_blocks, block;
@@ -6600,14 +6607,8 @@ pmap_unmapbios(void *p, vm_size_t size)
size = round_page(offset + size);
va = trunc_page(va);
 
-   pde = pmap_pde(kernel_pmap, va, &lvl);
-   KASSERT(pde != NULL,
-   ("pmap_unmapbios: Invalid page entry, va: 0x%lx", va));
-   KASSERT(lvl == 2, ("pmap_unmapbios: Invalid level %d", lvl));
-
/* Unmap and invalidate the pages */
-for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
-   pmap_kremove(va + tmpsize);
+   pmap_kremove_device(va, size);
 
kva_free(va, size);
}



git: 29edff0dea0f - main - arm64/riscv pmap: Initialize the pmap's pm_pvchunk field

2023-07-21 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=29edff0dea0f7a2df710dd649d0cbcd4a2da3692

commit 29edff0dea0f7a2df710dd649d0cbcd4a2da3692
Author: Alan Cox 
AuthorDate: 2023-07-16 20:58:04 +
Commit: Alan Cox 
CommitDate: 2023-07-22 04:58:18 +

arm64/riscv pmap: Initialize the pmap's pm_pvchunk field

I believe that there are two reasons that the missing TAILQ
initialization operations haven't caused a problem.  First, the TAILQ
head's first field is being initialized to zeroes elsewhere.  Second,
the first access to the TAILQ head's last field is by
TAILQ_INSERT_HEAD(), which assigns to the last field without reading
it when the first field is NULL.

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D41118
---
 sys/arm64/arm64/pmap.c | 3 +++
 sys/riscv/riscv/pmap.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index c2681104c961..b2591437b3b3 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1291,6 +1291,7 @@ pmap_bootstrap(vm_paddr_t kernstart, vm_size_t kernlen)
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_l0_paddr =
pmap_early_vtophys((vm_offset_t)kernel_pmap_store.pm_l0);
+   TAILQ_INIT(&kernel_pmap->pm_pvchunk);
vm_radix_init(&kernel_pmap->pm_root);
kernel_pmap->pm_cookie = COOKIE_FROM(-1, INT_MIN);
kernel_pmap->pm_stage = PM_STAGE1;
@@ -2270,6 +2271,7 @@ pmap_pinit0(pmap_t pmap)
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
pmap->pm_l0_paddr = READ_SPECIALREG(ttbr0_el1);
pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
+   TAILQ_INIT(&pmap->pm_pvchunk);
vm_radix_init(&pmap->pm_root);
pmap->pm_cookie = COOKIE_FROM(ASID_RESERVED_FOR_PID_0, INT_MIN);
pmap->pm_stage = PM_STAGE1;
@@ -2293,6 +2295,7 @@ pmap_pinit_stage(pmap_t pmap, enum pmap_stage stage, int 
levels)
pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(m);
pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
 
+   TAILQ_INIT(&pmap->pm_pvchunk);
vm_radix_init(&pmap->pm_root);
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX);
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 7580f091ad86..3732eea14f7d 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -646,6 +646,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, 
vm_size_t kernlen)
/* Set this early so we can use the pagetable walking functions */
kernel_pmap_store.pm_top = (pd_entry_t *)l1pt;
PMAP_LOCK_INIT(kernel_pmap);
+   TAILQ_INIT(&kernel_pmap->pm_pvchunk);
vm_radix_init(&kernel_pmap->pm_root);
 
rw_init(_global_lock, "pmap pv global");
@@ -1327,6 +1328,7 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_satp = pmap_satp_mode() |
(vtophys(pmap->pm_top) >> PAGE_SHIFT);
CPU_ZERO(&pmap->pm_active);
+   TAILQ_INIT(&pmap->pm_pvchunk);
vm_radix_init(&pmap->pm_root);
pmap_activate_boot(pmap);
 }
@@ -1369,6 +1371,7 @@ pmap_pinit(pmap_t pmap)
pmap->pm_top[i] = kernel_pmap->pm_top[i];
}
 
+   TAILQ_INIT(&pmap->pm_pvchunk);
vm_radix_init(&pmap->pm_root);
 
return (1);

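A small userland sketch of the reasoning above (assumes FreeBSD's <sys/queue.h>; the program and its names are inventions of this note, not part of the commit).  With the head zeroed, tqh_first is NULL, and the first TAILQ_INSERT_HEAD() only writes tqh_last, so the missing TAILQ_INIT() happened to be harmless:

#include <sys/queue.h>
#include <assert.h>
#include <string.h>

struct item {
	int v;
	TAILQ_ENTRY(item) link;
};
TAILQ_HEAD(itemq, item);

int
main(void)
{
	struct itemq q;
	struct item a = { .v = 1 };

	memset(&q, 0, sizeof(q));	 /* zeroed, but never TAILQ_INIT()ed */
	TAILQ_INSERT_HEAD(&q, &a, link); /* assigns q.tqh_last without reading it */
	assert(TAILQ_FIRST(&q) == &a);
	assert(TAILQ_LAST(&q, itemq) == &a);

	TAILQ_INIT(&q);			 /* the explicit initialization now added */
	assert(TAILQ_EMPTY(&q));
	return (0);
}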


git: 294c52d969df - main - amd64 pmap: Fix compilation when superpage reservations are disabled

2023-07-12 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=294c52d969dfdaf1d9b3f4a1de76b702ee724afc

commit 294c52d969dfdaf1d9b3f4a1de76b702ee724afc
Author: Yufeng Zhou 
AuthorDate: 2023-07-12 07:52:02 +
Commit: Alan Cox 
CommitDate: 2023-07-12 17:07:42 +

amd64 pmap: Fix compilation when superpage reservations are disabled

The function pmap_pde_ept_executable() should not be conditionally
compiled based on VM_NRESERVLEVEL. It is required indirectly by
pmap_enter(..., psind=1) even when reservation-based allocation is
disabled at compile time.

Reviewed by:alc
MFC after:  1 week
---
 sys/amd64/amd64/pmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 3215a7f8d559..896078f3c456 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6839,7 +6839,6 @@ retry:
PMAP_UNLOCK(pmap);
 }
 
-#if VM_NRESERVLEVEL > 0
 static bool
 pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
 {
@@ -6849,6 +6848,7 @@ pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
return ((pde & EPT_PG_EXECUTE) != 0);
 }
 
+#if VM_NRESERVLEVEL > 0
 /*
  * Tries to promote the 512, contiguous 4KB page mappings that are within a
  * single page table page (PTP) to a single 2MB page mapping.  For promotion



git: e59d202312f9 - main - arm64: make VM_NFREEORDER and the comment describing it match

2023-06-29 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e59d202312f9868583c6603030ded2476085920d

commit e59d202312f9868583c6603030ded2476085920d
Author: Alan Cox 
AuthorDate: 2023-06-28 08:23:09 +
Commit: Alan Cox 
CommitDate: 2023-06-29 17:48:48 +

arm64: make VM_NFREEORDER and the comment describing it match

The setting of VM_NFREEORDER and the comment describing it were copied
from sparc64 where both the page size and the number of page table
entries that fit in a cache line are different from arm64.

Reviewed by:andrew, kib, markj
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D40782
---
 sys/arm64/include/vmparam.h | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h
index b28a79256453..23b7d0d87c94 100644
--- a/sys/arm64/include/vmparam.h
+++ b/sys/arm64/include/vmparam.h
@@ -89,14 +89,15 @@
#define VM_FREELIST_DEFAULT 0
 
 /*
- * An allocation size of 16MB is supported in order to optimize the
- * use of the direct map by UMA.  Specifically, a cache line contains
- * at most four TTEs, collectively mapping 16MB of physical memory.
- * By reducing the number of distinct 16MB "pages" that are used by UMA,
- * the physical memory allocator reduces the likelihood of both 4MB
- * page TLB misses and cache misses caused by 4MB page TLB misses.
+ * When PAGE_SIZE is 4KB, an allocation size of 16MB is supported in order
+ * to optimize the use of the direct map by UMA.  Specifically, a 64-byte
+ * cache line contains at most 8 L2 BLOCK entries, collectively mapping 16MB
+ * of physical memory.  By reducing the number of distinct 16MB "pages" that
+ * are used by UMA, the physical memory allocator reduces the likelihood of
+ * both 2MB page TLB misses and cache misses during the page table walk when
+ * a 2MB page TLB miss does occur.
  */
-#define VM_NFREEORDER   12
+#define VM_NFREEORDER   13
 
 /*
  * Enable superpage reservations: 1 level.

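Spelling out the arithmetic behind the updated comment and value (a checkable sketch; the macro names below are local stand-ins defined here, not kernel constants):

#include <assert.h>

#define DEMO_CACHE_LINE	64			/* bytes per cache line */
#define DEMO_L2E_SIZE	8			/* bytes per L2 block entry */
#define DEMO_L2_BLOCK	(2UL * 1024 * 1024)	/* 2MB mapped per L2 entry */
#define DEMO_PAGE_4K	4096UL
#define DEMO_NFREEORDER	13

/* One cache line holds 64 / 8 = 8 L2 entries, which map 8 * 2MB = 16MB. */
static_assert((DEMO_CACHE_LINE / DEMO_L2E_SIZE) * DEMO_L2_BLOCK ==
    16UL * 1024 * 1024, "a cache line of L2 entries maps 16MB");

/* The largest buddy order is NFREEORDER - 1 = 12: 2^12 * 4KB = 16MB. */
static_assert((DEMO_PAGE_4K << (DEMO_NFREEORDER - 1)) ==
    16UL * 1024 * 1024, "the maximum free-list chunk is 16MB");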


git: 3767de839742 - main - arm64 pmap: Tidy up pmap_promote_l2() calls

2023-06-28 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3767de83974206e4267dabf7fbe66b151c1a0b14

commit 3767de83974206e4267dabf7fbe66b151c1a0b14
Author: Alan Cox 
AuthorDate: 2023-06-28 07:08:02 +
Commit: Alan Cox 
CommitDate: 2023-06-28 17:46:15 +

arm64 pmap: Tidy up pmap_promote_l2() calls

Since pmap_ps_enabled() is true by default, check it inside of
pmap_promote_l2() instead of at every call site.

Modify pmap_promote_l2() to return true if the promotion succeeded and
false otherwise.

(A similar change was applied to the amd64 pmap in 0d2f98c2f092.)

Reviewed by:kib, markj
Differential Revision:  https://reviews.freebsd.org/D40781
---
 sys/arm64/arm64/pmap.c | 41 ++---
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 3166b3d7959b..46520889728f 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -435,7 +435,7 @@ void (*pmap_stage2_invalidate_all)(uint64_t);
#define TLBI_VA(addr)   (((addr) >> TLBI_VA_SHIFT) & TLBI_VA_MASK)
#define TLBI_VA_L3_INCR (L3_SIZE >> TLBI_VA_SHIFT)
 
-static int superpages_enabled = 1;
+static int __read_frequently superpages_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
 "Are large page mappings enabled?");
@@ -4141,14 +4141,21 @@ pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, 
vm_paddr_t pa,
  * aligned, contiguous physical memory and (2) the 4KB page mappings must have
  * identical characteristics.
  */
-static void
+static bool
 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte,
 struct rwlock **lockp)
 {
pt_entry_t all_l3e_AF, *firstl3, *l3, newl2, oldl3, pa;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-   PMAP_ASSERT_STAGE1(pmap);
+
+   /*
+* Currently, this function only supports promotion on stage 1 pmaps
+* because it tests stage 1 specific fields and performs a break-
+* before-make sequence that is incorrect for stage 2 pmaps.
+*/
+   if (pmap->pm_stage != PM_STAGE1 || !pmap_ps_enabled(pmap))
+   return (false);
 
/*
 * Examine the first L3E in the specified PTP.  Abort if this L3E is
@@ -4157,14 +4164,14 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, 
vm_offset_t va, vm_page_t mpte,
firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2)));
newl2 = pmap_load(firstl3);
if ((newl2 & ATTR_SW_NO_PROMOTE) != 0)
-   return;
+   return (false);
/* ... is not the first physical page within an L2 block */
if ((PTE_TO_PHYS(newl2) & L2_OFFSET) != 0 ||
((newl2 & ATTR_DESCR_MASK) != L3_PAGE)) { /* ... or is invalid */
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
 
/*
@@ -4212,7 +4219,7 @@ setl2:
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
 setl3:
if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
@@ -4232,7 +4239,7 @@ setl3:
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
all_l3e_AF &= oldl3;
pa -= PAGE_SIZE;
@@ -4263,7 +4270,7 @@ setl3:
CTR2(KTR_PMAP,
"pmap_promote_l2: failure for va %#lx in pmap %p", va,
pmap);
-   return;
+   return (false);
}
 
if ((newl2 & ATTR_SW_MANAGED) != 0)
@@ -4277,6 +4284,7 @@ setl3:
atomic_add_long(&pmap_l2_promotions, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
pmap);
+   return (true);
 }
 #endif /* VM_NRESERVLEVEL > 0 */
 
@@ -4681,17 +4689,13 @@ validate:
 
 #if VM_NRESERVLEVEL > 0
/*
-* Try to promote from level 3 pages to a level 2 superpage. This
-* currently only works on stage 1 pmaps as pmap_promote_l2 looks at
-* stage 1 specific fields and performs a break-before-make sequence
-* that is incorrect a stage 2 pmap.
+* If both the page table page an

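The diff above is truncated right where the call sites change, so here is a hypothetical caller shape consistent with the comment fragment above (an illustration only; names such as NL3PG, vm_reserv_level_iffullpop(), and the lock variable are assumptions of this note, not quoted from the commit):

#if VM_NRESERVLEVEL > 0
	/*
	 * Sketch: if both the page table page and the reservation are
	 * fully populated, simply attempt the promotion; the stage and
	 * pmap_ps_enabled() checks now live inside pmap_promote_l2().
	 */
	if ((mpte == NULL || mpte->ref_count == NL3PG) &&
	    (m->flags & PG_FICTITIOUS) == 0 &&
	    vm_reserv_level_iffullpop(m) == 0)
		(void)pmap_promote_l2(pmap, l2, va, mpte, &lock);
#endif /* VM_NRESERVLEVEL > 0 */
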
git: d8e6f4946cec - main - vm: Fix anonymous memory clustering under ASLR

2023-06-26 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=d8e6f4946cec0b84a6997d62e791b8cf993741b2

commit d8e6f4946cec0b84a6997d62e791b8cf993741b2
Author: Alan Cox 
AuthorDate: 2023-06-23 17:00:32 +
Commit: Alan Cox 
CommitDate: 2023-06-27 04:42:48 +

vm: Fix anonymous memory clustering under ASLR

By default, our ASLR implementation is supposed to cluster anonymous
memory allocations, unless the application's mmap(..., MAP_ANON, ...)
call included a non-zero address hint.  Unfortunately, clustering
never occurred because kern_mmap() always replaced the given address
hint when it was zero.  So, the ASLR implementation always believed
that a non-zero hint had been provided and randomized the mapping's
location in the address space.  To fix this problem, I'm pushing down
the point at which we convert a hint of zero to the minimum allocatable
address from kern_mmap() to vm_map_find_min().

Reviewed by:kib
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D40743
---
 sys/vm/vm_map.c  | 10 +++---
 sys/vm/vm_map.h  |  1 +
 sys/vm/vm_mmap.c |  8 +---
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index f5863a9b9939..a02107b5e64d 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1981,14 +1981,14 @@ SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW,
 "Cluster anonymous mappings: 0 = no, 1 = yes if no hint, 2 = always");
 
 static bool
-clustering_anon_allowed(vm_offset_t addr)
+clustering_anon_allowed(vm_offset_t addr, int cow)
 {
 
switch (cluster_anon) {
case 0:
return (false);
case 1:
-   return (addr == 0);
+   return (addr == 0 || (cow & MAP_NO_HINT) != 0);
case 2:
default:
return (true);
@@ -2111,7 +2111,7 @@ vm_map_find(vm_map_t map, vm_object_t object, 
vm_ooffset_t offset,
} else
alignment = 0;
en_aslr = (map->flags & MAP_ASLR) != 0;
-   update_anon = cluster = clustering_anon_allowed(*addr) &&
+   update_anon = cluster = clustering_anon_allowed(*addr, cow) &&
(map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 &&
find_space != VMFS_NO_SPACE && object == NULL &&
(cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP |
@@ -2255,6 +2255,10 @@ vm_map_find_min(vm_map_t map, vm_object_t object, 
vm_ooffset_t offset,
int rv;
 
hint = *addr;
+   if (hint == 0)
+   cow |= MAP_NO_HINT;
+   if (hint < min_addr)
+   *addr = hint = min_addr;
for (;;) {
rv = vm_map_find(map, object, offset, addr, length, max_addr,
find_space, prot, max, cow);
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 2ac54a39a57b..fd8b606e8ddc 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -383,6 +383,7 @@ long vmspace_resident_count(struct vmspace *vmspace);
 #defineMAP_CREATE_STACK_GAP_DN 0x0002
 #defineMAP_VN_EXEC 0x0004
 #defineMAP_SPLIT_BOUNDARY_MASK 0x0018
+#defineMAP_NO_HINT 0x0020
 
 #defineMAP_SPLIT_BOUNDARY_SHIFT 19
 
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 56345fcaf560..408e077476dd 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -353,10 +353,12 @@ kern_mmap(struct thread *td, const struct mmap_req *mrp)
 * the hint would fall in the potential heap space,
 * place it after the end of the largest possible heap.
 *
-* There should really be a pmap call to determine a reasonable
-* location.
+* For anonymous mappings within the address space of the
+* calling process, the absence of a hint is handled at a
+* lower level in order to implement different clustering
+* strategies for ASLR.
 */
-   if (addr == 0 ||
+   if (((flags & MAP_ANON) == 0 && addr == 0) ||
(addr >= round_page((vm_offset_t)vms->vm_taddr) &&
addr < round_page((vm_offset_t)vms->vm_daddr +
lim_max(td, RLIMIT_DATA

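A userland illustration of the behavior this commit restores (assumes the default vm.cluster_anon=1 shown in the diff; the program, the sizes, and the example hint address are inventions of this note):

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	/* No hint: now eligible for clustering with other anonymous mappings. */
	void *clustered = mmap(NULL, 1024 * 1024, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);

	/* Explicit non-zero hint: still treated as a hint, so ASLR randomizes
	 * the placement rather than clustering it. */
	void *hinted = mmap((void *)0x200000000UL, 1024 * 1024,
	    PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);

	if (clustered == MAP_FAILED || hinted == MAP_FAILED) {
		perror("mmap");
		return (1);
	}
	printf("no hint:   %p\nwith hint: %p\n", clustered, hinted);
	return (0);
}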


git: 0d2f98c2f092 - main - amd64 pmap: Tidy up pmap_promote_pde() calls

2023-06-24 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=0d2f98c2f0928a8ee8446c3e5e0e4fb93f1dd9c5

commit 0d2f98c2f0928a8ee8446c3e5e0e4fb93f1dd9c5
Author: Alan Cox 
AuthorDate: 2023-06-17 17:18:33 +
Commit: Alan Cox 
CommitDate: 2023-06-24 18:09:04 +

amd64 pmap: Tidy up pmap_promote_pde() calls

Since pmap_ps_enabled() is true by default, check it inside of
pmap_promote_pde() instead of at every call site.

Modify pmap_promote_pde() to return true if the promotion succeeded and
false otherwise.  Use this return value in a couple places.

Reviewed by:kib, markj
Differential Revision:  https://reviews.freebsd.org/D40744
---
 sys/amd64/amd64/pmap.c | 36 ++--
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 3cb02a4f9daa..3215a7f8d559 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -436,7 +436,7 @@ pt_entry_t pg_nx;
 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
 "VM/pmap parameters");
 
-static int pg_ps_enabled = 1;
+static int __read_frequently pg_ps_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
&pg_ps_enabled, 0, "Are large page mappings enabled?");
 
@@ -1318,7 +1318,7 @@ static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t 
pa, int mode);
 static vm_page_t pmap_large_map_getptp_unlocked(void);
 static vm_paddr_t pmap_large_map_kextract(vm_offset_t va);
 #if VM_NRESERVLEVEL > 0
-static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+static bool pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
 vm_page_t mpte, struct rwlock **lockp);
 #endif
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t 
sva,
@@ -6856,7 +6856,7 @@ pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
  * aligned, contiguous physical memory and (2) the 4KB page mappings must have
  * identical characteristics. 
  */
-static void
+static bool
 pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte,
 struct rwlock **lockp)
 {
@@ -6865,6 +6865,10 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, 
vm_offset_t va, vm_page_t mpte,
pt_entry_t allpte_PG_A, PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V;
int PG_PTE_CACHE;
 
+   PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+   if (!pmap_ps_enabled(pmap))
+   return (false);
+
PG_A = pmap_accessed_bit(pmap);
PG_G = pmap_global_bit(pmap);
PG_M = pmap_modified_bit(pmap);
@@ -6873,8 +6877,6 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, 
vm_offset_t va, vm_page_t mpte,
PG_PKU_MASK = pmap_pku_mask_bit(pmap);
PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
 
-   PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
/*
 * Examine the first PTE in the specified PTP.  Abort if this PTE is
 * ineligible for promotion due to hardware errata, invalid, or does
@@ -6883,12 +6885,12 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, 
vm_offset_t va, vm_page_t mpte,
firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
newpde = *firstpte;
if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, newpde)))
-   return;
+   return (false);
if ((newpde & ((PG_FRAME & PDRMASK) | PG_V)) != PG_V) {
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
 
/*
@@ -6933,7 +6935,7 @@ setpde:
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
 setpte:
if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
@@ -6952,7 +6954,7 @@ setpte:
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
allpte_PG_A &= oldpte;
pa -= PAGE_SIZE;
@@ -6993,7 +6995,7 @@ setpte:
CTR2(KTR_PMAP,
"pmap_promote_pde: failure for va %#lx in pmap %p", va,
pmap);
-   return;
+   return (false);
}
 
/*
@@ -7018,6 +7020,7 @@ setpte:
counter_u64_add(pmap_pde_promotions, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx&qu

git: 58d427172157 - main - vm_phys: Fix typo in 9e8174289236

2023-06-16 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=58d427172157dedf82e46014e7d19cf973186dd9

commit 58d427172157dedf82e46014e7d19cf973186dd9
Author: Alan Cox 
AuthorDate: 2023-06-16 08:12:42 +
Commit: Alan Cox 
CommitDate: 2023-06-16 08:12:42 +

vm_phys: Fix typo in 9e8174289236
---
 sys/vm/vm_phys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index a0b53f0f7c4b..28f12231e01c 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -1246,7 +1246,7 @@ vm_phys_find_range(vm_page_t bounds[], int segind, int 
domain,
struct vm_phys_seg *end_seg, *seg;
 
KASSERT(npages > 0, ("npages is zero"));
-   KASSERT(domain >= 0 && domain < vm_ndomain, ("domain out of range"));
+   KASSERT(domain >= 0 && domain < vm_ndomains, ("domain out of range"));
end_seg = &vm_phys_segs[vm_phys_nsegs];
for (seg = &vm_phys_segs[segind]; seg < end_seg; seg++) {
if (seg->domain != domain)



git: 34eeabff5a86 - main - amd64/arm64 pmap: Stop requiring the accessed bit for superpage promotion

2023-06-12 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=34eeabff5a8636155bb02985c5928c1844fd3178

commit 34eeabff5a8636155bb02985c5928c1844fd3178
Author: Alan Cox 
AuthorDate: 2023-05-31 23:10:41 +
Commit: Alan Cox 
CommitDate: 2023-06-12 18:40:57 +

amd64/arm64 pmap: Stop requiring the accessed bit for superpage promotion

Stop requiring all of the PTEs to have the accessed bit set for superpage
promotion to occur.  Given that change, add support for promotion to
pmap_enter_quick(), which does not set the accessed bit in the PTE that
it creates.

Since the final mapping within a superpage-aligned and sized region of a
memory-mapped file is typically created by a call to pmap_enter_quick(),
we now achieve promotions in circumstances where they did not occur
before, for example, the X server's read-only mapping of libLLVM-15.so.

See also https://www.usenix.org/system/files/atc20-zhu-weixi_0.pdf

Reviewed by:kib, markj
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D40478
---
 sys/amd64/amd64/pmap.c   | 154 +++
 sys/amd64/include/pmap.h |   2 +-
 sys/arm64/arm64/pmap.c   | 122 +++--
 3 files changed, 205 insertions(+), 73 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 123811ed573f..3cb02a4f9daa 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -313,6 +313,33 @@ pmap_pku_mask_bit(pmap_t pmap)
return (pmap->pm_type == PT_X86 ? X86_PG_PKU_MASK : 0);
 }
 
+static __inline boolean_t
+safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
+{
+
+   if (!pmap_emulate_ad_bits(pmap))
+   return (TRUE);
+
+   KASSERT(pmap->pm_type == PT_EPT, ("invalid pm_type %d", pmap->pm_type));
+
+   /*
+* XWR = 010 or 110 will cause an unconditional EPT misconfiguration
+* so we don't let the referenced (aka EPT_PG_READ) bit to be cleared
+* if the EPT_PG_WRITE bit is set.
+*/
+   if ((pte & EPT_PG_WRITE) != 0)
+   return (FALSE);
+
+   /*
+* XWR = 100 is allowed only if the PMAP_SUPPORTS_EXEC_ONLY is set.
+*/
+   if ((pte & EPT_PG_EXECUTE) == 0 ||
+   ((pmap->pm_flags & PMAP_SUPPORTS_EXEC_ONLY) != 0))
+   return (TRUE);
+   else
+   return (FALSE);
+}
+
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
 #define PMAP_INLINE__attribute__((__gnu_inline__)) inline
@@ -1279,7 +1306,8 @@ static intpmap_enter_pde(pmap_t pmap, vm_offset_t 
va, pd_entry_t newpde,
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
-static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool allpte_PG_A_set);
 static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
 vm_offset_t eva);
 static void pmap_invalidate_cache_range_all(vm_offset_t sva,
@@ -2491,7 +2519,7 @@ pmap_init(void)
 */
if ((i == 0 ||
kernphys + ((vm_paddr_t)(i - 1) << PDRSHIFT) < KERNend) &&
-   pmap_insert_pt_page(kernel_pmap, mpte, false))
+   pmap_insert_pt_page(kernel_pmap, mpte, false, false))
panic("pmap_init: pmap_insert_pt_page failed");
}
PMAP_UNLOCK(kernel_pmap);
@@ -4061,14 +4089,26 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist 
*free,
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
  *
- * If "promoted" is false, then the page table page "mpte" must be zero filled.
+ * If "promoted" is false, then the page table page "mpte" must be zero filled;
+ * "mpte"'s valid field will be set to 0.
+ *
+ * If "promoted" is true and "allpte_PG_A_set" is false, then "mpte" must
+ * contain valid mappings with identical attributes except for PG_A; "mpte"'s
+ * valid field will be set to 1.
+ *
+ * If "promoted" and "allpte_PG_A_set" are both true, then "mpte" must contain
+ * valid mappings with identical attributes including PG_A; "mpte"'s valid
+ * field will be set to VM_PAGE_BITS_ALL.
  */
 static __inline int
-pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool allpte_PG_A_set)
 {
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-   mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
+   KASSER
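
For illustration only, a sketch (not part of the committed diff) of how a
promotion path would use the new "allpte_PG_A_set" argument; it assumes a
local "allpte_PG_A" that accumulates the PG_A bits of every examined PTE, as
in the loop quoted earlier:

    /* Sketch only: cache the PTP, noting whether every PTE had PG_A set. */
    if (pmap_insert_pt_page(pmap, mpte, true, (allpte_PG_A & PG_A) != 0)) {
            counter_u64_add(pmap_pde_p_failures, 1);
            CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
                " in pmap %p", va, pmap);
            return (false);
    }
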

git: 3e7e2bb2467e - main - arm64 pmap: Make VM_PAGE_TO_PV_LIST_LOCK() a constant-time operation

2023-05-29 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3e7e2bb2467e8bb682176125397168c88c3913c6

commit 3e7e2bb2467e8bb682176125397168c88c3913c6
Author: Alan Cox 
AuthorDate: 2023-05-29 06:01:37 +
Commit: Alan Cox 
CommitDate: 2023-05-29 16:22:55 +

arm64 pmap: Make VM_PAGE_TO_PV_LIST_LOCK() a constant-time operation

The prior implementation of VM_PAGE_TO_PV_LIST_LOCK() performed a
linear-time search of the vm_phys_segs[] array.  However, in contrast to
PHYS_TO_PV_LIST_LOCK(), that search is unnecessary because every (non-
fictitious) vm_page contains the index of the vm_phys_seg in which it
resides.

Change most of the remaining uses of CHANGE_PV_LIST_LOCK_TO_PHYS() and
PHYS_TO_PV_LIST_LOCK() to CHANGE_PV_LIST_LOCK_TO_VM_PAGE() and
VM_PAGE_TO_PV_LIST_LOCK(), respectively.

Collectively, these changes also reduce the size of a GENERIC-NODEBUG
kernel's pmap.

Before:

   text    data    bss     dec      hex  filename
  70144    3200   2248   75592  0x12748  pmap.o

After:

   text    data    bss     dec      hex  filename
  69192    3200   2248   74640  0x12390  pmap.o

Reviewed by:kib, markj
Differential Revision:  https://reviews.freebsd.org/D40306
---
 sys/arm64/arm64/pmap.c | 43 ++-
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 6bc9adba71e0..150532b68c75 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -202,6 +202,10 @@ struct pmap_large_md_page {
int pv_pad[2];
 };
 
+__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
+#define pv_dummy pv_dummy_large.pv_page
+__read_mostly static struct pmap_large_md_page *pv_table;
+
 static struct pmap_large_md_page *
 _pa_to_pmdp(vm_paddr_t pa)
 {
@@ -252,11 +256,19 @@ page_to_pmdp(vm_page_t m)
_lock;  \
 })
 
-#defineCHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)  do {\
+static struct rwlock *
+VM_PAGE_TO_PV_LIST_LOCK(vm_page_t m)
+{
+   if ((m->flags & PG_FICTITIOUS) == 0)
+   return (&page_to_pmdp(m)->pv_lock);
+   else
+   return (&pv_dummy_large.pv_lock);
+}
+
+#defineCHANGE_PV_LIST_LOCK(lockp, new_lock)do {\
struct rwlock **_lockp = (lockp);   \
-   struct rwlock *_new_lock;   \
+   struct rwlock *_new_lock = (new_lock);  \
\
-   _new_lock = PHYS_TO_PV_LIST_LOCK(pa);   \
if (_new_lock != *_lockp) { \
if (*_lockp != NULL)\
rw_wunlock(*_lockp);\
@@ -265,8 +277,11 @@ page_to_pmdp(vm_page_t m)
}   \
 } while (0)
 
+#defineCHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)  \
+   CHANGE_PV_LIST_LOCK(lockp, PHYS_TO_PV_LIST_LOCK(pa))
+
 #defineCHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)\
-   CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
+   CHANGE_PV_LIST_LOCK(lockp, VM_PAGE_TO_PV_LIST_LOCK(m))
 
 #defineRELEASE_PV_LIST_LOCK(lockp) do {\
struct rwlock **_lockp = (lockp);   \
@@ -277,9 +292,6 @@ page_to_pmdp(vm_page_t m)
}   \
 } while (0)
 
-#defineVM_PAGE_TO_PV_LIST_LOCK(m)  \
-   PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
-
 /*
  * The presence of this flag indicates that the mapping is writeable.
  * If the ATTR_S1_AP_RO bit is also set, then the mapping is clean, otherwise
@@ -338,10 +350,6 @@ struct pv_chunks_list {
 
 struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
 
-__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
-#define pv_dummy pv_dummy_large.pv_page
-__read_mostly static struct pmap_large_md_page *pv_table;
-
 vm_paddr_t dmap_phys_base; /* The start of the dmap region */
 vm_paddr_t dmap_phys_max;  /* The limit of the dmap region */
 vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */
@@ -3427,7 +3435,7 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t 
sva,
if (old_l2 & ATTR_SW_MANAGED) {
m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(old_l2));
pvh = page_to_pvh(m);
-   CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, PTE_TO_PHYS(old_l2));
+   CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
pmap_pvh_free(pvh, pmap, sva);
for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) {
if (pmap_pte_dirty(pmap, old_l2))
@@ -3533,7 +3541,7 @@ pmap_remove_l3_range(pmap_t pma
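
For illustration, the existing arm64 helper that makes the new
VM_PAGE_TO_PV_LIST_LOCK() constant time looks roughly like the sketch below.
This is a sketch, not part of this diff; the m->segind and seg->md_first
fields and pmap_l2_pindex() are assumptions based on the per-superpage lock
code:

    static struct pmap_large_md_page *
    page_to_pmdp(vm_page_t m)
    {
            struct vm_phys_seg *seg;

            /* Constant time: the page records its vm_phys_seg index. */
            seg = &vm_phys_segs[m->segind];
            return ((struct pmap_large_md_page *)seg->md_first +
                pmap_l2_pindex(VM_PAGE_TO_PHYS(m)) -
                pmap_l2_pindex(seg->start));
    }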

git: 5d1ee799de65 - main - arm64 pmap: Eliminate an unused global variable

2023-05-27 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=5d1ee799de65ca62cd94c1602b41255bdbc3312d

commit 5d1ee799de65ca62cd94c1602b41255bdbc3312d
Author: Alan Cox 
AuthorDate: 2023-05-27 06:23:48 +
Commit: Alan Cox 
CommitDate: 2023-05-27 06:38:20 +

arm64 pmap: Eliminate an unused global variable

The global variable "pmap_last_pa" was copied from the amd64 pmap as a
part of commit c15085278cb5 "arm64 pmap: implement per-superpage locks"
but it is neither used nor needed by the arm64 pmap.
---
 sys/arm64/arm64/pmap.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 16e671295ca6..6bc9adba71e0 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -341,7 +341,6 @@ struct pv_chunks_list __exclusive_cache_line 
pv_chunks[PMAP_MEMDOM];
 __exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
 #define pv_dummy pv_dummy_large.pv_page
 __read_mostly static struct pmap_large_md_page *pv_table;
-__read_mostly vm_paddr_t pmap_last_pa;
 
 vm_paddr_t dmap_phys_base; /* The start of the dmap region */
 vm_paddr_t dmap_phys_max;  /* The limit of the dmap region */



git: f0878da03b37 - main - pmap: standardize promotion conditions between amd64 and arm64

2022-12-12 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=f0878da03b374e3fa3578b363f02bfd50ac0e5bd

commit f0878da03b374e3fa3578b363f02bfd50ac0e5bd
Author: Alan Cox 
AuthorDate: 2022-10-08 07:20:25 +
Commit: Alan Cox 
CommitDate: 2022-12-12 17:32:50 +

pmap: standardize promotion conditions between amd64 and arm64

On amd64, don't abort promotion due to a missing accessed bit in a
mapping before possibly write protecting that mapping.  Previously,
in some cases, we might not repromote after madvise(MADV_FREE) because
there was no write fault to trigger the repromotion.  Conversely, on
arm64, don't pointlessly, yet harmlessly, write protect physical pages
that aren't part of the physical superpage.

Don't count aborted promotions due to explicit promotion prohibition
(arm64) or hardware errata (amd64) as ordinary promotion failures.

Reviewed by:kib, markj
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D36916
---
 sys/amd64/amd64/pmap.c | 37 ++---
 sys/arm64/arm64/pmap.c | 50 --
 2 files changed, 74 insertions(+), 13 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index eb8980ae4fed..a44993efb409 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6771,19 +6771,36 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, 
vm_offset_t va, vm_page_t mpte,
 
/*
 * Examine the first PTE in the specified PTP.  Abort if this PTE is
-* either invalid, unused, or does not map the first 4KB physical page
-* within a 2MB page. 
+* ineligible for promotion due to hardware errata, invalid, or does
+* not map the first 4KB physical page within a 2MB page.
 */
firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
newpde = *firstpte;
-   if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) ||
-   !pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
-   newpde))) {
+   if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, newpde)))
+   return;
+   if ((newpde & ((PG_FRAME & PDRMASK) | PG_V)) != PG_V) {
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
return;
}
+
+   /*
+* Both here and in the below "for" loop, to allow for repromotion
+* after MADV_FREE, conditionally write protect a clean PTE before
+* possibly aborting the promotion due to other PTE attributes.  Why?
+* Suppose that MADV_FREE is applied to a part of a superpage, the
+* address range [S, E).  pmap_advise() will demote the superpage
+* mapping, destroy the 4KB page mapping at the end of [S, E), and
+* clear PG_M and PG_A in the PTEs for the rest of [S, E).  Later,
+* imagine that the memory in [S, E) is recycled, but the last 4KB
+* page in [S, E) is not the last to be rewritten, or simply accessed.
+* In other words, there is still a 4KB page in [S, E), call it P,
+* that is writeable but PG_M and PG_A are clear in P's PTE.  Unless
+* we write protect P before aborting the promotion, if and when P is
+* finally rewritten, there won't be a page fault to trigger
+* repromotion.
+*/
 setpde:
if ((newpde & (PG_M | PG_RW)) == PG_RW) {
/*
@@ -6794,16 +6811,22 @@ setpde:
goto setpde;
newpde &= ~PG_RW;
}
+   if ((newpde & PG_A) == 0) {
+   counter_u64_add(pmap_pde_p_failures, 1);
+   CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
+   " in pmap %p", va, pmap);
+   return;
+   }
 
/*
 * Examine each of the other PTEs in the specified PTP.  Abort if this
 * PTE maps an unexpected 4KB physical page or does not have identical
 * characteristics to the first PTE.
 */
-   pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
+   pa = (newpde & (PG_PS_FRAME | PG_V)) + NBPDR - PAGE_SIZE;
for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
oldpte = *pte;
-   if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
+   if ((oldpte & (PG_FRAME | PG_V)) != pa) {
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 3f4665921631..7e2a423025ec 100644
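
For illustration, the portion of the "setpde" hunk elided above implements
the conditional write protection described in the comment; a sketch, assuming
the usual amd64 helpers rather than quoting the commit verbatim:

    setpde:
            if ((newpde & (PG_M | PG_RW)) == PG_RW) {
                    /*
                     * When PG_M is clear, PG_RW can be cleared without a
                     * TLB invalidation, so retry until the PTE is stable.
                     */
                    if (!atomic_fcmpset_long(firstpte, &newpde,
                        newpde & ~PG_RW))
                            goto setpde;
                    newpde &= ~PG_RW;
            }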

Re: [Sdcc-user] Why are my string constants being redundantly duplicated?

2022-10-02 Thread Alan Cox
On Sat, 1 Oct 2022 12:47:51 +0100
Basil Hussain  wrote:

> I have come across a problem with SDCC for some reason including 
> constant string data twice inside compiled binaries.

Move them out of the function and the problem goes away. It seems to be a
bug but SDCC has behaved this way since forever.
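
For example (a minimal sketch, names invented), the workaround is to hoist
the literal from function scope to file scope:

    #include <stdio.h>

    /* Literal defined inside the function: per the report above, the
     * string data may be emitted twice in the binary. */
    void greet_inline(void)
    {
        static const char msg[] = "Hello, world";
        puts(msg);
    }

    /* Workaround: define the constant at file scope instead. */
    static const char msg_hoisted[] = "Hello, world";

    void greet_hoisted(void)
    {
        puts(msg_hoisted);
    }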





git: 1d5ebad06c20 - main - pmap: optimize MADV_WILLNEED on existing superpages

2022-09-30 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=1d5ebad06c20b1aed3b0c323c4675678afec5e55

commit 1d5ebad06c20b1aed3b0c323c4675678afec5e55
Author: Alan Cox 
AuthorDate: 2022-09-30 06:54:02 +
Commit: Alan Cox 
CommitDate: 2022-09-30 17:14:05 +

pmap: optimize MADV_WILLNEED on existing superpages

Specifically, avoid pointless calls to pmap_enter_quick_locked() when
madvise(MADV_WILLNEED) is applied to an existing superpage mapping.

Reported by:mhorne
Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D36801
---
 sys/amd64/amd64/pmap.c | 64 +++---
 sys/arm64/arm64/pmap.c | 59 +++---
 2 files changed, 75 insertions(+), 48 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index f4df664f0cca..b9b031d55d7d 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1258,7 +1258,7 @@ static boolean_t pmap_demote_pde_locked(pmap_t pmap, 
pd_entry_t *pde,
 vm_offset_t va, struct rwlock **lockp);
 static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
 vm_offset_t va);
-static boolpmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
+static int pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot, struct rwlock **lockp);
 static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
u_int flags, vm_page_t m, struct rwlock **lockp);
@@ -7271,13 +7271,12 @@ out:
 }
 
 /*
- * Tries to create a read- and/or execute-only 2MB page mapping.  Returns true
- * if successful.  Returns false if (1) a page table page cannot be allocated
- * without sleeping, (2) a mapping already exists at the specified virtual
- * address, or (3) a PV entry cannot be allocated without reclaiming another
- * PV entry.
+ * Tries to create a read- and/or execute-only 2MB page mapping.  Returns
+ * KERN_SUCCESS if the mapping was created.  Otherwise, returns an error
+ * value.  See pmap_enter_pde() for the possible error values when "no sleep",
+ * "no replace", and "no reclaim" are specified.
  */
-static bool
+static int
 pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 struct rwlock **lockp)
 {
@@ -7295,8 +7294,7 @@ pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t 
m, vm_prot_t prot,
if (va < VM_MAXUSER_ADDRESS)
newpde |= PG_U;
return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP |
-   PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
-   KERN_SUCCESS);
+   PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp));
 }
 
 /*
@@ -7319,12 +7317,19 @@ pmap_every_pte_zero(vm_paddr_t pa)
 
 /*
  * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if
- * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
- * otherwise.  Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and
- * a mapping already exists at the specified virtual address.  Returns
- * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table
- * page allocation failed.  Returns KERN_RESOURCE_SHORTAGE if
- * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
+ * the mapping was created, and one of KERN_FAILURE, KERN_NO_SPACE,
+ * KERN_PROTECTION_FAILURE, or KERN_RESOURCE_FAILURE otherwise.  Returns
+ * KERN_FAILURE if either (1) PMAP_ENTER_NOREPLACE was specified and a 4KB
+ * page mapping already exists within the 2MB virtual address range starting
+ * at the specified virtual address or (2) the requested 2MB page mapping is
+ * not supported due to hardware errata.  Returns KERN_NO_SPACE if
+ * PMAP_ENTER_NOREPLACE was specified and a 2MB page mapping already exists at
+ * the specified virtual address.  Returns KERN_PROTECTION_FAILURE if the PKRU
+ * settings are not the same across the 2MB virtual address range starting at
+ * the specified virtual address.  Returns KERN_RESOURCE_SHORTAGE if either
+ * (1) PMAP_ENTER_NOSLEEP was specified and a page table page allocation
+ * failed or (2) PMAP_ENTER_NORECLAIM was specified and a PV entry allocation
+ * failed.
  *
  * The parameter "m" is only used when creating a managed, writeable mapping.
  */
@@ -7380,14 +7385,23 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t 
newpde, u_int flags,
if ((oldpde & PG_V) != 0) {
KASSERT(pdpg == NULL || pdpg->ref_count > 1,
("pmap_enter_pde: pdpg's reference count is too low"));
-   if ((flags & PMAP_ENTER_NOREPLACE) != 0 && (va <
-   VM_MAXUSER_ADDRESS || (oldpde & PG_PS) != 0 ||
-   !pmap_every_pte_zero(oldpde & PG_FRAME))) {
-  
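
For illustration, a sketch (not the committed hunk) of how a caller such as
pmap_enter_object() can exploit the finer-grained return values: because
KERN_NO_SPACE now means that an equivalent 2MB mapping already exists, the
caller can skip the whole range instead of falling back to
pmap_enter_quick_locked() for each 4KB page.

    /* Sketch: "m" walks the object's pages, "lock" is the pv-list lock. */
    rv = pmap_enter_2mpage(pmap, va, m, prot, &lock);
    if (rv == KERN_SUCCESS || rv == KERN_NO_SPACE)
            m = &m[NBPDR / PAGE_SIZE - 1];  /* advance past the 2MB range */
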

git: 8d7ee2047c5e - main - pmap: don't recompute mpte during promotion

2022-09-11 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=8d7ee2047c5e8b4db51c682aee4161ebfd1238e5

commit 8d7ee2047c5e8b4db51c682aee4161ebfd1238e5
Author: Alan Cox 
AuthorDate: 2022-09-09 23:34:58 +
Commit: Alan Cox 
CommitDate: 2022-09-11 06:19:22 +

pmap: don't recompute mpte during promotion

When attempting to promote 4KB user-space mappings to a 2MB user-space
mapping, the address of the struct vm_page representing the page table
page that contains the 4KB mappings is already known to the caller.
Pass that address to the promotion function rather than making the
promotion function recompute it, which on arm64 entails iteration over
the vm_phys_segs array by PHYS_TO_VM_PAGE().  And, while I'm here,
eliminate unnecessary arithmetic from the calculation of the first PTE's
address on arm64.

MFC after:  1 week
---
 sys/amd64/amd64/pmap.c | 12 ++--
 sys/arm64/arm64/pmap.c | 14 ++
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 326103a1affb..e3f281784893 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1277,7 +1277,7 @@ static vm_page_t pmap_large_map_getptp_unlocked(void);
 static vm_paddr_t pmap_large_map_kextract(vm_offset_t va);
 #if VM_NRESERVLEVEL > 0
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
-struct rwlock **lockp);
+vm_page_t mpte, struct rwlock **lockp);
 #endif
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t 
sva,
 vm_prot_t prot);
@@ -6737,13 +6737,12 @@ pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
  * identical characteristics. 
  */
 static void
-pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte,
 struct rwlock **lockp)
 {
pd_entry_t newpde;
pt_entry_t *firstpte, oldpte, pa, *pte;
pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V, PG_PKU_MASK;
-   vm_page_t mpte;
int PG_PTE_CACHE;
 
PG_A = pmap_accessed_bit(pmap);
@@ -6823,7 +6822,8 @@ setpte:
 * mapping the superpage is demoted by pmap_demote_pde() or
 * destroyed by pmap_remove_pde(). 
 */
-   mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+   if (mpte == NULL)
+   mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
KASSERT(mpte >= vm_page_array &&
mpte < &vm_page_array[vm_page_array_size],
("pmap_promote_pde: page table page is out of range"));
@@ -7237,7 +7237,7 @@ unchanged:
pmap_ps_enabled(pmap) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0)
-   pmap_promote_pde(pmap, pde, va, &lock);
+   pmap_promote_pde(pmap, pde, va, mpte, &lock);
 #endif
 
rv = KERN_SUCCESS;
@@ -10183,7 +10183,7 @@ pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t 
va, int ftype)
pmap_ps_enabled(pmap) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
-   pmap_promote_pde(pmap, pde, va, &lock);
+   pmap_promote_pde(pmap, pde, va, mpte, &lock);
 #ifdef INVARIANTS
atomic_add_long(&ad_emulation_superpage_promotions, 1);
 #endif
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index deea00bc5d13..c86e9f562729 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -3787,18 +3787,15 @@ pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, 
vm_paddr_t pa,
  * identical characteristics.
  */
 static void
-pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
+pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte,
 struct rwlock **lockp)
 {
pt_entry_t *firstl3, *l3, newl2, oldl3, pa;
-   vm_page_t mpte;
-   vm_offset_t sva;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);
 
-   sva = va & ~L2_OFFSET;
-   firstl3 = pmap_l2_to_l3(l2, sva);
+   firstl3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
newl2 = pmap_load(firstl3);
 
if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF ||
@@ -3851,7 +3848,8 @@ setl3:
 * mapping the superpage is demoted by pmap_demote_l2() or
 * destroyed by pmap_remove_l3().
 */
-   mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
+   if (mpte == NULL)
+   mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
KASSERT(mpte >= vm_page_array &&
mpte < &vm_page_array[vm_page_array_size],
("pmap_promote_l2: page table page is out of range"));
@@ -3871,7 +3869,7 @@ setl3:
newl2 &= ~ATTR_DESCR_MASK;
newl2 |= L2_BLOCK;
 
-   pmap_update_entry(pmap, l2,

git: 7f46deccbed7 - main - x86/iommu: Reduce the number of queued invalidation interrupts

2022-08-06 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7f46deccbed74436b62f8fd02655ff4ad89f1023

commit 7f46deccbed74436b62f8fd02655ff4ad89f1023
Author: Alan Cox 
AuthorDate: 2022-07-31 19:28:30 +
Commit: Alan Cox 
CommitDate: 2022-08-06 18:05:58 +

x86/iommu: Reduce the number of queued invalidation interrupts

Restructure dmar_qi_task() so as to reduce the number of invalidation
completion interrupts.  Specifically, because processing completed
invalidations in dmar_qi_task() can take quite some time, don't reenable
completion interrupts until processing has completed a first time. Then,
check a second time after reenabling completion interrupts, so that
any invalidations that complete just before interrupts are reenabled
do not linger until a future invalidation might raise an interrupt.
(Recent changes have made checking for completed invalidations cheap; no
locking is required.)

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D36054
---
 sys/x86/iommu/intel_qi.c | 45 +
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
index baaf5b472a2c..8a8e656083e3 100644
--- a/sys/x86/iommu/intel_qi.c
+++ b/sys/x86/iommu/intel_qi.c
@@ -411,14 +411,34 @@ dmar_qi_intr(void *arg)
return (FILTER_HANDLED);
 }
 
+static void
+dmar_qi_drain_tlb_flush(struct dmar_unit *unit)
+{
+   struct iommu_map_entry *entry, *head;
+
+   for (head = unit->tlb_flush_head;; head = entry) {
+   entry = (struct iommu_map_entry *)
+   atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
+   if (entry == NULL ||
+   !dmar_qi_seq_processed(unit, &entry->gseq))
+   break;
+   unit->tlb_flush_head = entry;
+   iommu_gas_free_entry(head);
+   if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
+   iommu_gas_free_region(entry);
+   else
+   iommu_gas_free_space(entry);
+   }
+}
+
 static void
 dmar_qi_task(void *arg, int pending __unused)
 {
struct dmar_unit *unit;
-   struct iommu_map_entry *entry, *head;
uint32_t ics;
 
unit = arg;
+   dmar_qi_drain_tlb_flush(unit);
 
/*
 * Request an interrupt on the completion of the next invalidation
@@ -428,23 +448,16 @@ dmar_qi_task(void *arg, int pending __unused)
if ((ics & DMAR_ICS_IWC) != 0) {
ics = DMAR_ICS_IWC;
dmar_write4(unit, DMAR_ICS_REG, ics);
-   }
 
-   for (;;) {
-   head = unit->tlb_flush_head;
-   entry = (struct iommu_map_entry *)
-   atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
-   if (entry == NULL)
-   break;
-   if (!dmar_qi_seq_processed(unit, &entry->gseq))
-   break;
-   unit->tlb_flush_head = entry;
-   iommu_gas_free_entry(head);
-   if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
-   iommu_gas_free_region(entry);
-   else
-   iommu_gas_free_space(entry);
+   /*
+* Drain a second time in case the DMAR processes an entry
+* after the first call and before clearing DMAR_ICS_IWC.
+* Otherwise, such entries will linger until a later entry
+* that requests an interrupt is processed.
+*/
+   dmar_qi_drain_tlb_flush(unit);
}
+
if (unit->inv_seq_waiters > 0) {
/*
 * Acquire the DMAR lock so that wakeup() is called only after



Re: [Sdcc-user] new sdcc calling conventions for z80/gbz80 together with __banked

2022-08-04 Thread Alan Cox
On Thu, 4 Aug 2022 09:06:35 +0200
Philipp Klaus Krause  wrote:

> Am 29.05.22 um 21:44 schrieb Tony Pavlov via Sdcc-user:
> 
> > 
> > another annoying thing is one byte hidden parameter for the bank number on 
> > the Z80 target.
> > inc sp/dec sp everywhere, while on gbz80 two bytes are reserved - that is 
> > much faster! why
> > not unify that? also, on systems like MSX you may want to save/restore more 
> > than one page
> > and two bytes may be very useful here!
> > 
> >   
> 
> Well, I'm personally not very familiar with all that banking stuff. I 
> try not to break it, but otherwise leave it to other sdcc devs (or wait 
> for patches from users). But since I'm not that familiar with it, I'm 
> reluctant to make changes that might be a problem for other users.

You actually want the smarts in the linker not the compiler IMHO
(especially on Z180 and R2K/R3K).

On Z80 with the Fuzix patches for transparent banked support I use

push af
call foo
pop af

which has a cost but creates the needed consistent extra stack offset for
all functions. The linker rewrites those 5 bytes into something else for
a cross bank (or overlay..) function. That allows arbitrary calling
between banks to work properly as you've got the 2 bytes needed.
Typically it's something like

call __bank1_2   ; from 1 to 2
.word foo

You also have to rewrite function pointers for it to work properly so
that any function pointer is turned into the address of a stub in common
space that does the banked call needed. This is also needed for standards
compliance so that all references to the address of the function give the
same value.
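
To make that concrete, a hypothetical sketch (helper and label names
invented) of the pieces described above:

	; Call site as emitted by the compiler: 5 bytes, and a consistent
	; extra stack offset for every function.
		push af
		call _foo
		pop af

	; The linker rewrites those same 5 bytes for a cross-bank call:
		call __bank1_2		; trampoline from bank 1 into bank 2
		.word _foo		; real address of foo within bank 2

	; Taking the address of foo instead resolves to a stub in common
	; memory, so all references to the function compare equal and calls
	; through pointers still do the bank switch:
	__thunk_foo:
		call __bank_enter_2	; hypothetical "enter bank 2" helper
		.word _foo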

R2K/R3K is a bit different because the processor is designed to keep a
rolling window of paged in code with most common space for data but apart
from having extra hardware support the same basic logic applies along
with rather more linker magic to pack functions so no function crosses an
8K boundary. It's something the official compiler does (did - it's
basically dead software you have to run under emulators) but would be a
big change to the very primitive linker SDCC relies upon.

Whether explicit or automatic banking is the best option is another topic
altogether and does depend a lot on use cases

Alan




git: 4670f90846d4 - main - iommu_gas: Eliminate redundant parameters and push down lock acquisition

2022-07-30 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=4670f90846d49027bf23435a30895a74264f1e79

commit 4670f90846d49027bf23435a30895a74264f1e79
Author: Alan Cox 
AuthorDate: 2022-07-29 06:14:46 +
Commit: Alan Cox 
CommitDate: 2022-07-30 19:28:48 +

iommu_gas: Eliminate redundant parameters and push down lock acquisition

Since IOMMU map entries store a reference to the domain in which they
reside, there is no need to pass the domain to iommu_gas_free_entry(),
iommu_gas_free_space(), and iommu_gas_free_region().

Push down the acquisition and release of the IOMMU domain lock into
iommu_gas_free_space() and iommu_gas_free_region().

Both of these changes allow for simplifications in the callers of the
functions without really complicating the functions themselves.
Moreover, the latter change eliminates the direct use of the IOMMU
domain lock from the x86-specific DMAR code.

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D35995
---
 sys/arm64/iommu/iommu.c  | 10 ++
 sys/dev/iommu/busdma_iommu.c |  4 ++--
 sys/dev/iommu/iommu.h|  9 +++--
 sys/dev/iommu/iommu_gas.c| 44 +---
 sys/x86/iommu/intel_ctx.c| 13 -
 sys/x86/iommu/intel_qi.c | 10 +++---
 6 files changed, 39 insertions(+), 51 deletions(-)

diff --git a/sys/arm64/iommu/iommu.c b/sys/arm64/iommu/iommu.c
index 0080ab4ff316..d24cad94e966 100644
--- a/sys/arm64/iommu/iommu.c
+++ b/sys/arm64/iommu/iommu.c
@@ -410,16 +410,10 @@ iommu_free_ctx(struct iommu_ctx *ioctx)
 static void
 iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
 {
-   struct iommu_domain *iodom;
-
-   iodom = entry->domain;
-
-   IOMMU_DOMAIN_LOCK(iodom);
-   iommu_gas_free_space(iodom, entry);
-   IOMMU_DOMAIN_UNLOCK(iodom);
+   iommu_gas_free_space(entry);
 
if (free)
-   iommu_gas_free_entry(iodom, entry);
+   iommu_gas_free_entry(entry);
else
entry->flags = 0;
 }
diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 67e82fe43e58..8f63d8b47f19 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -1040,7 +1040,7 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t 
map1,
ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
M_WAITOK : M_NOWAIT);
if (ma == NULL) {
-   iommu_gas_free_entry(domain, entry);
+   iommu_gas_free_entry(entry);
return (ENOMEM);
}
for (i = 0; i < atop(length); i++) {
@@ -1055,7 +1055,7 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t 
map1,
TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
IOMMU_DMAMAP_UNLOCK(map);
} else {
-   iommu_gas_free_entry(domain, entry);
+   iommu_gas_free_entry(entry);
}
for (i = 0; i < atop(length); i++)
vm_page_putfake(ma[i]);
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index fefd0f615be5..ae4022c5c4f7 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -169,15 +169,12 @@ void iommu_gas_init_domain(struct iommu_domain *domain);
 void iommu_gas_fini_domain(struct iommu_domain *domain);
 struct iommu_map_entry *iommu_gas_alloc_entry(struct iommu_domain *domain,
 u_int flags);
-void iommu_gas_free_entry(struct iommu_domain *domain,
-struct iommu_map_entry *entry);
-void iommu_gas_free_space(struct iommu_domain *domain,
-struct iommu_map_entry *entry);
+void iommu_gas_free_entry(struct iommu_map_entry *entry);
+void iommu_gas_free_space(struct iommu_map_entry *entry);
 int iommu_gas_map(struct iommu_domain *domain,
 const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
 u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res);
-void iommu_gas_free_region(struct iommu_domain *domain,
-struct iommu_map_entry *entry);
+void iommu_gas_free_region(struct iommu_map_entry *entry);
 int iommu_gas_map_region(struct iommu_domain *domain,
 struct iommu_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma);
 int iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index bac15edcf849..bad56ab9140e 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -107,12 +107,11 @@ iommu_gas_alloc_entry(struct iommu_domain *domain, u_int 
flags)
 }
 
 void
-iommu_gas_free_entry(struct iommu_domain *domain, struct iommu_map_entry 
*entry)
+iommu_gas_free_entry(struct iommu_map_entry *entry)
 {
+   struct iommu_domain *domain;
 
-   KASSERT(domain == entry->domain,
-   ("mismatched free domain %p entry %p en

git: 42736dc44dd0 - main - x86/iommu: Reduce DMAR lock contention

2022-07-28 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=42736dc44dd0151546db3f2e145ae1cfd4546fe1

commit 42736dc44dd0151546db3f2e145ae1cfd4546fe1
Author: Alan Cox 
AuthorDate: 2022-07-26 06:04:54 +
Commit: Alan Cox 
CommitDate: 2022-07-29 05:11:33 +

x86/iommu: Reduce DMAR lock contention

Replace the DMAR unit's tlb_flush TAILQ by a custom list implementation
that enables dmar_qi_task() to dequeue entries without holding the DMAR
lock.

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D35951
---
 sys/dev/iommu/iommu.h  |   5 +-
 sys/dev/iommu/iommu_gas.c  |   5 +-
 sys/x86/iommu/intel_ctx.c  |  16 +++
 sys/x86/iommu/intel_dmar.h |  33 +++--
 sys/x86/iommu/intel_qi.c   | 113 ++---
 5 files changed, 140 insertions(+), 32 deletions(-)

diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index 65fefe3ada7b..fefd0f615be5 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -56,7 +56,10 @@ struct iommu_map_entry {
iommu_gaddr_t free_down;/* Max free space below the
   current R/B tree node */
u_int flags;
-   TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* Link for dmamap entries */
+   union {
+   TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* DMA map entries */
+   struct iommu_map_entry *tlb_flush_next;
+   };
RB_ENTRY(iommu_map_entry) rb_entry;  /* Links for domain entries */
struct iommu_domain *domain;
struct iommu_qi_genseq gseq;
diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index ec456e2ec48b..bac15edcf849 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -99,7 +99,7 @@ iommu_gas_alloc_entry(struct iommu_domain *domain, u_int 
flags)
 
res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
-   if (res != NULL) {
+   if (res != NULL && domain != NULL) {
res->domain = domain;
atomic_add_int(&domain->entries_cnt, 1);
}
@@ -113,7 +113,8 @@ iommu_gas_free_entry(struct iommu_domain *domain, struct 
iommu_map_entry *entry)
KASSERT(domain == entry->domain,
("mismatched free domain %p entry %p entry->domain %p", domain,
entry, entry->domain));
-   atomic_subtract_int(&domain->entries_cnt, 1);
+   if (domain != NULL)
+   atomic_subtract_int(&domain->entries_cnt, 1);
uma_zfree(iommu_map_entry_zone, entry);
 }
 
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index 936cf8bb7632..3bd425aeecbd 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -867,6 +867,10 @@ dmar_domain_free_entry(struct iommu_map_entry *entry, bool 
free)
entry->flags = 0;
 }
 
+/*
+ * If the given value for "free" is true, then the caller must not be using
+ * the entry's dmamap_link field.
+ */
 void
 iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
 bool cansleep)
@@ -885,10 +889,7 @@ iommu_domain_unload_entry(struct iommu_map_entry *entry, 
bool free,
if (unit->qi_enabled) {
if (free) {
DMAR_LOCK(unit);
-   dmar_qi_invalidate_locked(domain, entry->start,
-   entry->end - entry->start, &entry->gseq, true);
-   TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry,
-   dmamap_link);
+   dmar_qi_invalidate_locked(domain, entry, true);
DMAR_UNLOCK(unit);
} else {
dmar_qi_invalidate_sync(domain, entry->start,
@@ -942,12 +943,11 @@ iommu_domain_unload(struct iommu_domain *iodom,
 
KASSERT(unit->qi_enabled, ("loaded entry left"));
DMAR_LOCK(unit);
-   TAILQ_FOREACH(entry, entries, dmamap_link) {
-   dmar_qi_invalidate_locked(domain, entry->start, entry->end -
-   entry->start, &entry->gseq,
+   while ((entry = TAILQ_FIRST(entries)) != NULL) {
+   TAILQ_REMOVE(entries, entry, dmamap_link);
+   dmar_qi_invalidate_locked(domain, entry,
dmar_domain_unload_emit_wait(domain, entry));
}
-   TAILQ_CONCAT(&unit->tlb_flush_entries, entries, dmamap_link);
DMAR_UNLOCK(unit);
 }
 
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index 06cecdf704ff..1234ee058ffd 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -177,8 +177,33 @@ struct dmar_unit {
u_int irte_cnt;
vmem_t *irtids;
 
-   /* Delayed freeing of map entries queue processing */
-   struct iommu_m
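
(The custom list mentioned in the commit message is easiest to see in
isolation.  Below is a self-contained userspace sketch with C11 atomics -
illustrative names, not the kernel's: producers append after a permanent
dummy head while holding their lock, and the consumer walks the published
next pointers without ever taking that lock.)

#include <stdatomic.h>
#include <stdio.h>

struct node {
        int payload;
        _Atomic(struct node *) next;
};

struct flushq {
        struct node head;               /* permanent dummy node */
        struct node *tail;              /* producer-side only, under its lock */
};

static void
flushq_init(struct flushq *q)
{
        atomic_store(&q->head.next, NULL);
        q->tail = &q->head;
}

/* Producer: in the kernel this would run with the unit lock held. */
static void
flushq_append(struct flushq *q, struct node *n)
{
        atomic_store(&n->next, NULL);
        atomic_store_explicit(&q->tail->next, n, memory_order_release);
        q->tail = n;
}

/* Consumer: lock-free, it only follows pointers already published with
 * release semantics by the producer. */
static struct node *
flushq_next(struct node **cursor)
{
        struct node *n;

        n = atomic_load_explicit(&(*cursor)->next, memory_order_acquire);
        if (n != NULL)
                *cursor = n;
        return (n);
}

int
main(void)
{
        struct flushq q;
        struct node a = { .payload = 1 }, b = { .payload = 2 };
        struct node *cursor, *n;

        flushq_init(&q);
        flushq_append(&q, &a);
        flushq_append(&q, &b);
        for (cursor = &q.head; (n = flushq_next(&cursor)) != NULL;)
                printf("%d\n", n->payload);
        return (0);
}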

git: c25156347083 - main - x86/iommu: Correct a recent change to iommu_domain_unload_entry()

2022-07-26 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=c251563470831c34cf53242936425a0d4d995edf

commit c251563470831c34cf53242936425a0d4d995edf
Author: Alan Cox 
AuthorDate: 2022-07-26 04:53:15 +
Commit: Alan Cox 
CommitDate: 2022-07-26 06:07:21 +

x86/iommu: Correct a recent change to iommu_domain_unload_entry()

Correct 8bc367384745.  When iommu_domain_unload_entry() performs a
synchronous IOTLB invalidation, it must call dmar_domain_free_entry()
to remove the entry from the domain's RB_TREE.

Push down the acquisition and release of the DMAR lock into the
recently introduced function dmar_qi_invalidate_sync_locked() and
remove the _locked suffix.

MFC with:   8bc367384745
---
 sys/x86/iommu/intel_ctx.c  | 7 ---
 sys/x86/iommu/intel_dmar.h | 4 ++--
 sys/x86/iommu/intel_qi.c   | 9 ++---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index 5e13f020264b..936cf8bb7632 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -883,17 +883,18 @@ iommu_domain_unload_entry(struct iommu_map_entry *entry, 
bool free,
 * dmar_qi_task() is finished processing it.
 */
if (unit->qi_enabled) {
-   DMAR_LOCK(unit);
if (free) {
+   DMAR_LOCK(unit);
dmar_qi_invalidate_locked(domain, entry->start,
entry->end - entry->start, &entry->gseq, true);
TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry,
dmamap_link);
+   DMAR_UNLOCK(unit);
} else {
-   dmar_qi_invalidate_sync_locked(domain, entry->start,
+   dmar_qi_invalidate_sync(domain, entry->start,
entry->end - entry->start, cansleep);
+   dmar_domain_free_entry(entry, false);
}
-   DMAR_UNLOCK(unit);
} else {
domain_flush_iotlb_sync(domain, entry->start, entry->end -
entry->start);
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index 0f811d760bb7..06cecdf704ff 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -251,8 +251,8 @@ int dmar_init_qi(struct dmar_unit *unit);
 void dmar_fini_qi(struct dmar_unit *unit);
 void dmar_qi_invalidate_locked(struct dmar_domain *domain, iommu_gaddr_t start,
 iommu_gaddr_t size, struct iommu_qi_genseq *psec, bool emit_wait);
-void dmar_qi_invalidate_sync_locked(struct dmar_domain *domain,
-iommu_gaddr_t start, iommu_gaddr_t size, bool cansleep);
+void dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t start,
+iommu_gaddr_t size, bool cansleep);
 void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit);
 void dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit);
 void dmar_qi_invalidate_iec_glob(struct dmar_unit *unit);
diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
index 174cf9ea19a8..32f01a2787b0 100644
--- a/sys/x86/iommu/intel_qi.c
+++ b/sys/x86/iommu/intel_qi.c
@@ -243,14 +243,17 @@ dmar_qi_invalidate_locked(struct dmar_domain *domain, 
iommu_gaddr_t base,
 }
 
 void
-dmar_qi_invalidate_sync_locked(struct dmar_domain *domain, iommu_gaddr_t base,
+dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t base,
 iommu_gaddr_t size, bool cansleep)
 {
+   struct dmar_unit *unit;
struct iommu_qi_genseq gseq;
 
-   DMAR_ASSERT_LOCKED(domain->dmar);
+   unit = domain->dmar;
+   DMAR_LOCK(unit);
dmar_qi_invalidate_locked(domain, base, size, &gseq, true);
-   dmar_qi_wait_for_seq(domain->dmar, &gseq, !cansleep);
+   dmar_qi_wait_for_seq(unit, &gseq, !cansleep);
+   DMAR_UNLOCK(unit);
 }
 
 void



git: 8bc367384745 - main - iommu_gas: Eliminate a possible case of use-after-free

2022-07-25 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=8bc3673847453ca51237b5c85fe57f3f02e17a4b

commit 8bc3673847453ca51237b5c85fe57f3f02e17a4b
Author: Alan Cox 
AuthorDate: 2022-07-22 17:00:26 +
Commit: Alan Cox 
CommitDate: 2022-07-25 16:14:58 +

iommu_gas: Eliminate a possible case of use-after-free

Eliminate a possible case of use-after-free in an error handling path
after a mapping failure.  Specifically, eliminate IOMMU_MAP_ENTRY_QI_NF
and instead perform the IOTLB invalidation synchronously.  Otherwise,
when iommu_domain_unload_entry() is called and told not to free the
IOMMU map entry, the caller could free the entry before dmar_qi_task()
is finished with it.

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D35878
---
 sys/arm64/iommu/iommu.c|  3 ++-
 sys/dev/iommu/iommu.h  |  3 ++-
 sys/dev/iommu/iommu_gas.c  |  6 --
 sys/dev/iommu/iommu_gas.h  |  1 -
 sys/x86/iommu/intel_ctx.c  | 28 +++-
 sys/x86/iommu/intel_dmar.h |  2 ++
 sys/x86/iommu/intel_qi.c   | 14 --
 7 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/sys/arm64/iommu/iommu.c b/sys/arm64/iommu/iommu.c
index aa48dcf5ab5e..0080ab4ff316 100644
--- a/sys/arm64/iommu/iommu.c
+++ b/sys/arm64/iommu/iommu.c
@@ -509,7 +509,8 @@ iommu_find(device_t dev, bool verbose)
 }
 
 void
-iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free)
+iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+bool cansleep __unused)
 {
 
dprintf("%s\n", __func__);
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index 62b5659b6e83..65fefe3ada7b 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -151,7 +151,8 @@ void iommu_free_ctx_locked(struct iommu_unit *iommu, struct 
iommu_ctx *ctx);
 struct iommu_ctx *iommu_get_ctx(struct iommu_unit *, device_t dev,
 uint16_t rid, bool id_mapped, bool rmrr_init);
 struct iommu_unit *iommu_find(device_t dev, bool verbose);
-void iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free);
+void iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+bool cansleep);
 void iommu_domain_unload(struct iommu_domain *domain,
 struct iommu_map_entries_tailq *entries, bool cansleep);
 
diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index 86dc919e4572..ec456e2ec48b 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -638,7 +638,8 @@ iommu_gas_map(struct iommu_domain *domain,
entry->end - entry->start, ma, eflags,
((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
if (error == ENOMEM) {
-   iommu_domain_unload_entry(entry, true);
+   iommu_domain_unload_entry(entry, true,
+   (flags & IOMMU_MF_CANWAIT) != 0);
return (error);
}
KASSERT(error == 0,
@@ -676,7 +677,8 @@ iommu_gas_map_region(struct iommu_domain *domain, struct 
iommu_map_entry *entry,
entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
if (error == ENOMEM) {
-   iommu_domain_unload_entry(entry, false);
+   iommu_domain_unload_entry(entry, false,
+   (flags & IOMMU_MF_CANWAIT) != 0);
return (error);
}
KASSERT(error == 0,
diff --git a/sys/dev/iommu/iommu_gas.h b/sys/dev/iommu/iommu_gas.h
index c32a098538b0..a9d0df5f272f 100644
--- a/sys/dev/iommu/iommu_gas.h
+++ b/sys/dev/iommu/iommu_gas.h
@@ -50,7 +50,6 @@
 #defineIOMMU_MAP_ENTRY_MAP 0x0004  /* Busdma created, linked by
   dmamap_link */
 #defineIOMMU_MAP_ENTRY_UNMAPPED0x0010  /* No backing pages */
-#defineIOMMU_MAP_ENTRY_QI_NF   0x0020  /* qi task, do not free entry */
 #defineIOMMU_MAP_ENTRY_READ0x1000  /* Read permitted */
 #defineIOMMU_MAP_ENTRY_WRITE   0x2000  /* Write permitted */
 #defineIOMMU_MAP_ENTRY_SNOOP   0x4000  /* Snoop */
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index bfc607674b57..5e13f020264b 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -868,25 +868,35 @@ dmar_domain_free_entry(struct iommu_map_entry *entry, 
bool free)
 }
 
 void
-iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free)
+iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+bool cansleep)
 {
struct dmar_domain *domain;
struct dmar_unit *unit;
 
domain = IODOM2DOM(entry->domain);
unit = DOM2DMAR(domain);
+
+   /*
+* If "free" is false, then the IOTLB invalidation must be performed
+* synchronously.  Otherwise, the call


Re: [Sdcc-user] What are the main (dis)advantages of SDCC vs. non-free compilers?

2022-07-22 Thread Alan Cox
On Thu, 21 Jul 2022 14:06:42 +0200
Philipp Klaus Krause  wrote:

> Dear SDCC users,
> 
> you have chosen SDCC over non-free alternatives. I'm a bit interested in 
> knowing the reasons. And also in knowing in which areas you think SDCC 
> is lacking compared to non-free alternatives. Also: do you use non-free 
> compilers for some of your projects for architectures supported by SDCC? 
> Knowing which architectures these (dis)advantages apply to would also be 
> helpful.

I use it because I don't want to be dependent upon compilers I can't hack,
fix, or modify.

Good bits
- Code generation is the best I've seen for Z80 (yes, it's bad for some
  stuff, but it's still beating the rest by a lot)
- Actually maintained
- Being open I could hack my own fork to support RAM based binaries well
  and also transparent code banking
- Code quality is reasonable and it's possible to make changes and work
  on it a bit.

Bad bits
- Stability. Each release seems to fix 5 things and break 4 different
  ones, which makes it harder to manage
- Lack of proper support for RAM based binaries on Z80
- Doesn't follow any C compiler conventions about object file naming, not
  filling the current directory with trash files I don't want, etc.
- Compile-time performance (although this has improved)
- Doesn't know how to build relocatable binaries (but there are patches
  and ways to deal with this, plus forks like Z88DK)

Some people will know I plan to drop SDCC for Fuzix eventually, but to be
clear, that is solely because I intend to have a self-hosting compiler, not
because I dislike SDCC.



___
Sdcc-user mailing list
Sdcc-user@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sdcc-user


git: dfabdacb279c - main - iommu_gas: Avoid double unmapping on error

2022-07-21 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=dfabdacb279ca603d008a0e7e952c5c59ac51da4

commit dfabdacb279ca603d008a0e7e952c5c59ac51da4
Author: Alan Cox 
AuthorDate: 2022-07-21 06:53:54 +
Commit: Alan Cox 
CommitDate: 2022-07-21 07:00:46 +

iommu_gas: Avoid double unmapping on error

In the extremely unlikely case that the iommu_gas_map_region() call in
bus_dma_iommu_load_ident() failed, we would attempt to unmap the failed
entry twice, first in iommu_gas_map_region(), and a second time in the
caller.  Once is enough, and twice is problematic because it leads to a
second RB_REMOVE call on the same tree node.  Like it or not, RB_TREE
does not handle that possibility.

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D35869
---
 sys/dev/iommu/busdma_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 10e7476b35eb..67e82fe43e58 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -1055,7 +1055,7 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t 
map1,
TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
IOMMU_DMAMAP_UNLOCK(map);
} else {
-   iommu_domain_unload_entry(entry, true);
+   iommu_gas_free_entry(domain, entry);
}
for (i = 0; i < atop(length); i++)
vm_page_putfake(ma[i]);



git: 54291f7d6506 - main - swap_pager: Reduce the scope of the object lock in putpages

2022-07-18 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=54291f7d6506e6c6087433c5bbdb2224b6cef23b

commit 54291f7d6506e6c6087433c5bbdb2224b6cef23b
Author: Alan Cox 
AuthorDate: 2022-07-19 03:28:07 +
Commit: Alan Cox 
CommitDate: 2022-07-19 03:35:49 +

swap_pager: Reduce the scope of the object lock in putpages

We don't need to hold the object lock while allocating swap space, so
don't.

Reviewed by:dougm, kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D35839
---
 sys/vm/swap_pager.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index c20360975c4b..67cc3bf017d2 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1506,10 +1506,8 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, 
int count,
}
 
/* Get a block of swap of size up to size n. */
-   VM_OBJECT_WLOCK(object);
blk = swp_pager_getswapspace(&n);
if (blk == SWAPBLK_NONE) {
-   VM_OBJECT_WUNLOCK(object);
mtx_lock(&swbuf_mtx);
if (++nsw_wcount_async == 1)
wakeup(&nsw_wcount_async);
@@ -1518,6 +1516,7 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, 
int count,
rtvals[i + j] = VM_PAGER_FAIL;
continue;
}
+   VM_OBJECT_WLOCK(object);
for (j = 0; j < n; ++j) {
mreq = ma[i + j];
vm_page_aflag_clear(mreq, PGA_SWAP_FREE);



git: 4eaaacc75535 - main - x86/iommu: Shrink the critical section in dmar_qi_task()

2022-07-18 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=4eaaacc75535befdb9894cca4e0d8da376328fa4

commit 4eaaacc75535befdb9894cca4e0d8da376328fa4
Author: Alan Cox 
AuthorDate: 2022-07-18 00:56:39 +
Commit: Alan Cox 
CommitDate: 2022-07-19 03:23:13 +

x86/iommu: Shrink the critical section in dmar_qi_task()

It is safe to test and clear the Invalidation Wait Descriptor
Complete flag before acquiring the DMAR lock in dmar_qi_task(),
rather than waiting until the lock is held.

Reviewed by:kib
MFC after:  2 weeks
---
 sys/x86/iommu/intel_qi.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
index 894e3d537ac7..ca58715a227c 100644
--- a/sys/x86/iommu/intel_qi.c
+++ b/sys/x86/iommu/intel_qi.c
@@ -343,6 +343,16 @@ dmar_qi_task(void *arg, int pending __unused)
 
unit = arg;
 
+   /*
+* Request an interrupt on the completion of the next invalidation
+* wait descriptor with the IF field set.
+*/
+   ics = dmar_read4(unit, DMAR_ICS_REG);
+   if ((ics & DMAR_ICS_IWC) != 0) {
+   ics = DMAR_ICS_IWC;
+   dmar_write4(unit, DMAR_ICS_REG, ics);
+   }
+
DMAR_LOCK(unit);
for (;;) {
entry = TAILQ_FIRST(&unit->tlb_flush_entries);
@@ -356,11 +366,6 @@ dmar_qi_task(void *arg, int pending __unused)
IOMMU_MAP_ENTRY_QI_NF) == 0);
DMAR_LOCK(unit);
}
-   ics = dmar_read4(unit, DMAR_ICS_REG);
-   if ((ics & DMAR_ICS_IWC) != 0) {
-   ics = DMAR_ICS_IWC;
-   dmar_write4(unit, DMAR_ICS_REG, ics);
-   }
if (unit->inv_seq_waiters > 0)
wakeup(>inv_seq_waiters);
DMAR_UNLOCK(unit);



git: da55f86c6146 - main - x86/iommu: Eliminate redundant wrappers

2022-07-16 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=da55f86c61462b119fd1306d12411989d6610650

commit da55f86c61462b119fd1306d12411989d6610650
Author: Alan Cox 
AuthorDate: 2022-07-16 04:25:11 +
Commit: Alan Cox 
CommitDate: 2022-07-16 23:05:37 +

x86/iommu: Eliminate redundant wrappers

Reviewed by:kib
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D35832
---
 sys/x86/iommu/intel_ctx.c  | 26 --
 sys/x86/iommu/intel_dmar.h |  3 ---
 2 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index 79e2a15d80c7..bfc607674b57 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -868,7 +868,7 @@ dmar_domain_free_entry(struct iommu_map_entry *entry, bool 
free)
 }
 
 void
-dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free)
+iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free)
 {
struct dmar_domain *domain;
struct dmar_unit *unit;
@@ -902,15 +902,15 @@ dmar_domain_unload_emit_wait(struct dmar_domain *domain,
 }
 
 void
-dmar_domain_unload(struct dmar_domain *domain,
+iommu_domain_unload(struct iommu_domain *iodom,
 struct iommu_map_entries_tailq *entries, bool cansleep)
 {
+   struct dmar_domain *domain;
struct dmar_unit *unit;
-   struct iommu_domain *iodom;
struct iommu_map_entry *entry, *entry1;
int error __diagused;
 
-   iodom = DOM2IODOM(domain);
+   domain = IODOM2DOM(iodom);
unit = DOM2DMAR(domain);
 
TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
@@ -975,21 +975,3 @@ iommu_free_ctx(struct iommu_ctx *context)
 
dmar_free_ctx(ctx);
 }
-
-void
-iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free)
-{
-
-   dmar_domain_unload_entry(entry, free);
-}
-
-void
-iommu_domain_unload(struct iommu_domain *iodom,
-struct iommu_map_entries_tailq *entries, bool cansleep)
-{
-   struct dmar_domain *domain;
-
-   domain = IODOM2DOM(iodom);
-
-   dmar_domain_unload(domain, entries, cansleep);
-}
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index b34505a4e5d0..05793ed9f238 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -277,9 +277,6 @@ int dmar_move_ctx_to_domain(struct dmar_domain *domain, 
struct dmar_ctx *ctx);
 void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx);
 void dmar_free_ctx(struct dmar_ctx *ctx);
 struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid);
-void dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free);
-void dmar_domain_unload(struct dmar_domain *domain,
-struct iommu_map_entries_tailq *entries, bool cansleep);
 void dmar_domain_free_entry(struct iommu_map_entry *entry, bool free);
 
 void dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain,



git: db0110a536bf - main - iommu: Shrink the iommu map entry structure

2022-07-15 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=db0110a536bf70c1ff55f3b3f46a0b5a9af46058

commit db0110a536bf70c1ff55f3b3f46a0b5a9af46058
Author: Alan Cox 
AuthorDate: 2022-07-11 03:52:52 +
Commit: Alan Cox 
CommitDate: 2022-07-16 03:24:52 +

iommu: Shrink the iommu map entry structure

Eliminate the unroll_entry field from struct iommu_map_entry, shrinking
the struct by 16 bytes on 64-bit architectures.

Reviewed by:kib
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D35769
---
 sys/dev/iommu/busdma_iommu.c | 33 ++---
 sys/dev/iommu/iommu.h|  2 --
 sys/x86/iommu/intel_ctx.c|  4 ++--
 sys/x86/iommu/intel_drv.c|  2 +-
 4 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 69cf9dd12e7e..10e7476b35eb 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -558,7 +558,7 @@ static int
 iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
 struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
 int flags, bus_dma_segment_t *segs, int *segp,
-struct iommu_map_entries_tailq *unroll_list)
+struct iommu_map_entries_tailq *entries)
 {
struct iommu_ctx *ctx;
struct iommu_domain *domain;
@@ -626,10 +626,7 @@ iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu 
*tag,
 
KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
-   IOMMU_DMAMAP_LOCK(map);
-   TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
-   IOMMU_DMAMAP_UNLOCK(map);
-   TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);
+   TAILQ_INSERT_TAIL(entries, entry, dmamap_link);
 
segs[seg].ds_addr = entry->start + offset;
segs[seg].ds_len = buflen1;
@@ -651,36 +648,26 @@ iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu 
*tag,
 {
struct iommu_ctx *ctx;
struct iommu_domain *domain;
-   struct iommu_map_entry *entry;
-   struct iommu_map_entries_tailq entries, unroll_list;
+   struct iommu_map_entries_tailq entries;
int error;
 
ctx = tag->ctx;
domain = ctx->domain;
atomic_add_long(&ctx->loads, 1);
 
-   TAILQ_INIT(&unroll_list);
+   TAILQ_INIT(&entries);
error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
-   buflen, flags, segs, segp, &unroll_list);
-   if (error != 0 && !TAILQ_EMPTY(&unroll_list)) {
+   buflen, flags, segs, segp, &entries);
+   if (error == 0) {
+   IOMMU_DMAMAP_LOCK(map);
+   TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
+   IOMMU_DMAMAP_UNLOCK(map);
+   } else if (!TAILQ_EMPTY(&entries)) {
/*
 * The busdma interface does not allow us to report
 * partial buffer load, so unfortunately we have to
 * revert all work done.
 */
-   TAILQ_INIT(&entries);
-   IOMMU_DMAMAP_LOCK(map);
-   TAILQ_FOREACH(entry, &unroll_list, unroll_link) {
-   /*
-* No entries other than what we have created
-* during the failed run might have been
-* inserted there in between, since we own ctx
-* pglock.
-*/
-   TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
-   TAILQ_INSERT_TAIL(&entries, entry, dmamap_link);
-   }
-   IOMMU_DMAMAP_UNLOCK(map);
IOMMU_DOMAIN_LOCK(domain);
TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
IOMMU_DOMAIN_UNLOCK(domain);
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index 3800213a1d64..62b5659b6e83 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -58,8 +58,6 @@ struct iommu_map_entry {
u_int flags;
TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* Link for dmamap entries */
RB_ENTRY(iommu_map_entry) rb_entry;  /* Links for domain entries */
-   TAILQ_ENTRY(iommu_map_entry) unroll_link; /* Link for unroll after
-   dmamap_load failure */
struct iommu_domain *domain;
struct iommu_qi_genseq gseq;
 };
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index 815dc6146b00..79e2a15d80c7 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -245,7 +245,7 @@ domain_init_rmrr(struct dmar_domain *domain, device_t dev, 
int bus,
TAILQ_INIT(&rmrr_entries);
dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
dev_path_len, &rmrr_entries);
-   TAILQ_FOREACH_SAFE(entry, &rmrr_entries, 
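
(The "build on a local list, splice on success" pattern this diff moves to
is worth seeing on its own.  Below is a standalone userspace sketch using
FreeBSD's <sys/queue.h> - illustrative code, not the busdma code: one
linkage field is enough because an entry is only ever on one list at a
time.)

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
        int v;
        TAILQ_ENTRY(item) link;         /* single linkage field */
};
TAILQ_HEAD(itemq, item);

static int
load_items(struct itemq *scratch, int n)
{
        struct item *it;
        int i;

        for (i = 0; i < n; i++) {
                it = malloc(sizeof(*it));
                if (it == NULL)
                        return (-1);    /* scratch still owns what was built */
                it->v = i;
                TAILQ_INSERT_TAIL(scratch, it, link);
        }
        return (0);
}

int
main(void)
{
        struct itemq map = TAILQ_HEAD_INITIALIZER(map);
        struct itemq scratch = TAILQ_HEAD_INITIALIZER(scratch);
        struct item *it;

        if (load_items(&scratch, 3) == 0) {
                /* Success: one O(1) splice; in the driver this is the only
                 * point where the destination's lock is taken. */
                TAILQ_CONCAT(&map, &scratch, link);
        } else {
                /* Failure: unwind the whole batch from the scratch list
                 * without ever having touched the destination. */
                while ((it = TAILQ_FIRST(&scratch)) != NULL) {
                        TAILQ_REMOVE(&scratch, it, link);
                        free(it);
                }
        }
        TAILQ_FOREACH(it, &map, link)
                printf("%d\n", it->v);
        return (0);
}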

git: db0110a536bf - main - iommu: Shrink the iommu map entry structure

2022-07-15 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=db0110a536bf70c1ff55f3b3f46a0b5a9af46058

commit db0110a536bf70c1ff55f3b3f46a0b5a9af46058
Author: Alan Cox 
AuthorDate: 2022-07-11 03:52:52 +
Commit: Alan Cox 
CommitDate: 2022-07-16 03:24:52 +

iommu: Shrink the iommu map entry structure

Eliminate the unroll_entry field from struct iommu_map_entry, shrinking
the struct by 16 bytes on 64-bit architectures.

Reviewed by:kib
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D35769
---
 sys/dev/iommu/busdma_iommu.c | 33 ++---
 sys/dev/iommu/iommu.h|  2 --
 sys/x86/iommu/intel_ctx.c|  4 ++--
 sys/x86/iommu/intel_drv.c|  2 +-
 4 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 69cf9dd12e7e..10e7476b35eb 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -558,7 +558,7 @@ static int
 iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
 struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
 int flags, bus_dma_segment_t *segs, int *segp,
-struct iommu_map_entries_tailq *unroll_list)
+struct iommu_map_entries_tailq *entries)
 {
struct iommu_ctx *ctx;
struct iommu_domain *domain;
@@ -626,10 +626,7 @@ iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu 
*tag,
 
KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
-   IOMMU_DMAMAP_LOCK(map);
-   TAILQ_INSERT_TAIL(>map_entries, entry, dmamap_link);
-   IOMMU_DMAMAP_UNLOCK(map);
-   TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);
+   TAILQ_INSERT_TAIL(entries, entry, dmamap_link);
 
segs[seg].ds_addr = entry->start + offset;
segs[seg].ds_len = buflen1;
@@ -651,36 +648,26 @@ iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu 
*tag,
 {
struct iommu_ctx *ctx;
struct iommu_domain *domain;
-   struct iommu_map_entry *entry;
-   struct iommu_map_entries_tailq entries, unroll_list;
+   struct iommu_map_entries_tailq entries;
int error;
 
ctx = tag->ctx;
domain = ctx->domain;
atomic_add_long(>loads, 1);
 
-   TAILQ_INIT(_list);
+   TAILQ_INIT();
error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
-   buflen, flags, segs, segp, _list);
-   if (error != 0 && !TAILQ_EMPTY(_list)) {
+   buflen, flags, segs, segp, );
+   if (error == 0) {
+   IOMMU_DMAMAP_LOCK(map);
+   TAILQ_CONCAT(>map_entries, , dmamap_link);
+   IOMMU_DMAMAP_UNLOCK(map);
+   } else if (!TAILQ_EMPTY()) {
/*
 * The busdma interface does not allow us to report
 * partial buffer load, so unfortunately we have to
 * revert all work done.
 */
-   TAILQ_INIT();
-   IOMMU_DMAMAP_LOCK(map);
-   TAILQ_FOREACH(entry, _list, unroll_link) {
-   /*
-* No entries other than what we have created
-* during the failed run might have been
-* inserted there in between, since we own ctx
-* pglock.
-*/
-   TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
-   TAILQ_INSERT_TAIL(&entries, entry, dmamap_link);
-   }
-   IOMMU_DMAMAP_UNLOCK(map);
IOMMU_DOMAIN_LOCK(domain);
TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
IOMMU_DOMAIN_UNLOCK(domain);
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index 3800213a1d64..62b5659b6e83 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -58,8 +58,6 @@ struct iommu_map_entry {
u_int flags;
TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* Link for dmamap entries */
RB_ENTRY(iommu_map_entry) rb_entry;  /* Links for domain entries */
-   TAILQ_ENTRY(iommu_map_entry) unroll_link; /* Link for unroll after
-   dmamap_load failure */
struct iommu_domain *domain;
struct iommu_qi_genseq gseq;
 };
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index 815dc6146b00..79e2a15d80c7 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -245,7 +245,7 @@ domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
TAILQ_INIT(&rmrr_entries);
dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
dev_path_len, &rmrr_entries);
-   TAILQ_FOREACH_SAFE(entry, &rmrr_entries, 
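
The rework above relies on TAILQ_CONCAT() from queue(3), which splices one
tail queue onto the end of another in constant time and leaves the source
queue empty, so the locally built list can be handed off under the map or
domain lock without walking it.  A minimal standalone sketch of that pattern
(the element type and names are illustrative, not taken from the commit;
builds against FreeBSD's <sys/queue.h>):

    #include <sys/queue.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct item {
            int                 id;
            TAILQ_ENTRY(item)   link;   /* single linkage field, like dmamap_link */
    };
    TAILQ_HEAD(itemq, item);

    int
    main(void)
    {
            struct itemq pending, committed;
            struct item *it;
            int i;

            TAILQ_INIT(&pending);
            TAILQ_INIT(&committed);

            /* Build new entries on a local list first. */
            for (i = 0; i < 3; i++) {
                    if ((it = malloc(sizeof(*it))) == NULL)
                            return (1);
                    it->id = i;
                    TAILQ_INSERT_TAIL(&pending, it, link);
            }

            /*
             * On success, splice the local list onto the long-lived one in
             * O(1); "pending" is left empty.  On failure the same local
             * list could instead be concatenated onto an unload list.
             */
            TAILQ_CONCAT(&committed, &pending, link);

            TAILQ_FOREACH(it, &committed, link)
                    printf("%d\n", it->id);
            return (0);
    }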

Re: [Sdcc-user] Possible changes to inline assembler syntax

2022-07-14 Thread Alan Cox
On Thu, 14 Jul 2022 12:58:07 +0200
Philipp Klaus Krause  wrote:

> SDCC currently supports two forms of inline assembler:
> 
> 1:
> __asm
> asm code here
> __endasm;
> 
> and
> 
> 2:
> __asm("asm code here");
> 
> Form 1 requires some ugly hacks to avoid conflicts in the preprocessor.
> I wonder if we could drop those, and maybe even change the keyword in 
> form 2 to __asm__.
> 
> Does anyone rely on form 1 and would not want support for it dropped? 
> Any opinions on the possible rename of form 2?

Fuzix relies on form 1, but at the moment I'm stuck with the older SDCC
anyway due to all the register and other changes.
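
For anyone who hasn't used either syntax, here is a minimal sketch of the two
forms for a Z80 target (the function names and the nop mnemonics are made up
for illustration; only the __asm/__endasm and __asm("...") wrappers come from
the discussion above):

    /* Form 1: statement-style block, terminated by __endasm; */
    void pad_form1(void)
    {
        __asm
            nop
            nop
        __endasm;
    }

    /* Form 2: string argument */
    void pad_form2(void)
    {
        __asm("nop");
    }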





Re: [Sdcc-user] Spurious Link Option

2022-07-14 Thread Alan Cox
On Wed, 13 Jul 2022 14:35:31 +0100
William Brendling  wrote:

> I am using the following SDCC version:
> 
> SDCC : 
> mcs51/z80/z180/r2k/r2ka/r3ka/sm83/tlcs90/ez80_z80/z80n/ds390/pic16/pic14/TININative/ds400/hc08/s08/stm8/pdk13/pdk14/pdk15/mos6502
> 4.2.0 #13081 (MINGW64)
> published under GNU General Public License (GPL)
> 
> I am compiling with the line:
> 
> sdcc -mz80 --code-loc 0x8000 main.c
> 
> The resulting "main.lk" file is:
> 
> -mjwx
> -i main.ihx
> -b _CODE = 0x8000
> -b _DATA = 0x8000
> -k C:\Program Files\SDCC\bin\..\lib\z80
> -l z80
> C:\Program Files\SDCC\bin\..\lib\z80\crt0.rel
> main.rel
> 
> -e
> 
> This contains the spurious line "-b _DATA = 0x8000". As a result the
> data overlaps the code.
> 
> Is there any way of preventing SDCC from emitting a "-b DATA"
> directive so that the linker just packs the DATA section after the
> CODE section in the order specified by the "crt0.s" file?

Don't use sdcc to do the link; write your own link file. Note, by the way,
that SDCC doesn't support anything but ROM-based content unless you fiddle
with it - it's wedded to the idea that initialized variables are copied
from ROM, so if you are running from RAM you'll also need to massage the
binary.

The Z88DK fork of SDCC is often more useful for retrocomputing and RAM-based
setups.

Alan
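
A sketch of what a hand-written link file for the case above could look like,
assuming the data sections are wanted at 0x9000 in RAM (the address is purely
illustrative; everything else is lifted from the generated main.lk quoted
above, and the file would then be fed to the linker directly instead of
letting sdcc drive the link):

    -mjwx
    -i main.ihx
    -b _CODE = 0x8000
    -b _DATA = 0x9000
    -k C:\Program Files\SDCC\bin\..\lib\z80
    -l z80
    C:\Program Files\SDCC\bin\..\lib\z80\crt0.rel
    main.rel

    -e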




git: 7b39a9bc1df3 - main - iommu_gas: Fix a recent regression with IOMMU_MF_CANSPLIT

2022-06-26 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7b39a9bc1df37502e8186593f3427b7ff0e4cc71

commit 7b39a9bc1df37502e8186593f3427b7ff0e4cc71
Author: Alan Cox 
AuthorDate: 2022-06-26 16:48:12 +
Commit: Alan Cox 
CommitDate: 2022-06-26 21:31:54 +

iommu_gas: Fix a recent regression with IOMMU_MF_CANSPLIT

As of 19bb5a7244ff, the IOMMU_MF_CANSPLIT case in iommu_gas_match_one()
must take into account the specified offset.  Otherwise, the recently
changed end calculation in iommu_gas_match_insert() could produce an
end address that crosses the specified boundary by one page.

Reviewed by:dougm
MFC with:   19bb5a7244ff
---
 sys/dev/iommu/iommu_gas.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index 2647c2ce6612..bb6cde2721a6 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -350,7 +350,7 @@ iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
 * the next entry, then we do not have gap.  Ignore for now.
 */
if ((a->gas_flags & IOMMU_MF_CANSPLIT) != 0) {
-   a->size = bs - a->entry->start;
+   a->size = bs - a->entry->start - a->offset;
return (true);
}
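
To make the arithmetic concrete, a made-up example (the numbers are purely
illustrative, and the end computation is paraphrased from
iommu_gas_match_insert() rather than quoted): take entry->start = 0x1000,
boundary bs = 0x10000, a->offset = 0x800, and a 0x1000-byte page, with the
entry ending roughly at start + roundup(offset + size, page size).

    old: size = bs - start          = 0xf000 -> end = 0x1000 + 0x10000 = 0x11000
         (crosses bs = 0x10000 by one page)
    new: size = bs - start - offset = 0xe800 -> end = 0x1000 + 0xf000  = 0x10000
         (ends exactly at the boundary)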
 


