From: Dave Airlie <[email protected]>

I want to increase the counters here and start tracking LPTs as well,
since there are certain situations where userspace with mixed page
sizes can cause refs/unrefs to live longer, so better reference
counting is needed.
This should be entirely non-functional.

Signed-off-by: Dave Airlie <[email protected]>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 41 ++++++++++---------
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 14 +++++--
 2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index f95c58b67633..efc334f6104c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -53,7 +53,7 @@ nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
 		}
 	}
 
-	if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL)))
+	if (!(pgt = kzalloc(sizeof(*pgt) + (sizeof(pgt->pte[0]) * lpte), GFP_KERNEL)))
 		return NULL;
 	pgt->page = page ? page->shift : 0;
 	pgt->sparse = sparse;
@@ -208,7 +208,7 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	 */
 	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
 		const u32 pten = min(sptn - spti, ptes);
-		pgt->pte[lpti] -= pten;
+		pgt->pte[lpti].s.sptes -= pten;
 		ptes -= pten;
 	}
 
@@ -218,9 +218,9 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
 		/* Skip over any LPTEs that still have valid SPTEs. */
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
+		if (pgt->pte[pteb].s.sptes) {
 			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
+				if (!(pgt->pte[ptei].s.sptes))
 					break;
 			}
 			continue;
 		}
@@ -232,14 +232,14 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 		 *
 		 * Determine how many LPTEs need to transition state.
 		 */
-		pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+		pgt->pte[ptei].s.spte_valid = false;
 		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-			if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
+			if (pgt->pte[ptei].s.sptes)
 				break;
-			pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+			pgt->pte[ptei].s.spte_valid = false;
 		}
 
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+		if (pgt->pte[pteb].s.sparse) {
 			TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
 			pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
 		} else
@@ -307,7 +307,7 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	 */
 	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
 		const u32 pten = min(sptn - spti, ptes);
-		pgt->pte[lpti] += pten;
+		pgt->pte[lpti].s.sptes += pten;
 		ptes -= pten;
 	}
 
@@ -317,9 +317,9 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
 		/* Skip over any LPTEs that already have valid SPTEs. */
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
+		if (pgt->pte[pteb].s.spte_valid) {
 			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
+				if (!pgt->pte[ptei].s.spte_valid)
 					break;
 			}
 			continue;
 		}
@@ -331,14 +331,14 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 		 *
 		 * Determine how many LPTEs need to transition state.
 		 */
-		pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+		pgt->pte[ptei].s.spte_valid = true;
 		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-			if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
+			if (pgt->pte[ptei].s.spte_valid)
 				break;
-			pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+			pgt->pte[ptei].s.spte_valid = true;
 		}
 
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+		if (pgt->pte[pteb].s.sparse) {
 			const u32 spti = pteb * sptn;
 			const u32 sptc = ptes * sptn;
 			/* The entire LPTE is marked as sparse, we need
@@ -386,7 +386,8 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
 			pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
 	} else
 	if (desc->type == LPT) {
-		memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
+		union nvkm_pte_tracker sparse = { .s.sparse = 1 };
+		memset(&pgt->pte[ptei].u, sparse.u, ptes);
 	}
 }
 
@@ -398,7 +399,7 @@ nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 pte
 		memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes);
 	else
 	if (it->desc->type == LPT)
-		memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes);
+		memset(&pt->pte[ptei].u, 0x00, sizeof(pt->pte[0]) * ptes);
 	return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes);
 }
 
@@ -445,9 +446,9 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
 		 * the SPTEs on some GPUs.
 		 */
 		for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
-			bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+			bool spte = !!pgt->pte[ptei].s.sptes;
 			for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
-				bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+				bool next = !!pgt->pte[ptei].s.sptes;
 				if (spte != next)
 					break;
 			}
@@ -461,7 +462,7 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
 			} else {
 				desc->func->unmap(vmm, pt, pteb, ptes);
 				while (ptes--)
-					pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
+					pgt->pte[pteb++].s.spte_valid = true;
 			}
 		}
 	} else {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index 4586a425dbe4..a6312a0e6b84 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -4,6 +4,15 @@
 #include <core/memory.h>
 enum nvkm_memory_target;
 
+union nvkm_pte_tracker {
+	u8 u;
+	struct {
+		u8 sparse:1;
+		u8 spte_valid:1;
+		u8 sptes:6;
+	} s;
+};
+
 struct nvkm_vmm_pt {
 	/* Some GPUs have a mapping level with a dual page tables to
 	 * support large and small pages in the same address-range.
@@ -44,10 +53,7 @@ struct nvkm_vmm_pt {
 	 *
 	 * This information is used to manage LPTE state transitions.
 	 */
-#define NVKM_VMM_PTE_SPARSE 0x80
-#define NVKM_VMM_PTE_VALID 0x40
-#define NVKM_VMM_PTE_SPTES 0x3f
-	u8 pte[];
+	union nvkm_pte_tracker pte[];
 };
 
 typedef void (*nvkm_vmm_pxe_func)(struct nvkm_vmm *,
-- 
2.52.0
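
A quick way to sanity-check the "entirely non-functional" claim is to
confirm that the new tracker is still exactly one byte, since the
kzalloc() sizing (sizeof(pgt->pte[0]) * lpte) and the memset()-based
sparse/clear paths all assume one byte per LPTE. Below is an
illustrative userspace sketch, not part of the patch: the union is
copied from vmm.h, while the u8 typedef, the static assert and main()
are added here purely for illustration.

#include <stdint.h>
#include <stdio.h>

typedef uint8_t u8;	/* stand-in for the kernel's u8 */

/* Per-LPTE state tracker from the patch, replacing the old
 * NVKM_VMM_PTE_SPARSE/VALID/SPTES masks packed into a plain u8.
 */
union nvkm_pte_tracker {
	u8 u;
	struct {
		u8 sparse:1;
		u8 spte_valid:1;
		u8 sptes:6;
	} s;
};

/* sizeof(pgt->pte[0]) * lpte must still equal lpte, and the memset()s
 * through .u must touch exactly one byte per LPTE.
 */
_Static_assert(sizeof(union nvkm_pte_tracker) == sizeof(u8),
	       "pte tracker must stay a single byte");

int main(void)
{
	union nvkm_pte_tracker pte = { .u = 0 };

	pte.s.sparse = 1;	/* was: pte |= NVKM_VMM_PTE_SPARSE */
	pte.s.sptes += 3;	/* was: pte += 3 (low six bits)    */

	/* Which bits the compiler assigns to each field is
	 * implementation-defined, so the raw .u value need not match the
	 * old 0x80/0x40/0x3f encoding; the paths converted by the patch
	 * read the state back through the same bitfield view, so only
	 * the overall size should matter.
	 */
	printf("sizeof=%zu raw=0x%02x sptes=%u\n",
	       sizeof(pte), (unsigned int)pte.u, (unsigned int)pte.s.sptes);
	return 0;
}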
