Re: [Intel-gfx] [PATCH v6 05/32] drm/i915: Track GEN6 page table usage
Mika Kuoppala writes: > Michel Thierry writes: > >> From: Ben Widawsky >> >> Instead of implementing the full tracking + dynamic allocation, this >> patch does a bit less than half of the work, by tracking and warning on >> unexpected conditions. The tracking itself follows which PTEs within a >> page table are currently being used for objects. The next patch will >> modify this to actually allocate the page tables only when necessary. >> >> With the current patch there isn't much in the way of making a gen >> agnostic range allocation function. However, in the next patch we'll add >> more specificity which makes having separate functions a bit easier to >> manage. >> >> One important change introduced here is that DMA mappings are >> created/destroyed at the same page directories/tables are >> allocated/deallocated. >> >> Notice that aliasing PPGTT is not managed here. The patch which actually >> begins dynamic allocation/teardown explains the reasoning for this. >> >> v2: s/pdp.page_directory/pdp.page_directorys >> Make a scratch page allocation helper >> >> v3: Rebase and expand commit message. >> >> v4: Allocate required pagetables only when it is needed, _bind_to_vm >> instead of bind_vma (Daniel). >> >> v5: Rebased to remove the unnecessary noise in the diff, also: >> - PDE mask is GEN agnostic, renamed GEN6_PDE_MASK to I915_PDE_MASK. >> - Removed unnecessary checks in gen6_alloc_va_range. >> - Changed map/unmap_px_single macros to use dma functions directly and >>be part of a static inline function instead. >> - Moved drm_device plumbing through page tables operation to its own >>patch. >> - Moved allocate/teardown_va_range calls until they are fully >>implemented (in subsequent patch). >> - Merged pt and scratch_pt unmap_and_free path. >> - Moved scratch page allocator helper to the patch that will use it. >> >> v6: Reduce complexity by not tearing down pagetables dynamically, the >> same can be achieved while freeing empty vms. 
(Daniel) >> >> v7: s/i915_dma_map_px_single/i915_dma_map_single >> s/gen6_write_pdes/gen6_write_pde >> Prevent a NULL case when only GGTT is available. (Mika) >> >> v8: Rebased after s/page_tables/page_table/. >> >> Cc: Daniel Vetter >> Cc: Mika Kuoppala >> Signed-off-by: Ben Widawsky >> Signed-off-by: Michel Thierry (v3+) >> --- >> drivers/gpu/drm/i915/i915_gem_gtt.c | 198 >> +--- >> drivers/gpu/drm/i915/i915_gem_gtt.h | 75 ++ >> 2 files changed, 211 insertions(+), 62 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c >> b/drivers/gpu/drm/i915/i915_gem_gtt.c >> index e05488e..f9354c7 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >> @@ -278,29 +278,88 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, >> return pte; >> } >> >> -static void unmap_and_free_pt(struct i915_page_table_entry *pt, struct >> drm_device *dev) >> +#define i915_dma_unmap_single(px, dev) \ >> +__i915_dma_unmap_single((px)->daddr, dev) >> + >> +static inline void __i915_dma_unmap_single(dma_addr_t daddr, >> +struct drm_device *dev) >> +{ >> +struct device *device = &dev->pdev->dev; >> + >> +dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL); >> +} >> + >> +/** >> + * i915_dma_map_single() - Create a dma mapping for a page table/dir/etc. >> + * @px: Page table/dir/etc to get a DMA map for >> + * @dev:drm device >> + * >> + * Page table allocations are unified across all gens. They always require a >> + * single 4k allocation, as well as a DMA mapping. If we keep the structs >> + * symmetric here, the simple macro covers us for every page table type. >> + * >> + * Return: 0 if success. 
>> + */ >> +#define i915_dma_map_single(px, dev) \ >> +i915_dma_map_page_single((px)->page, (dev), &(px)->daddr) >> + >> +static inline int i915_dma_map_page_single(struct page *page, >> + struct drm_device *dev, >> + dma_addr_t *daddr) >> +{ >> +struct device *device = &dev->pdev->dev; >> + >> +*daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL); >> +return dma_mapping_error(device, *daddr); >> +} >> + >> +static void unmap_and_free_pt(struct i915_page_table_entry *pt, >> + struct drm_device *dev) >> { >> if (WARN_ON(!pt->page)) >> return; >> + >> +i915_dma_unmap_single(pt, dev); >> __free_page(pt->page); >> +kfree(pt->used_ptes); >> kfree(pt); >> } >> >> static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev) >> { >> struct i915_page_table_entry *pt; >> +const size_t count = INTEL_INFO(dev)->gen >= 8 ? >> +GEN8_PTES_PER_PAGE : I915_PPGTT_PT_ENTRIES; >> +int ret = -ENOMEM; >> >> pt = kzalloc(sizeof(*pt), GFP_KERNEL); >> if (!pt) >> return ERR_PTR(-ENOMEM); >>
Re: [Intel-gfx] [PATCH v6 05/32] drm/i915: Track GEN6 page table usage
Michel Thierry writes: > From: Ben Widawsky > > Instead of implementing the full tracking + dynamic allocation, this > patch does a bit less than half of the work, by tracking and warning on > unexpected conditions. The tracking itself follows which PTEs within a > page table are currently being used for objects. The next patch will > modify this to actually allocate the page tables only when necessary. > > With the current patch there isn't much in the way of making a gen > agnostic range allocation function. However, in the next patch we'll add > more specificity which makes having separate functions a bit easier to > manage. > > One important change introduced here is that DMA mappings are > created/destroyed at the same page directories/tables are > allocated/deallocated. > > Notice that aliasing PPGTT is not managed here. The patch which actually > begins dynamic allocation/teardown explains the reasoning for this. > > v2: s/pdp.page_directory/pdp.page_directorys > Make a scratch page allocation helper > > v3: Rebase and expand commit message. > > v4: Allocate required pagetables only when it is needed, _bind_to_vm > instead of bind_vma (Daniel). > > v5: Rebased to remove the unnecessary noise in the diff, also: > - PDE mask is GEN agnostic, renamed GEN6_PDE_MASK to I915_PDE_MASK. > - Removed unnecessary checks in gen6_alloc_va_range. > - Changed map/unmap_px_single macros to use dma functions directly and >be part of a static inline function instead. > - Moved drm_device plumbing through page tables operation to its own >patch. > - Moved allocate/teardown_va_range calls until they are fully >implemented (in subsequent patch). > - Merged pt and scratch_pt unmap_and_free path. > - Moved scratch page allocator helper to the patch that will use it. > > v6: Reduce complexity by not tearing down pagetables dynamically, the > same can be achieved while freeing empty vms. 
(Daniel) > > v7: s/i915_dma_map_px_single/i915_dma_map_single > s/gen6_write_pdes/gen6_write_pde > Prevent a NULL case when only GGTT is available. (Mika) > > v8: Rebased after s/page_tables/page_table/. > > Cc: Daniel Vetter > Cc: Mika Kuoppala > Signed-off-by: Ben Widawsky > Signed-off-by: Michel Thierry (v3+) > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 198 > +--- > drivers/gpu/drm/i915/i915_gem_gtt.h | 75 ++ > 2 files changed, 211 insertions(+), 62 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c > b/drivers/gpu/drm/i915/i915_gem_gtt.c > index e05488e..f9354c7 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -278,29 +278,88 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, > return pte; > } > > -static void unmap_and_free_pt(struct i915_page_table_entry *pt, struct > drm_device *dev) > +#define i915_dma_unmap_single(px, dev) \ > + __i915_dma_unmap_single((px)->daddr, dev) > + > +static inline void __i915_dma_unmap_single(dma_addr_t daddr, > + struct drm_device *dev) > +{ > + struct device *device = &dev->pdev->dev; > + > + dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL); > +} > + > +/** > + * i915_dma_map_single() - Create a dma mapping for a page table/dir/etc. > + * @px: Page table/dir/etc to get a DMA map for > + * @dev: drm device > + * > + * Page table allocations are unified across all gens. They always require a > + * single 4k allocation, as well as a DMA mapping. If we keep the structs > + * symmetric here, the simple macro covers us for every page table type. > + * > + * Return: 0 if success. 
> + */ > +#define i915_dma_map_single(px, dev) \ > + i915_dma_map_page_single((px)->page, (dev), &(px)->daddr) > + > +static inline int i915_dma_map_page_single(struct page *page, > +struct drm_device *dev, > +dma_addr_t *daddr) > +{ > + struct device *device = &dev->pdev->dev; > + > + *daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL); > + return dma_mapping_error(device, *daddr); > +} > + > +static void unmap_and_free_pt(struct i915_page_table_entry *pt, > +struct drm_device *dev) > { > if (WARN_ON(!pt->page)) > return; > + > + i915_dma_unmap_single(pt, dev); > __free_page(pt->page); > + kfree(pt->used_ptes); > kfree(pt); > } > > static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev) > { > struct i915_page_table_entry *pt; > + const size_t count = INTEL_INFO(dev)->gen >= 8 ? > + GEN8_PTES_PER_PAGE : I915_PPGTT_PT_ENTRIES; > + int ret = -ENOMEM; > > pt = kzalloc(sizeof(*pt), GFP_KERNEL); > if (!pt) > return ERR_PTR(-ENOMEM); > > + pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), > + GFP_KERNEL); > +
[Intel-gfx] [PATCH v6 05/32] drm/i915: Track GEN6 page table usage
From: Ben Widawsky Instead of implementing the full tracking + dynamic allocation, this patch does a bit less than half of the work, by tracking and warning on unexpected conditions. The tracking itself follows which PTEs within a page table are currently being used for objects. The next patch will modify this to actually allocate the page tables only when necessary. With the current patch there isn't much in the way of making a gen agnostic range allocation function. However, in the next patch we'll add more specificity which makes having separate functions a bit easier to manage. One important change introduced here is that DMA mappings are created/destroyed at the same time as page directories/tables are allocated/deallocated. Notice that aliasing PPGTT is not managed here. The patch which actually begins dynamic allocation/teardown explains the reasoning for this. v2: s/pdp.page_directory/pdp.page_directorys Make a scratch page allocation helper v3: Rebase and expand commit message. v4: Allocate required pagetables only when it is needed, _bind_to_vm instead of bind_vma (Daniel). v5: Rebased to remove the unnecessary noise in the diff, also: - PDE mask is GEN agnostic, renamed GEN6_PDE_MASK to I915_PDE_MASK. - Removed unnecessary checks in gen6_alloc_va_range. - Changed map/unmap_px_single macros to use dma functions directly and be part of a static inline function instead. - Moved drm_device plumbing through page tables operation to its own patch. - Moved allocate/teardown_va_range calls until they are fully implemented (in subsequent patch). - Merged pt and scratch_pt unmap_and_free path. - Moved scratch page allocator helper to the patch that will use it. v6: Reduce complexity by not tearing down pagetables dynamically, the same can be achieved while freeing empty vms. (Daniel) v7: s/i915_dma_map_px_single/i915_dma_map_single s/gen6_write_pdes/gen6_write_pde Prevent a NULL case when only GGTT is available. (Mika) v8: Rebased after s/page_tables/page_table/. 
Cc: Daniel Vetter Cc: Mika Kuoppala Signed-off-by: Ben Widawsky Signed-off-by: Michel Thierry (v3+) --- drivers/gpu/drm/i915/i915_gem_gtt.c | 198 +--- drivers/gpu/drm/i915/i915_gem_gtt.h | 75 ++ 2 files changed, 211 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e05488e..f9354c7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -278,29 +278,88 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, return pte; } -static void unmap_and_free_pt(struct i915_page_table_entry *pt, struct drm_device *dev) +#define i915_dma_unmap_single(px, dev) \ + __i915_dma_unmap_single((px)->daddr, dev) + +static inline void __i915_dma_unmap_single(dma_addr_t daddr, + struct drm_device *dev) +{ + struct device *device = &dev->pdev->dev; + + dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL); +} + +/** + * i915_dma_map_single() - Create a dma mapping for a page table/dir/etc. + * @px:Page table/dir/etc to get a DMA map for + * @dev: drm device + * + * Page table allocations are unified across all gens. They always require a + * single 4k allocation, as well as a DMA mapping. If we keep the structs + * symmetric here, the simple macro covers us for every page table type. + * + * Return: 0 if success. 
+ */ +#define i915_dma_map_single(px, dev) \ + i915_dma_map_page_single((px)->page, (dev), &(px)->daddr) + +static inline int i915_dma_map_page_single(struct page *page, + struct drm_device *dev, + dma_addr_t *daddr) +{ + struct device *device = &dev->pdev->dev; + + *daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL); + return dma_mapping_error(device, *daddr); +} + +static void unmap_and_free_pt(struct i915_page_table_entry *pt, + struct drm_device *dev) { if (WARN_ON(!pt->page)) return; + + i915_dma_unmap_single(pt, dev); __free_page(pt->page); + kfree(pt->used_ptes); kfree(pt); } static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev) { struct i915_page_table_entry *pt; + const size_t count = INTEL_INFO(dev)->gen >= 8 ? + GEN8_PTES_PER_PAGE : I915_PPGTT_PT_ENTRIES; + int ret = -ENOMEM; pt = kzalloc(sizeof(*pt), GFP_KERNEL); if (!pt) return ERR_PTR(-ENOMEM); + pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), + GFP_KERNEL); + + if (!pt->used_ptes) + goto fail_bitmap; + pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!pt->page) { - kfree(pt); - return ERR_PTR(-ENOMEM); -