Re: [PATCH RFC PKS/PMEM 09/58] drivers/gpu: Utilize new kmap_thread()
On Sat, Oct 10, 2020 at 12:03:49AM +0200, Daniel Vetter wrote: > On Fri, Oct 09, 2020 at 12:49:44PM -0700, ira.we...@intel.com wrote: > > From: Ira Weiny > > > > These kmap() calls in the gpu stack are localized to a single thread. > > To avoid the overhead of global PKRS updates use the new kmap_thread() > > call. > > > > Cc: David Airlie > > Cc: Daniel Vetter > > Cc: Patrik Jakobsson > > Signed-off-by: Ira Weiny > > I'm guessing the entire pile goes in through some other tree. > Apologies for not realizing there were multiple maintainers here. But, I was thinking it would land together through the mm tree once the core support lands. I've tried to split these out in a way they can be easily reviewed/acked by the correct developers. > If so: > > Acked-by: Daniel Vetter > > If you want this to land through maintainer trees, then we need a > per-driver split (since aside from amdgpu and radeon they're all different > subtrees). It is just RFC for the moment. I need to get the core support accepted first, then this can land. > > btw the two kmap calls in drm you highlight in the cover letter should > also be convertible to kmap_thread. We only hold vmalloc mappings for a > longer time (or it'd be quite a driver bug). So if you want maybe throw > those two as two additional patches on top, and we can do some careful > review & testing for them. Cool. I'll add them in. 
Ira > -Daniel > > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 12 ++-- > > drivers/gpu/drm/gma500/gma_display.c | 4 ++-- > > drivers/gpu/drm/gma500/mmu.c | 10 +- > > drivers/gpu/drm/i915/gem/i915_gem_shmem.c| 4 ++-- > > .../gpu/drm/i915/gem/selftests/i915_gem_context.c| 4 ++-- > > drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 8 > > drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 4 ++-- > > drivers/gpu/drm/i915/gt/intel_gtt.c | 4 ++-- > > drivers/gpu/drm/i915/gt/shmem_utils.c| 4 ++-- > > drivers/gpu/drm/i915/i915_gem.c | 8 > > drivers/gpu/drm/i915/i915_gpu_error.c| 4 ++-- > > drivers/gpu/drm/i915/selftests/i915_perf.c | 4 ++-- > > drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- > > 13 files changed, 37 insertions(+), 37 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > index 978bae731398..bd564bccb7a3 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > @@ -2437,11 +2437,11 @@ static ssize_t amdgpu_ttm_gtt_read(struct file *f, > > char __user *buf, > > > > page = adev->gart.pages[p]; > > if (page) { > > - ptr = kmap(page); > > + ptr = kmap_thread(page); > > ptr += off; > > > > r = copy_to_user(buf, ptr, cur_size); > > - kunmap(adev->gart.pages[p]); > > + kunmap_thread(adev->gart.pages[p]); > > } else > > r = clear_user(buf, cur_size); > > > > @@ -2507,9 +2507,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char > > __user *buf, > > if (p->mapping != adev->mman.bdev.dev_mapping) > > return -EPERM; > > > > - ptr = kmap(p); > > + ptr = kmap_thread(p); > > r = copy_to_user(buf, ptr + off, bytes); > > - kunmap(p); > > + kunmap_thread(p); > > if (r) > > return -EFAULT; > > > > @@ -2558,9 +2558,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, > > const char __user *buf, > > if (p->mapping != adev->mman.bdev.dev_mapping) > > return -EPERM; > > > > - ptr = kmap(p); > > + ptr = kmap_thread(p); > > r = copy_from_user(ptr + 
off, buf, bytes); > > - kunmap(p); > > + kunmap_thread(p); > > if (r) > > return -EFAULT; > > > > diff --git a/drivers/gpu/drm/gma500/gma_display.c > > b/drivers/gpu/drm/gma500/gma_display.c > > index 3df6d6e850f5..35f4e55c941f 100644 > > --- a/drivers/gpu/drm/gma500/gma_display.c > > +++ b/drivers/gpu/drm/gma500/gma_display.c > > @@ -400,9 +400,9 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc, > > /* Copy the cursor to cursor mem */ > > tmp_dst = dev_priv->vram_addr + cursor_gt->offset; > > for (i = 0; i < cursor_pages; i++) { > > - tmp_src = kmap(gt->pages[i]); > > + tmp_src = kmap_thread(gt->pages[i]); > > memcpy(tmp_dst, tmp_src, PAGE_SIZE); > > - kunmap(gt->pages[i]); > > + kunmap_thread(gt->pages[i]); > > tmp_dst += PAGE_SIZE; > > } > > > > diff --git a/d
Re: [PATCH RFC PKS/PMEM 09/58] drivers/gpu: Utilize new kmap_thread()
On Fri, Oct 09, 2020 at 12:49:44PM -0700, ira.we...@intel.com wrote: > From: Ira Weiny > > These kmap() calls in the gpu stack are localized to a single thread. > To avoid the over head of global PKRS updates use the new kmap_thread() > call. > > Cc: David Airlie > Cc: Daniel Vetter > Cc: Patrik Jakobsson > Signed-off-by: Ira Weiny I'm guessing the entire pile goes in through some other tree. If so: Acked-by: Daniel Vetter If you want this to land through maintainer trees, then we need a per-driver split (since aside from amdgpu and radeon they're all different subtrees). btw the two kmap calls in drm you highlight in the cover letter should also be convertible to kmap_thread. We only hold vmalloc mappings for a longer time (or it'd be quite a driver bug). So if you want maybe throw those two as two additional patches on top, and we can do some careful review & testing for them. -Daniel > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 12 ++-- > drivers/gpu/drm/gma500/gma_display.c | 4 ++-- > drivers/gpu/drm/gma500/mmu.c | 10 +- > drivers/gpu/drm/i915/gem/i915_gem_shmem.c| 4 ++-- > .../gpu/drm/i915/gem/selftests/i915_gem_context.c| 4 ++-- > drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 8 > drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 4 ++-- > drivers/gpu/drm/i915/gt/intel_gtt.c | 4 ++-- > drivers/gpu/drm/i915/gt/shmem_utils.c| 4 ++-- > drivers/gpu/drm/i915/i915_gem.c | 8 > drivers/gpu/drm/i915/i915_gpu_error.c| 4 ++-- > drivers/gpu/drm/i915/selftests/i915_perf.c | 4 ++-- > drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- > 13 files changed, 37 insertions(+), 37 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 978bae731398..bd564bccb7a3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -2437,11 +2437,11 @@ static ssize_t amdgpu_ttm_gtt_read(struct file *f, > char __user *buf, > > page = adev->gart.pages[p]; > if (page) { > - ptr = 
kmap(page); > + ptr = kmap_thread(page); > ptr += off; > > r = copy_to_user(buf, ptr, cur_size); > - kunmap(adev->gart.pages[p]); > + kunmap_thread(adev->gart.pages[p]); > } else > r = clear_user(buf, cur_size); > > @@ -2507,9 +2507,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char > __user *buf, > if (p->mapping != adev->mman.bdev.dev_mapping) > return -EPERM; > > - ptr = kmap(p); > + ptr = kmap_thread(p); > r = copy_to_user(buf, ptr + off, bytes); > - kunmap(p); > + kunmap_thread(p); > if (r) > return -EFAULT; > > @@ -2558,9 +2558,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const > char __user *buf, > if (p->mapping != adev->mman.bdev.dev_mapping) > return -EPERM; > > - ptr = kmap(p); > + ptr = kmap_thread(p); > r = copy_from_user(ptr + off, buf, bytes); > - kunmap(p); > + kunmap_thread(p); > if (r) > return -EFAULT; > > diff --git a/drivers/gpu/drm/gma500/gma_display.c > b/drivers/gpu/drm/gma500/gma_display.c > index 3df6d6e850f5..35f4e55c941f 100644 > --- a/drivers/gpu/drm/gma500/gma_display.c > +++ b/drivers/gpu/drm/gma500/gma_display.c > @@ -400,9 +400,9 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc, > /* Copy the cursor to cursor mem */ > tmp_dst = dev_priv->vram_addr + cursor_gt->offset; > for (i = 0; i < cursor_pages; i++) { > - tmp_src = kmap(gt->pages[i]); > + tmp_src = kmap_thread(gt->pages[i]); > memcpy(tmp_dst, tmp_src, PAGE_SIZE); > - kunmap(gt->pages[i]); > + kunmap_thread(gt->pages[i]); > tmp_dst += PAGE_SIZE; > } > > diff --git a/drivers/gpu/drm/gma500/mmu.c b/drivers/gpu/drm/gma500/mmu.c > index 505044c9a673..fba7a3a461fd 100644 > --- a/drivers/gpu/drm/gma500/mmu.c > +++ b/drivers/gpu/drm/gma500/mmu.c > @@ -192,20 +192,20 @@ struct psb_mmu_pd *psb_mmu_alloc_pd(struct > psb_mmu_driver *driver, > pd->invalid_pte = 0; > } > > - v = kmap(pd->dummy_pt); > + v = kmap_thread(pd->dummy_pt); > for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i) > v[i] = pd->invalid_pte; > > - kunmap(pd->dummy_pt); > + kunmap_thread(pd->dummy_pt); > 
> - v = kmap(pd->p); > +
[PATCH RFC PKS/PMEM 09/58] drivers/gpu: Utilize new kmap_thread()
From: Ira Weiny These kmap() calls in the gpu stack are localized to a single thread. To avoid the over head of global PKRS updates use the new kmap_thread() call. Cc: David Airlie Cc: Daniel Vetter Cc: Patrik Jakobsson Signed-off-by: Ira Weiny --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 12 ++-- drivers/gpu/drm/gma500/gma_display.c | 4 ++-- drivers/gpu/drm/gma500/mmu.c | 10 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c| 4 ++-- .../gpu/drm/i915/gem/selftests/i915_gem_context.c| 4 ++-- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 8 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_gtt.c | 4 ++-- drivers/gpu/drm/i915/gt/shmem_utils.c| 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 8 drivers/gpu/drm/i915/i915_gpu_error.c| 4 ++-- drivers/gpu/drm/i915/selftests/i915_perf.c | 4 ++-- drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- 13 files changed, 37 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 978bae731398..bd564bccb7a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2437,11 +2437,11 @@ static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, page = adev->gart.pages[p]; if (page) { - ptr = kmap(page); + ptr = kmap_thread(page); ptr += off; r = copy_to_user(buf, ptr, cur_size); - kunmap(adev->gart.pages[p]); + kunmap_thread(adev->gart.pages[p]); } else r = clear_user(buf, cur_size); @@ -2507,9 +2507,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, if (p->mapping != adev->mman.bdev.dev_mapping) return -EPERM; - ptr = kmap(p); + ptr = kmap_thread(p); r = copy_to_user(buf, ptr + off, bytes); - kunmap(p); + kunmap_thread(p); if (r) return -EFAULT; @@ -2558,9 +2558,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, if (p->mapping != adev->mman.bdev.dev_mapping) return -EPERM; - ptr = kmap(p); + ptr = kmap_thread(p); r = copy_from_user(ptr + 
off, buf, bytes); - kunmap(p); + kunmap_thread(p); if (r) return -EFAULT; diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c index 3df6d6e850f5..35f4e55c941f 100644 --- a/drivers/gpu/drm/gma500/gma_display.c +++ b/drivers/gpu/drm/gma500/gma_display.c @@ -400,9 +400,9 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc, /* Copy the cursor to cursor mem */ tmp_dst = dev_priv->vram_addr + cursor_gt->offset; for (i = 0; i < cursor_pages; i++) { - tmp_src = kmap(gt->pages[i]); + tmp_src = kmap_thread(gt->pages[i]); memcpy(tmp_dst, tmp_src, PAGE_SIZE); - kunmap(gt->pages[i]); + kunmap_thread(gt->pages[i]); tmp_dst += PAGE_SIZE; } diff --git a/drivers/gpu/drm/gma500/mmu.c b/drivers/gpu/drm/gma500/mmu.c index 505044c9a673..fba7a3a461fd 100644 --- a/drivers/gpu/drm/gma500/mmu.c +++ b/drivers/gpu/drm/gma500/mmu.c @@ -192,20 +192,20 @@ struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver *driver, pd->invalid_pte = 0; } - v = kmap(pd->dummy_pt); + v = kmap_thread(pd->dummy_pt); for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i) v[i] = pd->invalid_pte; - kunmap(pd->dummy_pt); + kunmap_thread(pd->dummy_pt); - v = kmap(pd->p); + v = kmap_thread(pd->p); for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i) v[i] = pd->invalid_pde; - kunmap(pd->p); + kunmap_thread(pd->p); clear_page(kmap(pd->dummy_page)); - kunmap(pd->dummy_page); + kunmap_thread(pd->dummy_page); pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024); if (!pd->tables) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 38113d3c0138..274424795fb7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -566,9 +566,9 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, if (err < 0)