Re: [Intel-gfx] [PATCH v2] drm/i915: Fix DMA mapped scatterlist lookup

2020-10-06 Thread Tvrtko Ursulin



Re: [Intel-gfx] [PATCH v2] drm/i915: Fix DMA mapped scatterlist lookup

2020-09-15 Thread Chris Wilson
Quoting Tvrtko Ursulin (2020-09-10 15:50:18)
> From: Tvrtko Ursulin 
> 
> As the previous patch fixed the places where we walk the whole scatterlist
> for DMA addresses, this patch fixes the random lookup functionality.
> 
> To achieve this we have to add a second lookup iterator and an
> i915_gem_object_get_sg_dma helper, to be used analogously to the existing
> i915_gem_object_get_sg. Two lookup caches are therefore maintained per
> object and they are flushed at the same point for simplicity. (Strictly
> speaking the DMA cache should be flushed from i915_gem_gtt_finish_pages,
> but today this coincides with unsetting of the pages in general.)
> 
> The partial VMA view is then fixed to use the new DMA lookup and to query
> the sg length properly.
> 
> v2:
>  * Checkpatch.
> 
> Signed-off-by: Tvrtko Ursulin 
> Cc: Joonas Lahtinen 
> Cc: Chris Wilson 
> Cc: Matthew Auld 
> Cc: Lu Baolu 
> Cc: Tom Murphy 
> Cc: Logan Gunthorpe 
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_object.c|  2 ++
>  drivers/gpu/drm/i915/gem/i915_gem_object.h| 20 +-
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  | 17 ---
>  drivers/gpu/drm/i915/gem/i915_gem_pages.c | 21 ---
>  drivers/gpu/drm/i915/gt/intel_ggtt.c  |  4 ++--
>  drivers/gpu/drm/i915/i915_scatterlist.h   |  5 +
>  6 files changed, 51 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index c8421fd9d2dc..ffeaf1b9b1bb 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -73,6 +73,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
> obj->mm.madv = I915_MADV_WILLNEED;
> INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
> mutex_init(&obj->mm.get_page.lock);
> +   INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN);
> +   mutex_init(&obj->mm.get_dma_page.lock);
>  
> if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj))
> i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev),
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index d46db8d8f38e..44c6910e2669 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -275,8 +275,26 @@ int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
>                                unsigned int tiling, unsigned int stride);
>  
>  struct scatterlist *
> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> +struct i915_gem_object_page_iter *iter,
> +unsigned int n,
> +unsigned int *offset);
> +
> +static inline struct scatterlist *
>  i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> -  unsigned int n, unsigned int *offset);
> +  unsigned int n,
> +  unsigned int *offset)
> +{
> +   return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset);
> +}

I wonder if get_sg_phys() is worth it, to make the difference between it
and get_sg_dma() (and .get_phys_page?) completely clear?
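
A minimal sketch of what that suggestion could look like; this helper is
hypothetical and not part of the posted patch, it would merely give the
CPU-page lookup an explicit name next to the DMA one:

/* Hypothetical sketch only -- not in the posted patch.  The CPU-page
 * lookup gets an explicit _phys-style name so it cannot be confused
 * with the DMA-address lookup added by this series.
 */
static inline struct scatterlist *
i915_gem_object_get_sg_phys(struct drm_i915_gem_object *obj,
			    unsigned int n,
			    unsigned int *offset)
{
	/* Same cache as i915_gem_object_get_sg(); only the name differs. */
	return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset);
}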

> +
> +static inline struct scatterlist *
> +i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj,
> +  unsigned int n,
> +  unsigned int *offset)
> +{
> +   return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset);
> +}
>  
>  struct page *
>  i915_gem_object_get_page(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index b5c15557cc87..fedfebf13344 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -80,6 +80,14 @@ struct i915_mmap_offset {
> struct rb_node offset;
>  };
>  
> +struct i915_gem_object_page_iter {
> +   struct scatterlist *sg_pos;
> +   unsigned int sg_idx; /* in pages, but 32bit eek! */
> +
> +   struct radix_tree_root radix;
> +   struct mutex lock; /* protects this cache */
> +};

All alternatives for avoiding a second random lookup were squashed; it
really is two lists within one scatterlist, and we do use both page/dma
lookups in non-trivial ways.
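
To illustrate the point that one scatterlist really carries two lists once
it is DMA mapped, here is a small self-contained sketch (illustrative only,
not i915 code; mapped_nents is assumed to hold the count returned by
dma_map_sg()): the IOMMU may merge entries, so the CPU-page walk and the
DMA walk visit different numbers of segments with different lengths, which
is why one shared lookup cache cannot serve both.

#include <linux/printk.h>
#include <linux/scatterlist.h>

/* Illustrative only: why page-index and DMA-page-index lookups need
 * separate iterators.  mapped_nents is the value returned by dma_map_sg().
 */
static void show_two_lists(struct sg_table *sgt, unsigned int mapped_nents)
{
	struct scatterlist *sg;
	unsigned int i;

	/* CPU view: one entry per physical segment, sg->length bytes each. */
	for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
		pr_info("page segment %u: %u bytes\n", i, sg->length);

	/* DMA view: the IOMMU may have coalesced segments, so there can be
	 * fewer entries and sg_dma_len() can exceed sg->length.  A cache
	 * built while walking the CPU view is therefore wrong here.
	 */
	for_each_sg(sgt->sgl, sg, mapped_nents, i)
		pr_info("dma segment %u: %pad + %u bytes\n",
			i, &sg_dma_address(sg), sg_dma_len(sg));
}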

> +
>  struct drm_i915_gem_object {
> struct drm_gem_object base;
>  
> @@ -246,13 +254,8 @@ struct drm_i915_gem_object {
>  
> I915_SELFTEST_DECLARE(unsigned int page_mask);
>  
> -   struct i915_gem_object_page_iter {
> -   struct scatterlist *sg_pos;
> -   unsigned int sg_idx; /* in pages, but 32bit eek! */
> -
> -   struct radix_tree_root radix;
> -   struct mutex lock; /* protects this cache */

[Intel-gfx] [PATCH v2] drm/i915: Fix DMA mapped scatterlist lookup

2020-09-10 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

As the previous patch fixed the places where we walk the whole scatterlist
for DMA addresses, this patch fixes the random lookup functionality.

To achieve this we have to add a second lookup iterator and an
i915_gem_object_get_sg_dma helper, to be used analogously to the existing
i915_gem_object_get_sg. Two lookup caches are therefore maintained per
object and they are flushed at the same point for simplicity. (Strictly
speaking the DMA cache should be flushed from i915_gem_gtt_finish_pages,
but today this coincides with unsetting of the pages in general.)

The partial VMA view is then fixed to use the new DMA lookup and to query
the sg length properly.

v2:
 * Checkpatch.

Signed-off-by: Tvrtko Ursulin 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Matthew Auld 
Cc: Lu Baolu 
Cc: Tom Murphy 
Cc: Logan Gunthorpe 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  2 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.h| 20 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 17 ---
 drivers/gpu/drm/i915/gem/i915_gem_pages.c | 21 ---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  4 ++--
 drivers/gpu/drm/i915/i915_scatterlist.h   |  5 +
 6 files changed, 51 insertions(+), 18 deletions(-)
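
As a usage illustration of the helper this patch introduces (a sketch built
on the declarations added below, not code taken from the diff; the function
name is invented for the example): a random-access caller that needs the
DMA address backing page index n now goes through the DMA-aware cache and
bounds itself by sg_dma_len() rather than sg->length.

/* Illustrative sketch: resolve the DMA address that backs page index n,
 * using the DMA-aware lookup cache added by this patch.  The helper name
 * example_dma_address_for_page() is made up for the example.
 */
static dma_addr_t
example_dma_address_for_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	struct scatterlist *sg;
	unsigned int offset; /* page offset of n within the returned entry */

	sg = i915_gem_object_get_sg_dma(obj, n, &offset);

	/* The DMA-mapped segment is bounded by sg_dma_len(), not sg->length. */
	GEM_BUG_ON(offset * PAGE_SIZE >= sg_dma_len(sg));

	return sg_dma_address(sg) + ((dma_addr_t)offset << PAGE_SHIFT);
}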

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index c8421fd9d2dc..ffeaf1b9b1bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -73,6 +73,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->mm.madv = I915_MADV_WILLNEED;
INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
mutex_init(&obj->mm.get_page.lock);
+   INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN);
+   mutex_init(&obj->mm.get_dma_page.lock);
 
if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj))
i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev),
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index d46db8d8f38e..44c6910e2669 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -275,8 +275,26 @@ int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
                               unsigned int tiling, unsigned int stride);
 
 struct scatterlist *
+__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+struct i915_gem_object_page_iter *iter,
+unsigned int n,
+unsigned int *offset);
+
+static inline struct scatterlist *
 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
-  unsigned int n, unsigned int *offset);
+  unsigned int n,
+  unsigned int *offset)
+{
+   return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset);
+}
+
+static inline struct scatterlist *
+i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj,
+  unsigned int n,
+  unsigned int *offset)
+{
+   return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset);
+}
 
 struct page *
 i915_gem_object_get_page(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index b5c15557cc87..fedfebf13344 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -80,6 +80,14 @@ struct i915_mmap_offset {
struct rb_node offset;
 };
 
+struct i915_gem_object_page_iter {
+   struct scatterlist *sg_pos;
+   unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+   struct radix_tree_root radix;
+   struct mutex lock; /* protects this cache */
+};
+
 struct drm_i915_gem_object {
struct drm_gem_object base;
 
@@ -246,13 +254,8 @@ struct drm_i915_gem_object {
 
I915_SELFTEST_DECLARE(unsigned int page_mask);
 
-   struct i915_gem_object_page_iter {
-   struct scatterlist *sg_pos;
-   unsigned int sg_idx; /* in pages, but 32bit eek! */
-
-   struct radix_tree_root radix;
-   struct mutex lock; /* protects this cache */
-   } get_page;
+   struct i915_gem_object_page_iter get_page;
+   struct i915_gem_object_page_iter get_dma_page;
 
/**
 * Element within i915->mm.unbound_list or i915->mm.bound_list,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index e8a083743e09..04a3c1233f80 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -33,6 +33,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
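
The i915_gem_pages.c hunk above touches the file that also hosts the cached
lookup itself. For orientation, here is a heavily simplified conceptual
sketch of the resumable walk each per-object cache performs (a sketch only:
it omits the radix tree the real code uses for repeated random lookups,
assumes page-aligned segment lengths and an in-range index, and is not the
actual __i915_gem_object_get_sg()):

#include <linux/mutex.h>
#include <linux/scatterlist.h>

/* Conceptual sketch, not the i915 implementation: one resumable walk per
 * view of the scatterlist.  The only difference between the CPU-page cache
 * and the DMA cache is which length counts the pages in an entry, which is
 * why the patch keeps two i915_gem_object_page_iter instances per object.
 */
static struct scatterlist *
sketch_cached_lookup(struct scatterlist *sgl,
		     struct i915_gem_object_page_iter *iter,
		     unsigned int n,
		     unsigned int *offset,
		     bool dma)
{
	struct scatterlist *sg;
	unsigned int npages;

	mutex_lock(&iter->lock);

	/* Resume from the cached position, or restart if we went past n. */
	if (n < iter->sg_idx) {
		iter->sg_pos = sgl;
		iter->sg_idx = 0;
	}

	sg = iter->sg_pos;
	npages = (dma ? sg_dma_len(sg) : sg->length) >> PAGE_SHIFT;

	/* Walk forward until the entry covering page index n is reached. */
	while (n >= iter->sg_idx + npages) {
		iter->sg_idx += npages;
		sg = sg_next(sg);
		npages = (dma ? sg_dma_len(sg) : sg->length) >> PAGE_SHIFT;
	}

	iter->sg_pos = sg;
	*offset = n - iter->sg_idx;

	mutex_unlock(&iter->lock);
	return sg;
}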