[Intel-gfx] [PATCH 2/9] drm/i915: Use insert_page for pwrite_fast

2015-12-13 Thread ankitprasad . r . sharma
From: Ankitprasad Sharma 

In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
we try a nonblocking pin for the whole object (since that is fastest if
reused), then failing that we try to grab one page in the mappable
aperture. It also allows us to handle objects larger than the mappable
aperture (e.g. if we need to pwrite with vGPU restricting the aperture
to a measely 8MiB or something like that).

v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)

v3: Combined loops based on local patch by Chris (Chris)

v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris)

Signed-off-by: Ankitprasad Sharma 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 86 ++---
 1 file changed, 64 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bf7f203..46c1e75 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -61,6 +61,21 @@ static bool cpu_write_needs_clflush(struct 
drm_i915_gem_object *obj)
return obj->pin_display;
 }
 
+static int
+i915_gem_insert_node_in_range(struct drm_i915_private *i915,
+ struct drm_mm_node *node, u64 size,
+ unsigned alignment, u64 start, u64 end)
+{
+   int ret;
+
+   ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node,
+ size, alignment, 0, start,
+ end, DRM_MM_SEARCH_DEFAULT,
+ DRM_MM_SEARCH_DEFAULT);
+
+   return ret;
+}
+
 /* some bookkeeping */
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
  size_t size)
@@ -760,20 +775,29 @@ fast_user_write(struct io_mapping *mapping,
  * user into the GTT, uncached.
  */
 static int
-i915_gem_gtt_pwrite_fast(struct drm_device *dev,
+i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 struct drm_i915_gem_object *obj,
 struct drm_i915_gem_pwrite *args,
 struct drm_file *file)
 {
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   ssize_t remain;
-   loff_t offset, page_base;
+   struct drm_mm_node node;
+   uint64_t remain, offset;
char __user *user_data;
-   int page_offset, page_length, ret;
+   int ret;
 
ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
-   if (ret)
-   goto out;
+   if (ret) {
+   memset(&node, 0, sizeof(node));
+   ret = i915_gem_insert_node_in_range(i915, &node, 4096, 0,
+   0, i915->gtt.mappable_end);
+   if (ret)
+   goto out;
+
+   i915_gem_object_pin_pages(obj);
+   } else {
+   node.start = i915_gem_obj_ggtt_offset(obj);
+   node.allocated = false;
+   }
 
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
@@ -783,31 +807,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
if (ret)
goto out_unpin;
 
-   user_data = to_user_ptr(args->data_ptr);
-   remain = args->size;
-
-   offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
-
intel_fb_obj_invalidate(obj, ORIGIN_GTT);
+   obj->dirty = true;
 
-   while (remain > 0) {
+   user_data = to_user_ptr(args->data_ptr);
+   offset = args->offset;
+   remain = args->size;
+   while (remain) {
/* Operation in this page
 *
 * page_base = page offset within aperture
 * page_offset = offset within page
 * page_length = bytes to copy for this page
 */
-   page_base = offset & PAGE_MASK;
-   page_offset = offset_in_page(offset);
-   page_length = remain;
-   if ((page_offset + remain) > PAGE_SIZE)
-   page_length = PAGE_SIZE - page_offset;
-
+   u32 page_base = node.start;
+   unsigned page_offset = offset_in_page(offset);
+   unsigned page_length = PAGE_SIZE - page_offset;
+   page_length = remain < page_length ? remain : page_length;
+   if (node.allocated) {
+   wmb();
+   i915->gtt.base.insert_page(&i915->gtt.base,
+  
i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
+  node.start,
+  I915_CACHE_NONE,
+  0);
+   wmb();
+   } else {
+   page_base += offset & PAGE_MASK;
+

Re: [Intel-gfx] [PATCH 2/9] drm/i915: Use insert_page for pwrite_fast

2015-12-14 Thread Chris Wilson
On Mon, Dec 14, 2015 at 11:16:04AM +0530, ankitprasad.r.sha...@intel.com wrote:
> From: Ankitprasad Sharma 
> 
> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> we try a nonblocking pin for the whole object (since that is fastest if
> reused), then failing that we try to grab one page in the mappable
> aperture. It also allows us to handle objects larger than the mappable
> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> to a measely 8MiB or something like that).
> 
> v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
> 
> v3: Combined loops based on local patch by Chris (Chris)
> 
> v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris)
> 
> Signed-off-by: Ankitprasad Sharma 
> Signed-off-by: Chris Wilson 
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 86 
> ++---
>  1 file changed, 64 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index bf7f203..46c1e75 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -61,6 +61,21 @@ static bool cpu_write_needs_clflush(struct 
> drm_i915_gem_object *obj)
>   return obj->pin_display;
>  }
>  
> +static int
> +i915_gem_insert_node_in_range(struct drm_i915_private *i915,
> +   struct drm_mm_node *node, u64 size,
> +   unsigned alignment, u64 start, u64 end)
> +{
> + int ret;
> +
> + ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node,
> +   size, alignment, 0, start,
> +   end, DRM_MM_SEARCH_DEFAULT,
> +   DRM_MM_SEARCH_DEFAULT);
> +
> + return ret;
> +}

No. It encodes a very bad assumption (i915->gtt) that is not made clear
in anyway.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/9] drm/i915: Use insert_page for pwrite_fast

2015-12-14 Thread Chris Wilson
On Mon, Dec 14, 2015 at 09:54:06AM +, Chris Wilson wrote:
> On Mon, Dec 14, 2015 at 11:16:04AM +0530, ankitprasad.r.sha...@intel.com 
> wrote:
> > From: Ankitprasad Sharma 
> > 
> > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > we try a nonblocking pin for the whole object (since that is fastest if
> > reused), then failing that we try to grab one page in the mappable
> > aperture. It also allows us to handle objects larger than the mappable
> > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > to a measely 8MiB or something like that).
> > 
> > v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
> > 
> > v3: Combined loops based on local patch by Chris (Chris)
> > 
> > v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris)
> > 
> > Signed-off-by: Ankitprasad Sharma 
> > Signed-off-by: Chris Wilson 
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c | 86 
> > ++---
> >  1 file changed, 64 insertions(+), 22 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> > b/drivers/gpu/drm/i915/i915_gem.c
> > index bf7f203..46c1e75 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -61,6 +61,21 @@ static bool cpu_write_needs_clflush(struct 
> > drm_i915_gem_object *obj)
> > return obj->pin_display;
> >  }
> >  
> > +static int
> > +i915_gem_insert_node_in_range(struct drm_i915_private *i915,
> > + struct drm_mm_node *node, u64 size,
> > + unsigned alignment, u64 start, u64 end)
> > +{
> > +   int ret;
> > +
> > +   ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node,
> > + size, alignment, 0, start,
> > + end, DRM_MM_SEARCH_DEFAULT,
> > + DRM_MM_SEARCH_DEFAULT);
> > +
> > +   return ret;
> > +}
> 
> No. It encodes a very bad assumption (i915->gtt) that is not made clear
> in anyway.

static int
insert_mappable_node(struct drm_i915_private *i915,
 struct drm_mm_node *node)
{
return drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node,
   4096, 0, 0,
   0, i915->gtt.mappable_end,
   DRM_MM_SEARCH_DEFAULT,
   DRM_MM_SEARCH_DEFAULT);
}

Should do the trick
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/9] drm/i915: Use insert_page for pwrite_fast

2015-12-14 Thread Chris Wilson
On Mon, Dec 14, 2015 at 10:48:51AM +, Chris Wilson wrote:
> On Mon, Dec 14, 2015 at 09:54:06AM +, Chris Wilson wrote:
> > On Mon, Dec 14, 2015 at 11:16:04AM +0530, ankitprasad.r.sha...@intel.com 
> > wrote:
> > > From: Ankitprasad Sharma 
> > > 
> > > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > > we try a nonblocking pin for the whole object (since that is fastest if
> > > reused), then failing that we try to grab one page in the mappable
> > > aperture. It also allows us to handle objects larger than the mappable
> > > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > > to a measely 8MiB or something like that).
> > > 
> > > v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
> > > 
> > > v3: Combined loops based on local patch by Chris (Chris)
> > > 
> > > v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris)
> > > 
> > > Signed-off-by: Ankitprasad Sharma 
> > > Signed-off-by: Chris Wilson 
> > > ---
> > >  drivers/gpu/drm/i915/i915_gem.c | 86 
> > > ++---
> > >  1 file changed, 64 insertions(+), 22 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> > > b/drivers/gpu/drm/i915/i915_gem.c
> > > index bf7f203..46c1e75 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > @@ -61,6 +61,21 @@ static bool cpu_write_needs_clflush(struct 
> > > drm_i915_gem_object *obj)
> > >   return obj->pin_display;
> > >  }
> > >  
> > > +static int
> > > +i915_gem_insert_node_in_range(struct drm_i915_private *i915,
> > > +   struct drm_mm_node *node, u64 size,
> > > +   unsigned alignment, u64 start, u64 end)
> > > +{
> > > + int ret;
> > > +
> > > + ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node,
> > > +   size, alignment, 0, start,
> > > +   end, DRM_MM_SEARCH_DEFAULT,
> > > +   DRM_MM_SEARCH_DEFAULT);
> > > +
> > > + return ret;
> > > +}
> > 
> > No. It encodes a very bad assumption (i915->gtt) that is not made clear
> > in anyway.
> 
> static int
> insert_mappable_node(struct drm_i915_private *i915,
>struct drm_mm_node *node)
> {
>   return drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node,
>  4096, 0, 0,
>  0, i915->gtt.mappable_end,
>  DRM_MM_SEARCH_DEFAULT,
>  DRM_MM_SEARCH_DEFAULT);

DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/9] drm/i915: Use insert_page for pwrite_fast

2015-12-17 Thread Tvrtko Ursulin


Hi,

On 14/12/15 05:46, ankitprasad.r.sha...@intel.com wrote:

From: Ankitprasad Sharma 

In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
we try a nonblocking pin for the whole object (since that is fastest if
reused), then failing that we try to grab one page in the mappable
aperture. It also allows us to handle objects larger than the mappable
aperture (e.g. if we need to pwrite with vGPU restricting the aperture
to a measely 8MiB or something like that).

v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)

v3: Combined loops based on local patch by Chris (Chris)

v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris)

Signed-off-by: Ankitprasad Sharma 
Signed-off-by: Chris Wilson 
---
  drivers/gpu/drm/i915/i915_gem.c | 86 ++---
  1 file changed, 64 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bf7f203..46c1e75 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -61,6 +61,21 @@ static bool cpu_write_needs_clflush(struct 
drm_i915_gem_object *obj)
return obj->pin_display;
  }

+static int
+i915_gem_insert_node_in_range(struct drm_i915_private *i915,
+ struct drm_mm_node *node, u64 size,
+ unsigned alignment, u64 start, u64 end)
+{
+   int ret;
+
+   ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node,
+ size, alignment, 0, start,
+ end, DRM_MM_SEARCH_DEFAULT,
+ DRM_MM_SEARCH_DEFAULT);
+
+   return ret;
+}
+
  /* some bookkeeping */
  static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
  size_t size)
@@ -760,20 +775,29 @@ fast_user_write(struct io_mapping *mapping,
   * user into the GTT, uncached.
   */
  static int
-i915_gem_gtt_pwrite_fast(struct drm_device *dev,
+i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 struct drm_i915_gem_object *obj,
 struct drm_i915_gem_pwrite *args,
 struct drm_file *file)
  {
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   ssize_t remain;
-   loff_t offset, page_base;
+   struct drm_mm_node node;
+   uint64_t remain, offset;
char __user *user_data;
-   int page_offset, page_length, ret;
+   int ret;

ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
-   if (ret)
-   goto out;
+   if (ret) {
+   memset(&node, 0, sizeof(node));
+   ret = i915_gem_insert_node_in_range(i915, &node, 4096, 0,
+   0, i915->gtt.mappable_end);


Suggest PAGE_SIZE instead of 4096 to match the main loop below.


+   if (ret)
+   goto out;
+
+   i915_gem_object_pin_pages(obj);


i915_gem_object_get_pages is missing again before pin pages I think.

If true it means we need an IGT to exercise this path. Should be easy 
with a huge object and just pwrite a small chunk?



+   } else {
+   node.start = i915_gem_obj_ggtt_offset(obj);
+   node.allocated = false;
+   }

ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
@@ -783,31 +807,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
if (ret)
goto out_unpin;

-   user_data = to_user_ptr(args->data_ptr);
-   remain = args->size;
-
-   offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
-
intel_fb_obj_invalidate(obj, ORIGIN_GTT);
+   obj->dirty = true;

-   while (remain > 0) {
+   user_data = to_user_ptr(args->data_ptr);
+   offset = args->offset;
+   remain = args->size;
+   while (remain) {
/* Operation in this page
 *
 * page_base = page offset within aperture
 * page_offset = offset within page
 * page_length = bytes to copy for this page
 */
-   page_base = offset & PAGE_MASK;
-   page_offset = offset_in_page(offset);
-   page_length = remain;
-   if ((page_offset + remain) > PAGE_SIZE)
-   page_length = PAGE_SIZE - page_offset;
-
+   u32 page_base = node.start;


Compiler does not complain about possible truncation?


+   unsigned page_offset = offset_in_page(offset);
+   unsigned page_length = PAGE_SIZE - page_offset;
+   page_length = remain < page_length ? remain : page_length;
+   if (node.allocated) {
+   wmb();
+   i915->gtt.base.insert_page(&i915->gtt.base,
+

Re: [Intel-gfx] [PATCH 2/9] drm/i915: Use insert_page for pwrite_fast

2015-12-17 Thread Chris Wilson
On Thu, Dec 17, 2015 at 10:45:15AM +, Tvrtko Ursulin wrote:
> 
> Hi,
> 
> On 14/12/15 05:46, ankitprasad.r.sha...@intel.com wrote:
> >From: Ankitprasad Sharma 
> >
> >In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> >we try a nonblocking pin for the whole object (since that is fastest if
> >reused), then failing that we try to grab one page in the mappable
> >aperture. It also allows us to handle objects larger than the mappable
> >aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> >to a measely 8MiB or something like that).
> >
> >v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
> >
> >v3: Combined loops based on local patch by Chris (Chris)
> >
> >v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris)
> >
> >Signed-off-by: Ankitprasad Sharma 
> >Signed-off-by: Chris Wilson 
> >---
> >  drivers/gpu/drm/i915/i915_gem.c | 86 
> > ++---
> >  1 file changed, 64 insertions(+), 22 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> >b/drivers/gpu/drm/i915/i915_gem.c
> >index bf7f203..46c1e75 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -61,6 +61,21 @@ static bool cpu_write_needs_clflush(struct 
> >drm_i915_gem_object *obj)
> > return obj->pin_display;
> >  }
> >
> >+static int
> >+i915_gem_insert_node_in_range(struct drm_i915_private *i915,
> >+  struct drm_mm_node *node, u64 size,
> >+  unsigned alignment, u64 start, u64 end)
> >+{
> >+int ret;
> >+
> >+ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node,
> >+  size, alignment, 0, start,
> >+  end, DRM_MM_SEARCH_DEFAULT,
> >+  DRM_MM_SEARCH_DEFAULT);
> >+
> >+return ret;
> >+}
> >+
> >  /* some bookkeeping */
> >  static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
> >   size_t size)
> >@@ -760,20 +775,29 @@ fast_user_write(struct io_mapping *mapping,
> >   * user into the GTT, uncached.
> >   */
> >  static int
> >-i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> >+i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> >  struct drm_i915_gem_object *obj,
> >  struct drm_i915_gem_pwrite *args,
> >  struct drm_file *file)
> >  {
> >-struct drm_i915_private *dev_priv = dev->dev_private;
> >-ssize_t remain;
> >-loff_t offset, page_base;
> >+struct drm_mm_node node;
> >+uint64_t remain, offset;
> > char __user *user_data;
> >-int page_offset, page_length, ret;
> >+int ret;
> >
> > ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> >-if (ret)
> >-goto out;
> >+if (ret) {
> >+memset(&node, 0, sizeof(node));
> >+ret = i915_gem_insert_node_in_range(i915, &node, 4096, 0,
> >+0, i915->gtt.mappable_end);
> 
> Suggest PAGE_SIZE instead of 4096 to match the main loop below.
> 
> >+if (ret)
> >+goto out;
> >+
> >+i915_gem_object_pin_pages(obj);
> 
> i915_gem_object_get_pages is missing again before pin pages I think.

That's due to rebasing my patch where I merge the get_pages call into
pin_pages, sorry.

> If true it means we need an IGT to exercise this path. Should be
> easy with a huge object and just pwrite a small chunk?

Hmm, it should be hit by gem_pwrite/big-gtt + huge-gtt. If not, then we
do indeed more testing.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx