Re: [Intel-gfx] [PATCH 1/2] drm/i915/ttm: Reorganize the ttm move code somewhat

2021-06-30 Thread Daniel Vetter
On Wed, Jun 30, 2021 at 6:54 PM Matthew Auld
 wrote:
>
> On Wed, 30 Jun 2021 at 16:27, Thomas Hellström
>  wrote:
> >
> > On Wed, 2021-06-30 at 15:19 +0100, Matthew Auld wrote:
> > > On Thu, 24 Jun 2021 at 20:31, Thomas Hellström
> > >  wrote:
> > > >
> > > > In order to make the code a bit more readable and to facilitate
> > > > async memcpy moves, reorganize the move code a little. Determine
> > > > at an early stage whether to copy or to clear.
> > > >
> > > > Signed-off-by: Thomas Hellström 
> > > > ---
> > > >  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++---
> > > > 
> > > >  1 file changed, 40 insertions(+), 30 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > index c39d982c4fa6..4e529adcdfc7 100644
> > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > > @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct
> > > > drm_i915_gem_object *obj,
> > > >  }
> > > >
> > > >  static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
> > > > +  bool clear,
> > > >struct ttm_resource *dst_mem,
> > > >struct sg_table *dst_st)
> > > >  {
> > > > @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct
> > > > ttm_buffer_object *bo,
> > > > return -EINVAL;
> > > >
> > > > dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
> > > > -   if (!ttm || !ttm_tt_is_populated(ttm)) {
> > > > +   if (clear) {
> > > > if (bo->type == ttm_bo_type_kernel)
> > > > return -EINVAL;
> > >
> > > Was that meant to be:
> > > return 0;
> > >
> > > ?
> > >
> > > Also does that mean we are incorrectly falling back to memset, for
> > > non-userspace objects, instead of making it a noop?
> >
> > No, we're deliberately falling back to memset for non-userspace
> > objects, but the logic only memsets in the BO_ALLOC_CPU_CLEAR case if
> > everything is implemented correctly.
> >
> > >
> > > >
> > > > -   if (ttm && !(ttm->page_flags &
> > > > TTM_PAGE_FLAG_ZERO_ALLOC))
> > > > -   return 0;
> > > > -
> > > > intel_engine_pm_get(i915->gt.migrate.context-
> > > > >engine);
> > > > ret = intel_context_migrate_clear(i915-
> > > > >gt.migrate.context, NULL,
> > > >   dst_st->sgl,
> > > > dst_level,
> > > > @@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct
> > > > ttm_buffer_object *bo,
> > > > return ret;
> > > >  }
> > > >
> > > > -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
> > > > -struct ttm_operation_ctx *ctx,
> > > > -struct ttm_resource *dst_mem,
> > > > -struct ttm_place *hop)
> > > > +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool
> > > > clear,
> > > > +   struct ttm_resource *dst_mem,
> > > > +   struct sg_table *dst_st)
> > > >  {
> > > > struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> > > > -   struct ttm_resource_manager *dst_man =
> > > > -   ttm_manager_type(bo->bdev, dst_mem->mem_type);
> > > > struct intel_memory_region *dst_reg, *src_reg;
> > > > union {
> > > > struct ttm_kmap_iter_tt tt;
> > > > struct ttm_kmap_iter_iomap io;
> > > > } _dst_iter, _src_iter;
> > > > struct ttm_kmap_iter *dst_iter, *src_iter;
> > > > -   struct sg_table *dst_st;
> > > > int ret;
> > > >
> > > > dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
> > > > src_reg = i915_ttm_region(bo->bdev, bo->resource-
> > > > >mem_type);
> > > > GEM_BUG_ON(!dst_reg || !src_reg);
> > > >
> > > > +   ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st);
> > > > +   if (ret) {
> > >
> > > One future consideration is flat CCS where I don't think we can
> > > easily
> > > fall back to memcpy for userspace objects. Maybe we can make this
> > > fallback conditional on DG1 or !ALLOC_USER for now, or just add a
> > > TODO?
> >
> > Ugh. Is that true for both clearing and copying, or is it only copying?
>
> With clearing I think we are required to nuke the aux CCS state using
> some special blitter command.
>
> For copying/moving I think it's a similar story, where special care
> might be needed for the aux state, which likely requires the blitter.
> Although tbh I don't really remember all the details.

There's more than just flat CCS, for dg2 we'll also support resizeable
BAR with the goal to make the non-mappable lmem available too. Afaik
there's no fallback way to access that memory without a copy engine.

I think on those platforms we simply have to go back to wedging the
driver if reset of the copy 

Re: [Intel-gfx] [PATCH 1/2] drm/i915/ttm: Reorganize the ttm move code somewhat

2021-06-30 Thread Matthew Auld
On Wed, 30 Jun 2021 at 16:27, Thomas Hellström
 wrote:
>
> On Wed, 2021-06-30 at 15:19 +0100, Matthew Auld wrote:
> > On Thu, 24 Jun 2021 at 20:31, Thomas Hellström
> >  wrote:
> > >
> > > In order to make the code a bit more readable and to facilitate
> > > async memcpy moves, reorganize the move code a little. Determine
> > > at an early stage whether to copy or to clear.
> > >
> > > Signed-off-by: Thomas Hellström 
> > > ---
> > >  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++---
> > > 
> > >  1 file changed, 40 insertions(+), 30 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > index c39d982c4fa6..4e529adcdfc7 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct
> > > drm_i915_gem_object *obj,
> > >  }
> > >
> > >  static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
> > > +  bool clear,
> > >struct ttm_resource *dst_mem,
> > >struct sg_table *dst_st)
> > >  {
> > > @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct
> > > ttm_buffer_object *bo,
> > > return -EINVAL;
> > >
> > > dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
> > > -   if (!ttm || !ttm_tt_is_populated(ttm)) {
> > > +   if (clear) {
> > > if (bo->type == ttm_bo_type_kernel)
> > > return -EINVAL;
> >
> > Was that meant to be:
> > return 0;
> >
> > ?
> >
> > Also does that mean we are incorrectly falling back to memset, for
> > non-userspace objects, instead of making it a noop?
>
> No, we're deliberately falling back to memset for non-userspace
> objects, but the logic only memsets in the BO_ALLOC_CPU_CLEAR case if
> everything is implemented correctly.
>
> >
> > >
> > > -   if (ttm && !(ttm->page_flags &
> > > TTM_PAGE_FLAG_ZERO_ALLOC))
> > > -   return 0;
> > > -
> > > intel_engine_pm_get(i915->gt.migrate.context-
> > > >engine);
> > > ret = intel_context_migrate_clear(i915-
> > > >gt.migrate.context, NULL,
> > >   dst_st->sgl,
> > > dst_level,
> > > @@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct
> > > ttm_buffer_object *bo,
> > > return ret;
> > >  }
> > >
> > > -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
> > > -struct ttm_operation_ctx *ctx,
> > > -struct ttm_resource *dst_mem,
> > > -struct ttm_place *hop)
> > > +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool
> > > clear,
> > > +   struct ttm_resource *dst_mem,
> > > +   struct sg_table *dst_st)
> > >  {
> > > struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> > > -   struct ttm_resource_manager *dst_man =
> > > -   ttm_manager_type(bo->bdev, dst_mem->mem_type);
> > > struct intel_memory_region *dst_reg, *src_reg;
> > > union {
> > > struct ttm_kmap_iter_tt tt;
> > > struct ttm_kmap_iter_iomap io;
> > > } _dst_iter, _src_iter;
> > > struct ttm_kmap_iter *dst_iter, *src_iter;
> > > -   struct sg_table *dst_st;
> > > int ret;
> > >
> > > dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
> > > src_reg = i915_ttm_region(bo->bdev, bo->resource-
> > > >mem_type);
> > > GEM_BUG_ON(!dst_reg || !src_reg);
> > >
> > > +   ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st);
> > > +   if (ret) {
> >
> > One future consideration is flat CCS where I don't think we can
> > easily
> > fall back to memcpy for userspace objects. Maybe we can make this
> > fallback conditional on DG1 or !ALLOC_USER for now, or just add a
> > TODO?
>
> Ugh. Is that true for both clearing and copying, or is it only copying?

With clearing I think we are required to nuke the aux CCS state using
some special blitter command.

For copying/moving I think it's a similar story, where special care
might be needed for the aux state, which likely requires the blitter.
Although tbh I don't really remember all the details.

>
> Problem is if we hit an engine reset and fence error during initial
> clearing / swapin, the plan moving forward is to intercept that and
> resort to cpu clearing / copying for security reasons. In the worst
> case we at least need to be able to clear.
>
> /Thomas
>
>
> >
> > > +   dst_iter = !cpu_maps_iomem(dst_mem) ?
> > > +   ttm_kmap_iter_tt_init(&_dst_iter.tt, bo-
> > > >ttm) :
> > > +   ttm_kmap_iter_iomap_init(&_dst_iter.io,
> > > _reg->iomap,
> > > +dst_st, dst_reg-
> > 

Re: [Intel-gfx] [PATCH 1/2] drm/i915/ttm: Reorganize the ttm move code somewhat

2021-06-30 Thread Thomas Hellström
On Wed, 2021-06-30 at 15:19 +0100, Matthew Auld wrote:
> On Thu, 24 Jun 2021 at 20:31, Thomas Hellström
>  wrote:
> > 
> > In order to make the code a bit more readable and to facilitate
> > async memcpy moves, reorganize the move code a little. Determine
> > at an early stage whether to copy or to clear.
> > 
> > Signed-off-by: Thomas Hellström 
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++---
> > 
> >  1 file changed, 40 insertions(+), 30 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > index c39d982c4fa6..4e529adcdfc7 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct
> > drm_i915_gem_object *obj,
> >  }
> > 
> >  static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
> > +  bool clear,
> >    struct ttm_resource *dst_mem,
> >    struct sg_table *dst_st)
> >  {
> > @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct
> > ttm_buffer_object *bo,
> >     return -EINVAL;
> > 
> >     dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
> > -   if (!ttm || !ttm_tt_is_populated(ttm)) {
> > +   if (clear) {
> >     if (bo->type == ttm_bo_type_kernel)
> >     return -EINVAL;
> 
> Was that meant to be:
> return 0;
> 
> ?
> 
> Also does that mean we are incorrectly falling back to memset, for
> non-userspace objects, instead of making it a noop?

No, we're deliberately falling back to memset for non-userspace
objects, but the logic only memsets in the BO_ALLOC_CPU_CLEAR case if
everything is implemented correctly.

> 
> > 
> > -   if (ttm && !(ttm->page_flags &
> > TTM_PAGE_FLAG_ZERO_ALLOC))
> > -   return 0;
> > -
> >     intel_engine_pm_get(i915->gt.migrate.context-
> > >engine);
> >     ret = intel_context_migrate_clear(i915-
> > >gt.migrate.context, NULL,
> >   dst_st->sgl,
> > dst_level,
> > @@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct
> > ttm_buffer_object *bo,
> >     return ret;
> >  }
> > 
> > -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
> > -    struct ttm_operation_ctx *ctx,
> > -    struct ttm_resource *dst_mem,
> > -    struct ttm_place *hop)
> > +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool
> > clear,
> > +   struct ttm_resource *dst_mem,
> > +   struct sg_table *dst_st)
> >  {
> >     struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> > -   struct ttm_resource_manager *dst_man =
> > -   ttm_manager_type(bo->bdev, dst_mem->mem_type);
> >     struct intel_memory_region *dst_reg, *src_reg;
> >     union {
> >     struct ttm_kmap_iter_tt tt;
> >     struct ttm_kmap_iter_iomap io;
> >     } _dst_iter, _src_iter;
> >     struct ttm_kmap_iter *dst_iter, *src_iter;
> > -   struct sg_table *dst_st;
> >     int ret;
> > 
> >     dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
> >     src_reg = i915_ttm_region(bo->bdev, bo->resource-
> > >mem_type);
> >     GEM_BUG_ON(!dst_reg || !src_reg);
> > 
> > +   ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st);
> > +   if (ret) {
> 
> One future consideration is flat CCS where I don't think we can
> easily
> fall back to memcpy for userspace objects. Maybe we can make this
> fallback conditional on DG1 or !ALLOC_USER for now, or just add a
> TODO?

Ugh. Is that true for both clearing and copying, or is it only copying?

Problem is if we hit an engine reset and fence error during initial
clearing / swapin, the plan moving forward is to intercept that and
resort to cpu clearing / copying for security reasons. In the worst
case we at least need to be able to clear.

/Thomas


> 
> > +   dst_iter = !cpu_maps_iomem(dst_mem) ?
> > +   ttm_kmap_iter_tt_init(&_dst_iter.tt, bo-
> > >ttm) :
> > +   ttm_kmap_iter_iomap_init(&_dst_iter.io,
> > _reg->iomap,
> > +    dst_st, dst_reg-
> > >region.start);
> > +
> > +   src_iter = !cpu_maps_iomem(bo->resource) ?
> > +   ttm_kmap_iter_tt_init(&_src_iter.tt, bo-
> > >ttm) :
> > +   ttm_kmap_iter_iomap_init(&_src_iter.io,
> > _reg->iomap,
> > +    obj-
> > >ttm.cached_io_st,
> > +    src_reg-
> > >region.start);
> > +
> > +   ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter,
> > src_iter);
> > +   }
> > +}
> > +
> > +static int i915_ttm_move(struct 

Re: [Intel-gfx] [PATCH 1/2] drm/i915/ttm: Reorganize the ttm move code somewhat

2021-06-30 Thread Matthew Auld
On Thu, 24 Jun 2021 at 20:31, Thomas Hellström
 wrote:
>
> In order to make the code a bit more readable and to facilitate
> async memcpy moves, reorganize the move code a little. Determine
> at an early stage whether to copy or to clear.
>
> Signed-off-by: Thomas Hellström 
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++---
>  1 file changed, 40 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index c39d982c4fa6..4e529adcdfc7 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
>  }
>
>  static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
> +  bool clear,
>struct ttm_resource *dst_mem,
>struct sg_table *dst_st)
>  {
> @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct ttm_buffer_object 
> *bo,
> return -EINVAL;
>
> dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
> -   if (!ttm || !ttm_tt_is_populated(ttm)) {
> +   if (clear) {
> if (bo->type == ttm_bo_type_kernel)
> return -EINVAL;

Was that meant to be:
return 0;

?

Also does that mean we are incorrectly falling back to memset, for
non-userspace objects, instead of making it a noop?

>
> -   if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))
> -   return 0;
> -
> intel_engine_pm_get(i915->gt.migrate.context->engine);
> ret = intel_context_migrate_clear(i915->gt.migrate.context, 
> NULL,
>   dst_st->sgl, dst_level,
> @@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct ttm_buffer_object 
> *bo,
> return ret;
>  }
>
> -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
> -struct ttm_operation_ctx *ctx,
> -struct ttm_resource *dst_mem,
> -struct ttm_place *hop)
> +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
> +   struct ttm_resource *dst_mem,
> +   struct sg_table *dst_st)
>  {
> struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> -   struct ttm_resource_manager *dst_man =
> -   ttm_manager_type(bo->bdev, dst_mem->mem_type);
> struct intel_memory_region *dst_reg, *src_reg;
> union {
> struct ttm_kmap_iter_tt tt;
> struct ttm_kmap_iter_iomap io;
> } _dst_iter, _src_iter;
> struct ttm_kmap_iter *dst_iter, *src_iter;
> -   struct sg_table *dst_st;
> int ret;
>
> dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
> src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
> GEM_BUG_ON(!dst_reg || !src_reg);
>
> +   ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st);
> +   if (ret) {

One future consideration is flat CCS where I don't think we can easily
fall back to memcpy for userspace objects. Maybe we can make this
fallback conditional on DG1 or !ALLOC_USER for now, or just add a
TODO?

> +   dst_iter = !cpu_maps_iomem(dst_mem) ?
> +   ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) :
> +   ttm_kmap_iter_iomap_init(&_dst_iter.io, 
> _reg->iomap,
> +dst_st, 
> dst_reg->region.start);
> +
> +   src_iter = !cpu_maps_iomem(bo->resource) ?
> +   ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
> +   ttm_kmap_iter_iomap_init(&_src_iter.io, 
> _reg->iomap,
> +obj->ttm.cached_io_st,
> +src_reg->region.start);
> +
> +   ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
> +   }
> +}
> +
> +static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
> +struct ttm_operation_ctx *ctx,
> +struct ttm_resource *dst_mem,
> +struct ttm_place *hop)
> +{
> +   struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> +   struct ttm_resource_manager *dst_man =
> +   ttm_manager_type(bo->bdev, dst_mem->mem_type);
> +   struct ttm_tt *ttm = bo->ttm;
> +   struct sg_table *dst_st;
> +   bool clear;
> +   int ret;
> +
> /* Sync for now. We could do the actual copy async. */
> ret = ttm_bo_wait_ctx(bo, ctx);
> if (ret)
> @@ -526,9 +550,8 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, 
> bool evict,
> }
>
> /* Populate ttm with pages if needed. Typically system memory. */
> -   if (bo->ttm 

[Intel-gfx] [PATCH 1/2] drm/i915/ttm: Reorganize the ttm move code somewhat

2021-06-24 Thread Thomas Hellström
In order to make the code a bit more readable and to facilitate
async memcpy moves, reorganize the move code a little. Determine
at an early stage whether to copy or to clear.

Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++---
 1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index c39d982c4fa6..4e529adcdfc7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
 }
 
 static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
+  bool clear,
   struct ttm_resource *dst_mem,
   struct sg_table *dst_st)
 {
@@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct ttm_buffer_object 
*bo,
return -EINVAL;
 
dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
-   if (!ttm || !ttm_tt_is_populated(ttm)) {
+   if (clear) {
if (bo->type == ttm_bo_type_kernel)
return -EINVAL;
 
-   if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))
-   return 0;
-
intel_engine_pm_get(i915->gt.migrate.context->engine);
ret = intel_context_migrate_clear(i915->gt.migrate.context, 
NULL,
  dst_st->sgl, dst_level,
@@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct ttm_buffer_object 
*bo,
return ret;
 }
 
-static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
-struct ttm_operation_ctx *ctx,
-struct ttm_resource *dst_mem,
-struct ttm_place *hop)
+static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
+   struct ttm_resource *dst_mem,
+   struct sg_table *dst_st)
 {
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-   struct ttm_resource_manager *dst_man =
-   ttm_manager_type(bo->bdev, dst_mem->mem_type);
struct intel_memory_region *dst_reg, *src_reg;
union {
struct ttm_kmap_iter_tt tt;
struct ttm_kmap_iter_iomap io;
} _dst_iter, _src_iter;
struct ttm_kmap_iter *dst_iter, *src_iter;
-   struct sg_table *dst_st;
int ret;
 
dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
GEM_BUG_ON(!dst_reg || !src_reg);
 
+   ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st);
+   if (ret) {
+   dst_iter = !cpu_maps_iomem(dst_mem) ?
+   ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) :
+   ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
+dst_st, dst_reg->region.start);
+
+   src_iter = !cpu_maps_iomem(bo->resource) ?
+   ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
+   ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
+obj->ttm.cached_io_st,
+src_reg->region.start);
+
+   ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
+   }
+}
+
+static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+struct ttm_operation_ctx *ctx,
+struct ttm_resource *dst_mem,
+struct ttm_place *hop)
+{
+   struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+   struct ttm_resource_manager *dst_man =
+   ttm_manager_type(bo->bdev, dst_mem->mem_type);
+   struct ttm_tt *ttm = bo->ttm;
+   struct sg_table *dst_st;
+   bool clear;
+   int ret;
+
/* Sync for now. We could do the actual copy async. */
ret = ttm_bo_wait_ctx(bo, ctx);
if (ret)
@@ -526,9 +550,8 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool 
evict,
}
 
/* Populate ttm with pages if needed. Typically system memory. */
-   if (bo->ttm && (dst_man->use_tt ||
-   (bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) {
-   ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
+   if (ttm && (dst_man->use_tt || (ttm->page_flags & 
TTM_PAGE_FLAG_SWAPPED))) {
+   ret = ttm_tt_populate(bo->bdev, ttm, ctx);
if (ret)
return ret;
}
@@ -537,23 +560,10 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, 
bool evict,
if (IS_ERR(dst_st))
return PTR_ERR(dst_st);
 
-   ret = i915_ttm_accel_move(bo, dst_mem, dst_st);
-   if (ret) {
-   /* If