Chris, 

I tested this together with your v3 patch ("Mark cache dirty...")
and verified that all tests pass.

Tested-by: Dongwon Kim <dongwon....@intel.com>

On Thu, May 18, 2017 at 10:46:17AM +0100, Chris Wilson wrote:
> For ease of use (i.e. avoiding a few checks and function calls), store
> the object's cache coherency next to the cache-is-dirty bit.
> 
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Dongwon Kim <dongwon....@intel.com>
> Cc: Matt Roper <matthew.d.ro...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c                  | 14 +++++++-------
>  drivers/gpu/drm/i915/i915_gem_clflush.c          |  2 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c       |  2 +-
>  drivers/gpu/drm/i915/i915_gem_internal.c         |  3 ++-
>  drivers/gpu/drm/i915/i915_gem_object.h           |  1 +
>  drivers/gpu/drm/i915/i915_gem_stolen.c           |  1 +
>  drivers/gpu/drm/i915/i915_gem_userptr.c          |  3 ++-
>  drivers/gpu/drm/i915/selftests/huge_gem_object.c |  3 ++-
>  8 files changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 155dd52f2d18..870659c13de3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct 
> drm_i915_gem_object *obj)
>       if (obj->cache_dirty)
>               return false;
>  
> -     if (!i915_gem_object_is_coherent(obj))
> +     if (!obj->cache_coherent)
>               return true;
>  
>       return obj->pin_display;
> @@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct 
> drm_i915_gem_object *obj,
>  
>       if (needs_clflush &&
>           (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
> -         !i915_gem_object_is_coherent(obj))
> +         !obj->cache_coherent)
>               drm_clflush_sg(pages);
>  
>       __start_cpu_write(obj);
> @@ -856,8 +856,7 @@ int i915_gem_obj_prepare_shmem_read(struct 
> drm_i915_gem_object *obj,
>       if (ret)
>               return ret;
>  
> -     if (i915_gem_object_is_coherent(obj) ||
> -         !static_cpu_has(X86_FEATURE_CLFLUSH)) {
> +     if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
>               ret = i915_gem_object_set_to_cpu_domain(obj, false);
>               if (ret)
>                       goto err_unpin;
> @@ -909,8 +908,7 @@ int i915_gem_obj_prepare_shmem_write(struct 
> drm_i915_gem_object *obj,
>       if (ret)
>               return ret;
>  
> -     if (i915_gem_object_is_coherent(obj) ||
> -         !static_cpu_has(X86_FEATURE_CLFLUSH)) {
> +     if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
>               ret = i915_gem_object_set_to_cpu_domain(obj, true);
>               if (ret)
>                       goto err_unpin;
> @@ -3661,6 +3659,7 @@ int i915_gem_object_set_cache_level(struct 
> drm_i915_gem_object *obj,
>       list_for_each_entry(vma, &obj->vma_list, obj_link)
>               vma->node.color = cache_level;
>       obj->cache_level = cache_level;
> +     obj->cache_coherent = i915_gem_object_is_coherent(obj);
>       obj->cache_dirty = true; /* Always invalidate stale cachelines */
>  
>       return 0;
> @@ -4320,7 +4319,8 @@ i915_gem_object_create(struct drm_i915_private 
> *dev_priv, u64 size)
>       } else
>               obj->cache_level = I915_CACHE_NONE;
>  
> -     obj->cache_dirty = !i915_gem_object_is_coherent(obj);
> +     obj->cache_coherent = i915_gem_object_is_coherent(obj);
> +     obj->cache_dirty = !obj->cache_coherent;
>  
>       trace_i915_gem_object_create(obj);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c 
> b/drivers/gpu/drm/i915/i915_gem_clflush.c
> index 17b207e963c2..152f16c11878 100644
> --- a/drivers/gpu/drm/i915/i915_gem_clflush.c
> +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
> @@ -139,7 +139,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object 
> *obj,
>        * snooping behaviour occurs naturally as the result of our domain
>        * tracking.
>        */
> -     if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
> +     if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
>               return;
>  
>       trace_i915_gem_object_clflush(obj);
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0b8ae0f56675..2e5f513087a8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1129,7 +1129,7 @@ i915_gem_execbuffer_move_to_gpu(struct 
> drm_i915_gem_request *req,
>               if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
>                       continue;
>  
> -             if (obj->cache_dirty)
> +             if (obj->cache_dirty & ~obj->cache_coherent)
>                       i915_gem_clflush_object(obj, 0);
>  
>               ret = i915_gem_request_await_object
> diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c 
> b/drivers/gpu/drm/i915/i915_gem_internal.c
> index 58e93e87d573..568bf83af1f5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_internal.c
> +++ b/drivers/gpu/drm/i915/i915_gem_internal.c
> @@ -191,7 +191,8 @@ i915_gem_object_create_internal(struct drm_i915_private 
> *i915,
>       obj->base.read_domains = I915_GEM_DOMAIN_CPU;
>       obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>       obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
> -     obj->cache_dirty = !i915_gem_object_is_coherent(obj);
> +     obj->cache_coherent = i915_gem_object_is_coherent(obj);
> +     obj->cache_dirty = !obj->cache_coherent;
>  
>       return obj;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h 
> b/drivers/gpu/drm/i915/i915_gem_object.h
> index 174cf923c236..dca15adc91de 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -106,6 +106,7 @@ struct drm_i915_gem_object {
>       unsigned long gt_ro:1;
>       unsigned int cache_level:3;
>       unsigned int cache_dirty:1;
> +     unsigned int cache_coherent:1;
>  
>       atomic_t frontbuffer_bits;
>       unsigned int frontbuffer_ggtt_origin; /* write once */
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c 
> b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index f3abdc27c5dd..68af4a39973d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -592,6 +592,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private 
> *dev_priv,
>       obj->stolen = stolen;
>       obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
>       obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
> +     obj->cache_coherent = true; /* assumptions! more like cache_oblivious */
>  
>       if (i915_gem_object_pin_pages(obj))
>               goto cleanup;
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c 
> b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 9f84be171ad2..4ec9a04aa165 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -805,7 +805,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void 
> *data, struct drm_file *file
>       obj->base.read_domains = I915_GEM_DOMAIN_CPU;
>       obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>       obj->cache_level = I915_CACHE_LLC;
> -     obj->cache_dirty = !i915_gem_object_is_coherent(obj);
> +     obj->cache_coherent = i915_gem_object_is_coherent(obj);
> +     obj->cache_dirty = !obj->cache_coherent;
>  
>       obj->userptr.ptr = args->user_ptr;
>       obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
> diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c 
> b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
> index 0ca867a877b6..caf76af36aba 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
> @@ -129,7 +129,8 @@ huge_gem_object(struct drm_i915_private *i915,
>       obj->base.read_domains = I915_GEM_DOMAIN_CPU;
>       obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>       obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
> -     obj->cache_dirty = !i915_gem_object_is_coherent(obj);
> +     obj->cache_coherent = i915_gem_object_is_coherent(obj);
> +     obj->cache_dirty = !obj->cache_coherent;
>       obj->scratch = phys_size;
>  
>       return obj;
> -- 
> 2.11.0
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to