[Intel-gfx] [PATCH v15 0/1] drm/i915: Allow user to set cache at BO creation
From: Fei Yang This series introduce a new extension for GEM_CREATE, 1. end support for set caching ioctl [PATCH 1/2] 2. add set_pat extension for gem_create [PATCH 2/2] v2: drop one patch that was merged separately commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function") v3: rebased on https://patchwork.freedesktop.org/series/117082/ v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. v6: rebased on https://patchwork.freedesktop.org/series/117480/ v7: rebased on https://patchwork.freedesktop.org/series/117528/ v8: dropped the two dependent patches that has been merged separately. Add IGT link and Tested-by (MESA). v9: addressing comments (Andi) v10: acked-by and tested-by MESA v11: drop "end support for set caching ioctl" (merged) remove tools/include/uapi/drm/i915_drm.h v12: drop Bspec reference in comment. add to commit message instead v13: sent to test with igt@gem_create@create-ext-set-pat v14: sent to test with igt@gem_create@create-ext-set-pat v15: update commit message with documentation note and t-b/a-b from Media driver folks. Fei Yang (1): drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 41 ++ 3 files changed, 83 insertions(+) -- 2.25.1
[Intel-gfx] [PATCH v15 1/1] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Note: The detailed description of PAT index is missing in current PRM even for older hardware and will be added by the next PRM update under chapter name "Memory Views". BSpec: 45101 Mesa support has been submitted in this merge request: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878 The media driver is supported by the following commits: https://github.com/intel/media-driver/commit/92c00a857433ebb34ec575e9834f473c6fcb6341 https://github.com/intel/media-driver/commit/fd375cf2c5e1f6bf6b43258ff797b3134aadc9fd https://github.com/intel/media-driver/commit/08dd244b22484770a33464c2c8ae85430e548000 The IGT test related to this change is igt@gem_create@create-ext-set-pat Signed-off-by: Fei Yang Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Reviewed-by: Andi Shyti Acked-by: Jordan Justen Tested-by: Jordan Justen Acked-by: Carl Zhang Tested-by: Lihao Gu --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 41 ++ 3 files changed, 83 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..644a936248ad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->pat_set_by_user = true; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 46a19b099ec8..97ac6fb37958 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *o
[Intel-gfx] [PATCH v12 1/1] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. BSpec: 45101 Test igt@gem_create@create_ext_set_pat posted at https://patchwork.freedesktop.org/series/118314/ Tested with https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878 Signed-off-by: Fei Yang Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Reviewed-by: Andi Shyti Acked-by: Jordan Justen Tested-by: Jordan Justen --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 41 ++ 3 files changed, 83 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..644a936248ad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->pat_set_by_user = true; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 46a19b099ec8..97ac6fb37958 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->pat_set_by_user) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f31dfacde601..4083a23e0614 100644 --- a/include/uapi/drm/i915_drm
[Intel-gfx] [PATCH v12 0/1] drm/i915: Allow user to set cache at BO creation
From: Fei Yang This series introduce a new extension for GEM_CREATE, 1. end support for set caching ioctl [PATCH 1/2] 2. add set_pat extension for gem_create [PATCH 2/2] v2: drop one patch that was merged separately commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function") v3: rebased on https://patchwork.freedesktop.org/series/117082/ v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. v6: rebased on https://patchwork.freedesktop.org/series/117480/ v7: rebased on https://patchwork.freedesktop.org/series/117528/ v8: dropped the two dependent patches that has been merged separately. Add IGT link and Tested-by (MESA). v9: addressing comments (Andi) v10: acked-by and tested-by MESA v11: drop "end support for set caching ioctl" (merged) remove tools/include/uapi/drm/i915_drm.h v12: drop Bspec reference in comment. add to commit message instead Fei Yang (1): drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 41 ++ 3 files changed, 83 insertions(+) -- 2.25.1
[Intel-gfx] [PATCH v11 1/1] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Test igt@gem_create@create_ext_set_pat posted at https://patchwork.freedesktop.org/series/118314/ Tested with https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878 Signed-off-by: Fei Yang Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Reviewed-by: Andi Shyti Acked-by: Jordan Justen Tested-by: Jordan Justen --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 42 ++ 3 files changed, 84 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..644a936248ad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->pat_set_by_user = true; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 46a19b099ec8..97ac6fb37958 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->pat_set_by_user) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f31dfacde601..75426fcb4b2e 100644 --- a/include/uapi/drm/i915_drm
[Intel-gfx] [PATCH v11 0/1] drm/i915: Allow user to set cache at BO creation
From: Fei Yang This series introduce a new extension for GEM_CREATE, 1. end support for set caching ioctl [PATCH 1/2] 2. add set_pat extension for gem_create [PATCH 2/2] v2: drop one patch that was merged separately commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function") v3: rebased on https://patchwork.freedesktop.org/series/117082/ v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. v6: rebased on https://patchwork.freedesktop.org/series/117480/ v7: rebased on https://patchwork.freedesktop.org/series/117528/ v8: dropped the two dependent patches that has been merged separately. Add IGT link and Tested-by (MESA). v9: addressing comments (Andi) v10: acked-by and tested-by MESA v11: drop "end support for set caching ioctl" (merged) remove tools/include/uapi/drm/i915_drm.h Fei Yang (1): drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 42 ++ 3 files changed, 84 insertions(+) -- 2.25.1
[Intel-gfx] [PATCH v10 2/2] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Test igt@gem_create@create_ext_set_pat posted at https://patchwork.freedesktop.org/series/117695/ Tested with https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878 Signed-off-by: Fei Yang Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Reviewed-by: Andi Shyti Acked-by: Jordan Justen Tested-by: Jordan Justen --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 42 ++ tools/include/uapi/drm/i915_drm.h | 42 ++ 4 files changed, 126 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..644a936248ad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->pat_set_by_user = true; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 46a19b099ec8..97ac6fb37958 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->pat_set_by_user) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index
[Intel-gfx] [PATCH v10 0/2] drm/i915: Allow user to set cache at BO creation
From: Fei Yang This series introduce a new extension for GEM_CREATE, 1. end support for set caching ioctl [PATCH 1/2] 2. add set_pat extension for gem_create [PATCH 2/2] v2: drop one patch that was merged separately commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function") v3: rebased on https://patchwork.freedesktop.org/series/117082/ v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. v6: rebased on https://patchwork.freedesktop.org/series/117480/ v7: rebased on https://patchwork.freedesktop.org/series/117528/ v8: dropped the two dependent patches that has been merged separately. Add IGT link and Tested-by (MESA). v9: addressing comments (Andi) v10: acked-by and tested-by MESA Fei Yang (2): drm/i915/mtl: end support for set caching ioctl drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++ drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - include/uapi/drm/i915_drm.h| 42 ++ tools/include/uapi/drm/i915_drm.h | 42 ++ 6 files changed, 137 insertions(+), 1 deletion(-) -- 2.25.1
[Intel-gfx] [PATCH v10 1/2] drm/i915/mtl: end support for set caching ioctl
From: Fei Yang The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For userspace components needing to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow them specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 05107a6efe45..dfaaa8b66ac3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -350,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* +* MTL doesn't snoop CPU cache by default for GPU access (namely +* 1-way coherency). However some UMD's are currently depending on +* that. Make 1-way coherent the default setting for MTL. A follow +* up patch will extend the GEM_CREATE uAPI to allow UMD's specify +* caching mode at BO creation time +*/ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than -- 2.25.1
[Intel-gfx] [PATCH v9 1/2] drm/i915/mtl: end support for set caching ioctl
From: Fei Yang The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For userspace components needing to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow them specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 05107a6efe45..dfaaa8b66ac3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -350,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* +* MTL doesn't snoop CPU cache by default for GPU access (namely +* 1-way coherency). However some UMD's are currently depending on +* that. Make 1-way coherent the default setting for MTL. A follow +* up patch will extend the GEM_CREATE uAPI to allow UMD's specify +* caching mode at BO creation time +*/ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than -- 2.25.1
[Intel-gfx] [PATCH v9 2/2] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Test igt@gem_create@create_ext_set_pat posted at https://patchwork.freedesktop.org/series/117695/ Tested with https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878 Signed-off-by: Fei Yang Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Reviewed-by: Andi Shyti Tested-by: Jordan Justen --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 42 ++ tools/include/uapi/drm/i915_drm.h | 42 ++ 4 files changed, 126 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..644a936248ad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->pat_set_by_user = true; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 46a19b099ec8..97ac6fb37958 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->pat_set_by_user) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ba40855dbc93..4f3fd650e5e1 1006
[Intel-gfx] [PATCH v9 0/2] drm/i915: Allow user to set cache at BO creation
From: Fei Yang This series introduce a new extension for GEM_CREATE, 1. end support for set caching ioctl [PATCH 4/5] 2. add set_pat extension for gem_create [PATCH 5/5] v2: drop one patch that was merged separately commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function") v3: rebased on https://patchwork.freedesktop.org/series/117082/ v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. v6: rebased on https://patchwork.freedesktop.org/series/117480/ v7: rebased on https://patchwork.freedesktop.org/series/117528/ v8: dropped the two dependent patches that has been merged separately. Add IGT link and Tested-by (MESA). v9: addressing comments (Andi) Fei Yang (2): drm/i915/mtl: end support for set caching ioctl drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++ drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - include/uapi/drm/i915_drm.h| 42 ++ tools/include/uapi/drm/i915_drm.h | 42 ++ 6 files changed, 137 insertions(+), 1 deletion(-) -- 2.25.1
[Intel-gfx] [PATCH v8 1/2] drm/i915/mtl: end support for set caching ioctl
From: Fei Yang The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For userspace components needing to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow them specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 05107a6efe45..dfaaa8b66ac3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -350,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* +* MTL doesn't snoop CPU cache by default for GPU access (namely +* 1-way coherency). However some UMD's are currently depending on +* that. Make 1-way coherent the default setting for MTL. A follow +* up patch will extend the GEM_CREATE uAPI to allow UMD's specify +* caching mode at BO creation time +*/ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than -- 2.25.1
[Intel-gfx] [PATCH v8 2/2] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. IGT posted at https://patchwork.freedesktop.org/series/117695/ Tested with https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878 Tested-by: Jordan Justen Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 36 ++ tools/include/uapi/drm/i915_drm.h | 36 ++ 4 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..644a936248ad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->pat_set_by_user = true; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 46a19b099ec8..97ac6fb37958 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->pat_set_by_user) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ba40855dbc93..7f5597920257 100644 --- a/include/uapi/drm/i915_drm
[Intel-gfx] [PATCH v8 0/2] drm/i915: Allow user to set cache at BO creation
From: Fei Yang The first three patches in this series are taken from https://patchwork.freedesktop.org/series/116868/ These patches are included here because the last patch has dependency on the pat_index refactor. This series is focusing on uAPI changes, 1. end support for set caching ioctl [PATCH 4/5] 2. add set_pat extension for gem_create [PATCH 5/5] v2: drop one patch that was merged separately commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function") v3: rebased on https://patchwork.freedesktop.org/series/117082/ v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. v6: rebased on https://patchwork.freedesktop.org/series/117480/ v7: rebased on https://patchwork.freedesktop.org/series/117528/ v8: dropped the two dependent patches that has been merged separately. Add IGT link and Tested-by (MESA). Fei Yang (2): drm/i915/mtl: end support for set caching ioctl drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++ drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 +- include/uapi/drm/i915_drm.h| 36 ++ tools/include/uapi/drm/i915_drm.h | 36 ++ 6 files changed, 125 insertions(+), 1 deletion(-) -- 2.25.1
[Intel-gfx] [PATCH v7 1/4] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 7 ++ drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 116 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index bc1291887d4f..284e1aa396cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..bf0bd8e11355 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,13 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + /** +* @I915_MAX_CACHE_LEVEL: +* +* Mark the last entry in the enum. Used for defining cachelevel_to_pat +* array for cache_level to pat translation table. +*/ + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 22ec1566d2a7..bb6998d67133 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by
[Intel-gfx] [PATCH v7 4/4] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 36 ++ tools/include/uapi/drm/i915_drm.h | 36 ++ 4 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..644a936248ad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->pat_set_by_user = true; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 46a19b099ec8..97ac6fb37958 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->pat_set_by_user) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dba7c5a5b25e..03c5c314846e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage
[Intel-gfx] [PATCH v7 3/4] drm/i915/mtl: end support for set caching ioctl
From: Fei Yang The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For userspace components needing to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow them specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 05107a6efe45..dfaaa8b66ac3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -350,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* +* MTL doesn't snoop CPU cache by default for GPU access (namely +* 1-way coherency). However some UMD's are currently depending on +* that. Make 1-way coherent the default setting for MTL. A follow +* up patch will extend the GEM_CREATE uAPI to allow UMD's specify +* caching mode at BO creation time +*/ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than -- 2.25.1
[Intel-gfx] [PATCH v7 2/4] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. PAT is expected to work much like MOCS already works today, and by design userspace is expected to select the index that exactly matches the desired behavior described in the hardware specification. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For kernel objects, cache_level is used for simplicity and backward compatibility. For Pre-gen12 platforms PAT can have 1:1 mapping to i915_cache_level, so these two are interchangeable. see the use of LEGACY_CACHELEVEL. One consequence of this change is that gen8_pte_encode is no longer working for gen12 platforms due to the fact that gen12 platforms has different PAT definitions. In the meantime the mtl_pte_encode introduced specfically for MTL becomes generic for all gen12 platforms. This patch renames the MTL PTE encode function into gen12_pte_encode and apply it to all gen12. Even though this change looks unrelated, but separating them would temporarily break gen12 PTE encoding, thus squash them in one patch. Special note: this patch changes the way caching behavior is controlled in the sense that some objects are left to be managed by userspace. For such objects we need to be careful not to change the userspace settings.There are kerneldoc and comments added around obj->cache_coherent, cache_dirty, and how to bypass the checkings by i915_gem_object_has_cache_level. For full understanding, these changes need to be looked at together with the two follow-up patches, one disables the {set|get}_caching ioctl's and the other adds set_pat extension to the GEM_CREATE uAPI. Bspec: 63019 Cc: Chris Wilson Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Matt Roper To be squashed --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 + .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 ++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 46 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 8 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 84 +-- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 18 ++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 53 +--- drivers/gpu/drm/i915/i915_gem.c | 27 +- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 453 insertions(+), 238 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/in
[Intel-gfx] [PATCH v7 0/4] drm/i915: Allow user to set cache at BO creation
From: Fei Yang The first three patches in this series are taken from https://patchwork.freedesktop.org/series/116868/ These patches are included here because the last patch has dependency on the pat_index refactor. This series is focusing on uAPI changes, 1. end support for set caching ioctl [PATCH 4/5] 2. add set_pat extension for gem_create [PATCH 5/5] v2: drop one patch that was merged separately commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function") v3: rebased on https://patchwork.freedesktop.org/series/117082/ v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. v6: rebased on https://patchwork.freedesktop.org/series/117480/ v7: rebased on https://patchwork.freedesktop.org/series/117528/ Fei Yang (4): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915/mtl: end support for set caching ioctl drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 + drivers/gpu/drm/i915/gem/i915_gem_domain.c| 61 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c| 66 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 53 - drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 ++- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 8 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 78 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 18 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 53 ++--- drivers/gpu/drm/i915/i915_gem.c | 27 ++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- include/uapi/drm/i915_drm.h | 36 + tools/include/uapi/drm/i915_drm.h | 36 + 44 files changed, 682 insertions(+), 238 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v8 2/2] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. PAT is expected to work much like MOCS already works today, and by design userspace is expected to select the index that exactly matches the desired behavior described in the hardware specification. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For kernel objects, cache_level is used for simplicity and backward compatibility. For Pre-gen12 platforms PAT can have 1:1 mapping to i915_cache_level, so these two are interchangeable. see the use of LEGACY_CACHELEVEL. One consequence of this change is that gen8_pte_encode is no longer working for gen12 platforms due to the fact that gen12 platforms has different PAT definitions. In the meantime the mtl_pte_encode introduced specfically for MTL becomes generic for all gen12 platforms. This patch renames the MTL PTE encode function into gen12_pte_encode and apply it to all gen12. Even though this change looks unrelated, but separating them would temporarily break gen12 PTE encoding, thus squash them in one patch. Special note: this patch changes the way caching behavior is controlled in the sense that some objects are left to be managed by userspace. For such objects we need to be careful not to change the userspace settings.There are kerneldoc and comments added around obj->cache_coherent, cache_dirty, and how to bypass the checkings by i915_gem_object_has_cache_level. For full understanding, these changes need to be looked at together with the two follow-up patches, one disables the {set|get}_caching ioctl's and the other adds set_pat extension to the GEM_CREATE uAPI. Bspec: 63019 Cc: Chris Wilson Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Matt Roper To be squashed --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 + .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 ++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 46 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 8 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 84 +-- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 18 ++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 53 +--- drivers/gpu/drm/i915/i915_gem.c | 27 +- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 453 insertions(+), 238 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/in
[Intel-gfx] [PATCH v8 0/2] drm/i915: use pat_index instead of cache_level
From: Fei Yang This patch set was posted at https://patchwork.freedesktop.org/series/116868/ Change title since the PTE patch was merged separately. These patches are extracted from series https://patchwork.freedesktop.org/series/115980/ This series refactor the cache policy programming so that the PTE encode functions can be unified across all GEN12 platforms. This refactor is also important in implementing the design which allows uerspace to directly set cache policy for each Buffer Object. v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function v3: disable {get, set}_caching ioctl v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. v6: squash the pte_encode patch because separating them causes bisect probelm. Also addressing some review comments from Tvrtko and Matt. v7: fix checkpatch errors and warnings. v8: BUILD_BUG_ON instead of WARN_ON_ONCE. Some updates in comments. Fei Yang (2): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c| 60 +- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 53 - drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 8 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 78 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 18 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 53 ++--- drivers/gpu/drm/i915/i915_gem.c | 27 ++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 40 files changed, 557 insertions(+), 237 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v8 1/2] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 7 ++ drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 116 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index bc1291887d4f..284e1aa396cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..bf0bd8e11355 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,13 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + /** +* @I915_MAX_CACHE_LEVEL: +* +* Mark the last entry in the enum. Used for defining cachelevel_to_pat +* array for cache_level to pat translation table. +*/ + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 22ec1566d2a7..bb6998d67133 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by
[Intel-gfx] [PATCH v6 2/4] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. PAT is expected to work much like MOCS already works today, and by design userspace is expected to select the index that exactly matches the desired behavior described in the hardware specification. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For kernel objects, cache_level is used for simplicity and backward compatibility. For Pre-gen12 platforms PAT can have 1:1 mapping to i915_cache_level, so these two are interchangeable. see the use of LEGACY_CACHELEVEL. One consequence of this change is that gen8_pte_encode is no longer working for gen12 platforms due to the fact that gen12 platforms has different PAT definitions. In the meantime the mtl_pte_encode introduced specfically for MTL becomes generic for all gen12 platforms. This patch renames the MTL PTE encode function into gen12_pte_encode and apply it to all gen12. Even though this change looks unrelated, but separating them would temporarily break gen12 PTE encoding, thus squash them in one patch. Special note: this patch changes the way caching behavior is controlled in the sense that some objects are left to be managed by userspace. For such objects we need to be careful not to change the userspace settings.There are kerneldoc and comments added around obj->cache_coherent, cache_dirty, and how to bypass the checkings by i915_gem_object_has_cache_level. For full understanding, these changes need to be looked at together with the two follow-up patches, one disables the {set|get}_caching ioctl's and the other adds set_pat extension to the GEM_CREATE uAPI. Bspec: 63019 Cc: Chris Wilson Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Matt Roper To be squashed --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 + .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 ++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 44 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 8 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 84 +-- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 18 ++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 53 +--- drivers/gpu/drm/i915/i915_gem.c | 27 +- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 451 insertions(+), 238 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/in
[Intel-gfx] [PATCH v6 4/4] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 36 ++ tools/include/uapi/drm/i915_drm.h | 36 ++ 4 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..644a936248ad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->pat_set_by_user = true; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 46a19b099ec8..97ac6fb37958 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->pat_set_by_user) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dba7c5a5b25e..03c5c314846e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage
[Intel-gfx] [PATCH v6 3/4] drm/i915/mtl: end support for set caching ioctl
From: Fei Yang The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For userspace components needing to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow them specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 05107a6efe45..dfaaa8b66ac3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -350,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* +* MTL doesn't snoop CPU cache by default for GPU access (namely +* 1-way coherency). However some UMD's are currently depending on +* that. Make 1-way coherent the default setting for MTL. A follow +* up patch will extend the GEM_CREATE uAPI to allow UMD's specify +* caching mode at BO creation time +*/ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than -- 2.25.1
[Intel-gfx] [PATCH v6 1/4] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 7 ++ drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 116 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index bc1291887d4f..284e1aa396cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..bf0bd8e11355 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,13 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + /** +* @I915_MAX_CACHE_LEVEL: +* +* Mark the last entry in the enum. Used for defining cachelevel_to_pat +* array for cache_level to pat translation table. +*/ + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 22ec1566d2a7..bb6998d67133 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by
[Intel-gfx] [PATCH v6 0/4] drm/i915: Allow user to set cache at BO creation
From: Fei Yang The first three patches in this series are taken from https://patchwork.freedesktop.org/series/116868/ These patches are included here because the last patch has dependency on the pat_index refactor. This series is focusing on uAPI changes, 1. end support for set caching ioctl [PATCH 4/5] 2. add set_pat extension for gem_create [PATCH 5/5] v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function v3: rebase on https://patchwork.freedesktop.org/series/117082/ v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. v6: rebased on https://patchwork.freedesktop.org/series/117480/ Fei Yang (4): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915/mtl: end support for set caching ioctl drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 + drivers/gpu/drm/i915/gem/i915_gem_domain.c| 61 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c| 66 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 51 +++- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 ++- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 8 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 78 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 18 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 53 ++--- drivers/gpu/drm/i915/i915_gem.c | 27 ++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- include/uapi/drm/i915_drm.h | 36 + tools/include/uapi/drm/i915_drm.h | 36 + 44 files changed, 680 insertions(+), 238 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v7 2/2] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. PAT is expected to work much like MOCS already works today, and by design userspace is expected to select the index that exactly matches the desired behavior described in the hardware specification. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For kernel objects, cache_level is used for simplicity and backward compatibility. For Pre-gen12 platforms PAT can have 1:1 mapping to i915_cache_level, so these two are interchangeable. see the use of LEGACY_CACHELEVEL. One consequence of this change is that gen8_pte_encode is no longer working for gen12 platforms due to the fact that gen12 platforms has different PAT definitions. In the meantime the mtl_pte_encode introduced specfically for MTL becomes generic for all gen12 platforms. This patch renames the MTL PTE encode function into gen12_pte_encode and apply it to all gen12. Even though this change looks unrelated, but separating them would temporarily break gen12 PTE encoding, thus squash them in one patch. Special note: this patch changes the way caching behavior is controlled in the sense that some objects are left to be managed by userspace. For such objects we need to be careful not to change the userspace settings.There are kerneldoc and comments added around obj->cache_coherent, cache_dirty, and how to bypass the checkings by i915_gem_object_has_cache_level. For full understanding, these changes need to be looked at together with the two follow-up patches, one disables the {set|get}_caching ioctl's and the other adds set_pat extension to the GEM_CREATE uAPI. Bspec: 63019 Cc: Chris Wilson Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Matt Roper To be squashed --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 + .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 ++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 44 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 8 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 84 +-- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 18 ++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 53 +--- drivers/gpu/drm/i915/i915_gem.c | 27 +- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 451 insertions(+), 238 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/in
[Intel-gfx] [PATCH v7 1/2] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 7 ++ drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 116 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index bc1291887d4f..284e1aa396cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..bf0bd8e11355 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,13 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + /** +* @I915_MAX_CACHE_LEVEL: +* +* Mark the last entry in the enum. Used for defining cachelevel_to_pat +* array for cache_level to pat translation table. +*/ + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 22ec1566d2a7..bb6998d67133 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by
[Intel-gfx] [PATCH v7 0/2] drm/i915: use pat_index instead of cache_level
From: Fei Yang This patch set was posted at https://patchwork.freedesktop.org/series/116868/ Change title since the PTE patch was merged separately. These patches are extracted from series https://patchwork.freedesktop.org/series/115980/ This series refactor the cache policy programming so that the PTE encode functions can be unified across all GEN12 platforms. This refactor is also important in implementing the design which allows uerspace to directly set cache policy for each Buffer Object. v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function v3: disable {get, set}_caching ioctl v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. v6: squash the pte_encode patch because separating them causes bisect probelm. Also addressing some review comments from Tvrtko and Matt. v7: fix checkpatch errors and warnings. Fei Yang (2): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c| 60 +- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 51 +++- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 8 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 78 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 18 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 53 ++--- drivers/gpu/drm/i915/i915_gem.c | 27 ++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 40 files changed, 555 insertions(+), 237 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v6 2/2] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. PAT is expected to work much like MOCS already works today, and by design userspace is expected to select the index that exactly matches the desired behavior described in the hardware specification. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For kernel objects, cache_level is used for simplicity and backward compatibility. For Pre-gen12 platforms PAT can have 1:1 mapping to i915_cache_level, so these two are interchangeable. see the use of LEGACY_CACHELEVEL. One consequence of this change is that gen8_pte_encode is no longer working for gen12 platforms due to the fact that gen12 platforms has different PAT definitions. In the meantime the mtl_pte_encode introduced specfically for MTL becomes generic for all gen12 platforms. This patch renames the MTL PTE encode function into gen12_pte_encode and apply it to all gen12. Even though this change looks unrelated, but separating them would temporarily break gen12 PTE encoding, thus squash them in one patch. Special note: this patch changes the way caching behavior is controled in the sense that some objects are left to be managed by userspace. For such objects we need to be careful not to change the userspace settings.There are kerneldoc and comments added around obj->cache_coherent, cache_dirty, and how to bypass the checkings by i915_gem_object_has_cache_level. For full understanding, these changes need to be looked at together with the two follow-up patches, one disables the {set|get}_caching ioctl's and the other adds set_pat extension to the GEM_CREATE uAPI. Bspec: 63019 Cc: Chris Wilson Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 + .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 ++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 44 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 8 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 84 +-- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 18 ++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 52 +--- drivers/gpu/drm/i915/i915_gem.c | 27 +- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 450 insertions(+), 238 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -4
[Intel-gfx] [PATCH v6 0/2] drm/i915: use pat_index instead of cache_level
From: Fei Yang This patch set was posted at https://patchwork.freedesktop.org/series/116868/ Change title since the PTE patch was merged separately. These patches are extracted from series https://patchwork.freedesktop.org/series/115980/ This series refactor the cache policy programming so that the PTE encode functions can be unified across all GEN12 platforms. This refactor is also important in implementing the design which allows uerspace to directly set cache policy for each Buffer Object. v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function v3: disable {get, set}_caching ioctl v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. v6: squash the pte_encode patch because separating them causes bisect probelm. Also addressing some review comments from Tvrtko and Matt. Fei Yang (2): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c| 60 +- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 51 +++- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 8 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 78 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 18 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 52 +--- drivers/gpu/drm/i915/i915_gem.c | 27 ++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 40 files changed, 554 insertions(+), 237 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v6 1/2] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 7 ++ drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 116 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index bc1291887d4f..284e1aa396cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..bf0bd8e11355 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,13 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + /** +* @I915_MAX_CACHE_LEVEL: +* +* Mark the last entry in the enum. Used for defining cachelevel_to_pat +* array for cache_level to pat translation table. +*/ + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 22ec1566d2a7..bb6998d67133 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by
[Intel-gfx] [PATCH v5 2/5] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For such simple cases, using cache_level would help simplify the code. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 45 ++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 25 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 71 drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 52 +--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 391 insertions(+), 240 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, -vm->pte_encode(addr, level, flags)); +vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm,
[Intel-gfx] [PATCH v5 5/5] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 36 ++ tools/include/uapi/drm/i915_drm.h | 36 ++ 4 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..644a936248ad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->pat_set_by_user = true; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 46a19b099ec8..97ac6fb37958 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->pat_set_by_user) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dba7c5a5b25e..03c5c314846e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage
[Intel-gfx] [PATCH v5 3/5] drm/i915: make sure correct pte encode is used
From: Fei Yang PTE encode is platform dependent. After replacing cache_level with pat_index, the newly introduced mtl_pte_encode is actually generic for all gen12 platforms, thus rename it to gen12_pte_encode and apply it to all gen12 platforms. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index f2334a713c4e..d1e3d3b90e95 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, - unsigned int pat_index, - u32 flags) +static u64 gen12_pte_encode(dma_addr_t addr, + unsigned int pat_index, + u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -995,8 +995,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; -- 2.25.1
[Intel-gfx] [PATCH v5 4/5] drm/i915/mtl: end support for set caching ioctl
From: Fei Yang The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For userspace components needing to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow them specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index ae99b4be5918..53282c6d3873 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -337,6 +337,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* +* MTL doesn't snoop CPU cache by default for GPU access (namely +* 1-way coherency). However some UMD's are currently depending on +* that. Make 1-way coherent the default setting for MTL. A follow +* up patch will extend the GEM_CREATE uAPI to allow UMD's specify +* caching mode at BO creation time +*/ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than -- 2.25.1
[Intel-gfx] [PATCH v5 0/5] drm/i915: Allow user to set cache at BO creation
From: Fei Yang The first three patches in this series are taken from https://patchwork.freedesktop.org/series/116868/ These patches are included here because the last patch has dependency on the pat_index refactor. This series is focusing on uAPI changes, 1. end support for set caching ioctl [PATCH 4/5] 2. add set_pat extension for gem_create [PATCH 5/5] v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function v3: rebase on https://patchwork.freedesktop.org/series/117082/ v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. Fei Yang (5): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915: make sure correct pte encode is used drm/i915/mtl: end support for set caching ioctl drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 + drivers/gpu/drm/i915/gem/i915_gem_domain.c| 48 ++- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 66 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 26 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 ++- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 73 + drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 +++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 52 +--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- include/uapi/drm/i915_drm.h | 36 + tools/include/uapi/drm/i915_drm.h | 36 + 44 files changed, 618 insertions(+), 244 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v5 1/5] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..4c92e17b4337 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..41b35abccf88 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,7 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 22ec1566d2a7..bb6998d67133 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by the pat_index patch. +*/ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 100644 --- a/driv
[Intel-gfx] [PATCH v5 3/3] drm/i915: make sure correct pte encode is used
From: Fei Yang PTE encode is platform dependent. After replacing cache_level with pat_index, the newly introduced mtl_pte_encode is actually generic for all gen12 platforms, thus rename it to gen12_pte_encode and apply it to all gen12 platforms. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index f2334a713c4e..d1e3d3b90e95 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, - unsigned int pat_index, - u32 flags) +static u64 gen12_pte_encode(dma_addr_t addr, + unsigned int pat_index, + u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -995,8 +995,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; -- 2.25.1
[Intel-gfx] [PATCH v5 0/3] drm/i915: use pat_index instead of cache_level
From: Fei Yang This patch set was posted at https://patchwork.freedesktop.org/series/116868/ Change title since the PTE patch was merged separately. These patches are extracted from series https://patchwork.freedesktop.org/series/115980/ This series refactor the cache policy programming so that the PTE encode functions can be unified across all GEN12 platforms. This refactor is also important in implementing the design which allows uerspace to directly set cache policy for each Buffer Object. v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function v3: disable {get, set}_caching ioctl v4: fix missing unlock introduced in v3, and solve a rebase conflict v5: replace obj->cache_level with pat_set_by_user, fix i915_cache_level_str() for legacy platforms. Fei Yang (3): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915: make sure correct pte encode is used drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 45 ++- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 60 +- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 26 +- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 73 + drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 +++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 52 +--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 40 files changed, 493 insertions(+), 243 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v5 1/3] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..4c92e17b4337 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..41b35abccf88 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,7 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 22ec1566d2a7..bb6998d67133 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by the pat_index patch. +*/ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 100644 --- a/driv
[Intel-gfx] [PATCH v5 2/3] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For such simple cases, using cache_level would help simplify the code. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 45 ++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 25 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 71 drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 52 +--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 391 insertions(+), 240 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, -vm->pte_encode(addr, level, flags)); +vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm,
[Intel-gfx] [PATCH v4 1/5] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..4c92e17b4337 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..41b35abccf88 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,7 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 22ec1566d2a7..bb6998d67133 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by the pat_index patch. +*/ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 100644 --- a/driv
[Intel-gfx] [PATCH v4 5/5] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 36 ++ tools/include/uapi/drm/i915_drm.h | 36 ++ 4 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..723c3ddd6c74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->cache_level = I915_CACHE_INVAL; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 27c948350b5b..61651f7e5806 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -209,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->cache_level == I915_CACHE_INVAL) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dba7c5a5b25e..03c5c314846e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_E
[Intel-gfx] [PATCH v4 3/5] drm/i915: make sure correct pte encode is used
From: Fei Yang PTE encode is platform dependent. After replacing cache_level with pat_index, the newly introduced mtl_pte_encode is actually generic for all gen12 platforms, thus rename it to gen12_pte_encode and apply it to all gen12 platforms. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index f2334a713c4e..d1e3d3b90e95 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, - unsigned int pat_index, - u32 flags) +static u64 gen12_pte_encode(dma_addr_t addr, + unsigned int pat_index, + u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -995,8 +995,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; -- 2.25.1
[Intel-gfx] [PATCH v4 2/5] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For such simple cases, using cache_level would help simplify the code. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 47 +++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 25 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 71 drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 398 insertions(+), 239 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, -vm->pte_encode(addr, level, flags)); +vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm,
[Intel-gfx] [PATCH v4 4/5] drm/i915/mtl: end support for set caching ioctl
From: Fei Yang The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For userspace components needing to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow them specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 95a0d845d949..e62db612aea6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -338,6 +338,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* +* MTL doesn't snoop CPU cache by default for GPU access (namely +* 1-way coherency). However some UMD's are currently depending on +* that. Make 1-way coherent the default setting for MTL. A follow +* up patch will extend the GEM_CREATE uAPI to allow UMD's specify +* caching mode at BO creation time +*/ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than -- 2.25.1
[Intel-gfx] [PATCH v4 0/5] drm/i915: Allow user to set cache at BO creation
From: Fei Yang The first three patches in this series are taken from https://patchwork.freedesktop.org/series/116868/ These patches are included here because the last patch has dependency on the pat_index refactor. This series is focusing on uAPI changes, 1. end support for set caching ioctl [PATCH 4/5] 2. add set_pat extension for gem_create [PATCH 5/5] v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function v3: rebase on https://patchwork.freedesktop.org/series/117082/ v4: fix missing unlock introduced in v3, and solve a rebase conflict Fei Yang (5): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915: make sure correct pte encode is used drm/i915/mtl: end support for set caching ioctl drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 + drivers/gpu/drm/i915/gem/i915_gem_domain.c| 50 +++- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 67 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 26 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 ++- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 73 + drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 +++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- include/uapi/drm/i915_drm.h | 36 + tools/include/uapi/drm/i915_drm.h | 36 + 44 files changed, 625 insertions(+), 243 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v4 2/3] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For such simple cases, using cache_level would help simplify the code. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 47 +++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 25 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 71 drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 398 insertions(+), 239 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, -vm->pte_encode(addr, level, flags)); +vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm,
[Intel-gfx] [PATCH v4 1/3] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..4c92e17b4337 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..41b35abccf88 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,7 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 22ec1566d2a7..bb6998d67133 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by the pat_index patch. +*/ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 100644 --- a/driv
[Intel-gfx] [PATCH v4 0/3] drm/i915: use pat_index instead of cache_level
From: Fei Yang This patch set was posted at https://patchwork.freedesktop.org/series/116868/ Change title since the PTE patch was merged separately. These patches are extracted from series https://patchwork.freedesktop.org/series/115980/ This series refactor the cache policy programming so that the PTE encode functions can be unified across all GEN12 platforms. This refactor is also important in implementing the design which allows uerspace to directly set cache policy for each Buffer Object. v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function v3: disable {get, set}_caching ioctl v4: fix missing unlock introduced in v3, and solve a rebase conflict Fei Yang (3): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915: make sure correct pte encode is used drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 47 ++- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 61 +- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 26 +- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 73 + drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 +++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 40 files changed, 500 insertions(+), 242 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v4 3/3] drm/i915: make sure correct pte encode is used
From: Fei Yang PTE encode is platform dependent. After replacing cache_level with pat_index, the newly introduced mtl_pte_encode is actually generic for all gen12 platforms, thus rename it to gen12_pte_encode and apply it to all gen12 platforms. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index f2334a713c4e..d1e3d3b90e95 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, - unsigned int pat_index, - u32 flags) +static u64 gen12_pte_encode(dma_addr_t addr, + unsigned int pat_index, + u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -995,8 +995,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; -- 2.25.1
[Intel-gfx] [PATCH v3 2/5] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For such simple cases, using cache_level would help simplify the code. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 43 ++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 25 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 71 drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 394 insertions(+), 239 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, -vm->pte_encode(addr, level, flags)); +vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm,
[Intel-gfx] [PATCH v3 4/5] drm/i915/mtl: end support for set caching ioctl
From: Fei Yang The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For userspace components needing to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow them specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 17375935ce1e..dcd6056ef90d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -336,6 +336,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* +* MTL doesn't snoop CPU cache by default for GPU access (namely +* 1-way coherency). However some UMD's are currently depending on +* that. Make 1-way coherent the default setting for MTL. A follow +* up patch will extend the GEM_CREATE uAPI to allow UMD's specify +* caching mode at BO creation time +*/ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than -- 2.25.1
[Intel-gfx] [PATCH v3 0/5] drm/i915: Allow user to set cache at BO creation
From: Fei Yang The first three patches in this series are taken from https://patchwork.freedesktop.org/series/116868/ These patches are included here because the last patch has dependency on the pat_index refactor. This series is focusing on uAPI changes, 1. end support for set caching ioctl [PATCH 4/5] 2. add set_pat extension for gem_create [PATCH 5/5] v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function v3: rebase on https://patchwork.freedesktop.org/series/117082/ Fei Yang (5): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915: make sure correct pte encode is used drm/i915/mtl: end support for set caching ioctl drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 + drivers/gpu/drm/i915/gem/i915_gem_domain.c| 46 ++- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 67 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 26 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 ++- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 73 + drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 +++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- include/uapi/drm/i915_drm.h | 36 + tools/include/uapi/drm/i915_drm.h | 36 + 44 files changed, 621 insertions(+), 243 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v3 5/5] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 36 ++ tools/include/uapi/drm/i915_drm.h | 36 ++ 4 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..723c3ddd6c74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->cache_level = I915_CACHE_INVAL; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 27c948350b5b..61651f7e5806 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -209,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->cache_level == I915_CACHE_INVAL) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dba7c5a5b25e..03c5c314846e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_E
[Intel-gfx] [PATCH v3 1/5] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..4c92e17b4337 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..41b35abccf88 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,7 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4c9a2f2db908..4c6b760d4774 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by the pat_index patch. +*/ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 100644 --- a/driv
[Intel-gfx] [PATCH v3 3/5] drm/i915: make sure correct pte encode is used
From: Fei Yang PTE encode is platform dependent. After replacing cache_level with pat_index, the newly introduced mtl_pte_encode is actually generic for all gen12 platforms, thus rename it to gen12_pte_encode and apply it to all gen12 platforms. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index ee52e5833c50..81b7725812ce 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, - unsigned int pat_index, - u32 flags) +static u64 gen12_pte_encode(dma_addr_t addr, + unsigned int pat_index, + u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; -- 2.25.1
[Intel-gfx] [PATCH v3 2/3] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For such simple cases, using cache_level would help simplify the code. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 43 ++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 25 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 71 drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 394 insertions(+), 239 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, -vm->pte_encode(addr, level, flags)); +vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm,
[Intel-gfx] [PATCH v3 1/3] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..4c92e17b4337 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..41b35abccf88 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,7 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4c9a2f2db908..4c6b760d4774 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by the pat_index patch. +*/ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 100644 --- a/driv
[Intel-gfx] [PATCH v3 3/3] drm/i915: make sure correct pte encode is used
From: Fei Yang PTE encode is platform dependent. After replacing cache_level with pat_index, the newly introduced mtl_pte_encode is actually generic for all gen12 platforms, thus rename it to gen12_pte_encode and apply it to all gen12 platforms. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index ee52e5833c50..81b7725812ce 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, - unsigned int pat_index, - u32 flags) +static u64 gen12_pte_encode(dma_addr_t addr, + unsigned int pat_index, + u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; -- 2.25.1
[Intel-gfx] [PATCH v3 0/3] drm/i915: use pat_index instead of cache_level
From: Fei Yang This patch set was posted at https://patchwork.freedesktop.org/series/116868/ Change title since the PTE patch was merged separately. These patches are extracted from series https://patchwork.freedesktop.org/series/115980/ This series refactor the cache policy programming so that the PTE encode functions can be unified across all GEN12 platforms. This refactor is also important in implementing the design which allows uerspace to directly set cache policy for each Buffer Object. v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function v3: disable {get, set}_caching ioctl Fei Yang (3): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915: make sure correct pte encode is used drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 43 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 61 +- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 26 +- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 73 + drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 +++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 40 files changed, 496 insertions(+), 242 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v2 4/5] drm/i915/mtl: end support for set caching ioctl
From: Fei Yang The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For userspace components needing to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow them specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 89938084af97..d5fd4c9cd9f8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -328,6 +328,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* +* MTL doesn't snoop CPU cache by default for GPU access (namely +* 1-way coherency). However some UMD's are currently depending on +* that. Make 1-way coherent the default setting for MTL. A follow +* up patch will extend the GEM_CREATE uAPI to allow UMD's specify +* caching mode at BO creation time +*/ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than -- 2.25.1
[Intel-gfx] [PATCH v2 2/5] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For such simple cases, using cache_level would help simplify the code. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 25 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 71 drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 378 insertions(+), 239 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, -vm->pte_encode(addr, level, flags)); +vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *st
[Intel-gfx] [PATCH v2 5/5] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 36 ++ tools/include/uapi/drm/i915_drm.h | 36 ++ 4 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..723c3ddd6c74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->cache_level = I915_CACHE_INVAL; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 27c948350b5b..61651f7e5806 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -209,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->cache_level == I915_CACHE_INVAL) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dba7c5a5b25e..03c5c314846e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_E
[Intel-gfx] [PATCH v2 1/5] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..4c92e17b4337 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..41b35abccf88 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,7 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4c9a2f2db908..4c6b760d4774 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by the pat_index patch. +*/ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 1
[Intel-gfx] [PATCH v2 0/5] drm/i915: Allow user to set cache at BO creation
From: Fei Yang The first three patches in this series are taken from https://patchwork.freedesktop.org/series/116868/ These patches are included here because the last patch has dependency on the pat_index refactor. This series is focusing on uAPI changes, 1. end support for set caching ioctl [PATCH 4/5] 2. add set_pat extension for gem_create [PATCH 5/5] v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function Fei Yang (5): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915: make sure correct pte encode is used drm/i915/mtl: end support for set caching ioctl drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 + drivers/gpu/drm/i915/gem/i915_gem_domain.c| 30 +++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 67 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 26 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 ++- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 73 + drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 +++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- include/uapi/drm/i915_drm.h | 36 + tools/include/uapi/drm/i915_drm.h | 36 + 44 files changed, 605 insertions(+), 243 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v2 3/5] drm/i915: make sure correct pte encode is used
From: Fei Yang PTE encode is platform dependent. After replacing cache_level with pat_index, the newly introduced mtl_pte_encode is actually generic for all gen12 platforms, thus rename it to gen12_pte_encode and apply it to all gen12 platforms. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index ee52e5833c50..81b7725812ce 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, - unsigned int pat_index, - u32 flags) +static u64 gen12_pte_encode(dma_addr_t addr, + unsigned int pat_index, + u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; -- 2.25.1
[Intel-gfx] [PATCH v2 2/3] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For such simple cases, using cache_level would help simplify the code. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 25 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 71 drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 378 insertions(+), 239 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, -vm->pte_encode(addr, level, flags)); +vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *st
[Intel-gfx] [PATCH v2 1/3] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..4c92e17b4337 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..41b35abccf88 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,7 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4c9a2f2db908..4c6b760d4774 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by the pat_index patch. +*/ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 1
[Intel-gfx] [PATCH v2 3/3] drm/i915: make sure correct pte encode is used
From: Fei Yang PTE encode is platform dependent. After replacing cache_level with pat_index, the newly introduced mtl_pte_encode is actually generic for all gen12 platforms, thus rename it to gen12_pte_encode and apply it to all gen12 platforms. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index ee52e5833c50..81b7725812ce 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, - unsigned int pat_index, - u32 flags) +static u64 gen12_pte_encode(dma_addr_t addr, + unsigned int pat_index, + u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; -- 2.25.1
[Intel-gfx] [PATCH v2 0/3] drm/i915/mtl: add PTE encode function
From: Fei Yang These patches are extracted from series https://patchwork.freedesktop.org/series/115980/ This series refactor the cache policy programming so that the PTE encode functions can be unified across all GEN12 platforms. This refactor is also important in implementing the design which allows uerspace to directly set cache policy for each Buffer Object. v2: drop one patch that was merged separately 341ad0e8e254 drm/i915/mtl: Add PTE encode function Fei Yang (3): drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915: make sure correct pte encode is used drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 +++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 61 +- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 26 +- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 73 + drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 76 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 +++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 40 files changed, 480 insertions(+), 242 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v2 2/2] drm/i915/mtl: workaround coherency issue for Media
From: Fei Yang This patch implements Wa_22016122933. In MTL, memory writes initiated by the Media tile update the whole cache line, even for partial writes. This creates a coherency problem for cacheable memory if both CPU and GPU are writing data to different locations within a single cache line. This patch circumvents the issue by making CPU/GPU shared memory uncacheable (WC on CPU side, and PAT index 2 for GPU). Additionally, it ensures that CPU writes are visible to the GPU with an intel_guc_write_barrier(). While fixing the CTB issue, we noticed some random GSC firmware loading failure because the share buffers are cacheable (WB) on CPU side but uncached on GPU side. To fix these issues we need to map such shared buffers as WC on CPU side. Since such allocations are not all done through GuC allocator, to avoid too many code changes, the i915_coherent_map_type() is now hard coded to return WC for MTL. v2: Simplify the commit message(Matt). BSpec: 45101 Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Acked-by: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Matt Roper Signed-off-by: Nirmoy Das --- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 5 - drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 13 + drivers/gpu/drm/i915/gt/uc/intel_guc.c| 7 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 ++ 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index ecd86130b74f..89fc8ea6bcfc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -469,7 +469,10 @@ enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, bool always_coherent) { - if (i915_gem_object_is_lmem(obj)) + /* +* Wa_22016122933: always return I915_MAP_WC for MTL +*/ + if (i915_gem_object_is_lmem(obj) || IS_METEORLAKE(i915)) return I915_MAP_WC; if (HAS_LLC(i915) || always_coherent) return I915_MAP_WB; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 1d9fdfb11268..236673c02f9a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -110,6 +110,13 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc) if (obj->base.size < gsc->fw.size) return -ENOSPC; + /* +* Wa_22016122933: For MTL the shared memory needs to be mapped +* as WC on CPU side and UC (PAT index 2) on GPU side +*/ + if (IS_METEORLAKE(i915)) + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + dst = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(i915, obj, true)); if (IS_ERR(dst)) @@ -125,6 +132,12 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc) memset(dst, 0, obj->base.size); memcpy(dst, src, gsc->fw.size); + /* +* Wa_22016122933: Making sure the data in dst is +* visible to GSC right away +*/ + intel_guc_write_barrier(>->uc.guc); + i915_gem_object_unpin_map(gsc->fw.obj); i915_gem_object_unpin_map(obj); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index e89f16ecf1ae..c9f20385f6a0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -744,6 +744,13 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) if (IS_ERR(obj)) return ERR_CAST(obj); + /* +* Wa_22016122933: For MTL the shared memory needs to be mapped +* as WC on CPU side and UC (PAT index 2) on GPU side +*/ + if (IS_METEORLAKE(gt->i915)) + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) goto err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 1803a633ed64..99a0a89091e7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -902,6 +902,12 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) /* now update descriptor */ WRITE_ONCE(desc->head, head); + /* +* Wa_22016122933: Making sure the head update is +* visible to GuC right away +*/ + intel_guc_write_barrier(ct_to_guc(ct)); + return available - len; corrupted: -- 2.25.1
[Intel-gfx] [PATCH v2 0/2] drm/i915/mtl: Add PTE encode functions
From: Fei Yang Extract PTE patch from https://patchwork.freedesktop.org/series/116868/ to fix MTL boot issue caused by MOCS/PAT update. v2: address comment from Matt. Fei Yang (2): drm/i915/mtl: Add PTE encode function drm/i915/mtl: workaround coherency issue for Media drivers/gpu/drm/i915/display/intel_dpt.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 5 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 +++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 -- drivers/gpu/drm/i915/gt/intel_gtt.h | 12 +- drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 13 +++ drivers/gpu/drm/i915/gt/uc/intel_guc.c| 7 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 +++ 8 files changed, 112 insertions(+), 14 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v2 1/2] drm/i915/mtl: Add PTE encode function
From: Fei Yang PTE encode functions are platform dependent. This patch implements PTE functions for MTL, and ensures the correct PTE encode function is used by calling pte_encode function pointer instead of the hardcoded gen8 version of PTE encode. Fixes: b76c0deef627 ("drm/i915/mtl: Define MOCS and PAT tables for MTL") Signed-off-by: Fei Yang Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Acked-by: Nirmoy Das --- drivers/gpu/drm/i915/display/intel_dpt.c | 2 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 12 +-- 4 files changed, 82 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index b8027392144d..c5eacfdba1a5 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb) vm->vma_ops.bind_vma= dpt_bind_vma; vm->vma_ops.unbind_vma = dpt_unbind_vma; - vm->pte_encode = gen8_ggtt_pte_encode; + vm->pte_encode = vm->gt->ggtt->vm.pte_encode; dpt->obj = dpt_obj; dpt->obj->is_dpt = true; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4daaa6f55668..4c9a2f2db908 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } +static u64 mtl_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~GEN8_PAGE_RW; + + if (flags & PTE_LM) + pte |= GEN12_PPGTT_PTE_LM; + + switch (level) { + case I915_CACHE_NONE: + pte |= GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_LLC: + case I915_CACHE_L3_LLC: + pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_WT: + pte |= GEN12_PPGTT_PTE_PAT0; + break; + } + + return pte; +} + static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { struct drm_i915_private *i915 = ppgtt->vm.i915; @@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, u32 flags) { struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags); gen8_pte_t *vaddr; pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); @@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, enum i915_cache_level cache_level, u32 flags) { - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); unsigned int rem = sg_dma_len(iter->sg); u64 start = vma_res->start; @@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, GEM_BUG_ON(pt->is_compact); vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags); drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); } @@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, } vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags); } static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, @@ -820,8 +848,8 @@ static int gen8_init_scratch(struct i915_address_space *vm) pte_flags |= PTE_LM; vm->scratch[0]->encode = - gen8_pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_NONE, pte_flags); + vm->pte_encode(px_dma(vm->scratch[0]), + I915_CACHE_NONE, pte_flags); for (i = 1; i <= vm->top; i++) { struct drm_i915_gem_object *obj; @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - ppgtt->vm.pte_encode = gen8_pte_encode; + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + ppgtt->vm.pte_encode = mtl_p
[Intel-gfx] [PATCH 2/2] drm/i915/mtl: workaround coherency issue for Media
From: Fei Yang This patch implements Wa_22016122933. In MTL, memory writes initiated by the Media tile update the whole cache line, even for partial writes. This creates a coherency problem for cacheable memory if both CPU and GPU are writing data to different locations within a single cache line. This patch circumvents the issue by making CPU/GPU shared memory uncacheable (WC on CPU side, and PAT index 2 for GPU). Additionally, it ensures that CPU writes are visible to the GPU with an intel_guc_write_barrier(). While fixing the CTB issue, we noticed some random GSC firmware loading failure because the share buffers are cacheable (WB) on CPU side but uncached on GPU side. To fix these issues we need to map such shared buffers as WC on CPU side. Since such allocations are not all done through GuC allocator, to avoid too many code changes, the i915_coherent_map_type() is now hard coded to return WC for MTL. v2: Simplify the commit message(Matt). BSpec: 45101 Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Acked-by: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Matt Roper Signed-off-by: Nirmoy Das --- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 5 - drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 13 + drivers/gpu/drm/i915/gt/uc/intel_guc.c| 7 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 ++ 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index ecd86130b74f..89fc8ea6bcfc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -469,7 +469,10 @@ enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, bool always_coherent) { - if (i915_gem_object_is_lmem(obj)) + /* +* Wa_22016122933: always return I915_MAP_WC for MTL +*/ + if (i915_gem_object_is_lmem(obj) || IS_METEORLAKE(i915)) return I915_MAP_WC; if (HAS_LLC(i915) || always_coherent) return I915_MAP_WB; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 1d9fdfb11268..236673c02f9a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -110,6 +110,13 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc) if (obj->base.size < gsc->fw.size) return -ENOSPC; + /* +* Wa_22016122933: For MTL the shared memory needs to be mapped +* as WC on CPU side and UC (PAT index 2) on GPU side +*/ + if (IS_METEORLAKE(i915)) + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + dst = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(i915, obj, true)); if (IS_ERR(dst)) @@ -125,6 +132,12 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc) memset(dst, 0, obj->base.size); memcpy(dst, src, gsc->fw.size); + /* +* Wa_22016122933: Making sure the data in dst is +* visible to GSC right away +*/ + intel_guc_write_barrier(>->uc.guc); + i915_gem_object_unpin_map(gsc->fw.obj); i915_gem_object_unpin_map(obj); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index e89f16ecf1ae..c9f20385f6a0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -744,6 +744,13 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) if (IS_ERR(obj)) return ERR_CAST(obj); + /* +* Wa_22016122933: For MTL the shared memory needs to be mapped +* as WC on CPU side and UC (PAT index 2) on GPU side +*/ + if (IS_METEORLAKE(gt->i915)) + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) goto err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 1803a633ed64..99a0a89091e7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -902,6 +902,12 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) /* now update descriptor */ WRITE_ONCE(desc->head, head); + /* +* Wa_22016122933: Making sure the head update is +* visible to GuC right away +*/ + intel_guc_write_barrier(ct_to_guc(ct)); + return available - len; corrupted: -- 2.25.1
[Intel-gfx] [PATCH 1/2] drm/i915/mtl: Add PTE encode function
From: Fei Yang PTE encode functions are platform dependent. This patch implements PTE functions for MTL, and ensures the correct PTE encode function is used by calling pte_encode function pointer instead of the hardcoded gen8 version of PTE encode. Fixes: b76c0deef627 ("drm/i915/mtl: Define MOCS and PAT tables for MTL") Signed-off-by: Fei Yang Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Acked-by: Nirmoy Das --- drivers/gpu/drm/i915/display/intel_dpt.c | 2 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 13 +-- 4 files changed, 83 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index b8027392144d..c5eacfdba1a5 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb) vm->vma_ops.bind_vma= dpt_bind_vma; vm->vma_ops.unbind_vma = dpt_unbind_vma; - vm->pte_encode = gen8_ggtt_pte_encode; + vm->pte_encode = vm->gt->ggtt->vm.pte_encode; dpt->obj = dpt_obj; dpt->obj->is_dpt = true; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4daaa6f55668..11b91e0453c8 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } +static u64 mtl_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~GEN8_PAGE_RW; + + if (flags & PTE_LM) + pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC; + + switch (level) { + case I915_CACHE_NONE: + pte |= GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_LLC: + case I915_CACHE_L3_LLC: + pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_WT: + pte |= GEN12_PPGTT_PTE_PAT0; + break; + } + + return pte; +} + static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { struct drm_i915_private *i915 = ppgtt->vm.i915; @@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, u32 flags) { struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags); gen8_pte_t *vaddr; pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); @@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, enum i915_cache_level cache_level, u32 flags) { - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); unsigned int rem = sg_dma_len(iter->sg); u64 start = vma_res->start; @@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, GEM_BUG_ON(pt->is_compact); vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags); drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); } @@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, } vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags); } static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, @@ -820,8 +848,8 @@ static int gen8_init_scratch(struct i915_address_space *vm) pte_flags |= PTE_LM; vm->scratch[0]->encode = - gen8_pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_NONE, pte_flags); + vm->pte_encode(px_dma(vm->scratch[0]), + I915_CACHE_NONE, pte_flags); for (i = 1; i <= vm->top; i++) { struct drm_i915_gem_object *obj; @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - ppgtt->vm.pte_encode = gen8_pte_encode; + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + ppgtt->
[Intel-gfx] [PATCH 0/2] drm/i915/mtl: Add PTE encode functions
From: Fei Yang Extract PTE patch from https://patchwork.freedesktop.org/series/116868/ to fix MTL boot issue caused by MOCS/PAT update. Fei Yang (2): drm/i915/mtl: Add PTE encode function drm/i915/mtl: workaround coherency issue for Media drivers/gpu/drm/i915/display/intel_dpt.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 5 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 +++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 -- drivers/gpu/drm/i915/gt/intel_gtt.h | 13 ++- drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 13 +++ drivers/gpu/drm/i915/gt/uc/intel_guc.c| 7 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 +++ 8 files changed, 113 insertions(+), 14 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v1 6/6] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 36 ++ tools/include/uapi/drm/i915_drm.h | 36 ++ 4 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..723c3ddd6c74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->cache_level = I915_CACHE_INVAL; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 27c948350b5b..61651f7e5806 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -209,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->cache_level == I915_CACHE_INVAL) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dba7c5a5b25e..03c5c314846e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_E
[Intel-gfx] [PATCH v1 3/6] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For such simple cases, using cache_level would help simplify the code. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 25 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 71 drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 378 insertions(+), 239 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, -vm->pte_encode(addr, level, flags)); +vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *st
[Intel-gfx] [PATCH v1 1/6] drm/i915/mtl: Add PTE encode function
From: Fei Yang PTE encode functions are platform dependent. This patch implements PTE functions for MTL, and ensures the correct PTE encode function is used by calling pte_encode function pointer instead of the hardcoded gen8 version of PTE encode. Signed-off-by: Fei Yang Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Acked-by: Nirmoy Das --- drivers/gpu/drm/i915/display/intel_dpt.c | 2 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 13 +-- 4 files changed, 83 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index b8027392144d..c5eacfdba1a5 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb) vm->vma_ops.bind_vma= dpt_bind_vma; vm->vma_ops.unbind_vma = dpt_unbind_vma; - vm->pte_encode = gen8_ggtt_pte_encode; + vm->pte_encode = vm->gt->ggtt->vm.pte_encode; dpt->obj = dpt_obj; dpt->obj->is_dpt = true; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4daaa6f55668..11b91e0453c8 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } +static u64 mtl_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~GEN8_PAGE_RW; + + if (flags & PTE_LM) + pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC; + + switch (level) { + case I915_CACHE_NONE: + pte |= GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_LLC: + case I915_CACHE_L3_LLC: + pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_WT: + pte |= GEN12_PPGTT_PTE_PAT0; + break; + } + + return pte; +} + static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { struct drm_i915_private *i915 = ppgtt->vm.i915; @@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, u32 flags) { struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags); gen8_pte_t *vaddr; pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); @@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, enum i915_cache_level cache_level, u32 flags) { - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); unsigned int rem = sg_dma_len(iter->sg); u64 start = vma_res->start; @@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, GEM_BUG_ON(pt->is_compact); vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags); drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); } @@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, } vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags); } static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, @@ -820,8 +848,8 @@ static int gen8_init_scratch(struct i915_address_space *vm) pte_flags |= PTE_LM; vm->scratch[0]->encode = - gen8_pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_NONE, pte_flags); + vm->pte_encode(px_dma(vm->scratch[0]), + I915_CACHE_NONE, pte_flags); for (i = 1; i <= vm->top; i++) { struct drm_i915_gem_object *obj; @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - ppgtt->vm.pte_encode = gen8_pte_encode; + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + ppgtt->vm.pte_encode = mtl_pte_encode; + else + ppgtt->vm.pte_encode =
[Intel-gfx] [PATCH v1 5/6] drm/i915/mtl: end support for set caching ioctl
From: Fei Yang The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For userspace components needing to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow them specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 89938084af97..d5fd4c9cd9f8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -328,6 +328,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* +* MTL doesn't snoop CPU cache by default for GPU access (namely +* 1-way coherency). However some UMD's are currently depending on +* that. Make 1-way coherent the default setting for MTL. A follow +* up patch will extend the GEM_CREATE uAPI to allow UMD's specify +* caching mode at BO creation time +*/ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than -- 2.25.1
[Intel-gfx] [PATCH v1 4/6] drm/i915: make sure correct pte encode is used
From: Fei Yang PTE encode is platform dependent. After replacing cache_level with pat_index, the newly introduced mtl_pte_encode is actually generic for all gen12 platforms, thus rename it to gen12_pte_encode and apply it to all gen12 platforms. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index c046813514f4..a738a25dd857 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, - unsigned int pat_index, - u32 flags) +static u64 gen12_pte_encode(dma_addr_t addr, + unsigned int pat_index, + u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; -- 2.25.1
[Intel-gfx] [PATCH v1 2/6] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..4c92e17b4337 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..41b35abccf88 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,7 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 11b91e0453c8..7a4b1d1afce9 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by the pat_index patch. +*/ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 1
[Intel-gfx] [PATCH v1 0/6] drm/i915: Allow user to set cache at BO creation
From: Fei Yang The first four patches in this series are taken from https://patchwork.freedesktop.org/series/116868/ These patches are included here because the last patch has dependency on the pat_index refactor. This series is focusing on uAPI changes, 1. end support for set caching ioctl [PATCH 5/6] 2. add set_pat extension for gem_create [PATCH 6/6] Fei Yang (6): drm/i915/mtl: Add PTE encode function drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915: make sure correct pte encode is used drm/i915/mtl: end support for set caching ioctl drm/i915: Allow user to set cache at BO creation drivers/gpu/drm/i915/display/intel_dpt.c | 14 ++-- drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 30 +++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 67 ++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 26 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 +- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 76 - drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 84 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 31 --- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 +--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 +++-- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 ++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- include/uapi/drm/i915_drm.h | 36 tools/include/uapi/drm/i915_drm.h | 36 44 files changed, 652 insertions(+), 220 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v1 4/4] drm/i915: make sure correct pte encode is used
From: Fei Yang PTE encode is platform dependent. After replacing cache_level with pat_index, the newly introduced mtl_pte_encode is actually generic for all gen12 platforms, thus rename it to gen12_pte_encode and apply it to all gen12 platforms. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index c046813514f4..a738a25dd857 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, - unsigned int pat_index, - u32 flags) +static u64 gen12_pte_encode(dma_addr_t addr, + unsigned int pat_index, + u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; -- 2.25.1
[Intel-gfx] [PATCH v1 1/4] drm/i915/mtl: Add PTE encode function
From: Fei Yang PTE encode functions are platform dependent. This patch implements PTE functions for MTL, and ensures the correct PTE encode function is used by calling pte_encode function pointer instead of the hardcoded gen8 version of PTE encode. Signed-off-by: Fei Yang Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Acked-by: Nirmoy Das --- drivers/gpu/drm/i915/display/intel_dpt.c | 2 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 13 +-- 4 files changed, 83 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index b8027392144d..c5eacfdba1a5 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb) vm->vma_ops.bind_vma= dpt_bind_vma; vm->vma_ops.unbind_vma = dpt_unbind_vma; - vm->pte_encode = gen8_ggtt_pte_encode; + vm->pte_encode = vm->gt->ggtt->vm.pte_encode; dpt->obj = dpt_obj; dpt->obj->is_dpt = true; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4daaa6f55668..11b91e0453c8 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } +static u64 mtl_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~GEN8_PAGE_RW; + + if (flags & PTE_LM) + pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC; + + switch (level) { + case I915_CACHE_NONE: + pte |= GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_LLC: + case I915_CACHE_L3_LLC: + pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_WT: + pte |= GEN12_PPGTT_PTE_PAT0; + break; + } + + return pte; +} + static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { struct drm_i915_private *i915 = ppgtt->vm.i915; @@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, u32 flags) { struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags); gen8_pte_t *vaddr; pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); @@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, enum i915_cache_level cache_level, u32 flags) { - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); unsigned int rem = sg_dma_len(iter->sg); u64 start = vma_res->start; @@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, GEM_BUG_ON(pt->is_compact); vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags); drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); } @@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, } vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags); } static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, @@ -820,8 +848,8 @@ static int gen8_init_scratch(struct i915_address_space *vm) pte_flags |= PTE_LM; vm->scratch[0]->encode = - gen8_pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_NONE, pte_flags); + vm->pte_encode(px_dma(vm->scratch[0]), + I915_CACHE_NONE, pte_flags); for (i = 1; i <= vm->top; i++) { struct drm_i915_gem_object *obj; @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - ppgtt->vm.pte_encode = gen8_pte_encode; + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + ppgtt->vm.pte_encode = mtl_pte_encode; + else + ppgtt->vm.pte_encode =
[Intel-gfx] [PATCH v1 2/4] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..4c92e17b4337 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..41b35abccf88 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,7 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 11b91e0453c8..7a4b1d1afce9 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by the pat_index patch. +*/ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 1
[Intel-gfx] [PATCH v1 3/4] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For such simple cases, using cache_level would help simplify the code. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 25 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 71 drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 378 insertions(+), 239 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, -vm->pte_encode(addr, level, flags)); +vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *st
[Intel-gfx] [PATCH v1 0/4] drm/i915/mtl: add PTE encode function
From: Fei Yang These patches are extracted from series https://patchwork.freedesktop.org/series/115980/ This series start with adding PTE encode functions for MTL as it can no longer reuse the PTE encode functions for GEN8 due to PAT index changes. Then there are patches refactoring the cache policy programming so that the PTE encode functions can be unified across all GEN12 platforms. This refactor is also important in implementing the design which allows uerspace to directly set cache policy for each Buffer Object. Fei Yang (4): drm/i915/mtl: Add PTE encode function drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915: make sure correct pte encode is used drivers/gpu/drm/i915/display/intel_dpt.c | 14 ++-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 61 +- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 26 +- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 76 - drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 84 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 31 --- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 +--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 +++-- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 ++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 40 files changed, 527 insertions(+), 219 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v4 5/8] drm/i915: use pat_index instead of cache_level
From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. >From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For such simple cases, using cache_level would help simplify the code. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 25 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 71 drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 378 insertions(+), 239 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, -vm->pte_encode(addr, level, flags)); +vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *st
[Intel-gfx] [PATCH v4 4/8] drm/i915: preparation for using PAT index
From: Fei Yang This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index supported by the hardware. Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 --- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..4c92e17b4337 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..41b35abccf88 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,7 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 11b91e0453c8..7a4b1d1afce9 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile +* error due to the addition of I915_MAX_CACHE_LEVEL. Will +* be removed by the pat_index patch. +*/ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c ++
[Intel-gfx] [PATCH v4 7/8] drm/i915/mtl: end support for set caching ioctl
From: Fei Yang The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For UMD's need to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow UMD's specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 89938084af97..d5fd4c9cd9f8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -328,6 +328,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* +* MTL doesn't snoop CPU cache by default for GPU access (namely +* 1-way coherency). However some UMD's are currently depending on +* that. Make 1-way coherent the default setting for MTL. A follow +* up patch will extend the GEM_CREATE uAPI to allow UMD's specify +* caching mode at BO creation time +*/ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than -- 2.25.1
[Intel-gfx] [PATCH v4 8/8] drm/i915: Allow user to set cache at BO creation
From: Fei Yang To comply with the design that buffer objects shall have immutable cache setting through out their life cycle, {set, get}_caching ioctl's are no longer supported from MTL onward. With that change caching policy can only be set at object creation time. The current code applies a default (platform dependent) cache setting for all objects. However this is not optimal for performance tuning. The patch extends the existing gem_create uAPI to let user set PAT index for the object at creation time. The new extension is platform independent, so UMD's can switch to using this extension for older platforms as well, while {set, get}_caching are still supported on these legacy paltforms for compatibility reason. Cc: Chris Wilson Cc: Matt Roper Cc: Andi Shyti Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 include/uapi/drm/i915_drm.h| 36 ++ tools/include/uapi/drm/i915_drm.h | 36 ++ 4 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index bfe1dbda4cb7..723c3ddd6c74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -245,6 +245,7 @@ struct create_ext { unsigned int n_placements; unsigned int placement_mask; unsigned long flags; + unsigned int pat_index; }; static void repr_placements(char *buf, size_t size, @@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data return 0; } +static int ext_set_pat(struct i915_user_extension __user *base, void *data) +{ + struct create_ext *ext_data = data; + struct drm_i915_private *i915 = ext_data->i915; + struct drm_i915_gem_create_ext_set_pat ext; + unsigned int max_pat_index; + + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + + if (copy_from_user(&ext, base, sizeof(ext))) + return -EFAULT; + + max_pat_index = INTEL_INFO(i915)->max_pat_index; + + if (ext.pat_index > max_pat_index) { + drm_dbg(&i915->drm, "PAT index is invalid: %u\n", + ext.pat_index); + return -EINVAL; + } + + ext_data->pat_index = ext.pat_index; + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, }; +#define PAT_INDEX_NOT_SET 0x /** * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; + ext_data.pat_index = PAT_INDEX_NOT_SET; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), @@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (IS_ERR(obj)) return PTR_ERR(obj); + if (ext_data.pat_index != PAT_INDEX_NOT_SET) { + i915_gem_object_set_pat_index(obj, ext_data.pat_index); + /* Mark pat_index is set by UMD */ + obj->cache_level = I915_CACHE_INVAL; + } + return i915_gem_publish(obj, file, &args->size, &args->handle); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 27c948350b5b..61651f7e5806 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -209,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) if (!(obj->flags & I915_BO_ALLOC_USER)) return false; + /* +* Always flush cache for UMD objects at creation time. +*/ + if (obj->cache_level == I915_CACHE_INVAL) + return true; + /* * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it * possible for userspace to bypass the GTT caching bits set by the diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dba7c5a5b25e..03c5c314846e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_E
[Intel-gfx] [PATCH v4 6/8] drm/i915: make sure correct pte encode is used
From: Fei Yang PTE encode is platform dependent. After replacing cache_level with pat_index, the newly introduced mtl_pte_encode is actually generic for all gen12 platforms, thus rename it to gen12_pte_encode and apply it to all gen12 platforms. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index c046813514f4..3373b2aeae86 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,7 +55,7 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, +static u64 gen12_pte_encode(dma_addr_t addr, unsigned int pat_index, u32 flags) { @@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; -- 2.25.1
[Intel-gfx] [PATCH v4 3/8] drm/i915/mtl: Add PTE encode function
From: Fei Yang PTE encode functions are platform dependent. This patch implements PTE functions for MTL, and ensures the correct PTE encode function is used by calling pte_encode function pointer instead of the hardcoded gen8 version of PTE encode. Signed-off-by: Fei Yang Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Acked-by: Nirmoy Das --- drivers/gpu/drm/i915/display/intel_dpt.c | 2 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 +-- drivers/gpu/drm/i915/gt/intel_gtt.h | 13 +-- 4 files changed, 83 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index b8027392144d..c5eacfdba1a5 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb) vm->vma_ops.bind_vma= dpt_bind_vma; vm->vma_ops.unbind_vma = dpt_unbind_vma; - vm->pte_encode = gen8_ggtt_pte_encode; + vm->pte_encode = vm->gt->ggtt->vm.pte_encode; dpt->obj = dpt_obj; dpt->obj->is_dpt = true; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4daaa6f55668..11b91e0453c8 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } +static u64 mtl_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~GEN8_PAGE_RW; + + if (flags & PTE_LM) + pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC; + + switch (level) { + case I915_CACHE_NONE: + pte |= GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_LLC: + case I915_CACHE_L3_LLC: + pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_WT: + pte |= GEN12_PPGTT_PTE_PAT0; + break; + } + + return pte; +} + static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { struct drm_i915_private *i915 = ppgtt->vm.i915; @@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, u32 flags) { struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags); gen8_pte_t *vaddr; pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); @@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, enum i915_cache_level cache_level, u32 flags) { - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); unsigned int rem = sg_dma_len(iter->sg); u64 start = vma_res->start; @@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, GEM_BUG_ON(pt->is_compact); vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags); drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); } @@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, } vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags); } static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, @@ -820,8 +848,8 @@ static int gen8_init_scratch(struct i915_address_space *vm) pte_flags |= PTE_LM; vm->scratch[0]->encode = - gen8_pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_NONE, pte_flags); + vm->pte_encode(px_dma(vm->scratch[0]), + I915_CACHE_NONE, pte_flags); for (i = 1; i <= vm->top; i++) { struct drm_i915_gem_object *obj; @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - ppgtt->vm.pte_encode = gen8_pte_encode; + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + ppgtt->vm.pte_encode = mtl_pte_encode; + else + ppgtt->vm.pte_encode =
[Intel-gfx] [PATCH v4 0/8] drm/i915/mtl: Define MOCS and PAT tables for MTL
From: Fei Yang The series includes patches needed to enable MTL. Also add new extension for GEM_CREATE uAPI to let user space set cache policy for buffer objects. v2: addressing review comments and checkpatch warnings v3: make mtl_ggtt_pte_encode static v4: addressing more comments from Matt drop two patches from previous version (to be merged separately) extend mtl_pte_encode to cover all gen12 Fei Yang (7): drm/i915/mtl: fix mocs selftest drm/i915/mtl: Add PTE encode function drm/i915: preparation for using PAT index drm/i915: use pat_index instead of cache_level drm/i915: make sure correct pte encode is used drm/i915/mtl: end support for set caching ioctl drm/i915: Allow user to set cache at BO creation Madhumitha Tolakanahalli Pradeep (1): drm/i915/mtl: Define MOCS and PAT tables for MTL drivers/gpu/drm/i915/display/intel_dpt.c | 14 ++-- drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 30 +++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 67 ++- drivers/gpu/drm/i915/gem/i915_gem_object.h| 8 ++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 26 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 +- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 76 - drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 84 +-- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 6 +- drivers/gpu/drm/i915/gt/intel_gtt.c | 47 ++- drivers/gpu/drm/i915/gt/intel_gtt.h | 37 +--- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_mocs.c | 70 +++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- drivers/gpu/drm/i915/gt/selftest_mocs.c | 3 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 55 +--- drivers/gpu/drm/i915/i915_gem.c | 16 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_pci.c | 79 +++-- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/intel_device_info.h | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 ++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- include/uapi/drm/i915_drm.h | 36 tools/include/uapi/drm/i915_drm.h | 36 48 files changed, 781 insertions(+), 223 deletions(-) -- 2.25.1
[Intel-gfx] [PATCH v4 1/8] drm/i915/mtl: Define MOCS and PAT tables for MTL
From: Madhumitha Tolakanahalli Pradeep On MTL, GT can no longer allocate on LLC - only the CPU can. This, along with programming new register bits that MTL requires calls for a MOCS/PAT table update. Also the PAT index registers are multicasted for primary GT, and there is an address jump from index 7 to 8. This patch makes sure that these registers are programmed in the proper way. BSpec: 44509, 45101, 44235 Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Madhumitha Tolakanahalli Pradeep Signed-off-by: Aravind Iddamsetty Signed-off-by: Nirmoy Das Signed-off-by: Fei Yang Reviewed-by: Andrzej Hajda Reviewed-by: Nirmoy Das Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 6 ++- drivers/gpu/drm/i915/gt/intel_gtt.c | 47 - drivers/gpu/drm/i915/gt/intel_gtt.h | 8 +++ drivers/gpu/drm/i915/gt/intel_mocs.c| 70 - 4 files changed, 128 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index fd1f9cd35e9d..e8c3b762a92a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -356,7 +356,11 @@ #define GEN7_TLB_RD_ADDR _MMIO(0x4700) #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) -#define XEHP_PAT_INDEX(index) MCR_REG(0x4800 + (index) * 4) +#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ + 0x4800, 0x4804, \ + 0x4848, 0x484c) +#define XEHP_PAT_INDEX(index) MCR_REG(_PAT_INDEX(index)) +#define XELPMP_PAT_INDEX(index)_MMIO(_PAT_INDEX(index)) #define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900) #define XEHP_TILE_LMEM_RANGE_SHIFT 8 diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 4f436ba7a3c8..2f6a9be0ffe6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -468,6 +468,44 @@ void gtt_write_workarounds(struct intel_gt *gt) } } +static void xelpmp_setup_private_ppat(struct intel_uncore *uncore) +{ + intel_uncore_write(uncore, XELPMP_PAT_INDEX(0), + MTL_PPAT_L4_0_WB); + intel_uncore_write(uncore, XELPMP_PAT_INDEX(1), + MTL_PPAT_L4_1_WT); + intel_uncore_write(uncore, XELPMP_PAT_INDEX(2), + MTL_PPAT_L4_3_UC); + intel_uncore_write(uncore, XELPMP_PAT_INDEX(3), + MTL_PPAT_L4_0_WB | MTL_2_COH_1W); + intel_uncore_write(uncore, XELPMP_PAT_INDEX(4), + MTL_PPAT_L4_0_WB | MTL_3_COH_2W); + + /* +* Remaining PAT entries are left at the hardware-default +* fully-cached setting +*/ +} + +static void xelpg_setup_private_ppat(struct intel_gt *gt) +{ + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), +MTL_PPAT_L4_0_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), +MTL_PPAT_L4_1_WT); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), +MTL_PPAT_L4_3_UC); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), +MTL_PPAT_L4_0_WB | MTL_2_COH_1W); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), +MTL_PPAT_L4_0_WB | MTL_3_COH_2W); + + /* +* Remaining PAT entries are left at the hardware-default +* fully-cached setting +*/ +} + static void tgl_setup_private_ppat(struct intel_uncore *uncore) { /* TGL doesn't support LLC or AGE settings */ @@ -603,7 +641,14 @@ void setup_private_pat(struct intel_gt *gt) GEM_BUG_ON(GRAPHICS_VER(i915) < 8); - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + if (gt->type == GT_MEDIA) { + xelpmp_setup_private_ppat(gt->uncore); + return; + } + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + xelpg_setup_private_ppat(gt); + else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) xehp_setup_private_ppat(gt); else if (GRAPHICS_VER(i915) >= 12) tgl_setup_private_ppat(uncore); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 69ce55f517f5..ea17849e7a5c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -147,6 +147,14 @@ typedef u64 gen8_pte_t; #define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) +#define MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) +#define MTL_PAT_INDEX_COH_MODE_MASKREG_GENMASK(1, 0) +#define MTL_PPAT_L4