Re: [PATCH 6/6] drm/i915: Implement fdinfo memory stats printing
On 22-09-2023 19:17, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > Use the newly added drm_print_memory_stats helper to show memory > utilisation of our objects in drm/driver specific fdinfo output. > > To collect the stats we walk the per memory regions object lists > and accumulate object size into the respective drm_memory_stats > categories. > > v2: > * Only account against the active region. > * Use DMA_RESV_USAGE_BOOKKEEP when testing for active. (Tejas) > > v3: > * Update commit text. (Aravind) > * Update to use memory regions uabi names. > > Signed-off-by: Tvrtko Ursulin > Cc: Aravind Iddamsetty > Cc: Rob Clark > Cc: Andi Shyti > Cc: Tejas Upadhyay > Reviewed-by: Andi Shyti # v1 > Reviewed-by: Aravind Iddamsetty # v2 Reviewed-by: Aravind Iddamsetty Thanks, Aravind. > --- > drivers/gpu/drm/i915/i915_drm_client.c | 64 ++ > 1 file changed, 64 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drm_client.c > b/drivers/gpu/drm/i915/i915_drm_client.c > index a61356012df8..7efffdaa508d 100644 > --- a/drivers/gpu/drm/i915/i915_drm_client.c > +++ b/drivers/gpu/drm/i915/i915_drm_client.c > @@ -45,6 +45,68 @@ void __i915_drm_client_free(struct kref *kref) > } > > #ifdef CONFIG_PROC_FS > +static void > +obj_meminfo(struct drm_i915_gem_object *obj, > + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN]) > +{ > + const enum intel_region_id id = obj->mm.region ? > + obj->mm.region->id : INTEL_REGION_SMEM; > + const u64 sz = obj->base.size; > + > + if (obj->base.handle_count > 1) > + stats[id].shared += sz; > + else > + stats[id].private += sz; > + > + if (i915_gem_object_has_pages(obj)) { > + stats[id].resident += sz; > + > + if (!dma_resv_test_signaled(obj->base.resv, > + DMA_RESV_USAGE_BOOKKEEP)) > + stats[id].active += sz; > + else if (i915_gem_object_is_shrinkable(obj) && > + obj->mm.madv == I915_MADV_DONTNEED) > + stats[id].purgeable += sz; > + } > +} > + > +static void show_meminfo(struct drm_printer *p, struct drm_file *file) > +{ > + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN] = {}; > + struct drm_i915_file_private *fpriv = file->driver_priv; > + struct i915_drm_client *client = fpriv->client; > + struct drm_i915_private *i915 = fpriv->i915; > + struct drm_i915_gem_object *obj; > + struct intel_memory_region *mr; > + struct list_head *pos; > + unsigned int id; > + > + /* Public objects. */ > + spin_lock(&file->table_lock); > + idr_for_each_entry(&file->object_idr, obj, id) > + obj_meminfo(obj, stats); > + spin_unlock(&file->table_lock); > + > + /* Internal objects. */ > + rcu_read_lock(); > + list_for_each_rcu(pos, &client->objects_list) { > + obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj), > + client_link)); > + if (!obj) > + continue; > + obj_meminfo(obj, stats); > + i915_gem_object_put(obj); > + } > + rcu_read_unlock(); > + > + for_each_memory_region(mr, i915, id) > + drm_print_memory_stats(p, > +&stats[id], > +DRM_GEM_OBJECT_RESIDENT | > +DRM_GEM_OBJECT_PURGEABLE, > +mr->uabi_name); > +} > + > static const char * const uabi_class_names[] = { > [I915_ENGINE_CLASS_RENDER] = "render", > [I915_ENGINE_CLASS_COPY] = "copy", > @@ -106,6 +168,8 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct > drm_file *file) >* ** >*/ > > + show_meminfo(p, file); > + > if (GRAPHICS_VER(i915) < 8) > return; >
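For readers of the archive, an illustrative example of what the above adds to a client's fdinfo section: drm_print_memory_stats() emits keys of the form drm-<category>-<region>:, so with the stable region names from the previous patch a two-region device would gain lines roughly like the following (values are made up, not from a real run):

	drm-total-system0:	8192 KiB
	drm-shared-system0:	0
	drm-active-system0:	0
	drm-resident-system0:	4096 KiB
	drm-purgeable-system0:	0
	drm-total-local0:	64 MiB
	drm-shared-local0:	16 MiB
	drm-active-local0:	32 MiB
	drm-resident-local0:	64 MiB
	drm-purgeable-local0:	0

The resident and purgeable categories appear because the patch passes DRM_GEM_OBJECT_RESIDENT | DRM_GEM_OBJECT_PURGEABLE; the generic key format is documented in Documentation/gpu/drm-usage-stats.rst.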
Re: [PATCH 5/6] drm/i915: Add stable memory region names
On 26-09-2023 21:12, Tvrtko Ursulin wrote: > > On 26/09/2023 16:29, Iddamsetty, Aravind wrote: >> On 22-09-2023 19:16, Tvrtko Ursulin wrote: >>> From: Tvrtko Ursulin >>> >>> At the moment memory region names are a bit too varied and too >>> inconsistent to be used for ABI purposes, like for upcoming fdinfo >>> memory stats. >>> >>> System memory can be either system or system-ttm. Local memory has the >>> instance number appended, others do not. Not only incosistent but thi >>> kind of implementation detail is uninteresting for intended users of >>> fdinfo memory stats. >>> >>> Add a stable name always formed as $type$instance. Could have chosen a >>> different stable scheme, but I think any consistent and stable scheme >>> should do just fine. >>> >>> Signed-off-by: Tvrtko Ursulin >>> --- >>> drivers/gpu/drm/i915/intel_memory_region.c | 19 +++ >>> drivers/gpu/drm/i915/intel_memory_region.h | 1 + >>> 2 files changed, 20 insertions(+) >>> >>> diff --git a/drivers/gpu/drm/i915/intel_memory_region.c >>> b/drivers/gpu/drm/i915/intel_memory_region.c >>> index 3d1fdea9811d..60a03340bbd4 100644 >>> --- a/drivers/gpu/drm/i915/intel_memory_region.c >>> +++ b/drivers/gpu/drm/i915/intel_memory_region.c >>> @@ -216,6 +216,22 @@ static int intel_memory_region_memtest(struct >>> intel_memory_region *mem, >>> return err; >>> } >>> +static const char *region_type_str(u16 type) >>> +{ >>> + switch (type) { >>> + case INTEL_MEMORY_SYSTEM: >>> + return "system"; >>> + case INTEL_MEMORY_LOCAL: >>> + return "local"; >>> + case INTEL_MEMORY_STOLEN_LOCAL: >>> + return "stolen-local"; >>> + case INTEL_MEMORY_STOLEN_SYSTEM: >>> + return "stolen-system"; >>> + default: >>> + return "unknown"; >>> + } >>> +} >>> + >>> struct intel_memory_region * >>> intel_memory_region_create(struct drm_i915_private *i915, >>> resource_size_t start, >>> @@ -244,6 +260,9 @@ intel_memory_region_create(struct >>> drm_i915_private *i915, >>> mem->type = type; >>> mem->instance = instance; >>> + snprintf(mem->uabi_name, sizeof(mem->uabi_name), "%s%u", >>> + region_type_str(type), instance); >>> + >>> mutex_init(&mem->objects.lock); >>> INIT_LIST_HEAD(&mem->objects.list); >>> diff --git a/drivers/gpu/drm/i915/intel_memory_region.h >>> b/drivers/gpu/drm/i915/intel_memory_region.h >>> index 2953ed5c3248..9ba36454e51b 100644 >>> --- a/drivers/gpu/drm/i915/intel_memory_region.h >>> +++ b/drivers/gpu/drm/i915/intel_memory_region.h >>> @@ -80,6 +80,7 @@ struct intel_memory_region { >>> u16 instance; >>> enum intel_region_id id; >>> char name[16]; >>> + char uabi_name[16]; >> >> Just a thought instead of creating a new field, can't we derive this >> with name and instance? > > I'd rather not snprintf on every fdinfo read - for every pid and every > drm fd versus 2-3 strings kept around. ya agreed makes no sense. > > I did briefly wonder if mr->name could be dropped, that is renamed to > mr->uabi_name, but I guess there is some value to print the internal > name in some log messages, to leave a trace of what underlying > implementation is used. Although I am not too sure about the value of > that either since it is implied from the kernel version. > > Then on top the usage in i915_gem_create/repr_name I could replace with > mr->uabi_name and simplify. If there is any value in printing the name > there, versus just uabi type:instance integers. Dunno. All I know is > fdinfo should have stable names and not confuse with implementation > details so I need something.. Ok. LGTM Reviewed-by: Aravind Iddamsetty Thanks, Aravind. > > Regards, > > Tvrtko
Re: [PATCH 5/6] drm/i915: Add stable memory region names
On 22-09-2023 19:16, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > At the moment memory region names are a bit too varied and too > inconsistent to be used for ABI purposes, like for upcoming fdinfo > memory stats. > > System memory can be either system or system-ttm. Local memory has the > instance number appended, others do not. Not only incosistent but thi > kind of implementation detail is uninteresting for intended users of > fdinfo memory stats. > > Add a stable name always formed as $type$instance. Could have chosen a > different stable scheme, but I think any consistent and stable scheme > should do just fine. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/intel_memory_region.c | 19 +++ > drivers/gpu/drm/i915/intel_memory_region.h | 1 + > 2 files changed, 20 insertions(+) > > diff --git a/drivers/gpu/drm/i915/intel_memory_region.c > b/drivers/gpu/drm/i915/intel_memory_region.c > index 3d1fdea9811d..60a03340bbd4 100644 > --- a/drivers/gpu/drm/i915/intel_memory_region.c > +++ b/drivers/gpu/drm/i915/intel_memory_region.c > @@ -216,6 +216,22 @@ static int intel_memory_region_memtest(struct > intel_memory_region *mem, > return err; > } > > +static const char *region_type_str(u16 type) > +{ > + switch (type) { > + case INTEL_MEMORY_SYSTEM: > + return "system"; > + case INTEL_MEMORY_LOCAL: > + return "local"; > + case INTEL_MEMORY_STOLEN_LOCAL: > + return "stolen-local"; > + case INTEL_MEMORY_STOLEN_SYSTEM: > + return "stolen-system"; > + default: > + return "unknown"; > + } > +} > + > struct intel_memory_region * > intel_memory_region_create(struct drm_i915_private *i915, > resource_size_t start, > @@ -244,6 +260,9 @@ intel_memory_region_create(struct drm_i915_private *i915, > mem->type = type; > mem->instance = instance; > > + snprintf(mem->uabi_name, sizeof(mem->uabi_name), "%s%u", > + region_type_str(type), instance); > + > mutex_init(&mem->objects.lock); > INIT_LIST_HEAD(&mem->objects.list); > > diff --git a/drivers/gpu/drm/i915/intel_memory_region.h > b/drivers/gpu/drm/i915/intel_memory_region.h > index 2953ed5c3248..9ba36454e51b 100644 > --- a/drivers/gpu/drm/i915/intel_memory_region.h > +++ b/drivers/gpu/drm/i915/intel_memory_region.h > @@ -80,6 +80,7 @@ struct intel_memory_region { > u16 instance; > enum intel_region_id id; > char name[16]; > + char uabi_name[16]; Just a thought instead of creating a new field, can't we derive this with name and instance? Thanks, Aravind. > bool private; /* not for userspace */ > > struct {
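For illustration, the $type$instance scheme above yields names such as the following (instance numbers depend on the platform):

	INTEL_MEMORY_SYSTEM        instance 0  ->  "system0"
	INTEL_MEMORY_LOCAL         instance 0  ->  "local0"
	INTEL_MEMORY_LOCAL         instance 1  ->  "local1"
	INTEL_MEMORY_STOLEN_LOCAL  instance 0  ->  "stolen-local0"
	INTEL_MEMORY_STOLEN_SYSTEM instance 0  ->  "stolen-system0"

These strings are what the fdinfo patch in this series uses as the per-region suffix in its memory keys.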
Re: [PATCH 5/5] drm/i915: Implement fdinfo memory stats printing
On 22-09-2023 16:27, Tvrtko Ursulin wrote: > > On 22/09/2023 09:48, Iddamsetty, Aravind wrote: >> >> >> On 21-09-2023 17:18, Tvrtko Ursulin wrote: >>> From: Tvrtko Ursulin >>> >>> Use the newly added drm_print_memory_stats helper to show memory >>> utilisation of our objects in drm/driver specific fdinfo output. >>> >>> To collect the stats we walk the per memory regions object lists >>> and accumulate object size into the respective drm_memory_stats >>> categories. >>> >>> Objects with multiple possible placements are reported in multiple >>> regions for total and shared sizes, while other categories are >> >> I guess you forgot to correct this. > > Ah yes, will fix. > >> >>> counted only for the currently active region. >>> >>> v2: >>> * Only account against the active region. >>> * Use DMA_RESV_USAGE_BOOKKEEP when testing for active. (Tejas) >>> >>> Signed-off-by: Tvrtko Ursulin >>> Cc: Aravind Iddamsetty >>> Cc: Rob Clark >>> Cc: Andi Shyti >>> Cc: Tejas Upadhyay >>> Reviewed-by: Andi Shyti # v1 >>> --- >>> drivers/gpu/drm/i915/i915_drm_client.c | 64 ++ >>> 1 file changed, 64 insertions(+) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_drm_client.c >>> b/drivers/gpu/drm/i915/i915_drm_client.c >>> index a61356012df8..94abc2fb2ea6 100644 >>> --- a/drivers/gpu/drm/i915/i915_drm_client.c >>> +++ b/drivers/gpu/drm/i915/i915_drm_client.c >>> @@ -45,6 +45,68 @@ void __i915_drm_client_free(struct kref *kref) >>> } >>> #ifdef CONFIG_PROC_FS >>> +static void >>> +obj_meminfo(struct drm_i915_gem_object *obj, >>> + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN]) >>> +{ >>> + const enum intel_region_id id = obj->mm.region ? >>> + obj->mm.region->id : INTEL_REGION_SMEM; >>> + const u64 sz = obj->base.size; >>> + >>> + if (obj->base.handle_count > 1) >>> + stats[id].shared += sz; >>> + else >>> + stats[id].private += sz; >>> + >>> + if (i915_gem_object_has_pages(obj)) { >>> + stats[id].resident += sz; >>> + >>> + if (!dma_resv_test_signaled(obj->base.resv, >>> + DMA_RESV_USAGE_BOOKKEEP)) >>> + stats[id].active += sz; >>> + else if (i915_gem_object_is_shrinkable(obj) && >>> + obj->mm.madv == I915_MADV_DONTNEED) >>> + stats[id].purgeable += sz; >>> + } >>> +} >>> + >>> +static void show_meminfo(struct drm_printer *p, struct drm_file *file) >>> +{ >>> + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN] = {}; >>> + struct drm_i915_file_private *fpriv = file->driver_priv; >>> + struct i915_drm_client *client = fpriv->client; >>> + struct drm_i915_private *i915 = fpriv->i915; >>> + struct drm_i915_gem_object *obj; >>> + struct intel_memory_region *mr; >>> + struct list_head *pos; >>> + unsigned int id; >>> + >>> + /* Public objects. */ >>> + spin_lock(&file->table_lock); >>> + idr_for_each_entry(&file->object_idr, obj, id) >>> + obj_meminfo(obj, stats); >>> + spin_unlock(&file->table_lock); >>> + >>> + /* Internal objects. 
*/ >>> + rcu_read_lock(); >>> + list_for_each_rcu(pos, &client->objects_list) { >>> + obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj), >>> + client_link)); >>> + if (!obj) >>> + continue; >>> + obj_meminfo(obj, stats); >>> + i915_gem_object_put(obj); >>> + } >>> + rcu_read_unlock(); >>> + >>> + for_each_memory_region(mr, i915, id) >>> + drm_print_memory_stats(p, >>> + &stats[id], >>> + DRM_GEM_OBJECT_RESIDENT | >>> + DRM_GEM_OBJECT_PURGEABLE, >>> + mr->name); >>> +} >>> + >>> static const char * const uabi_class_names[] = { >>> [I915_ENGINE_CLASS_RENDER] = "render", >>> [I915_ENGINE_CLASS_COPY] = "copy", >>> @@ -106,6 +168,8 @@ void i915_drm_client_fdinfo(struct drm_printer >>> *p, struct drm_file *file) >>> * >>> ** >>> */ >>> + show_meminfo(p, file); >>> + >>> if (GRAPHICS_VER(i915) < 8) >>> return; >>> >> >> Reviewed-by: Aravind Iddamsetty > > Thank you! Would you be able to also look at the IGTs I posted yesterday? Ya sure will take a look. Thanks, Aravind. > > Regards, > > Tvrtko
Re: [PATCH 5/5] drm/i915: Implement fdinfo memory stats printing
On 21-09-2023 17:18, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > Use the newly added drm_print_memory_stats helper to show memory > utilisation of our objects in drm/driver specific fdinfo output. > > To collect the stats we walk the per memory regions object lists > and accumulate object size into the respective drm_memory_stats > categories. > > Objects with multiple possible placements are reported in multiple > regions for total and shared sizes, while other categories are I guess you forgot to correct this. > counted only for the currently active region. > > v2: > * Only account against the active region. > * Use DMA_RESV_USAGE_BOOKKEEP when testing for active. (Tejas) > > Signed-off-by: Tvrtko Ursulin > Cc: Aravind Iddamsetty > Cc: Rob Clark > Cc: Andi Shyti > Cc: Tejas Upadhyay > Reviewed-by: Andi Shyti # v1 > --- > drivers/gpu/drm/i915/i915_drm_client.c | 64 ++ > 1 file changed, 64 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drm_client.c > b/drivers/gpu/drm/i915/i915_drm_client.c > index a61356012df8..94abc2fb2ea6 100644 > --- a/drivers/gpu/drm/i915/i915_drm_client.c > +++ b/drivers/gpu/drm/i915/i915_drm_client.c > @@ -45,6 +45,68 @@ void __i915_drm_client_free(struct kref *kref) > } > > #ifdef CONFIG_PROC_FS > +static void > +obj_meminfo(struct drm_i915_gem_object *obj, > + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN]) > +{ > + const enum intel_region_id id = obj->mm.region ? > + obj->mm.region->id : INTEL_REGION_SMEM; > + const u64 sz = obj->base.size; > + > + if (obj->base.handle_count > 1) > + stats[id].shared += sz; > + else > + stats[id].private += sz; > + > + if (i915_gem_object_has_pages(obj)) { > + stats[id].resident += sz; > + > + if (!dma_resv_test_signaled(obj->base.resv, > + DMA_RESV_USAGE_BOOKKEEP)) > + stats[id].active += sz; > + else if (i915_gem_object_is_shrinkable(obj) && > + obj->mm.madv == I915_MADV_DONTNEED) > + stats[id].purgeable += sz; > + } > +} > + > +static void show_meminfo(struct drm_printer *p, struct drm_file *file) > +{ > + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN] = {}; > + struct drm_i915_file_private *fpriv = file->driver_priv; > + struct i915_drm_client *client = fpriv->client; > + struct drm_i915_private *i915 = fpriv->i915; > + struct drm_i915_gem_object *obj; > + struct intel_memory_region *mr; > + struct list_head *pos; > + unsigned int id; > + > + /* Public objects. */ > + spin_lock(&file->table_lock); > + idr_for_each_entry(&file->object_idr, obj, id) > + obj_meminfo(obj, stats); > + spin_unlock(&file->table_lock); > + > + /* Internal objects. */ > + rcu_read_lock(); > + list_for_each_rcu(pos, &client->objects_list) { > + obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj), > + client_link)); > + if (!obj) > + continue; > + obj_meminfo(obj, stats); > + i915_gem_object_put(obj); > + } > + rcu_read_unlock(); > + > + for_each_memory_region(mr, i915, id) > + drm_print_memory_stats(p, > +&stats[id], > +DRM_GEM_OBJECT_RESIDENT | > +DRM_GEM_OBJECT_PURGEABLE, > +mr->name); > +} > + > static const char * const uabi_class_names[] = { > [I915_ENGINE_CLASS_RENDER] = "render", > [I915_ENGINE_CLASS_COPY] = "copy", > @@ -106,6 +168,8 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct > drm_file *file) >* ** >*/ > > + show_meminfo(p, file); > + > if (GRAPHICS_VER(i915) < 8) > return; > Reviewed-by: Aravind Iddamsetty Thanks, Aravind.
Re: [PATCH 5/5] drm/i915: Implement fdinfo memory stats printing
On 03-08-2023 14:19, Tvrtko Ursulin wrote: > > On 03/08/2023 06:15, Iddamsetty, Aravind wrote: >> On 27-07-2023 15:43, Tvrtko Ursulin wrote: >>> From: Tvrtko Ursulin >>> >>> Use the newly added drm_print_memory_stats helper to show memory >>> utilisation of our objects in drm/driver specific fdinfo output. >>> >>> To collect the stats we walk the per memory regions object lists >>> and accumulate object size into the respective drm_memory_stats >>> categories. >>> >>> Objects with multiple possible placements are reported in multiple >>> regions for total and shared sizes, while other categories are >>> counted only for the currently active region. >>> >>> Signed-off-by: Tvrtko Ursulin >>> Cc: Aravind Iddamsetty >>> Cc: Rob Clark > --- >>> drivers/gpu/drm/i915/i915_drm_client.c | 85 ++ >>> 1 file changed, 85 insertions(+) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_drm_client.c >>> b/drivers/gpu/drm/i915/i915_drm_client.c >>> index a61356012df8..9e7a6075ee25 100644 >>> --- a/drivers/gpu/drm/i915/i915_drm_client.c >>> +++ b/drivers/gpu/drm/i915/i915_drm_client.c >>> @@ -45,6 +45,89 @@ void __i915_drm_client_free(struct kref *kref) >>> } >>> #ifdef CONFIG_PROC_FS >>> +static void >>> +obj_meminfo(struct drm_i915_gem_object *obj, >>> + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN]) >>> +{ >>> + struct intel_memory_region *mr; >>> + u64 sz = obj->base.size; >>> + enum intel_region_id id; >>> + unsigned int i; >>> + >>> + /* Attribute size and shared to all possible memory regions. */ >>> + for (i = 0; i < obj->mm.n_placements; i++) { >>> + mr = obj->mm.placements[i]; >>> + id = mr->id; >>> + >>> + if (obj->base.handle_count > 1) >>> + stats[id].shared += sz; >>> + else >>> + stats[id].private += sz; >>> + } >>> + >>> + /* Attribute other categories to only the current region. */ >>> + mr = obj->mm.region; >>> + if (mr) >>> + id = mr->id; >>> + else >>> + id = INTEL_REGION_SMEM; >>> + >>> + if (!obj->mm.n_placements) { >> >> I guess we do not expect to have n_placements set to public objects, is >> that right? > > I think they are the only ones which can have placements. It is via > I915_GEM_CREATE_EXT_MEMORY_REGIONS userspace is able to create them. > > My main conundrum in this patch is a few lines above, the loop which > adds shared and private. > > Question is, if an object can be either smem or lmem, how do we want to > report it? This patch adds the size for all possible regions and > resident and active only to the currently active. But perhaps that is > wrong. Maybe I should change it is only against the active region and > multiple regions are just ignored. Then if object is migrated do access > patterns or memory pressure, the total size would migrate too. > > I think I was trying to achieve something here (have more visibility on > what kind of backing store clients are allocating) which maybe does not > work to well with the current categories. > > Namely if userspace allocates say one 1MiB object with placement in > either smem or lmem, and it is currently resident in lmem, I wanted it > to show as: > > total-smem: 1 MiB > resident-smem: 0 > total-lmem: 1 MiB > resident-lmem: 1 MiB > > To constantly show how in theory client could be using memory from > either region. Maybe that is misleading and should instead be: > > total-smem: 0 > resident-smem: 0 > total-lmem: 1 MiB > resident-lmem: 1 MiB > > ? I think the current implementation will not match with the memregion info in query ioctl as well. 
While what you say is true, I'm not sure if there can be a client who is tracking the allocation, say for an obj which has 2 placements LMEM and SMEM, and might assume that since it had made a reservation in SMEM it shall not fail when it tries to migrate there later. Thanks, Aravind. > > And then if/when the same object gets migrated to smem it changes to > (lets assume it is also not resident any more but got swapped out): > > total-smem: 1 MiB > resident-smem: 0 > total-lmem: 0 > resident-lmem: 0 > > Regards, > > Tvrtko > >>> + if (obj->base.handle_count > 1) >>> + stats[id].shared += sz; &
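For completeness, with the behaviour the series settled on in v2 (account only against the currently active region), the 1 MiB object placeable in either region but currently backed in lmem would show up along these lines (illustrative keys, using the stable region names added later in the series):

	drm-total-local0:	1 MiB
	drm-resident-local0:	1 MiB

with nothing attributed under system0 until the object is actually migrated there.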
Re: [Intel-gfx] [PATCH 1/5] drm/i915: Add ability for tracking buffer objects per client
On 27-07-2023 15:43, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > In order to show per client memory usage lets add some infrastructure > which enables tracking buffer objects owned by clients. > > We add a per client list protected by a new per client lock and to support > delayed destruction (post client exit) we make tracked objects hold > references to the owning client. > > Also, object memory region teardown is moved to the existing RCU free > callback to allow safe dereference from the fdinfo RCU read section. This is same as the earlier series, which I had reviewed but forgot to give r-b. sorry for the delay. Reviewed-by: Aravind Iddamsetty Thanks, Aravind. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/gem/i915_gem_object.c| 13 +-- > .../gpu/drm/i915/gem/i915_gem_object_types.h | 12 +++ > drivers/gpu/drm/i915/i915_drm_client.c| 36 +++ > drivers/gpu/drm/i915/i915_drm_client.h| 32 + > 4 files changed, 90 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c > b/drivers/gpu/drm/i915/gem/i915_gem_object.c > index 97ac6fb37958..3dc4fbb67d2b 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c > @@ -105,6 +105,10 @@ void i915_gem_object_init(struct drm_i915_gem_object > *obj, > > INIT_LIST_HEAD(&obj->mm.link); > > +#ifdef CONFIG_PROC_FS > + INIT_LIST_HEAD(&obj->client_link); > +#endif > + > INIT_LIST_HEAD(&obj->lut_list); > spin_lock_init(&obj->lut_lock); > > @@ -292,6 +296,10 @@ void __i915_gem_free_object_rcu(struct rcu_head *head) > container_of(head, typeof(*obj), rcu); > struct drm_i915_private *i915 = to_i915(obj->base.dev); > > + /* We need to keep this alive for RCU read access from fdinfo. */ > + if (obj->mm.n_placements > 1) > + kfree(obj->mm.placements); > + > i915_gem_object_free(obj); > > GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); > @@ -388,9 +396,6 @@ void __i915_gem_free_object(struct drm_i915_gem_object > *obj) > if (obj->ops->release) > obj->ops->release(obj); > > - if (obj->mm.n_placements > 1) > - kfree(obj->mm.placements); > - > if (obj->shares_resv_from) > i915_vm_resv_put(obj->shares_resv_from); > > @@ -441,6 +446,8 @@ static void i915_gem_free_object(struct drm_gem_object > *gem_obj) > > GEM_BUG_ON(i915_gem_object_is_framebuffer(obj)); > > + i915_drm_client_remove_object(obj); > + > /* >* Before we free the object, make sure any pure RCU-only >* read-side critical sections are complete, e.g. 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > index e72c57716bee..8de2b91b3edf 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > @@ -300,6 +300,18 @@ struct drm_i915_gem_object { >*/ > struct i915_address_space *shares_resv_from; > > +#ifdef CONFIG_PROC_FS > + /** > + * @client: @i915_drm_client which created the object > + */ > + struct i915_drm_client *client; > + > + /** > + * @client_link: Link into @i915_drm_client.objects_list > + */ > + struct list_head client_link; > +#endif > + > union { > struct rcu_head rcu; > struct llist_node freed; > diff --git a/drivers/gpu/drm/i915/i915_drm_client.c > b/drivers/gpu/drm/i915/i915_drm_client.c > index 2a44b3876cb5..2e5e69edc0f9 100644 > --- a/drivers/gpu/drm/i915/i915_drm_client.c > +++ b/drivers/gpu/drm/i915/i915_drm_client.c > @@ -28,6 +28,10 @@ struct i915_drm_client *i915_drm_client_alloc(void) > kref_init(&client->kref); > spin_lock_init(&client->ctx_lock); > INIT_LIST_HEAD(&client->ctx_list); > +#ifdef CONFIG_PROC_FS > + spin_lock_init(&client->objects_lock); > + INIT_LIST_HEAD(&client->objects_list); > +#endif > > return client; > } > @@ -108,4 +112,36 @@ void i915_drm_client_fdinfo(struct drm_printer *p, > struct drm_file *file) > for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) > show_client_class(p, i915, file_priv->client, i); > } > + > +void i915_drm_client_add_object(struct i915_drm_client *client, > + struct drm_i915_gem_object *obj) > +{ > + unsigned long flags; > + > + GEM_WARN_ON(obj->client); > + GEM_WARN_ON(!list_empty(&obj->client_link)); > + > + spin_lock_irqsave(&client->objects_lock, flags); > + obj->client = i915_drm_client_get(client); > + list_add_tail_rcu(&obj->client_link, &client->objects_list); > + spin_unlock_irqrestore(&client->objects_lock, flags); > +} > + > +bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj) > +{ > + struct i915_drm_client *client = fetch_and_zero(&obj->clien
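A note on why the placements teardown moves into the RCU callback above: the new per-client list is meant to be walked from the fdinfo read path under rcu_read_lock() only, taking a reference before touching each object. A minimal sketch of that reader pattern, mirroring show_meminfo() from the fdinfo patch elsewhere in this thread:

	struct drm_i915_gem_object *obj;
	struct list_head *pos;

	rcu_read_lock();
	list_for_each_rcu(pos, &client->objects_list) {
		/* Objects whose last reference is already gone are skipped. */
		obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj),
							 client_link));
		if (!obj)
			continue;
		/* ... accumulate per-client stats from obj ... */
		i915_gem_object_put(obj);
	}
	rcu_read_unlock();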
Re: [PATCH 5/5] drm/i915: Implement fdinfo memory stats printing
On 27-07-2023 15:43, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > Use the newly added drm_print_memory_stats helper to show memory > utilisation of our objects in drm/driver specific fdinfo output. > > To collect the stats we walk the per memory regions object lists > and accumulate object size into the respective drm_memory_stats > categories. > > Objects with multiple possible placements are reported in multiple > regions for total and shared sizes, while other categories are > counted only for the currently active region. > > Signed-off-by: Tvrtko Ursulin > Cc: Aravind Iddamsetty > Cc: Rob Clark > --- > drivers/gpu/drm/i915/i915_drm_client.c | 85 ++ > 1 file changed, 85 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drm_client.c > b/drivers/gpu/drm/i915/i915_drm_client.c > index a61356012df8..9e7a6075ee25 100644 > --- a/drivers/gpu/drm/i915/i915_drm_client.c > +++ b/drivers/gpu/drm/i915/i915_drm_client.c > @@ -45,6 +45,89 @@ void __i915_drm_client_free(struct kref *kref) > } > > #ifdef CONFIG_PROC_FS > +static void > +obj_meminfo(struct drm_i915_gem_object *obj, > + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN]) > +{ > + struct intel_memory_region *mr; > + u64 sz = obj->base.size; > + enum intel_region_id id; > + unsigned int i; > + > + /* Attribute size and shared to all possible memory regions. */ > + for (i = 0; i < obj->mm.n_placements; i++) { > + mr = obj->mm.placements[i]; > + id = mr->id; > + > + if (obj->base.handle_count > 1) > + stats[id].shared += sz; > + else > + stats[id].private += sz; > + } > + > + /* Attribute other categories to only the current region. */ > + mr = obj->mm.region; > + if (mr) > + id = mr->id; > + else > + id = INTEL_REGION_SMEM; > + > + if (!obj->mm.n_placements) { I guess we do not expect to have n_placements set to public objects, is that right? Thanks, Aravind. > + if (obj->base.handle_count > 1) > + stats[id].shared += sz; > + else > + stats[id].private += sz; > + } > + > + if (i915_gem_object_has_pages(obj)) { > + stats[id].resident += sz; > + > + if (!dma_resv_test_signaled(obj->base.resv, > + dma_resv_usage_rw(true))) > + stats[id].active += sz; > + else if (i915_gem_object_is_shrinkable(obj) && > + obj->mm.madv == I915_MADV_DONTNEED) > + stats[id].purgeable += sz; > + } > +} > + > +static void show_meminfo(struct drm_printer *p, struct drm_file *file) > +{ > + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN] = {}; > + struct drm_i915_file_private *fpriv = file->driver_priv; > + struct i915_drm_client *client = fpriv->client; > + struct drm_i915_private *i915 = fpriv->i915; > + struct drm_i915_gem_object *obj; > + struct intel_memory_region *mr; > + struct list_head *pos; > + unsigned int id; > + > + /* Public objects. */ > + spin_lock(&file->table_lock); > + idr_for_each_entry(&file->object_idr, obj, id) > + obj_meminfo(obj, stats); > + spin_unlock(&file->table_lock); > + > + /* Internal objects. 
*/ > + rcu_read_lock(); > + list_for_each_rcu(pos, &client->objects_list) { > + obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj), > + client_link)); > + if (!obj) > + continue; > + obj_meminfo(obj, stats); > + i915_gem_object_put(obj); > + } > + rcu_read_unlock(); > + > + for_each_memory_region(mr, i915, id) > + drm_print_memory_stats(p, > +&stats[id], > +DRM_GEM_OBJECT_RESIDENT | > +DRM_GEM_OBJECT_PURGEABLE, > +mr->name); > +} > + > static const char * const uabi_class_names[] = { > [I915_ENGINE_CLASS_RENDER] = "render", > [I915_ENGINE_CLASS_COPY] = "copy", > @@ -106,6 +189,8 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct > drm_file *file) >* ** >*/ > > + show_meminfo(p, file); > + > if (GRAPHICS_VER(i915) < 8) > return; >
Re: [Intel-gfx] [PATCH 4/5] drm/i915: Account ring buffer and context state storage
On 07-07-2023 18:32, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > Account ring buffers and logical context space against the owning client > memory usage stats. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/gt/intel_context.c | 13 + > drivers/gpu/drm/i915/i915_drm_client.c | 10 ++ > drivers/gpu/drm/i915/i915_drm_client.h | 8 > 3 files changed, 31 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c > b/drivers/gpu/drm/i915/gt/intel_context.c > index a53b26178f0a..8a395b9201e9 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.c > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > @@ -6,6 +6,7 @@ > #include "gem/i915_gem_context.h" > #include "gem/i915_gem_pm.h" > > +#include "i915_drm_client.h" > #include "i915_drv.h" > #include "i915_trace.h" > > @@ -50,6 +51,7 @@ intel_context_create(struct intel_engine_cs *engine) > > int intel_context_alloc_state(struct intel_context *ce) > { > + struct i915_gem_context *ctx; > int err = 0; > > if (mutex_lock_interruptible(&ce->pin_mutex)) > @@ -66,6 +68,17 @@ int intel_context_alloc_state(struct intel_context *ce) > goto unlock; > > set_bit(CONTEXT_ALLOC_BIT, &ce->flags); > + > + rcu_read_lock(); > + ctx = rcu_dereference(ce->gem_context); > + if (ctx && !kref_get_unless_zero(&ctx->ref)) > + ctx = NULL; > + rcu_read_unlock(); > + if (ctx) { > + if (ctx->client) > + i915_drm_client_add_context(ctx->client, ce); > + i915_gem_context_put(ctx); > + } > } > > unlock: > diff --git a/drivers/gpu/drm/i915/i915_drm_client.c > b/drivers/gpu/drm/i915/i915_drm_client.c > index 2e5e69edc0f9..ffccb6239789 100644 > --- a/drivers/gpu/drm/i915/i915_drm_client.c > +++ b/drivers/gpu/drm/i915/i915_drm_client.c > @@ -144,4 +144,14 @@ bool i915_drm_client_remove_object(struct > drm_i915_gem_object *obj) > > return true; > } > + > +void i915_drm_client_add_context(struct i915_drm_client *client, > + struct intel_context *ce) do you think we can rename to i915_drm_client_add_context_objects? > +{ > + if (ce->state) > + i915_drm_client_add_object(client, ce->state->obj); > + > + if (ce->ring != ce->engine->legacy.ring && ce->ring->vma) > + i915_drm_client_add_object(client, ce->ring->vma->obj); > +} > #endif > diff --git a/drivers/gpu/drm/i915/i915_drm_client.h > b/drivers/gpu/drm/i915/i915_drm_client.h > index 5f58fdf7dcb8..39616b10a51f 100644 > --- a/drivers/gpu/drm/i915/i915_drm_client.h > +++ b/drivers/gpu/drm/i915/i915_drm_client.h > @@ -14,6 +14,7 @@ > > #include "i915_file_private.h" > #include "gem/i915_gem_object_types.h" > +#include "gt/intel_context_types.h" > > #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE > > @@ -70,6 +71,8 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct > drm_file *file); > void i915_drm_client_add_object(struct i915_drm_client *client, > struct drm_i915_gem_object *obj); > bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj); > +void i915_drm_client_add_context(struct i915_drm_client *client, > + struct intel_context *ce); > #else > static inline void i915_drm_client_add_object(struct i915_drm_client *client, > struct drm_i915_gem_object *obj) > @@ -79,6 +82,11 @@ static inline void i915_drm_client_add_object(struct > i915_drm_client *client, > static inline bool i915_drm_client_remove_object(struct drm_i915_gem_object > *obj) > { > } > + > +static inline void i915_drm_client_add_context(struct i915_drm_client > *client, > +struct intel_context *ce) > +{ > +} > #endif > > #endif /* !__I915_DRM_CLIENT_H__ */ Reviewed-by: Aravind Iddamsetty Thanks, Aravind.
Re: [Intel-gfx] [PATCH 3/5] drm/i915: Track page table backing store usage
On 07-07-2023 18:32, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > Account page table backing store against the owning client memory usage > stats. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/gt/intel_gtt.c | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c > b/drivers/gpu/drm/i915/gt/intel_gtt.c > index 2f6a9be0ffe6..126269a0d728 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gtt.c > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c > @@ -58,6 +58,9 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct > i915_address_space *vm, int sz) > if (!IS_ERR(obj)) { > obj->base.resv = i915_vm_resv_get(vm); > obj->shares_resv_from = vm; > + > + if (vm->fpriv) > + i915_drm_client_add_object(vm->fpriv->client, obj); > } > > return obj; > @@ -79,6 +82,9 @@ struct drm_i915_gem_object *alloc_pt_dma(struct > i915_address_space *vm, int sz) > if (!IS_ERR(obj)) { > obj->base.resv = i915_vm_resv_get(vm); > obj->shares_resv_from = vm; > + > + if (vm->fpriv) > + i915_drm_client_add_object(vm->fpriv->client, obj); > } > > return obj; Reviewed-by: Aravind Iddamsetty Thanks, Aravind.
Re: [PATCH 2/5] drm/i915: Record which client owns a VM
On 07-07-2023 18:32, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > To enable accounting of indirect client memory usage (such as page tables) > in the following patch, lets start recording the creator of each PPGTT. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/gem/i915_gem_context.c | 11 --- > drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 3 +++ > drivers/gpu/drm/i915/gem/selftests/mock_context.c | 4 ++-- > drivers/gpu/drm/i915/gt/intel_gtt.h | 1 + > 4 files changed, 14 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c > b/drivers/gpu/drm/i915/gem/i915_gem_context.c > index 9a9ff84c90d7..35cf6608180e 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > @@ -279,7 +279,8 @@ static int proto_context_set_protected(struct > drm_i915_private *i915, > } > > static struct i915_gem_proto_context * > -proto_context_create(struct drm_i915_private *i915, unsigned int flags) > +proto_context_create(struct drm_i915_file_private *fpriv, > + struct drm_i915_private *i915, unsigned int flags) > { > struct i915_gem_proto_context *pc, *err; > > @@ -287,6 +288,7 @@ proto_context_create(struct drm_i915_private *i915, > unsigned int flags) > if (!pc) > return ERR_PTR(-ENOMEM); > > + pc->fpriv = fpriv; > pc->num_user_engines = -1; > pc->user_engines = NULL; > pc->user_flags = BIT(UCONTEXT_BANNABLE) | > @@ -1621,6 +1623,7 @@ i915_gem_create_context(struct drm_i915_private *i915, > err = PTR_ERR(ppgtt); > goto err_ctx; > } > + ppgtt->vm.fpriv = pc->fpriv; > vm = &ppgtt->vm; > } > if (vm) > @@ -1740,7 +1743,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, > /* 0 reserved for invalid/unassigned ppgtt */ > xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1); > > - pc = proto_context_create(i915, 0); > + pc = proto_context_create(file_priv, i915, 0); > if (IS_ERR(pc)) { > err = PTR_ERR(pc); > goto err; > @@ -1822,6 +1825,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, > void *data, > > GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */ > args->vm_id = id; > + ppgtt->vm.fpriv = file_priv; > return 0; > > err_put: > @@ -2284,7 +2288,8 @@ int i915_gem_context_create_ioctl(struct drm_device > *dev, void *data, > return -EIO; > } > > - ext_data.pc = proto_context_create(i915, args->flags); > + ext_data.pc = proto_context_create(file->driver_priv, i915, > +args->flags); > if (IS_ERR(ext_data.pc)) > return PTR_ERR(ext_data.pc); > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > index cb78214a7dcd..c573c067779f 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > @@ -188,6 +188,9 @@ struct i915_gem_proto_engine { > * CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE. 
> */ > struct i915_gem_proto_context { > + /** @fpriv: Client which creates the context */ > + struct drm_i915_file_private *fpriv; > + > /** @vm: See &i915_gem_context.vm */ > struct i915_address_space *vm; > > diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c > b/drivers/gpu/drm/i915/gem/selftests/mock_context.c > index 8ac6726ec16b..125584ada282 100644 > --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c > +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c > @@ -83,7 +83,7 @@ live_context(struct drm_i915_private *i915, struct file > *file) > int err; > u32 id; > > - pc = proto_context_create(i915, 0); > + pc = proto_context_create(fpriv, i915, 0); > if (IS_ERR(pc)) > return ERR_CAST(pc); > > @@ -152,7 +152,7 @@ kernel_context(struct drm_i915_private *i915, > struct i915_gem_context *ctx; > struct i915_gem_proto_context *pc; > > - pc = proto_context_create(i915, 0); > + pc = proto_context_create(NULL, i915, 0); > if (IS_ERR(pc)) > return ERR_CAST(pc); > > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h > b/drivers/gpu/drm/i915/gt/intel_gtt.h > index 4d6296cdbcfd..7192a534a654 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gtt.h > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h > @@ -248,6 +248,7 @@ struct i915_address_space { > struct drm_mm mm; > struct intel_gt *gt; > struct drm_i915_private *i915; > + struct drm_i915_file_private *fpriv; > struct device *dma; > u64 total; /* size addr space maps (ex. 2GB for ggtt) */ > u64 reserved; /* size addr space reserved */ Looks good to me. Reviewed-by
Re: [Intel-gfx] [PATCH 1/5] drm/i915: Add ability for tracking buffer objects per client
On 10-07-2023 18:50, Tvrtko Ursulin wrote: > > On 10/07/2023 11:44, Iddamsetty, Aravind wrote: >> On 07-07-2023 18:32, Tvrtko Ursulin wrote: >>> From: Tvrtko Ursulin >>> >>> In order to show per client memory usage lets add some infrastructure >>> which enables tracking buffer objects owned by clients. >>> >>> We add a per client list protected by a new per client lock and to >>> support >>> delayed destruction (post client exit) we make tracked objects hold >>> references to the owning client. >>> >>> Also, object memory region teardown is moved to the existing RCU free >>> callback to allow safe dereference from the fdinfo RCU read section. >>> >>> Signed-off-by: Tvrtko Ursulin >>> --- >>> drivers/gpu/drm/i915/gem/i915_gem_object.c | 13 +-- >>> .../gpu/drm/i915/gem/i915_gem_object_types.h | 12 +++ >>> drivers/gpu/drm/i915/i915_drm_client.c | 36 +++ >>> drivers/gpu/drm/i915/i915_drm_client.h | 32 + >>> 4 files changed, 90 insertions(+), 3 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c >>> b/drivers/gpu/drm/i915/gem/i915_gem_object.c >>> index 97ac6fb37958..3dc4fbb67d2b 100644 >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c >>> @@ -105,6 +105,10 @@ void i915_gem_object_init(struct >>> drm_i915_gem_object *obj, >>> INIT_LIST_HEAD(&obj->mm.link); >>> +#ifdef CONFIG_PROC_FS >>> + INIT_LIST_HEAD(&obj->client_link); >>> +#endif >>> + >>> INIT_LIST_HEAD(&obj->lut_list); >>> spin_lock_init(&obj->lut_lock); >>> @@ -292,6 +296,10 @@ void __i915_gem_free_object_rcu(struct >>> rcu_head *head) >>> container_of(head, typeof(*obj), rcu); >>> struct drm_i915_private *i915 = to_i915(obj->base.dev); >>> + /* We need to keep this alive for RCU read access from fdinfo. */ >>> + if (obj->mm.n_placements > 1) >>> + kfree(obj->mm.placements); >>> + >>> i915_gem_object_free(obj); >>> GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); >>> @@ -388,9 +396,6 @@ void __i915_gem_free_object(struct >>> drm_i915_gem_object *obj) >>> if (obj->ops->release) >>> obj->ops->release(obj); >>> - if (obj->mm.n_placements > 1) >>> - kfree(obj->mm.placements); >>> - >>> if (obj->shares_resv_from) >>> i915_vm_resv_put(obj->shares_resv_from); >>> @@ -441,6 +446,8 @@ static void i915_gem_free_object(struct >>> drm_gem_object *gem_obj) >>> GEM_BUG_ON(i915_gem_object_is_framebuffer(obj)); >>> + i915_drm_client_remove_object(obj); >>> + >>> /* >>> * Before we free the object, make sure any pure RCU-only >>> * read-side critical sections are complete, e.g. 
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h >>> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h >>> index e72c57716bee..8de2b91b3edf 100644 >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h >>> @@ -300,6 +300,18 @@ struct drm_i915_gem_object { >>> */ >>> struct i915_address_space *shares_resv_from; >>> +#ifdef CONFIG_PROC_FS >>> + /** >>> + * @client: @i915_drm_client which created the object >>> + */ >>> + struct i915_drm_client *client; >>> + >>> + /** >>> + * @client_link: Link into @i915_drm_client.objects_list >>> + */ >>> + struct list_head client_link; >>> +#endif >>> + >>> union { >>> struct rcu_head rcu; >>> struct llist_node freed; >>> diff --git a/drivers/gpu/drm/i915/i915_drm_client.c >>> b/drivers/gpu/drm/i915/i915_drm_client.c >>> index 2a44b3876cb5..2e5e69edc0f9 100644 >>> --- a/drivers/gpu/drm/i915/i915_drm_client.c >>> +++ b/drivers/gpu/drm/i915/i915_drm_client.c >>> @@ -28,6 +28,10 @@ struct i915_drm_client *i915_drm_client_alloc(void) >>> kref_init(&client->kref); >>> spin_lock_init(&client->ctx_lock); >>> INIT_LIST_HEAD(&client->ctx_list); >>> +#ifdef
Re: [Intel-gfx] [PATCH 1/5] drm/i915: Add ability for tracking buffer objects per client
On 07-07-2023 18:32, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > In order to show per client memory usage lets add some infrastructure > which enables tracking buffer objects owned by clients. > > We add a per client list protected by a new per client lock and to support > delayed destruction (post client exit) we make tracked objects hold > references to the owning client. > > Also, object memory region teardown is moved to the existing RCU free > callback to allow safe dereference from the fdinfo RCU read section. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/gem/i915_gem_object.c| 13 +-- > .../gpu/drm/i915/gem/i915_gem_object_types.h | 12 +++ > drivers/gpu/drm/i915/i915_drm_client.c| 36 +++ > drivers/gpu/drm/i915/i915_drm_client.h| 32 + > 4 files changed, 90 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c > b/drivers/gpu/drm/i915/gem/i915_gem_object.c > index 97ac6fb37958..3dc4fbb67d2b 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c > @@ -105,6 +105,10 @@ void i915_gem_object_init(struct drm_i915_gem_object > *obj, > > INIT_LIST_HEAD(&obj->mm.link); > > +#ifdef CONFIG_PROC_FS > + INIT_LIST_HEAD(&obj->client_link); > +#endif > + > INIT_LIST_HEAD(&obj->lut_list); > spin_lock_init(&obj->lut_lock); > > @@ -292,6 +296,10 @@ void __i915_gem_free_object_rcu(struct rcu_head *head) > container_of(head, typeof(*obj), rcu); > struct drm_i915_private *i915 = to_i915(obj->base.dev); > > + /* We need to keep this alive for RCU read access from fdinfo. */ > + if (obj->mm.n_placements > 1) > + kfree(obj->mm.placements); > + > i915_gem_object_free(obj); > > GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); > @@ -388,9 +396,6 @@ void __i915_gem_free_object(struct drm_i915_gem_object > *obj) > if (obj->ops->release) > obj->ops->release(obj); > > - if (obj->mm.n_placements > 1) > - kfree(obj->mm.placements); > - > if (obj->shares_resv_from) > i915_vm_resv_put(obj->shares_resv_from); > > @@ -441,6 +446,8 @@ static void i915_gem_free_object(struct drm_gem_object > *gem_obj) > > GEM_BUG_ON(i915_gem_object_is_framebuffer(obj)); > > + i915_drm_client_remove_object(obj); > + > /* >* Before we free the object, make sure any pure RCU-only >* read-side critical sections are complete, e.g. 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > index e72c57716bee..8de2b91b3edf 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > @@ -300,6 +300,18 @@ struct drm_i915_gem_object { >*/ > struct i915_address_space *shares_resv_from; > > +#ifdef CONFIG_PROC_FS > + /** > + * @client: @i915_drm_client which created the object > + */ > + struct i915_drm_client *client; > + > + /** > + * @client_link: Link into @i915_drm_client.objects_list > + */ > + struct list_head client_link; > +#endif > + > union { > struct rcu_head rcu; > struct llist_node freed; > diff --git a/drivers/gpu/drm/i915/i915_drm_client.c > b/drivers/gpu/drm/i915/i915_drm_client.c > index 2a44b3876cb5..2e5e69edc0f9 100644 > --- a/drivers/gpu/drm/i915/i915_drm_client.c > +++ b/drivers/gpu/drm/i915/i915_drm_client.c > @@ -28,6 +28,10 @@ struct i915_drm_client *i915_drm_client_alloc(void) > kref_init(&client->kref); > spin_lock_init(&client->ctx_lock); > INIT_LIST_HEAD(&client->ctx_list); > +#ifdef CONFIG_PROC_FS > + spin_lock_init(&client->objects_lock); > + INIT_LIST_HEAD(&client->objects_list); > +#endif > > return client; > } > @@ -108,4 +112,36 @@ void i915_drm_client_fdinfo(struct drm_printer *p, > struct drm_file *file) > for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) > show_client_class(p, i915, file_priv->client, i); > } > + > +void i915_drm_client_add_object(struct i915_drm_client *client, > + struct drm_i915_gem_object *obj) > +{ > + unsigned long flags; > + > + GEM_WARN_ON(obj->client); > + GEM_WARN_ON(!list_empty(&obj->client_link)); > + > + spin_lock_irqsave(&client->objects_lock, flags); > + obj->client = i915_drm_client_get(client); > + list_add_tail_rcu(&obj->client_link, &client->objects_list); > + spin_unlock_irqrestore(&client->objects_lock, flags); > +} would it be nice to mention that we use this client infra only to track internal objects. While the user created through file->object_idr added during handle creation time. Thanks, Aravind. > + > +bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj) > +{ > + struct i915_drm_client
Re: [Intel-xe] [RFC 1/5] drm/netlink: Add netlink infrastructure
On 06-06-2023 19:34, Tomer Tayar wrote: > On 05/06/2023 20:18, Iddamsetty, Aravind wrote: >> >> On 04-06-2023 22:37, Tomer Tayar wrote: >>> On 26/05/2023 19:20, Aravind Iddamsetty wrote: >>>> Define the netlink commands and attributes that can be commonly used >>>> across by drm drivers. >>>> >>>> Signed-off-by: Aravind Iddamsetty >>>> --- >>>>include/uapi/drm/drm_netlink.h | 68 ++ >>>>1 file changed, 68 insertions(+) >>>>create mode 100644 include/uapi/drm/drm_netlink.h >>>> >>>> diff --git a/include/uapi/drm/drm_netlink.h >>>> b/include/uapi/drm/drm_netlink.h >>>> new file mode 100644 >>>> index ..28e7a334d0c7 >>>> --- /dev/null >>>> +++ b/include/uapi/drm/drm_netlink.h >>>> @@ -0,0 +1,68 @@ >>>> +/* >>>> + * Copyright 2023 Intel Corporation >>>> + * >>>> + * Permission is hereby granted, free of charge, to any person obtaining a >>>> + * copy of this software and associated documentation files (the >>>> "Software"), >>>> + * to deal in the Software without restriction, including without >>>> limitation >>>> + * the rights to use, copy, modify, merge, publish, distribute, >>>> sublicense, >>>> + * and/or sell copies of the Software, and to permit persons to whom the >>>> + * Software is furnished to do so, subject to the following conditions: >>>> + * >>>> + * The above copyright notice and this permission notice (including the >>>> next >>>> + * paragraph) shall be included in all copies or substantial portions of >>>> the >>>> + * Software. >>>> + * >>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >>>> EXPRESS OR >>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >>>> MERCHANTABILITY, >>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT >>>> SHALL >>>> + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES >>>> OR >>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >>>> + * OTHER DEALINGS IN THE SOFTWARE. >>>> + */ >>>> + >>>> +#ifndef _DRM_NETLINK_H_ >>>> +#define _DRM_NETLINK_H_ >>>> + >>>> +#include >>>> +#include >>>> +#include >>> This is a uapi header. >>> Are all header files here available for user? >> no they are not, I later came to know that we should not have any of >> that user can't use so will split the header into 2. >>> Also, should we add here "#if defined(__cplusplus) extern "C" { ..."? >> ya will add that >>>> + >>>> +#define DRM_GENL_VERSION 1 >>>> + >>>> +enum error_cmds { >>>> + DRM_CMD_UNSPEC, >>>> + /* command to list all errors names with config-id */ >>>> + DRM_CMD_QUERY, >>>> + /* command to get a counter for a specific error */ >>>> + DRM_CMD_READ_ONE, >>>> + /* command to get counters of all errors */ >>>> + DRM_CMD_READ_ALL, >>>> + >>>> + __DRM_CMD_MAX, >>>> + DRM_CMD_MAX = __DRM_CMD_MAX - 1, >>>> +}; >>>> + >>>> +enum error_attr { >>>> + DRM_ATTR_UNSPEC, >>>> + DRM_ATTR_PAD = DRM_ATTR_UNSPEC, >>>> + DRM_ATTR_REQUEST, /* NLA_U8 */ >>>> + DRM_ATTR_QUERY_REPLY, /*NLA_NESTED*/ >>> Missing spaces in /*NLA_NESTED*/ >>> >>>> + DRM_ATTR_ERROR_NAME, /* NLA_NUL_STRING */ >>>> + DRM_ATTR_ERROR_ID, /* NLA_U64 */ >>>> + DRM_ATTR_ERROR_VALUE, /* NLA_U64 */ >>>> + >>>> + __DRM_ATTR_MAX, >>>> + DRM_ATTR_MAX = __DRM_ATTR_MAX - 1, >>>> +}; >>>> + >>>> +/* attribute policies */ >>>> +static const struct nla_policy drm_attr_policy_query[DRM_ATTR_MAX + 1] = { >>>> + [DRM_ATTR_REQUEST] = { .type = NLA_U8 }, >>>> +}; >>> Should these policies structures be in uapi? 
>> so ya these will also likely move into a separate drm header as >> userspace would define there own policy. >>>> + >>>> +static const struct nla_policy dr
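To make the split being agreed on here concrete, a rough sketch (file locations hypothetical): the uapi header would carry only the wire ABI, i.e. the command and attribute enums, while the validation policy stays kernel-internal:

	/* include/uapi/drm/drm_netlink.h -- wire ABI only */
	enum error_cmds {
		DRM_CMD_UNSPEC,
		DRM_CMD_QUERY,
		DRM_CMD_READ_ONE,
		DRM_CMD_READ_ALL,
		__DRM_CMD_MAX,
		DRM_CMD_MAX = __DRM_CMD_MAX - 1,
	};

	/* kernel-only header or .c file: not part of uapi; userspace that
	 * wants validation builds its own nla_policy table */
	static const struct nla_policy drm_attr_policy_query[DRM_ATTR_MAX + 1] = {
		[DRM_ATTR_REQUEST] = { .type = NLA_U8 },
	};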
Re: [PATCH 6/8] drm: Add drm_gem_prime_fd_to_handle_obj
On 09-06-2023 17:41, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > I need a new flavour of the drm_gem_prime_fd_to_handle helper, one which > will return a reference to a newly created GEM objects (if created), in > order to enable tracking of imported i915 GEM objects in the following > patch. instead of this what if we implement the open call back in i915 struct drm_gem_object_funcs { /** * @open: * * Called upon GEM handle creation. * * This callback is optional. */ int (*open)(struct drm_gem_object *obj, struct drm_file *file); which gets called whenever a handle(drm_gem_handle_create_tail) is created and in the open we can check if to_intel_bo(obj)->base.dma_buf then it is imported if not it is owned or created by it. Thanks, Aravind. > > Minor code reshuffule and only trivial additions on top of > drm_gem_prime_fd_to_handle. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/drm_prime.c | 41 - > include/drm/drm_prime.h | 4 > 2 files changed, 40 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c > index d29dafce9bb0..ef75f67e3057 100644 > --- a/drivers/gpu/drm/drm_prime.c > +++ b/drivers/gpu/drm/drm_prime.c > @@ -284,11 +284,12 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf) > EXPORT_SYMBOL(drm_gem_dmabuf_release); > > /** > - * drm_gem_prime_fd_to_handle - PRIME import function for GEM drivers > + * drm_gem_prime_fd_to_handle_obj - PRIME import function for GEM drivers > * @dev: drm_device to import into > * @file_priv: drm file-private structure > * @prime_fd: fd id of the dma-buf which should be imported > * @handle: pointer to storage for the handle of the imported buffer object > + * @objp: optional pointer in which reference to created GEM object can be > returned > * > * This is the PRIME import function which must be used mandatorily by GEM > * drivers to ensure correct lifetime management of the underlying GEM > object. > @@ -297,9 +298,10 @@ EXPORT_SYMBOL(drm_gem_dmabuf_release); > * > * Returns 0 on success or a negative error code on failure. > */ > -int drm_gem_prime_fd_to_handle(struct drm_device *dev, > -struct drm_file *file_priv, int prime_fd, > -uint32_t *handle) > +int drm_gem_prime_fd_to_handle_obj(struct drm_device *dev, > +struct drm_file *file_priv, int prime_fd, > +uint32_t *handle, > +struct drm_gem_object **objp) > { > struct dma_buf *dma_buf; > struct drm_gem_object *obj; > @@ -336,7 +338,8 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev, > > /* _handle_create_tail unconditionally unlocks dev->object_name_lock. 
*/ > ret = drm_gem_handle_create_tail(file_priv, obj, handle); > - drm_gem_object_put(obj); > + if (!objp) > + drm_gem_object_put(obj); > if (ret) > goto out_put; > > @@ -348,6 +351,9 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev, > > dma_buf_put(dma_buf); > > + if (objp) > + *objp = obj; > + > return 0; > > fail: > @@ -356,6 +362,8 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev, >*/ > drm_gem_handle_delete(file_priv, *handle); > dma_buf_put(dma_buf); > + if (objp) > + drm_gem_object_put(obj); > return ret; > > out_unlock: > @@ -365,6 +373,29 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev, > dma_buf_put(dma_buf); > return ret; > } > +EXPORT_SYMBOL(drm_gem_prime_fd_to_handle_obj); > + > +/** > + * drm_gem_prime_fd_to_handle - PRIME import function for GEM drivers > + * @dev: drm_device to import into > + * @file_priv: drm file-private structure > + * @prime_fd: fd id of the dma-buf which should be imported > + * @handle: pointer to storage for the handle of the imported buffer object > + * > + * This is the PRIME import function which must be used mandatorily by GEM > + * drivers to ensure correct lifetime management of the underlying GEM > object. > + * The actual importing of GEM object from the dma-buf is done through the > + * &drm_driver.gem_prime_import driver callback. > + * > + * Returns 0 on success or a negative error code on failure. > + */ > +int drm_gem_prime_fd_to_handle(struct drm_device *dev, > +struct drm_file *file_priv, int prime_fd, > +uint32_t *handle) > +{ > + return drm_gem_prime_fd_to_handle_obj(dev, file_priv, prime_fd, handle, > + NULL); > +} > EXPORT_SYMBOL(drm_gem_prime_fd_to_handle); > > int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, > diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h > index 2a1d01e5b56b..10d145ce6586 100644
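For reference, a rough sketch of the alternative suggested above (function name hypothetical, not part of the posted series): hook drm_gem_object_funcs.open, which the core calls from drm_gem_handle_create_tail() for every new handle, and start tracking the object there:

	static int i915_gem_object_open(struct drm_gem_object *gem_obj,
					struct drm_file *file)
	{
		struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
		struct drm_i915_file_private *fpriv = file->driver_priv;

		/*
		 * Called for every newly created handle, whether the object
		 * was created by this client or imported; if needed, imported
		 * objects can be told apart by obj->base.dma_buf being set
		 * (the reasoning in the reply above).
		 */
		i915_drm_client_add_object(fpriv->client, obj);

		return 0;
	}

wired up via .open = i915_gem_object_open in i915's drm_gem_object_funcs table.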
Re: [Intel-gfx] [PATCH 1/5] drm/i915: Track buffer objects belonging to clients
On 08-06-2023 20:21, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > In order to show per client memory usage lets start tracking which > objects belong to which clients. > > We start with objects explicitly created by object creation UAPI and > track it on a new per client lists, protected by a new per client lock. > In order for delayed destruction (post client exit), we make tracked > objects hold references to the owning client. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/gem/i915_gem_create.c| 32 ++-- > drivers/gpu/drm/i915/gem/i915_gem_object.c| 6 +++ > .../gpu/drm/i915/gem/i915_gem_object_types.h | 12 ++ > drivers/gpu/drm/i915/i915_drm_client.c| 36 +- > drivers/gpu/drm/i915/i915_drm_client.h| 37 ++- > drivers/gpu/drm/i915/i915_gem.c | 2 +- > 6 files changed, 119 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c > b/drivers/gpu/drm/i915/gem/i915_gem_create.c > index d24c0ce8805c..4f1957638207 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c > @@ -11,6 +11,7 @@ > #include "gem/i915_gem_region.h" > #include "pxp/intel_pxp.h" > > +#include "i915_drm_client.h" > #include "i915_drv.h" > #include "i915_gem_create.h" > #include "i915_trace.h" > @@ -164,6 +165,14 @@ __i915_gem_object_create_user(struct drm_i915_private > *i915, u64 size, >n_placements, 0); > } > > +static void add_file_obj(struct drm_file *file, > + struct drm_i915_gem_object *obj) > +{ > + struct drm_i915_file_private *fpriv = file->driver_priv; > + > + i915_drm_client_add_object(fpriv->client, obj); > +} > + > int > i915_gem_dumb_create(struct drm_file *file, >struct drm_device *dev, > @@ -174,6 +183,7 @@ i915_gem_dumb_create(struct drm_file *file, > enum intel_memory_type mem_type; > int cpp = DIV_ROUND_UP(args->bpp, 8); > u32 format; > + int ret; > > switch (cpp) { > case 1: > @@ -212,7 +222,12 @@ i915_gem_dumb_create(struct drm_file *file, > if (IS_ERR(obj)) > return PTR_ERR(obj); > > - return i915_gem_publish(obj, file, &args->size, &args->handle); > + ret = i915_gem_publish(obj, file, &args->size, &args->handle); > + > + if (!ret) > + add_file_obj(file, obj); > + > + return ret; > } > > /** > @@ -229,6 +244,7 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, > struct drm_i915_gem_create *args = data; > struct drm_i915_gem_object *obj; > struct intel_memory_region *mr; > + int ret; > > mr = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); > > @@ -236,7 +252,12 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, > if (IS_ERR(obj)) > return PTR_ERR(obj); Do we intend to track only client created objects and not imported ? or is that taken care by this "obj->base.handle_count > 1" Thanks, Aravind. 
> > - return i915_gem_publish(obj, file, &args->size, &args->handle); > + ret = i915_gem_publish(obj, file, &args->size, &args->handle); > + > + if (!ret) > + add_file_obj(file, obj); > + > + return ret; > } > > struct create_ext { > @@ -494,5 +515,10 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void > *data, > obj->pat_set_by_user = true; > } > > - return i915_gem_publish(obj, file, &args->size, &args->handle); > + ret = i915_gem_publish(obj, file, &args->size, &args->handle); > + > + if (!ret) > + add_file_obj(file, obj); > + > + return ret; > } > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c > b/drivers/gpu/drm/i915/gem/i915_gem_object.c > index 97ac6fb37958..46de9b1b3f1d 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c > @@ -105,6 +105,10 @@ void i915_gem_object_init(struct drm_i915_gem_object > *obj, > > INIT_LIST_HEAD(&obj->mm.link); > > +#ifdef CONFIG_PROC_FS > + INIT_LIST_HEAD(&obj->client_link); > +#endif > + > INIT_LIST_HEAD(&obj->lut_list); > spin_lock_init(&obj->lut_lock); > > @@ -441,6 +445,8 @@ static void i915_gem_free_object(struct drm_gem_object > *gem_obj) > > GEM_BUG_ON(i915_gem_object_is_framebuffer(obj)); > > + i915_drm_client_remove_object(obj); > + > /* >* Before we free the object, make sure any pure RCU-only >* read-side critical sections are complete, e.g. > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > index e72c57716bee..8de2b91b3edf 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > @@ -300,6 +300,18 @@ struct drm_i915_gem_object { >*/ >
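For reference, a rough sketch of what the i915_drm_client_add_object()/i915_drm_client_remove_object() helpers used above could look like; the i915_drm_client.c hunk is not quoted here, so field names such as client->objects_lock, client->objects_list and obj->client are assumptions about the shape of the tracking, not the actual patch body.

void i915_drm_client_add_object(struct i915_drm_client *client,
                                struct drm_i915_gem_object *obj)
{
        unsigned long flags;

        GEM_WARN_ON(obj->client);
        GEM_WARN_ON(!list_empty(&obj->client_link));

        /* The object holds a client reference so it can outlive the file. */
        obj->client = i915_drm_client_get(client);

        spin_lock_irqsave(&client->objects_lock, flags);
        list_add_tail_rcu(&obj->client_link, &client->objects_list);
        spin_unlock_irqrestore(&client->objects_lock, flags);
}

void i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
{
        struct i915_drm_client *client = fetch_and_zero(&obj->client);
        unsigned long flags;

        if (!client) /* object was never tracked (e.g. internal) */
                return;

        spin_lock_irqsave(&client->objects_lock, flags);
        list_del_rcu(&obj->client_link);
        spin_unlock_irqrestore(&client->objects_lock, flags);

        i915_drm_client_put(client);
}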
Re: [RFC 0/5] Proposal to use netlink for RAS and Telemetry across drm subsystem
On 05-06-2023 22:17, Alex Deucher wrote: > Adding the relevant AMD folks for RAS. We currently expose RAS via > sysfs, but also have an event interface in KFD which may be somewhat > similar to this. > > If we were to converge on a common RAS interface, would we want to > look at any commonality in bad page storage/reporting for device > memory? Could you please elaborate a bit on this. Thanks, Aravind. > > Alex > > On Fri, May 26, 2023 at 12:21 PM Aravind Iddamsetty > wrote: >> >> Our hardware supports RAS(Reliability, Availability, Serviceability) by >> exposing a set of error counters which can be used by observability >> tools to take corrective actions or repairs. Traditionally there were >> being exposed via PMU (for relative counters) and sysfs interface (for >> absolute value) in our internal branch. But, due to the limitations in >> this approach to use two interfaces and also not able to have an event >> based reporting or configurability, an alternative approach to try >> netlink was suggested by community for drm subsystem wide UAPI for RAS >> and telemetry as discussed in [1]. >> >> This [1] is the inspiration to this series. It uses the generic >> netlink(genl) family subsystem and exposes a set of commands that can >> be used by every drm driver, the framework provides a means to have >> custom commands too. Each drm driver instance in this example xe driver >> instance registers a family and operations to the genl subsystem through >> which it enumerates and reports the error counters. An event based >> notification is also supported to which userpace can subscribe to and >> be notified when any error occurs and read the error counter this avoids >> continuous polling on error counter. This can also be extended to >> threshold based notification. >> >> [1]: >> https://airlied.blogspot.com/2022/09/accelerators-bof-outcomes-summary.html >> >> this series is on top of https://patchwork.freedesktop.org/series/116181/ >> >> Below is an example tool drm_ras which demonstrates the use of the >> supported commands. The tool will be sent to ML with the subject >> "[RFC i-g-t 0/1] A tool to demonstrate use of netlink sockets to read RAS >> error counters" >> >> read single error counter: >> >> $ ./drm_ras READ_ONE --device=drm:/dev/dri/card1 >> --error_id=0x0005 >> counter value 0 >> >> read all error counters: >> >> $ ./drm_ras READ_ALL --device=drm:/dev/dri/card1 >> nameconfig-id >>counter >> >> error-gt0-correctable-guc 0x0001 >>0 >> error-gt0-correctable-slm 0x0003 >>0 >> error-gt0-correctable-eu-ic 0x0004 >>0 >> error-gt0-correctable-eu-grf0x0005 >>0 >> error-gt0-fatal-guc 0x0009 >>0 >> error-gt0-fatal-slm 0x000d >>0 >> error-gt0-fatal-eu-grf 0x000f >>0 >> error-gt0-fatal-fpu 0x0010 >>0 >> error-gt0-fatal-tlb 0x0011 >>0 >> error-gt0-fatal-l3-fabric 0x0012 >>0 >> error-gt0-correctable-subslice 0x0013 >>0 >> error-gt0-correctable-l3bank0x0014 >>0 >> error-gt0-fatal-subslice0x0015 >>0 >> error-gt0-fatal-l3bank 0x0016 >>0 >> error-gt0-sgunit-correctable0x0017 >>0 >> error-gt0-sgunit-nonfatal 0x0018 >>0 >> error-gt0-sgunit-fatal 0x0019 >>0 >> error-gt0-soc-fatal-psf-csc-0 0x001a >>0 >> error-gt0-soc-fatal-psf-csc-1 0x001b >>0 >> error-gt0-soc-fatal-psf-csc-2 0x001c >>0 >> error-gt0-soc-fatal-punit 0x001d >>0 >> error-gt0-soc-fatal-psf-0 0x001e >>0 >> error-gt0-soc-fatal-psf-1 0x001f >>0 >> error-gt0-soc-fatal-psf-2 0x0020 >>0 >> error-gt0-soc-fatal-cd0 0x0021 >>0 >> error-gt0-soc-fatal-cd0-mdfi0x0022 >>0 >> error-gt0-soc-fatal-mdfi-east 0x0023 >>0 >> error-gt0-soc-fatal-mdfi-south
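On the event-based notification mentioned in the cover letter, a hedged sketch of the kernel side: multicasting an error event on the per-device generic netlink family so subscribed observers do not have to poll. The event command and the multicast group index are made up for the example (the RFC does not define them); the DRM_ATTR_* ids and the xe_genl_family field are from this series.

#include <net/genetlink.h>

#define EXAMPLE_CMD_ERROR_EVENT 0x10 /* hypothetical, not part of the RFC */

/* Assumes the family registered a single multicast group at index 0. */
static int example_notify_error(struct xe_device *xe, u64 error_id, u64 value)
{
        struct sk_buff *msg;
        void *hdr;

        msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg)
                return -ENOMEM;

        hdr = genlmsg_put(msg, 0, 0, &xe->xe_genl_family, 0,
                          EXAMPLE_CMD_ERROR_EVENT);
        if (!hdr)
                goto out_free;

        if (nla_put_u64_64bit(msg, DRM_ATTR_ERROR_ID, error_id, DRM_ATTR_PAD) ||
            nla_put_u64_64bit(msg, DRM_ATTR_ERROR_VALUE, value, DRM_ATTR_PAD))
                goto out_free;

        genlmsg_end(msg, hdr);

        return genlmsg_multicast(&xe->xe_genl_family, msg, 0, 0, GFP_KERNEL);

out_free:
        nlmsg_free(msg);
        return -EMSGSIZE;
}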
Re: [Intel-xe] [RFC 2/5] drm/xe/RAS: Register a genl netlink family
On 04-06-2023 22:39, Tomer Tayar wrote: > On 26/05/2023 19:20, Aravind Iddamsetty wrote: >> Use the generic netlink(genl) subsystem to expose the RAS counters to >> userspace. We define a family structure and operations and register to >> genl subsystem and these callbacks will be invoked by genl subsystem when >> userspace sends a registered command with a family identifier to genl >> subsystem. >> >> Signed-off-by: Aravind Iddamsetty >> --- >> drivers/gpu/drm/xe/Makefile | 1 + >> drivers/gpu/drm/xe/xe_device.c | 3 + >> drivers/gpu/drm/xe/xe_device_types.h | 2 + >> drivers/gpu/drm/xe/xe_module.c | 2 + >> drivers/gpu/drm/xe/xe_netlink.c | 89 >> drivers/gpu/drm/xe/xe_netlink.h | 14 + >> 6 files changed, 111 insertions(+) >> create mode 100644 drivers/gpu/drm/xe/xe_netlink.c >> create mode 100644 drivers/gpu/drm/xe/xe_netlink.h >> >> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile >> index b84e191ba14f..2b42165bc824 100644 >> --- a/drivers/gpu/drm/xe/Makefile >> +++ b/drivers/gpu/drm/xe/Makefile >> @@ -67,6 +67,7 @@ xe-y += xe_bb.o \ >> xe_mmio.o \ >> xe_mocs.o \ >> xe_module.o \ >> +xe_netlink.o \ >> xe_pat.o \ >> xe_pci.o \ >> xe_pcode.o \ >> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c >> index 323356a44e7f..aa12ef12d9dc 100644 >> --- a/drivers/gpu/drm/xe/xe_device.c >> +++ b/drivers/gpu/drm/xe/xe_device.c >> @@ -24,6 +24,7 @@ >> #include "xe_irq.h" >> #include "xe_mmio.h" >> #include "xe_module.h" >> +#include "xe_netlink.h" >> #include "xe_pcode.h" >> #include "xe_pm.h" >> #include "xe_query.h" >> @@ -317,6 +318,8 @@ int xe_device_probe(struct xe_device *xe) >> >> xe_display_register(xe); >> >> +xe_genl_register(xe); > > xe_genl_register() can fail That is right but I didn't want to fail the driver load as it would not impact any device functionality but doesn't provide observability. hence a warning would be printed "xe genl family registration failed". > >> + >> xe_debugfs_register(xe); >> >> err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe); >> diff --git a/drivers/gpu/drm/xe/xe_device_types.h >> b/drivers/gpu/drm/xe/xe_device_types.h >> index 682ebdd1c09e..c9612a54c48f 100644 >> --- a/drivers/gpu/drm/xe/xe_device_types.h >> +++ b/drivers/gpu/drm/xe/xe_device_types.h >> @@ -10,6 +10,7 @@ >> >> #include >> #include >> +#include >> #include >> >> #include "xe_gt_types.h" >> @@ -347,6 +348,7 @@ struct xe_device { >> u32 lvds_channel_mode; >> } params; >> #endif >> +struct genl_family xe_genl_family; > > Should it be added above, before the "private" section? > Maybe add a kernel-doc comment for it? thanks for pointing out will move it there. 
> >> }; >> >> /** >> diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c >> index 6860586ce7f8..1eb73eb9a9a5 100644 >> --- a/drivers/gpu/drm/xe/xe_module.c >> +++ b/drivers/gpu/drm/xe/xe_module.c >> @@ -11,6 +11,7 @@ >> #include "xe_drv.h" >> #include "xe_hw_fence.h" >> #include "xe_module.h" >> +#include "xe_netlink.h" >> #include "xe_pci.h" >> #include "xe_sched_job.h" >> >> @@ -67,6 +68,7 @@ static void __exit xe_exit(void) >> { >> int i; >> >> +xe_genl_cleanup(); >> xe_unregister_pci_driver(); >> >> for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) >> diff --git a/drivers/gpu/drm/xe/xe_netlink.c >> b/drivers/gpu/drm/xe/xe_netlink.c >> new file mode 100644 >> index ..63ef238ebc27 >> --- /dev/null >> +++ b/drivers/gpu/drm/xe/xe_netlink.c >> @@ -0,0 +1,89 @@ >> +// SPDX-License-Identifier: MIT >> +/* >> + * Copyright © 2023 Intel Corporation >> + */ >> + >> +#include >> + >> +#include "xe_device.h" >> + >> +DEFINE_XARRAY(xe_xarray); > > xe_array sounds too generic. Maybe it should be more specific like > xe_genl_xarray? > In addition, it should be probably static. Ok. Thanks, Aravind. > > Thanks, > Tomer > >> + >> +static int xe_genl_list_errors(struct sk_buff *msg, struct genl_info *info) >> +{ >> +return 0; >> +} >> + >> +static int xe_genl_read_error(struct sk_buff *msg, struct genl_info *info) >> +{ >> +return 0; >> +} >> + >> +/* operations definition */ >> +static const struct genl_ops xe_genl_ops[] = { >> +{ >> +.cmd = DRM_CMD_QUERY, >> +.doit = xe_genl_list_errors, >> +.policy = drm_attr_policy_query, >> +}, >> +{ >> +.cmd = DRM_CMD_READ_ONE, >> +.doit = xe_genl_read_error, >> +.policy = drm_attr_policy_read_one, >> +}, >> +{ >> +.cmd = DRM_CMD_READ_ALL, >> +.doit = xe_genl_list_errors, >> +.policy = drm_attr_policy_query, >> +}, >> +}; >> + >> +static void xe_genl_deregister(struct drm_device *dev, void *arg) >> +{ >> +struct xe_device *xe = arg
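For illustration, one plausible way the xe_genl_read_error() stub above could eventually reply with a single counter. How the xe_device is recovered from the genl_info and how the counter is looked up are placeholders (example_xe_from_info(), example_lookup_error_counter()), not part of this series.

static int example_genl_read_error(struct sk_buff *msg, struct genl_info *info)
{
        struct xe_device *xe = example_xe_from_info(info); /* placeholder */
        struct sk_buff *reply;
        void *hdr;
        u64 id, value;

        if (!info->attrs[DRM_ATTR_ERROR_ID])
                return -EINVAL;

        id = nla_get_u64(info->attrs[DRM_ATTR_ERROR_ID]);
        value = example_lookup_error_counter(xe, id); /* placeholder */

        reply = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!reply)
                return -ENOMEM;

        hdr = genlmsg_put(reply, info->snd_portid, info->snd_seq,
                          &xe->xe_genl_family, 0, DRM_CMD_READ_ONE);
        if (!hdr)
                goto out_free;

        if (nla_put_u64_64bit(reply, DRM_ATTR_ERROR_ID, id, DRM_ATTR_PAD) ||
            nla_put_u64_64bit(reply, DRM_ATTR_ERROR_VALUE, value, DRM_ATTR_PAD))
                goto out_free;

        genlmsg_end(reply, hdr);
        return genlmsg_reply(reply, info);

out_free:
        nlmsg_free(reply);
        return -EMSGSIZE;
}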
Re: [Intel-xe] [RFC 1/5] drm/netlink: Add netlink infrastructure
On 04-06-2023 22:37, Tomer Tayar wrote: > On 26/05/2023 19:20, Aravind Iddamsetty wrote: >> Define the netlink commands and attributes that can be commonly used >> across by drm drivers. >> >> Signed-off-by: Aravind Iddamsetty >> --- >> include/uapi/drm/drm_netlink.h | 68 ++ >> 1 file changed, 68 insertions(+) >> create mode 100644 include/uapi/drm/drm_netlink.h >> >> diff --git a/include/uapi/drm/drm_netlink.h b/include/uapi/drm/drm_netlink.h >> new file mode 100644 >> index ..28e7a334d0c7 >> --- /dev/null >> +++ b/include/uapi/drm/drm_netlink.h >> @@ -0,0 +1,68 @@ >> +/* >> + * Copyright 2023 Intel Corporation >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the >> "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the next >> + * paragraph) shall be included in all copies or substantial portions of the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >> OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. >> + */ >> + >> +#ifndef _DRM_NETLINK_H_ >> +#define _DRM_NETLINK_H_ >> + >> +#include >> +#include >> +#include > > This is a uapi header. > Are all header files here available for user? no they are not, I later came to know that we should not have any of that user can't use so will split the header into 2. > Also, should we add here "#if defined(__cplusplus) extern "C" { ..."? ya will add that > >> + >> +#define DRM_GENL_VERSION 1 >> + >> +enum error_cmds { >> +DRM_CMD_UNSPEC, >> +/* command to list all errors names with config-id */ >> +DRM_CMD_QUERY, >> +/* command to get a counter for a specific error */ >> +DRM_CMD_READ_ONE, >> +/* command to get counters of all errors */ >> +DRM_CMD_READ_ALL, >> + >> +__DRM_CMD_MAX, >> +DRM_CMD_MAX = __DRM_CMD_MAX - 1, >> +}; >> + >> +enum error_attr { >> +DRM_ATTR_UNSPEC, >> +DRM_ATTR_PAD = DRM_ATTR_UNSPEC, >> +DRM_ATTR_REQUEST, /* NLA_U8 */ >> +DRM_ATTR_QUERY_REPLY, /*NLA_NESTED*/ > > Missing spaces in /*NLA_NESTED*/ > >> +DRM_ATTR_ERROR_NAME, /* NLA_NUL_STRING */ >> +DRM_ATTR_ERROR_ID, /* NLA_U64 */ >> +DRM_ATTR_ERROR_VALUE, /* NLA_U64 */ >> + >> +__DRM_ATTR_MAX, >> +DRM_ATTR_MAX = __DRM_ATTR_MAX - 1, >> +}; >> + >> +/* attribute policies */ >> +static const struct nla_policy drm_attr_policy_query[DRM_ATTR_MAX + 1] = { >> +[DRM_ATTR_REQUEST] = { .type = NLA_U8 }, >> +}; > > Should these policies structures be in uapi? so ya these will also likely move into a separate drm header as userspace would define there own policy. 
> >> + >> +static const struct nla_policy drm_attr_policy_read_one[DRM_ATTR_MAX + 1] = >> { >> +[DRM_ATTR_ERROR_ID] = { .type = NLA_U64 }, >> +}; > > I might miss something here, but why it is not a single policy structure > with entries for DRM_ATTR_REQUEST and DRM_ATTR_ERROR_ID? Each command can have its own policy defined, i.e. only the attributes it expects; defining just those also gives us a validation check. In the present implementation DRM_CMD_QUERY and DRM_CMD_READ_ALL expect only DRM_ATTR_REQUEST, while DRM_CMD_READ_ONE expects only DRM_ATTR_ERROR_ID as part of the incoming message from userspace. Thanks, Aravind. > > Thanks, > Tomer > >> + >> +#endif > >
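To illustrate the split being discussed, a sketch of one way to carve the header in two (file names are assumptions): the uapi header keeps only the commands and attribute ids that userspace needs, while the nla_policy tables move to a kernel-internal header since userspace defines its own policies.

/* include/uapi/drm/drm_netlink.h -- userspace-visible part (sketch) */
enum error_cmds {
        DRM_CMD_UNSPEC,
        DRM_CMD_QUERY,
        DRM_CMD_READ_ONE,
        DRM_CMD_READ_ALL,
        __DRM_CMD_MAX,
        DRM_CMD_MAX = __DRM_CMD_MAX - 1,
};
/* ... enum error_attr exactly as in the RFC ... */

/* include/drm/drm_netlink.h -- kernel-internal part (sketch) */
#include <net/netlink.h>
#include <uapi/drm/drm_netlink.h>

static const struct nla_policy drm_attr_policy_query[DRM_ATTR_MAX + 1] = {
        [DRM_ATTR_REQUEST] = { .type = NLA_U8 },
};

static const struct nla_policy drm_attr_policy_read_one[DRM_ATTR_MAX + 1] = {
        [DRM_ATTR_ERROR_ID] = { .type = NLA_U64 },
};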
Re: [Intel-xe] [RFC 0/5] Proposal to use netlink for RAS and Telemetry across drm subsystem
On 04-06-2023 22:37, Tomer Tayar wrote: > On 26/05/2023 19:20, Aravind Iddamsetty wrote: >> Our hardware supports RAS(Reliability, Availability, Serviceability) by >> exposing a set of error counters which can be used by observability >> tools to take corrective actions or repairs. Traditionally there were >> being exposed via PMU (for relative counters) and sysfs interface (for >> absolute value) in our internal branch. But, due to the limitations in >> this approach to use two interfaces and also not able to have an event >> based reporting or configurability, an alternative approach to try >> netlink was suggested by community for drm subsystem wide UAPI for RAS >> and telemetry as discussed in [1]. >> >> This [1] is the inspiration to this series. It uses the generic >> netlink(genl) family subsystem and exposes a set of commands that can >> be used by every drm driver, the framework provides a means to have >> custom commands too. Each drm driver instance in this example xe driver >> instance registers a family and operations to the genl subsystem through >> which it enumerates and reports the error counters. An event based >> notification is also supported to which userpace can subscribe to and >> be notified when any error occurs and read the error counter this avoids >> continuous polling on error counter. This can also be extended to >> threshold based notification. > > Hi Aravind, Hi Tomer, Thanks a lot for your review. > > The habanalabs driver is another candidate to use this netlink-based drm > framework. > As a single-user device, we have an additional "control" device that > allows multiple applications to query for information and to monitor the > "compute" device. > And while we are about to move the compute device to the accel nodes, we > don't have a real replacement there for the control device. > > Another possible usage of this framework for habanalabs is the events > notification. > Currently we have an eventfd-based mechanism, and after being notified > about an event, user starts querying about the event and the relevant > info, usually in several requests. > With this framework we should be allegedly possible to gather all > relevant info together with the event itself. that is right with the multicast event we can pack data too. > > The current implementation seems intended more to errors (and quite > "tailored" to Xe needs ...), while in habanalabs we would need it also > for non-error static/dynamic info. > Maybe we should revise the existing commands/attributes to be more generic? correct, at present that is the usecase xe driver has and atleast for the error part I believe is generic if not we can make it, the framework is extensible. The idea I had was generic commands which every driver can use will be part of drm framework and if there are specific commands or attributes that shall be part of driver. But some thought is needed here as MAX attributes is needed by userspace and how to define attribute policy etc.., > > Moreover, the drm part is very small, while most of the netlink "mess" > is still done by the specific driver. > So what is the added value in making it a "drm framework"? Do we enforce > something here for drm drivers that use it? Do we help them with simpler > APIs and hiding the internals of netlink?> Maybe it would be worth moving > some functionality from the Xe driver > into drm helpers? 
your suggestion sounds good and interesting but it might need some analysis like if we move the registration parts to drm framework how would we register the driver private commands and attributes if there are any. But ya having most of the part at drm level helps all the driver. I'll do some analysis and i'll come back on this. Thanks, Aravind. > > Thanks, > Tomer > >> [1]: >> https://airlied.blogspot.com/2022/09/accelerators-bof-outcomes-summary.html >> >> this series is on top of https://patchwork.freedesktop.org/series/116181/ >> >> Below is an example tool drm_ras which demonstrates the use of the >> supported commands. The tool will be sent to ML with the subject >> "[RFC i-g-t 0/1] A tool to demonstrate use of netlink sockets to read RAS >> error counters" >> >> read single error counter: >> >> $ ./drm_ras READ_ONE --device=drm:/dev/dri/card1 >> --error_id=0x0005 >> counter value 0 >> >> read all error counters: >> >> $ ./drm_ras READ_ALL --device=drm:/dev/dri/card1 >> nameconfig-id >>counter >> >> error-gt0-correctable-guc 0x0001 >>0 >> error-gt0-correctable-slm 0x0003 >>0 >> error-gt0-correctable-eu-ic 0x0004 >>0 >> error-gt0-correctable-eu-grf0x0005 >>0 >> error-gt0-fatal-guc 0x0009 >>0 >> error-gt0-fatal-
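To make the idea concrete, a very rough sketch of what a drm-level helper could look like if the registration boilerplate moved into the core while drivers still supply their private commands. None of this API exists; every name below is hypothetical.

/* Hypothetical drm core helper -- names are made up for the sketch. */
struct drm_genl_driver_ops {
        /* driver hook behind the common READ_ONE command */
        int (*read_error)(struct drm_device *dev, u64 id, u64 *value);
        /* optional driver-private commands appended to the common set */
        const struct genl_ops *extra_ops;
        unsigned int n_extra_ops;
};

static int drm_genl_read_one_doit(struct sk_buff *msg, struct genl_info *info)
{
        /* Would resolve the owning drm_device, call its read_error() hook
         * and build the reply; elided in this sketch. */
        return -EOPNOTSUPP;
}

static const struct genl_ops drm_genl_common_ops[] = {
        { .cmd = DRM_CMD_READ_ONE, .doit = drm_genl_read_one_doit },
};

int drm_genl_register(struct drm_device *dev,
                      const struct drm_genl_driver_ops *ops)
{
        struct genl_family *family = &dev->genl_family; /* hypothetical field */

        snprintf(family->name, sizeof(family->name), "drm_%s", dev->unique);
        family->version = 1;
        family->maxattr = DRM_ATTR_MAX;
        family->module = THIS_MODULE;
        family->ops = drm_genl_common_ops;
        family->n_ops = ARRAY_SIZE(drm_genl_common_ops);
        /* ops->extra_ops would have to be merged in before registering */

        return genl_register_family(family);
}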
Re: [Intel-gfx] [PATCH v2] drm/i915: Initialize the obj flags for shmem objects
On 06-02-2023 15:21, Tvrtko Ursulin wrote: > > > On 03/02/2023 13:52, Aravind Iddamsetty wrote: >> Obj flags for shmem objects is not being set correctly. Fixes in setting >> BO_ALLOC_USER flag which applies to shmem objs as well. >> >> Fixes: 13d29c823738 ("drm/i915/ehl: unconditionally flush the pages on >> acquire") >> Cc: # v5.15+ > > These tags should have been grouped with the ones below in one block. > > I have tidied this while pushing, thanks for the fix and review! Thanks Tvrtko. Regards, Aravind. > > Regards, > > Tvrtko > >> v2: Add fixes tag (Tvrtko, Matt A) >> >> Cc: Matthew Auld >> Cc: Tvrtko Ursulin >> Reviewed-by: Matthew Auld >> Signed-off-by: Aravind Iddamsetty >> --- >> drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> index 114443096841..37d1efcd3ca6 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> @@ -596,7 +596,7 @@ static int shmem_object_init(struct >> intel_memory_region *mem, >> mapping_set_gfp_mask(mapping, mask); >> GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); >> - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0); >> + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags); >> obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; >> obj->write_domain = I915_GEM_DOMAIN_CPU; >> obj->read_domains = I915_GEM_DOMAIN_CPU;
Re: [Intel-gfx] [PATCH] Initialize the obj flags for shmem objects
On 03-02-2023 17:40, Tvrtko Ursulin wrote: > > > On 03/02/2023 11:57, Aravind Iddamsetty wrote: >> Obj flags for shmem objects is not being set correctly. >> >> Cc: Matthew Auld >> Signed-off-by: Aravind Iddamsetty > > Could even be: > > Fixes: 13d29c823738 ("drm/i915/ehl: unconditionally flush the pages on > acquire") > Cc: # v5.15+ > > ? Thanks for the review, will resend with this. Aravind. > > Regards, > > Tvrtko > >> --- >> drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> index 114443096841..37d1efcd3ca6 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> @@ -596,7 +596,7 @@ static int shmem_object_init(struct >> intel_memory_region *mem, >> mapping_set_gfp_mask(mapping, mask); >> GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); >> - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0); >> + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags); >> obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; >> obj->write_domain = I915_GEM_DOMAIN_CPU; >> obj->read_domains = I915_GEM_DOMAIN_CPU;
Re: [PATCH] Initialize the obj flags for shmem objects
On 03-02-2023 17:42, Matthew Auld wrote: > On 03/02/2023 11:57, Aravind Iddamsetty wrote: >> Obj flags for shmem objects is not being set correctly. >> >> Cc: Matthew Auld >> Signed-off-by: Aravind Iddamsetty > > Subject should have "drm/i915:" prefix. My bad missed that. > > This is also a bug fix due to not setting BO_ALLOC_USER (the other flags > don't seem to matter for shmem), which is quite important, so we need to > figure out the "Fixes" tag. Maybe mention in the commit message that > this fixes setting ALLOC_USER which is needed even for shmem. > > Looking at the git history, ALLOC_USER looks to be first introduced in > 213d50927763 ("drm/i915/ttm: Introduce a TTM i915 gem object backend"), > but the users of ALLOC_USER at this stage are only interesting for the > ttm backend, and that already passes the flags due to using its own > object_init() vfunc for all normal object types. > > So the first real user impacted by this bug appears to be in: > 13d29c823738 ("drm/i915/ehl: unconditionally flush the pages on acquire"). > > So I think needs: > > Fixes: 13d29c823738 ("drm/i915/ehl: unconditionally flush the pages on > acquire") > Cc: # v5.15+ > > With that, > Reviewed-by: Matthew Auld Thank you will resend the patch. Regards, Aravind. > > >> --- >> drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> index 114443096841..37d1efcd3ca6 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c >> @@ -596,7 +596,7 @@ static int shmem_object_init(struct >> intel_memory_region *mem, >> mapping_set_gfp_mask(mapping, mask); >> GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); >> - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0); >> + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags); >> obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; >> obj->write_domain = I915_GEM_DOMAIN_CPU; >> obj->read_domains = I915_GEM_DOMAIN_CPU;
Re: [Intel-gfx] [PATCH v4] drm/i915/mtl: Media GT and Render GT share common GGTT
On 29-11-2022 15:41, Tvrtko Ursulin wrote: > > On 22/11/2022 07:01, Aravind Iddamsetty wrote: >> On XE_LPM+ platforms the media engines are carved out into a separate >> GT but have a common GGTMMADR address range which essentially makes >> the GGTT address space to be shared between media and render GT. As a >> result any updates in GGTT shall invalidate TLB of GTs sharing it and >> similarly any operation on GGTT requiring an action on a GT will have to >> involve all GTs sharing it. setup_private_pat was being done on a per >> GGTT based as that doesn't touch any GGTT structures moved it to per GT >> based. >> >> BSPEC: 63834 >> >> v2: >> 1. Add details to commit msg >> 2. includes fix for failure to add item to ggtt->gt_list, as suggested >> by Lucas >> 3. as ggtt_flush() is used only for ggtt drop i915_is_ggtt check within >> it. >> 4. setup_private_pat moved out of intel_gt_tiles_init >> >> v3: >> 1. Move out for_each_gt from i915_driver.c (Jani Nikula) >> >> v4: drop using RCU primitives on ggtt->gt_list as it is not an RCU list >> (Matt Roper) >> >> Cc: Matt Roper >> Signed-off-by: Aravind Iddamsetty >> --- >> drivers/gpu/drm/i915/gt/intel_ggtt.c | 54 +-- >> drivers/gpu/drm/i915/gt/intel_gt.c | 13 +- >> drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ >> drivers/gpu/drm/i915/gt/intel_gtt.h | 4 ++ >> drivers/gpu/drm/i915/i915_driver.c | 12 ++--- >> drivers/gpu/drm/i915/i915_gem.c | 2 + >> drivers/gpu/drm/i915/i915_gem_evict.c | 51 +++-- >> drivers/gpu/drm/i915/i915_vma.c | 5 ++- >> drivers/gpu/drm/i915/selftests/i915_gem.c | 2 + >> 9 files changed, 111 insertions(+), 35 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c >> b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> index 8145851ad23d..7644738b9cdb 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c >> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> @@ -8,6 +8,7 @@ >> #include >> #include >> +#include >> #include >> #include >> @@ -196,10 +197,13 @@ void i915_ggtt_suspend_vm(struct >> i915_address_space *vm) >> void i915_ggtt_suspend(struct i915_ggtt *ggtt) >> { >> + struct intel_gt *gt; >> + >> i915_ggtt_suspend_vm(&ggtt->vm); >> ggtt->invalidate(ggtt); >> - intel_gt_check_and_clear_faults(ggtt->vm.gt); >> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >> + intel_gt_check_and_clear_faults(gt); >> } >> void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) >> @@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct >> i915_ggtt *ggtt) >> static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) >> { >> - struct intel_uncore *uncore = ggtt->vm.gt->uncore; >> struct drm_i915_private *i915 = ggtt->vm.i915; >> gen8_ggtt_invalidate(ggtt); >> - if (GRAPHICS_VER(i915) >= 12) >> - intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, >> - GEN12_GUC_TLB_INV_CR_INVALIDATE); >> - else >> - intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); >> + if (GRAPHICS_VER(i915) >= 12) { >> + struct intel_gt *gt; >> + >> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >> + intel_uncore_write_fw(gt->uncore, >> + GEN12_GUC_TLB_INV_CR, >> + GEN12_GUC_TLB_INV_CR_INVALIDATE); >> + } else { >> + intel_uncore_write_fw(ggtt->vm.gt->uncore, >> + GEN8_GTCR, GEN8_GTCR_INVALIDATE); >> + } >> } >> u64 gen8_ggtt_pte_encode(dma_addr_t addr, >> @@ -986,8 +995,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) >> ggtt->vm.pte_encode = gen8_ggtt_pte_encode; >> - setup_private_pat(ggtt->vm.gt); >> - >> return ggtt_probe_common(ggtt, size); >> } >> @@ -1196,7 +1203,14 @@ static int ggtt_probe_hw(struct i915_ggtt >> *ggtt, struct 
intel_gt *gt) >> */ >> int i915_ggtt_probe_hw(struct drm_i915_private *i915) >> { >> - int ret; >> + struct intel_gt *gt; >> + int ret, i; >> + >> + for_each_gt(gt, i915, i) { >> + ret = intel_gt_assign_ggtt(gt); >> + if (ret) >> + return ret; >> + } >> ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915)); >> if (ret) >> @@ -1208,6 +1222,19 @@ int i915_ggtt_probe_hw(struct drm_i915_private >> *i915) >> return 0; >> } >> +struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915) >> +{ >> + struct i915_ggtt *ggtt; >> + >> + ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL); >> + if (!ggtt) >> + return ERR_PTR(-ENOMEM); >> + >> + INIT_LIST_HEAD(&ggtt->gt_list); >> + >> + return ggtt; >> +} >> + >> int i915_ggtt_enable_hw(struct drm_i915_private *i915) >> { >> if (GRAPHICS_VER(i915) < 6) >> @@ -1296,9 +1323,11 @@ bool i915_ggtt_resume_vm(struct >> i915_address_space *vm) >> void i915_ggtt_resume(struct i915_ggtt *ggtt) >> { >> +
Re: [Intel-gfx] [PATCH v4] drm/i915/mtl: Media GT and Render GT share common GGTT
On 29-11-2022 11:24, Lucas De Marchi wrote: > On Wed, Nov 23, 2022 at 09:47:03AM +0530, Iddamsetty, Aravind wrote: >> >> >> On 23-11-2022 05:29, Matt Roper wrote: >>> On Tue, Nov 22, 2022 at 12:31:26PM +0530, Aravind Iddamsetty wrote: >>>> On XE_LPM+ platforms the media engines are carved out into a separate >>>> GT but have a common GGTMMADR address range which essentially makes >>>> the GGTT address space to be shared between media and render GT. As a >>>> result any updates in GGTT shall invalidate TLB of GTs sharing it and >>>> similarly any operation on GGTT requiring an action on a GT will >>>> have to >>>> involve all GTs sharing it. setup_private_pat was being done on a per >>>> GGTT based as that doesn't touch any GGTT structures moved it to per GT >>>> based. >>>> >>>> BSPEC: 63834 >>>> >>>> v2: >>>> 1. Add details to commit msg >>>> 2. includes fix for failure to add item to ggtt->gt_list, as suggested >>>> by Lucas >>>> 3. as ggtt_flush() is used only for ggtt drop i915_is_ggtt check within >>>> it. >>>> 4. setup_private_pat moved out of intel_gt_tiles_init >>>> >>>> v3: >>>> 1. Move out for_each_gt from i915_driver.c (Jani Nikula) >>>> >>>> v4: drop using RCU primitives on ggtt->gt_list as it is not an RCU list >>>> (Matt Roper) >>>> >>>> Cc: Matt Roper >>>> Signed-off-by: Aravind Iddamsetty >>> >>> Reviewed-by: Matt Roper >> >> Thanks Matt, could you also help with merging the change. >> >> Regards, >> Aravind. >>> >>>> --- >>>> drivers/gpu/drm/i915/gt/intel_ggtt.c | 54 +-- >>>> drivers/gpu/drm/i915/gt/intel_gt.c | 13 +- >>>> drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ >>>> drivers/gpu/drm/i915/gt/intel_gtt.h | 4 ++ >>>> drivers/gpu/drm/i915/i915_driver.c | 12 ++--- >>>> drivers/gpu/drm/i915/i915_gem.c | 2 + >>>> drivers/gpu/drm/i915/i915_gem_evict.c | 51 +++-- >>>> drivers/gpu/drm/i915/i915_vma.c | 5 ++- >>>> drivers/gpu/drm/i915/selftests/i915_gem.c | 2 + >>>> 9 files changed, 111 insertions(+), 35 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c >>>> b/drivers/gpu/drm/i915/gt/intel_ggtt.c >>>> index 8145851ad23d..7644738b9cdb 100644 >>>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c >>>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c >>>> @@ -8,6 +8,7 @@ >>>> #include >>>> #include >>>> >>>> +#include >>>> #include >>>> #include >>>> >>>> @@ -196,10 +197,13 @@ void i915_ggtt_suspend_vm(struct >>>> i915_address_space *vm) >>>> >>>> void i915_ggtt_suspend(struct i915_ggtt *ggtt) >>>> { >>>> + struct intel_gt *gt; >>>> + >>>> i915_ggtt_suspend_vm(&ggtt->vm); >>>> ggtt->invalidate(ggtt); >>>> >>>> - intel_gt_check_and_clear_faults(ggtt->vm.gt); >>>> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >>>> + intel_gt_check_and_clear_faults(gt); >>>> } >>>> >>>> void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) >>>> @@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct >>>> i915_ggtt *ggtt) >>>> >>>> static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) >>>> { >>>> - struct intel_uncore *uncore = ggtt->vm.gt->uncore; >>>> struct drm_i915_private *i915 = ggtt->vm.i915; >>>> >>>> gen8_ggtt_invalidate(ggtt); >>>> >>>> - if (GRAPHICS_VER(i915) >= 12) >>>> - intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, >>>> - GEN12_GUC_TLB_INV_CR_INVALIDATE); >>>> - else >>>> - intel_uncore_write_fw(uncore, GEN8_GTCR, >>>> GEN8_GTCR_INVALIDATE); >>>> + if (GRAPHICS_VER(i915) >= 12) { >>>> + struct intel_gt *gt; >>>> + >>>> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >>>> + intel_uncore_write_fw(gt->uncore, >>>> +
Re: [PATCH v4] drm/i915/mtl: Media GT and Render GT share common GGTT
On 23-11-2022 05:29, Matt Roper wrote: > On Tue, Nov 22, 2022 at 12:31:26PM +0530, Aravind Iddamsetty wrote: >> On XE_LPM+ platforms the media engines are carved out into a separate >> GT but have a common GGTMMADR address range which essentially makes >> the GGTT address space to be shared between media and render GT. As a >> result any updates in GGTT shall invalidate TLB of GTs sharing it and >> similarly any operation on GGTT requiring an action on a GT will have to >> involve all GTs sharing it. setup_private_pat was being done on a per >> GGTT based as that doesn't touch any GGTT structures moved it to per GT >> based. >> >> BSPEC: 63834 >> >> v2: >> 1. Add details to commit msg >> 2. includes fix for failure to add item to ggtt->gt_list, as suggested >> by Lucas >> 3. as ggtt_flush() is used only for ggtt drop i915_is_ggtt check within >> it. >> 4. setup_private_pat moved out of intel_gt_tiles_init >> >> v3: >> 1. Move out for_each_gt from i915_driver.c (Jani Nikula) >> >> v4: drop using RCU primitives on ggtt->gt_list as it is not an RCU list >> (Matt Roper) >> >> Cc: Matt Roper >> Signed-off-by: Aravind Iddamsetty > > Reviewed-by: Matt Roper Thanks Matt, could you also help with merging the change. Regards, Aravind. > >> --- >> drivers/gpu/drm/i915/gt/intel_ggtt.c | 54 +-- >> drivers/gpu/drm/i915/gt/intel_gt.c| 13 +- >> drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ >> drivers/gpu/drm/i915/gt/intel_gtt.h | 4 ++ >> drivers/gpu/drm/i915/i915_driver.c| 12 ++--- >> drivers/gpu/drm/i915/i915_gem.c | 2 + >> drivers/gpu/drm/i915/i915_gem_evict.c | 51 +++-- >> drivers/gpu/drm/i915/i915_vma.c | 5 ++- >> drivers/gpu/drm/i915/selftests/i915_gem.c | 2 + >> 9 files changed, 111 insertions(+), 35 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c >> b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> index 8145851ad23d..7644738b9cdb 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c >> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> @@ -8,6 +8,7 @@ >> #include >> #include >> >> +#include >> #include >> #include >> >> @@ -196,10 +197,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space >> *vm) >> >> void i915_ggtt_suspend(struct i915_ggtt *ggtt) >> { >> +struct intel_gt *gt; >> + >> i915_ggtt_suspend_vm(&ggtt->vm); >> ggtt->invalidate(ggtt); >> >> -intel_gt_check_and_clear_faults(ggtt->vm.gt); >> +list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >> +intel_gt_check_and_clear_faults(gt); >> } >> >> void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) >> @@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct i915_ggtt >> *ggtt) >> >> static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) >> { >> -struct intel_uncore *uncore = ggtt->vm.gt->uncore; >> struct drm_i915_private *i915 = ggtt->vm.i915; >> >> gen8_ggtt_invalidate(ggtt); >> >> -if (GRAPHICS_VER(i915) >= 12) >> -intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, >> - GEN12_GUC_TLB_INV_CR_INVALIDATE); >> -else >> -intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); >> +if (GRAPHICS_VER(i915) >= 12) { >> +struct intel_gt *gt; >> + >> +list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >> +intel_uncore_write_fw(gt->uncore, >> + GEN12_GUC_TLB_INV_CR, >> + GEN12_GUC_TLB_INV_CR_INVALIDATE); >> +} else { >> +intel_uncore_write_fw(ggtt->vm.gt->uncore, >> + GEN8_GTCR, GEN8_GTCR_INVALIDATE); >> +} >> } >> >> u64 gen8_ggtt_pte_encode(dma_addr_t addr, >> @@ -986,8 +995,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) >> >> ggtt->vm.pte_encode = gen8_ggtt_pte_encode; >> >> -setup_private_pat(ggtt->vm.gt); >> - >> 
return ggtt_probe_common(ggtt, size); >> } >> >> @@ -1196,7 +1203,14 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, >> struct intel_gt *gt) >> */ >> int i915_ggtt_probe_hw(struct drm_i915_private *i915) >> { >> -int ret; >> +struct intel_gt *gt; >> +int ret, i; >> + >> +for_each_gt(gt, i915, i) { >> +ret = intel_gt_assign_ggtt(gt); >> +if (ret) >> +return ret; >> +} >> >> ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915)); >> if (ret) >> @@ -1208,6 +1222,19 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) >> return 0; >> } >> >> +struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915) >> +{ >> +struct i915_ggtt *ggtt; >> + >> +ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL); >> +if (!ggtt) >> +return ERR_PTR(-ENOMEM); >> + >> +INIT_LIST_HEAD(&ggtt->gt_list); >> + >> +return ggtt; >> +} >> + >> int i915_gg
Re: [PATCH v3] drm/i915/mtl: Media GT and Render GT share common GGTT
On 19-11-2022 04:22, Matt Roper wrote: > On Tue, Nov 15, 2022 at 08:34:54PM +0530, Aravind Iddamsetty wrote: >> On XE_LPM+ platforms the media engines are carved out into a separate >> GT but have a common GGTMMADR address range which essentially makes >> the GGTT address space to be shared between media and render GT. As a >> result any updates in GGTT shall invalidate TLB of GTs sharing it and >> similarly any operation on GGTT requiring an action on a GT will have to >> involve all GTs sharing it. setup_private_pat was being done on a per >> GGTT based as that doesn't touch any GGTT structures moved it to per GT >> based. >> >> BSPEC: 63834 >> >> v2: >> 1. Add details to commit msg >> 2. includes fix for failure to add item to ggtt->gt_list, as suggested >> by Lucas >> 3. as ggtt_flush() is used only for ggtt drop i915_is_ggtt check within >> it. >> 4. setup_private_pat moved out of intel_gt_tiles_init >> >> v3: >> 1. Move out for_each_gt from i915_driver.c (Jani Nikula) >> >> Cc: Matt Roper >> Signed-off-by: Aravind Iddamsetty >> --- >> drivers/gpu/drm/i915/gt/intel_ggtt.c | 54 +-- >> drivers/gpu/drm/i915/gt/intel_gt.c| 13 +- >> drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ >> drivers/gpu/drm/i915/gt/intel_gtt.h | 4 ++ >> drivers/gpu/drm/i915/i915_driver.c| 12 ++--- >> drivers/gpu/drm/i915/i915_gem.c | 2 + >> drivers/gpu/drm/i915/i915_gem_evict.c | 51 +++-- >> drivers/gpu/drm/i915/i915_vma.c | 5 ++- >> drivers/gpu/drm/i915/selftests/i915_gem.c | 2 + >> 9 files changed, 111 insertions(+), 35 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c >> b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> index 8145851ad23d..7644738b9cdb 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c >> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> @@ -8,6 +8,7 @@ >> #include >> #include >> >> +#include >> #include >> #include >> >> @@ -196,10 +197,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space >> *vm) >> >> void i915_ggtt_suspend(struct i915_ggtt *ggtt) >> { >> +struct intel_gt *gt; >> + >> i915_ggtt_suspend_vm(&ggtt->vm); >> ggtt->invalidate(ggtt); >> >> -intel_gt_check_and_clear_faults(ggtt->vm.gt); >> +list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >> +intel_gt_check_and_clear_faults(gt); >> } >> >> void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) >> @@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct i915_ggtt >> *ggtt) >> >> static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) >> { >> -struct intel_uncore *uncore = ggtt->vm.gt->uncore; >> struct drm_i915_private *i915 = ggtt->vm.i915; >> >> gen8_ggtt_invalidate(ggtt); >> >> -if (GRAPHICS_VER(i915) >= 12) >> -intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, >> - GEN12_GUC_TLB_INV_CR_INVALIDATE); >> -else >> -intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); >> +if (GRAPHICS_VER(i915) >= 12) { >> +struct intel_gt *gt; >> + >> +list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >> +intel_uncore_write_fw(gt->uncore, >> + GEN12_GUC_TLB_INV_CR, >> + GEN12_GUC_TLB_INV_CR_INVALIDATE); >> +} else { >> +intel_uncore_write_fw(ggtt->vm.gt->uncore, >> + GEN8_GTCR, GEN8_GTCR_INVALIDATE); >> +} >> } >> >> u64 gen8_ggtt_pte_encode(dma_addr_t addr, >> @@ -986,8 +995,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) >> >> ggtt->vm.pte_encode = gen8_ggtt_pte_encode; >> >> -setup_private_pat(ggtt->vm.gt); >> - >> return ggtt_probe_common(ggtt, size); >> } >> >> @@ -1196,7 +1203,14 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, >> struct intel_gt *gt) >> */ >> int i915_ggtt_probe_hw(struct drm_i915_private 
*i915) >> { >> -int ret; >> +struct intel_gt *gt; >> +int ret, i; >> + >> +for_each_gt(gt, i915, i) { >> +ret = intel_gt_assign_ggtt(gt); >> +if (ret) >> +return ret; >> +} >> >> ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915)); >> if (ret) >> @@ -1208,6 +1222,19 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) >> return 0; >> } >> >> +struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915) >> +{ >> +struct i915_ggtt *ggtt; >> + >> +ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL); >> +if (!ggtt) >> +return ERR_PTR(-ENOMEM); >> + >> +INIT_LIST_HEAD(&ggtt->gt_list); >> + >> +return ggtt; >> +} >> + >> int i915_ggtt_enable_hw(struct drm_i915_private *i915) >> { >> if (GRAPHICS_VER(i915) < 6) >> @@ -1296,9 +1323,11 @@ bool i915_ggtt_resume_vm(struct i915_address_space >> *vm) >> >> void i915_ggtt_resume(st
Re: [PATCH] drm/i915/mtl: Media GT and Render GT share common GGTT
On 31-10-2022 23:16, Matt Roper wrote: > On Mon, Oct 31, 2022 at 06:01:11PM +0530, Aravind Iddamsetty wrote: >> On XE_LPM+ platforms the media engines are carved out into a separate >> GT but have a common GGTMMADR address range which essentially makes >> the GGTT address space to be shared between media and render GT. > > While this is all true, I feel like this description is lacking a bit of > explanation for why/how that translates into the code changes below. > For example you should elaborate on the areas this impacts, such as the > need to invalidate both GTs' TLBs, retire requests for both GTs, etc. > > Also, the movement of the PAT setup should be noted and explained as > well since it differs from how you approached the other changes here. > >> >> BSPEC: 63834 >> >> Cc: Matt Roper >> Signed-off-by: Aravind Iddamsetty >> --- >> drivers/gpu/drm/i915/gt/intel_ggtt.c | 49 +++--- >> drivers/gpu/drm/i915/gt/intel_gt.c| 15 +- >> drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ >> drivers/gpu/drm/i915/gt/intel_gtt.h | 3 ++ >> drivers/gpu/drm/i915/i915_driver.c| 19 +-- >> drivers/gpu/drm/i915/i915_gem_evict.c | 63 +-- >> drivers/gpu/drm/i915/i915_vma.c | 5 +- >> drivers/gpu/drm/i915/selftests/i915_gem.c | 2 + >> drivers/gpu/drm/i915/selftests/mock_gtt.c | 1 + >> 9 files changed, 115 insertions(+), 45 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c >> b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> index 2518cebbf931..f5c2f3c58627 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c >> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> @@ -196,10 +196,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space >> *vm) >> >> void i915_ggtt_suspend(struct i915_ggtt *ggtt) >> { >> +struct intel_gt *gt; >> + >> i915_ggtt_suspend_vm(&ggtt->vm); >> ggtt->invalidate(ggtt); >> >> -intel_gt_check_and_clear_faults(ggtt->vm.gt); >> +list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >> +intel_gt_check_and_clear_faults(gt); >> } >> >> void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) >> @@ -214,27 +217,36 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) >> >> static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) >> { >> -struct intel_uncore *uncore = ggtt->vm.gt->uncore; >> +struct intel_uncore *uncore; >> +struct intel_gt *gt; >> >> -/* >> - * Note that as an uncached mmio write, this will flush the >> - * WCB of the writes into the GGTT before it triggers the invalidate. >> - */ >> -intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); >> +list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { >> +uncore = gt->uncore; >> +/* >> + * Note that as an uncached mmio write, this will flush the >> + * WCB of the writes into the GGTT before it triggers the >> invalidate. >> + */ >> +intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, >> GFX_FLSH_CNTL_EN); > > This isn't a GT register, so writing it for each GT doesn't do anything > different than just writing it once. But actually it doesn't look like > this is even a register we should be writing to anymore since Xe_HP. > The GFX_FLSH_CNTL register no longer lives here. Ok I'll remove the iteration over gt, also i do not see an equivalent register for Xe_HP, so i'll leave this as it is for now. 
> >> +} >> } >> >> static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) >> { >> -struct intel_uncore *uncore = ggtt->vm.gt->uncore; >> struct drm_i915_private *i915 = ggtt->vm.i915; >> >> gen8_ggtt_invalidate(ggtt); >> >> -if (GRAPHICS_VER(i915) >= 12) >> -intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, >> - GEN12_GUC_TLB_INV_CR_INVALIDATE); >> -else >> -intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); >> +if (GRAPHICS_VER(i915) >= 12) { >> +struct intel_gt *gt; >> + >> +list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >> +intel_uncore_write_fw(gt->uncore, >> + GEN12_GUC_TLB_INV_CR, >> + GEN12_GUC_TLB_INV_CR_INVALIDATE); >> +} else { >> +intel_uncore_write_fw(ggtt->vm.gt->uncore, >> + GEN8_GTCR, GEN8_GTCR_INVALIDATE); >> +} >> } >> >> u64 gen8_ggtt_pte_encode(dma_addr_t addr, >> @@ -986,8 +998,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) >> >> ggtt->vm.pte_encode = gen8_ggtt_pte_encode; >> >> -setup_private_pat(ggtt->vm.gt); >> - >> return ggtt_probe_common(ggtt, size); >> } >> >> @@ -1186,7 +1196,7 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, >> struct intel_gt *gt) >> (u64)ggtt->mappable_end >> 20); >> drm_dbg(&i915->drm, "DSM size = %l
Re: [PATCH] drm/i915/mtl: Media GT and Render GT share common GGTT
On 31-10-2022 23:16, Matt Roper wrote: > On Mon, Oct 31, 2022 at 06:01:11PM +0530, Aravind Iddamsetty wrote: >> On XE_LPM+ platforms the media engines are carved out into a separate >> GT but have a common GGTMMADR address range which essentially makes >> the GGTT address space to be shared between media and render GT. > >> >> int intel_gt_init_mmio(struct intel_gt *gt) >> @@ -965,6 +973,9 @@ int intel_gt_tiles_init(struct drm_i915_private *i915) >> int ret; >> >> for_each_gt(gt, i915, id) { >> +if (GRAPHICS_VER(i915) >= 8) >> +setup_private_pat(gt); >> + > > Since the term "tile" is used for PVC-style remote tiles (which we have > some framework for, but haven't enabled yet), it seems confusing to have > the PAT setup for all GTs (including the standalone media GT) in a > function called intel_gt_tiles_init(). Maybe we should also have a prep > patch that renames this function if we're going to start doing non-tile > things in here too? But aren't GT and tile used interchangeably? Also, could you please elaborate on what you mean by non-tile related things here? As I understand it, the PAT registers are per GT, and in the case of SA Media the gsi_offset gets added. > >> ret = intel_gt_probe_lmem(gt); >> if (ret) >> return ret; Thanks, Aravind.
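On the gsi_offset point: the standalone media GT's registers live behind a fixed offset within the same MMIO BAR, so per-GT accessors add that offset transparently. A simplified illustration; the helper name is made up and i915 actually handles this inside intel_uncore, but the field names match the uncore structure of that time.

static u32 example_gt_mmio_read(struct intel_gt *gt, u32 reg_offset)
{
        struct intel_uncore *uncore = gt->uncore;

        /*
         * gsi_offset is 0 for the primary GT and non-zero for the
         * standalone media GT, relocating its register range in the BAR.
         */
        return readl(uncore->regs + uncore->gsi_offset + reg_offset);
}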
RE: [Intel-gfx] [PATCH] drm/i915/mtl: Media GT and Render GT share common GGTT
Hi Lucas, > -Original Message- > From: De Marchi, Lucas > Sent: Friday, November 4, 2022 12:36 PM > To: Iddamsetty, Aravind > Cc: intel-...@lists.freedesktop.org; dri-devel@lists.freedesktop.org > Subject: Re: [Intel-gfx] [PATCH] drm/i915/mtl: Media GT and Render GT share > common GGTT > > On Mon, Oct 31, 2022 at 06:01:11PM +0530, Aravind Iddamsetty wrote: > >On XE_LPM+ platforms the media engines are carved out into a separate > >GT but have a common GGTMMADR address range which essentially makes > the > >GGTT address space to be shared between media and render GT. > > > >BSPEC: 63834 > > > >Cc: Matt Roper > >Signed-off-by: Aravind Iddamsetty > >--- > > drivers/gpu/drm/i915/gt/intel_ggtt.c | 49 +++--- > > drivers/gpu/drm/i915/gt/intel_gt.c| 15 +- > > drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ > > drivers/gpu/drm/i915/gt/intel_gtt.h | 3 ++ > > drivers/gpu/drm/i915/i915_driver.c| 19 +-- > > drivers/gpu/drm/i915/i915_gem_evict.c | 63 +-- > > drivers/gpu/drm/i915/i915_vma.c | 5 +- > > drivers/gpu/drm/i915/selftests/i915_gem.c | 2 + > >drivers/gpu/drm/i915/selftests/mock_gtt.c | 1 + > > 9 files changed, 115 insertions(+), 45 deletions(-) > > > >diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c > >b/drivers/gpu/drm/i915/gt/intel_ggtt.c > >index 2518cebbf931..f5c2f3c58627 100644 > >--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c > >+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c > >@@ -196,10 +196,13 @@ void i915_ggtt_suspend_vm(struct > >i915_address_space *vm) > > > > void i915_ggtt_suspend(struct i915_ggtt *ggtt) { > >+struct intel_gt *gt; > >+ > > i915_ggtt_suspend_vm(&ggtt->vm); > > ggtt->invalidate(ggtt); > > > >-intel_gt_check_and_clear_faults(ggtt->vm.gt); > >+list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) > >+intel_gt_check_and_clear_faults(gt); > > } > > > > void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) @@ -214,27 +217,36 > >@@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) > > > > static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) { > >-struct intel_uncore *uncore = ggtt->vm.gt->uncore; > >+struct intel_uncore *uncore; > >+struct intel_gt *gt; > > > >-/* > >- * Note that as an uncached mmio write, this will flush the > >- * WCB of the writes into the GGTT before it triggers the invalidate. > >- */ > >-intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, > GFX_FLSH_CNTL_EN); > >+list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { > >+uncore = gt->uncore; > >+/* > >+ * Note that as an uncached mmio write, this will flush the > >+ * WCB of the writes into the GGTT before it triggers the > invalidate. 
> >+ */ > >+intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, > GFX_FLSH_CNTL_EN); > >+} > > } > > > > static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) { > >-struct intel_uncore *uncore = ggtt->vm.gt->uncore; > > struct drm_i915_private *i915 = ggtt->vm.i915; > > > > gen8_ggtt_invalidate(ggtt); > > > >-if (GRAPHICS_VER(i915) >= 12) > >-intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, > >- GEN12_GUC_TLB_INV_CR_INVALIDATE); > >-else > >-intel_uncore_write_fw(uncore, GEN8_GTCR, > GEN8_GTCR_INVALIDATE); > >+if (GRAPHICS_VER(i915) >= 12) { > >+struct intel_gt *gt; > >+ > >+list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) > >+intel_uncore_write_fw(gt->uncore, > >+ GEN12_GUC_TLB_INV_CR, > >+ > GEN12_GUC_TLB_INV_CR_INVALIDATE); > >+} else { > >+intel_uncore_write_fw(ggtt->vm.gt->uncore, > >+ GEN8_GTCR, GEN8_GTCR_INVALIDATE); > >+} > > } > > > > u64 gen8_ggtt_pte_encode(dma_addr_t addr, @@ -986,8 +998,6 @@ > static > >int gen8_gmch_probe(struct i915_ggtt *ggtt) > > > > ggtt->vm.pte_encode = gen8_ggtt_pte_encode; > > > >-setup_private_pat(ggtt->vm.gt); > >- > > return ggtt_probe_common(ggtt, size); } > > > >@@ -1186,7 +1196,7 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, > struct intel_gt *gt) > > (u64)ggtt->map
Re: [PATCH v2 5/7] drm/i915/mtl: Handle wopcm per-GT and limit calculations.
On 19-10-2022 06:14, John Harrison wrote: > On 10/12/2022 17:03, Daniele Ceraolo Spurio wrote: >> From: Aravind Iddamsetty >> >> With MTL standalone media architecture the wopcm layout has changed with >> separate partitioning in WOPCM for GCD/GT GuC and SA Media GuC. The size > What is GCD? GCD stands for Graphics Companion Die; it has no media engines on it. Thanks, Aravind.
Re: [PATCH v2 1/7] drm/i915/huc: only load HuC on GTs that have VCS engines
On 13-10-2022 05:33, Daniele Ceraolo Spurio wrote: > On MTL the primary GT doesn't have any media capabilities, so no video > engines and no HuC. We must therefore skip the HuC fetch and load on > that specific case. Given that other multi-GT platforms might have HuC > on the primary GT, we can't just check for that and it is easier to > instead check for the lack of VCS engines. > > Based on code from Aravind Iddamsetty > > v2: clarify which engine_mask is used for each GT and why (Tvrtko) > > Signed-off-by: Daniele Ceraolo Spurio > Cc: Aravind Iddamsetty > Cc: John Harrison > Cc: Alan Previn > Cc: Tvrtko Ursulin > Reviewed-by: Aravind Iddamsetty #v1 > --- > drivers/gpu/drm/i915/gt/uc/intel_huc.c | 29 ++ > drivers/gpu/drm/i915/i915_drv.h| 9 +--- > 2 files changed, 35 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c > b/drivers/gpu/drm/i915/gt/uc/intel_huc.c > index 4d1cc383b681..ca170ea3426c 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c > @@ -203,12 +203,41 @@ void intel_huc_unregister_gsc_notifier(struct intel_huc > *huc, struct bus_type *b > huc->delayed_load.nb.notifier_call = NULL; > } > > +static bool vcs_supported(struct intel_gt *gt) > +{ > + intel_engine_mask_t mask = gt->info.engine_mask; > + > + /* > + * We reach here from i915_driver_early_probe for the primary GT before > + * its engine mask is set, so we use the device info engine mask for it; > + * this means we're not taking VCS fusing into account, but if the > + * primary GT supports VCS engines we expect at least one of them to > + * remain unfused so we're fine. > + * For other GTs we expect the GT-specific mask to be set before we > + * call this function. > + */ Comment sounds good to me. as the rest of the change is same as v1, You can have my r-b for this. Thanks, Aravind. 
> + GEM_BUG_ON(!gt_is_root(gt) && !gt->info.engine_mask); > + > + if (gt_is_root(gt)) > + mask = RUNTIME_INFO(gt->i915)->platform_engine_mask; > + else > + mask = gt->info.engine_mask; > + > + return __ENGINE_INSTANCES_MASK(mask, VCS0, I915_MAX_VCS); > +} > + > void intel_huc_init_early(struct intel_huc *huc) > { > struct drm_i915_private *i915 = huc_to_gt(huc)->i915; > + struct intel_gt *gt = huc_to_gt(huc); > > intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC); > > + if (!vcs_supported(gt)) { > + intel_uc_fw_change_status(&huc->fw, > INTEL_UC_FIRMWARE_NOT_SUPPORTED); > + return; > + } > + > if (GRAPHICS_VER(i915) >= 11) { > huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO; > huc->status.mask = HUC_LOAD_SUCCESSFUL; > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 90ed8e6db2fe..90a347140e90 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -776,12 +776,15 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > #define __HAS_ENGINE(engine_mask, id) ((engine_mask) & BIT(id)) > #define HAS_ENGINE(gt, id) __HAS_ENGINE((gt)->info.engine_mask, id) > > -#define ENGINE_INSTANCES_MASK(gt, first, count) ({ \ > +#define __ENGINE_INSTANCES_MASK(mask, first, count) ({ > \ > unsigned int first__ = (first); \ > unsigned int count__ = (count); \ > - ((gt)->info.engine_mask & > \ > - GENMASK(first__ + count__ - 1, first__)) >> first__; \ > + ((mask) & GENMASK(first__ + count__ - 1, first__)) >> first__; \ > }) > + > +#define ENGINE_INSTANCES_MASK(gt, first, count) \ > + __ENGINE_INSTANCES_MASK((gt)->info.engine_mask, first, count) > + > #define RCS_MASK(gt) \ > ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS) > #define BCS_MASK(gt) \
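A small worked example of the macro above, with made-up fusing, to show why a non-zero result means at least one video engine survived and HuC loading is worthwhile:

/* Illustration only: suppose fusing leaves just VCS0 and VCS2 enabled. */
static bool example_has_vcs(void)
{
        intel_engine_mask_t mask = BIT(VCS0) | BIT(VCS2);

        /*
         * GENMASK(VCS0 + I915_MAX_VCS - 1, VCS0) selects the VCS window and
         * the shift moves it down to bit 0, so the result here is 0b101.
         */
        return __ENGINE_INSTANCES_MASK(mask, VCS0, I915_MAX_VCS) != 0;
}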
Re: [PATCH v3] drm/i915/mtl: enable local stolen memory
On 28-09-2022 03:52, Matt Roper wrote: > On Tue, Sep 27, 2022 at 12:14:24AM +0530, Aravind Iddamsetty wrote: >> As an integrated GPU, MTL does not have local memory and >> HAS_LMEM() returns false. However the platform's stolen memory >> is presented via BAR2 (i.e., the BAR we traditionally consider >> to be the LMEM BAR) and should be managed by the driver the same >> way that local memory is on dgpu platforms (which includes >> setting the "lmem" bit on page table entries). We use the term >> "local stolen memory" to refer to this model. >> >> v2: >> 1. dropped is_dsm_invalid, updated valid_stolen_size check from Lucas >> (Jani, Lucas) >> 2. drop lmembar_is_igpu_stolen >> 3. revert to referring GFXMEM_BAR as GEN12_LMEM_BAR (Lucas) >> >> v3:(Jani) >> 1. rename get_mtl_gms_size to mtl_get_gms_size >> 2. define register for MMIO address >> >> Cc: Matt Roper >> Cc: Lucas De Marchi >> Cc: Jani Nikula >> > > Since this stuff is somewhat hard to find documentation on, you might > want to include a bspec page number or two here. > > Bspec: 63830 > > seems like a useful one for reference at least. sure will add these references. > >> Signed-off-by: CQ Tang >> Signed-off-by: Aravind Iddamsetty >> Original-author: CQ Tang >> --- >> drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 88 ++ >> drivers/gpu/drm/i915/gt/intel_ggtt.c | 2 +- >> drivers/gpu/drm/i915/i915_drv.h| 3 + >> drivers/gpu/drm/i915/i915_reg.h| 5 ++ >> 4 files changed, 81 insertions(+), 17 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >> index c5a4035c99cd..0eb66c55bbf3 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >> @@ -77,9 +77,9 @@ void i915_gem_stolen_remove_node(struct drm_i915_private >> *i915, >> mutex_unlock(&i915->mm.stolen_lock); >> } >> >> -static bool valid_stolen_size(struct resource *dsm) >> +static bool valid_stolen_size(struct drm_i915_private *i915, struct >> resource *dsm) >> { >> -return dsm->start != 0 && dsm->end > dsm->start; >> +return (dsm->start != 0 || HAS_BAR2_SMEM_STOLEN(i915)) && dsm->end > >> dsm->start; >> } >> >> static int adjust_stolen(struct drm_i915_private *i915, >> @@ -88,7 +88,7 @@ static int adjust_stolen(struct drm_i915_private *i915, >> struct i915_ggtt *ggtt = to_gt(i915)->ggtt; >> struct intel_uncore *uncore = ggtt->vm.gt->uncore; >> >> -if (!valid_stolen_size(dsm)) >> +if (!valid_stolen_size(i915, dsm)) >> return -EINVAL; >> >> /* >> @@ -135,7 +135,7 @@ static int adjust_stolen(struct drm_i915_private *i915, >> } >> } >> >> -if (!valid_stolen_size(dsm)) >> +if (!valid_stolen_size(i915, dsm)) >> return -EINVAL; >> >> return 0; >> @@ -148,9 +148,10 @@ static int request_smem_stolen(struct drm_i915_private >> *i915, >> >> /* >> * With stolen lmem, we don't need to request system memory for the >> - * address range since it's local to the gpu. >> + * address range since it's local to the gpu and in some IGFX devices >> + * BAR2 is exposed as stolen > > Minor nitpick: this sentence is a bit hard to read/understand. I'd > leave the original sentence as is and add a separate sentence explaining > the situation for igpu platforms with stolen memory exposed through > BAR2. ok. 
> >> */ >> -if (HAS_LMEM(i915)) >> +if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915)) >> return 0; >> >> /* >> @@ -385,8 +386,6 @@ static void icl_get_stolen_reserved(struct >> drm_i915_private *i915, >> >> drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); >> >> -*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; >> - >> switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { >> case GEN8_STOLEN_RESERVED_1M: >> *size = 1024 * 1024; >> @@ -404,6 +403,12 @@ static void icl_get_stolen_reserved(struct >> drm_i915_private *i915, >> *size = 8 * 1024 * 1024; >> MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); >> } >> + >> +if (HAS_BAR2_SMEM_STOLEN(i915)) >> +/* the base is initialized to stolen top so subtract size to >> get base */ >> +*base -= *size; >> +else >> +*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; >> } >> >> /* >> @@ -833,6 +838,34 @@ static const struct intel_memory_region_ops >> i915_region_stolen_lmem_ops = { >> .init_object = _i915_gem_object_stolen_init, >> }; >> >> +static int mtl_get_gms_size(struct intel_uncore *uncore) >> +{ >> +u16 ggc, gms; >> + >> +ggc = intel_uncore_read16(uncore, GGC); >> + >> +/* check GGMS, should be fixed 0x3 (8MB) */ >> +if ((ggc & GGMS_MASK) != GGMS_MASK) >> +return -EIO; >> + >> +/* return valid GMS value, -EIO if invalid */ >> +gms = (g
Re: [PATCH 1/7] drm/i915/huc: only load HuC on GTs that have VCS engines
On 23-09-2022 03:41, Daniele Ceraolo Spurio wrote: > On MTL the primary GT doesn't have any media capabilities, so no video > engines and no HuC. We must therefore skip the HuC fetch and load on > that specific case. Given that other multi-GT platforms might have HuC > on the primary GT, we can't just check for that and it is easier to > instead check for the lack of VCS engines. > > Based on code from Aravind Iddamsetty > > Signed-off-by: Daniele Ceraolo Spurio > Cc: Aravind Iddamsetty > Cc: John Harrison > Cc: Alan Previn > --- > drivers/gpu/drm/i915/gt/uc/intel_huc.c | 21 + > drivers/gpu/drm/i915/i915_drv.h| 9 ++--- > 2 files changed, 27 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c > b/drivers/gpu/drm/i915/gt/uc/intel_huc.c > index 3bb8838e325a..d4e2b252f16c 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c > @@ -42,12 +42,33 @@ > * HuC-specific commands. > */ > > +static bool vcs_supported(struct intel_gt *gt) > +{ > + intel_engine_mask_t mask = gt->info.engine_mask; > + > + /* > + * we can reach here from i915_driver_early_probe for primary > + * GT with it being not fully setup hence fall back to the device info's > + * engine mask > + */ > + if (!mask && gt_is_root(gt)) > + mask = RUNTIME_INFO(gt->i915)->platform_engine_mask; > + > + return __ENGINE_INSTANCES_MASK(mask, VCS0, I915_MAX_VCS); > +} > + > void intel_huc_init_early(struct intel_huc *huc) > { > struct drm_i915_private *i915 = huc_to_gt(huc)->i915; > + struct intel_gt *gt = huc_to_gt(huc); > > intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC); > > + if (!vcs_supported(gt)) { > + intel_uc_fw_change_status(&huc->fw, > INTEL_UC_FIRMWARE_NOT_SUPPORTED); > + return; > + } > + > if (GRAPHICS_VER(i915) >= 11) { > huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO; > huc->status.mask = HUC_LOAD_SUCCESSFUL; > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 134fc1621821..8ca575202e5d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -777,12 +777,15 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > #define __HAS_ENGINE(engine_mask, id) ((engine_mask) & BIT(id)) > #define HAS_ENGINE(gt, id) __HAS_ENGINE((gt)->info.engine_mask, id) > > -#define ENGINE_INSTANCES_MASK(gt, first, count) ({ \ > +#define __ENGINE_INSTANCES_MASK(mask, first, count) ({ > \ > unsigned int first__ = (first); \ > unsigned int count__ = (count); \ > - ((gt)->info.engine_mask & > \ > - GENMASK(first__ + count__ - 1, first__)) >> first__; \ > + ((mask) & GENMASK(first__ + count__ - 1, first__)) >> first__; \ > }) > + > +#define ENGINE_INSTANCES_MASK(gt, first, count) \ > + __ENGINE_INSTANCES_MASK((gt)->info.engine_mask, first, count) > + > #define RCS_MASK(gt) \ > ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS) > #define BCS_MASK(gt) \ LGTM. Reviewed-by: Aravind Iddamsetty Thanks, Aravind.
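For anyone reading the __ENGINE_INSTANCES_MASK change above without the rest of i915 handy: it is plain bit arithmetic that keeps the bits for one contiguous run of engine instances and shifts them down to bit 0, and vcs_supported() then only asks whether the result is non-zero. A minimal standalone sketch of that arithmetic, with BIT/GENMASK re-implemented and the VCS0/I915_MAX_VCS values invented for the example rather than taken from the driver headers:

#include <stdio.h>

#define BIT(n)          (1u << (n))
#define GENMASK(h, l)   (((~0u) << (l)) & (~0u >> (31 - (h))))

/* Placeholder engine numbering, for illustration only. */
#define VCS0            2
#define I915_MAX_VCS    4

static unsigned int engine_instances_mask(unsigned int engine_mask,
                                          unsigned int first,
                                          unsigned int count)
{
        /* Keep bits [first, first + count - 1], then shift them down to bit 0. */
        return (engine_mask & GENMASK(first + count - 1, first)) >> first;
}

int main(void)
{
        /* Pretend the GT exposes VCS instances 0 and 2 plus one other engine. */
        unsigned int mask = BIT(0) | BIT(VCS0) | BIT(VCS0 + 2);
        unsigned int vcs = engine_instances_mask(mask, VCS0, I915_MAX_VCS);

        printf("VCS instance mask: %#x\n", vcs);        /* prints 0x5 */
        printf("VCS supported: %s\n", vcs ? "yes" : "no");
        return 0;
}

The point of hoisting the mask parameter out of the macro is visible here: the same helper works whether the mask comes from gt->info.engine_mask or from the device-info fallback used during early probe.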
Re: [Intel-gfx] [PATCH 1/1] drm/i915/mtl: enable local stolen memory
On 22-09-2022 19:26, Lucas De Marchi wrote: > On Wed, Sep 21, 2022 at 12:00:38PM +0530, Iddamsetty, Aravind wrote: >> replying here for earlier comments too. >> >> On 21-09-2022 01:35, Lucas De Marchi wrote: >>> On Tue, Sep 20, 2022 at 01:31:49AM -0700, Lucas De Marchi wrote: >>>> On Tue, Sep 20, 2022 at 12:49:40PM +0530, Aravind Iddamsetty wrote: >>>>> As an integrated GPU, MTL does not have local memory and >>>>> HAS_LMEM() returns false. However the platform's stolen memory >>>>> is presented via BAR2 (i.e., the BAR we traditionally consider >>>>> to be the LMEM BAR) and should be managed by the driver the same >>>>> way that local memory is on dgpu platforms (which includes >>>>> setting the "lmem" bit on page table entries). We use the term >>>>> "local stolen memory" to refer to this model. >>>>> >>>>> Cc: Matt Roper >>>>> Cc: Lucas De Marchi >>>>> >>>>> Signed-off-by: CQ Tang >>>>> Signed-off-by: Aravind Iddamsetty >>>>> Original-author: CQ Tang >>>>> --- >>>>> drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 + >>>>> drivers/gpu/drm/i915/gt/intel_ggtt.c | 2 +- >>>>> drivers/gpu/drm/i915/i915_drv.h | 3 + >>>>> 3 files changed, 100 insertions(+), 18 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >>>>> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >>>>> index acc561c0f0aa..bad5250fb764 100644 >>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >>>>> @@ -77,6 +77,19 @@ void i915_gem_stolen_remove_node(struct >>>>> drm_i915_private *i915, >>>>> mutex_unlock(&i915->mm.stolen_lock); >>>>> } >>>>> >>>>> +static bool is_dsm_invalid(struct drm_i915_private *i915, struct >>>>> resource *dsm) >>>>> +{ >>>>> + if (!HAS_BAR2_SMEM_STOLEN(i915)) { >>>> >>>> I called a similar function as is_dsm_valid() in >>>> https://patchwork.freedesktop.org/series/108620/ >>>> >>>> sounds weird with "invalid" and the double negation on return early >>>> style. >> >> sure, will change it hope i can use that from your patch. > > that patch is now pushed, so now you can reuse it. Thanks for the info and help. Aravind. > > Lucas De Marchi
Re: [PATCH 1/1] drm/i915/mtl: enable local stolen memory
On 20-09-2022 13:05, Jani Nikula wrote: > On Tue, 20 Sep 2022, Aravind Iddamsetty wrote: >> As an integrated GPU, MTL does not have local memory and >> HAS_LMEM() returns false. However the platform's stolen memory >> is presented via BAR2 (i.e., the BAR we traditionally consider >> to be the LMEM BAR) and should be managed by the driver the same >> way that local memory is on dgpu platforms (which includes >> setting the "lmem" bit on page table entries). We use the term >> "local stolen memory" to refer to this model. >> >> Cc: Matt Roper >> Cc: Lucas De Marchi >> >> Signed-off-by: CQ Tang >> Signed-off-by: Aravind Iddamsetty >> Original-author: CQ Tang >> --- >> drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 + >> drivers/gpu/drm/i915/gt/intel_ggtt.c | 2 +- >> drivers/gpu/drm/i915/i915_drv.h| 3 + >> 3 files changed, 100 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >> index acc561c0f0aa..bad5250fb764 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >> @@ -77,6 +77,19 @@ void i915_gem_stolen_remove_node(struct drm_i915_private >> *i915, >> mutex_unlock(&i915->mm.stolen_lock); >> } >> >> +static bool is_dsm_invalid(struct drm_i915_private *i915, struct resource >> *dsm) > > Abstracting this as a separate function looks like a separate patch. > > I generally recommend using positive naming, "is dsm valid", to avoid > any double negatives that might pop up, now or in the > future. !is_dsm_invalid() gets slower for human brains to parse. sure will change it. Thanks, Aravind. > > BR, > Jani. > > >> +{ >> +if (!HAS_BAR2_SMEM_STOLEN(i915)) { >> +if (dsm->start == 0) >> +return true; >> +} >> + >> +if (dsm->end <= dsm->start) >> +return true; >> + >> +return false; >> +} >> + >> static int i915_adjust_stolen(struct drm_i915_private *i915, >>struct resource *dsm) >> { >> @@ -84,7 +97,7 @@ static int i915_adjust_stolen(struct drm_i915_private >> *i915, >> struct intel_uncore *uncore = ggtt->vm.gt->uncore; >> struct resource *r; >> >> -if (dsm->start == 0 || dsm->end <= dsm->start) >> +if (is_dsm_invalid(i915, dsm)) >> return -EINVAL; >> >> /* >> @@ -136,7 +149,7 @@ static int i915_adjust_stolen(struct drm_i915_private >> *i915, >> * overlaps with the non-stolen system memory range, since lmem is local >> * to the gpu. 
>> */ >> -if (HAS_LMEM(i915)) >> +if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915)) >> return 0; >> >> /* >> @@ -371,8 +384,6 @@ static void icl_get_stolen_reserved(struct >> drm_i915_private *i915, >> >> drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); >> >> -*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; >> - >> switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { >> case GEN8_STOLEN_RESERVED_1M: >> *size = 1024 * 1024; >> @@ -390,6 +401,12 @@ static void icl_get_stolen_reserved(struct >> drm_i915_private *i915, >> *size = 8 * 1024 * 1024; >> MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); >> } >> + >> +if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) && !IS_DGFX(i915)) >> +/* the base is initialized to stolen top so subtract size to >> get base */ >> +*base -= *size; >> +else >> +*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; >> } >> >> static int i915_gem_init_stolen(struct intel_memory_region *mem) >> @@ -423,8 +440,7 @@ static int i915_gem_init_stolen(struct >> intel_memory_region *mem) >> if (i915_adjust_stolen(i915, &i915->dsm)) >> return 0; >> >> -GEM_BUG_ON(i915->dsm.start == 0); >> -GEM_BUG_ON(i915->dsm.end <= i915->dsm.start); >> +GEM_BUG_ON(is_dsm_invalid(i915, &i915->dsm)); >> >> stolen_top = i915->dsm.end + 1; >> reserved_base = stolen_top; >> @@ -796,6 +812,46 @@ static const struct intel_memory_region_ops >> i915_region_stolen_lmem_ops = { >> .init_object = _i915_gem_object_stolen_init, >> }; >> >> +static int get_mtl_gms_size(struct intel_uncore *uncore) >> +{ >> +u16 ggc, gms; >> + >> +ggc = intel_uncore_read16(uncore, _MMIO(0x108040)); >> + >> +/* check GGMS, should be fixed 0x3 (8MB) */ >> +if ((ggc & 0xc0) != 0xc0) >> +return -EIO; >> + >> +/* return valid GMS value, -EIO if invalid */ >> +gms = ggc >> 8; >> +switch (gms) { >> +case 0x0 ... 0x10: >> +return gms * 32; >> +case 0x20: >> +return 1024; >> +case 0x30: >> +return 1536; >> +case 0x40: >> +return 2048; >> +case 0xf0 ... 0xfe: >> +return (gms - 0xf0 + 1) * 4; >> +default: >> +return -E
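For reference, the GMS decode quoted (and truncated) above boils down to a small size table. A standalone transcription of just the arithmetic, with the register read replaced by a plain parameter; the table values are copied from the quoted hunk, everything else is scaffolding for the example:

#include <stdio.h>

/* GMS field (GGC bits 15:8) -> stolen size in MiB, per the table in the
 * quoted patch.  Returns -1 where the driver would return -EIO. */
static int mtl_gms_to_mib(unsigned int gms)
{
        if (gms <= 0x10)
                return gms * 32;
        if (gms == 0x20)
                return 1024;
        if (gms == 0x30)
                return 1536;
        if (gms == 0x40)
                return 2048;
        if (gms >= 0xf0 && gms <= 0xfe)
                return (gms - 0xf0 + 1) * 4;
        return -1;
}

int main(void)
{
        unsigned int ggc = 0x20c0;      /* example raw GGC: GGMS = 0x3, GMS = 0x20 */

        if ((ggc & 0xc0) != 0xc0) {     /* GGMS is expected to read back as 0x3 (8 MiB) */
                printf("unexpected GGMS value\n");
                return 1;
        }
        printf("stolen size: %d MiB\n", mtl_gms_to_mib(ggc >> 8));
        return 0;
}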
Re: [Intel-gfx] [PATCH 1/1] drm/i915/mtl: enable local stolen memory
replying here for earlier comments too. On 21-09-2022 01:35, Lucas De Marchi wrote: > On Tue, Sep 20, 2022 at 01:31:49AM -0700, Lucas De Marchi wrote: >> On Tue, Sep 20, 2022 at 12:49:40PM +0530, Aravind Iddamsetty wrote: >>> As an integrated GPU, MTL does not have local memory and >>> HAS_LMEM() returns false. However the platform's stolen memory >>> is presented via BAR2 (i.e., the BAR we traditionally consider >>> to be the LMEM BAR) and should be managed by the driver the same >>> way that local memory is on dgpu platforms (which includes >>> setting the "lmem" bit on page table entries). We use the term >>> "local stolen memory" to refer to this model. >>> >>> Cc: Matt Roper >>> Cc: Lucas De Marchi >>> >>> Signed-off-by: CQ Tang >>> Signed-off-by: Aravind Iddamsetty >>> Original-author: CQ Tang >>> --- >>> drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 + >>> drivers/gpu/drm/i915/gt/intel_ggtt.c | 2 +- >>> drivers/gpu/drm/i915/i915_drv.h | 3 + >>> 3 files changed, 100 insertions(+), 18 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >>> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >>> index acc561c0f0aa..bad5250fb764 100644 >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c >>> @@ -77,6 +77,19 @@ void i915_gem_stolen_remove_node(struct >>> drm_i915_private *i915, >>> mutex_unlock(&i915->mm.stolen_lock); >>> } >>> >>> +static bool is_dsm_invalid(struct drm_i915_private *i915, struct >>> resource *dsm) >>> +{ >>> + if (!HAS_BAR2_SMEM_STOLEN(i915)) { >> >> I called a similar function as is_dsm_valid() in >> https://patchwork.freedesktop.org/series/108620/ >> >> sounds weird with "invalid" and the double negation on return early >> style. sure, will change it hope i can use that from your patch. >> >>> + if (dsm->start == 0) >>> + return true; >>> + } >>> + >>> + if (dsm->end <= dsm->start) >>> + return true; >>> + >>> + return false; >>> +} >>> + >>> static int i915_adjust_stolen(struct drm_i915_private *i915, >>> struct resource *dsm) >>> { >>> @@ -84,7 +97,7 @@ static int i915_adjust_stolen(struct >>> drm_i915_private *i915, >>> struct intel_uncore *uncore = ggtt->vm.gt->uncore; >>> struct resource *r; >>> >>> - if (dsm->start == 0 || dsm->end <= dsm->start) >>> + if (is_dsm_invalid(i915, dsm)) >>> return -EINVAL; >>> >>> /* >>> @@ -136,7 +149,7 @@ static int i915_adjust_stolen(struct >>> drm_i915_private *i915, >>> * overlaps with the non-stolen system memory range, since lmem >>> is local >>> * to the gpu. >>> */ >>> - if (HAS_LMEM(i915)) >>> + if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915)) >> >> comment above makes no sense when you add this. For this specific case >> it's still system memory, reserved by the BIOS and that we access by >> mapping the lmembar thanks for catching this. 
>> >>> return 0; >>> >>> /* >>> @@ -371,8 +384,6 @@ static void icl_get_stolen_reserved(struct >>> drm_i915_private *i915, >>> >>> drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); >>> >>> - *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; >>> - >>> switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { >>> case GEN8_STOLEN_RESERVED_1M: >>> *size = 1024 * 1024; >>> @@ -390,6 +401,12 @@ static void icl_get_stolen_reserved(struct >>> drm_i915_private *i915, >>> *size = 8 * 1024 * 1024; >>> MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); >>> } >>> + >>> + if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) && !IS_DGFX(i915)) >>> + /* the base is initialized to stolen top so subtract size to >>> get base */ >>> + *base -= *size; >> >> that doesn't necessarily holds true. According to the spec the wopcm >> base is 1MB aligned so even if it is "at the top", it may not mean it >> is at the >> very very top that we can just subtract the size. well here the reserved_base is not to the top of the BAR, we got the stolen size from GGC register so base is initialized to end of that stolen size. hence we subtract the reserved size from it. >> >> >>> + else >>> + *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; >>> } >>> >>> static int i915_gem_init_stolen(struct intel_memory_region *mem) >>> @@ -423,8 +440,7 @@ static int i915_gem_init_stolen(struct >>> intel_memory_region *mem) >>> if (i915_adjust_stolen(i915, &i915->dsm)) >>> return 0; >>> >>> - GEM_BUG_ON(i915->dsm.start == 0); >>> - GEM_BUG_ON(i915->dsm.end <= i915->dsm.start); >>> + GEM_BUG_ON(is_dsm_invalid(i915, &i915->dsm)); >>> >>> stolen_top = i915->dsm.end + 1; >>> reserved_base = stolen_top; >>> @@ -796,6 +812,46 @@ static const struct intel_memory_region_ops >>> i915_region_stolen_lmem_ops = { >>> .init_object = _i915_gem_object_stolen_init, >>> }; >>> >>> +static int get_mtl_gms_size(struct
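Lucas's alignment remark above is easy to make concrete: if the reserved size is not a whole number of megabytes, then "stolen top minus size" is itself unaligned, while (per his comment) the WOPCM base is 1 MiB aligned, so the naive subtraction and the real base can disagree. A throwaway illustration with invented numbers, not taken from any platform:

#include <stdio.h>

#define SZ_1M   (1ull << 20)

int main(void)
{
        unsigned long long stolen_top    = 256 * SZ_1M;         /* end of DSM + 1 */
        unsigned long long reserved_size = 6 * SZ_1M + 0x4000;  /* deliberately unaligned */

        unsigned long long naive_base   = stolen_top - reserved_size;
        unsigned long long aligned_base = naive_base & ~(SZ_1M - 1);

        printf("top - size:         %#llx\n", naive_base);
        printf("1 MiB aligned base: %#llx\n", aligned_base);
        return 0;
}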
Re: [PATCH v1 3/4] drm/i915: Split i915_gem_init_stolen()
On 16-09-2022 02:09, Lucas De Marchi wrote: > Add some helpers: adjust_stolen(), request_smem_stolen_() and > init_reserved_stolen() that are now called by i915_gem_init_stolen() to > initialize each part of the Data Stolen Memory region. Main goal is to > split the reserved part, also known as WOPCM, as its calculation changes > often per platform. > > This also fixes a bug in graphics version < 5 (in theory, not tested, > due to no machine available): it would bail out on stolen creation due > to "Stolen reserved area outside stolen memory". Other than that, no > change in behavior. > > Signed-off-by: Lucas De Marchi > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > index c34065fe2ecc..0e57a6d81534 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > @@ -77,22 +77,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private > *i915, > mutex_unlock(&i915->mm.stolen_lock); > } > > -static int i915_adjust_stolen(struct drm_i915_private *i915, > - struct resource *dsm) > +static bool valid_stolen_size(struct resource *dsm) > +{ > + return dsm->start != 0 && dsm->end > dsm->start; > +} > + > +static int adjust_stolen(struct drm_i915_private *i915, > + struct resource *dsm) > { > struct i915_ggtt *ggtt = to_gt(i915)->ggtt; > struct intel_uncore *uncore = ggtt->vm.gt->uncore; > - struct resource *r; > > - if (dsm->start == 0 || dsm->end <= dsm->start) > + if (!valid_stolen_size(dsm)) > return -EINVAL; > > /* > + * Make sure we don't clobber the GTT if it's within stolen memory > + * >* TODO: We have yet too encounter the case where the GTT wasn't at the >* end of stolen. With that assumption we could simplify this. >*/ > - > - /* Make sure we don't clobber the GTT if it's within stolen memory */ > if (GRAPHICS_VER(i915) <= 4 && > !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) { > struct resource stolen[2] = {*dsm, *dsm}; > @@ -131,10 +135,20 @@ static int i915_adjust_stolen(struct drm_i915_private > *i915, > } > } > > + if (!valid_stolen_size(dsm)) > + return -EINVAL; > + > + return 0; > +} > + > +static int request_smem_stolen(struct drm_i915_private *i915, > +struct resource *dsm) > +{ > + struct resource *r; > + > /* > - * With stolen lmem, we don't need to check if the address range > - * overlaps with the non-stolen system memory range, since lmem is local > - * to the gpu. > + * With stolen lmem, we don't need to request if the address range replace /if/for > + * since lmem is local to the gpu. >*/ > if (HAS_LMEM(i915)) > return 0; > @@ -392,39 +406,22 @@ static void icl_get_stolen_reserved(struct > drm_i915_private *i915, > } > } > > -static int i915_gem_init_stolen(struct intel_memory_region *mem) > +/* > + * Initialize i915->dsm_reserved to contain the reserved space within the > Data > + * Stolen Memory. This is a range on the top of DSM that is reserved, not to > + * be used by driver, so must be excluded from the region passed to the > + * allocator later. In the spec this is also called as WOPCM. > + * > + * Our expectation is that the reserved space is at the top of the stolen > + * region, as it has been the case for every platform, and *never* at the > + * bottom, so the calculation here can be simplified. 
> + */ > +static int init_reserved_stolen(struct drm_i915_private *i915) > { > - struct drm_i915_private *i915 = mem->i915; > struct intel_uncore *uncore = &i915->uncore; > resource_size_t reserved_base, stolen_top; > - resource_size_t reserved_total, reserved_size; > - > - mutex_init(&i915->mm.stolen_lock); > - > - if (intel_vgpu_active(i915)) { > - drm_notice(&i915->drm, > -"%s, disabling use of stolen memory\n", > -"iGVT-g active"); > - return 0; > - } > - > - if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) { > - drm_notice(&i915->drm, > -"%s, disabling use of stolen memory\n", > -"DMAR active"); > - return 0; > - } > - > - if (resource_size(&mem->region) == 0) > - return 0; > - > - if (i915_adjust_stolen(i915, &mem->region)) > - return 0; > - > - GEM_BUG_ON(i915->dsm.start == 0); > - GEM_BUG_ON(i915->dsm.end <= i915->dsm.start); > - > - i915->dsm = mem->region; > + resource_size_t reserved_size; > + int ret = 0; > > stolen_top = i915->dsm.end + 1; > reserved_base = stolen_top; > @@ -453,19 +450,17 @@ static int i915_gem_init_stolen(struct > intel_memory_region *mem) > } else if (GRAP
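The commit message above describes the reserved (WOPCM) space as living at the top of DSM and being excluded from what the allocator sees. A self-contained sketch of that split and of the containment check behind the "Stolen reserved area outside stolen memory" error it mentions; the range struct stands in for struct resource and the sample numbers are made up:

#include <stdio.h>
#include <stdbool.h>

struct range { unsigned long long start, end; };        /* inclusive, like struct resource */

static bool split_reserved(const struct range *dsm, unsigned long long reserved_size,
                           struct range *usable, struct range *reserved)
{
        unsigned long long stolen_top = dsm->end + 1;

        /* Reserved space is expected at the very top of DSM. */
        reserved->start = stolen_top - reserved_size;
        reserved->end   = dsm->end;
        if (reserved->start < dsm->start)
                return false;           /* "Stolen reserved area outside stolen memory" */

        usable->start = dsm->start;
        usable->end   = reserved->start - 1;
        return true;
}

int main(void)
{
        struct range dsm = { 0x80000000ull, 0x83ffffffull };    /* made-up 64 MiB DSM */
        struct range usable, reserved;

        if (split_reserved(&dsm, 8ull << 20, &usable, &reserved))
                printf("usable %#llx-%#llx, reserved %#llx-%#llx\n",
                       usable.start, usable.end, reserved.start, reserved.end);
        return 0;
}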
Re: [PATCH v1 2/4] drm/i915: Add missing mask when reading GEN12_DSMBASE
On 16-09-2022 02:09, Lucas De Marchi wrote: > DSMBASE register is defined so BDSM bitfield contains the bits 63 to 20 > of the base address of stolen. For the supported platforms bits 0-19 are > zero but that may not be true in future. Add the missing mask. > > Signed-off-by: Lucas De Marchi Acked-by: Aravind Iddamsetty Thanks, Aravind. > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > index 42f4769bb4ac..c34065fe2ecc 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > @@ -814,7 +814,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, > u16 type, > return ERR_PTR(-ENXIO); > > /* Use DSM base address instead for stolen memory */ > - dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE); > + dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK; > if (IS_DG1(uncore->i915)) { > lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR); > if (WARN_ON(lmem_size < dsm_base)) > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 1a9bd829fc7e..0301874c76ba 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -7953,6 +7953,7 @@ enum skl_power_gate { > > #define GEN12_GSMBASE_MMIO(0x108100) > #define GEN12_DSMBASE_MMIO(0x1080C0) > +#define GEN12_BDSM_MASKGENMASK(63, 20) > > #define XEHP_CLOCK_GATE_DIS _MMIO(0x101014) > #define SGSI_SIDECLK_DIS REG_BIT(17) >
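The masking being added above is worth a two-line illustration: since bits 19:0 of DSMBASE are not guaranteed to stay zero, the base has to be extracted with a 64-bit mask rather than used raw. A toy equivalent of GENMASK(63, 20) and the masking, with an invented register value:

#include <stdio.h>

#define BDSM_ADDR_MASK  (~0ull << 20)   /* same bits as GENMASK(63, 20) */

int main(void)
{
        /* Invented raw value: a base address plus junk in the low 20 bits. */
        unsigned long long reg = 0x47f0000000ull | 0x42;

        printf("raw:      %#llx\n", reg);
        printf("DSM base: %#llx\n", reg & BDSM_ADDR_MASK);
        return 0;
}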
Re: [PATCH v1 1/4] drm/i915: Move dsm assignment to be after adjustment
On 16-09-2022 02:09, Lucas De Marchi wrote: > Reduce possible side effects of assigning the region and bailing out due > to errors. > > Signed-off-by: Lucas De Marchi > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > index acc561c0f0aa..42f4769bb4ac 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > @@ -418,14 +418,14 @@ static int i915_gem_init_stolen(struct > intel_memory_region *mem) > if (resource_size(&mem->region) == 0) > return 0; > > - i915->dsm = mem->region; > - > - if (i915_adjust_stolen(i915, &i915->dsm)) > + if (i915_adjust_stolen(i915, &mem->region)) > return 0; > > GEM_BUG_ON(i915->dsm.start == 0); > GEM_BUG_ON(i915->dsm.end <= i915->dsm.start); > > + i915->dsm = mem->region; The assignment should be above the GEM_BUG_ONs. But why don't you squash this into the 3rd patch? Thanks, Aravind. > + > stolen_top = i915->dsm.end + 1; > reserved_base = stolen_top; > reserved_size = 0; >
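Aravind's ordering point can be shown with a tiny toy: asserting on the driver-wide copy before the assignment checks the stale (zeroed) value rather than the adjusted region, so the assignment has to come first. Names and values below are placeholders, not the i915 structures:

#include <assert.h>

struct range { unsigned long long start, end; };

int main(void)
{
        struct range dsm    = { 0, 0 };                          /* driver-wide copy, still unset */
        struct range region = { 0x80000000ull, 0x83ffffffull };  /* adjusted stolen region */

        /* Asserting on dsm before this assignment would trip on the stale
         * zeroed value; assign first, then check what was just assigned. */
        dsm = region;
        assert(dsm.start != 0 && dsm.end > dsm.start);
        return 0;
}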
Re: [PATCH v3 12/14] drm/i915/xelpmp: Expose media as another GT
On 07-09-2022 05:19, Matt Roper wrote: > Xe_LPM+ platforms have "standalone media." I.e., the media unit is > designed as an additional GT with its own engine list, GuC, forcewake, > etc. Let's allow platforms to include media GTs in their device info. > > v2: > - Simplify GSI register handling and split it out to a separate patch >for ease of review. (Daniele) > > Cc: Aravind Iddamsetty > Cc: Daniele Ceraolo Spurio > Signed-off-by: Matt Roper > Reviewed-by: Aravind Iddamsetty > --- > drivers/gpu/drm/i915/Makefile| 1 + > drivers/gpu/drm/i915/gt/intel_gt.c | 6 > drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 + > drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + > drivers/gpu/drm/i915/gt/intel_sa_media.c | 39 > drivers/gpu/drm/i915/gt/intel_sa_media.h | 15 + > drivers/gpu/drm/i915/i915_pci.c | 14 + > 7 files changed, 84 insertions(+) > create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c > create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index 522ef9b4aff3..e83e4cd46968 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -123,6 +123,7 @@ gt-y += \ > gt/intel_ring.o \ > gt/intel_ring_submission.o \ > gt/intel_rps.o \ > + gt/intel_sa_media.o \ > gt/intel_sseu.o \ > gt/intel_sseu_debugfs.o \ > gt/intel_timeline.o \ > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c > b/drivers/gpu/drm/i915/gt/intel_gt.c > index aa0e40987798..9b9c0ea73b7f 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > @@ -31,6 +31,7 @@ > #include "intel_rc6.h" > #include "intel_renderstate.h" > #include "intel_rps.h" > +#include "intel_sa_media.h" > #include "intel_gt_sysfs.h" > #include "intel_uncore.h" > #include "shmem_utils.h" > @@ -864,6 +865,11 @@ int intel_gt_probe_all(struct drm_i915_private *i915) > ret = intel_gt_tile_setup(gt, phys_addr + > gtdef->mapping_base); > break; > > + case GT_MEDIA: > + ret = intel_sa_mediagt_setup(gt, phys_addr + > gtdef->mapping_base, > + gtdef->gsi_offset); > + break; > + > case GT_PRIMARY: > /* Primary GT should not appear in extra GT list */ > default: > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h > b/drivers/gpu/drm/i915/gt/intel_gt_regs.h > index d414785003cc..fb2c56777480 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h > @@ -1578,4 +1578,12 @@ > > #define GEN12_SFC_DONE(n)_MMIO(0x1cc000 + (n) * 0x1000) > > +/* > + * Standalone Media's non-engine GT registers are located at their regular GT > + * offsets plus 0x38. This extra offset is stored inside the > intel_uncore > + * structure so that the existing code can be used for both GTs without > + * modification. 
> + */ > +#define MTL_MEDIA_GSI_BASE 0x38 > + > #endif /* __INTEL_GT_REGS__ */ > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h > b/drivers/gpu/drm/i915/gt/intel_gt_types.h > index 82dc28643572..726695936a79 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h > @@ -84,6 +84,7 @@ struct gt_defaults { > enum intel_gt_type { > GT_PRIMARY, > GT_TILE, > + GT_MEDIA, > }; > > struct intel_gt { > diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c > b/drivers/gpu/drm/i915/gt/intel_sa_media.c > new file mode 100644 > index ..8c5c519457cc > --- /dev/null > +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c > @@ -0,0 +1,39 @@ > +// SPDX-License-Identifier: MIT > +/* > + * Copyright © 2021 Intel Corporation > + */ > + > +#include > + > +#include "i915_drv.h" > +#include "gt/intel_gt.h" > +#include "gt/intel_sa_media.h" > + > +int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, > +u32 gsi_offset) > +{ > + struct drm_i915_private *i915 = gt->i915; > + struct intel_uncore *uncore; > + > + uncore = drmm_kzalloc(&i915->drm, sizeof(*uncore), GFP_KERNEL); > + if (!uncore) > + return -ENOMEM; > + > + uncore->gsi_offset = gsi_offset; > + > + intel_gt_common_init_early(gt); > + intel_uncore_init_early(uncore, gt); > + > + /* > + * Standalone media shares the general MMIO space with the primary > + * GT. We'll re-use the primary GT's mapping. > + */ > + uncore->regs = i915->uncore.regs; > + if (drm_WARN_ON(&i915->drm, uncore->regs == NULL)) > + return -EIO; > + > + gt->uncore = uncore; > + gt->phys_addr = phys_addr; > + > + return 0; > +} > diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.h > b/driver
Re: [PATCH v3 05/14] drm/i915: Prepare more multi-GT initialization
/* Primary GT should not appear in extra GT list */ > + default: > + MISSING_CASE(gtdef->type); > + ret = -ENODEV; > + } > + > + if (ret) > + goto err; > + > + i915->gt[i] = gt; > + } > + > return 0; > + > +err: > + i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, > ret); > + intel_gt_release_all(i915); > + > + return ret; > } > > int intel_gt_tiles_init(struct drm_i915_private *i915) > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h > b/drivers/gpu/drm/i915/gt/intel_gt.h > index 40b06adf509a..4d8779529cc2 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt.h > @@ -54,7 +54,6 @@ void intel_gt_driver_register(struct intel_gt *gt); > void intel_gt_driver_unregister(struct intel_gt *gt); > void intel_gt_driver_remove(struct intel_gt *gt); > void intel_gt_driver_release(struct intel_gt *gt); > - > void intel_gt_driver_late_release_all(struct drm_i915_private *i915); > > int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h > b/drivers/gpu/drm/i915/gt/intel_gt_types.h > index 4d56f7d5a3be..0e139f7d75ed 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h > @@ -81,8 +81,16 @@ struct gt_defaults { > u32 max_freq; > }; > > +enum intel_gt_type { > + GT_PRIMARY, > + GT_TILE, > +}; > + > struct intel_gt { > struct drm_i915_private *i915; > + const char *name; > + enum intel_gt_type type; > + > struct intel_uncore *uncore; > struct i915_ggtt *ggtt; > > @@ -262,6 +270,13 @@ struct intel_gt { > struct kobject *sysfs_defaults; > }; > > +struct intel_gt_definition { > + enum intel_gt_type type; > + char *name; > + u32 mapping_base; > + intel_engine_mask_t engine_mask; > +}; > + > enum intel_gt_scratch_field { > /* 8 bytes */ > INTEL_GT_SCRATCH_FIELD_DEFAULT = 0, > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index befb167b3c49..f010be8df851 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -916,6 +916,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > #define HAS_REGION(i915, i) (RUNTIME_INFO(i915)->memory_regions & (i)) > #define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM) > > +#define HAS_EXTRA_GT_LIST(dev_priv) (INTEL_INFO(dev_priv)->extra_gt_list) > + > /* > * Platform has the dedicated compression control state for each lmem > surfaces > * stored in lmem to support the 3D and media compression formats. 
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h > b/drivers/gpu/drm/i915/intel_device_info.h > index 6904ad03ca19..deaa07d8df2c 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.h > +++ b/drivers/gpu/drm/i915/intel_device_info.h > @@ -37,6 +37,7 @@ > > struct drm_printer; > struct drm_i915_private; > +struct intel_gt_definition; > > /* Keep in gen based order, and chronological order within a gen */ > enum intel_platform { > @@ -252,6 +253,8 @@ struct intel_device_info { > > unsigned int dma_mask_size; /* available DMA address bits */ > > + const struct intel_gt_definition *extra_gt_list; > + > u8 gt; /* GT number, 0 if undefined */ > > #define DEFINE_FLAG(name) u8 name:1 > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c > b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > index f5904e659ef2..915d58ba383e 100644 > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > @@ -115,6 +115,7 @@ static struct dev_pm_domain pm_domain = { > static void mock_gt_probe(struct drm_i915_private *i915) > { > i915->gt[0] = &i915->gt0; > + i915->gt[0]->name = "Mock GT"; > } > > struct drm_i915_private *mock_gem_device(void) LGTM. Reviewed-by: Aravind Iddamsetty Aravind.
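The interesting part of the series above is that extra GTs become data: the platform description carries a list of definitions and probe walks it, dispatching on type. A compressed standalone model of that flow; the names and offsets are placeholders and only one stub setup path exists per type:

#include <stdio.h>

enum gt_type { GT_PRIMARY, GT_TILE, GT_MEDIA };

struct gt_def {
        enum gt_type type;
        const char *name;
        unsigned int mapping_base;      /* offset of this GT's MMIO window */
};

static int setup_tile(const struct gt_def *def)
{
        printf("tile setup: %s @ %#x\n", def->name, def->mapping_base);
        return 0;
}

static int setup_media(const struct gt_def *def)
{
        printf("media setup: %s @ %#x\n", def->name, def->mapping_base);
        return 0;
}

static int probe_extra_gts(const struct gt_def *defs, int count)
{
        for (int i = 0; i < count; i++) {
                int ret;

                switch (defs[i].type) {
                case GT_TILE:
                        ret = setup_tile(&defs[i]);
                        break;
                case GT_MEDIA:
                        ret = setup_media(&defs[i]);
                        break;
                default:
                        ret = -1;       /* the primary GT never appears in this list */
                        break;
                }
                if (ret)
                        return ret;
        }
        return 0;
}

int main(void)
{
        /* Invented example list, loosely shaped like xelpmp_extra_gt[]. */
        static const struct gt_def extra[] = {
                { GT_MEDIA, "Standalone Media GT", 0x0 },
        };

        return probe_extra_gts(extra, sizeof(extra) / sizeof(extra[0]));
}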
Re: [PATCH v3 11/14] drm/i915/mtl: Add gsi_offset when emitting aux table invalidation
On 07-09-2022 05:19, Matt Roper wrote: > The aux table invalidation registers are a bit unique --- they're > engine-centric registers that reside in the GSI register space rather > than within the engines' regular MMIO ranges. That means that when > issuing invalidation on engines in the standalone media GT, the GSI > offset must be added to the regular MMIO offset for the invalidation > registers. > > Cc: Aravind Iddamsetty > Signed-off-by: Matt Roper > --- > drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 15 ++- > drivers/gpu/drm/i915/gt/gen8_engine_cs.h | 3 ++- > drivers/gpu/drm/i915/gt/intel_lrc.c | 9 ++--- > 3 files changed, 18 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c > b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c > index 98645797962f..e49fa6fa6aee 100644 > --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c > @@ -165,10 +165,12 @@ static u32 preparser_disable(bool state) > return MI_ARB_CHECK | 1 << 8 | state; > } > > -u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg) > +u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t > inv_reg) > { > + u32 gsi_offset = gt->uncore->gsi_offset; > + > *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; > - *cs++ = i915_mmio_reg_offset(inv_reg); > + *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; > *cs++ = AUX_INV; > *cs++ = MI_NOOP; > > @@ -254,7 +256,8 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 > mode) > > if (!HAS_FLAT_CCS(rq->engine->i915)) { > /* hsdes: 1809175790 */ > - cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV); > + cs = gen12_emit_aux_table_inv(rq->engine->gt, > + cs, GEN12_GFX_CCS_AUX_NV); > } > > *cs++ = preparser_disable(false); > @@ -313,9 +316,11 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 > mode) > > if (aux_inv) { /* hsdes: 1809175790 */ > if (rq->engine->class == VIDEO_DECODE_CLASS) > - cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV); > + cs = gen12_emit_aux_table_inv(rq->engine->gt, > + cs, GEN12_VD0_AUX_NV); > else > - cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV); > + cs = gen12_emit_aux_table_inv(rq->engine->gt, > + cs, GEN12_VE0_AUX_NV); > } > > if (mode & EMIT_INVALIDATE) > diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h > b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h > index 32e3d2b831bb..e4d24c811dd6 100644 > --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h > +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h > @@ -13,6 +13,7 @@ > #include "intel_gt_regs.h" > #include "intel_gpu_commands.h" > > +struct intel_gt; > struct i915_request; > > int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode); > @@ -45,7 +46,7 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, > u32 *cs); > u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); > u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); > > -u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg); > +u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t > inv_reg); > > static inline u32 * > __gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c > b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 070cec4ff8a4..08214683e130 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -1278,7 +1278,8 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context > *ce, u32 *cs) > > /* hsdes: 1809175790 */ > if 
(!HAS_FLAT_CCS(ce->engine->i915)) > - cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV); > + cs = gen12_emit_aux_table_inv(ce->engine->gt, > + cs, GEN12_GFX_CCS_AUX_NV); > > /* Wa_16014892111 */ > if (IS_DG2(ce->engine->i915)) > @@ -1304,9 +1305,11 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context > *ce, u32 *cs) > /* hsdes: 1809175790 */ > if (!HAS_FLAT_CCS(ce->engine->i915)) { > if (ce->engine->class == VIDEO_DECODE_CLASS) > - cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV); > + cs = gen12_emit_aux_table_inv(ce->engine->gt, > + cs, GEN12_VD0_AUX_NV); > else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS) > - cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV); > + cs = gen12_emit
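The subtlety in this patch is that the GSI offset has to be folded into register addresses written into the command stream, not only into CPU MMIO accesses. A toy emitter showing just that addition; the opcode encoding, register offset and GSI offset are all placeholders, not the real MI_LOAD_REGISTER_IMM layout or MTL values:

#include <stdio.h>
#include <stdint.h>

#define FAKE_MI_LRI(count)      (0x22000000u | (count))        /* placeholder opcode */

static uint32_t *emit_lri(uint32_t *cs, uint32_t reg_offset, uint32_t value,
                          uint32_t gsi_offset)
{
        *cs++ = FAKE_MI_LRI(1);
        *cs++ = reg_offset + gsi_offset;        /* final address the hardware will see */
        *cs++ = value;
        *cs++ = 0;                              /* padding, keeps the batch qword aligned */
        return cs;
}

int main(void)
{
        uint32_t batch[4];

        /* 0x100000 stands in for whatever the target GT's gsi_offset is. */
        emit_lri(batch, 0x1234, 0x1, 0x100000);

        for (int i = 0; i < 4; i++)
                printf("dw%d: %#010x\n", i, batch[i]);
        return 0;
}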
Re: [Intel-gfx] [PATCH v2 09/12] drm/i915/uncore: Add GSI offset to uncore
On 03-09-2022 05:02, Matt Roper wrote: > GT non-engine registers (referred to as "GSI" registers by the spec) > have the same relative offsets on standalone media as they do on the > primary GT, just with an additional "GSI offset" added to their MMIO > address. If we store this GSI offset in the standalone media's > intel_uncore structure, it can be automatically applied to all GSI reg > reads/writes that happen on that GT, allowing us to re-use our existing > GT code with minimal changes. > > Forcewake and shadowed register tables for the media GT (which will be > added in a future patch) are listed as final addresses that already > include the GSI offset, so we also need to add the GSI offset before > doing lookups of registers in one of those tables. > > Cc: Daniele Ceraolo Spurio > Signed-off-by: Matt Roper > --- > drivers/gpu/drm/i915/gt/intel_gt.c | 17 ++--- > drivers/gpu/drm/i915/intel_device_info.h | 4 +++- > drivers/gpu/drm/i915/intel_uncore.c | 10 -- > drivers/gpu/drm/i915/intel_uncore.h | 22 -- > 4 files changed, 45 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c > b/drivers/gpu/drm/i915/gt/intel_gt.c > index fbb5e32979a4..a6ed11b933eb 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > @@ -776,10 +776,20 @@ void intel_gt_driver_late_release_all(struct > drm_i915_private *i915) > } > } > > -static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) > +/* > + * Note: the gsi_offset parameter here isn't used, but we want to keep the > + * function signature equivalent to gtdef->setup() so that it can be plugged > + * in when we enabled remote tiles in the future. > + */ > +static int intel_gt_tile_setup(struct intel_gt *gt, > +phys_addr_t phys_addr, > +u32 gsi_offset) > { > int ret; > > + /* GSI offset is only applicable for media GTs */ > + drm_WARN_ON(>->i915->drm, gsi_offset); > + > if (!gt_is_root(gt)) { > struct intel_uncore *uncore; > > @@ -832,7 +842,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915) > gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; > > drm_dbg(&i915->drm, "Setting up %s\n", gt->name); > - ret = intel_gt_tile_setup(gt, phys_addr); > + ret = intel_gt_tile_setup(gt, phys_addr, 0); > if (ret) > return ret; > > @@ -865,7 +875,8 @@ int intel_gt_probe_all(struct drm_i915_private *i915) > goto err; > } > > - ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base); > + ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base, > +gtdef->gsi_offset); > if (ret) > goto err; > > diff --git a/drivers/gpu/drm/i915/intel_device_info.h > b/drivers/gpu/drm/i915/intel_device_info.h > index b408ce384cd7..85e0ef0e91b1 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.h > +++ b/drivers/gpu/drm/i915/intel_device_info.h > @@ -254,8 +254,10 @@ struct intel_gt_definition { > enum intel_gt_type type; > char *name; > int (*setup)(struct intel_gt *gt, > - phys_addr_t phys_addr); > + phys_addr_t phys_addr, > + u32 gsi_offset); > u32 mapping_base; > + u32 gsi_offset; > intel_engine_mask_t engine_mask; > }; > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c > b/drivers/gpu/drm/i915/intel_uncore.c > index 33bdcbc77ab2..ecb02421502d 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -927,6 +927,9 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset) > { > const struct intel_forcewake_range *entry; > > + if (IS_GSI_REG(offset)) > + offset += uncore->gsi_offset; > + > entry = BSEARCH(offset, > 
uncore->fw_domains_table, > uncore->fw_domains_table_entries, > @@ -1142,6 +1145,9 @@ static bool is_shadowed(struct intel_uncore *uncore, > u32 offset) > if (drm_WARN_ON(&uncore->i915->drm, !uncore->shadowed_reg_table)) > return false; > > + if (IS_GSI_REG(offset)) > + offset += uncore->gsi_offset; > + > return BSEARCH(offset, > uncore->shadowed_reg_table, > uncore->shadowed_reg_table_entries, > @@ -1994,8 +2000,8 @@ static int __fw_domain_init(struct intel_uncore *uncore, > > d->uncore = uncore; > d->wake_count = 0; > - d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set); > - d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack); > + d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set) + > uncore->gsi_offset; > + d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack) + > uncore->gsi_offset; > >
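Putting the pieces of this patch together: the media GT reuses the primary GT's MMIO mapping and carries a per-uncore gsi_offset, which is added for GSI (non-engine GT) registers both before the access and before any table lookup keyed on the final address. A minimal model of the read path under those assumptions; the IS_GSI_REG boundary, the offsets and the backing buffer are all invented for the example:

#include <stdio.h>
#include <stdint.h>

struct toy_uncore {
        uint8_t *regs;          /* shared MMIO mapping */
        uint32_t gsi_offset;    /* 0 on the primary GT */
};

static int is_gsi_reg(uint32_t offset)
{
        return offset < 0x40000;        /* placeholder boundary, not the real rule */
}

static uint32_t toy_read32(const struct toy_uncore *uncore, uint32_t offset)
{
        if (is_gsi_reg(offset))
                offset += uncore->gsi_offset;
        return *(uint32_t *)(uncore->regs + offset);
}

int main(void)
{
        static uint8_t mmio[0x200000];                  /* fake BAR backing store */
        struct toy_uncore primary = { mmio, 0 };
        struct toy_uncore media   = { mmio, 0x100000 }; /* made-up GSI offset */

        *(uint32_t *)(mmio + 0x1000)            = 0x11111111;   /* primary GT's copy */
        *(uint32_t *)(mmio + 0x100000 + 0x1000) = 0x22222222;   /* media GT's copy */

        printf("primary reads %#x, media reads %#x\n",
               toy_read32(&primary, 0x1000), toy_read32(&media, 0x1000));
        return 0;
}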
Re: [PATCH v2 10/12] drm/i915/xelpmp: Expose media as another GT
On 03-09-2022 05:02, Matt Roper wrote: > Xe_LPM+ platforms have "standalone media." I.e., the media unit is > designed as an additional GT with its own engine list, GuC, forcewake, > etc. Let's allow platforms to include media GTs in their device info. > > v2: > - Simplify GSI register handling and split it out to a separate patch >for ease of review. (Daniele) > > Cc: Aravind Iddamsetty > Cc: Daniele Ceraolo Spurio > Signed-off-by: Matt Roper > --- > drivers/gpu/drm/i915/Makefile| 1 + > drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 + > drivers/gpu/drm/i915/gt/intel_sa_media.c | 39 > drivers/gpu/drm/i915/gt/intel_sa_media.h | 15 + > drivers/gpu/drm/i915/i915_pci.c | 15 + > drivers/gpu/drm/i915/intel_device_info.h | 1 + > drivers/gpu/drm/i915/intel_uncore.c | 4 +++ > 7 files changed, 83 insertions(+) > create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c > create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index 522ef9b4aff3..e83e4cd46968 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -123,6 +123,7 @@ gt-y += \ > gt/intel_ring.o \ > gt/intel_ring_submission.o \ > gt/intel_rps.o \ > + gt/intel_sa_media.o \ > gt/intel_sseu.o \ > gt/intel_sseu_debugfs.o \ > gt/intel_timeline.o \ > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h > b/drivers/gpu/drm/i915/gt/intel_gt_regs.h > index d414785003cc..fb2c56777480 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h > @@ -1578,4 +1578,12 @@ > > #define GEN12_SFC_DONE(n)_MMIO(0x1cc000 + (n) * 0x1000) > > +/* > + * Standalone Media's non-engine GT registers are located at their regular GT > + * offsets plus 0x38. This extra offset is stored inside the > intel_uncore > + * structure so that the existing code can be used for both GTs without > + * modification. > + */ > +#define MTL_MEDIA_GSI_BASE 0x38 > + > #endif /* __INTEL_GT_REGS__ */ > diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c > b/drivers/gpu/drm/i915/gt/intel_sa_media.c > new file mode 100644 > index ..8c5c519457cc > --- /dev/null > +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c > @@ -0,0 +1,39 @@ > +// SPDX-License-Identifier: MIT > +/* > + * Copyright © 2021 Intel Corporation > + */ > + > +#include > + > +#include "i915_drv.h" > +#include "gt/intel_gt.h" > +#include "gt/intel_sa_media.h" > + > +int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, > +u32 gsi_offset) > +{ > + struct drm_i915_private *i915 = gt->i915; > + struct intel_uncore *uncore; > + > + uncore = drmm_kzalloc(&i915->drm, sizeof(*uncore), GFP_KERNEL); > + if (!uncore) > + return -ENOMEM; > + > + uncore->gsi_offset = gsi_offset; > + > + intel_gt_common_init_early(gt); > + intel_uncore_init_early(uncore, gt); > + > + /* > + * Standalone media shares the general MMIO space with the primary > + * GT. We'll re-use the primary GT's mapping. 
> + */ > + uncore->regs = i915->uncore.regs; > + if (drm_WARN_ON(&i915->drm, uncore->regs == NULL)) > + return -EIO; > + > + gt->uncore = uncore; > + gt->phys_addr = phys_addr; > + > + return 0; > +} > diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.h > b/drivers/gpu/drm/i915/gt/intel_sa_media.h > new file mode 100644 > index ..3afb310de932 > --- /dev/null > +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.h > @@ -0,0 +1,15 @@ > +/* SPDX-License-Identifier: MIT */ > +/* > + * Copyright © 2021 Intel Corporation > + */ > +#ifndef __INTEL_SA_MEDIA__ > +#define __INTEL_SA_MEDIA__ > + > +#include > + > +struct intel_gt; > + > +int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, > +u32 gsi_offset); > + > +#endif /* __INTEL_SA_MEDIA_H__ */ > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index 26b25d9434d6..18d3722331e4 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -26,6 +26,9 @@ > #include > #include > > +#include "gt/intel_gt_regs.h" > +#include "gt/intel_sa_media.h" > + > #include "i915_driver.h" > #include "i915_drv.h" > #include "i915_pci.h" > @@ -1115,6 +1118,17 @@ static const struct intel_device_info pvc_info = { > .display.has_cdclk_crawl = 1, \ > .__runtime.fbc_mask = BIT(INTEL_FBC_A) | BIT(INTEL_FBC_B) > > +static const struct intel_gt_definition xelpmp_extra_gt[] = { > + { > + .type = GT_MEDIA, > + .name = "Standalone Media GT", > + .setup = intel_sa_mediagt_setup, > + .gsi_offset = MTL_MEDIA_GSI_BASE, > + .engine_mask = BIT(VECS0) | BIT(VCS0) | BIT(VCS2)