[RFC 05/13] drm/cgroup: Track clients per owning process

2022-11-11 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

To enable propagation of settings from the cgroup drm controller to drm we
need to start tracking which processes own which drm clients.

Implement that by tracking the struct pid pointer of the owning process in
a new XArray, pointing to a structure containing a list of associated
struct drm_file pointers.

Clients are added and removed under the filelist mutex and RCU list
operations are used below it to allow for lockless lookup.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/Makefile |   1 +
 drivers/gpu/drm/drm_cgroup.c | 123 +++
 drivers/gpu/drm/drm_file.c   |  21 --
 include/drm/drm_clients.h|  44 +
 include/drm/drm_file.h   |   4 ++
 5 files changed, 189 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/drm_cgroup.c
 create mode 100644 include/drm/drm_clients.h

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index c44a54cadb61..4495dda2a720 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -59,6 +59,7 @@ drm-$(CONFIG_DRM_LEGACY) += \
drm_scatter.o \
drm_vm.o
 drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
+drm-$(CONFIG_CGROUP_DRM) += drm_cgroup.o
 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 drm-$(CONFIG_DRM_PANEL) += drm_panel.o
 drm-$(CONFIG_OF) += drm_of.o
diff --git a/drivers/gpu/drm/drm_cgroup.c b/drivers/gpu/drm/drm_cgroup.c
new file mode 100644
index ..56aa8303974a
--- /dev/null
+++ b/drivers/gpu/drm/drm_cgroup.c
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include 
+#include 
+
+static DEFINE_XARRAY(drm_pid_clients);
+
+static void
+__del_clients(struct drm_pid_clients *clients,
+ struct drm_file *file_priv,
+ unsigned long pid)
+{
+   list_del_rcu(_priv->clink);
+   if (atomic_dec_and_test(>num)) {
+   xa_erase(_pid_clients, pid);
+   kfree_rcu(clients, rcu);
+   }
+}
+
+void drm_clients_close(struct drm_file *file_priv)
+{
+   struct drm_device *dev = file_priv->minor->dev;
+   struct drm_pid_clients *clients;
+   struct pid *pid;
+
+   lockdep_assert_held(>filelist_mutex);
+
+   pid = rcu_access_pointer(file_priv->pid);
+   clients = xa_load(_pid_clients, (unsigned long)pid);
+   if (WARN_ON_ONCE(!clients))
+   return;
+
+   __del_clients(clients, file_priv, (unsigned long)pid);
+}
+
+static struct drm_pid_clients *__alloc_clients(void)
+{
+   struct drm_pid_clients *clients;
+
+   clients = kmalloc(sizeof(*clients), GFP_KERNEL);
+   if (clients) {
+   atomic_set(>num, 0);
+   INIT_LIST_HEAD(>file_list);
+   init_rcu_head(>rcu);
+   }
+
+   return clients;
+}
+
+int drm_clients_open(struct drm_file *file_priv)
+{
+   struct drm_device *dev = file_priv->minor->dev;
+   struct drm_pid_clients *clients;
+   bool new_client = false;
+   unsigned long pid;
+
+   lockdep_assert_held(>filelist_mutex);
+
+   pid = (unsigned long)rcu_access_pointer(file_priv->pid);
+   clients = xa_load(_pid_clients, pid);
+   if (!clients) {
+   clients = __alloc_clients();
+   if (!clients)
+   return -ENOMEM;
+   new_client = true;
+   }
+   atomic_inc(>num);
+   list_add_tail_rcu(_priv->clink, >file_list);
+   if (new_client) {
+   void *xret;
+
+   xret = xa_store(_pid_clients, pid, clients, GFP_KERNEL);
+   if (xa_err(xret)) {
+   list_del_init(_priv->clink);
+   kfree(clients);
+   return PTR_ERR(clients);
+   }
+   }
+
+   return 0;
+}
+
+void
+drm_clients_migrate(struct drm_file *file_priv,
+   unsigned long old,
+   unsigned long new)
+{
+   struct drm_device *dev = file_priv->minor->dev;
+   struct drm_pid_clients *existing_clients;
+   struct drm_pid_clients *clients;
+
+   lockdep_assert_held(>filelist_mutex);
+
+   existing_clients = xa_load(_pid_clients, new);
+   clients = xa_load(_pid_clients, old);
+
+   if (WARN_ON_ONCE(!clients))
+   return;
+   else if (WARN_ON_ONCE(clients == existing_clients))
+   return;
+
+   __del_clients(clients, file_priv, old);
+
+   if (!existing_clients) {
+   void *xret;
+
+   clients = __alloc_clients();
+   if (WARN_ON(!clients))
+   return;
+
+   xret = xa_store(_pid_clients, new, clients, GFP_KERNEL);
+   if (WARN_ON(xa_err(xret)))
+   return;
+   } else {
+   clients = existing_clients;
+   }
+
+   atomic_inc(>num);
+   list_add_tail_rcu(_priv->clink, >file_list);
+}
diff 

[RFC 01/13] drm: Replace DRM_DEBUG with drm_dbg_core in file and ioctl handling

2022-11-11 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Replace the deprecated macro with the per-device one.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/drm_file.c  | 21 +++--
 drivers/gpu/drm/drm_ioc32.c | 13 +++--
 drivers/gpu/drm/drm_ioctl.c | 25 +
 3 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index a8b4d918e9a3..ba5041137b29 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -245,10 +245,10 @@ void drm_file_free(struct drm_file *file)
 
dev = file->minor->dev;
 
-   DRM_DEBUG("comm=\"%s\", pid=%d, dev=0x%lx, open_count=%d\n",
- current->comm, task_pid_nr(current),
- (long)old_encode_dev(file->minor->kdev->devt),
- atomic_read(>open_count));
+   drm_dbg_core(dev, "comm=\"%s\", pid=%d, dev=0x%lx, open_count=%d\n",
+current->comm, task_pid_nr(current),
+(long)old_encode_dev(file->minor->kdev->devt),
+atomic_read(>open_count));
 
 #ifdef CONFIG_DRM_LEGACY
if (drm_core_check_feature(dev, DRIVER_LEGACY) &&
@@ -340,8 +340,8 @@ static int drm_open_helper(struct file *filp, struct 
drm_minor *minor)
dev->switch_power_state != DRM_SWITCH_POWER_DYNAMIC_OFF)
return -EINVAL;
 
-   DRM_DEBUG("comm=\"%s\", pid=%d, minor=%d\n", current->comm,
- task_pid_nr(current), minor->index);
+   drm_dbg_core(dev, "comm=\"%s\", pid=%d, minor=%d\n",
+current->comm, task_pid_nr(current), minor->index);
 
priv = drm_file_alloc(minor);
if (IS_ERR(priv))
@@ -450,11 +450,12 @@ EXPORT_SYMBOL(drm_open);
 
 void drm_lastclose(struct drm_device * dev)
 {
-   DRM_DEBUG("\n");
+   drm_dbg_core(dev, "\n");
 
-   if (dev->driver->lastclose)
+   if (dev->driver->lastclose) {
dev->driver->lastclose(dev);
-   DRM_DEBUG("driver lastclose completed\n");
+   drm_dbg_core(dev, "driver lastclose completed\n");
+   }
 
if (drm_core_check_feature(dev, DRIVER_LEGACY))
drm_legacy_dev_reinit(dev);
@@ -485,7 +486,7 @@ int drm_release(struct inode *inode, struct file *filp)
if (drm_dev_needs_global_mutex(dev))
mutex_lock(_global_mutex);
 
-   DRM_DEBUG("open_count = %d\n", atomic_read(>open_count));
+   drm_dbg_core(dev, "open_count = %d\n", atomic_read(>open_count));
 
drm_close_helper(filp);
 
diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index 5d82891c3222..49a743f62b4a 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -972,6 +972,7 @@ long drm_compat_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
 {
unsigned int nr = DRM_IOCTL_NR(cmd);
struct drm_file *file_priv = filp->private_data;
+   struct drm_device *dev = file_priv->minor->dev;
drm_ioctl_compat_t *fn;
int ret;
 
@@ -986,14 +987,14 @@ long drm_compat_ioctl(struct file *filp, unsigned int 
cmd, unsigned long arg)
if (!fn)
return drm_ioctl(filp, cmd, arg);
 
-   DRM_DEBUG("comm=\"%s\", pid=%d, dev=0x%lx, auth=%d, %s\n",
- current->comm, task_pid_nr(current),
- (long)old_encode_dev(file_priv->minor->kdev->devt),
- file_priv->authenticated,
- drm_compat_ioctls[nr].name);
+   drm_dbg_core(dev, "comm=\"%s\", pid=%d, dev=0x%lx, auth=%d, %s\n",
+current->comm, task_pid_nr(current),
+(long)old_encode_dev(file_priv->minor->kdev->devt),
+file_priv->authenticated,
+drm_compat_ioctls[nr].name);
ret = (*fn)(filp, cmd, arg);
if (ret)
-   DRM_DEBUG("ret = %d\n", ret);
+   drm_dbg_core(dev, "ret = %d\n", ret);
return ret;
 }
 EXPORT_SYMBOL(drm_compat_ioctl);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index ca2a6e6101dc..7c9d66ee917d 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -440,7 +440,7 @@ static int drm_setversion(struct drm_device *dev, void 
*data, struct drm_file *f
 int drm_noop(struct drm_device *dev, void *data,
 struct drm_file *file_priv)
 {
-   DRM_DEBUG("\n");
+   drm_dbg_core(dev, "\n");
return 0;
 }
 EXPORT_SYMBOL(drm_noop);
@@ -856,16 +856,16 @@ long drm_ioctl(struct file *filp,
out_size = 0;
ksize = max(max(in_size, out_size), drv_size);
 
-   DRM_DEBUG("comm

[RFC 09/13] drm/cgroup: Only track clients which are providing drm_cgroup_ops

2022-11-11 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

To reduce the amount of tracking going on, especially with drivers which
will not support any sort of control from the drm cgroup controller side,
let's express the functionality as opt-in and use the presence of
drm_cgroup_ops as the activation criteria.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/drm_cgroup.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/drm_cgroup.c b/drivers/gpu/drm/drm_cgroup.c
index e3854741c584..d3050c744e3e 100644
--- a/drivers/gpu/drm/drm_cgroup.c
+++ b/drivers/gpu/drm/drm_cgroup.c
@@ -35,6 +35,9 @@ void drm_clients_close(struct drm_file *file_priv)
 
lockdep_assert_held(>filelist_mutex);
 
+   if (!dev->driver->cg_ops)
+   return;
+
pid = rcu_access_pointer(file_priv->pid);
clients = xa_load(_pid_clients, (unsigned long)pid);
if (WARN_ON_ONCE(!clients))
@@ -66,6 +69,9 @@ int drm_clients_open(struct drm_file *file_priv)
 
lockdep_assert_held(>filelist_mutex);
 
+   if (!dev->driver->cg_ops)
+   return 0;
+
pid = (unsigned long)rcu_access_pointer(file_priv->pid);
clients = xa_load(_pid_clients, pid);
if (!clients) {
@@ -101,6 +107,9 @@ drm_clients_migrate(struct drm_file *file_priv,
 
lockdep_assert_held(>filelist_mutex);
 
+   if (!dev->driver->cg_ops)
+   return;
+
existing_clients = xa_load(_pid_clients, new);
clients = xa_load(_pid_clients, old);
 
-- 
2.34.1



[RFC 08/13] drm/cgroup: Add over budget signalling callback

2022-11-11 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Add a new callback via which the drm cgroup controller is notifying the
drm core that a certain process is above its allotted GPU time.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/drm_cgroup.c | 21 +
 include/drm/drm_clients.h|  1 +
 include/drm/drm_drv.h|  8 
 3 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/drm_cgroup.c b/drivers/gpu/drm/drm_cgroup.c
index 06810b4c3ff1..e3854741c584 100644
--- a/drivers/gpu/drm/drm_cgroup.c
+++ b/drivers/gpu/drm/drm_cgroup.c
@@ -152,3 +152,24 @@ u64 drm_pid_get_active_time_us(struct pid *pid)
return total;
 }
 EXPORT_SYMBOL_GPL(drm_pid_get_active_time_us);
+
+void drm_pid_signal_budget(struct pid *pid, u64 usage, u64 budget)
+{
+   struct drm_pid_clients *clients;
+
+   rcu_read_lock();
+   clients = xa_load(_pid_clients, (unsigned long)pid);
+   if (clients) {
+   struct drm_file *fpriv;
+
+   list_for_each_entry_rcu(fpriv, >file_list, clink) {
+   const struct drm_cgroup_ops *cg_ops =
+   fpriv->minor->dev->driver->cg_ops;
+
+   if (cg_ops && cg_ops->signal_budget)
+   cg_ops->signal_budget(fpriv, usage, budget);
+   }
+   }
+   rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(drm_pid_signal_budget);
diff --git a/include/drm/drm_clients.h b/include/drm/drm_clients.h
index b9b8009c28a6..356ee92792a6 100644
--- a/include/drm/drm_clients.h
+++ b/include/drm/drm_clients.h
@@ -42,5 +42,6 @@ drm_clients_migrate(struct drm_file *file_priv,
 #endif
 
 u64 drm_pid_get_active_time_us(struct pid *pid);
+void drm_pid_signal_budget(struct pid *pid, u64 usage, u64 budget);
 
 #endif
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index c09fe9bd517f..c30afe97f922 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -164,6 +164,14 @@ struct drm_cgroup_ops {
 * Used by the DRM core when queried by the DRM cgroup controller.
 */
u64 (*active_time_us) (struct drm_file *);
+
+   /**
+* @signal_budget:
+*
+* Optional callback used by the DRM core to forward over/under GPU time
+* messages sent by the DRM cgroup controller.
+*/
+   int (*signal_budget) (struct drm_file *, u64 used, u64 budget);
 };
 
 /**
-- 
2.34.1



[RFC 02/13] drm: Track clients by tgid and not tid

2022-11-11 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Thread group id (aka pid from userspace point of view) is a more
interesting thing to show as an owner of a DRM fd, so track and show that
instead of the thread id.

In the next patch we will make the owner updated post file descriptor
handover, which will also be tgid based to avoid ping-pong when multiple
threads access the fd.

Signed-off-by: Tvrtko Ursulin 
Cc: Zack Rusin 
Cc: linux-graphics-maintai...@vmware.com
Cc: Alex Deucher 
Cc: "Christian König" 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +-
 drivers/gpu/drm/drm_debugfs.c   | 4 ++--
 drivers/gpu/drm/drm_file.c  | 2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 8ef31d687ef3..4b940f8bd72b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -967,7 +967,7 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, 
void *unused)
 * Therefore, we need to protect this ->comm access using RCU.
 */
rcu_read_lock();
-   task = pid_task(file->pid, PIDTYPE_PID);
+   task = pid_task(file->pid, PIDTYPE_TGID);
seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
   task ? task->comm : "");
rcu_read_unlock();
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index ee445f4605ba..42f657772025 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -80,7 +80,7 @@ static int drm_clients_info(struct seq_file *m, void *data)
seq_printf(m,
   "%20s %5s %3s master a %5s %10s\n",
   "command",
-  "pid",
+  "tgid",
   "dev",
   "uid",
   "magic");
@@ -94,7 +94,7 @@ static int drm_clients_info(struct seq_file *m, void *data)
bool is_current_master = drm_is_current_master(priv);
 
rcu_read_lock(); /* locks pid_task()->comm */
-   task = pid_task(priv->pid, PIDTYPE_PID);
+   task = pid_task(priv->pid, PIDTYPE_TGID);
uid = task ? __task_cred(task)->euid : GLOBAL_ROOT_UID;
seq_printf(m, "%20s %5d %3d   %c%c %5d %10u\n",
   task ? task->comm : "",
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index ba5041137b29..5cde5014cea1 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -156,7 +156,7 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor)
if (!file)
return ERR_PTR(-ENOMEM);
 
-   file->pid = get_pid(task_pid(current));
+   file->pid = get_pid(task_tgid(current));
file->minor = minor;
 
/* for compatibility root is always authenticated */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
index ce609e7d758f..f2985337aa53 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
@@ -260,7 +260,7 @@ static int vmw_debugfs_gem_info_show(struct seq_file *m, 
void *unused)
 * Therefore, we need to protect this ->comm access using RCU.
 */
rcu_read_lock();
-   task = pid_task(file->pid, PIDTYPE_PID);
+   task = pid_task(file->pid, PIDTYPE_TGID);
seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
   task ? task->comm : "");
rcu_read_unlock();
-- 
2.34.1



[RFC 03/13] drm: Update file owner during use

2022-11-11 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

With the typical model where the display server opens the file descriptor
and then hands it over to the client we were showing stale data in
debugfs.

Fix it by updating the drm_file->pid on ioctl access from a different
process.

The field is also made RCU protected to allow for lockless readers. Update
side is protected with dev->filelist_mutex.

Signed-off-by: Tvrtko Ursulin 
Cc: "Christian König" 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  6 +++--
 drivers/gpu/drm/drm_auth.c  |  3 ++-
 drivers/gpu/drm/drm_debugfs.c   | 10 +
 drivers/gpu/drm/drm_file.c  | 30 +
 drivers/gpu/drm/drm_ioctl.c |  3 +++
 drivers/gpu/drm/nouveau/nouveau_drm.c   |  5 -
 drivers/gpu/drm/vmwgfx/vmwgfx_gem.c |  6 +++--
 include/drm/drm_file.h  | 13 +--
 8 files changed, 64 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 4b940f8bd72b..d732ffb1c0d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -958,6 +958,7 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, 
void *unused)
list_for_each_entry(file, >filelist, lhead) {
struct task_struct *task;
struct drm_gem_object *gobj;
+   struct pid *pid;
int id;
 
/*
@@ -967,8 +968,9 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, 
void *unused)
 * Therefore, we need to protect this ->comm access using RCU.
 */
rcu_read_lock();
-   task = pid_task(file->pid, PIDTYPE_TGID);
-   seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
+   pid = rcu_dereference(file->pid);
+   task = pid_task(pid, PIDTYPE_TGID);
+   seq_printf(m, "pid %8d command %s:\n", pid_nr(pid),
   task ? task->comm : "");
rcu_read_unlock();
 
diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index cf92a9ae8034..2ed2585ded37 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -235,7 +235,8 @@ static int drm_new_set_master(struct drm_device *dev, 
struct drm_file *fpriv)
 static int
 drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv)
 {
-   if (file_priv->pid == task_pid(current) && file_priv->was_master)
+   if (file_priv->was_master &&
+   rcu_access_pointer(file_priv->pid) == task_pid(current))
return 0;
 
if (!capable(CAP_SYS_ADMIN))
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 42f657772025..cbcd79f01d50 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -90,15 +90,17 @@ static int drm_clients_info(struct seq_file *m, void *data)
 */
mutex_lock(>filelist_mutex);
list_for_each_entry_reverse(priv, >filelist, lhead) {
-   struct task_struct *task;
bool is_current_master = drm_is_current_master(priv);
+   struct task_struct *task;
+   struct pid *pid;
 
-   rcu_read_lock(); /* locks pid_task()->comm */
-   task = pid_task(priv->pid, PIDTYPE_TGID);
+   rcu_read_lock(); /* Locks priv->pid and pid_task()->comm! */
+   pid = rcu_dereference(priv->pid);
+   task = pid_task(pid, PIDTYPE_PID);
uid = task ? __task_cred(task)->euid : GLOBAL_ROOT_UID;
seq_printf(m, "%20s %5d %3d   %c%c %5d %10u\n",
   task ? task->comm : "",
-  pid_vnr(priv->pid),
+  pid_vnr(pid),
   priv->minor->index,
   is_current_master ? 'y' : 'n',
   priv->authenticated ? 'y' : 'n',
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 5cde5014cea1..4f5cff5c0bea 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -502,6 +502,36 @@ int drm_release(struct inode *inode, struct file *filp)
 }
 EXPORT_SYMBOL(drm_release);
 
+void drm_file_update_pid(struct drm_file *filp)
+{
+   struct drm_device *dev;
+   struct pid *pid, *old;
+
+   /* Master nodes are not expected to be passed between processes. */
+   if (filp->was_master)
+   return;
+
+   pid = task_tgid(current);
+
+   /*
+* Quick unlocked check since the model is a single handover followed by
+* exclusive repeated use.
+*/
+   if (pid == rcu_access_pointer(filp->pid))
+   return;
+
+   dev = filp->minor-

[RFC v2 README 00/13] DRM scheduling cgroup controller

2022-11-11 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

*** vvv Read this first please vvv ***

I am re-sending this to dri-devel directly, having realized neither v1 nor v2
have reached dri-devel due to possible SMTP server issues. Other recipients and
lists however did get both v1 in October and v2 two days ago.

Hence this is a re-post for the benefit of dri-devel only. And I do understand
any replies would create a fork where not all lists, recipients would be up to
date with discussions. But I thought dri-devel needed to be made aware of the
proposal, while the SMTP server issues are being investigated.

Full v1, with some follow up discussion, can be found at these links:
https://lists.freedesktop.org/archives/intel-gfx/2022-October/309547.html
https://marc.info/?t=16662005993=1=4
https://marc.info/?l=linux-cgroups=166620044012970=2

v2:
https://lists.freedesktop.org/archives/intel-gfx/2022-November/311844.html
https://marc.info/?l=linux-cgroups=166800992215296=2
https://marc.info/?l=linux-kernel=166800993415302=4

*** ^^^ Read this first please ^^^ ***

This series contains a proposal for a DRM scheduling cgroup controller which
implements a weight based hierarchical GPU usage budget based controller
similar in concept to some of the existing controllers.

Motivation mostly comes from my earlier proposal where I identified that GPU
scheduling lags significantly behind what is available for CPU and IO. Whereas
back then I was proposing to somehow tie this with process nice, feedback mostly
was that people wanted cgroups. So here it is - in the world of heterogeneous
computing pipelines I think it is time to do something about this gap.

Code is not finished but should survive some light experimentation. I am
sharing it early since the topic has been controversial in the past. I hope to
demonstrate there are gains to be had in real world usage(*), today, and that
the concepts the proposal relies on are well enough established and stable.

*) Specifically under ChromeOS which uses cgroups to control CPU bandwidth for
   VMs based on the window focused status. It can be demonstrated how GPU
   scheduling control can easily be integrated into that setup.

There should be no conflict between this proposal and any efforts to implement a
memory usage based controller. The skeleton DRM cgroup controller is deliberately
purely a skeleton patch where any further functionality can be added with no
real conflicts. [In fact, perhaps scheduling is even easier to deal with than
memory accounting.]

Structure of the series is as follows:

  1-3) Improve client ownership tracking in DRM core.
4) Adds a skeleton DRM cgroup controller with no functionality.
 5-10) Laying down some infrastructure to enable the controller.
   11) The controller itself.
12-13) i915 support for the controller.

The proposal defines a delegation of duties between the three parties: cgroup
controller, DRM core and individual drivers. Two way communication interfaces
are then defined to enable the delegation to work.

DRM scheduling soft limits
~~

Because of the heterogeneous hardware and driver DRM capabilities, soft limits
are implemented as a loose co-operative (bi-directional) interface between the
controller and DRM core.

The controller configures the GPU time allowed per group and periodically scans
the belonging tasks to detect the over budget condition, at which point it
invokes a callback notifying the DRM core of the condition.

DRM core provides an API to query per process GPU utilization and 2nd API to
receive notification from the cgroup controller when the group enters or exits
the over budget condition.

Individual DRM drivers which implement the interface are expected to act on this
in the best-effort manner only. There are no guarantees that the soft limits
will be respected.

DRM scheduling soft limits interface files
~~

  drm.weight
Standard cgroup weight based control [1, 10000] used to configure the
relative distribution of GPU time between the sibling groups.

  drm.period_us (Most probably only a debugging aid during RFC phase.)
An integer representing the period with which the controller should look
at the GPU usage by the group and potentially send the over/under budget
signal.
Value of zero (default) disables the soft limit checking.

This builds upon the per client GPU utilisation work which landed recently for a
few drivers. My thinking is that in principle, an intersection of drivers which
support both that and some sort of scheduling control, like priorities, could
also in theory support this controller.

Another really interesting angle for this controller is that it mimics the same
control method used by the CPU scheduler. That is the proportional/weight based
GPU time budgeting. Which makes it easy to configure and does not need a new
mental model.

However, as the introduction mentions, GPUs are much more heterogeneous and
therefore

Re: [Intel-gfx] [PATCH v6 00/20] drm/i915/vm_bind: Add VM_BIND functionality

2022-11-10 Thread Tvrtko Ursulin



On 10/11/2022 05:49, Niranjana Vishwanathapura wrote:

On Wed, Nov 09, 2022 at 04:16:25PM -0800, Zanoni, Paulo R wrote:

On Mon, 2022-11-07 at 00:51 -0800, Niranjana Vishwanathapura wrote:

DRM_I915_GEM_VM_BIND/UNBIND ioctls allows UMD to bind/unbind GEM
buffer objects (BOs) or sections of a BOs at specified GPU virtual
addresses on a specified address space (VM). Multiple mappings can map
to the same physical pages of an object (aliasing). These mappings (also
referred to as persistent mappings) will be persistent across multiple
GPU submissions (execbuf calls) issued by the UMD, without user having
to provide a list of all required mappings during each submission (as
required by older execbuf mode).

This patch series support VM_BIND version 1, as described by the param
I915_PARAM_VM_BIND_VERSION.

Add new execbuf3 ioctl (I915_GEM_EXECBUFFER3) which only works in
vm_bind mode. The vm_bind mode only works with this new execbuf3 ioctl.
The new execbuf3 ioctl will not have any execlist support and all the
legacy support like relocations etc., are removed.

NOTEs:
* It is based on below VM_BIND design+uapi rfc.
  Documentation/gpu/rfc/i915_vm_bind.rst


Hi

One difference for execbuf3 that I noticed that is not mentioned in the
RFC document is that we now don't have a way to signal
EXEC_OBJECT_WRITE. When looking at the Kernel code, some there are some
pieces that check for this flag:

- there's code that deals with frontbuffer rendering
- there's code that deals with fences
- there's code that prevents self-modifying batches
- another that seems related to waiting for objects

Are there any new rules regarding frontbuffer rendering when we use
execbuf3? Any other behavior changes related to the other places that
we should expect when using execbuf3?



Paulo,
Most of the EXEC_OBJECT_WRITE check in execbuf path is related to
implicit dependency tracker which execbuf3 does not support. The
frontbuffer related updated is the only exception and I don't
remember the rationale to not require this on execbuf3.

Matt, Tvrtko, Daniel, can you please comment here?


Does not ring a bell to me. Looking at the code it certainly looks like 
it would be silently failing to handle it properly.


I'll let people with more experience in this area answer, but from my 
point of view, if it is decided that it can be left unsupported, then we 
probably need a way of failing the ioctl is used against a frontbuffer, 
or something, instead of having display corruption.


Regards,

Tvrtko


[PATCH] drm/i915: Simplify internal helper function signature

2022-11-10 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Since we are now storing the GT backpointer in the wa list we can drop the
explicit struct intel_gt * argument to wa_list_apply.

Signed-off-by: Tvrtko Ursulin 
Cc: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 07bf115029a0..4db04761d5ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1717,9 +1717,9 @@ wa_verify(struct intel_gt *gt, const struct i915_wa *wa, 
u32 cur,
return true;
 }
 
-static void
-wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal)
+static void wa_list_apply(const struct i915_wa_list *wal)
 {
+   struct intel_gt *gt = wal->gt;
struct intel_uncore *uncore = gt->uncore;
enum forcewake_domains fw;
unsigned long flags;
@@ -1755,7 +1755,7 @@ wa_list_apply(struct intel_gt *gt, const struct 
i915_wa_list *wal)
intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
intel_uncore_read_fw(uncore, wa->reg);
 
-   wa_verify(wal->gt, wa, val, wal->name, "application");
+   wa_verify(gt, wa, val, wal->name, "application");
}
}
 
@@ -1765,7 +1765,7 @@ wa_list_apply(struct intel_gt *gt, const struct 
i915_wa_list *wal)
 
 void intel_gt_apply_workarounds(struct intel_gt *gt)
 {
-   wa_list_apply(gt, >wa_list);
+   wa_list_apply(>wa_list);
 }
 
 static bool wa_list_verify(struct intel_gt *gt,
@@ -3025,7 +3025,7 @@ void intel_engine_init_workarounds(struct intel_engine_cs 
*engine)
 
 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
 {
-   wa_list_apply(engine->gt, >wa_list);
+   wa_list_apply(>wa_list);
 }
 
 static const struct i915_range mcr_ranges_gen8[] = {
-- 
2.34.1



Re: [Intel-gfx] [PATCH v3] drm/i915: Partial abandonment of legacy DRM logging macros

2022-11-10 Thread Tvrtko Ursulin



On 10/11/2022 11:07, Andrzej Hajda wrote:

On 09.11.2022 11:46, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Convert some usages of legacy DRM logging macros into versions which tell
us on which device have the events occurred.

v2:
  * Don't have struct drm_device as local. (Jani, Ville)

v3:
  * Store gt, not i915, in workaround list. (John)



Neither gt neither i915 does fit into wa list IMHO.
The best solution would be provide context (i915/gt/whatever)
as a function parameter, every time it is necessary.
On the other side it should not block the patch.
More below.


I thought about the very same lines but then concluded that the only _current_ 
usage of the lists is that they belong to a gt (directly or via engine). So 
having a back pointer felt passable.


Signed-off-by: Tvrtko Ursulin 
Reviewed-by: Andrzej Hajda  # v2
Acked-by: Jani Nikula 
Cc: Jani Nikula 
Cc: John Harrison 
Cc: Ville Syrjälä 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 26 
  .../drm/i915/gt/intel_execlists_submission.c  | 13 +++---
  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c  |  4 +-
  drivers/gpu/drm/i915/gt/intel_gt.c    |  4 +-
  drivers/gpu/drm/i915/gt/intel_gt_irq.c    |  8 ++--
  drivers/gpu/drm/i915/gt/intel_rps.c   |  6 ++-
  drivers/gpu/drm/i915/gt/intel_workarounds.c   | 42 +++
  .../gpu/drm/i915/gt/intel_workarounds_types.h |  3 ++
  .../gpu/drm/i915/gt/selftest_workarounds.c    |  4 +-
  drivers/gpu/drm/i915/i915_debugfs.c   |  4 +-
  drivers/gpu/drm/i915/i915_gem.c   |  2 +-
  drivers/gpu/drm/i915/i915_getparam.c  |  2 +-
  drivers/gpu/drm/i915/i915_irq.c   | 12 +++---
  drivers/gpu/drm/i915/i915_perf.c  | 14 ---
  drivers/gpu/drm/i915/i915_query.c | 12 +++---
  drivers/gpu/drm/i915/i915_sysfs.c |  3 +-
  drivers/gpu/drm/i915/i915_vma.c   | 16 +++
  drivers/gpu/drm/i915/intel_uncore.c   | 21 ++
  19 files changed, 117 insertions(+), 81 deletions(-)



(...)

@@ -1749,7 +1755,7 @@ wa_list_apply(struct intel_gt *gt, const struct 
i915_wa_list *wal)

  intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
  intel_uncore_read_fw(uncore, wa->reg);
-    wa_verify(wa, val, wal->name, "application");
+    wa_verify(wal->gt, wa, val, wal->name, "application");


This looks confusing at 1st sight, why wa_verify(wal->gt,...) and not 
wa_verify(gt,...). Can they differ? and similar questions as in case of 
redundant vars.


Would be always the same in current code. But point taken, it is confusing.. 
hm..

./gt/intel_workarounds.c:   wa_list_apply(gt, >wa_list);
./gt/intel_workarounds.c:   wa_list_apply(engine->gt, >wa_list);

Could drop the gt argument now that gt is available in the wa list.


The same apply to wal->engine_name, which is almost unused anyway?
Also AFAIK there is always sequence:
1. wa_init_start
2. *init_workarounds*
3. wa_init_finish - btw funny name.


Why funny? :) Because init collides with finish? Start of initialisation, 
initialisation, end of initialisation. :)


Why not 1 and 3 embed in 2? Do we need this sequence.


It's just some common code so it doesn't have to be duplicated in the callers.
 
Anyway all these comments are for wa handling, which should be addressed 
in other patch. So my r-b still holds, either with wal->i915, either 
with wal->gt.


Reviewed-by: Andrzej Hajda 


Thanks, I think I'll go with v3 and follow up with wa_list_apply cleanup, so 
that my logging changes in gt/ are in before further CI delays and people can 
freely work on the GT logging macros without conflicts.

Regards,

Tvrtko


Re: [Intel-gfx] [PATCH 0/3] add guard padding around i915_vma

2022-11-10 Thread Tvrtko Ursulin



Hi,

On 09/11/2022 18:03, Thomas Hellström wrote:

Hi, Andi,

This has been on the list before (three times I think) and at that
point it (the guard pages) was NAK'd by Daniel as yet another
complication, and a VT-d
scanout workaround was implemented and pushed using a different
approach, initially outlined by Daniel.


I can't find this discussion and NAKs on the list - do you have a link?


Patch is 2ef6efa79fecd. Those suspend/resumes should now be fast.


So the initiator to re-start this series was actually the boot time is 
failing KPIs by quite a margin. Which means we may need a way forward 
after all. Especially if the most churny patch 1 was deemed okay, then I 
don't see why the concept of guard pages should be a problem. But again, 
I couldn't find the discussion you mention to read what were the 
objections..


For 2ef6efa79fecd specifically. I only looked at it today - do you think 
that the heuristic of checking one PTE and deciding all content was 
preserved is safe? What if someone scribbled at random locations? On a 
first thought it is making me a bit uncomfortable.


Regards,

Tvrtko


I then also discussed patch 1 separately with Dave Airlie and Daniel
and since both me and Dave liked it, Daniel OK'd it, but it never made
it upstream.

Just a short heads up on the history.

/Thomas


On Wed, 2022-11-09 at 18:40 +0100, Andi Shyti wrote:

Hi,

This series adds guards around vma's by setting pages at the
beginning and at the end that work as padding.

The first users of the vma guard are scanout objects, which no longer
need to add scratch to all the unused ggtt's, speeding up the boot and
resume considerably — by several hundreds of milliseconds, up to over
a full second on slower machines.

Andi

Chris Wilson (3):
   drm/i915: Wrap all access to i915_vma.node.start|size
   drm/i915: Introduce guard pages to i915_vma
   drm/i915: Refine VT-d scanout workaround

  drivers/gpu/drm/i915/display/intel_fbdev.c    |  2 +-
  drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 13 
  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 33 ++-
  drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  2 +-
  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 +-
  drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  4 +-
  .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
  .../i915/gem/selftests/i915_gem_client_blt.c  | 23 
  .../drm/i915/gem/selftests/i915_gem_context.c | 15 +++--
  .../drm/i915/gem/selftests/i915_gem_mman.c    |  2 +-
  .../drm/i915/gem/selftests/igt_gem_utils.c    |  7 ++-
  drivers/gpu/drm/i915/gt/gen7_renderclear.c    |  2 +-
  drivers/gpu/drm/i915/gt/intel_ggtt.c  | 39 
  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c  |  3 +-
  drivers/gpu/drm/i915/gt/intel_renderstate.c   |  2 +-
  .../gpu/drm/i915/gt/intel_ring_submission.c   |  2 +-
  drivers/gpu/drm/i915/gt/selftest_engine_cs.c  |  8 +--
  drivers/gpu/drm/i915/gt/selftest_execlists.c  | 18 +++---
  drivers/gpu/drm/i915/gt/selftest_hangcheck.c  | 15 ++---
  drivers/gpu/drm/i915/gt/selftest_lrc.c    | 16 ++---
  .../drm/i915/gt/selftest_ring_submission.c    |  2 +-
  drivers/gpu/drm/i915/gt/selftest_rps.c    | 12 ++--
  .../gpu/drm/i915/gt/selftest_workarounds.c    |  8 +--
  drivers/gpu/drm/i915/i915_cmd_parser.c    |  4 +-
  drivers/gpu/drm/i915/i915_debugfs.c   |  2 +-
  drivers/gpu/drm/i915/i915_gem_gtt.h   |  3 +-
  drivers/gpu/drm/i915/i915_perf.c  |  2 +-
  drivers/gpu/drm/i915/i915_vma.c   | 59 +
--
  drivers/gpu/drm/i915/i915_vma.h   | 52 +++-
  drivers/gpu/drm/i915/i915_vma_resource.c  |  4 +-
  drivers/gpu/drm/i915/i915_vma_resource.h  | 17 --
  drivers/gpu/drm/i915/i915_vma_types.h |  3 +-
  drivers/gpu/drm/i915/selftests/i915_request.c | 20 +++
  drivers/gpu/drm/i915/selftests/igt_spinner.c  |  8 +--
  34 files changed, 246 insertions(+), 160 deletions(-)





Re: [Intel-gfx] [PATCH 1/2] drm/i915/gt: Add GT oriented dmesg output

2022-11-10 Thread Tvrtko Ursulin



On 09/11/2022 19:57, Michal Wajdeczko wrote:

[snip]


Is it really a problem to merge this patch now to get the process
started? And other sub-components get updated as and when people get the
time to do them? You could maybe even help rather than posting
completely conflicting patch sets that basically duplicate all the
effort for no actual benefit.


Instead of merging this patch now, oriented on GT only, I would rather
wait until we discuss and plan solution for the all sub-components.


Yes, agreed.


Once that's done (with agreement on naming and output) we can start
converting exiting messages.

My proposal would be:
  - use wrappers per component


This is passable to me but Jani has raised a concern on IRC that it 
leads to a lot of macro duplication. Which is I think a valid point, but 
which does not have a completely nice solution. Best I heard so far was 
a suggestion from Joonas to add just a single component formatter macro 
and use the existing drm_xxx helpers.



  - use lower case names


I prefer this as well. Even though usual argument is for macros to be 
upper case, I find the improved readability of lower case trumps that.



  - don't add colon


Not sure, when I look at it below it looks a bit not structured enough 
without the colon, but maybe it is just me.



#define i915_xxx(_i915, _fmt, ...) \
drm_xxx(&(_i915)->drm, _fmt, ##__VA_ARGS__)

#define gt_xxx(_gt, _fmt, ...) \
i915_xxx((_gt)->i915, "GT%u " _fmt, (_gt)->info.id, ..

#define guc_xxx(_guc, _fmt, ...) \
gt_xxx(guc_to_gt(_guc), "GuC " _fmt, ..

#define ct_xxx(_ct, _fmt, ...) \
guc_xxx(ct_to_guc(_ct), "CTB " _fmt, ..

where
xxx = { err, warn, notice, info, dbg }

and then for calls like:

i915_err(i915, "Foo failed (%pe)\n", ERR_PTR(err));
  gt_err(gt,   "Foo failed (%pe)\n", ERR_PTR(err));
 guc_err(guc,  "Foo failed (%pe)\n", ERR_PTR(err));
  ct_err(ct,   "Foo failed (%pe)\n", ERR_PTR(err));


So the macro idea would be like this:

  drm_err(I915_LOG("Foo failed (%pe)\n", i915), ERR_PTR(err));
  drm_err(GT_LOG("Foo failed (%pe)\n", gt), ERR_PTR(err));
  drm_err(GUC_LOG("Foo failed (%pe)\n", guc), ERR_PTR(err));
  drm_err(CT_LOG("Foo failed (%pe)\n", ct), ERR_PTR(err));

Each component would just need to define a single macro and not have to 
duplicate all the err, info, warn, notice, ratelimited, once, whatever 
versions. Which is a benefit but it's a quite a bit uglier to read in 
the code.


Perhaps macro could be called something other than XX_LOG to make it 
more readable, don't know.


Regards,

Tvrtko


Re: [Intel-gfx] [PATCH 1/2] drm/i915/gt: Add GT oriented dmesg output

2022-11-10 Thread Tvrtko Ursulin



On 09/11/2022 17:46, John Harrison wrote:

On 11/9/2022 03:05, Tvrtko Ursulin wrote:

On 08/11/2022 20:15, John Harrison wrote:

On 11/8/2022 01:01, Tvrtko Ursulin wrote:

On 07/11/2022 19:14, John Harrison wrote:

On 11/7/2022 08:17, Tvrtko Ursulin wrote:

On 07/11/2022 09:33, Tvrtko Ursulin wrote:

On 05/11/2022 01:03, Ceraolo Spurio, Daniele wrote:

On 11/4/2022 10:25 AM, john.c.harri...@intel.com wrote:

From: John Harrison 

When trying to analyse bug reports from CI, customers, etc. it 
can be

difficult to work out exactly what is happening on which GT in a
multi-GT system. So add GT oriented debug/error message 
wrappers. If
used instead of the drm_ equivalents, you get the same output 
but with

a GT# prefix on it.

Signed-off-by: John Harrison 


The only downside to this is that we'll print "GT0: " even on 
single-GT devices. We could introduce a gt->info.name and print 
that, so we could have it different per-platform, but IMO it's 
not worth the effort.


Reviewed-by: Daniele Ceraolo Spurio 



I think it might be worth getting an ack from one of the 
maintainers to make sure we're all aligned on transitioning to 
these new logging macro for gt code.


Idea is I think a very good one. First I would suggest 
standardising to lowercase GT in logs because:


$ grep "GT%" i915/ -r
$ grep "gt%" i915/ -r
i915/gt/intel_gt_sysfs.c: gt->i915->sysfs_gt, "gt%d", gt->info.id))
i915/gt/intel_gt_sysfs.c:    "failed to initialize 
gt%d sysfs root\n", gt->info.id);
i915/gt/intel_gt_sysfs_pm.c: "failed to 
create gt%u RC6 sysfs files (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u RC6p sysfs 
files (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to 
create gt%u RPS sysfs files (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to 
create gt%u punit_req_freq_mhz sysfs (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u throttle 
sysfs files (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u 
media_perf_power_attrs sysfs (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to add 
gt%u rps defaults (%pe)\n",
i915/i915_driver.c: drm_err(>i915->drm, "gt%d: 
intel_pcode_init failed %d\n", id, ret);
i915/i915_hwmon.c: snprintf(ddat_gt->name, sizeof(ddat_gt->name), 
"i915_gt%u", i);




Just because there are 11 existing instances of one form doesn't 
mean that the 275 instances that are waiting to be converted should 
be done incorrectly. GT is an acronym and should be capitalised.


Okay just make it consistent then.


Besides:
grep -r "GT " i915 | grep '"'
i915/vlv_suspend.c: drm_err(>drm, "timeout 
disabling GT waking\n");
i915/vlv_suspend.c: "timeout waiting for GT 
wells to go %s\n",
i915/vlv_suspend.c: drm_dbg(>drm, "GT register access 
while GT waking disabled\n");
i915/i915_gpu_error.c:  err_printf(m, "GT awake: %s\n", 
str_yes_no(gt->awake));

i915/i915_debugfs.c:    seq_printf(m, "GT awake? %s [%d], %llums\n",
i915/selftests/i915_gem_evict.c: pr_err("Failed to idle GT (on 
%s)", engine->name);
i915/intel_uncore.c:  "GT thread status wait timed 
out\n");
i915/gt/uc/selftest_guc_multi_lrc.c: drm_err(>i915->drm, "GT 
failed to idle: %d\n", ret);
i915/gt/uc/selftest_guc.c: drm_err(>i915->drm, "GT failed to 
idle: %d\n", ret);
i915/gt/uc/selftest_guc.c: drm_err(>i915->drm, "GT failed to 
idle: %d\n", ret);
i915/gt/intel_gt_mcr.c: * Some GT registers are designed as 
"multicast" or "replicated" registers:
i915/gt/selftest_rps.c: pr_info("%s: rps counted %d 
C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency 
of %uKHz\n",
i915/gt/selftest_hangcheck.c:   pr_err("[%s] GT is 
wedged!\n", engine->name);

i915/gt/selftest_hangcheck.c:   pr_err("GT is wedged!\n");
i915/gt/intel_gt_clock_utils.c: "GT clock frequency 
changed, was %uHz, now %uHz!\n",
i915/gt/selftest_engine_pm.c:   pr_err("Unable to flush GT 
pm before test\n");

i915/gt/selftest_engine_pm.c: pr_err("GT failed to idle\n");
i915/i915_sysfs.c:   "failed to register GT 
sysfs directory\n");
i915/intel_uncore.h: * of the basic non-engine GT registers 
(referred to as "GSI" on
i915/intel_uncore.h: * newer platforms, or "GT block" on older 
platforms)?  If so, we'll




Then there is a question of naming. Are we okay with GT_XXX or, 
do we want intel_gt_, or something completely different. I don't 
have a strong opinion at the moment so I'll add some more folks 
to Cc.


[PULL] drm-intel-fixes

2022-11-10 Thread Tvrtko Ursulin
Hi Dave, Daniel,

Some more fixes for the release candidate window.

Most important are the SG table handling fix for map_dma_buf import, the
userptr probe fixup after VMA iterator conversion and then a display/mouse
stuttering fix for PSR2. Last one only relates to discrete platforms, so
still behind force probe.

Regards,

Tvrtko

drm-intel-fixes-2022-11-10:
- Fix sg_table handling in map_dma_buf (Matthew Auld)
- Send PSR update also on invalidate (Jouni Högander)
- Do not set cache_dirty for DGFX (Niranjana Vishwanathapura)
- Restore userptr probe_range behaviour (Matthew Auld)
The following changes since commit f0c4d9fc9cc9462659728d168387191387e903cc:

  Linux 6.1-rc4 (2022-11-06 15:07:11 -0800)

are available in the Git repository at:

  git://anongit.freedesktop.org/drm/drm-intel tags/drm-intel-fixes-2022-11-10

for you to fetch changes up to 178e31ce82d0308a9e5f2f15bfb4493160136729:

  drm/i915/userptr: restore probe_range behaviour (2022-11-07 13:13:09 +)


- Fix sg_table handling in map_dma_buf (Matthew Auld)
- Send PSR update also on invalidate (Jouni Högander)
- Do not set cache_dirty for DGFX (Niranjana Vishwanathapura)
- Restore userptr probe_range behaviour (Matthew Auld)


Jouni Högander (1):
  drm/i915/psr: Send update also on invalidate

Matthew Auld (2):
  drm/i915/dmabuf: fix sg_table handling in map_dma_buf
  drm/i915/userptr: restore probe_range behaviour

Niranjana Vishwanathapura (1):
  drm/i915: Do not set cache_dirty for DGFX

 drivers/gpu/drm/i915/display/intel_psr.c| 5 -
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c  | 4 ++--
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c   | 4 ++--
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 5 +++--
 4 files changed, 11 insertions(+), 7 deletions(-)


Re: [Intel-gfx] [PATCH] drm/i915: Don't wait forever in drop_caches

2022-11-09 Thread Tvrtko Ursulin



On 08/11/2022 19:37, John Harrison wrote:

On 11/8/2022 01:08, Tvrtko Ursulin wrote:

On 07/11/2022 19:45, John Harrison wrote:

On 11/7/2022 06:09, Tvrtko Ursulin wrote:

On 04/11/2022 17:45, John Harrison wrote:

On 11/4/2022 03:01, Tvrtko Ursulin wrote:

On 03/11/2022 19:16, John Harrison wrote:

On 11/3/2022 02:38, Tvrtko Ursulin wrote:

On 03/11/2022 09:18, Tvrtko Ursulin wrote:

On 03/11/2022 01:33, John Harrison wrote:

On 11/2/2022 07:20, Tvrtko Ursulin wrote:

On 02/11/2022 12:12, Jani Nikula wrote:

On Tue, 01 Nov 2022, john.c.harri...@intel.com wrote:

From: John Harrison 

At the end of each test, IGT does a drop caches call via 
sysfs with


sysfs?
Sorry, that was meant to say debugfs. I've also been working 
on some sysfs IGT issues and evidently got my wires crossed!




special flags set. One of the possible paths waits for idle 
with an
infinite timeout. That causes problems for debugging issues 
when CI
catches a "can't go idle" test failure. Best case, the CI 
system times
out (after 90s), attempts a bunch of state dump actions and 
then
reboots the system to recover it. Worst case, the CI system 
can't do
anything at all and then times out (after 1000s) and simply 
reboots.
Sometimes a serial port log of dmesg might be available, 
sometimes not.


So rather than making life hard for ourselves, change the 
timeout to

be 10s rather than infinite. Also, trigger the standard
wedge/reset/recover sequence so that testing can continue 
with a

working system (if possible).

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/i915_debugfs.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c

index ae987e92251dd..9d916fbbfc27c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -641,6 +641,9 @@ 
DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,

    DROP_RESET_ACTIVE | \
    DROP_RESET_SEQNO | \
    DROP_RCU)
+
+#define DROP_IDLE_TIMEOUT    (HZ * 10)


I915_IDLE_ENGINES_TIMEOUT is defined in i915_drv.h. It's 
also only used

here.


So move here, dropping i915 prefix, next to the newly 
proposed one?

Sure, can do that.



I915_GEM_IDLE_TIMEOUT is defined in i915_gem.h. It's only 
used in

gt/intel_gt.c.


Move there and rename to GT_IDLE_TIMEOUT?

I915_GT_SUSPEND_IDLE_TIMEOUT is defined and used only in 
intel_gt_pm.c.


No action needed, maybe drop i915 prefix if wanted.

These two are totally unrelated and in code not being touched 
by this change. I would rather not conflate changing random 
other things with fixing this specific issue.



I915_IDLE_ENGINES_TIMEOUT is in ms, the rest are in jiffies.


Add _MS suffix if wanted.


My head spins.


I follow and raise that the newly proposed DROP_IDLE_TIMEOUT 
applies to DROP_ACTIVE and not only DROP_IDLE.
My original intention for the name was that is the 'drop 
caches timeout for intel_gt_wait_for_idle'. Which is quite the 
mouthful and hence abbreviated to DROP_IDLE_TIMEOUT. But yes, 
I realised later that name can be conflated with the DROP_IDLE 
flag. Will rename.





Things get refactored, code moves around, bits get left 
behind, who knows. No reason to get too worked up. :) As long 
as people are taking a wider view when touching the code 
base, and are not afraid to send cleanups, things should be 
good.
On the other hand, if every patch gets blocked in code review 
because someone points out some completely unrelated piece of 
code could be a bit better then nothing ever gets fixed. If 
you spot something that you think should be improved, isn't 
the general idea that you should post a patch yourself to 
improve it?


There's two maintainers per branch and an order of magnitude or 
two more developers so it'd be nice if cleanups would just be 
incoming on self-initiative basis. ;)


For the actual functional change at hand - it would be nice 
if code paths in question could handle SIGINT and then we 
could punt the decision on how long someone wants to wait 
purely to userspace. But it's probably hard and it's only 
debugfs so whatever.


The code paths in question will already abort on a signal 
won't they? Both intel_gt_wait_for_idle() and 
intel_guc_wait_for_pending_msg(), which is where the 
uc_wait_for_idle eventually ends up, have an 
'if(signal_pending) return -EINTR;' check. Beyond that, it 
sounds like what you are asking for is a change in the IGT 
libraries and/or CI framework to start sending signals after 
some specific timeout. That seems like a significantly more 
complex change (in terms of the number of entities affected 
and number of groups involved) and unnecessary.


If you say so, I haven't looked at them all. But if the code 
path in question already aborts on signals then I am not sure 
what is the patch fixing? I assumed you are trying to avoid the 
write stuck in D forever, which then prevents driver unload and 
everything,

Re: [Intel-gfx] [PATCH 1/2] drm/i915/gt: Add GT oriented dmesg output

2022-11-09 Thread Tvrtko Ursulin



On 08/11/2022 20:15, John Harrison wrote:

On 11/8/2022 01:01, Tvrtko Ursulin wrote:

On 07/11/2022 19:14, John Harrison wrote:

On 11/7/2022 08:17, Tvrtko Ursulin wrote:

On 07/11/2022 09:33, Tvrtko Ursulin wrote:

On 05/11/2022 01:03, Ceraolo Spurio, Daniele wrote:

On 11/4/2022 10:25 AM, john.c.harri...@intel.com wrote:

From: John Harrison 

When trying to analyse bug reports from CI, customers, etc. it 
can be

difficult to work out exactly what is happening on which GT in a
multi-GT system. So add GT oriented debug/error message wrappers. If
used instead of the drm_ equivalents, you get the same output but 
with

a GT# prefix on it.

Signed-off-by: John Harrison 


The only downside to this is that we'll print "GT0: " even on 
single-GT devices. We could introduce a gt->info.name and print 
that, so we could have it different per-platform, but IMO it's not 
worth the effort.


Reviewed-by: Daniele Ceraolo Spurio 

I think it might be worth getting an ack from one of the 
maintainers to make sure we're all aligned on transitioning to 
these new logging macro for gt code.


Idea is I think a very good one. First I would suggest 
standardising to lowercase GT in logs because:


$ grep "GT%" i915/ -r
$ grep "gt%" i915/ -r
i915/gt/intel_gt_sysfs.c: gt->i915->sysfs_gt, "gt%d", gt->info.id))
i915/gt/intel_gt_sysfs.c:    "failed to initialize gt%d 
sysfs root\n", gt->info.id);
i915/gt/intel_gt_sysfs_pm.c: "failed to create 
gt%u RC6 sysfs files (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u RC6p sysfs 
files (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to create 
gt%u RPS sysfs files (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to create 
gt%u punit_req_freq_mhz sysfs (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u throttle sysfs 
files (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u 
media_perf_power_attrs sysfs (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to add 
gt%u rps defaults (%pe)\n",
i915/i915_driver.c: drm_err(>i915->drm, "gt%d: intel_pcode_init 
failed %d\n", id, ret);
i915/i915_hwmon.c:  snprintf(ddat_gt->name, 
sizeof(ddat_gt->name), "i915_gt%u", i);




Just because there are 11 existing instances of one form doesn't mean 
that the 275 instances that are waiting to be converted should be 
done incorrectly. GT is an acronym and should be capitalised.


Okay just make it consistent then.


Besides:
grep -r "GT " i915 | grep '"'
i915/vlv_suspend.c: drm_err(>drm, "timeout 
disabling GT waking\n");
i915/vlv_suspend.c: "timeout waiting for GT wells 
to go %s\n",
i915/vlv_suspend.c: drm_dbg(>drm, "GT register access while 
GT waking disabled\n");
i915/i915_gpu_error.c:  err_printf(m, "GT awake: %s\n", 
str_yes_no(gt->awake));

i915/i915_debugfs.c:    seq_printf(m, "GT awake? %s [%d], %llums\n",
i915/selftests/i915_gem_evict.c: pr_err("Failed to idle GT (on %s)", 
engine->name);
i915/intel_uncore.c:  "GT thread status wait timed 
out\n");
i915/gt/uc/selftest_guc_multi_lrc.c: drm_err(>i915->drm, "GT 
failed to idle: %d\n", ret);
i915/gt/uc/selftest_guc.c: drm_err(>i915->drm, "GT failed to 
idle: %d\n", ret);
i915/gt/uc/selftest_guc.c: drm_err(>i915->drm, "GT failed to 
idle: %d\n", ret);
i915/gt/intel_gt_mcr.c: * Some GT registers are designed as 
"multicast" or "replicated" registers:
i915/gt/selftest_rps.c: pr_info("%s: rps counted %d 
C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of 
%uKHz\n",
i915/gt/selftest_hangcheck.c:   pr_err("[%s] GT is 
wedged!\n", engine->name);

i915/gt/selftest_hangcheck.c:   pr_err("GT is wedged!\n");
i915/gt/intel_gt_clock_utils.c: "GT clock frequency 
changed, was %uHz, now %uHz!\n",
i915/gt/selftest_engine_pm.c:   pr_err("Unable to flush GT pm 
before test\n");

i915/gt/selftest_engine_pm.c: pr_err("GT failed to idle\n");
i915/i915_sysfs.c:   "failed to register GT sysfs 
directory\n");
i915/intel_uncore.h: * of the basic non-engine GT registers 
(referred to as "GSI" on
i915/intel_uncore.h: * newer platforms, or "GT block" on older 
platforms)?  If so, we'll




Then there is a question of naming. Are we okay with GT_XXX or, do 
we want intel_gt_, or something completely different. I don't have 
a strong opinion at the moment so I'll add some more folks to Cc.


You mean GT_ERR("msg") vs intel_gt_err("msg")? Personally, I

[PATCH v3] drm/i915: Partial abandonment of legacy DRM logging macros

2022-11-09 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Convert some usages of legacy DRM logging macros into versions which tell
us on which device have the events occurred.

v2:
 * Don't have struct drm_device as local. (Jani, Ville)

v3:
 * Store gt, not i915, in workaround list. (John)

Signed-off-by: Tvrtko Ursulin 
Reviewed-by: Andrzej Hajda  # v2
Acked-by: Jani Nikula 
Cc: Jani Nikula 
Cc: John Harrison 
Cc: Ville Syrjälä 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 26 
 .../drm/i915/gt/intel_execlists_submission.c  | 13 +++---
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c  |  4 +-
 drivers/gpu/drm/i915/gt/intel_gt.c|  4 +-
 drivers/gpu/drm/i915/gt/intel_gt_irq.c|  8 ++--
 drivers/gpu/drm/i915/gt/intel_rps.c   |  6 ++-
 drivers/gpu/drm/i915/gt/intel_workarounds.c   | 42 +++
 .../gpu/drm/i915/gt/intel_workarounds_types.h |  3 ++
 .../gpu/drm/i915/gt/selftest_workarounds.c|  4 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |  4 +-
 drivers/gpu/drm/i915/i915_gem.c   |  2 +-
 drivers/gpu/drm/i915/i915_getparam.c  |  2 +-
 drivers/gpu/drm/i915/i915_irq.c   | 12 +++---
 drivers/gpu/drm/i915/i915_perf.c  | 14 ---
 drivers/gpu/drm/i915/i915_query.c | 12 +++---
 drivers/gpu/drm/i915/i915_sysfs.c |  3 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 +++
 drivers/gpu/drm/i915/intel_uncore.c   | 21 ++
 19 files changed, 117 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 01402f3c58f6..7f2831efc798 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -546,7 +546,7 @@ set_proto_ctx_engines_bond(struct i915_user_extension 
__user *base, void *data)
}
 
if (intel_engine_uses_guc(master)) {
-   DRM_DEBUG("bonding extension not supported with GuC 
submission");
+   drm_dbg(>drm, "bonding extension not supported with GuC 
submission");
return -ENODEV;
}
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 280ed90d5001..692b9d03d84b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2149,7 +2149,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
return err;
 }
 
-static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
+static int i915_gem_check_execbuffer(struct drm_i915_private *i915,
+struct drm_i915_gem_execbuffer2 *exec)
 {
if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
return -EINVAL;
@@ -2162,7 +2163,7 @@ static int i915_gem_check_execbuffer(struct 
drm_i915_gem_execbuffer2 *exec)
}
 
if (exec->DR4 == 0x) {
-   DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+   drm_dbg(>drm, "UXA submitting garbage DR4, fixing up\n");
exec->DR4 = 0;
}
if (exec->DR1 || exec->DR4)
@@ -2800,7 +2801,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 
syncobj = drm_syncobj_find(eb->file, user_fence.handle);
if (!syncobj) {
-   DRM_DEBUG("Invalid syncobj handle provided\n");
+   drm_dbg(>i915->drm,
+   "Invalid syncobj handle provided\n");
return -ENOENT;
}
 
@@ -2808,7 +2810,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 
if (!fence && user_fence.flags &&
!(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
-   DRM_DEBUG("Syncobj handle has no fence\n");
+   drm_dbg(>i915->drm,
+   "Syncobj handle has no fence\n");
drm_syncobj_put(syncobj);
return -EINVAL;
}
@@ -2817,7 +2820,9 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
err = dma_fence_chain_find_seqno(, point);
 
if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
-   DRM_DEBUG("Syncobj handle missing requested point 
%llu\n", point);
+   drm_dbg(>i915->drm,
+   "Syncobj handle missing requested point %llu\n",
+   point);
dma_fence_put(fence);
drm_syncobj_put(syncobj);
return err;
@@ -2843,7 +2848,8 @@ add_timeline_fence_array(s

Re: [RFC PATCH v3 2/3] accel: add dedicated minor for accelerator devices

2022-11-08 Thread Tvrtko Ursulin



On 06/11/2022 21:02, Oded Gabbay wrote:

The accelerator devices are exposed to user-space using a dedicated
major. In addition, they are represented in /dev with new, dedicated
device char names: /dev/accel/accel*. This is done to make sure any
user-space software that tries to open a graphic card won't open
the accelerator device by mistake.

The above implies that the minor numbering should be separated from
the rest of the DRM devices. However, to avoid code duplication, we
want the drm_minor structure to be able to represent the accelerator
device.

To achieve this, we add a new drm_minor* to drm_device that represents
the accelerator device. This pointer is initialized for drivers that
declare they handle compute accelerator, using a new driver feature
flag called DRIVER_COMPUTE_ACCEL. It is important to note that this
driver feature is mutually exclusive with DRIVER_RENDER. Devices that
want to expose both graphics and compute device char files should be
handled by two drivers that are connected using the auxiliary bus
framework.

In addition, we define a different IDR to handle the accelerators
minors. This is done to make the minor's index be identical to the
device index in /dev/. Any access to the IDR is done solely
by functions in accel_drv.c, as the IDR is define as static. The
DRM core functions call those functions in case they detect the minor's
type is DRM_MINOR_ACCEL.

We define a separate accel_open function (from drm_open) that the
accel drivers should set as their open callback function. Both these
functions eventually call the same drm_open_helper(), which had to be
changed to be non-static so it can be called from accel_drv.c.
accel_open() only partially duplicates drm_open as I removed some code
from it that handles legacy devices.

To help new drivers, I defined DEFINE_DRM_ACCEL_FOPS macro to easily
set the required function operations pointers structure.

Signed-off-by: Oded Gabbay 
---
Changes in v3:
  - Remove useless DRM_DEBUG("\n") at accel_stub_open()
  - Add function of accel_debugfs_init() as accel_debugfs_root is static
member in drm_accel.c
  - Add DRM_ACCEL_FOPS and DEFINE_DRM_ACCEL_FOPS macros
  - Replace minor handling from xarray back to idr, as xarray doesn't handle
well exchanging content of a NULL entry to non-NULL. This should be handled
in a different patch that will either fix xarray code or change DRM minor
init flow.
  - Make accel_minor_replace() to return void.

  drivers/accel/drm_accel.c  | 242 -
  drivers/gpu/drm/drm_file.c |   2 +-
  include/drm/drm_accel.h|  68 ++-
  include/drm/drm_device.h   |   3 +
  include/drm/drm_drv.h  |   8 ++
  include/drm/drm_file.h |  21 +++-
  6 files changed, 340 insertions(+), 4 deletions(-)

diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c
index 943d960ddefc..05167c929866 100644
--- a/drivers/accel/drm_accel.c
+++ b/drivers/accel/drm_accel.c
@@ -8,14 +8,25 @@

  #include 
  #include 
+#include 

  #include 
+#include 
+#include 
+#include 
  #include 
  #include 

+static DEFINE_SPINLOCK(accel_minor_lock);
+static struct idr accel_minors_idr;
+
  static struct dentry *accel_debugfs_root;
  static struct class *accel_class;

+static struct device_type accel_sysfs_device_minor = {
+   .name = "accel_minor"
+};
+
  static char *accel_devnode(struct device *dev, umode_t *mode)
  {
return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev));
@@ -40,9 +51,235 @@ static void accel_sysfs_destroy(void)
accel_class = NULL;
  }

+static int accel_name_info(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = (struct drm_info_node *) m->private;
+   struct drm_minor *minor = node->minor;
+   struct drm_device *dev = minor->dev;
+   struct drm_master *master;
+
+   mutex_lock(>master_mutex);
+   master = dev->master;
+   seq_printf(m, "%s", dev->driver->name);
+   if (dev->dev)
+   seq_printf(m, " dev=%s", dev_name(dev->dev));
+   if (master && master->unique)
+   seq_printf(m, " master=%s", master->unique);


Does all the drm_master business apply with accel?


+   if (dev->unique)
+   seq_printf(m, " unique=%s", dev->unique);
+   seq_puts(m, "\n");
+   mutex_unlock(>master_mutex);
+
+   return 0;
+}
+
+static const struct drm_info_list accel_debugfs_list[] = {
+   {"name", accel_name_info, 0}
+};
+#define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list)
+
+/**
+ * accel_debugfs_init() - Initialize debugfs for accel minor
+ * @minor: Pointer to the drm_minor instance.
+ * @minor_id: The minor's id
+ *
+ * This function initializes the drm minor's debugfs members and creates
+ * a root directory for the minor in debugfs. It also creates common files
+ * for accelerators and calls the driver's debugfs init callback.
+ */
+void accel_debugfs_init(struct drm_minor *minor, int minor_id)
+{
+   struct drm_device *dev = 

[PATCH v2] drm/i915: Partial abandonment of legacy DRM logging macros

2022-11-08 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Convert some usages of legacy DRM logging macros into versions which tell
us on which device the events have occurred.

v2:
 * Don't have struct drm_device as local. (Jani, Ville)

Signed-off-by: Tvrtko Ursulin 
Cc: Jani Nikula 
Cc: John Harrison 
Cc: Ville Syrjälä 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 26 +++
 .../drm/i915/gt/intel_execlists_submission.c  | 13 +++---
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c  |  4 +-
 drivers/gpu/drm/i915/gt/intel_gt.c|  4 +-
 drivers/gpu/drm/i915/gt/intel_gt_irq.c|  8 ++--
 drivers/gpu/drm/i915/gt/intel_rps.c   |  6 ++-
 drivers/gpu/drm/i915/gt/intel_workarounds.c   | 43 +++
 .../gpu/drm/i915/gt/intel_workarounds_types.h |  4 ++
 .../gpu/drm/i915/gt/selftest_workarounds.c|  4 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |  4 +-
 drivers/gpu/drm/i915/i915_gem.c   |  2 +-
 drivers/gpu/drm/i915/i915_getparam.c  |  2 +-
 drivers/gpu/drm/i915/i915_irq.c   | 12 +++---
 drivers/gpu/drm/i915/i915_perf.c  | 14 +++---
 drivers/gpu/drm/i915/i915_query.c | 12 +++---
 drivers/gpu/drm/i915/i915_sysfs.c |  3 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ---
 drivers/gpu/drm/i915/intel_uncore.c   | 21 +
 19 files changed, 119 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 01402f3c58f6..7f2831efc798 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -546,7 +546,7 @@ set_proto_ctx_engines_bond(struct i915_user_extension 
__user *base, void *data)
}
 
if (intel_engine_uses_guc(master)) {
-   DRM_DEBUG("bonding extension not supported with GuC 
submission");
+   drm_dbg(>drm, "bonding extension not supported with GuC 
submission");
return -ENODEV;
}
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1160723c9d2d..f65fd03f7cf2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2148,7 +2148,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
return err;
 }
 
-static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
+static int i915_gem_check_execbuffer(struct drm_i915_private *i915,
+struct drm_i915_gem_execbuffer2 *exec)
 {
if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
return -EINVAL;
@@ -2161,7 +2162,7 @@ static int i915_gem_check_execbuffer(struct 
drm_i915_gem_execbuffer2 *exec)
}
 
	if (exec->DR4 == 0xffffffff) {
-   DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+   drm_dbg(>drm, "UXA submitting garbage DR4, fixing up\n");
exec->DR4 = 0;
}
if (exec->DR1 || exec->DR4)
@@ -2799,7 +2800,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 
syncobj = drm_syncobj_find(eb->file, user_fence.handle);
if (!syncobj) {
-   DRM_DEBUG("Invalid syncobj handle provided\n");
+   drm_dbg(>i915->drm,
+   "Invalid syncobj handle provided\n");
return -ENOENT;
}
 
@@ -2807,7 +2809,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 
if (!fence && user_fence.flags &&
!(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
-   DRM_DEBUG("Syncobj handle has no fence\n");
+   drm_dbg(>i915->drm,
+   "Syncobj handle has no fence\n");
drm_syncobj_put(syncobj);
return -EINVAL;
}
@@ -2816,7 +2819,9 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
err = dma_fence_chain_find_seqno(, point);
 
if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
-   DRM_DEBUG("Syncobj handle missing requested point 
%llu\n", point);
+   drm_dbg(>i915->drm,
+   "Syncobj handle missing requested point %llu\n",
+   point);
dma_fence_put(fence);
drm_syncobj_put(syncobj);
return err;
@@ -2842,7 +2847,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 * 0) would break the timeline.
 */
if (user_fen

Re: drm-tip merge conflict caused by recent merge of amd-drm-next into drm-next

2022-11-08 Thread Tvrtko Ursulin



On 08/11/2022 09:24, Hans de Goede wrote:

Hi Alex, et al.,

I just pushed 2 simple DMI quirk patches
(for drivers/gpu/drm/drm_panel_orientation_quirks.c)
to drm-misc-fixes.

At the end of the dim push-branch I noticed that
rebuilding drm-tip failed due to a merge error
when merging in drm-next .

Looking at the 3-way diff of the conflict, this seems
to be caused by amd-drm-next-6.2-2022-11-04 landing
in drm-next.

I'm not familiar with the code causing the conflict
and I believe this is best resolved by someone who
is more familiar with the code.


We really do need a better process for these things. In recent past I 
tried pinging on IRC but that was a bit hit and miss. More miss than hit 
even.


This one I actually fixed up, since I had an i915 conflict to deal with 
due drm-intel-fixes, it looked straight forward and did not feel like 
waiting. Please guys do check if I did it correctly.


Regards,

Tvrtko


Re: [PATCH] drm/i915: Partial abandonment of legacy DRM logging macros

2022-11-08 Thread Tvrtko Ursulin



On 08/11/2022 12:01, Jani Nikula wrote:

On Tue, 08 Nov 2022, Tvrtko Ursulin  wrote:

From: Tvrtko Ursulin 

Convert some usages of legacy DRM logging macros into versions which tell
us on which device the events have occurred.

Signed-off-by: Tvrtko Ursulin 
Cc: Jani Nikula 
Cc: John Harrison 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
  .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 23 ++
  .../drm/i915/gt/intel_execlists_submission.c  | 13 +++---
  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c  |  4 +-
  drivers/gpu/drm/i915/gt/intel_gt.c|  4 +-
  drivers/gpu/drm/i915/gt/intel_gt_irq.c|  8 ++--
  drivers/gpu/drm/i915/gt/intel_rps.c   |  6 ++-
  drivers/gpu/drm/i915/gt/intel_workarounds.c   | 43 +++
  .../gpu/drm/i915/gt/intel_workarounds_types.h |  4 ++
  .../gpu/drm/i915/gt/selftest_workarounds.c|  4 +-
  drivers/gpu/drm/i915/i915_debugfs.c   |  4 +-
  drivers/gpu/drm/i915/i915_gem.c   |  2 +-
  drivers/gpu/drm/i915/i915_getparam.c  |  2 +-
  drivers/gpu/drm/i915/i915_irq.c   | 12 +++---
  drivers/gpu/drm/i915/i915_perf.c  | 14 +++---
  drivers/gpu/drm/i915/i915_query.c | 12 +++---
  drivers/gpu/drm/i915/i915_sysfs.c |  3 +-
  drivers/gpu/drm/i915/i915_vma.c   | 16 ---
  drivers/gpu/drm/i915/intel_uncore.c   | 21 +
  19 files changed, 116 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 01402f3c58f6..7f2831efc798 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -546,7 +546,7 @@ set_proto_ctx_engines_bond(struct i915_user_extension 
__user *base, void *data)
}
  
  	if (intel_engine_uses_guc(master)) {

-   DRM_DEBUG("bonding extension not supported with GuC 
submission");
+   drm_dbg(>drm, "bonding extension not supported with GuC 
submission");
return -ENODEV;
}
  
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c

index 1160723c9d2d..1eb7b66191b2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2148,7 +2148,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
return err;
  }
  
-static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)

+static int i915_gem_check_execbuffer(struct drm_i915_private *i915,
+struct drm_i915_gem_execbuffer2 *exec)
  {
if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
return -EINVAL;
@@ -2161,7 +2162,7 @@ static int i915_gem_check_execbuffer(struct 
drm_i915_gem_execbuffer2 *exec)
}
  
  	if (exec->DR4 == 0x) {

-   DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+   drm_dbg(>drm, "UXA submitting garbage DR4, fixing up\n");
exec->DR4 = 0;
}
if (exec->DR1 || exec->DR4)
@@ -2744,6 +2745,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 const struct 
drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
  {
struct drm_i915_gem_exec_fence __user *user_fences;
+   struct drm_device *drm = >i915->drm;


Elsewhere we've been pretty strict about not adding struct drm_device as
a local variable, just struct drm_i915_private *i915. We don't want to
have both, and in general it's more likely i915 is needed than
drm_device, if not now then in the future. Even if it means having to
use >drm here.


Yeah it smelled bad while I was typing it.. will change.

Regards,

Tvrtko


[PATCH] drm/i915: Partial abandonment of legacy DRM logging macros

2022-11-08 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Convert some usages of legacy DRM logging macros into versions which tell
us on which device the events have occurred.

Signed-off-by: Tvrtko Ursulin 
Cc: Jani Nikula 
Cc: John Harrison 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 23 ++
 .../drm/i915/gt/intel_execlists_submission.c  | 13 +++---
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c  |  4 +-
 drivers/gpu/drm/i915/gt/intel_gt.c|  4 +-
 drivers/gpu/drm/i915/gt/intel_gt_irq.c|  8 ++--
 drivers/gpu/drm/i915/gt/intel_rps.c   |  6 ++-
 drivers/gpu/drm/i915/gt/intel_workarounds.c   | 43 +++
 .../gpu/drm/i915/gt/intel_workarounds_types.h |  4 ++
 .../gpu/drm/i915/gt/selftest_workarounds.c|  4 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |  4 +-
 drivers/gpu/drm/i915/i915_gem.c   |  2 +-
 drivers/gpu/drm/i915/i915_getparam.c  |  2 +-
 drivers/gpu/drm/i915/i915_irq.c   | 12 +++---
 drivers/gpu/drm/i915/i915_perf.c  | 14 +++---
 drivers/gpu/drm/i915/i915_query.c | 12 +++---
 drivers/gpu/drm/i915/i915_sysfs.c |  3 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ---
 drivers/gpu/drm/i915/intel_uncore.c   | 21 +
 19 files changed, 116 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 01402f3c58f6..7f2831efc798 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -546,7 +546,7 @@ set_proto_ctx_engines_bond(struct i915_user_extension 
__user *base, void *data)
}
 
if (intel_engine_uses_guc(master)) {
-   DRM_DEBUG("bonding extension not supported with GuC 
submission");
+   drm_dbg(>drm, "bonding extension not supported with GuC 
submission");
return -ENODEV;
}
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1160723c9d2d..1eb7b66191b2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2148,7 +2148,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
return err;
 }
 
-static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
+static int i915_gem_check_execbuffer(struct drm_i915_private *i915,
+struct drm_i915_gem_execbuffer2 *exec)
 {
if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
return -EINVAL;
@@ -2161,7 +2162,7 @@ static int i915_gem_check_execbuffer(struct 
drm_i915_gem_execbuffer2 *exec)
}
 
	if (exec->DR4 == 0xffffffff) {
-   DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+   drm_dbg(>drm, "UXA submitting garbage DR4, fixing up\n");
exec->DR4 = 0;
}
if (exec->DR1 || exec->DR4)
@@ -2744,6 +2745,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 const struct 
drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
 {
struct drm_i915_gem_exec_fence __user *user_fences;
+   struct drm_device *drm = >i915->drm;
u64 __user *user_values;
struct eb_fence *f;
u64 nfences;
@@ -2799,7 +2801,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 
syncobj = drm_syncobj_find(eb->file, user_fence.handle);
if (!syncobj) {
-   DRM_DEBUG("Invalid syncobj handle provided\n");
+   drm_dbg(drm, "Invalid syncobj handle provided\n");
return -ENOENT;
}
 
@@ -2807,7 +2809,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 
if (!fence && user_fence.flags &&
!(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
-   DRM_DEBUG("Syncobj handle has no fence\n");
+   drm_dbg(drm, "Syncobj handle has no fence\n");
drm_syncobj_put(syncobj);
return -EINVAL;
}
@@ -2816,7 +2818,9 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
err = dma_fence_chain_find_seqno(, point);
 
if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
-   DRM_DEBUG("Syncobj handle missing requested point 
%llu\n", point);
+   drm_dbg(drm,
+   "Syncobj handle missing requested point %llu\n",
+   point);
dma_fence_put(fence);
drm_syncobj_put(syncobj);

Re: [PATCH 1/3] Documentation/gpu: Fix section in the wrong scope

2022-11-08 Thread Tvrtko Ursulin



On 07/11/2022 17:32, Lucas De Marchi wrote:

That section should still be inside "DRM client usage stats" rather than
as a sibling.

Signed-off-by: Lucas De Marchi 
---
  Documentation/gpu/drm-usage-stats.rst | 1 -
  1 file changed, 1 deletion(-)

diff --git a/Documentation/gpu/drm-usage-stats.rst 
b/Documentation/gpu/drm-usage-stats.rst
index 92c5117368d7..b46327356e80 100644
--- a/Documentation/gpu/drm-usage-stats.rst
+++ b/Documentation/gpu/drm-usage-stats.rst
@@ -126,7 +126,6 @@ percentage utilization of the engine, whereas 
drm-engine- only reflects
  time active without considering what frequency the engine is operating as a
percentage of its maximum frequency.
  
-===

  Driver specific implementations
  ===
  


Oops - yep.

Reviewed-by: Tvrtko Ursulin 

Regards,

Tvrtko


Re: [Intel-gfx] [PATCH] drm/i915: Don't wait forever in drop_caches

2022-11-08 Thread Tvrtko Ursulin



On 07/11/2022 19:45, John Harrison wrote:

On 11/7/2022 06:09, Tvrtko Ursulin wrote:

On 04/11/2022 17:45, John Harrison wrote:

On 11/4/2022 03:01, Tvrtko Ursulin wrote:

On 03/11/2022 19:16, John Harrison wrote:

On 11/3/2022 02:38, Tvrtko Ursulin wrote:

On 03/11/2022 09:18, Tvrtko Ursulin wrote:

On 03/11/2022 01:33, John Harrison wrote:

On 11/2/2022 07:20, Tvrtko Ursulin wrote:

On 02/11/2022 12:12, Jani Nikula wrote:

On Tue, 01 Nov 2022, john.c.harri...@intel.com wrote:

From: John Harrison 

At the end of each test, IGT does a drop caches call via 
sysfs with


sysfs?
Sorry, that was meant to say debugfs. I've also been working on 
some sysfs IGT issues and evidently got my wires crossed!




special flags set. One of the possible paths waits for idle 
with an
infinite timeout. That causes problems for debugging issues 
when CI
catches a "can't go idle" test failure. Best case, the CI 
system times

out (after 90s), attempts a bunch of state dump actions and then
reboots the system to recover it. Worst case, the CI system 
can't do
anything at all and then times out (after 1000s) and simply 
reboots.
Sometimes a serial port log of dmesg might be available, 
sometimes not.


So rather than making life hard for ourselves, change the 
timeout to

be 10s rather than infinite. Also, trigger the standard
wedge/reset/recover sequence so that testing can continue with a
working system (if possible).

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/i915_debugfs.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c

index ae987e92251dd..9d916fbbfc27c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -641,6 +641,9 @@ 
DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,

    DROP_RESET_ACTIVE | \
    DROP_RESET_SEQNO | \
    DROP_RCU)
+
+#define DROP_IDLE_TIMEOUT    (HZ * 10)


I915_IDLE_ENGINES_TIMEOUT is defined in i915_drv.h. It's also 
only used

here.


So move here, dropping i915 prefix, next to the newly proposed 
one?

Sure, can do that.




I915_GEM_IDLE_TIMEOUT is defined in i915_gem.h. It's only used in
gt/intel_gt.c.


Move there and rename to GT_IDLE_TIMEOUT?

I915_GT_SUSPEND_IDLE_TIMEOUT is defined and used only in 
intel_gt_pm.c.


No action needed, maybe drop i915 prefix if wanted.

These two are totally unrelated and in code not being touched by 
this change. I would rather not conflate changing random other 
things with fixing this specific issue.



I915_IDLE_ENGINES_TIMEOUT is in ms, the rest are in jiffies.


Add _MS suffix if wanted.


My head spins.


I follow and raise that the newly proposed DROP_IDLE_TIMEOUT 
applies to DROP_ACTIVE and not only DROP_IDLE.
My original intention for the name was that is the 'drop caches 
timeout for intel_gt_wait_for_idle'. Which is quite the mouthful 
and hence abbreviated to DROP_IDLE_TIMEOUT. But yes, I realised 
later that name can be conflated with the DROP_IDLE flag. Will 
rename.





Things get refactored, code moves around, bits get left behind, 
who knows. No reason to get too worked up. :) As long as people 
are taking a wider view when touching the code base, and are 
not afraid to send cleanups, things should be good.
On the other hand, if every patch gets blocked in code review 
because someone points out some completely unrelated piece of 
code could be a bit better then nothing ever gets fixed. If you 
spot something that you think should be improved, isn't the 
general idea that you should post a patch yourself to improve it?


There's two maintainers per branch and an order of magnitude or 
two more developers so it'd be nice if cleanups would just be 
incoming on self-initiative basis. ;)


For the actual functional change at hand - it would be nice if 
code paths in question could handle SIGINT and then we could 
punt the decision on how long someone wants to wait purely to 
userspace. But it's probably hard and it's only debugfs so 
whatever.


The code paths in question will already abort on a signal won't 
they? Both intel_gt_wait_for_idle() and 
intel_guc_wait_for_pending_msg(), which is where the 
uc_wait_for_idle eventually ends up, have an 'if(signal_pending) 
return -EINTR;' check. Beyond that, it sounds like what you are 
asking for is a change in the IGT libraries and/or CI framework 
to start sending signals after some specific timeout. That seems 
like a significantly more complex change (in terms of the number 
of entities affected and number of groups involved) and 
unnecessary.


If you say so, I haven't looked at them all. But if the code path 
in question already aborts on signals then I am not sure what is 
the patch fixing? I assumed you are trying to avoid the write 
stuck in D forever, which then prevents driver unload and 
everything, requiring the test runner to eventually reboot. If 
you say SIGINT works then you can alrea

Re: [Intel-gfx] [PATCH 1/2] drm/i915/gt: Add GT oriented dmesg output

2022-11-08 Thread Tvrtko Ursulin



On 07/11/2022 19:14, John Harrison wrote:

On 11/7/2022 08:17, Tvrtko Ursulin wrote:

On 07/11/2022 09:33, Tvrtko Ursulin wrote:

On 05/11/2022 01:03, Ceraolo Spurio, Daniele wrote:

On 11/4/2022 10:25 AM, john.c.harri...@intel.com wrote:

From: John Harrison 

When trying to analyse bug reports from CI, customers, etc. it can be
difficult to work out exactly what is happening on which GT in a
multi-GT system. So add GT oriented debug/error message wrappers. If
used instead of the drm_ equivalents, you get the same output but with
a GT# prefix on it.

Signed-off-by: John Harrison 


The only downside to this is that we'll print "GT0: " even on 
single-GT devices. We could introduce a gt->info.name and print 
that, so we could have it different per-platform, but IMO it's not 
worth the effort.


Reviewed-by: Daniele Ceraolo Spurio 

I think it might be worth getting an ack from one of the maintainers 
to make sure we're all aligned on transitioning to these new logging 
macro for gt code.


Idea is I think a very good one. First I would suggest standardising 
to lowercase GT in logs because:


$ grep "GT%" i915/ -r
$ grep "gt%" i915/ -r
i915/gt/intel_gt_sysfs.c: gt->i915->sysfs_gt, "gt%d", gt->info.id))
i915/gt/intel_gt_sysfs.c:    "failed to initialize gt%d 
sysfs root\n", gt->info.id);
i915/gt/intel_gt_sysfs_pm.c: "failed to create 
gt%u RC6 sysfs files (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to 
create gt%u RC6p sysfs files (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to create 
gt%u RPS sysfs files (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to create 
gt%u punit_req_freq_mhz sysfs (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to 
create gt%u throttle sysfs files (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to 
create gt%u media_perf_power_attrs sysfs (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to add gt%u 
rps defaults (%pe)\n",
i915/i915_driver.c: drm_err(>i915->drm, "gt%d: intel_pcode_init 
failed %d\n", id, ret);
i915/i915_hwmon.c:  snprintf(ddat_gt->name, 
sizeof(ddat_gt->name), "i915_gt%u", i);




Just because there are 11 existing instances of one form doesn't mean 
that the 275 instances that are waiting to be converted should be done 
incorrectly. GT is an acronym and should be capitalised.


Okay just make it consistent then.


Besides:
grep -r "GT " i915 | grep '"'
i915/vlv_suspend.c: drm_err(>drm, "timeout disabling 
GT waking\n");
i915/vlv_suspend.c: "timeout waiting for GT wells to 
go %s\n",
i915/vlv_suspend.c: drm_dbg(>drm, "GT register access while GT 
waking disabled\n");
i915/i915_gpu_error.c:  err_printf(m, "GT awake: %s\n", 
str_yes_no(gt->awake));

i915/i915_debugfs.c:    seq_printf(m, "GT awake? %s [%d], %llums\n",
i915/selftests/i915_gem_evict.c: pr_err("Failed to idle GT (on %s)", 
engine->name);

i915/intel_uncore.c:  "GT thread status wait timed out\n");
i915/gt/uc/selftest_guc_multi_lrc.c: drm_err(>i915->drm, "GT failed 
to idle: %d\n", ret);
i915/gt/uc/selftest_guc.c: drm_err(>i915->drm, "GT failed to idle: 
%d\n", ret);
i915/gt/uc/selftest_guc.c: drm_err(>i915->drm, "GT failed to idle: 
%d\n", ret);
i915/gt/intel_gt_mcr.c: * Some GT registers are designed as "multicast" 
or "replicated" registers:
i915/gt/selftest_rps.c: pr_info("%s: rps counted %d C0 
cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of 
%uKHz\n",
i915/gt/selftest_hangcheck.c:   pr_err("[%s] GT is 
wedged!\n", engine->name);

i915/gt/selftest_hangcheck.c:   pr_err("GT is wedged!\n");
i915/gt/intel_gt_clock_utils.c: "GT clock frequency 
changed, was %uHz, now %uHz!\n",
i915/gt/selftest_engine_pm.c:   pr_err("Unable to flush GT pm 
before test\n");
i915/gt/selftest_engine_pm.c:   pr_err("GT 
failed to idle\n");
i915/i915_sysfs.c:   "failed to register GT sysfs 
directory\n");
i915/intel_uncore.h: * of the basic non-engine GT registers 
(referred to as "GSI" on
i915/intel_uncore.h: * newer platforms, or "GT block" on older 
platforms)?  If so, we'll




Then there is a question of naming. Are we okay with GT_XXX or, do we 
want intel_gt_, or something completely different. I don't have a 
strong opinion at the moment so I'll add some more folks to Cc.


You mean GT_ERR("msg") vs intel_gt_err(&qu

Re: [Intel-gfx] [PATCH 1/2] drm/i915/gt: Add GT oriented dmesg output

2022-11-07 Thread Tvrtko Ursulin



On 07/11/2022 09:33, Tvrtko Ursulin wrote:


On 05/11/2022 01:03, Ceraolo Spurio, Daniele wrote:



On 11/4/2022 10:25 AM, john.c.harri...@intel.com wrote:

From: John Harrison 

When trying to analyse bug reports from CI, customers, etc. it can be
difficult to work out exactly what is happening on which GT in a
multi-GT system. So add GT oriented debug/error message wrappers. If
used instead of the drm_ equivalents, you get the same output but with
a GT# prefix on it.

Signed-off-by: John Harrison 


The only downside to this is that we'll print "GT0: " even on 
single-GT devices. We could introduce a gt->info.name and print that, 
so we could have it different per-platform, but IMO it's not worth the 
effort.


Reviewed-by: Daniele Ceraolo Spurio 

I think it might be worth getting an ack from one of the maintainers 
to make sure we're all aligned on transitioning to these new logging 
macro for gt code.


Idea is I think a very good one. First I would suggest standardising to 
lowercase GT in logs because:


$ grep "GT%" i915/ -r
$ grep "gt%" i915/ -r
i915/gt/intel_gt_sysfs.c:
gt->i915->sysfs_gt, "gt%d", gt->info.id))
i915/gt/intel_gt_sysfs.c:    "failed to initialize gt%d 
sysfs root\n", gt->info.id);
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u 
RC6 sysfs files (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to 
create gt%u RC6p sysfs files (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u 
RPS sysfs files (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u 
punit_req_freq_mhz sysfs (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to 
create gt%u throttle sysfs files (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to 
create gt%u media_perf_power_attrs sysfs (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to add gt%u rps 
defaults (%pe)\n",
i915/i915_driver.c: drm_err(>i915->drm, "gt%d: 
intel_pcode_init failed %d\n", id, ret);
i915/i915_hwmon.c:  snprintf(ddat_gt->name, 
sizeof(ddat_gt->name), "i915_gt%u", i);


Then there is a question of naming. Are we okay with GT_XXX or, do we 
want intel_gt_, or something completely different. I don't have a strong 
opinion at the moment so I'll add some more folks to Cc.


There was a maintainer level mini-discussion on this topic which I will 
try to summarise.


Main contention point was the maintenance cost and generally an 
undesirable pattern of needing to add many subsystem/component/directory 
specific macros. Which then typically need extra flavours and so on. But 
over verbosity of the code is obviously also bad, so one compromise idea 
was to add a macro which builds the GT string and use drm logging 
helpers directly. This would be something like:


 drm_err(GT_LOG("something went wrong ret=%d\n", gt), ret);
 drm_info(GT_LOG(...same...));

Whether or not to put the gt as parameter to the helper macro or outside 
wasn't really decided upon. Anyway the macro would be adding the magic 
"gt%u: " prefix, drm device and all.


Also the name GT_LOG (or case) is just for illustration, that part 
wasn't really discussed.


If agreeable this pattern could then be used to consolidate some other 
macros that we have. Although apart from CT_DEBUG/ERROR I don't know if 
we have any others.


I hope I have transferred the idea correctly. Please shout if I have not.

Regards,

Tvrtko


What I'd would like to see tried is to converting all of i915/gt within 
one kernel release so we don't have a mish-mash of log formats.


Regards,

Tvrtko


---
  drivers/gpu/drm/i915/gt/intel_gt.h | 15 +++
  1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h

index e0365d5562484..1e016fb0117a4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -13,6 +13,21 @@
  struct drm_i915_private;
  struct drm_printer;
+#define GT_ERR(_gt, _fmt, ...) \
+    drm_err(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, 
##__VA_ARGS__)

+
+#define GT_WARN(_gt, _fmt, ...) \
+    drm_warn(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, 
##__VA_ARGS__)

+
+#define GT_NOTICE(_gt, _fmt, ...) \
+    drm_notice(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, 
##__VA_ARGS__)

+
+#define GT_INFO(_gt, _fmt, ...) \
+    drm_info(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, 
##__VA_ARGS__)

+
+#define GT_DBG(_gt, _fmt, ...) \
+    drm_dbg(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, 
##__VA_ARGS__)

+
  #define GT_TRACE(gt, fmt, ...) do {    \
  const struct intel_gt *gt__ __maybe_unused = (gt);    \
  GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev),    \




Re: [Intel-gfx] [PATCH] drm/i915: Don't wait forever in drop_caches

2022-11-07 Thread Tvrtko Ursulin



On 04/11/2022 17:45, John Harrison wrote:

On 11/4/2022 03:01, Tvrtko Ursulin wrote:

On 03/11/2022 19:16, John Harrison wrote:

On 11/3/2022 02:38, Tvrtko Ursulin wrote:

On 03/11/2022 09:18, Tvrtko Ursulin wrote:

On 03/11/2022 01:33, John Harrison wrote:

On 11/2/2022 07:20, Tvrtko Ursulin wrote:

On 02/11/2022 12:12, Jani Nikula wrote:

On Tue, 01 Nov 2022, john.c.harri...@intel.com wrote:

From: John Harrison 

At the end of each test, IGT does a drop caches call via sysfs 
with


sysfs?
Sorry, that was meant to say debugfs. I've also been working on 
some sysfs IGT issues and evidently got my wires crossed!




special flags set. One of the possible paths waits for idle 
with an
infinite timeout. That causes problems for debugging issues 
when CI
catches a "can't go idle" test failure. Best case, the CI 
system times

out (after 90s), attempts a bunch of state dump actions and then
reboots the system to recover it. Worst case, the CI system 
can't do
anything at all and then times out (after 1000s) and simply 
reboots.
Sometimes a serial port log of dmesg might be available, 
sometimes not.


So rather than making life hard for ourselves, change the 
timeout to

be 10s rather than infinite. Also, trigger the standard
wedge/reset/recover sequence so that testing can continue with a
working system (if possible).

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/i915_debugfs.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c

index ae987e92251dd..9d916fbbfc27c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -641,6 +641,9 @@ 
DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,

    DROP_RESET_ACTIVE | \
    DROP_RESET_SEQNO | \
    DROP_RCU)
+
+#define DROP_IDLE_TIMEOUT    (HZ * 10)


I915_IDLE_ENGINES_TIMEOUT is defined in i915_drv.h. It's also 
only used

here.


So move here, dropping i915 prefix, next to the newly proposed one?

Sure, can do that.




I915_GEM_IDLE_TIMEOUT is defined in i915_gem.h. It's only used in
gt/intel_gt.c.


Move there and rename to GT_IDLE_TIMEOUT?

I915_GT_SUSPEND_IDLE_TIMEOUT is defined and used only in 
intel_gt_pm.c.


No action needed, maybe drop i915 prefix if wanted.

These two are totally unrelated and in code not being touched by 
this change. I would rather not conflate changing random other 
things with fixing this specific issue.



I915_IDLE_ENGINES_TIMEOUT is in ms, the rest are in jiffies.


Add _MS suffix if wanted.


My head spins.


I follow and raise that the newly proposed DROP_IDLE_TIMEOUT 
applies to DROP_ACTIVE and not only DROP_IDLE.
My original intention for the name was that is the 'drop caches 
timeout for intel_gt_wait_for_idle'. Which is quite the mouthful 
and hence abbreviated to DROP_IDLE_TIMEOUT. But yes, I realised 
later that name can be conflated with the DROP_IDLE flag. Will 
rename.





Things get refactored, code moves around, bits get left behind, 
who knows. No reason to get too worked up. :) As long as people 
are taking a wider view when touching the code base, and are not 
afraid to send cleanups, things should be good.
On the other hand, if every patch gets blocked in code review 
because someone points out some completely unrelated piece of code 
could be a bit better then nothing ever gets fixed. If you spot 
something that you think should be improved, isn't the general 
idea that you should post a patch yourself to improve it?


There's two maintainers per branch and an order of magnitude or two 
more developers so it'd be nice if cleanups would just be incoming 
on self-initiative basis. ;)


For the actual functional change at hand - it would be nice if 
code paths in question could handle SIGINT and then we could punt 
the decision on how long someone wants to wait purely to 
userspace. But it's probably hard and it's only debugfs so whatever.


The code paths in question will already abort on a signal won't 
they? Both intel_gt_wait_for_idle() and 
intel_guc_wait_for_pending_msg(), which is where the 
uc_wait_for_idle eventually ends up, have an 'if(signal_pending) 
return -EINTR;' check. Beyond that, it sounds like what you are 
asking for is a change in the IGT libraries and/or CI framework to 
start sending signals after some specific timeout. That seems like 
a significantly more complex change (in terms of the number of 
entities affected and number of groups involved) and unnecessary.


If you say so, I haven't looked at them all. But if the code path 
in question already aborts on signals then I am not sure what is 
the patch fixing? I assumed you are trying to avoid the write stuck 
in D forever, which then prevents driver unload and everything, 
requiring the test runner to eventually reboot. If you say SIGINT 
works then you can already recover from userspace, no?


Whether or not 10s is enough CI will hopefully tell us. I'd

Re: [Intel-gfx] [PATCH 1/2] drm/i915/gt: Add GT oriented dmesg output

2022-11-07 Thread Tvrtko Ursulin



On 05/11/2022 01:03, Ceraolo Spurio, Daniele wrote:



On 11/4/2022 10:25 AM, john.c.harri...@intel.com wrote:

From: John Harrison 

When trying to analyse bug reports from CI, customers, etc. it can be
difficult to work out exactly what is happening on which GT in a
multi-GT system. So add GT oriented debug/error message wrappers. If
used instead of the drm_ equivalents, you get the same output but with
a GT# prefix on it.

Signed-off-by: John Harrison 


The only downside to this is that we'll print "GT0: " even on single-GT 
devices. We could introduce a gt->info.name and print that, so we could 
have it different per-platform, but IMO it's not worth the effort.


Reviewed-by: Daniele Ceraolo Spurio 

I think it might be worth getting an ack from one of the maintainers to 
make sure we're all aligned on transitioning to these new logging macro 
for gt code.


Idea is I think a very good one. First I would suggest standardising to 
lowercase GT in logs because:

$ grep "GT%" i915/ -r
$ grep "gt%" i915/ -r
i915/gt/intel_gt_sysfs.c:gt->i915->sysfs_gt, 
"gt%d", gt->info.id))
i915/gt/intel_gt_sysfs.c:"failed to initialize gt%d sysfs root\n", 
gt->info.id);
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u RC6 sysfs 
files (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u RC6p 
sysfs files (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u RPS sysfs 
files (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u 
punit_req_freq_mhz sysfs (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u 
throttle sysfs files (%pe)",
i915/gt/intel_gt_sysfs_pm.c: "failed to create gt%u 
media_perf_power_attrs sysfs (%pe)\n",
i915/gt/intel_gt_sysfs_pm.c: "failed to add gt%u rps defaults 
(%pe)\n",
i915/i915_driver.c: drm_err(&gt->i915->drm, "gt%d: 
intel_pcode_init failed %d\n", id, ret);
i915/i915_hwmon.c:  snprintf(ddat_gt->name, sizeof(ddat_gt->name), 
"i915_gt%u", i);

Then there is a question of naming. Are we okay with GT_XXX or, do we want 
intel_gt_, or something completely different. I don't have a strong opinion at 
the moment so I'll add some more folks to Cc.

What I'd would like to see tried is to converting all of i915/gt within one 
kernel release so we don't have a mish-mash of log formats.

Regards,

Tvrtko
 

---
  drivers/gpu/drm/i915/gt/intel_gt.h | 15 +++
  1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h

index e0365d5562484..1e016fb0117a4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -13,6 +13,21 @@
  struct drm_i915_private;
  struct drm_printer;
+#define GT_ERR(_gt, _fmt, ...) \
+    drm_err(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, 
##__VA_ARGS__)

+
+#define GT_WARN(_gt, _fmt, ...) \
+    drm_warn(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, 
##__VA_ARGS__)

+
+#define GT_NOTICE(_gt, _fmt, ...) \
+    drm_notice(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, 
##__VA_ARGS__)

+
+#define GT_INFO(_gt, _fmt, ...) \
+    drm_info(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, 
##__VA_ARGS__)

+
+#define GT_DBG(_gt, _fmt, ...) \
+    drm_dbg(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, 
##__VA_ARGS__)

+
  #define GT_TRACE(gt, fmt, ...) do {    \
  const struct intel_gt *gt__ __maybe_unused = (gt);    \
  GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev),    \




Re: [Intel-gfx] [PATCH] drm/i915: Don't wait forever in drop_caches

2022-11-04 Thread Tvrtko Ursulin



On 03/11/2022 19:16, John Harrison wrote:

On 11/3/2022 02:38, Tvrtko Ursulin wrote:

On 03/11/2022 09:18, Tvrtko Ursulin wrote:

On 03/11/2022 01:33, John Harrison wrote:

On 11/2/2022 07:20, Tvrtko Ursulin wrote:

On 02/11/2022 12:12, Jani Nikula wrote:

On Tue, 01 Nov 2022, john.c.harri...@intel.com wrote:

From: John Harrison 

At the end of each test, IGT does a drop caches call via sysfs with


sysfs?
Sorry, that was meant to say debugfs. I've also been working on some 
sysfs IGT issues and evidently got my wires crossed!





special flags set. One of the possible paths waits for idle with an
infinite timeout. That causes problems for debugging issues when CI
catches a "can't go idle" test failure. Best case, the CI system 
times

out (after 90s), attempts a bunch of state dump actions and then
reboots the system to recover it. Worst case, the CI system can't do
anything at all and then times out (after 1000s) and simply reboots.
Sometimes a serial port log of dmesg might be available, 
sometimes not.


So rather than making life hard for ourselves, change the timeout to
be 10s rather than infinite. Also, trigger the standard
wedge/reset/recover sequence so that testing can continue with a
working system (if possible).

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/i915_debugfs.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c

index ae987e92251dd..9d916fbbfc27c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -641,6 +641,9 @@ 
DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,

    DROP_RESET_ACTIVE | \
    DROP_RESET_SEQNO | \
    DROP_RCU)
+
+#define DROP_IDLE_TIMEOUT    (HZ * 10)


I915_IDLE_ENGINES_TIMEOUT is defined in i915_drv.h. It's also only 
used

here.


So move here, dropping i915 prefix, next to the newly proposed one?

Sure, can do that.




I915_GEM_IDLE_TIMEOUT is defined in i915_gem.h. It's only used in
gt/intel_gt.c.


Move there and rename to GT_IDLE_TIMEOUT?

I915_GT_SUSPEND_IDLE_TIMEOUT is defined and used only in 
intel_gt_pm.c.


No action needed, maybe drop i915 prefix if wanted.

These two are totally unrelated and in code not being touched by 
this change. I would rather not conflate changing random other 
things with fixing this specific issue.



I915_IDLE_ENGINES_TIMEOUT is in ms, the rest are in jiffies.


Add _MS suffix if wanted.


My head spins.


I follow and raise that the newly proposed DROP_IDLE_TIMEOUT 
applies to DROP_ACTIVE and not only DROP_IDLE.
My original intention for the name was that is the 'drop caches 
timeout for intel_gt_wait_for_idle'. Which is quite the mouthful and 
hence abbreviated to DROP_IDLE_TIMEOUT. But yes, I realised later 
that name can be conflated with the DROP_IDLE flag. Will rename.





Things get refactored, code moves around, bits get left behind, who 
knows. No reason to get too worked up. :) As long as people are 
taking a wider view when touching the code base, and are not afraid 
to send cleanups, things should be good.
On the other hand, if every patch gets blocked in code review 
because someone points out some completely unrelated piece of code 
could be a bit better then nothing ever gets fixed. If you spot 
something that you think should be improved, isn't the general idea 
that you should post a patch yourself to improve it?


There's two maintainers per branch and an order of magnitude or two 
more developers so it'd be nice if cleanups would just be incoming on 
self-initiative basis. ;)


For the actual functional change at hand - it would be nice if code 
paths in question could handle SIGINT and then we could punt the 
decision on how long someone wants to wait purely to userspace. But 
it's probably hard and it's only debugfs so whatever.


The code paths in question will already abort on a signal won't 
they? Both intel_gt_wait_for_idle() and 
intel_guc_wait_for_pending_msg(), which is where the 
uc_wait_for_idle eventually ends up, have an 'if(signal_pending) 
return -EINTR;' check. Beyond that, it sounds like what you are 
asking for is a change in the IGT libraries and/or CI framework to 
start sending signals after some specific timeout. That seems like a 
significantly more complex change (in terms of the number of 
entities affected and number of groups involved) and unnecessary.


If you say so, I haven't looked at them all. But if the code path in 
question already aborts on signals then I am not sure what is the 
patch fixing? I assumed you are trying to avoid the write stuck in D 
forever, which then prevents driver unload and everything, requiring 
the test runner to eventually reboot. If you say SIGINT works then 
you can already recover from userspace, no?


Whether or not 10s is enough CI will hopefully tell us. I'd 
probably err on the side of safety and make it longer, but at most 
half from the test runner timeout.

Re: [RFC][PATCH v3 13/33] timers: drm: Use timer_shutdown_sync() before freeing timer

2022-11-04 Thread Tvrtko Ursulin



Hi,

On 04/11/2022 05:41, Steven Rostedt wrote:

From: "Steven Rostedt (Google)" 

Before a timer is freed, timer_shutdown_sync() must be called.

Link: https://lore.kernel.org/all/20220407161745.7d675...@gandalf.local.home/

Cc: "Noralf Trønnes" 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: Tvrtko Ursulin 
Cc: dri-devel@lists.freedesktop.org
Cc: intel-...@lists.freedesktop.org
Signed-off-by: Steven Rostedt (Google) 
---
  drivers/gpu/drm/gud/gud_pipe.c   | 2 +-
  drivers/gpu/drm/i915/i915_sw_fence.c | 2 +-


If it stays all DRM drivers in one patch then I guess it needs to go via 
drm-misc, which for i915 would be okay I think in this case since patch 
is extremely unlikely to clash with anything. Or split it up per driver 
and then we can handle it in drm-intel-next once core functionality is in.


We do however have some more calls to del_timer_sync, where freeing is 
perhaps not immediately next to the site in code, but things definitely 
get freed like on module unload. Would we need to convert all of them to 
avoid some, presumably new, warnings?


Regards,

Tvrtko


  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c
index 7c6dc2bcd14a..08429bdd57cf 100644
--- a/drivers/gpu/drm/gud/gud_pipe.c
+++ b/drivers/gpu/drm/gud/gud_pipe.c
@@ -272,7 +272,7 @@ static int gud_usb_bulk(struct gud_device *gdrm, size_t len)
  
  	usb_sg_wait();
  
-	if (!del_timer_sync(&ctx.timer))
+	if (!timer_shutdown_sync(&ctx.timer))
ret = -ETIMEDOUT;
else if (ctx.sgr.status < 0)
ret = ctx.sgr.status;
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c 
b/drivers/gpu/drm/i915/i915_sw_fence.c
index 6fc0d1b89690..bfaa9a67dc35 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -465,7 +465,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk)
struct i915_sw_dma_fence_cb_timer *cb =
container_of(wrk, typeof(*cb), work);
  
-	del_timer_sync(&cb->timer);
+	timer_shutdown_sync(&cb->timer);
dma_fence_put(cb->dma);
  
  	kfree_rcu(cb, rcu);


Re: [Intel-gfx] [PATCH v2 2/2] drm/i915/guc: Don't deadlock busyness stats vs reset

2022-11-03 Thread Tvrtko Ursulin



On 02/11/2022 19:21, john.c.harri...@intel.com wrote:

From: John Harrison 

The engine busyness stats has a worker function to do things like
64bit extend the 32bit hardware counters. The GuC's reset prepare
function flushes out this worker function to ensure no corruption
happens during the reset. Unfortunately, the worker function has an
infinite wait for active resets to finish before doing its work. Thus
a deadlock would occur if the worker function had actually started
just as the reset starts.

The function being used to lock the reset-in-progress mutex is called
intel_gt_reset_trylock(). However, as noted it does not follow
standard 'trylock' conventions and exit if already locked. So rename
the current _trylock function to intel_gt_reset_lock_interruptible(),
which is the behaviour it actually provides. In addition, add a new
implementation of _trylock and call that from the busyness stats
worker instead.

v2: Rename existing trylock to interruptible rather than trying to
preserve the existing (confusing) naming scheme (review comments from
Tvrtko).

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gem/i915_gem_mman.c   |  2 +-
  drivers/gpu/drm/i915/gt/intel_reset.c  | 18 --
  drivers/gpu/drm/i915/gt/intel_reset.h  |  1 +
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c  |  4 +++-
  4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index e63329bc80659..c29efdef8313a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -330,7 +330,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
if (ret)
goto err_rpm;
  
-	ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+	ret = intel_gt_reset_lock_interruptible(ggtt->vm.gt, &srcu);
if (ret)
goto err_pages;
  
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c

index 3159df6cdd492..24736ebee17c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1407,15 +1407,19 @@ void intel_gt_handle_error(struct intel_gt *gt,
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
  }
  
-int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)

+static int _intel_gt_reset_lock(struct intel_gt *gt, int *srcu, bool retry)
  {
	might_lock(&gt->reset.backoff_srcu);
-   might_sleep();
+   if (retry)
+   might_sleep();
  
  	rcu_read_lock();

	while (test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
rcu_read_unlock();
  
+		if (!retry)

+   return -EBUSY;
+
if (wait_event_interruptible(gt->reset.queue,
 !test_bit(I915_RESET_BACKOFF,
				   &gt->reset.flags)))
@@ -1429,6 +1433,16 @@ int intel_gt_reset_trylock(struct intel_gt *gt, int 
*srcu)
return 0;
  }
  
+int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)

+{
+   return _intel_gt_reset_lock(gt, srcu, false);
+}
+
+int intel_gt_reset_lock_interruptible(struct intel_gt *gt, int *srcu)
+{
+   return _intel_gt_reset_lock(gt, srcu, true);
+}
+
  void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
  __releases(&gt->reset.backoff_srcu)
  {
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h 
b/drivers/gpu/drm/i915/gt/intel_reset.h
index adc734e673870..25c975b6e8fc0 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -39,6 +39,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine,
  void __i915_request_reset(struct i915_request *rq, bool guilty);
  
  int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);

+int __must_check intel_gt_reset_lock_interruptible(struct intel_gt *gt, int 
*srcu);
  void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
  
  void intel_gt_set_wedged(struct intel_gt *gt);

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 941613be3b9dd..92e514061d20b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1401,7 +1401,9 @@ static void guc_timestamp_ping(struct work_struct *wrk)
  
  	/*

 * Synchronize with gt reset to make sure the worker does not
-* corrupt the engine/guc stats.
+* corrupt the engine/guc stats. NB: can't actually block waiting
+* for a reset to complete as the reset requires flushing out
+* this worker thread if started. So waiting would deadlock.
 */
	ret = intel_gt_reset_trylock(gt, &srcu);
if (ret)


LGTM but I don't remember fully how ping worker and reset interact so 
I'll let Umesh r-b. Like is it okay to skip the ping or we'd need to 
re-schedule it ASAP due wrap issues? Maybe reset makes that 

Re: [Intel-gfx] [PATCH] drm/i915: Don't wait forever in drop_caches

2022-11-03 Thread Tvrtko Ursulin



On 03/11/2022 09:18, Tvrtko Ursulin wrote:


On 03/11/2022 01:33, John Harrison wrote:

On 11/2/2022 07:20, Tvrtko Ursulin wrote:

On 02/11/2022 12:12, Jani Nikula wrote:

On Tue, 01 Nov 2022, john.c.harri...@intel.com wrote:

From: John Harrison 

At the end of each test, IGT does a drop caches call via sysfs with


sysfs?
Sorry, that was meant to say debugfs. I've also been working on some 
sysfs IGT issues and evidently got my wires crossed!





special flags set. One of the possible paths waits for idle with an
infinite timeout. That causes problems for debugging issues when CI
catches a "can't go idle" test failure. Best case, the CI system times
out (after 90s), attempts a bunch of state dump actions and then
reboots the system to recover it. Worst case, the CI system can't do
anything at all and then times out (after 1000s) and simply reboots.
Sometimes a serial port log of dmesg might be available, sometimes 
not.


So rather than making life hard for ourselves, change the timeout to
be 10s rather than infinite. Also, trigger the standard
wedge/reset/recover sequence so that testing can continue with a
working system (if possible).

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/i915_debugfs.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c

index ae987e92251dd..9d916fbbfc27c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -641,6 +641,9 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
    DROP_RESET_ACTIVE | \
    DROP_RESET_SEQNO | \
    DROP_RCU)
+
+#define DROP_IDLE_TIMEOUT    (HZ * 10)


I915_IDLE_ENGINES_TIMEOUT is defined in i915_drv.h. It's also only used
here.


So move here, dropping i915 prefix, next to the newly proposed one?

Sure, can do that.




I915_GEM_IDLE_TIMEOUT is defined in i915_gem.h. It's only used in
gt/intel_gt.c.


Move there and rename to GT_IDLE_TIMEOUT?


I915_GT_SUSPEND_IDLE_TIMEOUT is defined and used only in intel_gt_pm.c.


No action needed, maybe drop i915 prefix if wanted.

These two are totally unrelated and in code not being touched by this 
change. I would rather not conflate changing random other things with 
fixing this specific issue.



I915_IDLE_ENGINES_TIMEOUT is in ms, the rest are in jiffies.


Add _MS suffix if wanted.


My head spins.


I follow and raise that the newly proposed DROP_IDLE_TIMEOUT applies 
to DROP_ACTIVE and not only DROP_IDLE.
My original intention for the name was that is the 'drop caches 
timeout for intel_gt_wait_for_idle'. Which is quite the mouthful and 
hence abbreviated to DROP_IDLE_TIMEOUT. But yes, I realised later that 
name can be conflated with the DROP_IDLE flag. Will rename.





Things get refactored, code moves around, bits get left behind, who 
knows. No reason to get too worked up. :) As long as people are 
taking a wider view when touching the code base, and are not afraid 
to send cleanups, things should be good.
On the other hand, if every patch gets blocked in code review because 
someone points out some completely unrelated piece of code could be a 
bit better then nothing ever gets fixed. If you spot something that 
you think should be improved, isn't the general idea that you should 
post a patch yourself to improve it?


There's two maintainers per branch and an order of magnitude or two more 
developers so it'd be nice if cleanups would just be incoming on 
self-initiative basis. ;)


For the actual functional change at hand - it would be nice if code 
paths in question could handle SIGINT and then we could punt the 
decision on how long someone wants to wait purely to userspace. But 
it's probably hard and it's only debugfs so whatever.


The code paths in question will already abort on a signal won't they? 
Both intel_gt_wait_for_idle() and intel_guc_wait_for_pending_msg(), 
which is where the uc_wait_for_idle eventually ends up, have an 
'if(signal_pending) return -EINTR;' check. Beyond that, it sounds like 
what you are asking for is a change in the IGT libraries and/or CI 
framework to start sending signals after some specific timeout. That 
seems like a significantly more complex change (in terms of the number 
of entities affected and number of groups involved) and unnecessary.


If you say so, I haven't looked at them all. But if the code path in 
question already aborts on signals then I am not sure what is the patch 
fixing? I assumed you are trying to avoid the write stuck in D forever, 
which then prevents driver unload and everything, requiring the test 
runner to eventually reboot. If you say SIGINT works then you can 
already recover from userspace, no?


Whether or not 10s is enough CI will hopefully tell us. I'd probably 
err on the side of safety and make it longer, but at most half from 
the test runner timeout.
This is supposed to be test clean up. This is not about how long a 
particular 

Re: [Intel-gfx] [PATCH] drm/i915: Don't wait forever in drop_caches

2022-11-03 Thread Tvrtko Ursulin



On 03/11/2022 01:33, John Harrison wrote:

On 11/2/2022 07:20, Tvrtko Ursulin wrote:

On 02/11/2022 12:12, Jani Nikula wrote:

On Tue, 01 Nov 2022, john.c.harri...@intel.com wrote:

From: John Harrison 

At the end of each test, IGT does a drop caches call via sysfs with


sysfs?
Sorry, that was meant to say debugfs. I've also been working on some 
sysfs IGT issues and evidently got my wires crossed!





special flags set. One of the possible paths waits for idle with an
infinite timeout. That causes problems for debugging issues when CI
catches a "can't go idle" test failure. Best case, the CI system times
out (after 90s), attempts a bunch of state dump actions and then
reboots the system to recover it. Worst case, the CI system can't do
anything at all and then times out (after 1000s) and simply reboots.
Sometimes a serial port log of dmesg might be available, sometimes not.

So rather than making life hard for ourselves, change the timeout to
be 10s rather than infinite. Also, trigger the standard
wedge/reset/recover sequence so that testing can continue with a
working system (if possible).

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/i915_debugfs.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c

index ae987e92251dd..9d916fbbfc27c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -641,6 +641,9 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
    DROP_RESET_ACTIVE | \
    DROP_RESET_SEQNO | \
    DROP_RCU)
+
+#define DROP_IDLE_TIMEOUT    (HZ * 10)


I915_IDLE_ENGINES_TIMEOUT is defined in i915_drv.h. It's also only used
here.


So move here, dropping i915 prefix, next to the newly proposed one?

Sure, can do that.




I915_GEM_IDLE_TIMEOUT is defined in i915_gem.h. It's only used in
gt/intel_gt.c.


Move there and rename to GT_IDLE_TIMEOUT?


I915_GT_SUSPEND_IDLE_TIMEOUT is defined and used only in intel_gt_pm.c.


No action needed, maybe drop i915 prefix if wanted.

These two are totally unrelated and in code not being touched by this 
change. I would rather not conflate changing random other things with 
fixing this specific issue.



I915_IDLE_ENGINES_TIMEOUT is in ms, the rest are in jiffies.


Add _MS suffix if wanted.


My head spins.


I follow and raise that the newly proposed DROP_IDLE_TIMEOUT applies 
to DROP_ACTIVE and not only DROP_IDLE.
My original intention for the name was that is the 'drop caches timeout 
for intel_gt_wait_for_idle'. Which is quite the mouthful and hence 
abbreviated to DROP_IDLE_TIMEOUT. But yes, I realised later that name 
can be conflated with the DROP_IDLE flag. Will rename.





Things get refactored, code moves around, bits get left behind, who 
knows. No reason to get too worked up. :) As long as people are taking 
a wider view when touching the code base, and are not afraid to send 
cleanups, things should be good.
On the other hand, if every patch gets blocked in code review because 
someone points out some completely unrelated piece of code could be a 
bit better then nothing ever gets fixed. If you spot something that you 
think should be improved, isn't the general idea that you should post a 
patch yourself to improve it?


There's two maintainers per branch and an order of magnitude or two more 
developers so it'd be nice if cleanups would just be incoming on 
self-initiative basis. ;)


For the actual functional change at hand - it would be nice if code 
paths in question could handle SIGINT and then we could punt the 
decision on how long someone wants to wait purely to userspace. But 
it's probably hard and it's only debugfs so whatever.


The code paths in question will already abort on a signal won't they? 
Both intel_gt_wait_for_idle() and intel_guc_wait_for_pending_msg(), 
which is where the uc_wait_for_idle eventually ends up, have an 
'if(signal_pending) return -EINTR;' check. Beyond that, it sounds like 
what you are asking for is a change in the IGT libraries and/or CI 
framework to start sending signals after some specific timeout. That 
seems like a significantly more complex change (in terms of the number 
of entities affected and number of groups involved) and unnecessary.


If you say so, I haven't looked at them all. But if the code path in 
question already aborts on signals then I am not sure what is the patch 
fixing? I assumed you are trying to avoid the write stuck in D forever, 
which then prevents driver unload and everything, requiring the test 
runner to eventually reboot. If you say SIGINT works then you can 
already recover from userspace, no?


Whether or not 10s is enough CI will hopefully tell us. I'd probably 
err on the side of safety and make it longer, but at most half from 
the test runner timeout.
This is supposed to be test clean up. This is not about how long a 
particular test takes to complete but about how long

[PULL] drm-intel-fixes

2022-11-03 Thread Tvrtko Ursulin
Hi Dave, Daniel,

A few fixes for 6.1.

On the display side fixed a race condition in accessing DKL PHY registers
(TGL+), fixed LVDS EDID fixed mode setup and fixed SDVO invalid mode
filtering. On the GEM side fix running under Xen and use DMA API directly
instead of special casing for SWIOTLB only.

drm-intel-fixes-2022-11-03:
- Add locking around DKL PHY register accesses (Imre Deak)
- Stop abusing swiotlb_max_segment (Robert Beckett)
- Filter out invalid outputs more sensibly (Ville Syrjälä)
- Setup DDC fully before output init (Ville Syrjälä)
- Simplify intel_panel_add_edid_alt_fixed_modes() (Ville Syrjälä)
- Grab mode_config.mutex during LVDS init to avoid WARNs (Ville Syrjälä)
The following changes since commit 30a0b95b1335e12efef89dd78518ed3e4a71a763:

  Linux 6.1-rc3 (2022-10-30 15:19:28 -0700)

are available in the Git repository at:

  git://anongit.freedesktop.org/drm/drm-intel tags/drm-intel-fixes-2022-11-03

for you to fetch changes up to 12caf46cf4fc92b1c3884cb363ace2e12732fd2f:

  drm/i915/sdvo: Grab mode_config.mutex during LVDS init to avoid WARNs 
(2022-10-31 14:09:15 +)


- Add locking around DKL PHY register accesses (Imre Deak)
- Stop abusing swiotlb_max_segment (Robert Beckett)
- Filter out invalid outputs more sensibly (Ville Syrjälä)
- Setup DDC fully before output init (Ville Syrjälä)
- Simplify intel_panel_add_edid_alt_fixed_modes() (Ville Syrjälä)
- Grab mode_config.mutex during LVDS init to avoid WARNs (Ville Syrjälä)


Imre Deak (1):
  drm/i915/tgl+: Add locking around DKL PHY register accesses

Robert Beckett (1):
  drm/i915: stop abusing swiotlb_max_segment

Ville Syrjälä (4):
  drm/i915/sdvo: Filter out invalid outputs more sensibly
  drm/i915/sdvo: Setup DDC fully before output init
  drm/i915: Simplify intel_panel_add_edid_alt_fixed_modes()
  drm/i915/sdvo: Grab mode_config.mutex during LVDS init to avoid WARNs

 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/display/intel_ddi.c   |  68 ++---
 drivers/gpu/drm/i915/display/intel_display_core.h  |   8 ++
 .../drm/i915/display/intel_display_power_well.c|   7 +-
 drivers/gpu/drm/i915/display/intel_dkl_phy.c   | 109 +
 drivers/gpu/drm/i915/display/intel_dkl_phy.h   |  24 +
 drivers/gpu/drm/i915/display/intel_dp.c|   2 +-
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c  |  59 +--
 drivers/gpu/drm/i915/display/intel_lvds.c  |   3 +-
 drivers/gpu/drm/i915/display/intel_panel.c |   4 +-
 drivers/gpu/drm/i915/display/intel_panel.h |   2 +-
 drivers/gpu/drm/i915/display/intel_sdvo.c  |  64 +++-
 drivers/gpu/drm/i915/gem/i915_gem_internal.c   |  19 +---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c|   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c|   2 +-
 drivers/gpu/drm/i915/i915_driver.c |   1 +
 drivers/gpu/drm/i915/i915_reg.h|   3 +
 drivers/gpu/drm/i915/i915_scatterlist.h|  34 ---
 19 files changed, 277 insertions(+), 139 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/display/intel_dkl_phy.c
 create mode 100644 drivers/gpu/drm/i915/display/intel_dkl_phy.h


Re: [Intel-gfx] [PATCH] drm/i915: Don't wait forever in drop_caches

2022-11-02 Thread Tvrtko Ursulin



On 02/11/2022 12:12, Jani Nikula wrote:

On Tue, 01 Nov 2022, john.c.harri...@intel.com wrote:

From: John Harrison 

At the end of each test, IGT does a drop caches call via sysfs with


sysfs?


special flags set. One of the possible paths waits for idle with an
infinite timeout. That causes problems for debugging issues when CI
catches a "can't go idle" test failure. Best case, the CI system times
out (after 90s), attempts a bunch of state dump actions and then
reboots the system to recover it. Worst case, the CI system can't do
anything at all and then times out (after 1000s) and simply reboots.
Sometimes a serial port log of dmesg might be available, sometimes not.

So rather than making life hard for ourselves, change the timeout to
be 10s rather than infinite. Also, trigger the standard
wedge/reset/recover sequence so that testing can continue with a
working system (if possible).

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/i915_debugfs.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index ae987e92251dd..9d916fbbfc27c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -641,6 +641,9 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
  DROP_RESET_ACTIVE | \
  DROP_RESET_SEQNO | \
  DROP_RCU)
+
+#define DROP_IDLE_TIMEOUT  (HZ * 10)


I915_IDLE_ENGINES_TIMEOUT is defined in i915_drv.h. It's also only used
here.


So move here, dropping i915 prefix, next to the newly proposed one?


I915_GEM_IDLE_TIMEOUT is defined in i915_gem.h. It's only used in
gt/intel_gt.c.


Move there and rename to GT_IDLE_TIMEOUT?


I915_GT_SUSPEND_IDLE_TIMEOUT is defined and used only in intel_gt_pm.c.


No action needed, maybe drop i915 prefix if wanted.


I915_IDLE_ENGINES_TIMEOUT is in ms, the rest are in jiffies.


Add _MS suffix if wanted.


My head spins.


I follow and raise that the newly proposed DROP_IDLE_TIMEOUT applies to 
DROP_ACTIVE and not only DROP_IDLE.


Things get refactored, code moves around, bits get left behind, who 
knows. No reason to get too worked up. :) As long as people are taking a 
wider view when touching the code base, and are not afraid to send 
cleanups, things should be good.


For the actual functional change at hand - it would be nice if code 
paths in question could handle SIGINT and then we could punt the 
decision on how long someone wants to wait purely to userspace. But it's 
probably hard and it's only debugfs so whatever.


Whether or not 10s is enough CI will hopefully tell us. I'd probably err 
on the side of safety and make it longer, but at most half from the test 
runner timeout.


I am not convinced that wedging is correct though. Conceptually could be 
just that the timeout is too short. What does wedging really give us, on 
top of limiting the wait, when latter AFAIU is the key factor which 
would prevent the need to reboot the machine?


Regards,

Tvrtko


Re: [Intel-gfx] [PATCH 2/2] drm/i915/guc: Don't deadlock busyness stats vs reset

2022-11-02 Thread Tvrtko Ursulin



On 01/11/2022 16:56, John Harrison wrote:

On 11/1/2022 02:58, Tvrtko Ursulin wrote:

On 31/10/2022 18:30, John Harrison wrote:

On 10/31/2022 05:51, Tvrtko Ursulin wrote:

On 31/10/2022 10:09, Tvrtko Ursulin wrote:

On 28/10/2022 20:46, john.c.harri...@intel.com wrote:

From: John Harrison 

The engine busyness stats has a worker function to do things like
64bit extend the 32bit hardware counters. The GuC's reset prepare
function flushes out this worker function to ensure no corruption
happens during the reset. Unfortunately, the worker function has an
infinite wait for active resets to finish before doing its work. Thus
a deadlock would occur if the worker function had actually started
just as the reset starts.

Update the worker to abort if a reset is in progress rather than
waiting for it to complete. It will still acquire the reset lock in
the case where a reset was not already in progress. So the processing
is still safe from corruption, but the deadlock can no longer occur.

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/intel_reset.c | 15 
++-

  drivers/gpu/drm/i915/gt/intel_reset.h |  1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 --
  3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
b/drivers/gpu/drm/i915/gt/intel_reset.c

index 3159df6cdd492..2f48c6e4420ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1407,7 +1407,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
  intel_runtime_pm_put(gt->uncore->rpm, wakeref);
  }
-int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
+static int _intel_gt_reset_trylock(struct intel_gt *gt, int 
*srcu, bool retry)

  {
  might_lock(>reset.backoff_srcu);
  might_sleep();
@@ -1416,6 +1416,9 @@ int intel_gt_reset_trylock(struct intel_gt 
*gt, int *srcu)

  while (test_bit(I915_RESET_BACKOFF, >reset.flags)) {
  rcu_read_unlock();
+    if (!retry)
+    return -EBUSY;
+
  if (wait_event_interruptible(gt->reset.queue,
   !test_bit(I915_RESET_BACKOFF,
>reset.flags)))


Would it be more obvious to rename the existing semantics to 
intel_gt_reset_interruptible(), while the flavour you add in this 
patch truly is trylock? I am not sure, since it's all a bit 
special, but trylock sure feels confusing if it can sleep forever...
To me, it would seem totally more obvious to have a function called 
'trylock' not wait forever until it can manage to acquire the lock. 
However, according to '2caffbf1176256 drm/i915: Revoke mmaps and 
prevent access to fence registers across reset', the current 
behaviour is exactly how the code was originally written and 
intended. It hasn't just mutated into some confused evolution a 
thousand patches later. So I figure there is some subtle but 
important reason why it was named how it is named and yet does what 
it does. Therefore it seemed safest to not change it unnecessarily.


Yeah I looked at that but honestly I don't see the trylock semantics 
anywhere. The only failure to lock path comes from 
wait_event_interruptible. It could have easily been just a naming mishap.


And I find adding a retry parameter to something called trylock makes 
this even more non-intuitive and would personally rather rename it 
all. Proof in the pudding is that the trylock naming did bite during 
development and review of the code this patch is now fixing.


I do however understand your point about a degree of uncertainty but 
my feeling is to rather err on the side of obvious naming. Shall we 
ask for a third opinion?
Umesh had commented (internally) that the naming seems wrong and would 
be good to change it. So we already have a third :).


To be clear, you are thinking to keep the wrappers but rename to 
intel_gt_reset_trylock() [retry = false] and 
intel_gt_reset_interruptible() [retry = true]? Which will obviously 
involve updating all but one existing user to use the interruptible name 
as the existing name will change behaviour in a backwards breaking manner.


Yes, intel_gt_reset_lock_interruptible and intel_gt_reset_trylock.

I don't get the behaviour breaking part? Only the name will change.

And amount of churn does not seem a problem:

$ grep intel_gt_reset_trylock -r .
./gem/i915_gem_mman.c:  ret = intel_gt_reset_trylock(ggtt->vm.gt, );
./gt/uc/intel_guc_submission.c: ret = intel_gt_reset_trylock(gt, );
./gt/intel_reset.c:int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
./gt/intel_reset.h:int __must_check intel_gt_reset_trylock(struct intel_gt *gt, 
int *srcu)

Regards,

Tvrtko



John.



Oh and might_sleep() shouldn't be there with the trylock version - I 
mean any flavour of the real trylock.
You mean if the code is split into two completely separate functions? 
Or do you just mean to wrap the might_sleep() call with 'if(!retry)'?


And just to be totally clear, t

Re: [Intel-gfx] [PATCH 2/2] drm/i915/guc: Don't deadlock busyness stats vs reset

2022-11-01 Thread Tvrtko Ursulin



On 31/10/2022 18:30, John Harrison wrote:

On 10/31/2022 05:51, Tvrtko Ursulin wrote:

On 31/10/2022 10:09, Tvrtko Ursulin wrote:

On 28/10/2022 20:46, john.c.harri...@intel.com wrote:

From: John Harrison 

The engine busyness stats has a worker function to do things like
64bit extend the 32bit hardware counters. The GuC's reset prepare
function flushes out this worker function to ensure no corruption
happens during the reset. Unfortunately, the worker function has an
infinite wait for active resets to finish before doing its work. Thus
a deadlock would occur if the worker function had actually started
just as the reset starts.

Update the worker to abort if a reset is in progress rather than
waiting for it to complete. It will still acquire the reset lock in
the case where a reset was not already in progress. So the processing
is still safe from corruption, but the deadlock can no longer occur.

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/intel_reset.c | 15 
++-

  drivers/gpu/drm/i915/gt/intel_reset.h |  1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 --
  3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
b/drivers/gpu/drm/i915/gt/intel_reset.c

index 3159df6cdd492..2f48c6e4420ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1407,7 +1407,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
  intel_runtime_pm_put(gt->uncore->rpm, wakeref);
  }
-int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
+static int _intel_gt_reset_trylock(struct intel_gt *gt, int *srcu, 
bool retry)

  {
  might_lock(>reset.backoff_srcu);
  might_sleep();
@@ -1416,6 +1416,9 @@ int intel_gt_reset_trylock(struct intel_gt 
*gt, int *srcu)

  while (test_bit(I915_RESET_BACKOFF, >reset.flags)) {
  rcu_read_unlock();
+    if (!retry)
+    return -EBUSY;
+
  if (wait_event_interruptible(gt->reset.queue,
   !test_bit(I915_RESET_BACKOFF,
 >reset.flags)))


Would it be more obvious to rename the existing semantics to 
intel_gt_reset_interruptible(), while the flavour you add in this 
patch truly is trylock? I am not sure, since it's all a bit special, 
but trylock sure feels confusing if it can sleep forever...
To me, it would seem totally more obvious to have a function called 
'trylock' not wait forever until it can manage to acquire the lock. 
However, according to '2caffbf1176256 drm/i915: Revoke mmaps and prevent 
access to fence registers across reset', the current behaviour is 
exactly how the code was originally written and intended. It hasn't just 
mutated into some confused evolution a thousand patches later. So I 
figure there is some subtle but important reason why it was named how it 
is named and yet does what it does. Therefore it seemed safest to not 
change it unnecessarily.


Yeah I looked at that but honestly I don't see the trylock semantics 
anywhere. The only failure to lock path comes from 
wait_event_interruptible. It could have easily been just a naming mishap.


And I find adding a retry parameter to something called trylock makes 
this even more non-intuitive and would personally rather rename it all. 
Proof in the pudding is that the trylock naming did bite during 
development and review of the code this patch is now fixing.


I do however understand your point about a degree of uncertainty but my 
feeling is to rather err on the side of obvious naming. Shall we ask for 
a third opinion?


Oh and might_sleep() shouldn't be there with the trylock version - I 
mean any flavour of the real trylock.
You mean if the code is split into two completely separate functions? Or 
do you just mean to wrap the might_sleep() call with 'if(!retry)'?


And just to be totally clear, the unconditional call to rcu_read_lock() 
is not something that can sleep? One doesn't need a might_sleep() before 
doing that lock?


Correct, rcu_read_lock() can not sleep - it just disables preemption. 
So leaving the unconditional might_sleep() would have opportunity for 
false positives.


Regards,

Tvrtko


Re: [Intel-gfx] [PATCH 3/5] drm/i915/mtl: add GSC CS interrupt support

2022-10-31 Thread Tvrtko Ursulin



On 28/10/2022 18:00, Ceraolo Spurio, Daniele wrote:

On 10/28/2022 1:38 AM, Tvrtko Ursulin wrote:


On 27/10/2022 23:15, Daniele Ceraolo Spurio wrote:

The GSC CS re-uses the same interrupt bits that the GSC used in older
platforms. This means that we can now have an engine interrupt coming
out of OTHER_CLASS, so we need to handle that appropriately.

Signed-off-by: Daniele Ceraolo Spurio 
Cc: Matt Roper 
---
  drivers/gpu/drm/i915/gt/intel_gt_irq.c | 78 ++
  1 file changed, 43 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c 
b/drivers/gpu/drm/i915/gt/intel_gt_irq.c

index f26882fdc24c..34ff1ee7e931 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -81,35 +81,27 @@ gen11_other_irq_handler(struct intel_gt *gt, 
const u8 instance,

    instance, iir);
  }
  -static void
-gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
- const u8 instance, const u16 iir)
+static struct intel_gt *pick_gt(struct intel_gt *gt, u8 class, u8 
instance)

  {
-    struct intel_engine_cs *engine;
-
-    /*
- * Platforms with standalone media have their media engines in 
another

- * GT.
- */
-    if (MEDIA_VER(gt->i915) >= 13 &&
-    (class == VIDEO_DECODE_CLASS || class == 
VIDEO_ENHANCEMENT_CLASS)) {

-    if (!gt->i915->media_gt)
-    goto err;
+    struct intel_gt *media_gt = gt->i915->media_gt;
  -    gt = gt->i915->media_gt;
+    /* we expect the non-media gt to be passed in */
+    GEM_BUG_ON(gt == media_gt);
+
+    if (!media_gt)
+    return gt;
+
+    switch (class) {
+    case VIDEO_DECODE_CLASS:
+    case VIDEO_ENHANCEMENT_CLASS:
+    return media_gt;
+    case OTHER_CLASS:
+    if (instance == OTHER_GSC_INSTANCE && HAS_ENGINE(media_gt, 
GSC0))

+    return media_gt;
+    fallthrough;
+    default:
+    return gt;
  }
-
-    if (instance <= MAX_ENGINE_INSTANCE)
-    engine = gt->engine_class[class][instance];
-    else
-    engine = NULL;
-
-    if (likely(engine))
-    return intel_engine_cs_irq(engine, iir);
-
-err:
-    WARN_ONCE(1, "unhandled engine interrupt class=0x%x, 
instance=0x%x\n",

-  class, instance);
  }
    static void
@@ -118,12 +110,24 @@ gen11_gt_identity_handler(struct intel_gt *gt, 
const u32 identity)

  const u8 class = GEN11_INTR_ENGINE_CLASS(identity);
  const u8 instance = GEN11_INTR_ENGINE_INSTANCE(identity);
  const u16 intr = GEN11_INTR_ENGINE_INTR(identity);
+    struct intel_engine_cs *engine;
    if (unlikely(!intr))
  return;
  -    if (class <= COPY_ENGINE_CLASS || class == COMPUTE_CLASS)
-    return gen11_engine_irq_handler(gt, class, instance, intr);
+    /*
+ * Platforms with standalone media have the media and GSC 
engines in

+ * another GT.
+ */
+    gt = pick_gt(gt, class, instance);
+
+    if (class <= MAX_ENGINE_CLASS && instance <= MAX_ENGINE_INSTANCE)
+    engine = gt->engine_class[class][instance];
+    else
+    engine = NULL;
+
+    if (engine)
+    return intel_engine_cs_irq(engine, intr);


Drive by observation - you could fold the above two ifs into one since 
engine appears unused afterwards.


engine can be NULL in both branches of the if statement, so to get a 
unified if we'd have to do something like:


if (class <= MAX_ENGINE_CLASS && instance <= MAX_ENGINE_INSTANCE) {
         struct intel_engine_cs *engine = 
gt->engine_class[class][instance];

         if (engine)
                 return intel_engine_cs_irq(engine, intr);
}

Is this what you are suggesting?


Right, two ifs are needed after all. Well at least it would avoid the 
pointless engine = NULL assignment. Up to you.


Absence of any out-of-range class/instance logging is intentional?

Regards,

Tvrtko


Re: [Intel-gfx] [PATCH 2/2] drm/i915/guc: Don't deadlock busyness stats vs reset

2022-10-31 Thread Tvrtko Ursulin



On 31/10/2022 10:09, Tvrtko Ursulin wrote:


On 28/10/2022 20:46, john.c.harri...@intel.com wrote:

From: John Harrison 

The engine busyness stats has a worker function to do things like
64bit extend the 32bit hardware counters. The GuC's reset prepare
function flushes out this worker function to ensure no corruption
happens during the reset. Unfortunately, the worker function has an
infinite wait for active resets to finish before doing its work. Thus
a deadlock would occur if the worker function had actually started
just as the reset starts.

Update the worker to abort if a reset is in progress rather than
waiting for it to complete. It will still acquire the reset lock in
the case where a reset was not already in progress. So the processing
is still safe from corruption, but the deadlock can no longer occur.

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/intel_reset.c | 15 ++-
  drivers/gpu/drm/i915/gt/intel_reset.h |  1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 --
  3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
b/drivers/gpu/drm/i915/gt/intel_reset.c

index 3159df6cdd492..2f48c6e4420ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1407,7 +1407,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
  intel_runtime_pm_put(gt->uncore->rpm, wakeref);
  }
-int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
+static int _intel_gt_reset_trylock(struct intel_gt *gt, int *srcu, 
bool retry)

  {
  might_lock(>reset.backoff_srcu);
  might_sleep();
@@ -1416,6 +1416,9 @@ int intel_gt_reset_trylock(struct intel_gt *gt, 
int *srcu)

  while (test_bit(I915_RESET_BACKOFF, >reset.flags)) {
  rcu_read_unlock();
+    if (!retry)
+    return -EBUSY;
+
  if (wait_event_interruptible(gt->reset.queue,
   !test_bit(I915_RESET_BACKOFF,
 >reset.flags)))


Would it be more obvious to rename the existing semantics to 
intel_gt_reset_interruptible(), while the flavour you add in this patch 
truly is trylock? I am not sure, since it's all a bit special, but 
trylock sure feels confusing if it can sleep forever...


Oh and might_sleep() shouldn't be there with the trylock version - I 
mean any flavour of the real trylock.


Regards,

Tvrtko


Re: [Intel-gfx] [PATCH 2/2] drm/i915/guc: Don't deadlock busyness stats vs reset

2022-10-31 Thread Tvrtko Ursulin



On 28/10/2022 20:46, john.c.harri...@intel.com wrote:

From: John Harrison 

The engine busyness stats has a worker function to do things like
64bit extend the 32bit hardware counters. The GuC's reset prepare
function flushes out this worker function to ensure no corruption
happens during the reset. Unfortunately, the worker function has an
infinite wait for active resets to finish before doing its work. Thus
a deadlock would occur if the worker function had actually started
just as the reset starts.

Update the worker to abort if a reset is in progress rather than
waiting for it to complete. It will still acquire the reset lock in
the case where a reset was not already in progress. So the processing
is still safe from corruption, but the deadlock can no longer occur.

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/intel_reset.c | 15 ++-
  drivers/gpu/drm/i915/gt/intel_reset.h |  1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 --
  3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
b/drivers/gpu/drm/i915/gt/intel_reset.c
index 3159df6cdd492..2f48c6e4420ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1407,7 +1407,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
  }
  
-int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)

+static int _intel_gt_reset_trylock(struct intel_gt *gt, int *srcu, bool retry)
  {
might_lock(>reset.backoff_srcu);
might_sleep();
@@ -1416,6 +1416,9 @@ int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
while (test_bit(I915_RESET_BACKOFF, >reset.flags)) {
rcu_read_unlock();
  
+		if (!retry)

+   return -EBUSY;
+
if (wait_event_interruptible(gt->reset.queue,
 !test_bit(I915_RESET_BACKOFF,
   >reset.flags)))


Would it be more obvious to rename the existing semantics to 
intel_gt_reset_interruptible(), while the flavour you add in this patch 
truly is trylock? I am not sure, since it's all a bit special, but 
trylock sure feels confusing if it can sleep forever...


Regards,

Tvrtko


@@ -1429,6 +1432,16 @@ int intel_gt_reset_trylock(struct intel_gt *gt, int 
*srcu)
return 0;
  }
  
+int intel_gt_reset_trylock_noretry(struct intel_gt *gt, int *srcu)

+{
+   return _intel_gt_reset_trylock(gt, srcu, false);
+}
+
+int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
+{
+   return _intel_gt_reset_trylock(gt, srcu, true);
+}
+
  void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
  __releases(>reset.backoff_srcu)
  {
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h 
b/drivers/gpu/drm/i915/gt/intel_reset.h
index adc734e673870..7f863726eb6a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -38,6 +38,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine,
  
  void __i915_request_reset(struct i915_request *rq, bool guilty);
  
+int __must_check intel_gt_reset_trylock_noretry(struct intel_gt *gt, int *srcu);

  int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);
  void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
  
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

index 941613be3b9dd..1fa1bc7dde3df 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1401,9 +1401,11 @@ static void guc_timestamp_ping(struct work_struct *wrk)
  
  	/*

 * Synchronize with gt reset to make sure the worker does not
-* corrupt the engine/guc stats.
+* corrupt the engine/guc stats. NB: can't actually block waiting
+* for a reset to complete as the reset requires flushing out
+* any running worker thread. So waiting would deadlock.
 */
-   ret = intel_gt_reset_trylock(gt, );
+   ret = intel_gt_reset_trylock_noretry(gt, );
if (ret)
return;
  


Re: [Intel-gfx] [PATCH 3/5] drm/i915/mtl: add GSC CS interrupt support

2022-10-28 Thread Tvrtko Ursulin



On 27/10/2022 23:15, Daniele Ceraolo Spurio wrote:

The GSC CS re-uses the same interrupt bits that the GSC used in older
platforms. This means that we can now have an engine interrupt coming
out of OTHER_CLASS, so we need to handle that appropriately.

Signed-off-by: Daniele Ceraolo Spurio 
Cc: Matt Roper 
---
  drivers/gpu/drm/i915/gt/intel_gt_irq.c | 78 ++
  1 file changed, 43 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c 
b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f26882fdc24c..34ff1ee7e931 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -81,35 +81,27 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 
instance,
  instance, iir);
  }
  
-static void

-gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
-const u8 instance, const u16 iir)
+static struct intel_gt *pick_gt(struct intel_gt *gt, u8 class, u8 instance)
  {
-   struct intel_engine_cs *engine;
-
-   /*
-* Platforms with standalone media have their media engines in another
-* GT.
-*/
-   if (MEDIA_VER(gt->i915) >= 13 &&
-   (class == VIDEO_DECODE_CLASS || class == VIDEO_ENHANCEMENT_CLASS)) {
-   if (!gt->i915->media_gt)
-   goto err;
+   struct intel_gt *media_gt = gt->i915->media_gt;
  
-		gt = gt->i915->media_gt;

+   /* we expect the non-media gt to be passed in */
+   GEM_BUG_ON(gt == media_gt);
+
+   if (!media_gt)
+   return gt;
+
+   switch (class) {
+   case VIDEO_DECODE_CLASS:
+   case VIDEO_ENHANCEMENT_CLASS:
+   return media_gt;
+   case OTHER_CLASS:
+   if (instance == OTHER_GSC_INSTANCE && HAS_ENGINE(media_gt, 
GSC0))
+   return media_gt;
+   fallthrough;
+   default:
+   return gt;
}
-
-   if (instance <= MAX_ENGINE_INSTANCE)
-   engine = gt->engine_class[class][instance];
-   else
-   engine = NULL;
-
-   if (likely(engine))
-   return intel_engine_cs_irq(engine, iir);
-
-err:
-   WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
- class, instance);
  }
  
  static void

@@ -118,12 +110,24 @@ gen11_gt_identity_handler(struct intel_gt *gt, const u32 
identity)
const u8 class = GEN11_INTR_ENGINE_CLASS(identity);
const u8 instance = GEN11_INTR_ENGINE_INSTANCE(identity);
const u16 intr = GEN11_INTR_ENGINE_INTR(identity);
+   struct intel_engine_cs *engine;
  
  	if (unlikely(!intr))

return;
  
-	if (class <= COPY_ENGINE_CLASS || class == COMPUTE_CLASS)

-   return gen11_engine_irq_handler(gt, class, instance, intr);
+   /*
+* Platforms with standalone media have the media and GSC engines in
+* another GT.
+*/
+   gt = pick_gt(gt, class, instance);
+
+   if (class <= MAX_ENGINE_CLASS && instance <= MAX_ENGINE_INSTANCE)
+   engine = gt->engine_class[class][instance];
+   else
+   engine = NULL;
+
+   if (engine)
+   return intel_engine_cs_irq(engine, intr);


Drive by observation - you could fold the above two ifs into one since 
engine appears unused afterwards.


Regards,

Tvrtko

  
  	if (class == OTHER_CLASS)

return gen11_other_irq_handler(gt, instance, intr);
@@ -206,7 +210,7 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE,0);
if (CCS_MASK(gt))
intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, 0);
-   if (HAS_HECI_GSC(gt->i915))
+   if (HAS_HECI_GSC(gt->i915) || HAS_ENGINE(gt, GSC0))
intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, 0);
  
  	/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */

@@ -233,7 +237,7 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~0);
if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3))
intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~0);
-   if (HAS_HECI_GSC(gt->i915))
+   if (HAS_HECI_GSC(gt->i915) || HAS_ENGINE(gt, GSC0))
intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~0);
  
  	intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);

@@ -249,7 +253,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
  {
struct intel_uncore *uncore = gt->uncore;
u32 irqs = GT_RENDER_USER_INTERRUPT;
-   const u32 gsc_mask = GSC_IRQ_INTF(0) | GSC_IRQ_INTF(1);
+   u32 gsc_mask = 0;
u32 dmask;
u32 smask;
  
@@ -261,6 +265,11 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)

dmask = irqs << 16 | irqs;
smask = irqs << 16;
  
+	if (HAS_ENGINE(gt, GSC0))

+   gsc_mask = 

Re: [PATCH] drm/i915: stop abusing swiotlb_max_segment

2022-10-27 Thread Tvrtko Ursulin



On 20/10/2022 12:03, Christoph Hellwig wrote:

From: Robert Beckett 

swiotlb_max_segment used to return either the maximum size that swiotlb
could bounce, or for Xen PV PAGE_SIZE even if swiotlb could bounce buffer
larger mappings.  This made i915 on Xen PV work as it bypasses the
coherency aspect of the DMA API and can't cope with bounce buffering
and this avoided bounce buffering for the Xen/PV case.

So instead of adding this hack back, check for Xen/PV directly in i915
for the Xen case and otherwise use the proper DMA API helper to query
the maximum mapping size.

Replace swiotlb_max_segment() calls with dma_max_mapping_size().
In i915_gem_object_get_pages_internal() no longer consider max_segment
only if CONFIG_SWIOTLB is enabled. There can be other (iommu related)
causes of specific max segment sizes.

Fixes: a2daa27c0c61 ("swiotlb: simplify swiotlb_max_segment")
Reported-by: Marek Marczykowski-Górecki 
Signed-off-by: Robert Beckett 
Signed-off-by: Christoph Hellwig 
[hch: added the Xen hack, rewrote the changelog]


Reviewed-by: Tvrtko Ursulin 

I'll merge this in a minute - thanks again for the cleanup!

Regards,

Tvrtko


---
  drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 +++
  drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  2 +-
  drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  4 +--
  drivers/gpu/drm/i915/gem/i915_gem_userptr.c  |  2 +-
  drivers/gpu/drm/i915/i915_scatterlist.h  | 34 
  5 files changed, 29 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15fe..629acb403a2c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -6,7 +6,6 @@
  
  #include 

  #include 
-#include 
  
  #include "i915_drv.h"

  #include "i915_gem.h"
@@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct 
drm_i915_gem_object *obj)
struct scatterlist *sg;
unsigned int sg_page_sizes;
unsigned int npages;
-   int max_order;
+   int max_order = MAX_ORDER;
+   unsigned int max_segment;
gfp_t gfp;
  
-	max_order = MAX_ORDER;

-#ifdef CONFIG_SWIOTLB
-   if (is_swiotlb_active(obj->base.dev->dev)) {
-   unsigned int max_segment;
-
-   max_segment = swiotlb_max_segment();
-   if (max_segment) {
-   max_segment = max_t(unsigned int, max_segment,
-   PAGE_SIZE) >> PAGE_SHIFT;
-   max_order = min(max_order, ilog2(max_segment));
-   }
-   }
-#endif
+   max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
+   max_order = min(max_order, get_order(max_segment));
  
  	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;

if (IS_I965GM(i915) || IS_I965G(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index f42ca1179f373..11125c32dd35d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
struct intel_memory_region *mem = obj->mm.region;
struct address_space *mapping = obj->base.filp->f_mapping;
const unsigned long page_count = obj->base.size / PAGE_SIZE;
-   unsigned int max_segment = i915_sg_segment_size();
+   unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
struct sg_table *st;
struct sgt_iter sgt_iter;
struct page *page;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4f861782c3e85..a4aa9500fa179 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device 
*bdev,
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
-   const unsigned int max_segment = i915_sg_segment_size();
+   const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
struct file *filp = i915_tt->filp;
struct sgt_iter sgt_iter;
@@ -538,7 +538,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct 
ttm_tt *ttm)
ret = sg_alloc_table_from_pages_segment(st,
ttm->pages, ttm->num_pages,
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
-   i915_sg_segment_size(), GFP_KERNEL);
+   i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
if (re

[PULL] drm-intel-fixes

2022-10-27 Thread Tvrtko Ursulin
Hi Dave, Daniel,

Three fixes for the next release candidate: one display training fix, one
new workaround and disabling of autosuspend for DG2 until things can get
properly fixed.

Regards,

Tvrtko

drm-intel-fixes-2022-10-27-1:
- Extend Wa_1607297627 to Alderlake-P (José Roberto de Souza)
- Keep PCI autosuspend control 'on' by default on all dGPU (Anshuman Gupta)
- Reset frl trained flag before restarting FRL training (Ankit Nautiyal)
The following changes since commit 247f34f7b80357943234f93f247a1ae6b6c3a740:

  Linux 6.1-rc2 (2022-10-23 15:27:33 -0700)

are available in the Git repository at:

  git://anongit.freedesktop.org/drm/drm-intel tags/drm-intel-fixes-2022-10-27-1

for you to fetch changes up to 63720a561b3c98199adf0c73e152807f15cc3b7f:

  drm/i915/dp: Reset frl trained flag before restarting FRL training 
(2022-10-24 10:14:57 +0100)


- Extend Wa_1607297627 to Alderlake-P (José Roberto de Souza)
- Keep PCI autosuspend control 'on' by default on all dGPU (Anshuman Gupta)
- Reset frl trained flag before restarting FRL training (Ankit Nautiyal)


Ankit Nautiyal (1):
  drm/i915/dp: Reset frl trained flag before restarting FRL training

Anshuman Gupta (1):
  drm/i915/dgfx: Keep PCI autosuspend control 'on' by default on all dGPU

José Roberto de Souza (1):
  drm/i915: Extend Wa_1607297627 to Alderlake-P

 drivers/gpu/drm/i915/display/intel_dp.c |  2 ++
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  4 ++--
 drivers/gpu/drm/i915/intel_runtime_pm.c | 11 +--
 3 files changed, 13 insertions(+), 4 deletions(-)


[CI] mm/huge_memory: do not clobber swp_entry_t during THP split

2022-10-24 Thread Tvrtko Ursulin
From: Mel Gorman 

On Mon, Oct 24, 2022 at 02:04:50PM +0100, Tvrtko Ursulin wrote:
>
> Hi Mel, mm experts,
>
> With 6.1-rc2 we started hitting the WARN_ON added in 71e2d666ef85 
> ("mm/huge_memory: do not clobber swp_entry_t during THP split") in i915 
> automated CI:
>

Thanks for the report. As shmem pages pages are allocated via vma_alloc_folio
and are compound pages, can you try the following patch please?  If it
still triggers, please post the new oops as it'll include the tail page
information.

--8<--
From: Hugh Dickins 
Subject: [PATCH] mm: prep_compound_tail() clear page->private

Although page allocation always clears page->private in the first page
or head page of an allocation, it has never made a point of clearing
page->private in the tails (though 0 is often what is already there).

But now commit 71e2d666ef85 ("mm/huge_memory: do not clobber swp_entry_t
during THP split") issues a warning when page_tail->private is found to
be non-0 (unless it's swapcache).

Change that warning to dump page_tail (which also dumps head), instead
of just the head: so far we have seen dead0122, dead0003,
dead0001 or 0002 in the raw output for tail private.

We could just delete the warning, but today's consensus appears to want
page->private to be 0, unless there's a good reason for it to be set:
so now clear it in prep_compound_tail() (more general than just for THP;
but not for high order allocation, which makes no pass down the tails).

Fixes: 71e2d666ef85 ("mm/huge_memory: do not clobber swp_entry_t during THP 
split")
Signed-off-by: Hugh Dickins 
Cc: Mel Gorman 
Cc: Matthew Wilcox (Oracle) 
Cc: 
---
 mm/huge_memory.c | 2 +-
 mm/page_alloc.c  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 03fc7e5edf07..561a42567477 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2462,7 +2462,7 @@ static void __split_huge_page_tail(struct page *head, int 
tail,
 * Fix up and warn once if private is unexpectedly set.
 */
if (!folio_test_swapcache(page_folio(head))) {
-   VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, head);
+   VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
page_tail->private = 0;
}
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b5a6c815ae28..218b28ee49ed 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -807,6 +807,7 @@ static void prep_compound_tail(struct page *head, int 
tail_idx)
 
p->mapping = TAIL_MAPPING;
set_compound_head(p, head);
+   set_page_private(p, 0);
 }
 
 void prep_compound_page(struct page *page, unsigned int order)
-- 
2.34.1



Re: [PATCH 2/2] drm/i915/pmu: Connect engine busyness stats from GuC to pmu

2022-10-21 Thread Tvrtko Ursulin



On 27/10/2021 01:48, Umesh Nerlige Ramappa wrote:

[snip]


+static void guc_timestamp_ping(struct work_struct *wrk)
+{
+   struct intel_guc *guc = container_of(wrk, typeof(*guc),
+timestamp.work.work);
+   struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
+   struct intel_gt *gt = guc_to_gt(guc);
+   intel_wakeref_t wakeref;
+   unsigned long flags;
+   int srcu, ret;
+
+   /*
+* Synchronize with gt reset to make sure the worker does not
+* corrupt the engine/guc stats.
+*/
+   ret = intel_gt_reset_trylock(gt, &srcu);
+   if (ret)
+   return;
+
+   spin_lock_irqsave(&guc->timestamp.lock, flags);
+
+   with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+   __update_guc_busyness_stats(guc);


Spotted one splat today: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12268/bat-adlp-4/igt@i915_pm_...@basic-pci-d3-state.html

Could be that reset lock needs to be inside the rpm get. Haven't really thought 
about it much, could you please check?

<4> [300.214744]
<4> [300.214753] ==
<4> [300.214755] WARNING: possible circular locking dependency detected
<4> [300.214758] 6.1.0-rc1-CI_DRM_12268-g86e8558e3283+ #1 Not tainted
<4> [300.214761] --
<4> [300.214762] kworker/10:1H/265 is trying to acquire lock:
<4> [300.214765] 8275e560 (fs_reclaim){+.+.}-{0:0}, at: 
__kmem_cache_alloc_node+0x27/0x170
<4> [300.214780]
but task is already holding lock:
<4> [300.214782] c900013e7e78 
((work_completion)(&(&guc->timestamp.work)->work)){+.+.}-{0:0}, at: 
process_one_work+0x1eb/0x5b0
<4> [300.214793]
which lock already depends on the new lock.
<4> [300.214794]
the existing dependency chain (in reverse order) is:
<4> [300.214796]
-> #2 ((work_completion)(&(&guc->timestamp.work)->work)){+.+.}-{0:0}:
<4> [300.214801]lock_acquire+0xd3/0x310
<4> [300.214806]__flush_work+0x77/0x4e0
<4> [300.214811]__cancel_work_timer+0x14e/0x1f0
<4> [300.214815]intel_guc_submission_reset_prepare+0x7a/0x420 [i915]
<4> [300.215119]intel_uc_reset_prepare+0x44/0x50 [i915]
<4> [300.215360]reset_prepare+0x21/0x80 [i915]
<4> [300.215561]intel_gt_reset+0x143/0x340 [i915]
<4> [300.215757]intel_gt_reset_global+0xeb/0x160 [i915]
<4> [300.215946]intel_gt_handle_error+0x2c2/0x410 [i915]
<4> [300.216137]intel_gt_debugfs_reset_store+0x59/0xc0 [i915]
<4> [300.216333]i915_wedged_set+0xc/0x20 [i915]
<4> [300.216513]simple_attr_write+0xda/0x100
<4> [300.216520]full_proxy_write+0x4e/0x80
<4> [300.216525]vfs_write+0xe3/0x4e0
<4> [300.216531]ksys_write+0x57/0xd0
<4> [300.216535]do_syscall_64+0x37/0x90
<4> [300.216542]entry_SYSCALL_64_after_hwframe+0x63/0xcd
<4> [300.216549]
-> #1 (&gt->reset.mutex){+.+.}-{3:3}:
<4> [300.216556]lock_acquire+0xd3/0x310
<4> [300.216559]i915_gem_shrinker_taints_mutex+0x2d/0x50 [i915]
<4> [300.216799]intel_gt_init_reset+0x61/0x80 [i915]
<4> [300.217018]intel_gt_common_init_early+0x10c/0x190 [i915]
<4> [300.217227]intel_root_gt_init_early+0x44/0x60 [i915]
<4> [300.217434]i915_driver_probe+0x9ab/0xf30 [i915]
<4> [300.217615]i915_pci_probe+0xa5/0x240 [i915]
<4> [300.217796]pci_device_probe+0x95/0x110
<4> [300.217803]really_probe+0xd6/0x350
<4> [300.217811]__driver_probe_device+0x73/0x170
<4> [300.217816]driver_probe_device+0x1a/0x90
<4> [300.217821]__driver_attach+0xbc/0x190
<4> [300.217826]bus_for_each_dev+0x72/0xc0
<4> [300.217831]bus_add_driver+0x1bb/0x210
<4> [300.217835]driver_register+0x66/0xc0
<4> [300.217841]0xa093001f
<4> [300.217844]do_one_initcall+0x53/0x2f0
<4> [300.217849]do_init_module+0x45/0x1c0
<4> [300.217855]load_module+0x1d5e/0x1e90
<4> [300.217859]__do_sys_finit_module+0xaf/0x120
<4> [300.217864]do_syscall_64+0x37/0x90
<4> [300.217869]entry_SYSCALL_64_after_hwframe+0x63/0xcd
<4> [300.217875]
-> #0 (fs_reclaim){+.+.}-{0:0}:
<4> [300.217880]validate_chain+0xb3d/0x2000
<4> [300.217884]__lock_acquire+0x5a4/0xb70
<4> [300.217888]lock_acquire+0xd3/0x310
<4> [300.217891]fs_reclaim_acquire+0xa1/0xd0
<4> [300.217896]__kmem_cache_alloc_node+0x27/0x170
<4> [300.217899]__kmalloc+0x43/0x1a0
<4> [300.217903]acpi_ns_internalize_name+0x44/0x9f
<4> [300.217909]acpi_ns_get_node_unlocked+0x6b/0xd7
<4> [300.217914]acpi_ns_get_node+0x3b/0x54
<4> [300.217918]acpi_get_handle+0x89/0xb7
<4> [300.217922]acpi_has_method+0x1c/0x40
<4> [300.217928]acpi_pci_set_power_state+0x42/0xf0
<4> [300.217935]pci_power_up+0x20/0x1a0
<4> [300.217940]pci_pm_default_resume_early+0x9/0x30
<4> [300.217945]

Re: [PATCH] drm/i915: stop abusing swiotlb_max_segment

2022-10-21 Thread Tvrtko Ursulin



On 20/10/2022 12:03, Christoph Hellwig wrote:

From: Robert Beckett 

swiotlb_max_segment used to return either the maximum size that swiotlb
could bounce, or for Xen PV PAGE_SIZE even if swiotlb could bounce buffer
larger mappings.  This made i915 on Xen PV work as it bypasses the
coherency aspect of the DMA API and can't cope with bounce buffering
and this avoided bounce buffering for the Xen/PV case.

So instead of adding this hack back, check for Xen/PV directly in i915
for the Xen case and otherwise use the proper DMA API helper to query
the maximum mapping size.

Replace swiotlb_max_segment() calls with dma_max_mapping_size().
In i915_gem_object_get_pages_internal() no longer consider max_segment
only if CONFIG_SWIOTLB is enabled. There can be other (iommu related)
causes of specific max segment sizes.

Fixes: a2daa27c0c61 ("swiotlb: simplify swiotlb_max_segment")
Reported-by: Marek Marczykowski-Górecki 
Signed-off-by: Robert Beckett 
Signed-off-by: Christoph Hellwig 
[hch: added the Xen hack, rewrote the changelog]
---
  drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 +++
  drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  2 +-
  drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  4 +--
  drivers/gpu/drm/i915/gem/i915_gem_userptr.c  |  2 +-
  drivers/gpu/drm/i915/i915_scatterlist.h  | 34 
  5 files changed, 29 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15fe..629acb403a2c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -6,7 +6,6 @@
  
  #include 

  #include 
-#include 
  
  #include "i915_drv.h"

  #include "i915_gem.h"
@@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct 
drm_i915_gem_object *obj)
struct scatterlist *sg;
unsigned int sg_page_sizes;
unsigned int npages;
-   int max_order;
+   int max_order = MAX_ORDER;
+   unsigned int max_segment;
gfp_t gfp;
  
-	max_order = MAX_ORDER;

-#ifdef CONFIG_SWIOTLB
-   if (is_swiotlb_active(obj->base.dev->dev)) {
-   unsigned int max_segment;
-
-   max_segment = swiotlb_max_segment();
-   if (max_segment) {
-   max_segment = max_t(unsigned int, max_segment,
-   PAGE_SIZE) >> PAGE_SHIFT;
-   max_order = min(max_order, ilog2(max_segment));
-   }
-   }
-#endif
+   max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
+   max_order = min(max_order, get_order(max_segment));
  
  	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;

if (IS_I965GM(i915) || IS_I965G(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index f42ca1179f373..11125c32dd35d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
struct intel_memory_region *mem = obj->mm.region;
struct address_space *mapping = obj->base.filp->f_mapping;
const unsigned long page_count = obj->base.size / PAGE_SIZE;
-   unsigned int max_segment = i915_sg_segment_size();
+   unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
struct sg_table *st;
struct sgt_iter sgt_iter;
struct page *page;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4f861782c3e85..a4aa9500fa179 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device 
*bdev,
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
-   const unsigned int max_segment = i915_sg_segment_size();
+   const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
struct file *filp = i915_tt->filp;
struct sgt_iter sgt_iter;
@@ -538,7 +538,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct 
ttm_tt *ttm)
ret = sg_alloc_table_from_pages_segment(st,
ttm->pages, ttm->num_pages,
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
-   i915_sg_segment_size(), GFP_KERNEL);
+   i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
if (ret) {
st->sgl = NULL;
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index d4398948f0162..f34e01a7fefb9 

Re: [PATCH] drm/i915/selftests: Stop using kthread_stop()

2022-10-20 Thread Tvrtko Ursulin



On 20/10/2022 15:18, Ville Syrjälä wrote:

On Thu, Oct 20, 2022 at 02:08:41PM +0100, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Since a7c01fa93aeb ("signal: break out of wait loops on kthread_stop()")
kthread_stop() started asserting a pending signal which wreaks havoc with
a few of our selftests. Mainly because they are not fully expecting to
handle signals, but also cutting the intended test runtimes short due to
signal_pending() now returning true (via __igt_timeout), which therefore
breaks both the patterns of:

   kthread_run()
   ..sleep for igt_timeout_ms to allow test to exercise stuff..
   kthread_stop()

And check for errors recorded in the thread.

And also:

 Main thread  |   Test thread
   ---+--
   kthread_run()  |
   kthread_stop() |  do stuff until __igt_timeout
 |  -- exits early due signal --

Where this kthread_stop() was assumed to have "join" semantics, which
it would have had if not for the new signal assertion issue.

To recap, threads are now likely to catch a previously impossible
ERESTARTSYS or EINTR, marking the test as failed, or have a pointlessly
short run time.

To work around this start using kthread_work(er) API which provides
an explicit way of waiting for threads to exit. And for cases where
parent controls the test duration we add explicit signaling which threads
will now use instead of relying on kthread_should_stop().

Signed-off-by: Tvrtko Ursulin 
Cc: Ville Syrjälä 
---
  .../drm/i915/gem/selftests/i915_gem_context.c | 118 
  drivers/gpu/drm/i915/gt/selftest_execlists.c  |  48 ++--
  drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  51 ++--
  drivers/gpu/drm/i915/selftests/i915_request.c | 252 +++---
  4 files changed, 281 insertions(+), 188 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index c6ad67b90e8a..d886432b 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -179,97 +179,108 @@ static int live_nop_switch(void *arg)
  }
  
  struct parallel_switch {

-   struct task_struct *tsk;
+   struct kthread_worker *worker;
+   struct kthread_work work;
struct intel_context *ce[2];
+   int result;
  };
  
-static int __live_parallel_switch1(void *data)

+static void __live_parallel_switch1(struct kthread_work *work)
  {
-   struct parallel_switch *arg = data;
+   struct parallel_switch *arg =
+   container_of(work, typeof(*arg), work);
IGT_TIMEOUT(end_time);
unsigned long count;
  
  	count = 0;

+   arg->result = 0;
do {
struct i915_request *rq = NULL;
-   int err, n;
+   int n;
  
-		err = 0;

-   for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
+   for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
struct i915_request *prev = rq;
  
  			rq = i915_request_create(arg->ce[n]);

if (IS_ERR(rq)) {
i915_request_put(prev);
-   return PTR_ERR(rq);
+   arg->result = PTR_ERR(rq);
+   break;
}
  
  			i915_request_get(rq);

if (prev) {
-   err = i915_request_await_dma_fence(rq, &prev->fence);
+   arg->result =
+   i915_request_await_dma_fence(rq,
+&prev->fence);
i915_request_put(prev);
}
  
  			i915_request_add(rq);

}
+
+   if (IS_ERR_OR_NULL(rq))
+   break;
+
if (i915_request_wait(rq, 0, HZ) < 0)
-   err = -ETIME;
+   arg->result = -ETIME;
+
i915_request_put(rq);
-   if (err)
-   return err;
  
  		count++;

-   } while (!__igt_timeout(end_time, NULL));
+   } while (!arg->result && !__igt_timeout(end_time, NULL));
  
-	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);

-   return 0;
+   pr_info("%s: %lu switches (sync) <%d>\n",
+   arg->ce[0]->engine->name, count, arg->result);
  }
  
-static int __live_parallel_switchN(void *data)

+static void __live_parallel_switchN(struct kthread_work *work)
  {
-   struct parallel_switch *arg = data;
+   struct parallel_switch *arg =
+   container_of(work, typeof(*arg), work);
struct i915_request *rq = NULL;
IGT_TIMEOUT(end_time);
unsigned long

[PATCH] drm/i915/selftests: Stop using kthread_stop()

2022-10-20 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Since a7c01fa93aeb ("signal: break out of wait loops on kthread_stop()")
kthread_stop() started asserting a pending signal which wreaks havoc with
a few of our selftests. Mainly because they are not fully expecting to
handle signals, but also cutting the intended test runtimes short due to
signal_pending() now returning true (via __igt_timeout), which therefore
breaks both the patterns of:

  kthread_run()
  ..sleep for igt_timeout_ms to allow test to exercise stuff..
  kthread_stop()

And check for errors recorded in the thread.

And also:

Main thread  |   Test thread
  ---+--
  kthread_run()  |
  kthread_stop() |  do stuff until __igt_timeout
 |  -- exits early due signal --

Where this kthread_stop() was assumed to have "join" semantics, which
it would have had if not for the new signal assertion issue.

To recap, threads are now likely to catch a previously impossible
ERESTARTSYS or EINTR, marking the test as failed, or have a pointlessly
short run time.

To work around this start using kthread_work(er) API which provides
an explicit way of waiting for threads to exit. And for cases where
parent controls the test duration we add explicit signaling which threads
will now use instead of relying on kthread_should_stop().

Signed-off-by: Tvrtko Ursulin 
Cc: Ville Syrjälä 
---
 .../drm/i915/gem/selftests/i915_gem_context.c | 118 
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |  48 ++--
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  51 ++--
 drivers/gpu/drm/i915/selftests/i915_request.c | 252 +++---
 4 files changed, 281 insertions(+), 188 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index c6ad67b90e8a..d886432b 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -179,97 +179,108 @@ static int live_nop_switch(void *arg)
 }
 
 struct parallel_switch {
-   struct task_struct *tsk;
+   struct kthread_worker *worker;
+   struct kthread_work work;
struct intel_context *ce[2];
+   int result;
 };
 
-static int __live_parallel_switch1(void *data)
+static void __live_parallel_switch1(struct kthread_work *work)
 {
-   struct parallel_switch *arg = data;
+   struct parallel_switch *arg =
+   container_of(work, typeof(*arg), work);
IGT_TIMEOUT(end_time);
unsigned long count;
 
count = 0;
+   arg->result = 0;
do {
struct i915_request *rq = NULL;
-   int err, n;
+   int n;
 
-   err = 0;
-   for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
+   for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
struct i915_request *prev = rq;
 
rq = i915_request_create(arg->ce[n]);
if (IS_ERR(rq)) {
i915_request_put(prev);
-   return PTR_ERR(rq);
+   arg->result = PTR_ERR(rq);
+   break;
}
 
i915_request_get(rq);
if (prev) {
-   err = i915_request_await_dma_fence(rq, &prev->fence);
+   arg->result =
+   i915_request_await_dma_fence(rq,
+&prev->fence);
i915_request_put(prev);
}
 
i915_request_add(rq);
}
+
+   if (IS_ERR_OR_NULL(rq))
+   break;
+
if (i915_request_wait(rq, 0, HZ) < 0)
-   err = -ETIME;
+   arg->result = -ETIME;
+
i915_request_put(rq);
-   if (err)
-   return err;
 
count++;
-   } while (!__igt_timeout(end_time, NULL));
+   } while (!arg->result && !__igt_timeout(end_time, NULL));
 
-   pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
-   return 0;
+   pr_info("%s: %lu switches (sync) <%d>\n",
+   arg->ce[0]->engine->name, count, arg->result);
 }
 
-static int __live_parallel_switchN(void *data)
+static void __live_parallel_switchN(struct kthread_work *work)
 {
-   struct parallel_switch *arg = data;
+   struct parallel_switch *arg =
+   container_of(work, typeof(*arg), work);
struct i915_request *rq = NULL;
IGT_TIMEOUT(end_time);
unsigned long count;
int n;
 
count = 0;
+   arg->result = 0;

Re: [Intel-gfx] [PATCH] drm/i915/slpc: Optmize waitboost for SLPC

2022-10-20 Thread Tvrtko Ursulin



On 19/10/2022 22:12, Belgaumkar, Vinay wrote:


On 10/19/2022 12:40 AM, Tvrtko Ursulin wrote:


On 18/10/2022 23:15, Vinay Belgaumkar wrote:
Waitboost (when SLPC is enabled) results in a H2G message. This can 
result
in thousands of messages during a stress test and fill up an already 
full
CTB. There is no need to request for RP0 if GuC is already requesting 
the

same.

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_rps.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c

index fc23c562d9b2..a20ae4fceac8 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1005,13 +1005,20 @@ void intel_rps_dec_waiters(struct intel_rps 
*rps)

  void intel_rps_boost(struct i915_request *rq)
  {
  struct intel_guc_slpc *slpc;
+    struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
    if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
  return;
  +    /* If GuC is already requesting RP0, skip */
+    if (rps_uses_slpc(rps)) {
+    slpc = rps_to_slpc(rps);
+    if (intel_rps_get_requested_frequency(rps) == slpc->rp0_freq)

One correction here is this should be slpc->boost_freq.

+    return;
+    }
+


Feels a little bit like a layering violation. Wait boost reference 
counts and request markings will changed based on asynchronous state - 
a mmio read.


Also, a little below we have this:

"""
/* Serializes with i915_request_retire() */
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
    struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;

    if (rps_uses_slpc(rps)) {
    slpc = rps_to_slpc(rps);

    /* Return if old value is non zero */
    if (!atomic_fetch_inc(&slpc->num_waiters))

***>>>> Wouldn't it skip doing anything here already? <<<<***
It will skip only if boost is already happening. This patch is trying to 
prevent even that first one if possible.


Do you mean that the first boost request comes outside the driver control?



    schedule_work(&slpc->boost_work);

    return;
    }

    if (atomic_fetch_inc(&rps->num_waiters))
    return;
"""

But I wonder if this is not a layering violation already. Looks like 
one for me at the moment. And as it happens there is an ongoing debug 
of clvk slowness where I was a bit puzzled by the lack of "boost 
fence" in trace_printk logs - but now I see how that happens. Does not 
feel right to me that we lose that tracing with SLPC.
Agreed. Will add the trace to the SLPC case as well.  However, the 
question is what does that trace indicate? Even in the host case, we log 
the trace, but may skip the actual boost as the req is already matching 
boost freq. IMO, we should log the trace only when we actually decide to 
boost.


Good question - let me come back to this later when the current 
emergencies subside. Feel free to remind me if I forget.


So in general - why the correct approach wouldn't be to solve this in 
the worker - which perhaps should fork to slpc specific branch and do 
the consolidations/skips based on mmio reads in there?


sure, I can move the mmio read to the SLPC worker thread.


Thanks, yes I think that will even be better since mmio read will only 
happen if the higher level thinks that it should boost. So the hierarchy 
of "duties" would be slightly improved. Driver tracking -> SLPC tracking 
-> HW status.


I'll come back to the latest version of the patch later today or tomorrow.

Regards,

Tvrtko

Thanks,

Vinay.



Regards,

Tvrtko


  /* Serializes with i915_request_retire() */
   if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
-    struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
    if (rps_uses_slpc(rps)) {
  slpc = rps_to_slpc(rps);


Re: [Intel-gfx] [PATCH 0/2] Selftest fixes for 6.1

2022-10-19 Thread Tvrtko Ursulin



On 19/10/2022 13:10, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Warning - not much tested, mainly bypassing trybot for quick turnaround.


Please ignore - this is quite broken and problem more complicated.

Regards,

Tvrtko


Tvrtko Ursulin (2):
   drm/i915/selftests: Fix waiting for threads to start
   drm/i915/selftests: Fix selftests for 6.1 kthread_stop semantics

  .../drm/i915/gem/selftests/i915_gem_context.c |   9 +-
  drivers/gpu/drm/i915/gt/selftest_execlists.c  |  13 +-
  drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |   5 +-
  drivers/gpu/drm/i915/i915_selftest.h  |  14 ++
  drivers/gpu/drm/i915/selftests/i915_request.c | 136 --
  .../gpu/drm/i915/selftests/i915_selftest.c|  18 +++
  6 files changed, 140 insertions(+), 55 deletions(-)



[PATCH 2/2] drm/i915/selftests: Fix selftests for 6.1 kthread_stop semantics

2022-10-19 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Since a7c01fa93aeb ("signal: break out of wait loops on kthread_stop()")
kthread_stop will mark a pending signal which breaks __igt_timeout when
used from selftests threads. Result of this is overly short test execution
time which renders some tests useless.

Add a new __igt_thread_timeout helper and use it from selftest threads.

Signed-off-by: Tvrtko Ursulin 
---
 .../drm/i915/gem/selftests/i915_gem_context.c  |  4 ++--
 drivers/gpu/drm/i915/gt/selftest_execlists.c   |  3 ++-
 drivers/gpu/drm/i915/i915_selftest.h   |  2 ++
 drivers/gpu/drm/i915/selftests/i915_request.c  | 10 +-
 drivers/gpu/drm/i915/selftests/i915_selftest.c | 18 ++
 5 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index f5dc7ba2cdd7..1172d0d6e07a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -221,7 +221,7 @@ static int __live_parallel_switch1(void *data)
return err;
 
count++;
-   } while (!__igt_timeout(end_time, NULL));
+   } while (!__igt_thread_timeout(end_time, NULL));
 
pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
return 0;
@@ -262,7 +262,7 @@ static int __live_parallel_switchN(void *data)
}
 
count++;
-   } while (!__igt_timeout(end_time, NULL));
+   } while (!__igt_thread_timeout(end_time, NULL));
i915_request_put(rq);
 
pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c 
b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 07f572ee9923..e63c0ac3d861 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -3558,7 +3558,8 @@ static int smoke_crescendo_thread(void *arg)
return err;
 
count++;
-   } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
+   } while (count < smoke->ncontext &&
+!__igt_thread_timeout(end_time, NULL));
 
smoke->count = count;
return 0;
diff --git a/drivers/gpu/drm/i915/i915_selftest.h 
b/drivers/gpu/drm/i915/i915_selftest.h
index e4fcb71fb0ee..a233f167ec44 100644
--- a/drivers/gpu/drm/i915/i915_selftest.h
+++ b/drivers/gpu/drm/i915/i915_selftest.h
@@ -131,6 +131,8 @@ static inline int i915_perf_selftests(struct pci_dev *pdev) 
{ return 0; }
 
 __printf(2, 3)
 bool __igt_timeout(unsigned long timeout, const char *fmt, ...);
+__printf(2, 3)
+bool __igt_thread_timeout(unsigned long timeout, const char *fmt, ...);
 
 #define igt_timeout(t, fmt, ...) \
__igt_timeout((t), KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c 
b/drivers/gpu/drm/i915/selftests/i915_request.c
index 9c313e9a771b..5c576ee94e5d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -1467,7 +1467,7 @@ static int __live_parallel_engine1(void *arg)
break;
 
count++;
-   } while (!__igt_timeout(end_time, NULL));
+   } while (!__igt_thread_timeout(end_time, NULL));
intel_engine_pm_put(engine);
 
pr_info("%s: %lu request + sync\n", engine->name, count);
@@ -1496,7 +1496,7 @@ static int __live_parallel_engineN(void *arg)
 
i915_request_add(rq);
count++;
-   } while (!__igt_timeout(end_time, NULL));
+   } while (!__igt_thread_timeout(end_time, NULL));
intel_engine_pm_put(engine);
 
pr_info("%s: %lu requests\n", engine->name, count);
@@ -2978,7 +2978,7 @@ static int p_sync0(void *arg)
break;
 
count++;
-   } while (!__igt_timeout(end_time, NULL));
+   } while (!__igt_thread_timeout(end_time, NULL));
 
if (busy) {
ktime_t now;
@@ -3053,7 +3053,7 @@ static int p_sync1(void *arg)
break;
 
count++;
-   } while (!__igt_timeout(end_time, NULL));
+   } while (!__igt_thread_timeout(end_time, NULL));
i915_request_put(prev);
 
if (busy) {
@@ -3118,7 +3118,7 @@ static int p_many(void *arg)
 
i915_request_add(rq);
count++;
-   } while (!__igt_timeout(end_time, NULL));
+   } while (!__igt_thread_timeout(end_time, NULL));
 
if (busy) {
ktime_t now;
diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c 
b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index 39da0fb0d6d2..afba2c3db1a9 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c

[PATCH 1/2] drm/i915/selftests: Fix waiting for threads to start

2022-10-19 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Tests which want to make sure all threads have started have to do that
explicitly since one yield() can not guarantee it. Issue is that many
tests then proceed to call kthread_stop() which can therefore return even
before the thread has been started and will instead just return an error
status.

Add a simple macro helper which can wait on a bunch of threads to start
and use it. Also refactor some tests so the helper can be used.

Signed-off-by: Tvrtko Ursulin 
---
 .../drm/i915/gem/selftests/i915_gem_context.c |   5 +-
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |  10 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |   5 +-
 drivers/gpu/drm/i915/i915_selftest.h  |  12 ++
 drivers/gpu/drm/i915/selftests/i915_request.c | 126 --
 5 files changed, 111 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index c6ad67b90e8a..f5dc7ba2cdd7 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -181,6 +181,7 @@ static int live_nop_switch(void *arg)
 struct parallel_switch {
struct task_struct *tsk;
struct intel_context *ce[2];
+   bool running;
 };
 
 static int __live_parallel_switch1(void *data)
@@ -189,6 +190,7 @@ static int __live_parallel_switch1(void *data)
IGT_TIMEOUT(end_time);
unsigned long count;
 
+   WRITE_ONCE(arg->running, true);
count = 0;
do {
struct i915_request *rq = NULL;
@@ -233,6 +235,7 @@ static int __live_parallel_switchN(void *data)
unsigned long count;
int n;
 
+   WRITE_ONCE(arg->running, true);
count = 0;
do {
for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
@@ -370,7 +373,7 @@ static int live_parallel_switch(void *arg)
get_task_struct(data[n].tsk);
}
 
-   yield(); /* start all threads before we kthread_stop() */
+   __igt_start_threads(data, count, tsk, running);
 
for (n = 0; n < count; n++) {
int status;
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c 
b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 56b7d5b5fea0..07f572ee9923 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -3479,6 +3479,7 @@ struct preempt_smoke {
unsigned int ncontext;
struct rnd_state prng;
unsigned long count;
+   bool running;
 };
 
 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
@@ -3544,6 +3545,7 @@ static int smoke_crescendo_thread(void *arg)
IGT_TIMEOUT(end_time);
unsigned long count;
 
+   WRITE_ONCE(smoke->running, true);
count = 0;
do {
struct i915_gem_context *ctx = smoke_context(smoke);
@@ -3576,23 +3578,25 @@ static int smoke_crescendo(struct preempt_smoke *smoke, 
unsigned int flags)
if (!arg)
return -ENOMEM;
 
+   memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg));
+
for_each_engine(engine, smoke->gt, id) {
arg[id] = *smoke;
-   arg[id].engine = engine;
if (!(flags & BATCH))
arg[id].batch = NULL;
arg[id].count = 0;
 
-   tsk[id] = kthread_run(smoke_crescendo_thread, arg,
+   tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
  "igt/smoke:%d", id);
if (IS_ERR(tsk[id])) {
err = PTR_ERR(tsk[id]);
break;
}
+   arg[id].engine = engine;
get_task_struct(tsk[id]);
}
 
-   yield(); /* start all threads before we kthread_stop() */
+   __igt_start_threads(arg, I915_NUM_ENGINES, engine, running);
 
count = 0;
for_each_engine(engine, smoke->gt, id) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c 
b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 7f3bb1d34dfb..ea1542e6b157 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -870,6 +870,7 @@ struct active_engine {
struct intel_engine_cs *engine;
unsigned long resets;
unsigned int flags;
+   bool running;
 };
 
 #define TEST_ACTIVEBIT(0)
@@ -910,6 +911,8 @@ static int active_engine(void *data)
unsigned long count;
int err = 0;
 
+   WRITE_ONCE(arg->running, true);
+
for (count = 0; count < ARRAY_SIZE(ce); count++) {
ce[count] = intel_context_create(engine);
if (IS_ERR(ce[count])) {
@@ -1048,7 +1051,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
get_task_struct(tsk);
}
 
-

[PATCH 0/2] Selftest fixes for 6.1

2022-10-19 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Warning - not much tested, mainly bypassing trybot for quick turnaround.

Tvrtko Ursulin (2):
  drm/i915/selftests: Fix waiting for threads to start
  drm/i915/selftests: Fix selftests for 6.1 kthread_stop semantics

 .../drm/i915/gem/selftests/i915_gem_context.c |   9 +-
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |  13 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |   5 +-
 drivers/gpu/drm/i915/i915_selftest.h  |  14 ++
 drivers/gpu/drm/i915/selftests/i915_request.c | 136 --
 .../gpu/drm/i915/selftests/i915_selftest.c|  18 +++
 6 files changed, 140 insertions(+), 55 deletions(-)

-- 
2.34.1



Re: [Intel-gfx] [PATCH] drm/i915/slpc: Optmize waitboost for SLPC

2022-10-19 Thread Tvrtko Ursulin



On 18/10/2022 23:15, Vinay Belgaumkar wrote:

Waitboost (when SLPC is enabled) results in a H2G message. This can result
in thousands of messages during a stress test and fill up an already full
CTB. There is no need to request for RP0 if GuC is already requesting the
same.

Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gt/intel_rps.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index fc23c562d9b2..a20ae4fceac8 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1005,13 +1005,20 @@ void intel_rps_dec_waiters(struct intel_rps *rps)
  void intel_rps_boost(struct i915_request *rq)
  {
struct intel_guc_slpc *slpc;
+   struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
  
  	if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))

return;
  
+	/* If GuC is already requesting RP0, skip */

+   if (rps_uses_slpc(rps)) {
+   slpc = rps_to_slpc(rps);
+   if (intel_rps_get_requested_frequency(rps) == slpc->rp0_freq)
+   return;
+   }
+


Feels a little bit like a layering violation. Wait boost reference 
counts and request markings will changed based on asynchronous state - a 
mmio read.


Also, a little below we have this:

"""
/* Serializes with i915_request_retire() */
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;

if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);

/* Return if old value is non zero */
if (!atomic_fetch_inc(&slpc->num_waiters))

*** Wouldn't it skip doing anything here already? ***

schedule_work(&slpc->boost_work);

return;
}

if (atomic_fetch_inc(&rps->num_waiters))
return;
"""

But I wonder if this is not a layering violation already. Looks like one 
for me at the moment. And as it happens there is an ongoing debug of 
clvk slowness where I was a bit puzzled by the lack of "boost fence" in 
trace_printk logs - but now I see how that happens. Does not feel right 
to me that we lose that tracing with SLPC.


So in general - why the correct approach wouldn't be to solve this in 
the worker - which perhaps should fork to slpc specific branch and do 
the consolidations/skips based on mmio reads in there?


Regards,

Tvrtko


/* Serializes with i915_request_retire() */
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
-   struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
  
  		if (rps_uses_slpc(rps)) {

slpc = rps_to_slpc(rps);


[PULL] drm-intel-next-fixes

2022-10-13 Thread Tvrtko Ursulin
Hi Dave, Daniel,

Not sure if there is time for one more pull during the merge window, but
in case there is here it is. Only one which affects platforms outside
force_probe and that is the fix for DPT PTE corruption after resume. The
rest are all DG2 specific.

Regards,

Tvrtko

drm-intel-next-fixes-2022-10-13:
- Fix revocation of non-persistent contexts (Tvrtko Ursulin)
- Handle migration for dpt (Matthew Auld)
- Fix display problems after resume (Thomas Hellström)
- Allow control over the flags when migrating (Matthew Auld)
- Consider DG2_RC_CCS_CC when migrating buffers (Matthew Auld)
The following changes since commit cdf6428dd518435a05739abf7659589de30970f4:

  drm/i915: Reject excessive dotclocks early (2022-10-03 17:55:32 +0100)

are available in the Git repository at:

  git://anongit.freedesktop.org/drm/drm-intel 
tags/drm-intel-next-fixes-2022-10-13

for you to fetch changes up to ea19684afb545605bbcb690c49a91ce2c8e596dd:

  drm/i915/display: consider DG2_RC_CCS_CC when migrating buffers (2022-10-11 
17:29:05 +0100)


- Fix revocation of non-persistent contexts (Tvrtko Ursulin)
- Handle migration for dpt (Matthew Auld)
- Fix display problems after resume (Thomas Hellström)
- Allow control over the flags when migrating (Matthew Auld)
- Consider DG2_RC_CCS_CC when migrating buffers (Matthew Auld)


Matthew Auld (3):
  drm/i915/display: handle migration for dpt
  drm/i915: allow control over the flags when migrating
  drm/i915/display: consider DG2_RC_CCS_CC when migrating buffers

Thomas Hellström (1):
  drm/i915: Fix display problems after resume

Tvrtko Ursulin (1):
  drm/i915/guc: Fix revocation of non-persistent contexts

 drivers/gpu/drm/i915/display/intel_fb_pin.c   | 62 ---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +--
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 37 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 ++
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |  5 +-
 drivers/gpu/drm/i915/gt/intel_context.c   |  5 +-
 drivers/gpu/drm/i915/gt/intel_context.h   |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  8 ++-
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 26 +-
 10 files changed, 113 insertions(+), 48 deletions(-)


Re: [PATCH] drm/i915/trace: Removed unused frequency trace

2022-10-11 Thread Tvrtko Ursulin



On 11/10/2022 14:59, Andi Shyti wrote:

Commit 3e7abf814193 ("drm/i915: Extract GT render power state management")
removes the "trace_intel_gpu_freq_change()" trace points but
their definition was left without users. Remove it.

Suggested-by: Tvrtko Ursulin 
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
---
  drivers/gpu/drm/i915/i915_trace.h | 15 ---
  1 file changed, 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_trace.h 
b/drivers/gpu/drm/i915/i915_trace.h
index 37b5c9e9d260..c70a02517e02 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -671,21 +671,6 @@ TRACE_EVENT_CONDITION(i915_reg_rw,
(u32)(__entry->val >> 32))
  );
  
-TRACE_EVENT(intel_gpu_freq_change,

-   TP_PROTO(u32 freq),
-   TP_ARGS(freq),
-
-   TP_STRUCT__entry(
-__field(u32, freq)
-),
-
-   TP_fast_assign(
-  __entry->freq = freq;
-  ),
-
-   TP_printk("new_freq=%u", __entry->freq)
-);
-
  /**
   * DOC: i915_ppgtt_create and i915_ppgtt_release tracepoints
   *


Reviewed-by: Tvrtko Ursulin 

Regards,

Tvrtko


Re: [PATCH] drm/i915/perf: remove redundant variable 'taken'

2022-10-10 Thread Tvrtko Ursulin



On 08/10/2022 12:55, Christophe JAILLET wrote:

Le 07/10/2022 à 21:53, Colin Ian King a écrit :

The assignment to variable taken is redundant and so it can be
removed as well as the variable too.

Cleans up clang-scan build warnings:
warning: Although the value stored to 'taken' is used in the enclosing
expression, the value is never actually read from 'taken'
[deadcode.DeadStores]


Hi,

#define OA_TAKEN(tail, head)    ((tail - head) & (OA_BUFFER_SIZE - 1))

So if the result is not used, maybe calling OA_TAKEN() can be removed as 
well?

It looks like a no-op in such a case.


AFAICS result is used, just the copy/local variable is not.

For the patch:

Reviewed-by: Tvrtko Ursulin 

Thanks for the cleanup, will merge.

Regards,

Tvrtko




CJ



Signed-off-by: Colin Ian King 
---
  drivers/gpu/drm/i915/i915_perf.c | 6 ++
  1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c 
b/drivers/gpu/drm/i915/i915_perf.c

index 0defbb43ceea..15816df916c7 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -656,7 +656,6 @@ static int gen8_append_oa_reports(struct 
i915_perf_stream *stream,

  size_t start_offset = *offset;
  unsigned long flags;
  u32 head, tail;
-    u32 taken;
  int ret = 0;
  if (drm_WARN_ON(>i915->drm, !stream->enabled))
@@ -692,7 +691,7 @@ static int gen8_append_oa_reports(struct 
i915_perf_stream *stream,

  for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ OA_TAKEN(tail, head);
   head = (head + report_size) & mask) {
  u8 *report = oa_buf_base + head;
  u32 *report32 = (void *)report;
@@ -950,7 +949,6 @@ static int gen7_append_oa_reports(struct 
i915_perf_stream *stream,

  size_t start_offset = *offset;
  unsigned long flags;
  u32 head, tail;
-    u32 taken;
  int ret = 0;
  if (drm_WARN_ON(>i915->drm, !stream->enabled))
@@ -984,7 +982,7 @@ static int gen7_append_oa_reports(struct 
i915_perf_stream *stream,

  for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ OA_TAKEN(tail, head);
   head = (head + report_size) & mask) {
  u8 *report = oa_buf_base + head;
  u32 *report32 = (void *)report;




Re: [PATCH] drm/i915/gem: remove redundant assignments to variable ret

2022-10-10 Thread Tvrtko Ursulin



On 07/10/2022 20:47, Colin Ian King wrote:

The variable ret is being assigned with a value that is never read
both before and after a while-loop. The variable is being re-assigned
inside the while-loop and afterwards on the call to the function
i915_gem_object_lock_interruptible. Remove the redundant assignments.

Cleans up clang scan-build warnings:

warning: Although the value stored to 'ret' is used in the
enclosing expression, the value is never actually read
from 'ret' [deadcode.DeadStores]

warning: Value stored to 'ret' is never read [deadcode.DeadStores]

Signed-off-by: Colin Ian King 
---
  drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index d4398948f016..b7e24476a0fd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -292,7 +292,7 @@ int i915_gem_object_userptr_submit_init(struct 
drm_i915_gem_object *obj)
if (!i915_gem_object_is_readonly(obj))
gup_flags |= FOLL_WRITE;
  
-	pinned = ret = 0;

+   pinned = 0;
while (pinned < num_pages) {
ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
  num_pages - pinned, gup_flags,
@@ -302,7 +302,6 @@ int i915_gem_object_userptr_submit_init(struct 
drm_i915_gem_object *obj)
  
  		pinned += ret;

}
-   ret = 0;
  
  	ret = i915_gem_object_lock_interruptible(obj, NULL);

if (ret)


Reviewed-by: Tvrtko Ursulin 

Thanks for the cleanup, will merge.

Regards,

Tvrtko


Re: [Intel-gfx] [PATCH v5 3/4] drm/i915: Make the heartbeat play nice with long pre-emption timeouts

2022-10-07 Thread Tvrtko Ursulin



On 06/10/2022 22:38, john.c.harri...@intel.com wrote:

From: John Harrison 

Compute workloads are inherently not pre-emptible for long periods on
current hardware. As a workaround for this, the pre-emption timeout
for compute capable engines was disabled. This is undesirable with GuC
submission as it prevents per engine reset of hung contexts. Hence the
next patch will re-enable the timeout but bumped up by an order of
magnitude.

However, the heartbeat might not respect that. Depending upon current
activity, a pre-emption to the heartbeat pulse might not even be
attempted until the last heartbeat period. Which means that only one
period is granted for the pre-emption to occur. With the aforesaid
bump, the pre-emption timeout could be significantly larger than this
heartbeat period.

So adjust the heartbeat code to take the pre-emption timeout into
account. When it reaches the final (high priority) period, it now
ensures the delay before hitting reset is bigger than the pre-emption
timeout.

v2: Fix for selftests which adjust the heartbeat period manually.
v3: Add FIXME comment about selftests. Add extra FIXME comment and
drm_notices when setting heartbeat to a non-default value (review
feedback from Tvrtko)

Signed-off-by: John Harrison 
---
  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 39 +++
  1 file changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c 
b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index a3698f611f457..9a527e1f5be65 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -22,9 +22,37 @@
  
  static bool next_heartbeat(struct intel_engine_cs *engine)

  {
+   struct i915_request *rq;
long delay;
  
  	delay = READ_ONCE(engine->props.heartbeat_interval_ms);

+
+   rq = engine->heartbeat.systole;
+
+   /*
+* FIXME: The final period extension is disabled if the period has been
+* modified from the default. This is to prevent issues with certain
+* selftests which override the value and expect specific behaviour.
+* Once the selftests have been updated to either cope with variable
+* heartbeat periods (or to override the pre-emption timeout as well,
+* or just to add a selftest specific override of the extension), the
+* generic override can be removed.
+*/
+   if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER &&
+   delay == engine->defaults.heartbeat_interval_ms) {
+   long longer;
+
+   /*
+* The final try is at the highest priority possible. Up until 
now
+* a pre-emption might not even have been attempted. So make 
sure
+* this last attempt allows enough time for a pre-emption to 
occur.
+*/
+   longer = READ_ONCE(engine->props.preempt_timeout_ms) * 2;
+   longer = intel_clamp_heartbeat_interval_ms(engine, longer);
+   if (longer > delay)
+   delay = longer;
+   }
+
if (!delay)
return false;
  
@@ -288,6 +316,17 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine,

if (!delay && !intel_engine_has_preempt_reset(engine))
return -ENODEV;
  
+	/* FIXME: Remove together with equally marked hack in next_heartbeat. */

+   if (delay != engine->defaults.heartbeat_interval_ms &&
+   delay < 2 * engine->props.preempt_timeout_ms) {
+   if (intel_engine_uses_guc(engine))
+   drm_notice(&engine->i915->drm, "%s heartbeat interval 
adjusted to a non-default value which may downgrade individual engine resets to full GPU 
resets!\n",
+  engine->name);
+   else
+   drm_notice(&engine->i915->drm, "%s heartbeat interval 
adjusted to a non-default value which may cause engine resets to target innocent contexts!\n",
+  engine->name);
+   }
+
intel_engine_pm_get(engine);
  
	err = mutex_lock_interruptible(&ce->timeline->mutex);


LGTM - hope it is agreeable to you too.

Reviewed-by: Tvrtko Ursulin 

Regards,

Tvrtko



[PULL] drm-intel-next-fixes

2022-10-06 Thread Tvrtko Ursulin
Hi Dave, Daniel,

Some fixes for the merge window - one EHL MOCS table fix and the rest is
in the display area around modifier handling and PSR on Gen12+, one fixup
for g4x+ and one fixing recent fastset refactoring.

Regards,

Tvrtko

drm-intel-next-fixes-2022-10-06-1:
- Round to closest in g4x+ HDMI clock readout (Ville Syrjälä)
- Update MOCS table for EHL (Tejas Upadhyay)
- Fix PSR_IMR/IIR field handling (Jouni Högander)
- Fix watermark calculations for gen12+ RC CCS modifier (Ville Syrjälä)
- Fix watermark calculations for gen12+ MC CCS modifier (Ville Syrjälä)
- Fix watermark calculations for gen12+ CCS+CC modifier (Ville Syrjälä)
- Fix watermark calculations for DG2 CCS modifiers (Ville Syrjälä)
- Fix watermark calculations for DG2 CCS+CC modifier (Ville Syrjälä)
- Reject excessive dotclocks early (Ville Syrjälä)
The following changes since commit 20e377e7b2e7c327039f10db80ba5bcc1f6c882d:

  drm/i915/gt: Use i915_vm_put on ppgtt_create error paths (2022-09-27 11:05:33 
+0100)

are available in the Git repository at:

  git://anongit.freedesktop.org/drm/drm-intel 
tags/drm-intel-next-fixes-2022-10-06-1

for you to fetch changes up to cdf6428dd518435a05739abf7659589de30970f4:

  drm/i915: Reject excessive dotclocks early (2022-10-03 17:55:32 +0100)


- Round to closest in g4x+ HDMI clock readout (Ville Syrjälä)
- Update MOCS table for EHL (Tejas Upadhyay)
- Fix PSR_IMR/IIR field handling (Jouni Högander)
- Fix watermark calculations for gen12+ RC CCS modifier (Ville Syrjälä)
- Fix watermark calculations for gen12+ MC CCS modifier (Ville Syrjälä)
- Fix watermark calculations for gen12+ CCS+CC modifier (Ville Syrjälä)
- Fix watermark calculations for DG2 CCS modifiers (Ville Syrjälä)
- Fix watermark calculations for DG2 CCS+CC modifier (Ville Syrjälä)
- Reject excessive dotclocks early (Ville Syrjälä)


Jouni Högander (1):
  drm/i915/psr: Fix PSR_IMR/IIR field handling

Tejas Upadhyay (1):
  drm/i915/ehl: Update MOCS table for EHL

Ville Syrjälä (7):
  drm/i915: Round to closest in g4x+ HDMI clock readout
  drm/i915: Fix watermark calculations for gen12+ RC CCS modifier
  drm/i915: Fix watermark calculations for gen12+ MC CCS modifier
  drm/i915: Fix watermark calculations for gen12+ CCS+CC modifier
  drm/i915: Fix watermark calculations for DG2 CCS modifiers
  drm/i915: Fix watermark calculations for DG2 CCS+CC modifier
  drm/i915: Reject excessive dotclocks early

 drivers/gpu/drm/i915/display/g4x_hdmi.c  |  2 +-
 drivers/gpu/drm/i915/display/intel_display.c | 18 +++
 drivers/gpu/drm/i915/display/intel_psr.c | 78 +---
 drivers/gpu/drm/i915/display/skl_watermark.c | 16 +-
 drivers/gpu/drm/i915/gt/intel_mocs.c |  8 +++
 drivers/gpu/drm/i915/i915_reg.h  | 16 --
 6 files changed, 100 insertions(+), 38 deletions(-)


Re: [Intel-gfx] [PATCH v4 3/4] drm/i915: Make the heartbeat play nice with long pre-emption timeouts

2022-10-06 Thread Tvrtko Ursulin



On 05/10/2022 19:48, John Harrison wrote:

On 10/3/2022 05:00, Tvrtko Ursulin wrote:

On 03/10/2022 08:53, Tvrtko Ursulin wrote:

On 30/09/2022 18:44, John Harrison wrote:

On 9/30/2022 02:22, Tvrtko Ursulin wrote:

On 29/09/2022 17:21, John Harrison wrote:

On 9/29/2022 00:42, Tvrtko Ursulin wrote:

On 29/09/2022 03:18, john.c.harri...@intel.com wrote:

From: John Harrison 

Compute workloads are inherently not pre-emptible for long 
periods on

current hardware. As a workaround for this, the pre-emption timeout
for compute capable engines was disabled. This is undesirable 
with GuC
submission as it prevents per engine reset of hung contexts. 
Hence the

next patch will re-enable the timeout but bumped up by an order of
magnitude.

However, the heartbeat might not respect that. Depending upon 
current

activity, a pre-emption to the heartbeat pulse might not even be
attempted until the last heartbeat period. Which means that only 
one

period is granted for the pre-emption to occur. With the aforesaid
bump, the pre-emption timeout could be significantly larger than 
this

heartbeat period.

So adjust the heartbeat code to take the pre-emption timeout into
account. When it reaches the final (high priority) period, it now
ensures the delay before hitting reset is bigger than the 
pre-emption

timeout.

v2: Fix for selftests which adjust the heartbeat period manually.

Signed-off-by: John Harrison 
---
  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 19 
+++

  1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c 
b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c

index a3698f611f457..823a790a0e2ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -22,9 +22,28 @@
    static bool next_heartbeat(struct intel_engine_cs *engine)
  {
+    struct i915_request *rq;
  long delay;
    delay = READ_ONCE(engine->props.heartbeat_interval_ms);
+
+    rq = engine->heartbeat.systole;
+
+    if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER &&
+    delay == engine->defaults.heartbeat_interval_ms) {


Maybe I forgot but what is the reason for the check against the 
default heartbeat interval?
That's the 'v2: fix for selftests that manually adjust the 
heartbeat'. If something (or someone) has explicitly set an 
override of the heartbeat then it has to be assumed that they know 
what they are doing, and if things don't work any more that's 
their problem. But if we don't respect their override then they 
won't get the timings they expect and the selftest will fail.


Isn't this a bit too strict for the non-selftest case? If the new 
concept is extending the last pulse to guarantee preemption, then I 
think we could allow tweaking of the heartbeat period. Like what if 
user wants 1s, or 10s instead of 2.5s - why would that need to 
break the improvement from this patch?

Then the user is back to where they were before this patch.



In what ways selftests fail? Are they trying to guess time to reset 
based on the hearbeat period set? If so perhaps add a helper to 
query it based on the last pulse extension.


I don't recall. It was six months ago when I was actually working on 
this. And right now I do not have the time to go back and re-run all 
the testing and re-write a bunch of self tests with whole new 
helpers and algorithms and whatever else might be necessary to 
polish this to perfection. And in the meantime, all the existing 
issues are still present - there is no range checking on any of this 
stuff, it is very possible for a driver with default settings to 
break a legal workload because the heartbeat and pre-emption are 
fighting with each other, we don't even have per engine resets 
enabled, etc.


Maybe it could be even better with a follow up patch. Feel free to 
do that. But as it stands, this patch set significantly improves the 
situation without making anything worse.


As we seem to be in agreement that the check against default 
heartbeat is a hack with only purpose to work around assumptions made 
by selftests, then please file a Jira about removing it (this hack). 
Then work can be assigned to someone to clean it up. With that done I 
would agree the series is indeed an improvement and it would have my 
ack.

VLK-39595



One more thing - put a comment in the code along the lines of 
"FIXME/HACK: Work around selftests assumptions by only extending the 
last heartbeat if the period is at default value". The the Jira can 
associate to that comment.


Until that is resolve it may also be worth emitting a drm_notice if 
heartbeat is changed via sysfs? Informing users the things will not 
work as expected if they fiddle with it. Whether as a blanket warning 
or checking first the 3-4x heartbeat vs preempt timeout value. That 
message should then go away once the follow up work to fix the 
selftests is done. See wh

Re: [Intel-gfx] [PATCH v2] drm/i915/guc: Fix revocation of non-persistent contexts

2022-10-05 Thread Tvrtko Ursulin



On 04/10/2022 16:13, Ceraolo Spurio, Daniele wrote:

On 10/4/2022 4:14 AM, Tvrtko Ursulin wrote:


On 03/10/2022 13:16, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Patch which added graceful exit for non-persistent contexts missed the
fact it is not enough to set the exiting flag on a context and let the
backend handle it from there.

GuC backend cannot handle it because it runs independently in the
firmware and driver might not see the requests ever again. Patch also
missed the fact some usages of intel_context_is_banned in the GuC 
backend

needed replacing with newly introduced intel_context_is_schedulable.

Fix the first issue by calling into backend revoke when we know this is
the last chance to do it. Fix the second issue by replacing
intel_context_is_banned with intel_context_is_schedulable, which should
always be safe since latter is a superset of the former.

v2:
  * Just call ce->ops->revoke unconditionally. (Andrzej)


CI is happy - could I get some acks for the GuC backend changes please?


I think we still need to have a longer conversation on the revoking 
times, but in the meantime this fixes the immediate concerns, so:


Acked-by: Daniele Ceraolo Spurio 


Thanks, I've pushed it so should unbreak 6.0 via stable.

For follow up work I am okay either with a fixes 20ms timeout (this was 
enough for users which originally reported it), or go with fully 
configurable? Latter feels a bit over the top since it would then mean a 
kconfig and sysfs to align with the normal preempt timeout.


Regards,

Tvrtko


Re: [PATCH] drm/i915/pmu: Match frequencies reported by PMU and sysfs

2022-10-04 Thread Tvrtko Ursulin



On 04/10/2022 14:00, Tvrtko Ursulin wrote:


On 04/10/2022 10:29, Tvrtko Ursulin wrote:


On 03/10/2022 20:24, Ashutosh Dixit wrote:
PMU and sysfs use different wakeref's to "interpret" zero freq. Sysfs 
uses

runtime PM wakeref (see intel_rps_read_punit_req and
intel_rps_read_actual_frequency). PMU uses the GT parked/unparked
wakeref. In general the GT wakeref is held for less time than the 
runtime
PM wakeref which causes PMU to report a lower average freq than the 
average

freq obtained from sampling sysfs.

To resolve this, use the same freq functions (and wakeref's) in PMU as
those used in sysfs.

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7025
Reported-by: Ashwin Kumar Kulkarni 
Cc: Tvrtko Ursulin 
Signed-off-by: Ashutosh Dixit 
---
  drivers/gpu/drm/i915/i915_pmu.c | 27 ++-
  1 file changed, 2 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
b/drivers/gpu/drm/i915/i915_pmu.c

index 958b37123bf1..eda03f264792 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -371,37 +371,16 @@ static void
  frequency_sample(struct intel_gt *gt, unsigned int period_ns)
  {
  struct drm_i915_private *i915 = gt->i915;
-    struct intel_uncore *uncore = gt->uncore;
  struct i915_pmu *pmu = &i915->pmu;
  struct intel_rps *rps = &gt->rps;
  if (!frequency_sampling_enabled(pmu))
  return;
-    /* Report 0/0 (actual/requested) frequency while parked. */
-    if (!intel_gt_pm_get_if_awake(gt))
-    return;
-
  if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
-    u32 val;
-
-    /*
- * We take a quick peek here without using forcewake
- * so that we don't perturb the system under observation
- * (forcewake => !rc6 => increased power use). We expect
- * that if the read fails because it is outside of the
- * mmio power well, then it will return 0 -- in which
- * case we assume the system is running at the intended
- * frequency. Fortunately, the read should rarely fail!
- */
-    val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
-    if (val)
-    val = intel_rps_get_cagf(rps, val);
-    else
-    val = rps->cur_freq;
-
  add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
-    intel_gpu_freq(rps, val), period_ns / 1000);
+    intel_rps_read_actual_frequency(rps),
+    period_ns / 1000);
  }
  if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {


What is software tracking of requested frequency showing when GT is 
parked or runtime suspended? With this change sampling would be 
outside any such checks so we need to be sure reported value makes sense.


Although more important open is around what is actually correct.

For instance how does the patch affect RC6 and power? I don't know how 
power management of different blocks is wired up, so personally I 
would only be able to look at it empirically. In other words what I am 
asking is this - if we changed from skipping obtaining forcewake even 
when unparked, to obtaining forcewake if not runtime suspended - what 
hardware blocks does that power up and how it affects RC6 and power? 
Can it affect actual frequency or not? (Will "something" power up the 
clocks just because we will be getting forcewake?)


Or maybe question simplified - does 200Hz polling on existing sysfs 
actual frequency field disturbs the system under some circumstances? 
(Increases power and decreases RC6.) If it does then that would be a 
problem. We want a solution which shows the real data, but where the 
act of monitoring itself does not change it too much. If it doesn't 
then it's okay.


Could you somehow investigate on these topics? Maybe log RAPL GPU 
power while polling on sysfs, versus getting the actual frequency from 
the existing PMU implementation and see if that shows anything? Or 
actually simpler - RAPL GPU power for current PMU intel_gpu_top versus 
this patch? On idle(-ish) desktop workloads perhaps? Power and 
frequency graphed for both.


Another thought - considering that bspec says for 0xa01c "This register 
reflects real-time values and thus does not have a pre-determined 
default value out of reset" - could it be that it also does not reflect 
a real value when GPU is not executing anything (so zero), just happens 
to be not runtime suspended? That would mean sysfs reads could maybe 
show last known value? Just a thought to check.


I've also tried on my Alderlake desktop:

1)

while true; do cat gt_act_freq_mhz >/dev/null; sleep 0.005; done

This costs ~120mW of GPU power and ~20% decrease in RC6.


2)

intel_gpu_top -l -s 5 >/dev/null


This "-s 5" was pointless though. :)

Regards,

Tvrtko



This costs no power or RC6.

I have also never observed sysfs to show below min freq. This was with 
no desktop so

Re: [PATCH] drm/i915/pmu: Match frequencies reported by PMU and sysfs

2022-10-04 Thread Tvrtko Ursulin



On 04/10/2022 10:29, Tvrtko Ursulin wrote:


On 03/10/2022 20:24, Ashutosh Dixit wrote:
PMU and sysfs use different wakeref's to "interpret" zero freq. Sysfs 
uses

runtime PM wakeref (see intel_rps_read_punit_req and
intel_rps_read_actual_frequency). PMU uses the GT parked/unparked
wakeref. In general the GT wakeref is held for less time than the runtime
PM wakeref which causes PMU to report a lower average freq than the 
average

freq obtained from sampling sysfs.

To resolve this, use the same freq functions (and wakeref's) in PMU as
those used in sysfs.

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7025
Reported-by: Ashwin Kumar Kulkarni 
Cc: Tvrtko Ursulin 
Signed-off-by: Ashutosh Dixit 
---
  drivers/gpu/drm/i915/i915_pmu.c | 27 ++-
  1 file changed, 2 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
b/drivers/gpu/drm/i915/i915_pmu.c

index 958b37123bf1..eda03f264792 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -371,37 +371,16 @@ static void
  frequency_sample(struct intel_gt *gt, unsigned int period_ns)
  {
  struct drm_i915_private *i915 = gt->i915;
-    struct intel_uncore *uncore = gt->uncore;
  struct i915_pmu *pmu = &i915->pmu;
  struct intel_rps *rps = &gt->rps;
  if (!frequency_sampling_enabled(pmu))
  return;
-    /* Report 0/0 (actual/requested) frequency while parked. */
-    if (!intel_gt_pm_get_if_awake(gt))
-    return;
-
  if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
-    u32 val;
-
-    /*
- * We take a quick peek here without using forcewake
- * so that we don't perturb the system under observation
- * (forcewake => !rc6 => increased power use). We expect
- * that if the read fails because it is outside of the
- * mmio power well, then it will return 0 -- in which
- * case we assume the system is running at the intended
- * frequency. Fortunately, the read should rarely fail!
- */
-    val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
-    if (val)
-    val = intel_rps_get_cagf(rps, val);
-    else
-    val = rps->cur_freq;
-
  add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
-    intel_gpu_freq(rps, val), period_ns / 1000);
+    intel_rps_read_actual_frequency(rps),
+    period_ns / 1000);
  }
  if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {


What is software tracking of requested frequency showing when GT is 
parked or runtime suspended? With this change sampling would be outside 
any such checks so we need to be sure reported value makes sense.


Although more important open is around what is actually correct.

For instance how does the patch affect RC6 and power? I don't know how 
power management of different blocks is wired up, so personally I would 
only be able to look at it empirically. In other words what I am asking 
is this - if we changed from skipping obtaining forcewake even when 
unparked, to obtaining forcewake if not runtime suspended - what 
hardware blocks does that power up and how it affects RC6 and power? Can 
it affect actual frequency or not? (Will "something" power up the clocks 
just because we will be getting forcewake?)


Or maybe question simplified - does 200Hz polling on existing sysfs 
actual frequency field disturbs the system under some circumstances? 
(Increases power and decreases RC6.) If it does then that would be a 
problem. We want a solution which shows the real data, but where the act 
of monitoring itself does not change it too much. If it doesn't then 
it's okay.


Could you somehow investigate on these topics? Maybe log RAPL GPU power 
while polling on sysfs, versus getting the actual frequency from the 
existing PMU implementation and see if that shows anything? Or actually 
simpler - RAPL GPU power for current PMU intel_gpu_top versus this 
patch? On idle(-ish) desktop workloads perhaps? Power and frequency 
graphed for both.


Another thought - considering that bspec says for 0xa01c "This register 
reflects real-time values and thus does not have a pre-determined 
default value out of reset" - could it be that it also does not reflect 
a real value when GPU is not executing anything (so zero), just happens 
to be not runtime suspended? That would mean sysfs reads could maybe 
show last known value? Just a thought to check.


I've also tried on my Alderlake desktop:

1)

while true; do cat gt_act_freq_mhz >/dev/null; sleep 0.005; done

This costs ~120mW of GPU power and ~20% decrease in RC6.


2)

intel_gpu_top -l -s 5 >/dev/null

This costs no power or RC6.

I have also never observed sysfs to show below min freq. This was with 
no desktop so it's possible this register indeed does not reflect the 
real situation when things are idle.


So I think it is possible sysfs value is the misleading one.

Regards,

Tvrtko


Re: [Intel-gfx] [PATCH v2] drm/i915/guc: Fix revocation of non-persistent contexts

2022-10-04 Thread Tvrtko Ursulin



On 03/10/2022 13:16, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Patch which added graceful exit for non-persistent contexts missed the
fact it is not enough to set the exiting flag on a context and let the
backend handle it from there.

GuC backend cannot handle it because it runs independently in the
firmware and driver might not see the requests ever again. Patch also
missed the fact some usages of intel_context_is_banned in the GuC backend
needed replacing with newly introduced intel_context_is_schedulable.

Fix the first issue by calling into backend revoke when we know this is
the last chance to do it. Fix the second issue by replacing
intel_context_is_banned with intel_context_is_schedulable, which should
always be safe since latter is a superset of the former.

v2:
  * Just call ce->ops->revoke unconditionally. (Andrzej)


CI is happy - could I get some acks for the GuC backend changes please?

Regards,

Tvrtko


Signed-off-by: Tvrtko Ursulin 
Fixes: 45c64ecf97ee ("drm/i915: Improve user experience and driver robustness under 
SIGINT or similar")
Cc: Andrzej Hajda 
Cc: John Harrison 
Cc: Daniele Ceraolo Spurio 
Cc:  # v6.0+
Reviewed-by: Andrzej Hajda 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +-
  drivers/gpu/drm/i915/gt/intel_context.c   |  5 ++--
  drivers/gpu/drm/i915/gt/intel_context.h   |  3 +--
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 26 +--
  4 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0bcde53c50c6..1e29b1e6d186 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1387,14 +1387,8 @@ kill_engines(struct i915_gem_engines *engines, bool 
exit, bool persistent)
 */
for_each_gem_engine(ce, engines, it) {
struct intel_engine_cs *engine;
-   bool skip = false;
  
-		if (exit)

-   skip = intel_context_set_exiting(ce);
-   else if (!persistent)
-   skip = intel_context_exit_nonpersistent(ce, NULL);
-
-   if (skip)
+   if ((exit || !persistent) && intel_context_revoke(ce))
continue; /* Already marked. */
  
  		/*

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 654a092ed3d6..e94365b08f1e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -614,13 +614,12 @@ bool intel_context_ban(struct intel_context *ce, struct 
i915_request *rq)
return ret;
  }
  
-bool intel_context_exit_nonpersistent(struct intel_context *ce,

- struct i915_request *rq)
+bool intel_context_revoke(struct intel_context *ce)
  {
bool ret = intel_context_set_exiting(ce);
  
  	if (ce->ops->revoke)

-   ce->ops->revoke(ce, rq, ce->engine->props.preempt_timeout_ms);
+   ce->ops->revoke(ce, NULL, ce->engine->props.preempt_timeout_ms);
  
  	return ret;

  }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index 8e2d70630c49..be09fb2e883a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -329,8 +329,7 @@ static inline bool intel_context_set_exiting(struct 
intel_context *ce)
	return test_and_set_bit(CONTEXT_EXITING, &ce->flags);
  }
  
-bool intel_context_exit_nonpersistent(struct intel_context *ce,

- struct i915_request *rq);
+bool intel_context_revoke(struct intel_context *ce);
  
  static inline bool

  intel_context_force_single_submission(const struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0ef295a94060..88a4476b8e92 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -685,7 +685,7 @@ static int __guc_add_request(struct intel_guc *guc, struct 
i915_request *rq)
 * Corner case where requests were sitting in the priority list or a
 * request resubmitted after the context was banned.
 */
-   if (unlikely(intel_context_is_banned(ce))) {
+   if (unlikely(!intel_context_is_schedulable(ce))) {
i915_request_put(i915_request_mark_eio(rq));
intel_engine_signal_breadcrumbs(ce->engine);
return 0;
@@ -871,15 +871,15 @@ static int guc_wq_item_append(struct intel_guc *guc,
  struct i915_request *rq)
  {
struct intel_context *ce = request_to_scheduling_context(rq);
-   int ret = 0;
+   int ret;
  
-	if (likely(!intel_context_is_banned(ce))) {

-   ret = __guc_wq_item_append(rq);
+   if (unlikely(

Re: [Intel-gfx] [PATCH v2 14/14] drm/i915/mtl: Add multicast steering for media GT

2022-10-04 Thread Tvrtko Ursulin



On 03/10/2022 20:32, Matt Roper wrote:

On Mon, Oct 03, 2022 at 09:56:18AM +0100, Tvrtko Ursulin wrote:


Hi Matt,

On 01/10/2022 01:45, Matt Roper wrote:

MTL's media GT only has a single type of steering ("OAADDRM") which
selects between media slice 0 and media slice 1.  We'll always steer to
media slice 0 unless it is fused off (which is the case when VD0, VE0,
and SFC0 are all reported as unavailable).

Bspec: 67789
Signed-off-by: Matt Roper 
---
   drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 19 +--
   drivers/gpu/drm/i915/gt/intel_gt_types.h|  1 +
   drivers/gpu/drm/i915/gt/intel_workarounds.c | 18 +-
   3 files changed, 35 insertions(+), 3 deletions(-)


[snip]


+static void
+mtl_media_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+   /*
+* Unlike older platforms, we no longer setup implicit steering here;
+* all MCR accesses are explicitly steered.
+*/
+   if (drm_debug_enabled(DRM_UT_DRIVER)) {
+   struct drm_printer p = drm_debug_printer("MCR Steering:");
+
+   intel_gt_mcr_report_steering(&p, gt, false);
+   }
+}
+
   static void
   gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
   {
struct drm_i915_private *i915 = gt->i915;
-   if (IS_METEORLAKE(i915) && gt->type == GT_PRIMARY)
+   if (IS_METEORLAKE(i915) && gt->type == GT_MEDIA)
+   mtl_media_gt_workarounds_init(gt, wal);
+   else if (IS_METEORLAKE(i915) && gt->type == GT_PRIMARY)
mtl_3d_gt_workarounds_init(gt, wal);
else if (IS_PONTEVECCHIO(i915))
pvc_gt_workarounds_init(gt, wal);


Casually reading only - wouldn't it be nicer if the if-ladder in here
(gt_init_workarounds) would have a single case per platform, and then you'd
fork further (3d vs media) in MTL specific function?


Actually, that reminds me that we probably need to change this in a
different direction --- starting with MTL, we should stop tying
workarounds to the platform (IS_METEORLAKE) but rather tie them to the
IP version (i.e., GRAPHICS_VER or MEDIA_VER) since in the future the
same chiplets can potentially be re-used on multiple platforms.
Conversely, you could also potentially have variants of the same
"platform" (e.g., MTL) that incorporate chiplets with different IP
versions (and thus need distinct lists of workarounds and such).

At the moment MTL is the only platform we have with the standalone media
design so there's no potential for mix-and-match of chiplets yet, and
IS_METEORLAKE works fine in the short term, but we do need to start
planning ahead and moving off of platform checks in areas of the driver
like this.



Also, series ends up with mtl_media_gt_workarounds_init and
mtl_3d_gt_workarounds_init apparently 100% identical. You will need two
copies in the future?


Yes, the two GTs are expected to end up with completely different sets
of workarounds once the platform is enabled.  We've just been delaying
on actually sending the new MTL workarounds upstream to give the
workaround database a bit more time to settle.


Ah yes, I misread the banner printed from those two "as no workaround 
will be programmed from here" and thought why you'd need two copies of a 
nearly empty function and two identical comments. My bad.


You will end up with three instances of "if debug report steering" so 
could in theory add a helper for that. For some minimal value of 
consolidation.. up to you.


Regards,

Tvrtko


Re: [PATCH] drm/i915/pmu: Match frequencies reported by PMU and sysfs

2022-10-04 Thread Tvrtko Ursulin



On 03/10/2022 20:24, Ashutosh Dixit wrote:

PMU and sysfs use different wakeref's to "interpret" zero freq. Sysfs uses
runtime PM wakeref (see intel_rps_read_punit_req and
intel_rps_read_actual_frequency). PMU uses the GT parked/unparked
wakeref. In general the GT wakeref is held for less time than the runtime
PM wakeref which causes PMU to report a lower average freq than the average
freq obtained from sampling sysfs.

To resolve this, use the same freq functions (and wakeref's) in PMU as
those used in sysfs.

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7025
Reported-by: Ashwin Kumar Kulkarni 
Cc: Tvrtko Ursulin 
Signed-off-by: Ashutosh Dixit 
---
  drivers/gpu/drm/i915/i915_pmu.c | 27 ++-
  1 file changed, 2 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 958b37123bf1..eda03f264792 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -371,37 +371,16 @@ static void
  frequency_sample(struct intel_gt *gt, unsigned int period_ns)
  {
struct drm_i915_private *i915 = gt->i915;
-   struct intel_uncore *uncore = gt->uncore;
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_rps *rps = &gt->rps;
  
  	if (!frequency_sampling_enabled(pmu))

return;
  
-	/* Report 0/0 (actual/requested) frequency while parked. */

-   if (!intel_gt_pm_get_if_awake(gt))
-   return;
-
if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
-   u32 val;
-
-   /*
-* We take a quick peek here without using forcewake
-* so that we don't perturb the system under observation
-* (forcewake => !rc6 => increased power use). We expect
-* that if the read fails because it is outside of the
-* mmio power well, then it will return 0 -- in which
-* case we assume the system is running at the intended
-* frequency. Fortunately, the read should rarely fail!
-*/
-   val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
-   if (val)
-   val = intel_rps_get_cagf(rps, val);
-   else
-   val = rps->cur_freq;
-
		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
-   intel_gpu_freq(rps, val), period_ns / 1000);
+   intel_rps_read_actual_frequency(rps),
+   period_ns / 1000);
}
  
  	if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {


What is software tracking of requested frequency showing when GT is 
parked or runtime suspended? With this change sampling would be outside 
any such checks so we need to be sure reported value makes sense.


Although more important open is around what is actually correct.

For instance how does the patch affect RC6 and power? I don't know how 
power management of different blocks is wired up, so personally I would 
only be able to look at it empirically. In other words what I am asking 
is this - if we changed from skipping obtaining forcewake even when 
unparked, to obtaining forcewake if not runtime suspended - what 
hardware blocks does that power up and how it affects RC6 and power? Can 
it affect actual frequency or not? (Will "something" power up the clocks 
just because we will be getting forcewake?)


Or maybe question simplified - does 200Hz polling on existing sysfs 
actual frequency field disturb the system under some circumstances? 
(Increases power and decreases RC6.) If it does then that would be a 
problem. We want a solution which shows the real data, but where the act 
of monitoring itself does not change it too much. If it doesn't then 
it's okay.


Could you somehow investigate on these topics? Maybe log RAPL GPU power 
while polling on sysfs, versus getting the actual frequency from the 
existing PMU implementation and see if that shows anything? Or actually 
simpler - RAPL GPU power for current PMU intel_gpu_top versus this 
patch? On idle(-ish) desktop workloads perhaps? Power and frequency 
graphed for both.


Regards,

Tvrtko


@@ -409,8 +388,6 @@ frequency_sample(struct intel_gt *gt, unsigned int 
period_ns)
intel_rps_get_requested_frequency(rps),
period_ns / 1000);
}
-
-   intel_gt_pm_put_async(gt);
  }
  
  static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)


[PATCH v2] drm/i915/guc: Fix revocation of non-persistent contexts

2022-10-03 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Patch which added graceful exit for non-persistent contexts missed the
fact it is not enough to set the exiting flag on a context and let the
backend handle it from there.

GuC backend cannot handle it because it runs independently in the
firmware and driver might not see the requests ever again. Patch also
missed the fact some usages of intel_context_is_banned in the GuC backend
needed replacing with newly introduced intel_context_is_schedulable.

Fix the first issue by calling into backend revoke when we know this is
the last chance to do it. Fix the second issue by replacing
intel_context_is_banned with intel_context_is_schedulable, which should
always be safe since latter is a superset of the former.

v2:
 * Just call ce->ops->revoke unconditionally. (Andrzej)

Signed-off-by: Tvrtko Ursulin 
Fixes: 45c64ecf97ee ("drm/i915: Improve user experience and driver robustness 
under SIGINT or similar")
Cc: Andrzej Hajda 
Cc: John Harrison 
Cc: Daniele Ceraolo Spurio 
Cc:  # v6.0+
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +-
 drivers/gpu/drm/i915/gt/intel_context.c   |  5 ++--
 drivers/gpu/drm/i915/gt/intel_context.h   |  3 +--
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 26 +--
 4 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0bcde53c50c6..1e29b1e6d186 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1387,14 +1387,8 @@ kill_engines(struct i915_gem_engines *engines, bool 
exit, bool persistent)
 */
for_each_gem_engine(ce, engines, it) {
struct intel_engine_cs *engine;
-   bool skip = false;
 
-   if (exit)
-   skip = intel_context_set_exiting(ce);
-   else if (!persistent)
-   skip = intel_context_exit_nonpersistent(ce, NULL);
-
-   if (skip)
+   if ((exit || !persistent) && intel_context_revoke(ce))
continue; /* Already marked. */
 
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 654a092ed3d6..e94365b08f1e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -614,13 +614,12 @@ bool intel_context_ban(struct intel_context *ce, struct 
i915_request *rq)
return ret;
 }
 
-bool intel_context_exit_nonpersistent(struct intel_context *ce,
- struct i915_request *rq)
+bool intel_context_revoke(struct intel_context *ce)
 {
bool ret = intel_context_set_exiting(ce);
 
if (ce->ops->revoke)
-   ce->ops->revoke(ce, rq, ce->engine->props.preempt_timeout_ms);
+   ce->ops->revoke(ce, NULL, ce->engine->props.preempt_timeout_ms);
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index 8e2d70630c49..be09fb2e883a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -329,8 +329,7 @@ static inline bool intel_context_set_exiting(struct 
intel_context *ce)
	return test_and_set_bit(CONTEXT_EXITING, &ce->flags);
 }
 
-bool intel_context_exit_nonpersistent(struct intel_context *ce,
- struct i915_request *rq);
+bool intel_context_revoke(struct intel_context *ce);
 
 static inline bool
 intel_context_force_single_submission(const struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0ef295a94060..88a4476b8e92 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -685,7 +685,7 @@ static int __guc_add_request(struct intel_guc *guc, struct 
i915_request *rq)
 * Corner case where requests were sitting in the priority list or a
 * request resubmitted after the context was banned.
 */
-   if (unlikely(intel_context_is_banned(ce))) {
+   if (unlikely(!intel_context_is_schedulable(ce))) {
i915_request_put(i915_request_mark_eio(rq));
intel_engine_signal_breadcrumbs(ce->engine);
return 0;
@@ -871,15 +871,15 @@ static int guc_wq_item_append(struct intel_guc *guc,
  struct i915_request *rq)
 {
struct intel_context *ce = request_to_scheduling_context(rq);
-   int ret = 0;
+   int ret;
 
-   if (likely(!intel_context_is_banned(ce))) {
-   ret = __guc_wq_item_append(rq);
+   if (unlikely(!intel_context_is_schedulable(ce)))
+   return 0;
 
-   if (unlikely(ret == -EBUSY)) {
- 

Re: [Intel-gfx] [PATCH v4 3/4] drm/i915: Make the heartbeat play nice with long pre-emption timeouts

2022-10-03 Thread Tvrtko Ursulin



On 03/10/2022 08:53, Tvrtko Ursulin wrote:


On 30/09/2022 18:44, John Harrison wrote:

On 9/30/2022 02:22, Tvrtko Ursulin wrote:

On 29/09/2022 17:21, John Harrison wrote:

On 9/29/2022 00:42, Tvrtko Ursulin wrote:

On 29/09/2022 03:18, john.c.harri...@intel.com wrote:

From: John Harrison 

Compute workloads are inherently not pre-emptible for long periods on
current hardware. As a workaround for this, the pre-emption timeout
for compute capable engines was disabled. This is undesirable with 
GuC
submission as it prevents per engine reset of hung contexts. Hence 
the

next patch will re-enable the timeout but bumped up by an order of
magnitude.

However, the heartbeat might not respect that. Depending upon current
activity, a pre-emption to the heartbeat pulse might not even be
attempted until the last heartbeat period. Which means that only one
period is granted for the pre-emption to occur. With the aforesaid
bump, the pre-emption timeout could be significantly larger than this
heartbeat period.

So adjust the heartbeat code to take the pre-emption timeout into
account. When it reaches the final (high priority) period, it now
ensures the delay before hitting reset is bigger than the pre-emption
timeout.

v2: Fix for selftests which adjust the heartbeat period manually.

Signed-off-by: John Harrison 
---
  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 19 
+++

  1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c 
b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c

index a3698f611f457..823a790a0e2ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -22,9 +22,28 @@
    static bool next_heartbeat(struct intel_engine_cs *engine)
  {
+    struct i915_request *rq;
  long delay;
    delay = READ_ONCE(engine->props.heartbeat_interval_ms);
+
+    rq = engine->heartbeat.systole;
+
+    if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER &&
+    delay == engine->defaults.heartbeat_interval_ms) {


Maybe I forgot but what is the reason for the check against the 
default heartbeat interval?
That's the 'v2: fix for selftests that manually adjust the 
heartbeat'. If something (or someone) has explicitly set an override 
of the heartbeat then it has to be assumed that they know what they 
are doing, and if things don't work any more that's their problem. 
But if we don't respect their override then they won't get the 
timings they expect and the selftest will fail.


Isn't this a bit too strict for the non-selftest case? If the new 
concept is extending the last pulse to guarantee preemption, then I 
think we could allow tweaking of the heartbeat period. Like what if 
user wants 1s, or 10s instead of 2.5s - why would that need to break 
the improvement from this patch?

Then the user is back to where they were before this patch.



In what ways selftests fail? Are they trying to guess time to reset 
based on the hearbeat period set? If so perhaps add a helper to query 
it based on the last pulse extension.


I don't recall. It was six months ago when I was actually working on 
this. And right now I do not have the time to go back and re-run all 
the testing and re-write a bunch of self tests with whole new helpers 
and algorithms and whatever else might be necessary to polish this to 
perfection. And in the meantime, all the existing issues are still 
present - there is no range checking on any of this stuff, it is very 
possible for a driver with default settings to break a legal workload 
because the heartbeat and pre-emption are fighting with each other, we 
don't even have per engine resets enabled, etc.


Maybe it could be even better with a follow up patch. Feel free to do 
that. But as it stands, this patch set significantly improves the 
situation without making anything worse.


As we seem to be in agreement that the check against default heartbeat 
is a hack with only purpose to work around assumptions made by 
selftests, then please file a Jira about removing it (this hack). Then 
work can be assigned to someone to clean it up. With that done I would 
agree the series is indeed an improvement and it would have my ack.


One more thing - put a comment in the code along the lines of 
"FIXME/HACK: Work around selftests assumptions by only extending the 
last heartbeat if the period is at default value". The the Jira can 
associate to that comment.


Until that is resolved it may also be worth emitting a drm_notice if 
heartbeat is changed via sysfs? Informing users the things will not work 
as expected if they fiddle with it. Whether as a blanket warning or 
checking first the 3-4x heartbeat vs preempt timeout value. That message 
should then go away once the follow up work to fix the selftests is 
done. See what the other reviewers will think.


Regards,

Tvrtko


Re: [Intel-gfx] [PATCH v2 14/14] drm/i915/mtl: Add multicast steering for media GT

2022-10-03 Thread Tvrtko Ursulin



Hi Matt,

On 01/10/2022 01:45, Matt Roper wrote:

MTL's media GT only has a single type of steering ("OAADDRM") which
selects between media slice 0 and media slice 1.  We'll always steer to
media slice 0 unless it is fused off (which is the case when VD0, VE0,
and SFC0 are all reported as unavailable).

Bspec: 67789
Signed-off-by: Matt Roper 
---
  drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 19 +--
  drivers/gpu/drm/i915/gt/intel_gt_types.h|  1 +
  drivers/gpu/drm/i915/gt/intel_workarounds.c | 18 +-
  3 files changed, 35 insertions(+), 3 deletions(-)


[snip]


+static void
+mtl_media_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+   /*
+* Unlike older platforms, we no longer setup implicit steering here;
+* all MCR accesses are explicitly steered.
+*/
+   if (drm_debug_enabled(DRM_UT_DRIVER)) {
+   struct drm_printer p = drm_debug_printer("MCR Steering:");
+
+   intel_gt_mcr_report_steering(&p, gt, false);
+   }
+}
+
  static void
  gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
  {
struct drm_i915_private *i915 = gt->i915;
  
-	if (IS_METEORLAKE(i915) && gt->type == GT_PRIMARY)

+   if (IS_METEORLAKE(i915) && gt->type == GT_MEDIA)
+   mtl_media_gt_workarounds_init(gt, wal);
+   else if (IS_METEORLAKE(i915) && gt->type == GT_PRIMARY)
mtl_3d_gt_workarounds_init(gt, wal);
else if (IS_PONTEVECCHIO(i915))
pvc_gt_workarounds_init(gt, wal);


Casually reading only - wouldn't it be nicer if the if-ladder in here 
(gt_init_workarounds) would have a single case per platform, and then 
you'd fork further (3d vs media) in MTL specific function?


Also, series ends up with mtl_media_gt_workarounds_init and 
mtl_3d_gt_workarounds_init apparently 100% identical. You will need two 
copies in the future?


Regards,

Tvrtko


Re: [Intel-gfx] [PATCH] drm/i915/guc: Fix revocation of non-persistent contexts

2022-10-03 Thread Tvrtko Ursulin



On 30/09/2022 15:52, Andrzej Hajda wrote:

On 30.09.2022 11:47, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Patch which added graceful exit for non-persistent contexts missed the
fact it is not enough to set the exiting flag on a context and let the
backend handle it from there.

GuC backend cannot handle it because it runs independently in the
firmware and driver might not see the requests ever again. Patch also
missed the fact some usages of intel_context_is_banned in the GuC backend
needed replacing with newly introduced intel_context_is_schedulable.

Fix the first issue by calling into backend revoke when we know this is
the last chance to do it. Fix the second issue by replacing
intel_context_is_banned with intel_context_is_schedulable, which should
always be safe since latter is a superset of the former.


negation of the latter is a ...?


I did not get what you meant here.


Signed-off-by: Tvrtko Ursulin 
Fixes: 45c64ecf97ee ("drm/i915: Improve user experience and driver 
robustness under SIGINT or similar")

Cc: Andrzej Hajda 
Cc: John Harrison 
Cc: Daniele Ceraolo Spurio 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +-
  drivers/gpu/drm/i915/gt/intel_context.c   | 14 +++---
  drivers/gpu/drm/i915/gt/intel_context.h   |  8 +-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 26 +--
  4 files changed, 25 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c

index 0bcde53c50c6..1e29b1e6d186 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1387,14 +1387,8 @@ kill_engines(struct i915_gem_engines *engines, 
bool exit, bool persistent)

   */
  for_each_gem_engine(ce, engines, it) {
  struct intel_engine_cs *engine;
-    bool skip = false;
-    if (exit)
-    skip = intel_context_set_exiting(ce);
-    else if (!persistent)
-    skip = intel_context_exit_nonpersistent(ce, NULL);
-
-    if (skip)
+    if ((exit || !persistent) && intel_context_revoke(ce))
  continue; /* Already marked. */
  /*
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c

index 654a092ed3d6..398b2a9eed61 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -614,13 +614,19 @@ bool intel_context_ban(struct intel_context *ce, 
struct i915_request *rq)

  return ret;
  }
-bool intel_context_exit_nonpersistent(struct intel_context *ce,
-  struct i915_request *rq)
+bool intel_context_revoke(struct intel_context *ce)
  {
  bool ret = intel_context_set_exiting(ce);
-    if (ce->ops->revoke)
-    ce->ops->revoke(ce, rq, ce->engine->props.preempt_timeout_ms);
+    if (!ret && intel_engine_uses_guc(ce->engine)) {
+    /*
+ * With GuC backend we have to notify it of revocation as soon
+ * as the exiting flag is set.
+ */
+    if (ce->ops->revoke)
+    ce->ops->revoke(ce, NULL,
+    ce->engine->props.preempt_timeout_ms);
+    }


Now revoke is called only with GuC, previously it was called also for 
other backends in case non-exiting/non-persistent, is it OK?


It is okay (execlists has no revoke vfunc, ringbuffer has it but only 
works if target request is known), but agreed it is a bit ugly. I was in 
two minds which way to go. Perhaps it would indeed be cleaner to go 
unconditional. I will resend with that change, copying stable this time 
round (since 6.0 is out), and can keep your r-b?


Regards,

Tvrtko




  return ret;
  }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h

index 8e2d70630c49..40f8809d14ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -319,18 +319,12 @@ static inline bool 
intel_context_is_schedulable(const struct intel_context *ce)

 !test_bit(CONTEXT_BANNED, &ce->flags);
  }
-static inline bool intel_context_is_exiting(const struct 
intel_context *ce)

-{
-    return test_bit(CONTEXT_EXITING, >flags);
-}
-
  static inline bool intel_context_set_exiting(struct intel_context *ce)
  {
  return test_and_set_bit(CONTEXT_EXITING, &ce->flags);
  }
-bool intel_context_exit_nonpersistent(struct intel_context *ce,
-  struct i915_request *rq);
+bool intel_context_revoke(struct intel_context *ce);
  static inline bool
  intel_context_force_single_submission(const struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

index 0ef295a94060..88a4476b8e92 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -685,7 +685,7 @@

Re: [Intel-gfx] [PATCH v4 3/4] drm/i915: Make the heartbeat play nice with long pre-emption timeouts

2022-10-03 Thread Tvrtko Ursulin



On 30/09/2022 18:44, John Harrison wrote:

On 9/30/2022 02:22, Tvrtko Ursulin wrote:

On 29/09/2022 17:21, John Harrison wrote:

On 9/29/2022 00:42, Tvrtko Ursulin wrote:

On 29/09/2022 03:18, john.c.harri...@intel.com wrote:

From: John Harrison 

Compute workloads are inherently not pre-emptible for long periods on
current hardware. As a workaround for this, the pre-emption timeout
for compute capable engines was disabled. This is undesirable with GuC
submission as it prevents per engine reset of hung contexts. Hence the
next patch will re-enable the timeout but bumped up by an order of
magnitude.

However, the heartbeat might not respect that. Depending upon current
activity, a pre-emption to the heartbeat pulse might not even be
attempted until the last heartbeat period. Which means that only one
period is granted for the pre-emption to occur. With the aforesaid
bump, the pre-emption timeout could be significantly larger than this
heartbeat period.

So adjust the heartbeat code to take the pre-emption timeout into
account. When it reaches the final (high priority) period, it now
ensures the delay before hitting reset is bigger than the pre-emption
timeout.

v2: Fix for selftests which adjust the heartbeat period manually.

Signed-off-by: John Harrison 
---
  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 19 
+++

  1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c 
b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c

index a3698f611f457..823a790a0e2ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -22,9 +22,28 @@
    static bool next_heartbeat(struct intel_engine_cs *engine)
  {
+    struct i915_request *rq;
  long delay;
    delay = READ_ONCE(engine->props.heartbeat_interval_ms);
+
+    rq = engine->heartbeat.systole;
+
+    if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER &&
+    delay == engine->defaults.heartbeat_interval_ms) {


Maybe I forgot but what is the reason for the check against the 
default heartbeat interval?
That's the 'v2: fix for selftests that manually adjust the 
heartbeat'. If something (or someone) has explicitly set an override 
of the heartbeat then it has to be assumed that they know what they 
are doing, and if things don't work any more that's their problem. 
But if we don't respect their override then they won't get the 
timings they expect and the selftest will fail.


Isn't this a bit too strict for the non-selftest case? If the new 
concept is extending the last pulse to guarantee preemption, then I 
think we could allow tweaking of the heartbeat period. Like what if 
user wants 1s, or 10s instead of 2.5s - why would that need to break 
the improvement from this patch?

Then the user is back to where they were before this patch.



In what ways selftests fail? Are they trying to guess time to reset 
based on the hearbeat period set? If so perhaps add a helper to query 
it based on the last pulse extension.


I don't recall. It was six months ago when I was actually working on 
this. And right now I do not have the time to go back and re-run all the 
testing and re-write a bunch of self tests with whole new helpers and 
algorithms and whatever else might be necessary to polish this to 
perfection. And in the meantime, all the existing issues are still 
present - there is no range checking on any of this stuff, it is very 
possible for a driver with default settings to break a legal workload 
because the heartbeat and pre-emption are fighting with each other, we 
don't even have per engine resets enabled, etc.


Maybe it could be even better with a follow up patch. Feel free to do 
that. But as it stands, this patch set significantly improves the 
situation without making anything worse.


As we seem to be in agreement that the check against default heartbeat 
is a hack with only purpose to work around assumptions made by 
selftests, then please file a Jira about removing it (this hack). Then 
work can be assigned to someone to clean it up. With that done I would 
agree the series is indeed an improvement and it would have my ack.


Regards,

Tvrtko


[PATCH] drm/i915/guc: Fix revocation of non-persistent contexts

2022-09-30 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Patch which added graceful exit for non-persistent contexts missed the
fact it is not enough to set the exiting flag on a context and let the
backend handle it from there.

GuC backend cannot handle it because it runs independently in the
firmware and driver might not see the requests ever again. Patch also
missed the fact some usages of intel_context_is_banned in the GuC backend
needed replacing with newly introduced intel_context_is_schedulable.

Fix the first issue by calling into backend revoke when we know this is
the last chance to do it. Fix the second issue by replacing
intel_context_is_banned with intel_context_is_schedulable, which should
always be safe since latter is a superset of the former.

Signed-off-by: Tvrtko Ursulin 
Fixes: 45c64ecf97ee ("drm/i915: Improve user experience and driver robustness 
under SIGINT or similar")
Cc: Andrzej Hajda 
Cc: John Harrison 
Cc: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +-
 drivers/gpu/drm/i915/gt/intel_context.c   | 14 +++---
 drivers/gpu/drm/i915/gt/intel_context.h   |  8 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 26 +--
 4 files changed, 25 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0bcde53c50c6..1e29b1e6d186 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1387,14 +1387,8 @@ kill_engines(struct i915_gem_engines *engines, bool 
exit, bool persistent)
 */
for_each_gem_engine(ce, engines, it) {
struct intel_engine_cs *engine;
-   bool skip = false;
 
-   if (exit)
-   skip = intel_context_set_exiting(ce);
-   else if (!persistent)
-   skip = intel_context_exit_nonpersistent(ce, NULL);
-
-   if (skip)
+   if ((exit || !persistent) && intel_context_revoke(ce))
continue; /* Already marked. */
 
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 654a092ed3d6..398b2a9eed61 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -614,13 +614,19 @@ bool intel_context_ban(struct intel_context *ce, struct 
i915_request *rq)
return ret;
 }
 
-bool intel_context_exit_nonpersistent(struct intel_context *ce,
- struct i915_request *rq)
+bool intel_context_revoke(struct intel_context *ce)
 {
bool ret = intel_context_set_exiting(ce);
 
-   if (ce->ops->revoke)
-   ce->ops->revoke(ce, rq, ce->engine->props.preempt_timeout_ms);
+   if (!ret && intel_engine_uses_guc(ce->engine)) {
+   /*
+* With GuC backend we have to notify it of revocation as soon
+* as the exiting flag is set.
+*/
+   if (ce->ops->revoke)
+   ce->ops->revoke(ce, NULL,
+   ce->engine->props.preempt_timeout_ms);
+   }
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index 8e2d70630c49..40f8809d14ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -319,18 +319,12 @@ static inline bool intel_context_is_schedulable(const 
struct intel_context *ce)
   !test_bit(CONTEXT_BANNED, >flags);
 }
 
-static inline bool intel_context_is_exiting(const struct intel_context *ce)
-{
-   return test_bit(CONTEXT_EXITING, >flags);
-}
-
 static inline bool intel_context_set_exiting(struct intel_context *ce)
 {
return test_and_set_bit(CONTEXT_EXITING, >flags);
 }
 
-bool intel_context_exit_nonpersistent(struct intel_context *ce,
- struct i915_request *rq);
+bool intel_context_revoke(struct intel_context *ce);
 
 static inline bool
 intel_context_force_single_submission(const struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0ef295a94060..88a4476b8e92 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -685,7 +685,7 @@ static int __guc_add_request(struct intel_guc *guc, struct 
i915_request *rq)
 * Corner case where requests were sitting in the priority list or a
 * request resubmitted after the context was banned.
 */
-   if (unlikely(intel_context_is_banned(ce))) {
+   if (unlikely(!intel_context_is_schedulable(ce))) {
i915_request_put(i915_request_mark_eio(rq));
intel_engine_signal_breadcrumbs(ce->engine);

Re: [Intel-gfx] [PATCH v4 3/4] drm/i915: Make the heartbeat play nice with long pre-emption timeouts

2022-09-30 Thread Tvrtko Ursulin



On 29/09/2022 17:21, John Harrison wrote:

On 9/29/2022 00:42, Tvrtko Ursulin wrote:

On 29/09/2022 03:18, john.c.harri...@intel.com wrote:

From: John Harrison 

Compute workloads are inherently not pre-emptible for long periods on
current hardware. As a workaround for this, the pre-emption timeout
for compute capable engines was disabled. This is undesirable with GuC
submission as it prevents per engine reset of hung contexts. Hence the
next patch will re-enable the timeout but bumped up by an order of
magnitude.

However, the heartbeat might not respect that. Depending upon current
activity, a pre-emption to the heartbeat pulse might not even be
attempted until the last heartbeat period. Which means that only one
period is granted for the pre-emption to occur. With the aforesaid
bump, the pre-emption timeout could be significantly larger than this
heartbeat period.

So adjust the heartbeat code to take the pre-emption timeout into
account. When it reaches the final (high priority) period, it now
ensures the delay before hitting reset is bigger than the pre-emption
timeout.

v2: Fix for selftests which adjust the heartbeat period manually.

Signed-off-by: John Harrison 
---
  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 19 +++
  1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c 
b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c

index a3698f611f457..823a790a0e2ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -22,9 +22,28 @@
    static bool next_heartbeat(struct intel_engine_cs *engine)
  {
+    struct i915_request *rq;
  long delay;
    delay = READ_ONCE(engine->props.heartbeat_interval_ms);
+
+    rq = engine->heartbeat.systole;
+
+    if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER &&
+    delay == engine->defaults.heartbeat_interval_ms) {


Maybe I forgot but what is the reason for the check against the 
default heartbeat interval?
That's the 'v2: fix for selftests that manually adjust the heartbeat'. 
If something (or someone) has explicitly set an override of the 
heartbeat then it has to be assumed that they know what they are doing, 
and if things don't work any more that's their problem. But if we don't 
respect their override then they won't get the timings they expect and 
the selftest will fail.


Isn't this a bit too strict for the non-selftest case? If the new 
concept is extending the last pulse to guarantee preemption, then I 
think we could allow tweaking of the heartbeat period. Like what if user 
wants 1s, or 10s instead of 2.5s - why would that need to break the 
improvement from this patch?


In what ways do the selftests fail? Are they trying to guess the time to 
reset based on the heartbeat period set? If so perhaps add a helper to 
query it based on the last pulse extension.


Regards,

Tvrtko


[PULL] drm-intel-next-fixes

2022-09-29 Thread Tvrtko Ursulin
Hi Dave, Daniel,

A few fixes for the upcoming merge window. Not many but most are pretty
important.

Another rather important one is missing due to being too conflicty, but will
arrive via drm-intel-fixes (7738be973fc4 ("drm/i915/gt: Perf_limit_reasons
are only available for Gen11+")).

Regards,

Tvrtko

drm-intel-next-fixes-2022-09-29:
- Fix release build bug in 'remove GuC log size module parameters' (John 
Harrison)
- Remove ipc_enabled from struct drm_i915_private (Jani Nikula)
- Do not cleanup obj with NULL bo->resource (Nirmoy Das)
- Fix device info for devices without display (Jani Nikula)
- Force DPLL calculation for TC ports after readout (Ville Syrjälä)
- Use i915_vm_put on ppgtt_create error paths (Chris Wilson)
The following changes since commit 320305923c88258ce50c75bf721e9bf2e420ab27:

  Merge tag 'du-next-20220907' of git://linuxtv.org/pinchartl/media into 
drm-next (2022-09-23 03:52:08 +1000)

are available in the Git repository at:

  git://anongit.freedesktop.org/drm/drm-intel 
tags/drm-intel-next-fixes-2022-09-29

for you to fetch changes up to 20e377e7b2e7c327039f10db80ba5bcc1f6c882d:

  drm/i915/gt: Use i915_vm_put on ppgtt_create error paths (2022-09-27 11:05:33 
+0100)


- Fix release build bug in 'remove GuC log size module parameters' (John 
Harrison)
- Remove ipc_enabled from struct drm_i915_private (Jani Nikula)
- Do not cleanup obj with NULL bo->resource (Nirmoy Das)
- Fix device info for devices without display (Jani Nikula)
- Force DPLL calculation for TC ports after readout (Ville Syrjälä)
- Use i915_vm_put on ppgtt_create error paths (Chris Wilson)


Chris Wilson (1):
  drm/i915/gt: Use i915_vm_put on ppgtt_create error paths

Jani Nikula (2):
  drm/i915/display: remove ipc_enabled from struct drm_i915_private
  drm/i915: fix device info for devices without display

John Harrison (1):
  drm/i915/guc: Fix release build bug in 'remove log size module parameters'

Nirmoy Das (1):
  drm/i915: Do not cleanup obj with NULL bo->resource

Ville Syrjälä (1):
  drm/i915: Force DPLL calculation for TC ports after readout

 drivers/gpu/drm/i915/display/intel_ddi.c   | 18 --
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c   | 16 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c   | 58 +++---
 drivers/gpu/drm/i915/gt/intel_gtt.c|  3 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 25 +
 drivers/gpu/drm/i915/i915_drv.h|  2 --
 drivers/gpu/drm/i915/i915_pci.c| 11 +++---
 drivers/gpu/drm/i915/intel_device_info.c   |  6 
 9 files changed, 70 insertions(+), 71 deletions(-)


Re: [Intel-gfx] [PATCH v4 4/4] drm/i915: Improve long running compute w/a for GuC submission

2022-09-29 Thread Tvrtko Ursulin



On 29/09/2022 03:18, john.c.harri...@intel.com wrote:

From: John Harrison 

A workaround was added to the driver to allow compute workloads to run
'forever' by disabling pre-emption on the RCS engine for Gen12.
It is not totally unbound as the heartbeat will kick in eventually
and cause a reset of the hung engine.

However, this does not work well in GuC submission mode. In GuC mode,
the pre-emption timeout is how GuC detects hung contexts and triggers
a per engine reset. Thus, disabling the timeout means also losing all
per engine reset ability. A full GT reset will still occur when the
heartbeat finally expires, but that is a much more destructive and
undesirable mechanism.

The purpose of the workaround is actually to give compute tasks longer
to reach a pre-emption point after a pre-emption request has been
issued. This is necessary because Gen12 does not support mid-thread
pre-emption and compute tasks can have long running threads.

So, rather than disabling the timeout completely, just set it to a
'long' value.

v2: Review feedback from Tvrtko - must hard code the 'long' value
instead of determining it algorithmically. So make it an extra CONFIG
definition. Also, remove the execlist centric comment from the
existing pre-emption timeout CONFIG option given that it applies to
more than just execlists.

Signed-off-by: John Harrison 
Reviewed-by: Daniele Ceraolo Spurio  (v1)
Acked-by: Michal Mrozek 


Acked-by: Tvrtko Ursulin 

Regards,

Tvrtko


---
  drivers/gpu/drm/i915/Kconfig.profile  | 26 +++
  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  9 ++--
  2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile 
b/drivers/gpu/drm/i915/Kconfig.profile
index 39328567c2007..7cc38d25ee5c8 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -57,10 +57,28 @@ config DRM_I915_PREEMPT_TIMEOUT
default 640 # milliseconds
help
  How long to wait (in milliseconds) for a preemption event to occur
- when submitting a new context via execlists. If the current context
- does not hit an arbitration point and yield to HW before the timer
- expires, the HW will be reset to allow the more important context
- to execute.
+ when submitting a new context. If the current context does not hit
+ an arbitration point and yield to HW before the timer expires, the
+ HW will be reset to allow the more important context to execute.
+
+ This is adjustable via
+ /sys/class/drm/card?/engine/*/preempt_timeout_ms
+
+ May be 0 to disable the timeout.
+
+ The compiled in default may get overridden at driver probe time on
+ certain platforms and certain engines which will be reflected in the
+ sysfs control.
+
+config DRM_I915_PREEMPT_TIMEOUT_COMPUTE
+   int "Preempt timeout for compute engines (ms, jiffy granularity)"
+   default 7500 # milliseconds
+   help
+ How long to wait (in milliseconds) for a preemption event to occur
+ when submitting a new context to a compute capable engine. If the
+ current context does not hit an arbitration point and yield to HW
+ before the timer expires, the HW will be reset to allow the more
+ important context to execute.
  
  	  This is adjustable via

  /sys/class/drm/card?/engine/*/preempt_timeout_ms
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index fcbccd8d244e9..c1257723d1949 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -508,9 +508,14 @@ static int intel_engine_setup(struct intel_gt *gt, enum 
intel_engine_id id,
engine->props.timeslice_duration_ms =
CONFIG_DRM_I915_TIMESLICE_DURATION;
  
-	/* Override to uninterruptible for OpenCL workloads. */

+   /*
+* Mid-thread pre-emption is not available in Gen12. Unfortunately,
+* some compute workloads run quite long threads. That means they get
+* reset due to not pre-empting in a timely manner. So, bump the
+* pre-emption timeout value to be much higher for compute engines.
+*/
if (GRAPHICS_VER(i915) == 12 && (engine->flags & 
I915_ENGINE_HAS_RCS_REG_STATE))
-   engine->props.preempt_timeout_ms = 0;
+   engine->props.preempt_timeout_ms = 
CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE;
  
  	/* Cap properties according to any system limits */

  #define CLAMP_PROP(field) \


Re: [Intel-gfx] [PATCH v4 3/4] drm/i915: Make the heartbeat play nice with long pre-emption timeouts

2022-09-29 Thread Tvrtko Ursulin



On 29/09/2022 03:18, john.c.harri...@intel.com wrote:

From: John Harrison 

Compute workloads are inherently not pre-emptible for long periods on
current hardware. As a workaround for this, the pre-emption timeout
for compute capable engines was disabled. This is undesirable with GuC
submission as it prevents per engine reset of hung contexts. Hence the
next patch will re-enable the timeout but bumped up by an order of
magnitude.

However, the heartbeat might not respect that. Depending upon current
activity, a pre-emption to the heartbeat pulse might not even be
attempted until the last heartbeat period. Which means that only one
period is granted for the pre-emption to occur. With the aforesaid
bump, the pre-emption timeout could be significantly larger than this
heartbeat period.

So adjust the heartbeat code to take the pre-emption timeout into
account. When it reaches the final (high priority) period, it now
ensures the delay before hitting reset is bigger than the pre-emption
timeout.

v2: Fix for selftests which adjust the heartbeat period manually.

Signed-off-by: John Harrison 
---
  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 19 +++
  1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c 
b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index a3698f611f457..823a790a0e2ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -22,9 +22,28 @@
  
  static bool next_heartbeat(struct intel_engine_cs *engine)

  {
+   struct i915_request *rq;
long delay;
  
  	delay = READ_ONCE(engine->props.heartbeat_interval_ms);

+
+   rq = engine->heartbeat.systole;
+
+   if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER &&
+   delay == engine->defaults.heartbeat_interval_ms) {


Maybe I forgot but what is the reason for the check against the default 
heartbeat interval?


Regards,

Tvrtko


+   long longer;
+
+   /*
+* The final try is at the highest priority possible. Up until 
now
+* a pre-emption might not even have been attempted. So make 
sure
+* this last attempt allows enough time for a pre-emption to 
occur.
+*/
+   longer = READ_ONCE(engine->props.preempt_timeout_ms) * 2;
+   longer = intel_clamp_heartbeat_interval_ms(engine, longer);
+   if (longer > delay)
+   delay = longer;
+   }
+
if (!delay)
return false;
  


Re: [Intel-gfx] [PATCH v4 1/4] drm/i915/guc: Limit scheduling properties to avoid overflow

2022-09-29 Thread Tvrtko Ursulin



On 29/09/2022 03:18, john.c.harri...@intel.com wrote:

From: John Harrison 

GuC converts the pre-emption timeout and timeslice quantum values into
clock ticks internally. That significantly reduces the point of 32bit
overflow. On current platforms, worst case scenario is approximately
110 seconds. Rather than allowing the user to set higher values and
then get confused by early timeouts, add limits when setting these
values.

v2: Add helper functions for clamping (review feedback from Tvrtko).
v3: Add a bunch of BUG_ON range checks in addition to the checks
already in the clamping functions (Tvrtko)

Signed-off-by: John Harrison 
Reviewed-by: Daniele Ceraolo Spurio  (v1)


Acked-by: Tvrtko Ursulin 

Regards,

Tvrtko


---
  drivers/gpu/drm/i915/gt/intel_engine.h|  6 ++
  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 69 +++
  drivers/gpu/drm/i915/gt/sysfs_engines.c   | 25 ---
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   | 21 ++
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  8 +++
  5 files changed, 119 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h
index 04e435bce79bd..cbc8b857d5f7a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -348,4 +348,10 @@ intel_engine_get_hung_context(struct intel_engine_cs 
*engine)
return engine->hung_ce;
  }
  
+u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value);

+u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 
value);
+u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 
value);
+
  #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 2ddcad497fa30..8f16955f0821e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -512,6 +512,26 @@ static int intel_engine_setup(struct intel_gt *gt, enum 
intel_engine_id id,
engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
}
  
+	/* Cap properties according to any system limits */

+#define CLAMP_PROP(field) \
+   do { \
+   u64 clamp = intel_clamp_##field(engine, engine->props.field); \
+   if (clamp != engine->props.field) { \
+   drm_notice(>i915->drm, \
+  "Warning, clamping %s to %lld to prevent 
overflow\n", \
+  #field, clamp); \
+   engine->props.field = clamp; \
+   } \
+   } while (0)
+
+   CLAMP_PROP(heartbeat_interval_ms);
+   CLAMP_PROP(max_busywait_duration_ns);
+   CLAMP_PROP(preempt_timeout_ms);
+   CLAMP_PROP(stop_timeout_ms);
+   CLAMP_PROP(timeslice_duration_ms);
+
+#undef CLAMP_PROP
+
engine->defaults = engine->props; /* never to change again */
  
  	engine->context_size = intel_engine_context_size(gt, engine->class);

@@ -534,6 +554,55 @@ static int intel_engine_setup(struct intel_gt *gt, enum 
intel_engine_id id,
return 0;
  }
  
+u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value)

+{
+   value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+   return value;
+}
+
+u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 
value)
+{
+   value = min(value, jiffies_to_nsecs(2));
+
+   return value;
+}
+
+u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+   /*
+* NB: The GuC API only supports 32bit values. However, the limit is 
further
+* reduced due to internal calculations which would otherwise overflow.
+*/
+   if (intel_guc_submission_is_wanted(>gt->uc.guc))
+   value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
+
+   value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+   return value;
+}
+
+u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+   value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+   return value;
+}
+
+u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 
value)
+{
+   /*
+* NB: The GuC API only supports 32bit values. However, the limit is 
further
+* reduced due to internal calculations which would otherwise overflow.
+*/
+   if (intel_guc_submission_is_wanted(>gt->uc.guc))
+   value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
+
+   value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+   return value;
+}
+
  static void __setup_engine_capabili

Re: [Intel-gfx] [PATCH 04/16] drm/i915/vm_bind: Add support to create persistent vma

2022-09-28 Thread Tvrtko Ursulin



On 28/09/2022 07:19, Niranjana Vishwanathapura wrote:

Add i915_vma_instance_persistent() to create persistent vmas.
Persistent vmas will use i915_gtt_view to support partial binding.

vma_lookup is tied to segment of the object instead of section
of VA space. Hence, it do not support aliasing. ie., multiple
mappings (at different VA) point to the same gtt_view of object.
Skip vma_lookup for persistent vmas to support aliasing.

Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andi Shyti 
---
  drivers/gpu/drm/i915/i915_vma.c   | 39 ---
  drivers/gpu/drm/i915/i915_vma.h   | 16 +--
  drivers/gpu/drm/i915/i915_vma_types.h |  7 +
  3 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f17c09ead7d7..5839e1f55f00 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -109,7 +109,8 @@ static void __i915_vma_retire(struct i915_active *ref)
  static struct i915_vma *
  vma_create(struct drm_i915_gem_object *obj,
   struct i915_address_space *vm,
-  const struct i915_gtt_view *view)
+  const struct i915_gtt_view *view,
+  bool skip_lookup_cache)
  {
struct i915_vma *pos = ERR_PTR(-E2BIG);
struct i915_vma *vma;
@@ -196,6 +197,9 @@ vma_create(struct drm_i915_gem_object *obj,
__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
}
  
+	if (skip_lookup_cache)

+   goto skip_rb_insert;
+
rb = NULL;
p = >vma.tree.rb_node;
while (*p) {
@@ -220,6 +224,7 @@ vma_create(struct drm_i915_gem_object *obj,
rb_link_node(>obj_node, rb, p);
rb_insert_color(>obj_node, >vma.tree);
  
+skip_rb_insert:

if (i915_vma_is_ggtt(vma))
/*
 * We put the GGTT vma at the start of the vma-list, followed
@@ -299,7 +304,34 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
  
  	/* vma_create() will resolve the race if another creates the vma */

if (unlikely(!vma))
-   vma = vma_create(obj, vm, view);
+   vma = vma_create(obj, vm, view, false);
+
+   GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
+   return vma;
+}
+
+/**
+ * i915_vma_create_persistent - create a persistent VMA
+ * @obj: parent  drm_i915_gem_object to be mapped
+ * @vm: address space in which the mapping is located
+ * @view: additional mapping requirements
+ *
+ * Creates a persistent vma.
+ *
+ * Returns the vma, or an error pointer.
+ */
+struct i915_vma *
+i915_vma_create_persistent(struct drm_i915_gem_object *obj,
+  struct i915_address_space *vm,
+  const struct i915_gtt_view *view)
+{
+   struct i915_vma *vma;
+
+   GEM_BUG_ON(!kref_read(>ref));
+
+   vma = vma_create(obj, vm, view, true);
+   if (!IS_ERR(vma))
+   i915_vma_set_persistent(vma);
  
  	GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));

return vma;
@@ -1666,7 +1698,8 @@ static void release_references(struct i915_vma *vma, 
struct intel_gt *gt,
  
  	spin_lock(>vma.lock);

list_del(>obj_link);
-   if (!RB_EMPTY_NODE(>obj_node))
+   if (!i915_vma_is_persistent(vma) &&


Thinking out loud - maybe you don't need the extra condition? But it is 
good for self-documenting purposes in any case.



+   !RB_EMPTY_NODE(>obj_node))
rb_erase(>obj_node, >vma.tree);
  
  	spin_unlock(>vma.lock);

diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index aecd9c64486b..51e712de380a 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -44,6 +44,10 @@ struct i915_vma *
  i915_vma_instance(struct drm_i915_gem_object *obj,
  struct i915_address_space *vm,
  const struct i915_gtt_view *view);
+struct i915_vma *
+i915_vma_create_persistent(struct drm_i915_gem_object *obj,
+  struct i915_address_space *vm,
+  const struct i915_gtt_view *view);
  
  void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);

  #define I915_VMA_RELEASE_MAP BIT(0)
@@ -138,6 +142,16 @@ static inline u32 i915_ggtt_pin_bias(struct i915_vma *vma)
return i915_vm_to_ggtt(vma->vm)->pin_bias;
  }
  
+static inline bool i915_vma_is_persistent(const struct i915_vma *vma)

+{
+   return test_bit(I915_VMA_PERSISTENT_BIT, __i915_vma_flags(vma));
+}
+
+static inline void i915_vma_set_persistent(struct i915_vma *vma)
+{
+   set_bit(I915_VMA_PERSISTENT_BIT, __i915_vma_flags(vma));
+}
+
  static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
  {
i915_gem_object_get(vma->obj);
@@ -164,8 +178,6 @@ i915_vma_compare(struct i915_vma *vma,
  {
ptrdiff_t cmp;
  
-	GEM_BUG_ON(view && !i915_is_ggtt_or_dpt(vm));

Or explicitly add persistent?

Regards,

Tvrtko


-
   

Re: [Intel-gfx] [RFC v4 13/14] drm/i915/vm_bind: Skip vma_lookup for persistent vmas

2022-09-27 Thread Tvrtko Ursulin



On 26/09/2022 18:09, Niranjana Vishwanathapura wrote:

On Mon, Sep 26, 2022 at 05:26:12PM +0100, Tvrtko Ursulin wrote:


On 24/09/2022 05:30, Niranjana Vishwanathapura wrote:

On Fri, Sep 23, 2022 at 09:40:20AM +0100, Tvrtko Ursulin wrote:


On 21/09/2022 08:09, Niranjana Vishwanathapura wrote:

vma_lookup is tied to segment of the object instead of section


Can be, but not only that. It would be more accurate to say it is 
based of gtt views.


Yah, but new code is also based on gtt views, the only difference
is that now there can be multiple mappings (at different VAs)
to the same gtt_view of the object.




of VA space. Hence, it do not support aliasing (ie., multiple
bindings to the same section of the object).
Skip vma_lookup for persistent vmas as it supports aliasing.


What's broken without this patch? If something is, should it go 
somewhere earlier in the series? If so should be mentioned in the 
commit message.


Or is it just a performance optimisation to skip unused tracking? If 
so should also be mentioned in the commit message.




No, it is not a performance optimization.
The vma_lookup is based on the fact that there can be only one mapping
for a given gtt_view of the object.
So, it was looking for gtt_view to find the mapping.

But now, as I mentioned above, there can be multiple mappings for a
given gtt_view of the object. Hence the vma_lookup method won't work
here. Hence, it is being skipped for persistent vmas.


Right, so in that case isn't this patch too late in the series? 
Granted you only allow _userspace_ to use vm bind in 14/14, but the 
kernel infrastructure is there and if there was a selftest it would be 
able to fail without this patch, no?




Yes it is incorrect patch ordering. I am fixing it by moving this patch
to early in the series and adding a new i915_vma_create_persistent()
function and avoid touching i915_vma_instance() everywhere (as you
suggested).




--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -110,7 +110,8 @@ static void __i915_vma_retire(struct 
i915_active *ref)

 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
    struct i915_address_space *vm,
-   const struct i915_gtt_view *view)
+   const struct i915_gtt_view *view,
+   bool persistent)
 {
 struct i915_vma *pos = ERR_PTR(-E2BIG);
 struct i915_vma *vma;
@@ -197,6 +198,9 @@ vma_create(struct drm_i915_gem_object *obj,
 __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
 }
+    if (persistent)
+    goto skip_rb_insert;


Oh so you don't use the gtt_view's fully at all. I now have 
reservations whether that was the right approach. Since you are not 
using the existing rb tree tracking I mean..


You know if a vma is persistent right? So you could have just added 
special case for persistent vmas to __i915_vma_get_pages and still 
call intel_partial_pages from there. Maybe union over struct 
i915_gtt_view in i915_vma for either the view or struct 
intel_partial_info for persistent ones.




We are using the gtt_view fully in this patch for persistent vmas.


I guess yours and mine definition of fully are different. :)


But as mentioned above, now we have support multiple mappings
for the same gtt_view of the object. For this, the current
vma_lookup() falls short. So, we are skipping it.


I get it - but then, having only now noticed how it will be used, I am 
less convinced touching the ggtt_view code was the right approach.


What about what I proposed above? That you just add code to 
__i915_vma_get_pages, which in case of a persistent VMA would call 
intel_partial_pages from there.


If that works I think it's cleaner and we'd just revert the ggtt_view 
to gtt_view rename.




I don't think that is any cleaner. We need to store the partial view
information somewhere for the persistent vmas as well. Why not use
the existing gtt_view for that instead of a new data structure?
In fact long back I had such an implementation and it was looking
odd and was suggested to use the existing infrastructure (gtt_view).

Besides, I think the current i915_vma_lookup method is no longer valid.
(Ever since we had softpinning, lookup should have be based on the VA
and not the vma's view of the object).


As a side note I don't think soft pinning was a problem. That did not establish 
a partial VMA concept, nor had any interaction with ggtt_views. It was still 
a one obj - one vma per vm relationship.

But okay, it is okay to do it like this. I think when you change to separate 
create/lookup entry points for persistent it will become much cleaner. I do acknowledge 
you have to "hide" them from normal lookup to avoid confusing the legacy code 
paths.

One more note - I think patch 6 should be before or together with patch 4. In 
general infrastructure to handle vm bind should all be in place before code 
starts using it.

Regards,

Tvrtko


Re: [PATCH] drm/i915: Stop using flush_scheduled_work on driver remove

2022-09-26 Thread Tvrtko Ursulin



On 23/09/2022 17:16, Ville Syrjälä wrote:

On Fri, Sep 23, 2022 at 03:29:34PM +0100, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Kernel is trying to eliminate callers of flush_scheduled_work so lets
try to accommodate.

We currently call it from intel_modeset_driver_remove_noirq on the driver
remove path but the comment next to it does not tell me what exact work it
wants to flush.

I can spot three (or four) works using the system_wq:

   ..hotplug.reenable_work
   ..hotplug.hotplug_work


Looks like we at least try to shoot those down via
intel_irq_uninstall()
  ->intel_hpd_cancel_work()
   ->cancel_delayed_work_sync()

But I'm not sure how broken the hpd disable path is here.
I know hpd cancel vs. irq disable has some known ordering
issues during suspend at least, some of which I think may
have gotten fixed recently. But hpd cancel is still a bit
of a mess in general.

Here we at least do cancel all the hpd works after irqs
have been disabled, so I don't think any further flushing
should help with whatever races we have left in there.


   ..psr.dc3co_work


I think the whole dc3co thing should be disabled atm,
so nothing should ever schedule this. We should
probably garbage collect the whole thing...


   ..crtc->drrs.work


That one should have been killed in
intel_display_driver_unregister()
  ->drm_atomic_helper_shutdown()
   ->...
->intel_drrs_deactivate()
 ->cancel_delayed_work_sync()


So if I replace it with intel_hpd_cancel_work() that appears would handle
the first two. What about the other two?


Other stuff that comes to mind is the pps vdd_off work.
But looks like that should get taken down in the
encoder->destroy() hook at the latest (via
intel_mode_config_cleanup()).

psr.work at least has a cancel_work_sync() in intel_psr_disable(),
so should hopefully get killed the same way as drrs.

opregion.asle_work seems to get cancelled from the unregister path.

The ones that look broken to me are dmc.work and fbc underrun_work.


Right, so I missed some and things are a bit more complicated. Okay to 
leave this with you, even if on a backlog?


Regards,

Tvrtko





Signed-off-by: Tvrtko Ursulin 
Cc: Jani Nikula 
Cc: Ville Syrjälä 
Cc: Tetsuo Handa 
---
I am clueless about the display paths and only send this because Jani
convinced me to send a patch to kick off the discussion. No expectations
whatsoever this is correct or complete.
---
  drivers/gpu/drm/i915/display/intel_display.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 2d0018ae34b1..0eb72530a003 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -8980,7 +8980,7 @@ void intel_modeset_driver_remove_noirq(struct 
drm_i915_private *i915)
intel_unregister_dsm_handler();
  
  	/* flush any delayed tasks or pending work */

-   flush_scheduled_work();
+   intel_hpd_cancel_work(i915);
  
  	intel_hdcp_component_fini(i915);
  
--

2.34.1




Re: [Intel-gfx] [RFC v4 13/14] drm/i915/vm_bind: Skip vma_lookup for persistent vmas

2022-09-26 Thread Tvrtko Ursulin



On 24/09/2022 05:30, Niranjana Vishwanathapura wrote:

On Fri, Sep 23, 2022 at 09:40:20AM +0100, Tvrtko Ursulin wrote:


On 21/09/2022 08:09, Niranjana Vishwanathapura wrote:

vma_lookup is tied to segment of the object instead of section


Can be, but not only that. It would be more accurate to say it is 
based of gtt views.


Yah, but new code is also based on gtt views, the only difference
is that now there can be multiple mappings (at different VAs)
to the same gtt_view of the object.




of VA space. Hence, it does not support aliasing (i.e., multiple
bindings to the same section of the object).
Skip vma_lookup for persistent vmas as it supports aliasing.


What's broken without this patch? If something is, should it go 
somewhere earlier in the series? If so should be mentioned in the 
commit message.


Or is it just a performance optimisation to skip unused tracking? If 
so should also be mentioned in the commit message.




No, it is not a performance optimization.
The vma_lookup is based on the fact that there can be only one mapping
for a given gtt_view of the object.
So, it was looking for gtt_view to find the mapping.

But now, as I mentioned above, there can be multiple mappings for a
given gtt_view of the object. Hence the vma_lookup method won't work
here. Hence, it is being skipped for persistent vmas.


Right, so in that case isn't this patch too late in the series? Granted 
you only allow _userspace_ to use vm bind in 14/14, but the kernel 
infrastructure is there and if there was a selftest it would be able to 
fail without this patch, no?


Signed-off-by: Niranjana Vishwanathapura 


Signed-off-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_fb_pin.c   |  2 +-
 .../drm/i915/display/intel_plane_initial.c    |  2 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  4 +-
 .../drm/i915/gem/i915_gem_vm_bind_object.c    |  2 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   | 16 +++
 .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
 .../drm/i915/gem/selftests/i915_gem_context.c | 12 ++---
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c    |  6 ++-
 .../drm/i915/gem/selftests/igt_gem_utils.c    |  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |  2 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c    |  2 +-
 drivers/gpu/drm/i915/gt/intel_gtt.c   |  2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c   |  4 +-
 drivers/gpu/drm/i915/gt/intel_renderstate.c   |  2 +-
 drivers/gpu/drm/i915/gt/intel_ring.c  |  2 +-
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  4 +-
 drivers/gpu/drm/i915/gt/intel_timeline.c  |  2 +-
 drivers/gpu/drm/i915/gt/mock_engine.c |  2 +-
 drivers/gpu/drm/i915/gt/selftest_engine_cs.c  |  4 +-
 drivers/gpu/drm/i915/gt/selftest_execlists.c  | 16 +++
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  6 +--
 drivers/gpu/drm/i915/gt/selftest_lrc.c    |  2 +-
 .../drm/i915/gt/selftest_ring_submission.c    |  2 +-
 drivers/gpu/drm/i915/gt/selftest_rps.c    |  2 +-
 .../gpu/drm/i915/gt/selftest_workarounds.c    |  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem.c   |  2 +-
 drivers/gpu/drm/i915/i915_perf.c  |  2 +-
 drivers/gpu/drm/i915/i915_vma.c   | 26 +++
 drivers/gpu/drm/i915/i915_vma.h   |  3 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 44 +--
 drivers/gpu/drm/i915/selftests/i915_request.c |  4 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c |  2 +-
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |  2 +-
 .../drm/i915/selftests/intel_memory_region.c  |  2 +-
 37 files changed, 106 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c 
b/drivers/gpu/drm/i915/display/intel_fb_pin.c

index c86e5d4ee016..5a718b247bb3 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -47,7 +47,7 @@ intel_pin_fb_obj_dpt(struct drm_framebuffer *fb,
 goto err;
 }
-    vma = i915_vma_instance(obj, vm, view);
+    vma = i915_vma_instance(obj, vm, view, false);


Hey why are you touching all the legacy paths? >:P


 if (IS_ERR(vma))
 goto err;
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
b/drivers/gpu/drm/i915/display/intel_plane_initial.c

index 76be796df255..7667e2faa3fb 100644
--- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
+++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
@@ -136,7 +136,7 @@ initial_plane_vma(struct drm_i915_private *i915,
 goto err_obj;
 }
-    vma = i915_vma_instance(obj, &to_gt(i915)->ggtt->vm, NULL);
+    vma = i915_vma_instance(obj, &to_gt(i915)->ggtt->vm, NULL, false);
 if (IS_ERR(vma))
 goto err_obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_exec

Re: [Intel-gfx] [PATCH 1/7] drm/i915/huc: only load HuC on GTs that have VCS engines

2022-09-26 Thread Tvrtko Ursulin



On 23/09/2022 16:41, Ceraolo Spurio, Daniele wrote:

On 9/23/2022 3:53 AM, Tvrtko Ursulin wrote:


On 22/09/2022 23:11, Daniele Ceraolo Spurio wrote:

On MTL the primary GT doesn't have any media capabilities, so no video
engines and no HuC. We must therefore skip the HuC fetch and load on
that specific case. Given that other multi-GT platforms might have HuC
on the primary GT, we can't just check for that and it is easier to
instead check for the lack of VCS engines.

Based on code from Aravind Iddamsetty

Signed-off-by: Daniele Ceraolo Spurio 
Cc: Aravind Iddamsetty 
Cc: John Harrison 
Cc: Alan Previn 
---
  drivers/gpu/drm/i915/gt/uc/intel_huc.c | 21 +
  drivers/gpu/drm/i915/i915_drv.h    |  9 ++---
  2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_huc.c

index 3bb8838e325a..d4e2b252f16c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -42,12 +42,33 @@
   * HuC-specific commands.
   */
  +static bool vcs_supported(struct intel_gt *gt)
+{
+    intel_engine_mask_t mask = gt->info.engine_mask;
+
+    /*
+ * we can reach here from i915_driver_early_probe for primary
+ * GT with it being not fully setup hence fall back to the 
device info's

+ * engine mask
+ */
+    if (!mask && gt_is_root(gt))
+    mask = RUNTIME_INFO(gt->i915)->platform_engine_mask;


Is it possible for all instances to be fused off? Wondering if the 
function shouldn't just use platform_engine_mask.


The spec says that there is always going to be at least 1 VCS (bspec 
55417 in case you want to double-check). I don't see that changing in 
the future, because what's the point of having a media GT if you don't 
have any enabled VCS engines on it?


That was my gut feeling as well, however..

Also, platform_engine_mask only contains the entries of the primary GT, 
for the other GTs we'd have to navigate the array in the device info 
structure and I don't think we want to do that from here when we've 
already copied the mask inside gt->info.engine_mask.


... this is very annoying. Because function is now a bit dodgy, no? 
Maybe gets the caller a real answer for a _specific_ gt, or maybe gets a 
fake-ish answer for a root gt. Or if not a root gt and called too early 
maybe it returns a false zero?


Hm would GEM_BUG_ON(!mask && !gt_is_root(gt)) be correct?

And not even bother to implement is as fallback?

if (gt_is_root)
return platform_mask;
else
return gt_mask;

Would that be clearer? Coupled with the comment from the patch, maybe 
expanded with the statement that if there are some vcs engines, at least 
one must remain post fusing?


Regards,

Tvrtko


+
+    return __ENGINE_INSTANCES_MASK(mask, VCS0, I915_MAX_VCS);
+}
+
  void intel_huc_init_early(struct intel_huc *huc)
  {
  struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
+    struct intel_gt *gt = huc_to_gt(huc);
    intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC);
  +    if (!vcs_supported(gt)) {
+    intel_uc_fw_change_status(&huc->fw, 
INTEL_UC_FIRMWARE_NOT_SUPPORTED);

+    return;
+    }
+
  if (GRAPHICS_VER(i915) >= 11) {
  huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO;
  huc->status.mask = HUC_LOAD_SUCCESSFUL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h

index 134fc1621821..8ca575202e5d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -777,12 +777,15 @@ IS_SUBPLATFORM(const struct drm_i915_private 
*i915,

  #define __HAS_ENGINE(engine_mask, id) ((engine_mask) & BIT(id))
  #define HAS_ENGINE(gt, id) __HAS_ENGINE((gt)->info.engine_mask, id)
  -#define ENGINE_INSTANCES_MASK(gt, first, count) ({    \
+#define __ENGINE_INSTANCES_MASK(mask, first, count) ({    \
  unsigned int first__ = (first);    \
  unsigned int count__ = (count);    \
-    ((gt)->info.engine_mask &    \
- GENMASK(first__ + count__ - 1, first__)) >> first__;    \
+    ((mask) & GENMASK(first__ + count__ - 1, first__)) >> first__;    \
  })
+
+#define ENGINE_INSTANCES_MASK(gt, first, count) \
+    __ENGINE_INSTANCES_MASK((gt)->info.engine_mask, first, count)
+
  #define RCS_MASK(gt) \
  ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS)
  #define BCS_MASK(gt) \




[PATCH] drm/i915: Stop using flush_scheduled_work on driver remove

2022-09-23 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Kernel is trying to eliminate callers of flush_scheduled_work so lets
try to accommodate.

We currently call it from intel_modeset_driver_remove_noirq on the driver
remove path but the comment next to it does not tell me what exact work it
wants to flush.

I can spot three (or four) works using the system_wq:

  ..hotplug.reenable_work
  ..hotplug.hotplug_work
  ..psr.dc3co_work
  ..crtc->drrs.work

So if I replace it with intel_hpd_cancel_work() that appears would handle
the first two. What about the other two?

Signed-off-by: Tvrtko Ursulin 
Cc: Jani Nikula 
Cc: Ville Syrjälä 
Cc: Tetsuo Handa 
---
I am clueless about the display paths and only send this because Jani
convinced me to send a patch to kick off the discussion. No expectations
whatsoever this is correct or complete.
---
 drivers/gpu/drm/i915/display/intel_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 2d0018ae34b1..0eb72530a003 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -8980,7 +8980,7 @@ void intel_modeset_driver_remove_noirq(struct 
drm_i915_private *i915)
intel_unregister_dsm_handler();
 
/* flush any delayed tasks or pending work */
-   flush_scheduled_work();
+   intel_hpd_cancel_work(i915);
 
intel_hdcp_component_fini(i915);
 
-- 
2.34.1



Re: [PATCH] drm/i915/selftests: Remove flush_scheduled_work() from live_execlists

2022-09-23 Thread Tvrtko Ursulin



On 23/09/2022 11:32, Das, Nirmoy wrote:

Reviewed-by: Nirmoy Das 


Thanks!

Pushed now. Should land with 6.2.

Regards,

Tvrtko


On 6/30/2022 2:57 PM, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

There are ongoing efforts to remove usages of flush_scheduled_work() from
drivers in order to avoid several cases of potentential problems when
flushing is done from certain contexts.

Remove the call from the live_execlists selftest. Its purpose was to be
thorough and sync with the execlists capture state handling, but that is
not strictly required for the test to function and can be removed.

Signed-off-by: Tvrtko Ursulin 
Cc: Tetsuo Handa 
---
  drivers/gpu/drm/i915/gt/selftest_execlists.c | 2 --
  1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c 
b/drivers/gpu/drm/i915/gt/selftest_execlists.c

index 09f8cd2d0e2c..e62d089257ae 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -85,8 +85,6 @@ static int wait_for_reset(struct intel_engine_cs 
*engine,

  break;
  } while (time_before(jiffies, timeout));
-    flush_scheduled_work();
-
  if (rq->fence.error != -EIO) {
  pr_err("%s: hanging request %llx:%lld not reset\n",
 engine->name,


Re: [Intel-gfx] [PATCH 1/7] drm/i915/huc: only load HuC on GTs that have VCS engines

2022-09-23 Thread Tvrtko Ursulin



On 22/09/2022 23:11, Daniele Ceraolo Spurio wrote:

On MTL the primary GT doesn't have any media capabilities, so no video
engines and no HuC. We must therefore skip the HuC fetch and load on
that specific case. Given that other multi-GT platforms might have HuC
on the primary GT, we can't just check for that and it is easier to
instead check for the lack of VCS engines.

Based on code from Aravind Iddamsetty

Signed-off-by: Daniele Ceraolo Spurio 
Cc: Aravind Iddamsetty 
Cc: John Harrison 
Cc: Alan Previn 
---
  drivers/gpu/drm/i915/gt/uc/intel_huc.c | 21 +
  drivers/gpu/drm/i915/i915_drv.h|  9 ++---
  2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 3bb8838e325a..d4e2b252f16c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -42,12 +42,33 @@
   * HuC-specific commands.
   */
  
+static bool vcs_supported(struct intel_gt *gt)

+{
+   intel_engine_mask_t mask = gt->info.engine_mask;
+
+   /*
+* we can reach here from i915_driver_early_probe for primary
+* GT with it being not fully setup hence fall back to the device info's
+* engine mask
+*/
+   if (!mask && gt_is_root(gt))
+   mask = RUNTIME_INFO(gt->i915)->platform_engine_mask;


Is it possible for all instances to be fused off? Wondering if the 
function shouldn't just use platform_engine_mask.


Regards,

Tvrtko


+
+   return __ENGINE_INSTANCES_MASK(mask, VCS0, I915_MAX_VCS);
+}
+
  void intel_huc_init_early(struct intel_huc *huc)
  {
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
+   struct intel_gt *gt = huc_to_gt(huc);
  
   	intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC);
  
+	if (!vcs_supported(gt)) {

+   intel_uc_fw_change_status(&huc->fw, 
INTEL_UC_FIRMWARE_NOT_SUPPORTED);
+   return;
+   }
+
if (GRAPHICS_VER(i915) >= 11) {
huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO;
huc->status.mask = HUC_LOAD_SUCCESSFUL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 134fc1621821..8ca575202e5d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -777,12 +777,15 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
  #define __HAS_ENGINE(engine_mask, id) ((engine_mask) & BIT(id))
  #define HAS_ENGINE(gt, id) __HAS_ENGINE((gt)->info.engine_mask, id)
  
-#define ENGINE_INSTANCES_MASK(gt, first, count) ({		\

+#define __ENGINE_INSTANCES_MASK(mask, first, count) ({ \
unsigned int first__ = (first); \
unsigned int count__ = (count); \
-   ((gt)->info.engine_mask &   
 \
-GENMASK(first__ + count__ - 1, first__)) >> first__; \
+   ((mask) & GENMASK(first__ + count__ - 1, first__)) >> first__;\
  })
+
+#define ENGINE_INSTANCES_MASK(gt, first, count) \
+   __ENGINE_INSTANCES_MASK((gt)->info.engine_mask, first, count)
+
  #define RCS_MASK(gt) \
ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS)
  #define BCS_MASK(gt) \


Re: [Intel-gfx] [RFC v4 13/14] drm/i915/vm_bind: Skip vma_lookup for persistent vmas

2022-09-23 Thread Tvrtko Ursulin



On 21/09/2022 08:09, Niranjana Vishwanathapura wrote:

vma_lookup is tied to segment of the object instead of section


Can be, but not only that. It would be more accurate to say it is based 
of gtt views.



of VA space. Hence, it does not support aliasing (i.e., multiple
bindings to the same section of the object).
Skip vma_lookup for persistent vmas as it supports aliasing.


What's broken without this patch? If something is, should it go 
somewhere earlier in the series? If so should be mentioned in the commit 
message.


Or is it just a performance optimisation to skip unused tracking? If so 
should also be mentioned in the commit message.




Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andi Shyti 
---
  drivers/gpu/drm/i915/display/intel_fb_pin.c   |  2 +-
  .../drm/i915/display/intel_plane_initial.c|  2 +-
  .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  4 +-
  .../drm/i915/gem/i915_gem_vm_bind_object.c|  2 +-
  .../gpu/drm/i915/gem/selftests/huge_pages.c   | 16 +++
  .../i915/gem/selftests/i915_gem_client_blt.c  |  2 +-
  .../drm/i915/gem/selftests/i915_gem_context.c | 12 ++---
  .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
  .../drm/i915/gem/selftests/i915_gem_mman.c|  6 ++-
  .../drm/i915/gem/selftests/igt_gem_utils.c|  2 +-
  drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |  2 +-
  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  2 +-
  drivers/gpu/drm/i915/gt/intel_gt.c|  2 +-
  drivers/gpu/drm/i915/gt/intel_gtt.c   |  2 +-
  drivers/gpu/drm/i915/gt/intel_lrc.c   |  4 +-
  drivers/gpu/drm/i915/gt/intel_renderstate.c   |  2 +-
  drivers/gpu/drm/i915/gt/intel_ring.c  |  2 +-
  .../gpu/drm/i915/gt/intel_ring_submission.c   |  4 +-
  drivers/gpu/drm/i915/gt/intel_timeline.c  |  2 +-
  drivers/gpu/drm/i915/gt/mock_engine.c |  2 +-
  drivers/gpu/drm/i915/gt/selftest_engine_cs.c  |  4 +-
  drivers/gpu/drm/i915/gt/selftest_execlists.c  | 16 +++
  drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  6 +--
  drivers/gpu/drm/i915/gt/selftest_lrc.c|  2 +-
  .../drm/i915/gt/selftest_ring_submission.c|  2 +-
  drivers/gpu/drm/i915/gt/selftest_rps.c|  2 +-
  .../gpu/drm/i915/gt/selftest_workarounds.c|  4 +-
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|  2 +-
  drivers/gpu/drm/i915/i915_gem.c   |  2 +-
  drivers/gpu/drm/i915/i915_perf.c  |  2 +-
  drivers/gpu/drm/i915/i915_vma.c   | 26 +++
  drivers/gpu/drm/i915/i915_vma.h   |  3 +-
  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 44 +--
  drivers/gpu/drm/i915/selftests/i915_request.c |  4 +-
  drivers/gpu/drm/i915/selftests/i915_vma.c |  2 +-
  drivers/gpu/drm/i915/selftests/igt_spinner.c  |  2 +-
  .../drm/i915/selftests/intel_memory_region.c  |  2 +-
  37 files changed, 106 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c 
b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index c86e5d4ee016..5a718b247bb3 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -47,7 +47,7 @@ intel_pin_fb_obj_dpt(struct drm_framebuffer *fb,
goto err;
}
  
-	vma = i915_vma_instance(obj, vm, view);

+   vma = i915_vma_instance(obj, vm, view, false);


Hey why are you touching all the legacy paths? >:P


if (IS_ERR(vma))
goto err;
  
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c

index 76be796df255..7667e2faa3fb 100644
--- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
+++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
@@ -136,7 +136,7 @@ initial_plane_vma(struct drm_i915_private *i915,
goto err_obj;
}
  
-	vma = i915_vma_instance(obj, &to_gt(i915)->ggtt->vm, NULL);

+   vma = i915_vma_instance(obj, &to_gt(i915)->ggtt->vm, NULL, false);
if (IS_ERR(vma))
goto err_obj;
  
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c

index 363b2a788cdf..0ee43cb601b5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -876,7 +876,7 @@ static struct i915_vma *eb_lookup_vma(struct 
i915_execbuffer *eb, u32 handle)
}
}
  
-		vma = i915_vma_instance(obj, vm, NULL);

+   vma = i915_vma_instance(obj, vm, NULL, false);
if (IS_ERR(vma)) {
i915_gem_object_put(obj);
return vma;
@@ -2208,7 +2208,7 @@ shadow_batch_pin(struct i915_execbuffer *eb,
struct i915_vma *vma;
int err;
  
-	vma = i915_vma_instance(obj, vm, NULL);

+   vma = i915_vma_instance(obj, vm, NULL, false);
if (IS_ERR(vma))
return vma;
  
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c 

Re: [Intel-gfx] [RFC v4 03/14] drm/i915/vm_bind: Expose i915_gem_object_max_page_size()

2022-09-23 Thread Tvrtko Ursulin



On 22/09/2022 17:18, Matthew Auld wrote:

On 22/09/2022 09:09, Tvrtko Ursulin wrote:


On 21/09/2022 19:00, Niranjana Vishwanathapura wrote:

On Wed, Sep 21, 2022 at 10:13:12AM +0100, Tvrtko Ursulin wrote:


On 21/09/2022 08:09, Niranjana Vishwanathapura wrote:

Expose i915_gem_object_max_page_size() function non-static
which will be used by the vm_bind feature.

Signed-off-by: Niranjana Vishwanathapura 


Signed-off-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 20 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h |  2 ++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c

index 33673fe7ee0a..3b3ab4abb0a3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -11,14 +11,24 @@
 #include "pxp/intel_pxp.h"
 #include "i915_drv.h"
+#include "i915_gem_context.h"


I can't spot that you are adding any code which would need this? 
I915_GTT_PAGE_SIZE_4K? It is in intel_gtt.h.


This include should have been added in a later patch for calling
i915_gem_vm_lookup(). But got added here while patch refactoring.
Will fix.




 #include "i915_gem_create.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-static u32 object_max_page_size(struct intel_memory_region 
**placements,

-    unsigned int n_placements)
+/**
+ * i915_gem_object_max_page_size() - max of min_page_size of the 
regions

+ * @placements:  list of regions
+ * @n_placements: number of the placements
+ *
+ * Calculates the max of the min_page_size of a list of placements 
passed in.

+ *
+ * Return: max of the min_page_size
+ */
+u32 i915_gem_object_max_page_size(struct intel_memory_region 
**placements,

+  unsigned int n_placements)
 {
-    u32 max_page_size = 0;
+    u32 max_page_size = I915_GTT_PAGE_SIZE_4K;
 int i;
 for (i = 0; i < n_placements; i++) {
@@ -28,7 +38,6 @@ static u32 object_max_page_size(struct 
intel_memory_region **placements,

 max_page_size = max_t(u32, max_page_size, mr->min_page_size);
 }
-    GEM_BUG_ON(!max_page_size);
 return max_page_size;
 }
@@ -99,7 +108,8 @@ __i915_gem_object_create_user_ext(struct 
drm_i915_private *i915, u64 size,

 i915_gem_flush_free_objects(i915);
-    size = round_up(size, object_max_page_size(placements, 
n_placements));

+    size = round_up(size, i915_gem_object_max_page_size(placements,
+    n_placements));
 if (size == 0)
 return ERR_PTR(-EINVAL);


Because of the changes above this path is now unreachable. I suppose 
it was meant to tell the user "you have supplied no placements"? But 
then GEM_BUG_ON (which you remove) used to be wrong.




Yah, looks like an existing problem. May be this "size == 0" check
should have been made before we do the round_up()? ie., check input 
'size'

paramter is not 0?
I think for now, I will remove this check as it was unreachable anyhow.


Hm that's true as well. i915_gem_create_ext_ioctl ensures at least one 
placement and internal callers do as well.


To be safe, instead of removing maybe move to before "size = " and 
change to "if (GEM_WARN_ON(n_placements == 0))"? Not sure.. Matt any 
thoughts here given the changes in this patch?


The check is also to reject a zero sized object with args->size = 0, i.e 
round_up(0, PAGE_SIZE) == 0. So for sure that is still needed here.


Oh yeah sneaky round up.. Thanks, my bad.

Regards,

Tvrtko


Re: [PATCH] drm/i915: Fix CFI violations in gt_sysfs

2022-09-23 Thread Tvrtko Ursulin



Hi Nathan,

On 22/09/2022 20:51, Nathan Chancellor wrote:

When booting with clang's kernel control flow integrity series [1],
there are numerous violations when accessing the files under
/sys/devices/pci:00/:00:02.0/drm/card0/gt/gt0:

   $ cd /sys/devices/pci:00/:00:02.0/drm/card0/gt/gt0

   $ grep . *
   id:0
   punit_req_freq_mhz:350
   rc6_enable:1
   rc6_residency_ms:214934
   rps_act_freq_mhz:1300
   rps_boost_freq_mhz:1300
   rps_cur_freq_mhz:350
   rps_max_freq_mhz:1300
   rps_min_freq_mhz:350
   rps_RP0_freq_mhz:1300
   rps_RP1_freq_mhz:350
   rps_RPn_freq_mhz:350
   throttle_reason_pl1:0
   throttle_reason_pl2:0
   throttle_reason_pl4:0
   throttle_reason_prochot:0
   throttle_reason_ratl:0
   throttle_reason_status:0
   throttle_reason_thermal:0
   throttle_reason_vr_tdc:0
   throttle_reason_vr_thermalert:0

   $ sudo dmesg &| grep "CFI failure at"


CFI = control flow integrity - adding Andi and Ashutosh as primary 
authors of the code in question on our side to take a look please.


Regards,

Tvrtko


   [  214.595903] CFI failure at kobj_attr_show+0x19/0x30 (target: 
id_show+0x0/0x70 [i915]; expected type: 0xc527b809)
   [  214.596064] CFI failure at kobj_attr_show+0x19/0x30 (target: 
punit_req_freq_mhz_show+0x0/0x40 [i915]; expected type: 0xc527b809)
   [  214.596407] CFI failure at kobj_attr_show+0x19/0x30 (target: 
rc6_enable_show+0x0/0x40 [i915]; expected type: 0xc527b809)
   [  214.596528] CFI failure at kobj_attr_show+0x19/0x30 (target: 
rc6_residency_ms_show+0x0/0x270 [i915]; expected type: 0xc527b809)
   [  214.596682] CFI failure at kobj_attr_show+0x19/0x30 (target: 
act_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809)
   [  214.596792] CFI failure at kobj_attr_show+0x19/0x30 (target: 
boost_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809)
   [  214.596893] CFI failure at kobj_attr_show+0x19/0x30 (target: 
cur_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809)
   [  214.596996] CFI failure at kobj_attr_show+0x19/0x30 (target: 
max_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809)
   [  214.597099] CFI failure at kobj_attr_show+0x19/0x30 (target: 
min_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809)
   [  214.597198] CFI failure at kobj_attr_show+0x19/0x30 (target: 
RP0_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809)
   [  214.597301] CFI failure at kobj_attr_show+0x19/0x30 (target: 
RP1_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809)
   [  214.597405] CFI failure at kobj_attr_show+0x19/0x30 (target: 
RPn_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809)
   [  214.597538] CFI failure at kobj_attr_show+0x19/0x30 (target: 
throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809)
   [  214.597701] CFI failure at kobj_attr_show+0x19/0x30 (target: 
throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809)
   [  214.597836] CFI failure at kobj_attr_show+0x19/0x30 (target: 
throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809)
   [  214.597952] CFI failure at kobj_attr_show+0x19/0x30 (target: 
throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809)
   [  214.598071] CFI failure at kobj_attr_show+0x19/0x30 (target: 
throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809)
   [  214.598177] CFI failure at kobj_attr_show+0x19/0x30 (target: 
throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809)
   [  214.598307] CFI failure at kobj_attr_show+0x19/0x30 (target: 
throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809)
   [  214.598439] CFI failure at kobj_attr_show+0x19/0x30 (target: 
throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809)
   [  214.598542] CFI failure at kobj_attr_show+0x19/0x30 (target: 
throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809)

With kCFI, indirect calls are validated against their expected type
versus actual type and failures occur when the two types do not match.
The ultimate issue is that these sysfs functions are expecting to be
called via dev_attr_show() but they may also be called via
kobj_attr_show(), as certain files are created under two different
kobjects that have two different sysfs_ops in intel_gt_sysfs_register(),
hence the warnings above. When accessing the gt_ files under
/sys/devices/pci:00/:00:02.0/drm/card0, which are using the same
sysfs functions, there are no violations, meaning the functions are
being called with the proper type.

To make everything work properly, adjust certain functions to match the
type of the ->show() and ->store() members in 'struct kobj_attribute'.
Add a macro to generate functions for that can be called via both
dev_attr_{show,store}() or kobj_attr_{show,store}() so that they can be
called through both kobject locations without violating kCFI and adjust
the attribute groups to account for this.

[1]: https://lore.kernel.org/20220908215504.3686827-1-samitolva...@google.com/

Link: 

Re: [Intel-gfx] [PATCH] drm/i915: Remove unused function parameter

2022-09-22 Thread Tvrtko Ursulin



On 22/09/2022 05:43, Niranjana Vishwanathapura wrote:

The function parameter 'exclude' in function
i915_sw_fence_await_reservation() is not used.
Remove it.

Signed-off-by: Niranjana Vishwanathapura 
---
  drivers/gpu/drm/i915/display/intel_atomic_plane.c | 5 ++---
  drivers/gpu/drm/i915/gem/i915_gem_clflush.c   | 2 +-
  drivers/gpu/drm/i915/i915_sw_fence.c  | 1 -
  drivers/gpu/drm/i915/i915_sw_fence.h  | 1 -
  4 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c 
b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
index aaa6708256d5..ecb8d71d36c0 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
@@ -1005,7 +1005,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
 */
if (intel_crtc_needs_modeset(crtc_state)) {
ret = 
i915_sw_fence_await_reservation(&state->commit_ready,
- 
old_obj->base.resv, NULL,
+ 
old_obj->base.resv,
  false, 0,
  GFP_KERNEL);
if (ret < 0)
@@ -1039,8 +1039,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
struct dma_fence *fence;
  
   		ret = i915_sw_fence_await_reservation(&state->commit_ready,

- obj->base.resv, NULL,
- false,
+ obj->base.resv, false,
  
i915_fence_timeout(dev_priv),
  GFP_KERNEL);
if (ret < 0)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c 
b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 0512afdd20d8..b3b398fe689c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -113,7 +113,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object 
*obj,
clflush = clflush_work_create(obj);
if (clflush) {
		i915_sw_fence_await_reservation(&clflush->base.chain,
-   obj->base.resv, NULL, true,
+   obj->base.resv, true,
i915_fence_timeout(i915),
I915_FENCE_GFP);
		dma_resv_add_fence(obj->base.resv, &clflush->base.dma,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c 
b/drivers/gpu/drm/i915/i915_sw_fence.c
index 6fc0d1b89690..cc2a8821d22a 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -571,7 +571,6 @@ int __i915_sw_fence_await_dma_fence(struct i915_sw_fence 
*fence,
  
  int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,

struct dma_resv *resv,
-   const struct dma_fence_ops *exclude,
bool write,
unsigned long timeout,
gfp_t gfp)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h 
b/drivers/gpu/drm/i915/i915_sw_fence.h
index 619fc5a22f0c..f752bfc7c6e1 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -91,7 +91,6 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
  
  int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,

struct dma_resv *resv,
-   const struct dma_fence_ops *exclude,
bool write,
unsigned long timeout,
    gfp_t gfp);


Reviewed-by: Tvrtko Ursulin 

Regards,

Tvrtko


Re: [Intel-gfx] [RFC v4 08/14] drm/i915/vm_bind: Abstract out common execbuf functions

2022-09-22 Thread Tvrtko Ursulin



On 21/09/2022 19:17, Niranjana Vishwanathapura wrote:

On Wed, Sep 21, 2022 at 11:18:53AM +0100, Tvrtko Ursulin wrote:


On 21/09/2022 08:09, Niranjana Vishwanathapura wrote:

The new execbuf3 ioctl path and the legacy execbuf ioctl
paths have many common functionalities.
Share code between these two paths by abstracting out the
common functionalities into a separate file where possible.


Looks like a good start to me. A couple comments/questions below.

Signed-off-by: Niranjana Vishwanathapura 


---
 drivers/gpu/drm/i915/Makefile |   1 +
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 507 ++---
 .../drm/i915/gem/i915_gem_execbuffer_common.c | 530 ++
 .../drm/i915/gem/i915_gem_execbuffer_common.h |  47 ++
 4 files changed, 612 insertions(+), 473 deletions(-)
 create mode 100644 
drivers/gpu/drm/i915/gem/i915_gem_execbuffer_common.c
 create mode 100644 
drivers/gpu/drm/i915/gem/i915_gem_execbuffer_common.h


diff --git a/drivers/gpu/drm/i915/Makefile 
b/drivers/gpu/drm/i915/Makefile

index 9bf939ef18ea..bf952f478555 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -148,6 +148,7 @@ gem-y += \
 gem/i915_gem_create.o \
 gem/i915_gem_dmabuf.o \
 gem/i915_gem_domain.o \
+    gem/i915_gem_execbuffer_common.o \
 gem/i915_gem_execbuffer.o \
 gem/i915_gem_internal.o \
 gem/i915_gem_object.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c

index 33d989a20227..363b2a788cdf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -9,8 +9,6 @@
 #include 
 #include 
-#include 
-
 #include "display/intel_frontbuffer.h"
 #include "gem/i915_gem_ioctls.h"
@@ -28,6 +26,7 @@
 #include "i915_file_private.h"
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
+#include "i915_gem_execbuffer_common.h"
 #include "i915_gem_evict.h"
 #include "i915_gem_ioctls.h"
 #include "i915_trace.h"
@@ -235,13 +234,6 @@ enum {
  * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
  */
-struct eb_fence {
-    struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
-    struct dma_fence *dma_fence;
-    u64 value;
-    struct dma_fence_chain *chain_fence;
-};
-
 struct i915_execbuffer {
 struct drm_i915_private *i915; /** i915 backpointer */
 struct drm_file *file; /** per-file lookup tables and limits */
@@ -2446,164 +2438,29 @@ static const enum intel_engine_id 
user_ring_map[] = {

 [I915_EXEC_VEBOX]    = VECS0
 };
-static struct i915_request *eb_throttle(struct i915_execbuffer *eb, 
struct intel_context *ce)

-{
-    struct intel_ring *ring = ce->ring;
-    struct intel_timeline *tl = ce->timeline;
-    struct i915_request *rq;
-
-    /*
- * Completely unscientific finger-in-the-air estimates for suitable
- * maximum user request size (to avoid blocking) and then backoff.
- */
-    if (intel_ring_update_space(ring) >= PAGE_SIZE)
-    return NULL;
-
-    /*
- * Find a request that after waiting upon, there will be at 
least half

- * the ring available. The hysteresis allows us to compete for the
- * shared ring and should mean that we sleep less often prior to
- * claiming our resources, but not so long that the ring completely
- * drains before we can submit our next request.
- */
-    list_for_each_entry(rq, &tl->requests, link) {
-    if (rq->ring != ring)
-    continue;
-
-    if (__intel_ring_space(rq->postfix,
-   ring->emit, ring->size) > ring->size / 2)
-    break;
-    }
-    if (&rq->link == &tl->requests)
-    return NULL; /* weird, we will check again later for real */
-
-    return i915_request_get(rq);
-}
-
-static int eb_pin_timeline(struct i915_execbuffer *eb, struct 
intel_context *ce,

-   bool throttle)
-{
-    struct intel_timeline *tl;
-    struct i915_request *rq = NULL;
-
-    /*
- * Take a local wakeref for preparing to dispatch the execbuf as
- * we expect to access the hardware fairly frequently in the
- * process, and require the engine to be kept awake between 
accesses.

- * Upon dispatch, we acquire another prolonged wakeref that we hold
- * until the timeline is idle, which in turn releases the wakeref
- * taken on the engine, and the parent device.
- */
-    tl = intel_context_timeline_lock(ce);
-    if (IS_ERR(tl))
-    return PTR_ERR(tl);
-
-    intel_context_enter(ce);
-    if (throttle)
-    rq = eb_throttle(eb, ce);
-    intel_context_timeline_unlock(tl);
-
-    if (rq) {
-    bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
-    long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT;
-
-    if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
-  

Re: [Intel-gfx] [RFC v4 03/14] drm/i915/vm_bind: Expose i915_gem_object_max_page_size()

2022-09-22 Thread Tvrtko Ursulin



On 21/09/2022 19:00, Niranjana Vishwanathapura wrote:

On Wed, Sep 21, 2022 at 10:13:12AM +0100, Tvrtko Ursulin wrote:


On 21/09/2022 08:09, Niranjana Vishwanathapura wrote:

Expose i915_gem_object_max_page_size() function non-static
which will be used by the vm_bind feature.

Signed-off-by: Niranjana Vishwanathapura 


Signed-off-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 20 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h |  2 ++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c

index 33673fe7ee0a..3b3ab4abb0a3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -11,14 +11,24 @@
 #include "pxp/intel_pxp.h"
 #include "i915_drv.h"
+#include "i915_gem_context.h"


I can't spot that you are adding any code which would need this? 
I915_GTT_PAGE_SIZE_4K? It is in intel_gtt.h.


This include should have been added in a later patch for calling
i915_gem_vm_lookup(). But got added here while patch refactoring.
Will fix.




 #include "i915_gem_create.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-static u32 object_max_page_size(struct intel_memory_region 
**placements,

-    unsigned int n_placements)
+/**
+ * i915_gem_object_max_page_size() - max of min_page_size of the 
regions

+ * @placements:  list of regions
+ * @n_placements: number of the placements
+ *
+ * Calculates the max of the min_page_size of a list of placements 
passed in.

+ *
+ * Return: max of the min_page_size
+ */
+u32 i915_gem_object_max_page_size(struct intel_memory_region 
**placements,

+  unsigned int n_placements)
 {
-    u32 max_page_size = 0;
+    u32 max_page_size = I915_GTT_PAGE_SIZE_4K;
 int i;
 for (i = 0; i < n_placements; i++) {
@@ -28,7 +38,6 @@ static u32 object_max_page_size(struct 
intel_memory_region **placements,

 max_page_size = max_t(u32, max_page_size, mr->min_page_size);
 }
-    GEM_BUG_ON(!max_page_size);
 return max_page_size;
 }
@@ -99,7 +108,8 @@ __i915_gem_object_create_user_ext(struct 
drm_i915_private *i915, u64 size,

 i915_gem_flush_free_objects(i915);
-    size = round_up(size, object_max_page_size(placements, 
n_placements));

+    size = round_up(size, i915_gem_object_max_page_size(placements,
+    n_placements));
 if (size == 0)
 return ERR_PTR(-EINVAL);


Because of the changes above this path is now unreachable. I suppose 
it was meant to tell the user "you have supplied no placements"? But 
then GEM_BUG_ON (which you remove) used to be wrong.




Yah, looks like an existing problem. May be this "size == 0" check
should have been made before we do the round_up()? ie., check input 'size'
paramter is not 0?
I think for now, I will remove this check as it was unreachable anyhow.


Hm that's true as well. i915_gem_create_ext_ioctl ensures at least one 
placement and internal callers do as well.


To be safe, instead of removing maybe move to before "size = " and 
change to "if (GEM_WARN_ON(n_placements == 0))"? Not sure.. Matt any 
thoughts here given the changes in this patch?


Regards,

Tvrtko



Niranjana


Regards,

Tvrtko

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h

index 7317d4102955..8c97bddad921 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -47,6 +47,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
 }
 void i915_gem_init__objects(struct drm_i915_private *i915);
+u32 i915_gem_object_max_page_size(struct intel_memory_region 
**placements,

+  unsigned int n_placements);
 void i915_objects_module_exit(void);
 int i915_objects_module_init(void);


Re: [Intel-gfx] [PATCH 3/7] drm/i915/hwmon: Power PL1 limit and TDP setting

2022-09-21 Thread Tvrtko Ursulin



On 21/09/2022 01:02, Dixit, Ashutosh wrote:

On Fri, 16 Sep 2022 08:00:50 -0700, Badal Nilawar wrote:


diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index e2974f928e58..bc061238e35c 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -5,3 +5,23 @@ Contact:   dri-devel@lists.freedesktop.org
  Description:  RO. Current Voltage in millivolt.

Only supported for particular Intel i915 graphics platforms.
+
+What:  /sys/devices/.../hwmon/hwmon/power1_max
+Date:  September 2022
+KernelVersion: 6


Maybe we should ask someone but even if we merge this today to drm-tip this
will appear in kernel.org Linus' version only in 6.2. So I think we should
set this as 6.2 on all patches.


Correct, if merged today it will appear in 6.2 so please change to that 
before merging.


As for the date that's harder to predict and I am not really sure how 
best to handle it. Crystal ball predicts February 2023 fwiw so maybe go 
with that for now. Seems less important than the release for me anyway.


Regards,

Tvrtko


Except for this, thanks for making the changes, this is:

Reviewed-by: Ashutosh Dixit 


Re: [Intel-gfx] [RFC v4 08/14] drm/i915/vm_bind: Abstract out common execbuf functions

2022-09-21 Thread Tvrtko Ursulin



On 21/09/2022 08:09, Niranjana Vishwanathapura wrote:

The new execbuf3 ioctl path and the legacy execbuf ioctl
paths have many common functionalities.
Share code between these two paths by abstracting out the
common functionalities into a separate file where possible.


Looks like a good start to me. A couple comments/questions below.


Signed-off-by: Niranjana Vishwanathapura 
---
  drivers/gpu/drm/i915/Makefile |   1 +
  .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 507 ++---
  .../drm/i915/gem/i915_gem_execbuffer_common.c | 530 ++
  .../drm/i915/gem/i915_gem_execbuffer_common.h |  47 ++
  4 files changed, 612 insertions(+), 473 deletions(-)
  create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_execbuffer_common.c
  create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_execbuffer_common.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 9bf939ef18ea..bf952f478555 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -148,6 +148,7 @@ gem-y += \
gem/i915_gem_create.o \
gem/i915_gem_dmabuf.o \
gem/i915_gem_domain.o \
+   gem/i915_gem_execbuffer_common.o \
gem/i915_gem_execbuffer.o \
gem/i915_gem_internal.o \
gem/i915_gem_object.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 33d989a20227..363b2a788cdf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -9,8 +9,6 @@
  #include 
  #include 
  
-#include 

-
  #include "display/intel_frontbuffer.h"
  
  #include "gem/i915_gem_ioctls.h"

@@ -28,6 +26,7 @@
  #include "i915_file_private.h"
  #include "i915_gem_clflush.h"
  #include "i915_gem_context.h"
+#include "i915_gem_execbuffer_common.h"
  #include "i915_gem_evict.h"
  #include "i915_gem_ioctls.h"
  #include "i915_trace.h"
@@ -235,13 +234,6 @@ enum {
   * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
   */
  
-struct eb_fence {

-   struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
-   struct dma_fence *dma_fence;
-   u64 value;
-   struct dma_fence_chain *chain_fence;
-};
-
  struct i915_execbuffer {
struct drm_i915_private *i915; /** i915 backpointer */
struct drm_file *file; /** per-file lookup tables and limits */
@@ -2446,164 +2438,29 @@ static const enum intel_engine_id user_ring_map[] = {
[I915_EXEC_VEBOX]   = VECS0
  };
  
-static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)

-{
-   struct intel_ring *ring = ce->ring;
-   struct intel_timeline *tl = ce->timeline;
-   struct i915_request *rq;
-
-   /*
-* Completely unscientific finger-in-the-air estimates for suitable
-* maximum user request size (to avoid blocking) and then backoff.
-*/
-   if (intel_ring_update_space(ring) >= PAGE_SIZE)
-   return NULL;
-
-   /*
-* Find a request that after waiting upon, there will be at least half
-* the ring available. The hysteresis allows us to compete for the
-* shared ring and should mean that we sleep less often prior to
-* claiming our resources, but not so long that the ring completely
-* drains before we can submit our next request.
-*/
-   list_for_each_entry(rq, &tl->requests, link) {
-   if (rq->ring != ring)
-   continue;
-
-   if (__intel_ring_space(rq->postfix,
-  ring->emit, ring->size) > ring->size / 2)
-   break;
-   }
-   if (&rq->link == &tl->requests)
-   return NULL; /* weird, we will check again later for real */
-
-   return i915_request_get(rq);
-}
-
-static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context 
*ce,
-  bool throttle)
-{
-   struct intel_timeline *tl;
-   struct i915_request *rq = NULL;
-
-   /*
-* Take a local wakeref for preparing to dispatch the execbuf as
-* we expect to access the hardware fairly frequently in the
-* process, and require the engine to be kept awake between accesses.
-* Upon dispatch, we acquire another prolonged wakeref that we hold
-* until the timeline is idle, which in turn releases the wakeref
-* taken on the engine, and the parent device.
-*/
-   tl = intel_context_timeline_lock(ce);
-   if (IS_ERR(tl))
-   return PTR_ERR(tl);
-
-   intel_context_enter(ce);
-   if (throttle)
-   rq = eb_throttle(eb, ce);
-   intel_context_timeline_unlock(tl);
-
-   if (rq) {
-   bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
-   long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT;
-
-   if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
-  

Re: [Intel-gfx] [RFC v4 03/14] drm/i915/vm_bind: Expose i915_gem_object_max_page_size()

2022-09-21 Thread Tvrtko Ursulin



On 21/09/2022 08:09, Niranjana Vishwanathapura wrote:

Expose i915_gem_object_max_page_size() function non-static
which will be used by the vm_bind feature.

Signed-off-by: Niranjana Vishwanathapura 
Signed-off-by: Andi Shyti 
---
  drivers/gpu/drm/i915/gem/i915_gem_create.c | 20 +++-
  drivers/gpu/drm/i915/gem/i915_gem_object.h |  2 ++
  2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 33673fe7ee0a..3b3ab4abb0a3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -11,14 +11,24 @@
  #include "pxp/intel_pxp.h"
  
  #include "i915_drv.h"

+#include "i915_gem_context.h"


I can't spot that you are adding any code which would need this? 
I915_GTT_PAGE_SIZE_4K? It is in intel_gtt.h.



  #include "i915_gem_create.h"
  #include "i915_trace.h"
  #include "i915_user_extensions.h"
  
-static u32 object_max_page_size(struct intel_memory_region **placements,

-   unsigned int n_placements)
+/**
+ * i915_gem_object_max_page_size() - max of min_page_size of the regions
+ * @placements:  list of regions
+ * @n_placements: number of the placements
+ *
+ * Calculates the max of the min_page_size of a list of placements passed in.
+ *
+ * Return: max of the min_page_size
+ */
+u32 i915_gem_object_max_page_size(struct intel_memory_region **placements,
+ unsigned int n_placements)
  {
-   u32 max_page_size = 0;
+   u32 max_page_size = I915_GTT_PAGE_SIZE_4K;
int i;
  
  	for (i = 0; i < n_placements; i++) {

@@ -28,7 +38,6 @@ static u32 object_max_page_size(struct intel_memory_region 
**placements,
max_page_size = max_t(u32, max_page_size, mr->min_page_size);
}
  
-	GEM_BUG_ON(!max_page_size);

return max_page_size;
  }
  
@@ -99,7 +108,8 @@ __i915_gem_object_create_user_ext(struct drm_i915_private *i915, u64 size,
  
  	i915_gem_flush_free_objects(i915);
  
-	size = round_up(size, object_max_page_size(placements, n_placements));

+   size = round_up(size, i915_gem_object_max_page_size(placements,
+   n_placements));
if (size == 0)
return ERR_PTR(-EINVAL);


Because of the changes above this path is now unreachable. I suppose it 
was meant to tell the user "you have supplied no placements"? But then 
GEM_BUG_ON (which you remove) used to be wrong.


Regards,

Tvrtko

  
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h

index 7317d4102955..8c97bddad921 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -47,6 +47,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
  }
  
  void i915_gem_init__objects(struct drm_i915_private *i915);

+u32 i915_gem_object_max_page_size(struct intel_memory_region **placements,
+ unsigned int n_placements);
  
  void i915_objects_module_exit(void);

  int i915_objects_module_init(void);


Re: [Intel-gfx] [RFC v4 02/14] drm/i915/vm_bind: Add __i915_sw_fence_await_reservation()

2022-09-21 Thread Tvrtko Ursulin



On 21/09/2022 08:09, Niranjana Vishwanathapura wrote:

Add function __i915_sw_fence_await_reservation() for
asynchronous wait on a dma-resv object with specified
dma_resv_usage. This is required for async vma unbind
with vm_bind.

Signed-off-by: Niranjana Vishwanathapura 
---
  drivers/gpu/drm/i915/i915_sw_fence.c | 25 ++---
  drivers/gpu/drm/i915/i915_sw_fence.h |  7 ++-
  2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c 
b/drivers/gpu/drm/i915/i915_sw_fence.c
index 6fc0d1b89690..0ce8f4efc1ed 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -569,12 +569,11 @@ int __i915_sw_fence_await_dma_fence(struct i915_sw_fence 
*fence,
return ret;
  }
  
-int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,

-   struct dma_resv *resv,
-   const struct dma_fence_ops *exclude,
-   bool write,
-   unsigned long timeout,
-   gfp_t gfp)
+int __i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+ struct dma_resv *resv,
+ enum dma_resv_usage usage,
+ unsigned long timeout,
+ gfp_t gfp)
  {
struct dma_resv_iter cursor;
struct dma_fence *f;
@@ -583,7 +582,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence 
*fence,
debug_fence_assert(fence);
might_sleep_if(gfpflags_allow_blocking(gfp));
  
-	dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(write));

+   dma_resv_iter_begin(&cursor, resv, usage);
dma_resv_for_each_fence_unlocked(, f) {
pending = i915_sw_fence_await_dma_fence(fence, f, timeout,
gfp);
@@ -598,6 +597,18 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence 
*fence,
return ret;
  }
  
+int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,

+   struct dma_resv *resv,
+   const struct dma_fence_ops *exclude,
+   bool write,
+   unsigned long timeout,
+   gfp_t gfp)
+{
+   return __i915_sw_fence_await_reservation(fence, resv,
+dma_resv_usage_rw(write),
+timeout, gfp);
+}


Drive by observation - it looked dodgy that you create a wrapper here 
which ignores one function parameter.


On a more detailed look it seems no callers actually use exclude and 
it's even unused inside this function since 1b5bdf071e62 ("drm/i915: use 
the new iterator in i915_sw_fence_await_reservation v3").


So a cleanup patch before this one?

Regards,

Tvrtko



+
  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
  #include "selftests/lib_sw_fence.c"
  #include "selftests/i915_sw_fence.c"
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h 
b/drivers/gpu/drm/i915/i915_sw_fence.h
index 619fc5a22f0c..3cf4b6e16f35 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -10,13 +10,13 @@
  #define _I915_SW_FENCE_H_
  
  #include 

+#include 
  #include 
  #include 
  #include  /* for NOTIFY_DONE */
  #include 
  
  struct completion;

-struct dma_resv;
  struct i915_sw_fence;
  
  enum i915_sw_fence_notify {

@@ -89,6 +89,11 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence 
*fence,
  unsigned long timeout,
  gfp_t gfp);
  
+int __i915_sw_fence_await_reservation(struct i915_sw_fence *fence,

+ struct dma_resv *resv,
+ enum dma_resv_usage usage,
+ unsigned long timeout,
+ gfp_t gfp);
  int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
struct dma_resv *resv,
const struct dma_fence_ops *exclude,


Re: [PATCH 3/3] drm/etnaviv: export client GPU usage statistics via fdinfo

2022-09-16 Thread Tvrtko Ursulin



On 16/09/2022 10:50, Lucas Stach wrote:

Hi Tvrtko,

Am Freitag, dem 16.09.2022 um 10:31 +0100 schrieb Tvrtko Ursulin:

Hi Lucas,

On 08/09/2022 19:10, Lucas Stach wrote:

This exposes a accumulated GPU active time per client via the
fdinfo infrastructure.

Signed-off-by: Lucas Stach 
---
   drivers/gpu/drm/etnaviv/etnaviv_drv.c | 38 ++-
   1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c 
b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index b69edb40ae2a..11b1f11fcb58 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -22,6 +22,7 @@
   #include "etnaviv_gem.h"
   #include "etnaviv_mmu.h"
   #include "etnaviv_perfmon.h"
+#include "common.xml.h"
   
   /*

* DRM operations:
@@ -471,7 +472,42 @@ static const struct drm_ioctl_desc etnaviv_ioctls[] = {
ETNA_IOCTL(PM_QUERY_SIG, pm_query_sig, DRM_RENDER_ALLOW),
   };
   
-DEFINE_DRM_GEM_FOPS(fops);

+static void etnaviv_fop_show_fdinfo(struct seq_file *m, struct file *f)
+{
+   struct drm_file *file = f->private_data;
+   struct drm_device *dev = file->minor->dev;
+   struct etnaviv_drm_private *priv = dev->dev_private;
+   struct etnaviv_file_private *ctx = file->driver_priv;
+   struct drm_printer p = drm_seq_file_printer(m);


Any specific reason not to use seq_printf directly? (May be my ignorance.)


Not really, I just followed what msm was doing here.


Right, no strong feelings either way I just did not see the need to wrap 
it for this use case.



+   int i;
+
+   drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name);
+   drm_printf(&p, "drm-client-id:\t%u\n", ctx->id);
+
+   for (i = 0; i < ETNA_MAX_PIPES; i++) {
+struct etnaviv_gpu *gpu = priv->gpu[i];
+   char engine[8];
+   int cur = 0;


Alignment renders odd in my client.


I'll check that, might have messed up here.



+
+   if (!gpu)
+   continue;


I'd stick a comment in here to the effect of "For text output format
description please see drm-usage-stats.rst!".

Just to leave a breadcrumb putting some restraint on adding vendor
specific things which may be already covered by the common spec. To help
with common tools in the future as much as possible.


Yea, it was pretty to clear to me that we want the common format as
much as possible when writing the patches, but it's a good idea to add
a pointer for the future reader.


Thanks!


+
+   if (gpu->identity.features & chipFeatures_PIPE_2D)
+   cur = snprintf(engine, sizeof(engine), "2D");
+   if (gpu->identity.features & chipFeatures_PIPE_3D)
+   cur = snprintf(engine + cur, sizeof(engine) - cur,
+  "%s3D", cur ? "/" : "");
+
+   drm_printf(&p, "drm-engine-%s:\t%llu ns\n", engine,
+  ctx->sched_entity[i].elapsed_ns);


Two questions:

1)
So you have max four pipes, each can be either only 2d, 3d, or 2d/3d?
Can you have multiple of the same like 2x 3D? If you do, have you
considered exporting one drm-engine-% together with drm-engine-capacity-
for it?


The four pipes is a arbitrary driver limit. Etnaviv is a bit special in
that it collects all Vivante GPUs present in a system into a single DRM
device, each of those GPUs can be either 2D, 3D or a combined core with
both 2D and 3D capabilities. In theory there could be multiple GPUs of
each kind, but for now all real SoC designs we've come across only had
a single one of each kind.

When we add support for a SoC that has multiple GPUs of one kind, I
think exposing drm-engine-capacity, together with hooking them up to
the load balancing in the scheduler is the right thing to do.


2)
Have you tried my, yet unmerged, vendor agnostic gputop tool with your
changes?

https://patchwork.freedesktop.org/series/102175/

It would be interesting to know if it works.


Yes, I did when working on this series. I had some crashes related to
(I believe) double frees in the DRM client discovery, which I hadn't
had time to investigate further. Seems there is a race, as I couldn't
reproduce the crash when running with valgrind.

Other than that, the tool works for exposing the per-client GPU load on
etnaviv.


Cool, at least some success.

Out of curiosity what is the planned consumer in etnaviv landscape?

Regards,

Tvrtko


Re: [PATCH 3/3] drm/etnaviv: export client GPU usage statistics via fdinfo

2022-09-16 Thread Tvrtko Ursulin



Hi Lucas,

On 08/09/2022 19:10, Lucas Stach wrote:

This exposes a accumulated GPU active time per client via the
fdinfo infrastructure.

Signed-off-by: Lucas Stach 
---
  drivers/gpu/drm/etnaviv/etnaviv_drv.c | 38 ++-
  1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c 
b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index b69edb40ae2a..11b1f11fcb58 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -22,6 +22,7 @@
  #include "etnaviv_gem.h"
  #include "etnaviv_mmu.h"
  #include "etnaviv_perfmon.h"
+#include "common.xml.h"
  
  /*

   * DRM operations:
@@ -471,7 +472,42 @@ static const struct drm_ioctl_desc etnaviv_ioctls[] = {
ETNA_IOCTL(PM_QUERY_SIG, pm_query_sig, DRM_RENDER_ALLOW),
  };
  
-DEFINE_DRM_GEM_FOPS(fops);

+static void etnaviv_fop_show_fdinfo(struct seq_file *m, struct file *f)
+{
+   struct drm_file *file = f->private_data;
+   struct drm_device *dev = file->minor->dev;
+   struct etnaviv_drm_private *priv = dev->dev_private;
+   struct etnaviv_file_private *ctx = file->driver_priv;
+   struct drm_printer p = drm_seq_file_printer(m);


Any specific reason not to use seq_printf directly? (May be my ignorance.)


+   int i;
+
+   drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name);
+   drm_printf(&p, "drm-client-id:\t%u\n", ctx->id);
+
+   for (i = 0; i < ETNA_MAX_PIPES; i++) {
+struct etnaviv_gpu *gpu = priv->gpu[i];
+   char engine[8];
+   int cur = 0;


Alignment renders odd in my client.


+
+   if (!gpu)
+   continue;


I'd stick a comment in here to the effect of "For text output format 
description please see drm-usage-stats.rst!".


Just to leave a breadcrumb putting some restraint on adding vendor 
specific things which may be already covered by the common spec. To help 
with common tools in the future as much as possible.



+
+   if (gpu->identity.features & chipFeatures_PIPE_2D)
+   cur = snprintf(engine, sizeof(engine), "2D");
+   if (gpu->identity.features & chipFeatures_PIPE_3D)
+   cur = snprintf(engine + cur, sizeof(engine) - cur,
+  "%s3D", cur ? "/" : "");
+
+   drm_printf(&p, "drm-engine-%s:\t%llu ns\n", engine,
+  ctx->sched_entity[i].elapsed_ns);


Two questions:

1)
So you have max four pipes, each can be either only 2d, 3d, or 2d/3d? 
Can you have multiple of the same like 2x 3D? If you do, have you 
considered exporting one drm-engine-% together with drm-engine-capacity- 
for it?


2)
Have you tried my, yet unmerged, vendor agnostic gputop tool with your 
changes?


https://patchwork.freedesktop.org/series/102175/

It would be interesting to know if it works.

Regards,

Tvrtko


+   }
+}
+
+static const struct file_operations fops = {
+.owner = THIS_MODULE,
+DRM_GEM_FOPS,
+.show_fdinfo = etnaviv_fop_show_fdinfo,
+};
  
  static const struct drm_driver etnaviv_drm_driver = {

.driver_features= DRIVER_GEM | DRIVER_RENDER,


Re: [Intel-gfx] [PATCH 1/1] drm/i915/uc: Update to latest GuC and use new-format GuC/HuC names

2022-09-16 Thread Tvrtko Ursulin



On 15/09/2022 21:03, John Harrison wrote:

On 9/15/2022 01:59, Tvrtko Ursulin wrote:


Hi,

On 15/09/2022 00:46, john.c.harri...@intel.com wrote:

From: John Harrison 

Going forwards, the intention is for GuC firmware files to be named
for their major version only and HuC firmware files to have no version
number in the name at all. This patch adds those entries for all
platforms that are officially GuC/HuC enabled.

Also, update the expected GuC version numbers to the latest firmware
release for those platforms.

Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 +++---
  1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c

index 1169e2a09da24..b91ad4aede1f7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -72,12 +72,14 @@ void intel_uc_fw_change_status(struct intel_uc_fw 
*uc_fw,

   * security fixes, etc. to be enabled.
   */
  #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_maj, guc_mmp) \
-    fw_def(DG2,  0, guc_mmp(dg2,  70, 4, 1)) \
+    fw_def(DG2,  0, guc_maj(dg2,  70, 5)) \


Just glancing over out of curiosity. Part which confused me is that if 
only major is supposed to be used then what is the '5' in guc_maj(dg2, 
70, 5) ?
See the earlier patch that added support for version reduced filenames. 
The minor number is still specified because want to be able to warn the 
user if their firmware is out of date and causing them to miss features, 
security fixes, etc. The driver will still load any old firmware with 
the right name and work with it, but user's need to know that there are 
updates available.


Got it. Release is deemed not important enough to warn about? no 
actually, it's different, I guess we never expect to bump only the 
release with a source level change - so in practice kernel could not 
warn that there is a newer release version since it would never know. In 
other words those ones would only be hitting linux-firmware, while minor 
changes would be kernel patches as well.


I also couldn't find guc_maj with grep so I guess it's some sort of a 
magic concatenation macro or what?
'guc_maj' is a macro parameter as per the definition of the macro three 
lines above. According to where INTEL_GUC_FIRMWARE_DEFS is used, it 
becomes either a mechanism for creating just a 'MODULE_FIRMWARE' 
definition for the firmware file or a table entry giving all the version 
information as well as the filename.


Doh thanks, macro magic was apparently impenetrable to me yesterday.

Regards,

Tvrtko


Re: [Intel-gfx] [PATCH] drm/i915: Split GAM and MSLICE steering

2022-09-16 Thread Tvrtko Ursulin



On 16/09/2022 02:43, Matt Roper wrote:

Although the bspec lists several MMIO ranges as "MSLICE," it turns out
that a subset of these are of a "GAM" subclass that has unique rules and
doesn't followed regular mslice steering behavior.

  * Xe_HP SDV:  GAM ranges must always be steered to 0,0.  These
registers share the regular steering control register (0xFDC) with
other steering types

  * DG2:  GAM ranges must always be steered to 1,0.  GAM registers have a
dedicated steering control register (0xFE0) so we can set the value
once at startup and rely on implicit steering.  Technically the
hardware default should already be set to 1,0 properly, but it never
hurts to ensure that in the driver.


Do you have any data on whether the "technically should" holds in 
practice? What would be the consequences of some platform/machine 
surprising us here?


Regards,

Tvrtko



Bspec: 66534
Signed-off-by: Matt Roper 
---
  drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 24 +++--
  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
  drivers/gpu/drm/i915/gt/intel_gt_types.h|  1 +
  drivers/gpu/drm/i915/gt/intel_workarounds.c | 10 +
  4 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index e79405a45312..a2047a68ea7a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -40,6 +40,7 @@ static const char * const intel_steering_types[] = {
"L3BANK",
"MSLICE",
"LNCF",
+   "GAM",
"INSTANCE 0",
  };
  
@@ -48,14 +49,23 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = {

{},
  };
  
+/*

+ * Although the bspec lists more "MSLICE" ranges than shown here, some of those
+ * are of a "GAM" subclass that has special rules.  Thus we use a separate
+ * GAM table farther down for those.
+ */
  static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
-   { 0x004000, 0x004AFF },
-   { 0x00C800, 0x00CFFF },
{ 0x00DD00, 0x00DDFF },
	{ 0x00E900, 0x00EFFF }, /* 0xEA00 - 0xEFFF is unused */
{},
  };
  
+static const struct intel_mmio_range xehpsdv_gam_steering_table[] = {

+   { 0x004000, 0x004AFF },
+   { 0x00C800, 0x00CFFF },
+   {},
+};
+
  static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
{ 0x00B000, 0x00B0FF },
{ 0x00D800, 0x00D8FF },
@@ -114,9 +124,15 @@ void intel_gt_mcr_init(struct intel_gt *gt)
} else if (IS_DG2(i915)) {
gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
gt->steering_table[LNCF] = dg2_lncf_steering_table;
+   /*
+* No need to hook up the GAM table since it has a dedicated
+* steering control register on DG2 and can use implicit
+* steering.
+*/
} else if (IS_XEHPSDV(i915)) {
gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
+   gt->steering_table[GAM] = xehpsdv_gam_steering_table;
} else if (GRAPHICS_VER(i915) >= 11 &&
   GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
gt->steering_table[L3BANK] = icl_l3bank_steering_table;
@@ -351,6 +367,10 @@ static void get_nonterminated_steering(struct intel_gt *gt,
*group = __ffs(gt->info.mslice_mask) << 1;
*instance = 0;  /* unused */
break;
+   case GAM:
+   *group = IS_DG2(gt->i915) ? 1 : 0;
+   *instance = 0;
+   break;
case INSTANCE0:
/*
 * There are a lot of MCR types for which instance (0, 0)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 2275ee47da95..2343b26e0e21 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -42,6 +42,7 @@
  #define MCFG_MCR_SELECTOR _MMIO(0xfd0)
  #define SF_MCR_SELECTOR   _MMIO(0xfd8)
  #define GEN8_MCR_SELECTOR _MMIO(0xfdc)
+#define GAM_MCR_SELECTOR   _MMIO(0xfe0)
  #define   GEN8_MCR_SLICE(slice)   (((slice) & 3) << 26)
  #define   GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3)
  #define   GEN8_MCR_SUBSLICE(subslice) (((subslice) & 3) << 24)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index f19c2de77ff6..30003d68fd51 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -59,6 +59,7 @@ enum intel_steering_type {
L3BANK,
MSLICE,
LNCF,
+   GAM,
  
  	/*

 * On some platforms there are multiple types of MCR registers that
diff --git 

<    5   6   7   8   9   10   11   12   13   14   >