Re: [PATCH drm-next v2 06/16] drm: debugfs: provide infrastructure to dump a DRM GPU VA space

2023-02-17 Thread kernel test robot
Hi Danilo,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on 48075a66fca613477ac1969b576a93ef5db0164f]

url:
https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101
base:   48075a66fca613477ac1969b576a93ef5db0164f
patch link:
https://lore.kernel.org/r/20230217134820.14672-1-dakr%40redhat.com
patch subject: [PATCH drm-next v2 06/16] drm: debugfs: provide infrastructure 
to dump a DRM GPU VA space
config: mips-allyesconfig 
(https://download.01.org/0day-ci/archive/20230218/202302181014.l0sho3s1-...@intel.com/config)
compiler: mips-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/intel-lab-lkp/linux/commit/e1a1c9659baee305780e1ce50c05e53e1d14b245
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101
git checkout e1a1c9659baee305780e1ce50c05e53e1d14b245
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=mips olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=mips SHELL=/bin/bash drivers/gpu/drm/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202302181014.l0sho3s1-...@intel.com/

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/drm_debugfs.c: In function 'drm_debugfs_gpuva_info':
>> drivers/gpu/drm/drm_debugfs.c:228:28: warning: cast from pointer to integer 
>> of different size [-Wpointer-to-int-cast]
 228 |(u64)va->gem.obj, va->gem.offset);
 |^


vim +228 drivers/gpu/drm/drm_debugfs.c

   178  
   179  /**
   180   * drm_debugfs_gpuva_info - dump the given DRM GPU VA space
   181   * @m: pointer to the &seq_file to write
   182   * @mgr: the &drm_gpuva_manager representing the GPU VA space
   183   *
   184   * Dumps the GPU VA regions and mappings of a given DRM GPU VA manager.
   185   *
   186   * For each DRM GPU VA space drivers should call this function from 
their
   187   * &drm_info_list's show callback.
   188   *
   189   * Returns: 0 on success, -ENODEV if the &mgr is not initialized
   190   */
   191  int drm_debugfs_gpuva_info(struct seq_file *m,
   192 struct drm_gpuva_manager *mgr)
   193  {
   194  DRM_GPUVA_ITER(it, mgr);
   195  DRM_GPUVA_REGION_ITER(__it, mgr);
   196  
   197  if (!mgr->name)
   198  return -ENODEV;
   199  
   200  seq_printf(m, "DRM GPU VA space (%s)\n", mgr->name);
   201  seq_puts  (m, "\n");
   202  seq_puts  (m, " VA regions  | start  | range
  | end| sparse\n");
   203  seq_puts  (m, 
"\n");
   204  seq_printf(m, " VA space| 0x%016llx | 0x%016llx | 0x%016llx 
|   -\n",
   205 mgr->mm_start, mgr->mm_range, mgr->mm_start + 
mgr->mm_range);
   206  seq_puts  (m, 
"---\n");
   207  drm_gpuva_iter_for_each(__it) {
   208  struct drm_gpuva_region *reg = __it.reg;
   209  
   210  if (reg == &mgr->kernel_alloc_region) {
   211  seq_printf(m, " kernel node | 0x%016llx | 
0x%016llx | 0x%016llx |   -\n",
   212 reg->va.addr, reg->va.range, 
reg->va.addr + reg->va.range);
   213  continue;
   214  }
   215  
   216  seq_printf(m, " | 0x%016llx | 0x%016llx | 
0x%016llx | %s\n",
   217 reg->va.addr, reg->va.range, reg->va.addr + 
reg->va.range,
   218 reg->sparse ? "true" : "false");
   219  }
   220  seq_puts(m, "\n");
   221  seq_puts(m, " VAs | start  | range  | 
end| object | object offset\n");
   222  seq_puts(m, 
"-\n");
   223  drm_gpuva_iter_for_each(it) {
   224  

[PATCH 1/2] drm/msm: drop unused ring variable in msm_ioctl_gem_submit()

2023-02-17 Thread Dmitry Baryshkov
The variable ring is not used by msm_parse_deps() and
msm_ioctl_gem_submit() and thus can be dropped.

Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 10 +++---
 drivers/gpu/drm/msm/msm_gpu_trace.h  | 10 --
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c 
b/drivers/gpu/drm/msm/msm_gem_submit.c
index ac8ed731f76d..a539eb31042f 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -550,8 +550,7 @@ static struct drm_syncobj **msm_parse_deps(struct 
msm_gem_submit *submit,
struct drm_file *file,
uint64_t in_syncobjs_addr,
uint32_t nr_in_syncobjs,
-   size_t syncobj_stride,
-   struct msm_ringbuffer *ring)
+   size_t syncobj_stride)
 {
struct drm_syncobj **syncobjs = NULL;
struct drm_msm_gem_submit_syncobj syncobj_desc = {0};
@@ -722,7 +721,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
struct msm_gem_submit *submit;
struct msm_gpu *gpu = priv->gpu;
struct msm_gpu_submitqueue *queue;
-   struct msm_ringbuffer *ring;
struct msm_submit_post_dep *post_deps = NULL;
struct drm_syncobj **syncobjs_to_reset = NULL;
int out_fence_fd = -1;
@@ -760,8 +758,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
if (!queue)
return -ENOENT;
 
-   ring = gpu->rb[queue->ring_nr];
-
if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
if (out_fence_fd < 0) {
@@ -774,7 +770,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
if (IS_ERR(submit))
return PTR_ERR(submit);
 
-   trace_msm_gpu_submit(pid_nr(submit->pid), ring->id, submit->ident,
+   trace_msm_gpu_submit(pid_nr(submit->pid), submit->ident,
args->nr_bos, args->nr_cmds);
 
ret = mutex_lock_interruptible(&queue->lock);
@@ -803,7 +799,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
syncobjs_to_reset = msm_parse_deps(submit, file,
   args->in_syncobjs,
   args->nr_in_syncobjs,
-  args->syncobj_stride, ring);
+  args->syncobj_stride);
if (IS_ERR(syncobjs_to_reset)) {
ret = PTR_ERR(syncobjs_to_reset);
goto out_unlock;
diff --git a/drivers/gpu/drm/msm/msm_gpu_trace.h 
b/drivers/gpu/drm/msm/msm_gpu_trace.h
index ac40d857bc45..12ef10f1de4c 100644
--- a/drivers/gpu/drm/msm/msm_gpu_trace.h
+++ b/drivers/gpu/drm/msm/msm_gpu_trace.h
@@ -9,24 +9,22 @@
 #define TRACE_INCLUDE_FILE msm_gpu_trace
 
 TRACE_EVENT(msm_gpu_submit,
-   TP_PROTO(pid_t pid, u32 ringid, u32 id, u32 nr_bos, u32 nr_cmds),
-   TP_ARGS(pid, ringid, id, nr_bos, nr_cmds),
+   TP_PROTO(pid_t pid, u32 id, u32 nr_bos, u32 nr_cmds),
+   TP_ARGS(pid, id, nr_bos, nr_cmds),
TP_STRUCT__entry(
__field(pid_t, pid)
__field(u32, id)
-   __field(u32, ringid)
__field(u32, nr_cmds)
__field(u32, nr_bos)
),
TP_fast_assign(
__entry->pid = pid;
__entry->id = id;
-   __entry->ringid = ringid;
__entry->nr_bos = nr_bos;
__entry->nr_cmds = nr_cmds
),
-   TP_printk("id=%d pid=%d ring=%d bos=%d cmds=%d",
-   __entry->id, __entry->pid, __entry->ringid,
+   TP_printk("id=%d pid=%d bos=%d cmds=%d",
+   __entry->id, __entry->pid,
__entry->nr_bos, __entry->nr_cmds)
 );
 
-- 
2.39.1



[PATCH 2/2] drm/msm: simplify msm_parse_deps() and msm_parse_post_deps()

2023-02-17 Thread Dmitry Baryshkov
Simplify two functions msm_parse_deps() and msm_parse_post_deps():
extract single item parsing function and clean up error path.

Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 196 +++
 1 file changed, 106 insertions(+), 90 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c 
b/drivers/gpu/drm/msm/msm_gem_submit.c
index a539eb31042f..c64907f0f249 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -546,6 +546,46 @@ struct msm_submit_post_dep {
struct dma_fence_chain *chain;
 };
 
+static struct drm_syncobj *msm_parse_dep_one(struct msm_gem_submit *submit,
+struct drm_file *file,
+uint64_t address,
+size_t syncobj_stride)
+{
+   struct drm_msm_gem_submit_syncobj syncobj_desc = {0};
+   struct dma_fence *fence;
+   struct drm_syncobj *syncobj = NULL;
+   int ret;
+
+   if (copy_from_user(&syncobj_desc,
+  u64_to_user_ptr(address),
+  min(syncobj_stride, sizeof(syncobj_desc
+   return ERR_PTR(-EFAULT);
+
+   if (syncobj_desc.point &&
+   !drm_core_check_feature(submit->dev, DRIVER_SYNCOBJ_TIMELINE))
+   return ERR_PTR(-EOPNOTSUPP);
+
+   if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS)
+   return ERR_PTR(-EINVAL);
+
+   ret = drm_syncobj_find_fence(file, syncobj_desc.handle,
+syncobj_desc.point, 0, &fence);
+   if (ret)
+   return ERR_PTR(ret);
+
+   ret = drm_sched_job_add_dependency(&submit->base, fence);
+   if (ret)
+   return ERR_PTR(ret);
+
+   if (syncobj_desc.flags & MSM_SUBMIT_SYNCOBJ_RESET) {
+   syncobj = drm_syncobj_find(file, syncobj_desc.handle);
+   if (!syncobj)
+   return ERR_PTR(-EINVAL);
+   }
+
+   return syncobj;
+}
+
 static struct drm_syncobj **msm_parse_deps(struct msm_gem_submit *submit,
struct drm_file *file,
uint64_t in_syncobjs_addr,
@@ -553,9 +593,8 @@ static struct drm_syncobj **msm_parse_deps(struct 
msm_gem_submit *submit,
size_t syncobj_stride)
 {
struct drm_syncobj **syncobjs = NULL;
-   struct drm_msm_gem_submit_syncobj syncobj_desc = {0};
-   int ret = 0;
-   uint32_t i, j;
+   int ret;
+   int i;
 
syncobjs = kcalloc(nr_in_syncobjs, sizeof(*syncobjs),
   GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
@@ -564,54 +603,26 @@ static struct drm_syncobj **msm_parse_deps(struct 
msm_gem_submit *submit,
 
for (i = 0; i < nr_in_syncobjs; ++i) {
uint64_t address = in_syncobjs_addr + i * syncobj_stride;
-   struct dma_fence *fence;
+   struct drm_syncobj *syncobj;
 
-   if (copy_from_user(&syncobj_desc,
-  u64_to_user_ptr(address),
-  min(syncobj_stride, sizeof(syncobj_desc {
-   ret = -EFAULT;
-   break;
-   }
-
-   if (syncobj_desc.point &&
-   !drm_core_check_feature(submit->dev, 
DRIVER_SYNCOBJ_TIMELINE)) {
-   ret = -EOPNOTSUPP;
-   break;
+   syncobj = msm_parse_dep_one(submit, file, address, 
syncobj_stride);
+   if (IS_ERR(syncobj)) {
+   ret = PTR_ERR(syncobj);
+   goto err;
}
 
-   if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS) {
-   ret = -EINVAL;
-   break;
-   }
-
-   ret = drm_syncobj_find_fence(file, syncobj_desc.handle,
-syncobj_desc.point, 0, &fence);
-   if (ret)
-   break;
-
-   ret = drm_sched_job_add_dependency(&submit->base, fence);
-   if (ret)
-   break;
-
-   if (syncobj_desc.flags & MSM_SUBMIT_SYNCOBJ_RESET) {
-   syncobjs[i] =
-   drm_syncobj_find(file, syncobj_desc.handle);
-   if (!syncobjs[i]) {
-   ret = -EINVAL;
-   break;
-   }
-   }
+   syncobjs[i] = syncobj;
}
 
-   if (ret) {
-   for (j = 0; j <= i; ++j) {
-   if (syncobjs[j])
-   drm_syncobj_put(syncobjs[j]);
-   }
-   kfree(syncobjs);
-   return ERR_PTR(ret);
-   }
return syncobjs;
+
+err:
+   w

[PATCH 0/2] drm/msm: rework msm_parse_deps() and msm_parse_post_deps()

2023-02-17 Thread Dmitry Baryshkov
As discussed in the review of [1], rework these two functions to
extract a single-item parser and provide a clean error path.

Dependencies: [1]

[1] https://lore.kernel.org/all/20230215235048.1166484-1-robdcl...@gmail.com

Dmitry Baryshkov (2):
  drm/msm: drop unused ring variable in msm_ioctl_gem_submit()
  drm/msm: simplify msm_parse_deps() and msm_parse_post_deps()

 drivers/gpu/drm/msm/msm_gem_submit.c | 206 ++-
 drivers/gpu/drm/msm/msm_gpu_trace.h  |  10 +-
 2 files changed, 113 insertions(+), 103 deletions(-)

-- 
2.39.1



Re: [PATCH drm-next v2 13/16] drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm

2023-02-17 Thread kernel test robot
Hi Danilo,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on 48075a66fca613477ac1969b576a93ef5db0164f]

url:
https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101
base:   48075a66fca613477ac1969b576a93ef5db0164f
patch link:
https://lore.kernel.org/r/20230217134820.14672-8-dakr%40redhat.com
patch subject: [PATCH drm-next v2 13/16] drm/nouveau: nvkm/vmm: implement raw 
ops to manage uvmm
config: mips-allyesconfig 
(https://download.01.org/0day-ci/archive/20230218/202302180839.s0w26kcj-...@intel.com/config)
compiler: mips-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/intel-lab-lkp/linux/commit/b25c0bcfed93dd62ed732968d8987b92e10c4579
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101
git checkout b25c0bcfed93dd62ed732968d8987b92e10c4579
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=mips olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=mips SHELL=/bin/bash drivers/gpu/drm/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202302180839.s0w26kcj-...@intel.com/

All warnings (new ones prefixed by >>):

   In file included from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h:4,
from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.h:5,
from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c:22:
   drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c: In function 
'nvkm_uvmm_mthd_raw_map':
>> drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c:422:31: warning: cast to 
>> pointer from integer of different size [-Wint-to-pointer-cast]
 422 |   (void *)args->argv, args->argc);
 |   ^
   drivers/gpu/drm/nouveau/include/nvkm/core/memory.h:66:43: note: in 
definition of macro 'nvkm_memory_map'
  66 | (p)->func->map((p),(o),(vm),(va),(av),(ac))
 |   ^~


vim +422 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c

   388  
   389  static int
   390  nvkm_uvmm_mthd_raw_map(struct nvkm_uvmm *uvmm, struct nvif_vmm_raw_v0 
*args)
   391  {
   392  struct nvkm_client *client = uvmm->object.client;
   393  struct nvkm_vmm *vmm = uvmm->vmm;
   394  struct nvkm_vma vma = {
   395  .addr = args->addr,
   396  .size = args->size,
   397  .used = true,
   398  .mapref = false,
   399  .no_comp = true,
   400  };
   401  struct nvkm_memory *memory;
   402  u64 handle = args->memory;
   403  u8 refd;
   404  int ret;
   405  
   406  if (!nvkm_vmm_in_managed_range(vmm, args->addr, args->size))
   407  return -EINVAL;
   408  
   409  ret = nvkm_uvmm_page_index(uvmm, args->size, args->shift, 
&refd);
   410  if (ret)
   411  return ret;
   412  
   413  vma.page = vma.refd = refd;
   414  
   415  memory = nvkm_umem_search(client, args->memory);
   416  if (IS_ERR(memory)) {
   417  VMM_DEBUG(vmm, "memory %016llx %ld\n", handle, 
PTR_ERR(memory));
   418  return PTR_ERR(memory);
   419  }
   420  
   421  ret = nvkm_memory_map(memory, args->offset, vmm, &vma,
 > 422(void *)args->argv, args->argc);
   423  
   424  nvkm_memory_unref(&vma.memory);
   425  nvkm_memory_unref(&memory);
   426  return ret;
   427  }
   428  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests


Re: [PATCH drm-next v2 05/16] drm: manager to keep track of GPUs VA mappings

2023-02-17 Thread kernel test robot
Hi Danilo,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on 48075a66fca613477ac1969b576a93ef5db0164f]

url:
https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101
base:   48075a66fca613477ac1969b576a93ef5db0164f
patch link:
https://lore.kernel.org/r/20230217134422.14116-6-dakr%40redhat.com
patch subject: [PATCH drm-next v2 05/16] drm: manager to keep track of GPUs VA 
mappings
config: mips-allyesconfig 
(https://download.01.org/0day-ci/archive/20230218/202302180805.b0ab40v5-...@intel.com/config)
compiler: mips-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/intel-lab-lkp/linux/commit/00132cc92b6745cfd51c0d5df4c246a848f2ceaa
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101
git checkout 00132cc92b6745cfd51c0d5df4c246a848f2ceaa
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=mips olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=mips SHELL=/bin/bash drivers/gpu/drm/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202302180805.b0ab40v5-...@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/drm_gpuva_mgr.c:1383:5: warning: no previous prototype for 
>> 'drm_gpuva_sm_step' [-Wmissing-prototypes]
1383 | int drm_gpuva_sm_step(struct drm_gpuva_op *__op, void *priv)
 | ^
--
>> drivers/gpu/drm/drm_gpuva_mgr.c:529: warning: expecting prototype for 
>> drm_gpuva_remove_iter(). Prototype was for drm_gpuva_iter_remove() instead
   drivers/gpu/drm/drm_gpuva_mgr.c:549: warning: Excess function parameter 
'addr' description in 'drm_gpuva_insert'
   drivers/gpu/drm/drm_gpuva_mgr.c:549: warning: Excess function parameter 
'range' description in 'drm_gpuva_insert'
   drivers/gpu/drm/drm_gpuva_mgr.c:765: warning: Excess function parameter 
'addr' description in 'drm_gpuva_region_insert'
   drivers/gpu/drm/drm_gpuva_mgr.c:765: warning: Excess function parameter 
'range' description in 'drm_gpuva_region_insert'
   drivers/gpu/drm/drm_gpuva_mgr.c:1345: warning: Excess function parameter 
'ops' description in 'drm_gpuva_sm_unmap'
   drivers/gpu/drm/drm_gpuva_mgr.c:1589: warning: Function parameter or member 
'addr' not described in 'drm_gpuva_prefetch_ops_create'
   drivers/gpu/drm/drm_gpuva_mgr.c:1589: warning: Function parameter or member 
'range' not described in 'drm_gpuva_prefetch_ops_create'
   drivers/gpu/drm/drm_gpuva_mgr.c:1589: warning: Excess function parameter 
'req_addr' description in 'drm_gpuva_prefetch_ops_create'
   drivers/gpu/drm/drm_gpuva_mgr.c:1589: warning: Excess function parameter 
'req_range' description in 'drm_gpuva_prefetch_ops_create'


vim +/drm_gpuva_sm_step +1383 drivers/gpu/drm/drm_gpuva_mgr.c

  1382  
> 1383  int drm_gpuva_sm_step(struct drm_gpuva_op *__op, void *priv)
  1384  {
  1385  struct {
  1386  struct drm_gpuva_manager *mgr;
  1387  struct drm_gpuva_ops *ops;
  1388  } *args = priv;
  1389  struct drm_gpuva_manager *mgr = args->mgr;
  1390  struct drm_gpuva_ops *ops = args->ops;
  1391  struct drm_gpuva_op *op;
  1392  
  1393  op = gpuva_op_alloc(mgr);
  1394  if (unlikely(!op))
  1395  goto err;
  1396  
  1397  memcpy(op, __op, sizeof(*op));
  1398  
  1399  if (op->op == DRM_GPUVA_OP_REMAP) {
  1400  struct drm_gpuva_op_remap *__r = &__op->remap;
  1401  struct drm_gpuva_op_remap *r = &op->remap;
  1402  
  1403  r->unmap = kmemdup(__r->unmap, sizeof(*r->unmap),
  1404 GFP_KERNEL);
  1405  if (unlikely(!r->unmap))
  1406  goto err_free_op;
  1407  
  1408  if (__r->prev) {
  1409  r->prev = kmemdup(__r->prev, sizeof(*r->prev),
  1410GFP_KERNEL);
  1411  if (unlikely(!r->prev))
  1412  goto err_free_unmap;
  1413  }
  1414  
  1415  

[PATCH 1/2] drm/i915/guc: Improve GuC load error reporting

2023-02-17 Thread John . C . Harrison
From: John Harrison 

There are multiple ways in which the GuC load can fail. The driver was
reporting the status register as is, but not everyone can read the
matrix unfiltered. So add decoding of the common error cases.

Also, remove the comment about interrupt based load completion
checking being not recommended. The interrupt was removed from the GuC
firmware some time ago so it is no longer an option anyway. While at
it, also abort the timeout if a known error code is reported. No need
to keep waiting if the GuC has already given up the load.

Signed-off-by: John Harrison 
---
 .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h   | 17 
 drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 95 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h|  4 +-
 3 files changed, 95 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index 8085fb1812748..750fe0c6d8529 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -21,6 +21,9 @@ enum intel_guc_load_status {
INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH   = 0x02,
INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH   = 0x03,
INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE  = 0x04,
+   INTEL_GUC_LOAD_STATUS_HWCONFIG_START   = 0x05,
+   INTEL_GUC_LOAD_STATUS_HWCONFIG_DONE= 0x06,
+   INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR   = 0x07,
INTEL_GUC_LOAD_STATUS_GDT_DONE = 0x10,
INTEL_GUC_LOAD_STATUS_IDT_DONE = 0x20,
INTEL_GUC_LOAD_STATUS_LAPIC_DONE   = 0x30,
@@ -38,4 +41,18 @@ enum intel_guc_load_status {
INTEL_GUC_LOAD_STATUS_READY= 0xF0,
 };
 
+enum intel_bootrom_load_status {
+   INTEL_BOOTROM_STATUS_NO_KEY_FOUND = 0x13,
+   INTEL_BOOTROM_STATUS_AES_PROD_KEY_FOUND   = 0x1A,
+   INTEL_BOOTROM_STATUS_RSA_FAILED   = 0x50,
+   INTEL_BOOTROM_STATUS_PAVPC_FAILED = 0x73,
+   INTEL_BOOTROM_STATUS_WOPCM_FAILED = 0x74,
+   INTEL_BOOTROM_STATUS_LOADLOC_FAILED   = 0x75,
+   INTEL_BOOTROM_STATUS_JUMP_PASSED  = 0x76,
+   INTEL_BOOTROM_STATUS_JUMP_FAILED  = 0x77,
+   INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED  = 0x79,
+   INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT = 0x7a,
+   INTEL_BOOTROM_STATUS_EXCEPTION= 0x7E,
+};
+
 #endif /* _ABI_GUC_ERRORS_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 69133420c78b2..2f5942606913d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -88,31 +88,64 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
 /*
  * Read the GuC status register (GUC_STATUS) and store it in the
  * specified location; then return a boolean indicating whether
- * the value matches either of two values representing completion
- * of the GuC boot process.
+ * the value matches either completion or a known failure code.
  *
  * This is used for polling the GuC status in a wait_for()
  * loop below.
  */
-static inline bool guc_ready(struct intel_uncore *uncore, u32 *status)
+static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, 
bool *success)
 {
u32 val = intel_uncore_read(uncore, GUC_STATUS);
u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val);
+   u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, val);
 
*status = val;
-   return uk_val == INTEL_GUC_LOAD_STATUS_READY;
+   *success = true;
+   switch (uk_val) {
+   case INTEL_GUC_LOAD_STATUS_READY:
+   return true;
+
+   case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH:
+   case INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH:
+   case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE:
+   case INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR:
+   case INTEL_GUC_LOAD_STATUS_DPC_ERROR:
+   case INTEL_GUC_LOAD_STATUS_EXCEPTION:
+   case INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID:
+   case INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID:
+   case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+   *success = false;
+   return true;
+   }
+
+   switch (br_val) {
+   case INTEL_BOOTROM_STATUS_NO_KEY_FOUND:
+   case INTEL_BOOTROM_STATUS_RSA_FAILED:
+   case INTEL_BOOTROM_STATUS_PAVPC_FAILED:
+   case INTEL_BOOTROM_STATUS_WOPCM_FAILED:
+   case INTEL_BOOTROM_STATUS_LOADLOC_FAILED:
+   case INTEL_BOOTROM_STATUS_JUMP_FAILED:
+   case INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED:
+   case INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT:
+   case INTEL_BOOTROM_STATUS_EXCEPTION:
+   *

[PATCH 2/2] drm/i915/guc: Allow for very slow GuC loading

2023-02-17 Thread John . C . Harrison
From: John Harrison 

A failure to load the GuC is occasionally observed where the GuC log
actually showed that the GuC had loaded just fine. The implication
being that the load just took ever so slightly longer than the 200ms
timeout. Given that the actual time should be tens of milliseconds at
the slowest, this should never happen. So far the issue has generally
been caused by a bad IFWI resulting in low frequencies during boot
(despite the KMD requesting max frequency). However, the issue seems
to happen more often than one would like.

So a) increase the timeout so that the user still gets a working
system even in the case of slow load. And b) report the frequency
during the load to see if that is the cause of the slowdown.

Signed-off-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 37 +--
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 2f5942606913d..72e003f50617d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -12,6 +12,7 @@
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_mcr.h"
 #include "gt/intel_gt_regs.h"
+#include "gt/intel_rps.h"
 #include "intel_guc_fw.h"
 #include "intel_guc_print.h"
 #include "i915_drv.h"
@@ -139,9 +140,12 @@ static int guc_wait_ucode(struct intel_guc *guc)
 {
struct intel_gt *gt = guc_to_gt(guc);
struct intel_uncore *uncore = gt->uncore;
+   ktime_t before, after, delta;
bool success;
u32 status;
-   int ret;
+   int ret, count;
+   u64 delta_ms;
+   u32 before_freq;
 
/*
 * Wait for the GuC to start up.
@@ -159,13 +163,32 @@ static int guc_wait_ucode(struct intel_guc *guc)
 * issues to be resolved. In the meantime bump the timeout to
 * 200ms. Even at slowest clock, this should be sufficient. And
 * in the working case, a larger timeout makes no difference.
+*
+* IFWI updates have also been seen to cause sporadic failures due to
+* the requested frequency not being granted and thus the firmware
+* load is attempted at minimum frequency. That can lead to load times
+* in the seconds range. However, there is a limit on how long an
+* individual wait_for() can wait. So wrap it in a loop.
 */
-   ret = wait_for(guc_load_done(uncore, &status, &success), 200);
+   before_freq = intel_rps_read_actual_frequency(&uncore->gt->rps);
+   before = ktime_get();
+   for (count = 0; count < 20; count++) {
+   ret = wait_for(guc_load_done(uncore, &status, &success), 1000);
+   if (!ret || !success)
+   break;
+
+   guc_dbg(guc, "load still in progress, count = %d, freq = 
%dMHz\n",
+   count, 
intel_rps_read_actual_frequency(&uncore->gt->rps));
+   }
+   after = ktime_get();
+   delta = ktime_sub(after, before);
+   delta_ms = ktime_to_ms(delta);
if (ret || !success) {
u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status);
u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
 
-   guc_info(guc, "load failed: status = 0x%08X, ret = %d\n", 
status, ret);
+   guc_info(guc, "load failed: status = 0x%08X, time = %lldms, 
freq = %dMHz, ret = %d\n",
+status, delta_ms, 
intel_rps_read_actual_frequency(&uncore->gt->rps), ret);
guc_info(guc, "load failed: status: Reset = %d, BootROM = 
0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
 REG_FIELD_GET(GS_MIA_IN_RESET, status),
 bootrom, ukernel,
@@ -206,6 +229,14 @@ static int guc_wait_ucode(struct intel_guc *guc)
/* Uncommon/unexpected error, see earlier status code print for 
details */
if (ret == 0)
ret = -ENXIO;
+   } else if (delta_ms > 200) {
+   guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, 
before = %dMHz, status = 0x%08X, count = %d, ret = %d]\n",
+delta_ms, 
intel_rps_read_actual_frequency(&uncore->gt->rps),
+before_freq, status, count, ret);
+   } else {
+   guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, 
status = 0x%08X, count = %d, ret = %d\n",
+   delta_ms, 
intel_rps_read_actual_frequency(&uncore->gt->rps),
+   before_freq, status, count, ret);
}
 
return ret;
-- 
2.39.1



[PATCH 0/2] Improvements to GuC load failure handling

2023-02-17 Thread John . C . Harrison
From: John Harrison 

Add more decoding of the GuC load failures. Also include information
about GT frequency to see if timeouts are due to a failure to boost
the clocks. Finally, increase the timeout to accommodate situations
where the clock boost does fail.

Signed-off-by: John Harrison 


John Harrison (2):
  drm/i915/guc: Improve GuC load error reporting
  drm/i915/guc: Allow for very slow GuC loading

 .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h   |  17 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 128 +++---
 drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h|   4 +-
 3 files changed, 127 insertions(+), 22 deletions(-)

-- 
2.39.1



Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-17 Thread Rob Clark
On Fri, Feb 17, 2023 at 12:45 PM Rodrigo Vivi  wrote:
>
> On Fri, Feb 17, 2023 at 09:00:49AM -0800, Rob Clark wrote:
> > On Fri, Feb 17, 2023 at 8:03 AM Tvrtko Ursulin
> >  wrote:
> > >
> > >
> > > On 17/02/2023 14:55, Rob Clark wrote:
> > > > On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin
> > > >  wrote:
> > > >>
> > > >>
> > > >> On 16/02/2023 18:19, Rodrigo Vivi wrote:
> > > >>> On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote:
> > >  On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin
> > >   wrote:
> > > >
> > > > From: Tvrtko Ursulin 
> > > >
> > > > In i915 we have this concept of "wait boosting" where we give a 
> > > > priority boost
> > > > for instance to fences which are actively waited upon from 
> > > > userspace. This has
> > > > its pros and cons and can certainly be discussed at length. 
> > > > However fact is
> > > > some workloads really like it.
> > > >
> > > > Problem is that with the arrival of drm syncobj and a new userspace 
> > > > waiting
> > > > entry point it added, the waitboost mechanism was bypassed. Hence I 
> > > > cooked up
> > > > this mini series really (really) quickly to see if some discussion 
> > > > can be had.
> > > >
> > > > It adds a concept of "wait count" to dma fence, which is 
> > > > incremented for every
> > > > explicit dma_fence_enable_sw_signaling and 
> > > > dma_fence_add_wait_callback (like
> > > > dma_fence_add_callback but from explicit/userspace wait paths).
> > > 
> > >  I was thinking about a similar thing, but in the context of dma_fence
> > >  (or rather sync_file) fd poll()ing.  How does the kernel 
> > >  differentiate
> > >  between "housekeeping" poll()ers that don't want to trigger boost but
> > >  simply know when to do cleanup, and waiters who are waiting with some
> > >  urgency.  I think we could use EPOLLPRI for this purpose.
> > > 
> > >  Not sure how that translates to waits via the syncobj.  But I think 
> > >  we
> > >  want to let userspace give some hint about urgent vs housekeeping
> > >  waits.
> > > >>>
> > > >>> Should the hint be on the waits, or should the hints be on the 
> > > >>> executed
> > > >>> context?
> > > >>>
> > > >>> In the end we need some way to quickly ramp-up the frequency to avoid
> > > >>> the execution bubbles.
> > > >>>
> > > >>> waitboost is trying to guess that, but in some cases it guess wrong
> > > >>> and waste power.
> > > >>
> > > >> Do we have a list of workloads which shows who benefits and who loses
> > > >> from the current implementation of waitboost?
> > > >>> btw, this is something that other drivers might need:
> > > >>>
> > > >>> https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883
> > > >>> Cc: Alex Deucher 
> > > >>
> > > >> I have several issues with the context hint if it would directly
> > > >> influence frequency selection in the "more power" direction.
> > > >>
> > > >> First of all, assume a context hint would replace the waitboost. Which
> > > >> applications would need to set it to restore the lost performance and
> > > >> how would they set it?
> > > >>
> > > >> Then I don't even think userspace necessarily knows. Think of a layer
> > > >> like OpenCL. It doesn't really know in advance the profile of
> > > >> submissions vs waits. It depends on the CPU vs GPU speed, so hardware
> > > >> generation, and the actual size of the workload which can be influenced
> > > >> by the application (or user) and not the library.
> > > >>
> > > >> The approach also lends itself well for the "arms race" where every
> > > >> application can say "Me me me, I am the most important workload there 
> > > >> is!".
> > > >
> > > > since there is discussion happening in two places:
> > > >
> > > > https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433
> > > >
> > > > What I think you might want is a ctx boost_mask which lets an app or
> > > > driver disable certain boost signals/classes.  Where fence waits is
> > > > one class of boost, but hypothetical other signals like touchscreen
> > > > (or other) input events could be another class of boost.  A compute
> > > > workload might be interested in fence wait boosts but could care less
> > > > about input events.
> > >
> > > I think it can only be apps which could have any chance knowing whether
> > > their use of a library is latency sensitive or not. Which means new
> > > library extensions and their adoption. So I have some strong reservation
> > > that route is feasible.
> > >
> > > Or we tie with priority which many drivers do. Normal and above gets the
> > > boosting and what lowered itself does not (aka SCHED_IDLE/SCHED_BATCH).
> >
> > yeah, that sounds reasonable.
> >
>
> on that gitlab-issue discussion Emma Anholt was against using the priority
> to influence frequency since that should be more about latency.
>
> or we are talking about something different priority 

[pull] amdgpu drm-next-6.3

2023-02-17 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 6.3.  The big change here is the splitting of dc_link.c into
multiple smaller files.

The following changes since commit 69ed0c5d44d72051b13e65384e9d9354c45d5e14:

  Revert "drm/amd/display: disable S/G display on DCN 3.1.4" (2023-02-03 
15:42:42 -0500)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-next-6.3-2023-02-17

for you to fetch changes up to 80c6d6804f31451848a3956a70c2bcb1f07cfcb0:

  drm/amd/display: disable SubVP + DRR to prevent underflow (2023-02-15 
22:26:22 -0500)


amd-drm-next-6.3-2023-02-17:

amdgpu:
- GC 11 fixes
- Display fixes
- Backlight cleanup
- SMU13 fixes
- SMU7 regression fix
- GFX9 sw queues fix
- AGP fix for GMC 11
- W1 warning fixes
- S/G display fixes
- Misc spelling fixes
- Driver unload fix
- DCN 3.1.4 fixes
- Display code reorg fixes
- Rotation fixes


Alex Deucher (7):
  drm/amd/pm/smu7: move variables to where they are used
  drm/amdgpu/gmc11: fix system aperture set when AGP is enabled
  drm/amd/display: minor cleanup of vm_setup
  drm/amdgpu: add S/G display parameter
  Revert "drm/amd/display: disable S/G display on DCN 3.1.2/3"
  Revert "drm/amd/display: disable S/G display on DCN 2.1.0"
  Revert "drm/amd/display: disable S/G display on DCN 3.1.5"

Alvin Lee (2):
  drm/amd/display: Set max vratio for prefetch to 7.9 for YUV420 MPO
  drm/amd/display: Fix prefetch vratio check

Anthony Koo (1):
  drm/amd/display: [FW Promotion] Release 0.0.153.0

Aric Cyr (2):
  drm/amd/display: 3.2.222
  drm/amd/display: Promote DAL to 3.2.223

Arnd Bergmann (2):
  drm/amdgpu: fix enum odm_combine_mode mismatch
  drm/amd/display: fix link_validation build failure

Arthur Grillo (6):
  drm/amd/display: Turn global functions into static
  drm/amd/display: Add function prototypes to headers
  drm/amd/amdgpu: Add function prototypes to headers
  drm/amd/display: Add previously missing includes
  drm/amd/display: Fix excess arguments on kernel-doc
  drm/amd/display: Make variables declaration inside ifdef guard

Aurabindo Pillai (3):
  drm/amd/display: Fix null pointer deref error on rotation
  drm/amd/display: fix k1 k2 divider programming for phantom streams
  drm/amd/display: disable SubVP + DRR to prevent underflow

Ayush Gupta (1):
  drm/amd/display: temporary fix for page faulting

Bhawanpreet Lakha (1):
  drm/amd/display: Add support for multiple overlay planes

Charlene Liu (1):
  drm/amd/display: add NULL pointer check

Colin Ian King (1):
  drm/amd/display: Fix spelling mistakes of function name in error message

Daniel Miess (1):
  Revert "drm/amd/display: Correct bw_params population"

Deepak R Varma (2):
  drm/amd/display: Remove duplicate/repeating expression
  drm/amd/display: Remove duplicate/repeating expressions

Evan Quan (3):
  drm/amd/pm: add SMU 13.0.7 missing GetPptLimit message mapping
  drm/amd/pm: bump SMU 13.0.0 driver_if header version
  drm/amd/pm: bump SMU 13.0.7 driver_if header version

Friedrich Vock (1):
  drm/amdgpu: Use the TGID for trace_amdgpu_vm_update_ptes

Guilherme G. Piccoli (1):
  drm/amdgpu/fence: Fix oops due to non-matching drm_sched init/fini

Hamza Mahfooz (2):
  drm/amd/display: fix read errors pertaining to dp_lttpr_status_show()
  drm/amd/display: don't call dc_interrupt_set() for disabled crtcs

Hans de Goede (1):
  drm/amd/display: Drop CONFIG_BACKLIGHT_CLASS_DEVICE ifdefs

Jack Xiao (1):
  drm/amd/amdgpu: fix warning during suspend

Jane Jian (1):
  drm/amdgpu/smu: skip pptable init under sriov

JesseZhang (1):
  amd/amdgpu: remove test ib on hw ring

Jiapeng Chong (2):
  drm/amd/display: Remove the unused variable ds_port
  drm/amd/display: Remove the unused variable pre_connection_type

Jingwen Zhu (1):
  drm/amd/display: avoid disable otg when dig was disabled

Jonathan Gray (2):
  drm/amd/display: avoid unaligned access warnings
  drm/amd/pm: avoid unaligned access warnings

Kenneth Feng (2):
  drm/amd/amdgpu: enable athub cg 11.0.3
  drm/amd/amdgpu: implement mode2 reset on smu_v13_0_10

Kent Russell (2):
  drm/amdgpu: Fix incorrect filenames in sysfs comments
  drm/amdgpu: Add unique_id support for GC 11.0.1/2

Leo (Hanghong) Ma (2):
  drm/amd/display: Add HDMI manufacturer OUI and device id read
  drm/amd/display: Fix FreeSync active bit issue

Leo Li (1):
  drm/amd/display: Fail atomic_check early on normalize_zpos error

Lijo Lazar (1):
  drm/amd/pm: Allocate dummy table only if needed

Ma Jun (1):
  drm/amdgpu: Fix the warning info when unload or remove amdgpu

Melissa Wen (7):
  drm/amd/display: fix cursor offset on rotation 180
  drm/amd/display: ident braces in dcn30_acquire_post_bldn_3dl

[PATCH v2 2/2] drm/i915/guc: Fix missing return code checks in submission init

2023-02-17 Thread John . C . Harrison
From: John Harrison 

The CI results for the 'fast request' patch set (enables error return
codes for fire-and-forget H2G messages) hit an issue with the KMD
sending context submission requests on an invalid context. That was
caused by a fault injection probe failing the context creation of a
kernel context. However, there was no return code checking on any of
the kernel context registration paths. So the driver kept going and
tried to use the kernel context for the record defaults process.

This would not cause any actual problems. The invalid requests would
be rejected by GuC and ultimately the start up sequence would
correctly wedge due to the context creation failure. But fixing the
issue correctly rather ignoring it means we won't get CI complaining
when the fast request patch lands and enables the extra error checking.

So fix it by checking for errors and aborting as appropriate when
creating kernel contexts. While at it, clean up some other submission
init related failure cleanup paths. Also, rename guc_init_lrc_mapping
to guc_init_submission as the former name hasn't been valid in a long
time.

v2: Add another wrapper to keep the flow balanced (Daniele)

Signed-off-by: John Harrison 
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 97 ++-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.h |  2 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  7 +-
 3 files changed, 80 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index a04d7049a2c2f..88e881b100cf0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1441,7 +1441,7 @@ static int guc_action_enable_usage_stats(struct intel_guc 
*guc)
return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
-static void guc_init_engine_stats(struct intel_guc *guc)
+static int guc_init_engine_stats(struct intel_guc *guc)
 {
struct intel_gt *gt = guc_to_gt(guc);
intel_wakeref_t wakeref;
@@ -1454,6 +1454,13 @@ static void guc_init_engine_stats(struct intel_guc *guc)
guc_err(guc, "Failed to enable usage stats: %pe\n", 
ERR_PTR(ret));
else
guc_enable_busyness_worker(guc);
+
+   return ret;
+}
+
+static void guc_fini_engine_stats(struct intel_guc *guc)
+{
+   guc_cancel_busyness_worker(guc);
 }
 
 void intel_guc_busyness_park(struct intel_gt *gt)
@@ -4109,9 +4116,11 @@ static void guc_set_default_submission(struct 
intel_engine_cs *engine)
engine->submit_request = guc_submit_request;
 }
 
-static inline void guc_kernel_context_pin(struct intel_guc *guc,
- struct intel_context *ce)
+static inline int guc_kernel_context_pin(struct intel_guc *guc,
+struct intel_context *ce)
 {
+   int ret;
+
/*
 * Note: we purposefully do not check the returns below because
 * the registration can only fail if a reset is just starting.
@@ -4119,16 +4128,24 @@ static inline void guc_kernel_context_pin(struct 
intel_guc *guc,
 * isn't happening and even it did this code would be run again.
 */
 
-   if (context_guc_id_invalid(ce))
-   pin_guc_id(guc, ce);
+   if (context_guc_id_invalid(ce)) {
+   ret = pin_guc_id(guc, ce);
+
+   if (ret < 0)
+   return ret;
+   }
 
if (!test_bit(CONTEXT_GUC_INIT, &ce->flags))
guc_context_init(ce);
 
-   try_context_registration(ce, true);
+   ret = try_context_registration(ce, true);
+   if (ret)
+   unpin_guc_id(guc, ce);
+
+   return ret;
 }
 
-static inline void guc_init_lrc_mapping(struct intel_guc *guc)
+static inline int guc_init_submission(struct intel_guc *guc)
 {
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
@@ -4155,9 +4172,17 @@ static inline void guc_init_lrc_mapping(struct intel_guc 
*guc)
struct intel_context *ce;
 
list_for_each_entry(ce, &engine->pinned_contexts_list,
-   pinned_contexts_link)
-   guc_kernel_context_pin(guc, ce);
+   pinned_contexts_link) {
+   int ret = guc_kernel_context_pin(guc, ce);
+
+   if (ret) {
+   /* No point in trying to clean up as i915 will 
wedge on failure */
+   return ret;
+   }
+   }
}
+
+   return 0;
 }
 
 static void guc_release(struct intel_engine_cs *engine)
@@ -4400,31 +4425,57 @@ static int guc_init_global_schedule_policy(struct 
intel_guc *guc)
return ret;
 }
 
-void intel_guc_submission_enable(struct intel_guc *guc)
+static void guc_route_semaphores(struct intel_guc *guc, bool to_guc)
 {
s

[PATCH v2 0/2] Clean up some GuC related failure paths

2023-02-17 Thread John . C . Harrison
From: John Harrison 

Improve failure code handling during GuC initialisation.

v2: Fix function naming and improve on/off balancing (review feedback
from Daniele)

Signed-off-by: John Harrison 


John Harrison (2):
  drm/i915/guc: Improve clean up of busyness stats worker
  drm/i915/guc: Fix missing return code checks in submission init

 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 133 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.h |   2 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |   7 +-
 3 files changed, 102 insertions(+), 40 deletions(-)

-- 
2.39.1



[PATCH v2 1/2] drm/i915/guc: Improve clean up of busyness stats worker

2023-02-17 Thread John . C . Harrison
From: John Harrison 

The stats worker thread management was mis-matched between
enable/disable call sites. Fix those up. Also, abstract the
cancel/enable code into a helper function rather than replicating in
multiple places.

v2: Rename the helpers and wrap the enable as well as the cancel
(review feedback from Daniele).

Signed-off-by: John Harrison 
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 38 +++
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index be495e657d66b..a04d7049a2c2f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1352,6 +1352,16 @@ static ktime_t guc_engine_busyness(struct 
intel_engine_cs *engine, ktime_t *now)
return ns_to_ktime(total);
 }
 
+static void guc_enable_busyness_worker(struct intel_guc *guc)
+{
+   mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 
guc->timestamp.ping_delay);
+}
+
+static void guc_cancel_busyness_worker(struct intel_guc *guc)
+{
+   cancel_delayed_work_sync(&guc->timestamp.work);
+}
+
 static void __reset_guc_busyness_stats(struct intel_guc *guc)
 {
struct intel_gt *gt = guc_to_gt(guc);
@@ -1360,7 +1370,7 @@ static void __reset_guc_busyness_stats(struct intel_guc 
*guc)
unsigned long flags;
ktime_t unused;
 
-   cancel_delayed_work_sync(&guc->timestamp.work);
+   guc_cancel_busyness_worker(guc);
 
spin_lock_irqsave(&guc->timestamp.lock, flags);
 
@@ -1416,8 +1426,7 @@ static void guc_timestamp_ping(struct work_struct *wrk)
 
intel_gt_reset_unlock(gt, srcu);
 
-   mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
-guc->timestamp.ping_delay);
+   guc_enable_busyness_worker(guc);
 }
 
 static int guc_action_enable_usage_stats(struct intel_guc *guc)
@@ -1436,16 +1445,15 @@ static void guc_init_engine_stats(struct intel_guc *guc)
 {
struct intel_gt *gt = guc_to_gt(guc);
intel_wakeref_t wakeref;
+   int ret;
 
-   mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
-guc->timestamp.ping_delay);
-
-   with_intel_runtime_pm(>->i915->runtime_pm, wakeref) {
-   int ret = guc_action_enable_usage_stats(guc);
+   with_intel_runtime_pm(>->i915->runtime_pm, wakeref)
+   ret = guc_action_enable_usage_stats(guc);
 
-   if (ret)
-   guc_err(guc, "Failed to enable usage stats: %pe\n", 
ERR_PTR(ret));
-   }
+   if (ret)
+   guc_err(guc, "Failed to enable usage stats: %pe\n", 
ERR_PTR(ret));
+   else
+   guc_enable_busyness_worker(guc);
 }
 
 void intel_guc_busyness_park(struct intel_gt *gt)
@@ -1460,7 +1468,7 @@ void intel_guc_busyness_park(struct intel_gt *gt)
 * and causes an unclaimed register access warning. Cancel the worker
 * synchronously here.
 */
-   cancel_delayed_work_sync(&guc->timestamp.work);
+   guc_cancel_busyness_worker(guc);
 
/*
 * Before parking, we should sample engine busyness stats if we need to.
@@ -1487,8 +1495,7 @@ void intel_guc_busyness_unpark(struct intel_gt *gt)
spin_lock_irqsave(&guc->timestamp.lock, flags);
guc_update_pm_timestamp(guc, &unused);
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
-   mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
-guc->timestamp.ping_delay);
+   guc_enable_busyness_worker(guc);
 }
 
 static inline bool
@@ -4408,11 +4415,12 @@ void intel_guc_submission_enable(struct intel_guc *guc)
guc_init_global_schedule_policy(guc);
 }
 
+/* Note: By the time we're here, GuC may have already been reset */
 void intel_guc_submission_disable(struct intel_guc *guc)
 {
struct intel_gt *gt = guc_to_gt(guc);
 
-   /* Note: By the time we're here, GuC may have already been reset */
+   guc_cancel_busyness_worker(guc);
 
/* Disable and route to host */
if (GRAPHICS_VER(gt->i915) >= 12)
-- 
2.39.1



Re: [PATCH v4 4/4] drm/msm/dpu: manage DPU resources if CTM is requested

2023-02-17 Thread Dmitry Baryshkov

On 13/02/2023 13:11, Kalyan Thota wrote:

Allow modeset to be triggered during CTM enable/disable.
In the modeset callbacks, DPU resources required for the
CTM feature are managed appropriately.

Signed-off-by: Kalyan Thota 


Reviewed-by: Dmitry Baryshkov 


---
  drivers/gpu/drm/msm/msm_atomic.c | 18 ++
  drivers/gpu/drm/msm/msm_drv.c|  2 +-
  drivers/gpu/drm/msm/msm_drv.h|  1 +
  3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
index 1686fbb..e3e607c 100644
--- a/drivers/gpu/drm/msm/msm_atomic.c
+++ b/drivers/gpu/drm/msm/msm_atomic.c
@@ -179,6 +179,24 @@ static unsigned get_crtc_mask(struct drm_atomic_state 
*state)
return mask;
  }
  
+int msm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)

+{
+   struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+   struct drm_crtc *crtc;
+   int i;
+


I hope this can be gone for good if at some point we have CRTC resource 
allocation split from encoder resource alloc.



+   for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
+ new_crtc_state, i) {
+   if ((old_crtc_state->ctm && !new_crtc_state->ctm) ||
+   (!old_crtc_state->ctm && new_crtc_state->ctm)) {
+   new_crtc_state->mode_changed = true;
+   state->allow_modeset = true;
+   }
+   }
+
+   return drm_atomic_helper_check(dev, state);
+}
+
  void msm_atomic_commit_tail(struct drm_atomic_state *state)
  {
struct drm_device *dev = state->dev;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 0759e2d..3221284 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -52,7 +52,7 @@
  static const struct drm_mode_config_funcs mode_config_funcs = {
.fb_create = msm_framebuffer_create,
.output_poll_changed = drm_fb_helper_output_poll_changed,
-   .atomic_check = drm_atomic_helper_check,
+   .atomic_check = msm_atomic_check,
.atomic_commit = drm_atomic_helper_commit,
  };
  
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h

index ea80846..7d0243a 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -209,6 +209,7 @@ int msm_atomic_init_pending_timer(struct msm_pending_timer 
*timer,
struct msm_kms *kms, int crtc_idx);
  void msm_atomic_destroy_pending_timer(struct msm_pending_timer *timer);
  void msm_atomic_commit_tail(struct drm_atomic_state *state);
+int msm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state);
  struct drm_atomic_state *msm_atomic_state_alloc(struct drm_device *dev);
  void msm_atomic_state_clear(struct drm_atomic_state *state);
  void msm_atomic_state_free(struct drm_atomic_state *state);


--
With best wishes
Dmitry



Re: [PATCH v13 13/13] drm/msm/disp/dpu: update dpu_enc crtc state on crtc enable/disable during self refresh

2023-02-17 Thread Dmitry Baryshkov

On 12/02/2023 18:28, Vinod Polimera wrote:

Populate the encoder software structure to reflect the updated
crtc appropriately during crtc enable/disable for a new commit
while taking care of the self refresh transitions when crtc
disable is triggered from the drm self refresh library.

Signed-off-by: Vinod Polimera 
---
  drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 29 +
  1 file changed, 25 insertions(+), 4 deletions(-)


Reviewed-by: Dmitry Baryshkov 

--
With best wishes
Dmitry



Re: [PATCH v2 03/14] drm/msm/a6xx: Introduce GMU wrapper support

2023-02-17 Thread Konrad Dybcio



On 17.02.2023 22:44, Dmitry Baryshkov wrote:
> On 17/02/2023 23:41, Konrad Dybcio wrote:
>>
>>
>> On 17.02.2023 22:37, Dmitry Baryshkov wrote:
>>> On 14/02/2023 19:31, Konrad Dybcio wrote:
 Some (particularly SMD_RPM, a.k.a non-RPMh) SoCs implement A6XX GPUs
 but don't implement the associated GMUs. This is due to the fact that
 the GMU directly pokes at RPMh. Sadly, this means we have to take care
 of enabling & scaling power rails, clocks and bandwidth ourselves.

 Reuse existing Adreno-common code and modify the deeply-GMU-infused
 A6XX code to facilitate these GPUs. This involves if-ing out lots
 of GMU callbacks and introducing a new type of GMU - GMU wrapper.
 This is essentially a register region which is convenient to model
 as a device. We'll use it for managing the GDSCs.
>>>
>>> Why do you call it a wrapper?
>> That's what Qualcomm calls it.. The GMU-less GPUs have (almost) all the
>> same GMU GX/CX registers as the real GMUs in this 'wrapper' region, so
>> that lets us reuse some code with gmu_(read/write/rmw) calls.
>>
> 
> Ack. If you can add this to the commit message, it would be great.
Sure! I spent so much time on this that I can't really tell what's obvious
and what's not anymore, heh.

Konrad
> 


Re: [PATCH v2 03/14] drm/msm/a6xx: Introduce GMU wrapper support

2023-02-17 Thread Dmitry Baryshkov

On 17/02/2023 23:41, Konrad Dybcio wrote:



On 17.02.2023 22:37, Dmitry Baryshkov wrote:

On 14/02/2023 19:31, Konrad Dybcio wrote:

Some (particularly SMD_RPM, a.k.a non-RPMh) SoCs implement A6XX GPUs
but don't implement the associated GMUs. This is due to the fact that
the GMU directly pokes at RPMh. Sadly, this means we have to take care
of enabling & scaling power rails, clocks and bandwidth ourselves.

Reuse existing Adreno-common code and modify the deeply-GMU-infused
A6XX code to facilitate these GPUs. This involves if-ing out lots
of GMU callbacks and introducing a new type of GMU - GMU wrapper.
This is essentially a register region which is convenient to model
as a device. We'll use it for managing the GDSCs.


Why do you call it a wrapper?

That's what Qualcomm calls it.. The GMU-less GPUs have (almost) all the
same GMU GX/CX registers as the real GMUs in this 'wrapper' region, so
that lets us reuse some code with gmu_(read/write/rmw) calls.



Ack. If you can add this to the commit message, it would be great.

--
With best wishes
Dmitry



Re: [PATCH v2 09/14] drm/msm/a6xx: Fix some A619 tunables

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

Adreno 619 expects some tunables to be set differently. Make up for it.

Fixes: b7616b5c69e6 ("drm/msm/adreno: Add A619 support")
Signed-off-by: Konrad Dybcio 
---
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)


Reviewed-by: Dmitry Baryshkov 

--
With best wishes
Dmitry



Re: [PATCH v2 08/14] drm/msm/a6xx: Add A610 support

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

A610 is one of (if not the) lowest-tier SKUs in the A6XX family. It
features no GMU, as it's implemented solely on SoCs with SMD_RPM.
What's more interesting is that it does not feature a VDDGX line
either, being powered solely by VDDCX and has an unfortunate hardware
quirk that makes its reset line broken - after a couple of assert/
deassert cycles, it will hang for good and will not wake up again.

This GPU requires mesa changes for proper rendering, and lots of them
at that. The command streams are quite far away from any other A6XX
GPU and hence it needs special care. This patch was validated both
by running an (incomplete) downstream mesa with some hacks (frames
rendered correctly, though some instructions made the GPU hangcheck
which is expected - garbage in, garbage out) and by replaying RD
traces captured with the downstream KGSL driver - no crashes there,
ever.

Add support for this GPU on the kernel side, which comes down to
pretty simply adding A612 HWCG tables, altering a few values and
adding a special case for handling the reset line.

Signed-off-by: Konrad Dybcio 


Reviewed-by: Dmitry Baryshkov 

Minor nit below.


---
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c  | 95 --
  drivers/gpu/drm/msm/adreno/adreno_device.c | 13 +++
  drivers/gpu/drm/msm/adreno/adreno_gpu.h|  8 +-
  3 files changed, 106 insertions(+), 10 deletions(-)



[skipped]


@@ -1087,18 +1144,26 @@ static int hw_init(struct msm_gpu *gpu)
gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
  
-	if (adreno_is_a640_family(adreno_gpu) ||

-   adreno_is_a650_family(adreno_gpu))
+   if (adreno_is_a640_family(adreno_gpu) || 
adreno_is_a650_family(adreno_gpu)) {


Keep this on two lines please.


gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
-   else
+   gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
+   } else if (adreno_is_a610(adreno_gpu)) {
+   gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
+   gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
+   } else {
gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x01c0);
-   gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
+   gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
+   }
  


[skipped the rest]

--
With best wishes
Dmitry



Re: [PATCH v2 03/14] drm/msm/a6xx: Introduce GMU wrapper support

2023-02-17 Thread Konrad Dybcio



On 17.02.2023 22:37, Dmitry Baryshkov wrote:
> On 14/02/2023 19:31, Konrad Dybcio wrote:
>> Some (particularly SMD_RPM, a.k.a non-RPMh) SoCs implement A6XX GPUs
>> but don't implement the associated GMUs. This is due to the fact that
>> the GMU directly pokes at RPMh. Sadly, this means we have to take care
>> of enabling & scaling power rails, clocks and bandwidth ourselves.
>>
>> Reuse existing Adreno-common code and modify the deeply-GMU-infused
>> A6XX code to facilitate these GPUs. This involves if-ing out lots
>> of GMU callbacks and introducing a new type of GMU - GMU wrapper.
>> This is essentially a register region which is convenient to model
>> as a device. We'll use it for managing the GDSCs.
> 
> Why do you call it a wrapper?
That's what Qualcomm calls it.. The GMU-less GPUs have (almost) all the
same GMU GX/CX registers as the real GMUs in this 'wrapper' region, so
that lets us reuse some code with gmu_(read/write/rmw) calls.

> 
>>
>> Signed-off-by: Konrad Dybcio 
>> ---
>>   drivers/gpu/drm/msm/adreno/a6xx_gmu.c   |  51 -
>>   drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 198 +---
>>   drivers/gpu/drm/msm/adreno/a6xx_gpu.h   |   1 +
>>   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c |  14 +-
>>   drivers/gpu/drm/msm/adreno/adreno_gpu.h |   6 +
>>   5 files changed, 233 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
>> b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> index 90e636dcdd5b..5aa9f3ef41c2 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> @@ -1474,6 +1474,7 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, 
>> struct platform_device *pdev,
>>     void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
>>   {
>> +    struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
>>   struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
>>   struct platform_device *pdev = to_platform_device(gmu->dev);
>>   @@ -1493,10 +1494,12 @@ void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
>>   gmu->mmio = NULL;
>>   gmu->rscc = NULL;
>>   -    a6xx_gmu_memory_free(gmu);
>> +    if (!adreno_has_gmu_wrapper(adreno_gpu)) {
>> +    a6xx_gmu_memory_free(gmu);
>>   -    free_irq(gmu->gmu_irq, gmu);
>> -    free_irq(gmu->hfi_irq, gmu);
>> +    free_irq(gmu->gmu_irq, gmu);
>> +    free_irq(gmu->hfi_irq, gmu);
>> +    }
>>     /* Drop reference taken in of_find_device_by_node */
>>   put_device(gmu->dev);
>> @@ -1504,6 +1507,48 @@ void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
>>   gmu->initialized = false;
>>   }
>>   +int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node 
>> *node)
>> +{
>> +    struct platform_device *pdev = of_find_device_by_node(node);
>> +    struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
>> +    int ret;
>> +
>> +    if (!pdev)
>> +    return -ENODEV;
>> +
>> +    gmu->dev = &pdev->dev;
>> +
>> +    of_dma_configure(gmu->dev, node, true);
>> +
>> +    pm_runtime_enable(gmu->dev);
>> +
>> +    /* Mark legacy for manual SPTPRAC control */
>> +    gmu->legacy = true;
>> +
>> +    /* Map the GMU registers */
>> +    gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
>> +    if (IS_ERR(gmu->mmio)) {
>> +    ret = PTR_ERR(gmu->mmio);
>> +    goto err_mmio;
>> +    }
>> +
>> +    /* Get a link to the GX power domain to reset the GPU */
>> +    gmu->gxpd = dev_pm_domain_attach_by_name(gmu->dev, "gx");
>> +
>> +    gmu->initialized = true;
>> +
>> +    return 0;
>> +
>> +err_mmio:
>> +    iounmap(gmu->mmio);
>> +    ret = -ENODEV;
>> +
>> +    /* Drop reference taken in of_find_device_by_node */
>> +    put_device(gmu->dev);
>> +
>> +    return ret;
>> +}
>> +
>>   int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
>>   {
>>   struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
>> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> index 8855d798bbb3..72bf5c9f7ff1 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> @@ -20,9 +20,11 @@ static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
>>   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>>   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
>>   -    /* Check that the GMU is idle */
>> -    if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
>> -    return false;
>> +    if (!adreno_has_gmu_wrapper(adreno_gpu)) {
>> +    /* Check that the GMU is idle */
>> +    if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
>> +    return false;
>> +    }
>>     /* Check tha the CX master is idle */
>>   if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
>> @@ -612,13 +614,15 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool 
>> state)
>>   return;
>>     /* Disable SP clock before programming HWCG registers */
>> -    gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
>> +    if (!adreno_has_gmu_wrapper(adreno_gpu))
>> +    gmu_rmw(gmu, REG_A6XX_GP

Re: [PATCH v2 03/14] drm/msm/a6xx: Introduce GMU wrapper support

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

Some (particularly SMD_RPM, a.k.a non-RPMh) SoCs implement A6XX GPUs
but don't implement the associated GMUs. This is due to the fact that
the GMU directly pokes at RPMh. Sadly, this means we have to take care
of enabling & scaling power rails, clocks and bandwidth ourselves.

Reuse existing Adreno-common code and modify the deeply-GMU-infused
A6XX code to facilitate these GPUs. This involves if-ing out lots
of GMU callbacks and introducing a new type of GMU - GMU wrapper.
This is essentially a register region which is convenient to model
as a device. We'll use it for managing the GDSCs.


Why do you call it a wrapper?



Signed-off-by: Konrad Dybcio 
---
  drivers/gpu/drm/msm/adreno/a6xx_gmu.c   |  51 -
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 198 +---
  drivers/gpu/drm/msm/adreno/a6xx_gpu.h   |   1 +
  drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c |  14 +-
  drivers/gpu/drm/msm/adreno/adreno_gpu.h |   6 +
  5 files changed, 233 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 90e636dcdd5b..5aa9f3ef41c2 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1474,6 +1474,7 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct 
platform_device *pdev,
  
  void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)

  {
+   struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
struct platform_device *pdev = to_platform_device(gmu->dev);
  
@@ -1493,10 +1494,12 @@ void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)

gmu->mmio = NULL;
gmu->rscc = NULL;
  
-	a6xx_gmu_memory_free(gmu);

+   if (!adreno_has_gmu_wrapper(adreno_gpu)) {
+   a6xx_gmu_memory_free(gmu);
  
-	free_irq(gmu->gmu_irq, gmu);

-   free_irq(gmu->hfi_irq, gmu);
+   free_irq(gmu->gmu_irq, gmu);
+   free_irq(gmu->hfi_irq, gmu);
+   }
  
  	/* Drop reference taken in of_find_device_by_node */

put_device(gmu->dev);
@@ -1504,6 +1507,48 @@ void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
gmu->initialized = false;
  }
  
+int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)

+{
+   struct platform_device *pdev = of_find_device_by_node(node);
+   struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+   int ret;
+
+   if (!pdev)
+   return -ENODEV;
+
+   gmu->dev = &pdev->dev;
+
+   of_dma_configure(gmu->dev, node, true);
+
+   pm_runtime_enable(gmu->dev);
+
+   /* Mark legacy for manual SPTPRAC control */
+   gmu->legacy = true;
+
+   /* Map the GMU registers */
+   gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
+   if (IS_ERR(gmu->mmio)) {
+   ret = PTR_ERR(gmu->mmio);
+   goto err_mmio;
+   }
+
+   /* Get a link to the GX power domain to reset the GPU */
+   gmu->gxpd = dev_pm_domain_attach_by_name(gmu->dev, "gx");
+
+   gmu->initialized = true;
+
+   return 0;
+
+err_mmio:
+   iounmap(gmu->mmio);
+   ret = -ENODEV;
+
+   /* Drop reference taken in of_find_device_by_node */
+   put_device(gmu->dev);
+
+   return ret;
+}
+
  int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
  {
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 8855d798bbb3..72bf5c9f7ff1 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -20,9 +20,11 @@ static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  
-	/* Check that the GMU is idle */

-   if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
-   return false;
+   if (!adreno_has_gmu_wrapper(adreno_gpu)) {
+   /* Check that the GMU is idle */
+   if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
+   return false;
+   }
  
  	/* Check tha the CX master is idle */

if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
@@ -612,13 +614,15 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
return;
  
  	/* Disable SP clock before programming HWCG registers */

-   gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
+   if (!adreno_has_gmu_wrapper(adreno_gpu))
+   gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
  
  	for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++)

gpu_write(gpu, reg->offset, state ? reg->value : 0);
  
  	/* Enable SP clock */

-   gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
+   if (!adreno_has_gmu_wrapper(adreno_gpu))
+   gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
  
  

Re: [PATCH v2 14/14] drm/msm/a6xx: Add A610 speedbin support

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

A610 is implemented on at least three SoCs: SM6115 (bengal), SM6125
(trinket) and SM6225 (khaje). Trinket does not support speed binning
(only a single SKU exists) and we don't yet support khaje upstream.
Hence, add a fuse mapping table for bengal to allow for per-chip
frequency limiting.

Signed-off-by: Konrad Dybcio 
---
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 27 +++
  1 file changed, 27 insertions(+)


Reviewed-by: Dmitry Baryshkov 

--
With best wishes
Dmitry



Re: [PATCH v2 13/14] drm/msm/a6xx: Add A619_holi speedbin support

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

A619_holi is implemented on at least two SoCs: SM4350 (holi) and SM6375
(blair). This is what seems to be a first occurrence of this happening,
but it's easy to overcome by guarding the SoC-specific fuse values with
of_machine_is_compatible(). Do just that to enable frequency limiting
on these SoCs.

Signed-off-by: Konrad Dybcio 
---
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 31 +++
  1 file changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index ffe0fd431a76..94b4d93619ed 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -2094,6 +2094,34 @@ static u32 a618_get_speed_bin(u32 fuse)
return UINT_MAX;
  }
  
+static u32 a619_holi_get_speed_bin(u32 fuse)

+{
+   /*
+* There are (at least) two SoCs implementing A619_holi: SM4350 (holi)
+* and SM6375 (blair). Limit the fuse matching to the corresponding
+* SoC to prevent bogus frequency setting (as improbable as it may be,
+* given unexpected fuse values are.. unexpected! But still possible.)
+*/
+
+   if (fuse == 0)
+   return 0;
+
+   if (of_machine_is_compatible("qcom,sm4350")) {
+   if (fuse == 138)
+   return 1;
+   else if (fuse == 92)
+   return 2;
+   } else if (of_machine_is_compatible("qcom,sm6375")) {
+   if (fuse == 190)
+   return 1;
+   else if (fuse == 177)
+   return 2;


Ugh.


+   } else
+   pr_warn("Unknown SoC implementing A619_holi!\n");
+
+   return UINT_MAX;
+}
+
  static u32 a619_get_speed_bin(u32 fuse)
  {
if (fuse == 0)
@@ -2153,6 +2181,9 @@ static u32 fuse_to_supp_hw(struct device *dev, struct 
adreno_rev rev, u32 fuse)
if (adreno_cmp_rev(ADRENO_REV(6, 1, 8, ANY_ID), rev))
val = a618_get_speed_bin(fuse);
  
+	else if (adreno_cmp_rev(ADRENO_REV(6, 1, 9, 1), rev))


I really think it begs to have && !of_find_property(dev->of_node, 
"qcom,gmu") here.



+   val = a619_holi_get_speed_bin(fuse);
+
else if (adreno_cmp_rev(ADRENO_REV(6, 1, 9, ANY_ID), rev))
val = a619_get_speed_bin(fuse);
  


--
With best wishes
Dmitry



Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible

2023-02-17 Thread Bryan O'Donoghue

On 17/02/2023 21:23, Konrad Dybcio wrote:



On 17.02.2023 22:20, Bryan O'Donoghue wrote:

On 17/02/2023 21:16, Konrad Dybcio wrote:

Correct, but QCM2290 is not supported upstream yet.

SM6115 (a different SoC) however is, but it used the qcm2290 compatible
as it was a convenient hack to get the DSI host ID recognized based on
the (identical-to-qcm2290) base register without additional driver changes.
We're now trying to untangle that mess..


And so what we want documented is:

compatible = "qcom,qcs2290-dsi-ctrl", qcom,mdss-dsi-ctrl";

qcm* yes, this became documented with your original cleanup


compatible = "qcom,sm6115-dsi-ctrl", qcom,mdss-dsi-ctrl";

and yes this became documented (well, in the DSI binding) in
my other patch series and is finished being documented in this one



with the old compatible = "qcom,dsi-ctrl-6g-qcm2290"; clanger continuing to be 
deprecated.

correct, we still have to note it but keep it deprecated

Konrad


---
bod


Cool.

That maps to my understanding & the intention of the deprecation.

---
bod


Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible

2023-02-17 Thread Konrad Dybcio



On 17.02.2023 22:20, Bryan O'Donoghue wrote:
> On 17/02/2023 21:16, Konrad Dybcio wrote:
>> Correct, but QCM2290 is not supported upstream yet.
>>
>> SM6115 (a different SoC) however is, but it used the qcm2290 compatible
>> as it was a convenient hack to get the DSI host ID recognized based on
>> the (identical-to-qcm2290) base register without additional driver changes.
>> We're now trying to untangle that mess..
> 
> And so what we want documented is:
> 
> compatible = "qcom,qcs2290-dsi-ctrl", qcom,mdss-dsi-ctrl";
qcm* yes, this became documented with your original cleanup

> compatible = "qcom,sm6115-dsi-ctrl", qcom,mdss-dsi-ctrl";
and yes this became documented (well, in the DSI binding) in
my other patch series and is finished being documented in this one

> 
> with the old compatible = "qcom,dsi-ctrl-6g-qcm2290"; clanger continuing to 
> be deprecated.
correct, we still have to note it but keep it deprecated

Konrad
> 
> ---
> bod


Re: [PATCH v2 12/14] drm/msm/a6xx: Use "else if" in GPU speedbin rev matching

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

The GPU can only be one at a time. Turn a series of ifs into if +
elseifs to save some CPU cycles.

Signed-off-by: Konrad Dybcio 


Reviewed-by: Dmitry Baryshkov 


---
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)


--
With best wishes
Dmitry



Re: [PATCH v2 07/14] drm/msm/a6xx: Add support for A619_holi

2023-02-17 Thread Konrad Dybcio



On 17.02.2023 22:19, Dmitry Baryshkov wrote:
> On 14/02/2023 19:31, Konrad Dybcio wrote:
>> A619_holi is a GMU-less variant of the already-supported A619 GPU.
>> It's present on at least SM4350 (holi) and SM6375 (blair). No mesa
>> changes are required. Add the required kernel-side support for it.
>>
>> Signed-off-by: Konrad Dybcio 
>> ---
>>   drivers/gpu/drm/msm/adreno/a6xx_gpu.c  | 37 +-
>>   drivers/gpu/drm/msm/adreno/adreno_device.c | 13 
>>   drivers/gpu/drm/msm/adreno/adreno_gpu.h    |  5 +++
>>   3 files changed, 47 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
>> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> index 75cf94b03c29..c168712a0dc4 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> @@ -614,14 +614,14 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool 
>> state)
>>   return;
>>     /* Disable SP clock before programming HWCG registers */
>> -    if (!adreno_has_gmu_wrapper(adreno_gpu))
>> +    if ((!adreno_has_gmu_wrapper(adreno_gpu) || 
>> adreno_is_a619_holi(adreno_gpu)))
> 
> Extra parenthesis made me interpret this incorrectly. Maybe you can remove 
> them and split the condition onto two lines? Because my first interpretation 
> was:
> if (!(has_gmu_wrapper || a619_holi)).
Yeah, I agree this is confusing.. will fix.

> 
> 
>>   gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
>>     for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++)
>>   gpu_write(gpu, reg->offset, state ? reg->value : 0);
>>     /* Enable SP clock */
>> -    if (!adreno_has_gmu_wrapper(adreno_gpu))
>> +    if ((!adreno_has_gmu_wrapper(adreno_gpu) || 
>> adreno_is_a619_holi(adreno_gpu)))
>>   gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
>>     gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
>> @@ -1007,7 +1007,12 @@ static int hw_init(struct msm_gpu *gpu)
>>   }
>>     /* Clear GBIF halt in case GX domain was not collapsed */
>> -    if (a6xx_has_gbif(adreno_gpu)) {
>> +    if (adreno_is_a619_holi(adreno_gpu)) {
>> +    gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
>> +    gpu_write(gpu, 0x18, 0);
>> +    /* Let's make extra sure that the GPU can access the memory.. */
>> +    mb();
>> +    } else if (a6xx_has_gbif(adreno_gpu)) {
>>   gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
>>   gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
>>   /* Let's make extra sure that the GPU can access the memory.. */
>> @@ -1016,6 +1021,9 @@ static int hw_init(struct msm_gpu *gpu)
>>     gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
>>   +    if (adreno_is_a619_holi(adreno_gpu))
>> +    a6xx_sptprac_enable(gmu);
>> +
>>   /*
>>    * Disable the trusted memory range - we don't actually supported 
>> secure
>>    * memory rendering at this point in time and we don't want to block 
>> off
>> @@ -1293,7 +1301,8 @@ static void a6xx_dump(struct msm_gpu *gpu)
>>   #define GBIF_CLIENT_HALT_MASK    BIT(0)
>>   #define GBIF_ARB_HALT_MASK    BIT(1)
>>   #define VBIF_RESET_ACK_TIMEOUT    100
>> -#define VBIF_RESET_ACK_MASK    0x00f0
>> +#define VBIF_RESET_ACK_MASK    0xF0
>> +#define GPR0_GBIF_HALT_REQUEST    0x1E0
>>     static void a6xx_recover(struct msm_gpu *gpu)
>>   {
>> @@ -1350,10 +1359,16 @@ static void a6xx_recover(struct msm_gpu *gpu)
>>     /* Software-reset the GPU */
>>   if (adreno_has_gmu_wrapper(adreno_gpu)) {
>> -    /* Halt the GX side of GBIF */
>> -    gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, GBIF_GX_HALT_MASK);
>> -    spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) &
>> -   GBIF_GX_HALT_MASK);
>> +    if (adreno_is_a619_holi(adreno_gpu)) {
>> +    gpu_write(gpu, 0x18, GPR0_GBIF_HALT_REQUEST);
>> +    spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) &
>> +   (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK);
>> +    } else {
>> +    /* Halt the GX side of GBIF */
>> +    gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, GBIF_GX_HALT_MASK);
>> +    spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) &
>> +   GBIF_GX_HALT_MASK);
>> +    }
>>     /* Halt new client requests on GBIF */
>>   gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
>> @@ -1763,6 +1778,9 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
>>   if (ret)
>>   return ret;
>>   +    if (adreno_is_a619_holi(adreno_gpu))
>> +    a6xx_sptprac_enable(gmu);
>> +
>>   mutex_unlock(&a6xx_gpu->gmu.lock);
>>     msm_devfreq_resume(gpu);
>> @@ -1795,6 +1813,9 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
>>     mutex_lock(&a6xx_gpu->gmu.lock);
>>   +    if (adreno_is_a619_holi(adreno_gpu))
>> +    a6xx_sptprac_disable(gmu);
>> +
>>   ret = clk_prepare_

Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible

2023-02-17 Thread Bryan O'Donoghue

On 17/02/2023 21:16, Konrad Dybcio wrote:

Correct, but QCM2290 is not supported upstream yet.

SM6115 (a different SoC) however is, but it used the qcm2290 compatible
as it was a convenient hack to get the DSI host ID recognized based on
the (identical-to-qcm2290) base register without additional driver changes.
We're now trying to untangle that mess..


And so what we want documented is:

compatible = "qcom,qcs2290-dsi-ctrl", qcom,mdss-dsi-ctrl";
compatible = "qcom,sm6115-dsi-ctrl", qcom,mdss-dsi-ctrl";

with the old compatible = "qcom,dsi-ctrl-6g-qcm2290"; clanger continuing 
to be deprecated.


---
bod


Re: [PATCH v2 11/14] drm/msm/a6xx: Enable optional icc voting from OPP tables

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

On GMU-equipped GPUs, the GMU requests appropriate bandwidth votes
for us. This is however not the case for the other GPUs. Add the
dev_pm_opp_of_find_icc_paths() call to let the OPP framework handle
bus voting as part of power level setting.

Signed-off-by: Konrad Dybcio 


Reviewed-by: Dmitry Baryshkov 


---
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index d6b38bfdb3b4..b08ed127f8c4 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -2338,5 +2338,9 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
a6xx_fault_handler);
  
+	ret = dev_pm_opp_of_find_icc_paths(&pdev->dev, NULL);

+   if (ret)
+   return ERR_PTR(ret);
+
return gpu;
  }


--
With best wishes
Dmitry



Re: [PATCH v2 07/14] drm/msm/a6xx: Add support for A619_holi

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

A619_holi is a GMU-less variant of the already-supported A619 GPU.
It's present on at least SM4350 (holi) and SM6375 (blair). No mesa
changes are required. Add the required kernel-side support for it.

Signed-off-by: Konrad Dybcio 
---
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c  | 37 +-
  drivers/gpu/drm/msm/adreno/adreno_device.c | 13 
  drivers/gpu/drm/msm/adreno/adreno_gpu.h|  5 +++
  3 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 75cf94b03c29..c168712a0dc4 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -614,14 +614,14 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
return;
  
  	/* Disable SP clock before programming HWCG registers */

-   if (!adreno_has_gmu_wrapper(adreno_gpu))
+   if ((!adreno_has_gmu_wrapper(adreno_gpu) || 
adreno_is_a619_holi(adreno_gpu)))


Extra parenthesis made me interpret this incorrectly. Maybe you can 
remove them and split the condition onto two lines? Because my first 
interpretation was:

if (!(has_gmu_wrapper || a619_holi)).



gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
  
  	for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++)

gpu_write(gpu, reg->offset, state ? reg->value : 0);
  
  	/* Enable SP clock */

-   if (!adreno_has_gmu_wrapper(adreno_gpu))
+   if ((!adreno_has_gmu_wrapper(adreno_gpu) || 
adreno_is_a619_holi(adreno_gpu)))
gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
  
  	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);

@@ -1007,7 +1007,12 @@ static int hw_init(struct msm_gpu *gpu)
}
  
  	/* Clear GBIF halt in case GX domain was not collapsed */

-   if (a6xx_has_gbif(adreno_gpu)) {
+   if (adreno_is_a619_holi(adreno_gpu)) {
+   gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
+   gpu_write(gpu, 0x18, 0);
+   /* Let's make extra sure that the GPU can access the memory.. */
+   mb();
+   } else if (a6xx_has_gbif(adreno_gpu)) {
gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
/* Let's make extra sure that the GPU can access the memory.. */
@@ -1016,6 +1021,9 @@ static int hw_init(struct msm_gpu *gpu)
  
  	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
  
+	if (adreno_is_a619_holi(adreno_gpu))

+   a6xx_sptprac_enable(gmu);
+
/*
 * Disable the trusted memory range - we don't actually supported secure
 * memory rendering at this point in time and we don't want to block off
@@ -1293,7 +1301,8 @@ static void a6xx_dump(struct msm_gpu *gpu)
  #define GBIF_CLIENT_HALT_MASK BIT(0)
  #define GBIF_ARB_HALT_MASKBIT(1)
  #define VBIF_RESET_ACK_TIMEOUT100
-#define VBIF_RESET_ACK_MASK0x00f0
+#define VBIF_RESET_ACK_MASK0xF0
+#define GPR0_GBIF_HALT_REQUEST 0x1E0
  
  static void a6xx_recover(struct msm_gpu *gpu)

  {
@@ -1350,10 +1359,16 @@ static void a6xx_recover(struct msm_gpu *gpu)
  
  	/* Software-reset the GPU */

if (adreno_has_gmu_wrapper(adreno_gpu)) {
-   /* Halt the GX side of GBIF */
-   gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, GBIF_GX_HALT_MASK);
-   spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) &
-  GBIF_GX_HALT_MASK);
+   if (adreno_is_a619_holi(adreno_gpu)) {
+   gpu_write(gpu, 0x18, GPR0_GBIF_HALT_REQUEST);
+   spin_until((gpu_read(gpu, 
REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) &
+  (VBIF_RESET_ACK_MASK)) == 
VBIF_RESET_ACK_MASK);
+   } else {
+   /* Halt the GX side of GBIF */
+   gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 
GBIF_GX_HALT_MASK);
+   spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) &
+  GBIF_GX_HALT_MASK);
+   }
  
  		/* Halt new client requests on GBIF */

gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
@@ -1763,6 +1778,9 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
if (ret)
return ret;
  
+		if (adreno_is_a619_holi(adreno_gpu))

+   a6xx_sptprac_enable(gmu);
+
mutex_unlock(&a6xx_gpu->gmu.lock);
  
  		msm_devfreq_resume(gpu);

@@ -1795,6 +1813,9 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
  
  		mutex_lock(&a6xx_gpu->gmu.lock);
  
+		if (adreno_is_a619_holi(adreno_gpu))

+   a6xx_sptprac_disable(gmu);
+
ret = clk_prepare_enable(gpu->ebi1_clk);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/msm/adre

Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible

2023-02-17 Thread Konrad Dybcio



On 17.02.2023 22:13, Bryan O'Donoghue wrote:
> On 17/02/2023 12:24, Krzysztof Kozlowski wrote:
>> First, it would be nice to know what was the intention of Bryan's commit?
> 
> Sorry I've been grazing this thread but, not responding.
> 
> - qcom,dsi-ctrl-6g-qcm2290
> 
> is non-compliant with qcom,socid-dsi-ctrl which is our desired naming 
> convention, so that's what the deprecation is about i.e. moving this compat 
> to "qcom,qcm2290-dsi-ctrl"
> 
> Actually I have the question why we are deciding to go with "sm6115" instead 
> of "qcm2290" ?
> 
> The stamp on the package you receive from Thundercomm says "qcm2290" not 
> "sm6115"
Correct, but QCM2290 is not supported upstream yet.

SM6115 (a different SoC) however is, but it used the qcm2290 compatible
as it was a convenient hack to get the DSI host ID recognized based on
the (identical-to-qcm2290) base register without additional driver changes.
We're now trying to untangle that mess..

Konrad
> 
> ?
> 
> ---
> bod
> 
> 


Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible

2023-02-17 Thread Bryan O'Donoghue

On 17/02/2023 12:24, Krzysztof Kozlowski wrote:

First, it would be nice to know what was the intention of Bryan's commit?


Sorry I've been grazing this thread but, not responding.

- qcom,dsi-ctrl-6g-qcm2290

is non-compliant with qcom,socid-dsi-ctrl which is our desired naming 
convention, so that's what the deprecation is about i.e. moving this 
compat to "qcom,qcm2290-dsi-ctrl"


Actually I have the question why we are deciding to go with "sm6115" 
instead of "qcm2290" ?


The stamp on the package you receive from Thundercomm says "qcm2290" not 
"sm6115"


?

---
bod




Re: [PATCH v2 06/14] drm/msm/gpu: Use dev_pm_opp_set_rate for non-GMU GPUs

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

Currently we only utilize the OPP table connected to the GPU for
getting (available) frequencies. We do however need to scale the
voltage rail(s) accordingly to ensure that we aren't trying to
run the GPU at 1GHz with a VDD_LOW vote, as that would result in
an otherwise inexplainable hang.

Tell the OPP framework that we want to scale the "core" clock
and swap out the clk_set_rate to a dev_pm_opp_set_rate in
msm_devfreq_target() to enable usage of required-opps and by
extension proper voltage level/corner scaling.

Signed-off-by: Konrad Dybcio 
---
  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 
  drivers/gpu/drm/msm/msm_gpu_devfreq.c   | 2 +-
  2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index ce6b76c45b6f..15e405e4f977 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -1047,6 +1047,10 @@ int adreno_gpu_init(struct drm_device *drm, struct 
platform_device *pdev,
const char *gpu_name;
u32 speedbin;
  
+	/* This can only be done here, or devm_pm_opp_set_supported_hw will WARN_ON() */

+   if (!IS_ERR(devm_clk_get(dev, "core")))
+   devm_pm_opp_set_clkname(dev, "core");


Can we instead move a call to a6xx_set_supported_hw() / check_speed_bin 
after the adreno_gpu_init() ? It will call msm_gpu_init, which in turn 
sets gpu->core_clk.


Ideally you can call devm_pm_opp_set_clkname() from that function. Or 
maybe completely drop gpu->core_clk and always use 
devm_pm_opp_set_clk_rate().



+
adreno_gpu->funcs = funcs;
adreno_gpu->info = adreno_info(config->rev);
adreno_gpu->gmem = adreno_gpu->info->gmem;
diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c 
b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
index e27dbf12b5e8..ea70c1c32d94 100644
--- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
@@ -48,7 +48,7 @@ static int msm_devfreq_target(struct device *dev, unsigned 
long *freq,
gpu->funcs->gpu_set_freq(gpu, opp, df->suspended);
mutex_unlock(&df->lock);
} else {
-   clk_set_rate(gpu->core_clk, *freq);
+   dev_pm_opp_set_rate(dev, *freq);


This is not enough, there are calls to clk_set_rate(gpu->core_clk) in 
msm_gpu.c which are called from the suspend/resume path.



}
  
  	dev_pm_opp_put(opp);


--
With best wishes
Dmitry



Re: [PATCH v2 05/14] drm/msm/adreno: Disable has_cached_coherent for A610/A619_holi

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

These SKUs don't support the feature. Disable it to make the GPU stop
crashing after almost each and every submission - the received data on
the GPU end was simply incomplete and garbled, resulting in almost nothing
being executed properly.

Signed-off-by: Konrad Dybcio 
---
  drivers/gpu/drm/msm/adreno/adreno_device.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c 
b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 36f062c7582f..82757f005a1a 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -540,7 +540,13 @@ static int adreno_bind(struct device *dev, struct device 
*master, void *data)
config.rev.minor, config.rev.patchid);
  
  	priv->is_a2xx = config.rev.core == 2;

-   priv->has_cached_coherent = config.rev.core >= 6;
+
+   if (config.rev.core >= 6) {
+   /* Exclude A610 and A619_holi */
+   if (!(adreno_cmp_rev(ADRENO_REV(6, 1, 0, ANY_ID), config.rev) ||
+ adreno_cmp_rev(ADRENO_REV(6, 1, 9, 1), config.rev)))
+   priv->has_cached_coherent = true;
+   }


I'd suggest something like:

if (config.rev.core >= 6 &&
!(info.quirks & ADRENO_QUIRK_NO_CACHE_COHERENT))
priv->has_cache_coherent = true;

Let's keep all the information and quirks in a single place.

  
  	gpu = info->init(drm);

if (IS_ERR(gpu)) {


--
With best wishes
Dmitry



Re: [RFC PATCH 00/20] Initial Xe driver submission

2023-02-17 Thread Daniel Vetter
Hi all,

[I thought I've sent this out earlier this week, but alas got stuck, kinda
bad timing now since I'm out next week but oh well]

So xe is a quite substantial thing, and I think we need a clear plan how to land
this or it will take forever, and managers will panic. Also I'm not a big fan of
"Dave/me reviews everything", we defacto had that for amd's dc/dal and it was
not fun. The idea here is how to get everything reviewed without having two
people end up somewhat arbitrary as deciders.

I've compiled a bunch of topics on what I think the important areas are, first
code that should be consistent about new-style render drivers that are aimed for
vk/compute userspace as the primary feature driver:

- figure out consensus solution for fw scheduler and drm/sched frontend among
  interested driver parties (probably xe, amdgpu, nouveau, new panfrost)

- for the interface itself it might be good to have the drm_gpu_scheduler as the
  single per-hw-engine driver api object (but internally a new structure), while
  renaming the current drm_gpu_scheduler to drm_gpu_sched_internal. That way I
  think we can address the main critique of the current xe scheduler plan
  - keep the drm_gpu_sched_internal : drm_sched_entity 1:1 relationship for fw
scheduler
  - keep the driver api relationship of drm_gpu_scheduler : drm_sched_entity
1:n, the api functions simply iterate over a mutex protect list of internal
schedulers. this should also help drivers with locking mistakes around
setup/teardown and gpu reset.
  - drivers select with a flag or something between the current mode (where the
drm_gpu_sched_internal is attached to the drm_gpu_scheduler api object) or
the new fw scheduler mode (where drm_gpu_sched_internal is attached to the
drm_sched_entity)
  - overall still no fundamental changes (like the current patches) to drm/sched
data structures and algorithms. But unlike the current patches we keep the
possibility open for eventual refactoring without having to again refactor
all the drivers. Even better, we can delay such refactoring until we have a
handful of real-word drivers test-driving this all so we know we actually do
the right thing. This should allow us to address all the
fairness/efficiency/whatever concerns that have been floating around without
having to fix them all up upfront, before we actually know what needs to be
fixed.

- the generic scheduler code should also including the handling of endless
  compute contexts, with the minimal scaffolding for preempt-ctx fences
  (probably on the drm_sched_entity) and making sure drm/sched can cope with the
  lack of job completion fence. This is very minimal amounts of code, but it
  helps a lot for cross-driver review if this works the same (with the same
  locking and all that) for everyone. Ideally this gets extracted from amdkfd,
  but as long as it's going to be used by all drivers supporting
  endless/compute context going forward it's good enough.

- I'm assuming this also means Matt Brost will include a patch to add himself as
  drm/sched reviewer in MAINTAINERS, or at least something like that

- adopt the gem_exec/vma helpers. again we probably want consensus here among
  the same driver projects. I don't care whether these helpers specify the ioctl
  structs or not, but they absolutely need to enforce the overall locking scheme
  for all major structs and list (so vm and vma).

- we also should have cross-driver consensus on async vm_bind support. I think
  everyone added in-syncobj support, the real fun is probably more in/out
  userspace memory fences (and personally I'm still not sure that's a good idea
  but ... *eh*). I think cross driver consensus on how this should work (ideally
  with helper support so people don't get it wrong in all the possible ways)
  would be best.

- this also means some userptr integration and some consensus how userptr should
  work for vm_bind across drivers. I don't think allowing drivers to reinvent
  that wheel is a bright idea, there's just a bit too much to get wrong here.

- for some of these the consensus might land on more/less shared code than what
  I sketched out above, the important part really is that we have consensus on
  these. Kinda similar to how the atomic kms infrastructure move a _lot_ more of
  the code back into drivers, because they really just needed the flexibility to
  program the hw correctly. Right now we definitely don't have enough shared
  code, for sure with i915-gem, but we also need to make sure we're not
  overcorrecting too badly (a bit of overcorrecting generally doesn't hurt).

All the above will make sure that the driver overall is in concepts and design
aligned with the overall community direction, but I think it'd still be good if
someone outside of the intel gpu group reviews the driver code itself. Last time
we had a huge driver submission (amd's DC/DAL) this fell on Dave&me, but this
time around I think we have

Re: [PATCH v2 02/14] drm/msm/a6xx: Extend UBWC config

2023-02-17 Thread Konrad Dybcio



On 17.02.2023 21:46, Dmitry Baryshkov wrote:
> On 14/02/2023 19:31, Konrad Dybcio wrote:
>> Port setting min_access_length, ubwc_mode and upper_bit from downstream.
>> Values were validated using downstream device trees for SM8[123]50 and
>> left default (as per downstream) elsewhere.
>>
>> Signed-off-by: Konrad Dybcio 
>> ---
>>   drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 29 +++
>>   1 file changed, 21 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
>> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> index c5f5d0bb3fdc..8855d798bbb3 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> @@ -786,17 +786,25 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu)
>>   static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
>>   {
>>   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>> -    u32 lower_bit = 2;
>> +    u32 lower_bit = 1;
> 
> Any reason to change the default value here?
> If it is to match chipsets you are adding, it might be worth splitting this 
> change to that patch.
Not really now that I think about it, especially since the
correct default value should be zero:

-- part of msm-4.19 --
bit = adreno_dev->highest_bank_bit ? adreno_dev->highest_bank_bit - 13 : 0;
lower_bit = bit & 0x3;
upper_bit = (bit >> 0x2) & 1;

where adreno_dev->highest_bank_bit is read from the dt property
"qcom,highest-bank-bit"

Anyway, I should be able to verify it for all the SoCs which
we support.

Konrad
> 
>> +    u32 upper_bit = 0;
>>   u32 amsbc = 0;
>>   u32 rgb565_predicator = 0;
>>   u32 uavflagprd_inv = 0;
>> +    u32 min_acc_len = 0;
>> +    u32 ubwc_mode = 0;
>>     /* a618 is using the hw default values */
>>   if (adreno_is_a618(adreno_gpu))
>>   return;
>>   -    if (adreno_is_a640_family(adreno_gpu))
>> +    if (adreno_is_a630(adreno_gpu))
>> +    lower_bit = 2;
>> +
>> +    if (adreno_is_a640_family(adreno_gpu)) {
>>   amsbc = 1;
>> +    lower_bit = 2;
>> +    }
>>     if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) {
>>   /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
>> @@ -807,18 +815,23 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
>>   }
>>     if (adreno_is_7c3(adreno_gpu)) {
>> -    lower_bit = 1;
>>   amsbc = 1;
>>   rgb565_predicator = 1;
>>   uavflagprd_inv = 2;
>>   }
>>     gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
>> -    rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1);
>> -    gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1);
>> -    gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
>> -    uavflagprd_inv << 4 | lower_bit << 1);
>> -    gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21);
>> +  rgb565_predicator << 11 | upper_bit << 10 | amsbc << 4 |
>> +  min_acc_len << 3 | lower_bit << 1 | ubwc_mode);
>> +
>> +    gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, upper_bit << 4 |
>> +  min_acc_len << 3 | lower_bit << 1 | ubwc_mode);
>> +
>> +    gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, upper_bit << 10 |
>> +  uavflagprd_inv << 4 | min_acc_len << 3 |
>> +  lower_bit << 1 | ubwc_mode);
>> +
>> +    gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, min_acc_len << 23 | lower_bit 
>> << 21);
>>   }
>>     static int a6xx_cp_init(struct msm_gpu *gpu)
> 


Re: [PATCH v2 02/14] drm/msm/a6xx: Extend UBWC config

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

Port setting min_access_length, ubwc_mode and upper_bit from downstream.
Values were validated using downstream device trees for SM8[123]50 and
left default (as per downstream) elsewhere.

Signed-off-by: Konrad Dybcio 
---
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 29 +++
  1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index c5f5d0bb3fdc..8855d798bbb3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -786,17 +786,25 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu)
  static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
  {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-   u32 lower_bit = 2;
+   u32 lower_bit = 1;


Any reason to change the default value here?
If it is to match chipsets you are adding, it might be worth splitting 
this change to that patch.



+   u32 upper_bit = 0;
u32 amsbc = 0;
u32 rgb565_predicator = 0;
u32 uavflagprd_inv = 0;
+   u32 min_acc_len = 0;
+   u32 ubwc_mode = 0;
  
  	/* a618 is using the hw default values */

if (adreno_is_a618(adreno_gpu))
return;
  
-	if (adreno_is_a640_family(adreno_gpu))

+   if (adreno_is_a630(adreno_gpu))
+   lower_bit = 2;
+
+   if (adreno_is_a640_family(adreno_gpu)) {
amsbc = 1;
+   lower_bit = 2;
+   }
  
  	if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) {

/* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
@@ -807,18 +815,23 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
}
  
  	if (adreno_is_7c3(adreno_gpu)) {

-   lower_bit = 1;
amsbc = 1;
rgb565_predicator = 1;
uavflagprd_inv = 2;
}
  
  	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,

-   rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1);
-   gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1);
-   gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
-   uavflagprd_inv << 4 | lower_bit << 1);
-   gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21);
+ rgb565_predicator << 11 | upper_bit << 10 | amsbc << 4 |
+ min_acc_len << 3 | lower_bit << 1 | ubwc_mode);
+
+   gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, upper_bit << 4 |
+ min_acc_len << 3 | lower_bit << 1 | ubwc_mode);
+
+   gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, upper_bit << 10 |
+ uavflagprd_inv << 4 | min_acc_len << 3 |
+ lower_bit << 1 | ubwc_mode);
+
+   gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, min_acc_len << 23 | lower_bit 
<< 21);
  }
  
  static int a6xx_cp_init(struct msm_gpu *gpu)


--
With best wishes
Dmitry



Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-17 Thread Rodrigo Vivi
On Fri, Feb 17, 2023 at 09:00:49AM -0800, Rob Clark wrote:
> On Fri, Feb 17, 2023 at 8:03 AM Tvrtko Ursulin
>  wrote:
> >
> >
> > On 17/02/2023 14:55, Rob Clark wrote:
> > > On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin
> > >  wrote:
> > >>
> > >>
> > >> On 16/02/2023 18:19, Rodrigo Vivi wrote:
> > >>> On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote:
> >  On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin
> >   wrote:
> > >
> > > From: Tvrtko Ursulin 
> > >
> > > In i915 we have this concept of "wait boosting" where we give a 
> > > priority boost
> > > for instance to fences which are actively waited upon from userspace. 
> > > This has
> > > its pros and cons and can certainly be discussed at length. However 
> > > fact is
> > > some workloads really like it.
> > >
> > > Problem is that with the arrival of drm syncobj and a new userspace 
> > > waiting
> > > entry point it added, the waitboost mechanism was bypassed. Hence I 
> > > cooked up
> > > this mini series really (really) quickly to see if some discussion 
> > > can be had.
> > >
> > > It adds a concept of "wait count" to dma fence, which is incremented 
> > > for every
> > > explicit dma_fence_enable_sw_signaling and 
> > > dma_fence_add_wait_callback (like
> > > dma_fence_add_callback but from explicit/userspace wait paths).
> > 
> >  I was thinking about a similar thing, but in the context of dma_fence
> >  (or rather sync_file) fd poll()ing.  How does the kernel differentiate
> >  between "housekeeping" poll()ers that don't want to trigger boost but
> >  simply know when to do cleanup, and waiters who are waiting with some
> >  urgency.  I think we could use EPOLLPRI for this purpose.
> > 
> >  Not sure how that translates to waits via the syncobj.  But I think we
> >  want to let userspace give some hint about urgent vs housekeeping
> >  waits.
> > >>>
> > >>> Should the hint be on the waits, or should the hints be on the executed
> > >>> context?
> > >>>
> > >>> In the end we need some way to quickly ramp-up the frequency to avoid
> > >>> the execution bubbles.
> > >>>
> > >>> waitboost is trying to guess that, but in some cases it guess wrong
> > >>> and waste power.
> > >>
> > >> Do we have a list of workloads which shows who benefits and who loses
> > >> from the current implementation of waitboost?
> > >>> btw, this is something that other drivers might need:
> > >>>
> > >>> https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883
> > >>> Cc: Alex Deucher 
> > >>
> > >> I have several issues with the context hint if it would directly
> > >> influence frequency selection in the "more power" direction.
> > >>
> > >> First of all, assume a context hint would replace the waitboost. Which
> > >> applications would need to set it to restore the lost performance and
> > >> how would they set it?
> > >>
> > >> Then I don't even think userspace necessarily knows. Think of a layer
> > >> like OpenCL. It doesn't really know in advance the profile of
> > >> submissions vs waits. It depends on the CPU vs GPU speed, so hardware
> > >> generation, and the actual size of the workload which can be influenced
> > >> by the application (or user) and not the library.
> > >>
> > >> The approach also lends itself well for the "arms race" where every
> > >> application can say "Me me me, I am the most important workload there 
> > >> is!".
> > >
> > > since there is discussion happening in two places:
> > >
> > > https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433
> > >
> > > What I think you might want is a ctx boost_mask which lets an app or
> > > driver disable certain boost signals/classes.  Where fence waits is
> > > one class of boost, but hypothetical other signals like touchscreen
> > > (or other) input events could be another class of boost.  A compute
> > > workload might be interested in fence wait boosts but could care less
> > > about input events.
> >
> > I think it can only be apps which could have any chance knowing whether
> > their use of a library is latency sensitive or not. Which means new
> > library extensions and their adoption. So I have some strong reservation
> > that route is feasible.
> >
> > Or we tie with priority which many drivers do. Normal and above gets the
> > boosting and what lowered itself does not (aka SCHED_IDLE/SCHED_BATCH).
> 
> yeah, that sounds reasonable.
> 

on that gitlab-issue discussion Emma Anholt was against using the priority
to influence frequency since that should be more about latency.

or we are talking about something different priority here?

> > Related note is that we lack any external control of our scheduling
> > decisions so we really do suck compared to other scheduling domains like
> > CPU and IO etc.
> >
> > >> The last concern is for me shared with the proposal to expose deadlines
> > >> or high priority waits as explicit 

Re: [PATCH v2 01/14] drm/msm/a6xx: De-staticize sptprac en/disable functions

2023-02-17 Thread Dmitry Baryshkov

On 14/02/2023 19:31, Konrad Dybcio wrote:

These two will be reused by at least A619_holi in the non-gmu
paths. De-staticize them to make it possible.


Nit: 'remove static annotation' or something like that.

Other than that:

Reviewed-by: Dmitry Baryshkov 



Signed-off-by: Konrad Dybcio 
---
  drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 4 ++--
  drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 2 ++
  2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index f3c9600221d4..90e636dcdd5b 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -354,7 +354,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum 
a6xx_gmu_oob_state state)
  }
  
  /* Enable CPU control of SPTP power power collapse */

-static int a6xx_sptprac_enable(struct a6xx_gmu *gmu)
+int a6xx_sptprac_enable(struct a6xx_gmu *gmu)
  {
int ret;
u32 val;
@@ -376,7 +376,7 @@ static int a6xx_sptprac_enable(struct a6xx_gmu *gmu)
  }
  
  /* Disable CPU control of SPTP power power collapse */

-static void a6xx_sptprac_disable(struct a6xx_gmu *gmu)
+void a6xx_sptprac_disable(struct a6xx_gmu *gmu)
  {
u32 val;
int ret;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index e034935b3986..ec28abdd327b 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -186,5 +186,7 @@ int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, int index);
  
  bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu);

  bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu);
+void a6xx_sptprac_disable(struct a6xx_gmu *gmu);
+int a6xx_sptprac_enable(struct a6xx_gmu *gmu);
  
  #endif


--
With best wishes
Dmitry



Re: [PATCH 2/2] drm/i915/guc: Fix missing return code checks in submission init

2023-02-17 Thread John Harrison

On 1/24/2023 17:01, Ceraolo Spurio, Daniele wrote:

On 1/11/2023 5:54 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

The CI results for the 'fast request' patch set (enables error return
codes for fire-and-forget H2G messages) hit an issue with the KMD
sending context submission requests on an invalid context. That was
caused by a fault injection probe failing the context creation of a
kernel context. However, there was no return code checking on any of
the kernel context registration paths. So the driver kept going and
tried to use the kernel context for the record defaults process.

This would not cause any actual problems. The invalid requests would
be rejected by GuC and ultimately the start up sequence would
correctly wedge due to the context creation failure. But fixing the
issue correctly rather than ignoring it means we won't get CI complaining
when the fast request patch lands and enables the extra error checking.

So fix it by checking for errors and aborting as appropriate when
creating kernel contexts. While at it, clean up some other submission
init related failure cleanup paths. Also, rename guc_init_lrc_mapping
to guc_init_submission as the former name hasn't been valid in a long
time.

Signed-off-by: John Harrison 
---
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 ++-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.h |  2 +-
  drivers/gpu/drm/i915/gt/uc/intel_uc.c |  7 +-
  3 files changed, 75 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

index 982364777d0c6..dd856fd92945b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1431,7 +1431,7 @@ static int guc_action_enable_usage_stats(struct 
intel_guc *guc)

  return intel_guc_send(guc, action, ARRAY_SIZE(action));
  }
  -static void guc_init_engine_stats(struct intel_guc *guc)
+static int guc_init_engine_stats(struct intel_guc *guc)
  {
  struct intel_gt *gt = guc_to_gt(guc);
  intel_wakeref_t wakeref;
@@ -1447,6 +1447,8 @@ static void guc_init_engine_stats(struct 
intel_guc *guc)

  cancel_delayed_work_sync(&guc->timestamp.work);
  drm_err(>->i915->drm, "Failed to enable usage stats: 
%d!\n", ret);

  }
+
+    return ret;
  }
    static void guc_park_engine_stats(struct intel_guc *guc)
@@ -4108,9 +4110,11 @@ static void guc_set_default_submission(struct 
intel_engine_cs *engine)

  engine->submit_request = guc_submit_request;
  }
  -static inline void guc_kernel_context_pin(struct intel_guc *guc,
-  struct intel_context *ce)
+static inline int guc_kernel_context_pin(struct intel_guc *guc,
+ struct intel_context *ce)
  {
+    int ret;
+
  /*
   * Note: we purposefully do not check the returns below because
   * the registration can only fail if a reset is just starting.
@@ -4118,16 +4122,24 @@ static inline void 
guc_kernel_context_pin(struct intel_guc *guc,

   * isn't happening and even it did this code would be run again.
   */
  -    if (context_guc_id_invalid(ce))
-    pin_guc_id(guc, ce);
+    if (context_guc_id_invalid(ce)) {
+    int ret = pin_guc_id(guc, ce);


Why do you need a local ret variable inside this if statement, when 
you already have a function-level one? or is it just a cut & paste error?

Yeah, copy/paste thing.




+
+    if (ret < 0)
+    return ret;
+    }
    if (!test_bit(CONTEXT_GUC_INIT, &ce->flags))
  guc_context_init(ce);
  -    try_context_registration(ce, true);
+    ret = try_context_registration(ce, true);
+    if (ret)
+    unpin_guc_id(guc, ce);
+
+    return ret;
  }
  -static inline void guc_init_lrc_mapping(struct intel_guc *guc)
+static inline int guc_init_submission(struct intel_guc *guc)
  {
  struct intel_gt *gt = guc_to_gt(guc);
  struct intel_engine_cs *engine;
@@ -4154,9 +4166,17 @@ static inline void guc_init_lrc_mapping(struct 
intel_guc *guc)

  struct intel_context *ce;
    list_for_each_entry(ce, &engine->pinned_contexts_list,
-    pinned_contexts_link)
-    guc_kernel_context_pin(guc, ce);
+    pinned_contexts_link) {
+    int ret = guc_kernel_context_pin(guc, ce);
+
+    if (ret) {
+    /* No point in trying to clean up as i915 will wedge 
on failure */

+    return ret;
+    }
+    }
  }
+
+    return 0;
  }
    static void guc_release(struct intel_engine_cs *engine)
@@ -4400,30 +4420,57 @@ static int 
guc_init_global_schedule_policy(struct intel_guc *guc)

  return ret;
  }
  -void intel_guc_submission_enable(struct intel_guc *guc)
+static void guc_route_semaphores(struct intel_guc *guc, bool to_guc)
  {
  struct intel_gt *gt = guc_to_gt(guc);
+    u32 val;
  -    /* Enable and route to GuC */
-    if (GRAPHICS_VER(gt->i91

Re: [PATCH] drm/amdkfd: Make kobj_type structures constant

2023-02-17 Thread Alex Deucher
Applied.  Thanks!

Alex

On Wed, Feb 15, 2023 at 8:09 PM Thomas Weißschuh  wrote:
>
> Since commit ee6d3dd4ed48 ("driver core: make kobj_type constant.")
> the driver core allows the usage of const struct kobj_type.
>
> Take advantage of this to constify the structure definitions to prevent
> modification at runtime.
>
> Signed-off-by: Thomas Weißschuh 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  8 
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 10 +-
>  2 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 51b1683ac5c1..8d719f90db40 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -344,7 +344,7 @@ static const struct sysfs_ops kfd_procfs_ops = {
> .show = kfd_procfs_show,
>  };
>
> -static struct kobj_type procfs_type = {
> +static const struct kobj_type procfs_type = {
> .release = kfd_procfs_kobj_release,
> .sysfs_ops = &kfd_procfs_ops,
>  };
> @@ -469,7 +469,7 @@ static const struct sysfs_ops procfs_queue_ops = {
> .show = kfd_procfs_queue_show,
>  };
>
> -static struct kobj_type procfs_queue_type = {
> +static const struct kobj_type procfs_queue_type = {
> .sysfs_ops = &procfs_queue_ops,
> .default_groups = procfs_queue_groups,
>  };
> @@ -478,7 +478,7 @@ static const struct sysfs_ops procfs_stats_ops = {
> .show = kfd_procfs_stats_show,
>  };
>
> -static struct kobj_type procfs_stats_type = {
> +static const struct kobj_type procfs_stats_type = {
> .sysfs_ops = &procfs_stats_ops,
> .release = kfd_procfs_kobj_release,
>  };
> @@ -487,7 +487,7 @@ static const struct sysfs_ops sysfs_counters_ops = {
> .show = kfd_sysfs_counters_show,
>  };
>
> -static struct kobj_type sysfs_counters_type = {
> +static const struct kobj_type sysfs_counters_type = {
> .sysfs_ops = &sysfs_counters_ops,
> .release = kfd_procfs_kobj_release,
>  };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 3fdaba56be6f..8e4124dcb6e4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -278,7 +278,7 @@ static const struct sysfs_ops sysprops_ops = {
> .show = sysprops_show,
>  };
>
> -static struct kobj_type sysprops_type = {
> +static const struct kobj_type sysprops_type = {
> .release = kfd_topology_kobj_release,
> .sysfs_ops = &sysprops_ops,
>  };
> @@ -318,7 +318,7 @@ static const struct sysfs_ops iolink_ops = {
> .show = iolink_show,
>  };
>
> -static struct kobj_type iolink_type = {
> +static const struct kobj_type iolink_type = {
> .release = kfd_topology_kobj_release,
> .sysfs_ops = &iolink_ops,
>  };
> @@ -350,7 +350,7 @@ static const struct sysfs_ops mem_ops = {
> .show = mem_show,
>  };
>
> -static struct kobj_type mem_type = {
> +static const struct kobj_type mem_type = {
> .release = kfd_topology_kobj_release,
> .sysfs_ops = &mem_ops,
>  };
> @@ -395,7 +395,7 @@ static const struct sysfs_ops cache_ops = {
> .show = kfd_cache_show,
>  };
>
> -static struct kobj_type cache_type = {
> +static const struct kobj_type cache_type = {
> .release = kfd_topology_kobj_release,
> .sysfs_ops = &cache_ops,
>  };
> @@ -566,7 +566,7 @@ static const struct sysfs_ops node_ops = {
> .show = node_show,
>  };
>
> -static struct kobj_type node_type = {
> +static const struct kobj_type node_type = {
> .release = kfd_topology_kobj_release,
> .sysfs_ops = &node_ops,
>  };
>
> ---
> base-commit: 033c40a89f55525139fd5b6342281b09b97d05bf
> change-id: 20230216-kobj_type-amdkfd-abd9fe9ab060
>
> Best regards,
> --
> Thomas Weißschuh 
>


Re: [PATCH] drm/amdgpu: make kobj_type structures constant

2023-02-17 Thread Alex Deucher
Applied.  Thanks!

Alex

On Thu, Feb 16, 2023 at 1:59 AM Christian König
 wrote:
>
> Am 16.02.23 um 02:07 schrieb Thomas Weißschuh:
> > Since commit ee6d3dd4ed48 ("driver core: make kobj_type constant.")
> > the driver core allows the usage of const struct kobj_type.
> >
> > Take advantage of this to constify the structure definitions to prevent
> > modification at runtime.
> >
> > Signed-off-by: Thomas Weißschuh 
>
> Reviewed-by: Christian König 
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 10 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c  |  2 +-
> >   2 files changed, 6 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> > index 1bbd56029a4f..8e04952e5144 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> > @@ -704,7 +704,7 @@ static void ip_hw_instance_release(struct kobject *kobj)
> >   kfree(ip_hw_instance);
> >   }
> >
> > -static struct kobj_type ip_hw_instance_ktype = {
> > +static const struct kobj_type ip_hw_instance_ktype = {
> >   .release = ip_hw_instance_release,
> >   .sysfs_ops = &ip_hw_instance_sysfs_ops,
> >   .default_groups = ip_hw_instance_groups,
> > @@ -723,7 +723,7 @@ static void ip_hw_id_release(struct kobject *kobj)
> >   kfree(ip_hw_id);
> >   }
> >
> > -static struct kobj_type ip_hw_id_ktype = {
> > +static const struct kobj_type ip_hw_id_ktype = {
> >   .release = ip_hw_id_release,
> >   .sysfs_ops = &kobj_sysfs_ops,
> >   };
> > @@ -786,18 +786,18 @@ static const struct sysfs_ops ip_die_entry_sysfs_ops 
> > = {
> >   .show = ip_die_entry_attr_show,
> >   };
> >
> > -static struct kobj_type ip_die_entry_ktype = {
> > +static const struct kobj_type ip_die_entry_ktype = {
> >   .release = ip_die_entry_release,
> >   .sysfs_ops = &ip_die_entry_sysfs_ops,
> >   .default_groups = ip_die_entry_groups,
> >   };
> >
> > -static struct kobj_type die_kobj_ktype = {
> > +static const struct kobj_type die_kobj_ktype = {
> >   .release = die_kobj_release,
> >   .sysfs_ops = &kobj_sysfs_ops,
> >   };
> >
> > -static struct kobj_type ip_discovery_ktype = {
> > +static const struct kobj_type ip_discovery_ktype = {
> >   .release = ip_disc_release,
> >   .sysfs_ops = &kobj_sysfs_ops,
> >   };
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > index 4b9e7b050ccd..6d13ce6ec9cc 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > @@ -228,7 +228,7 @@ static const struct sysfs_ops amdgpu_xgmi_hive_ops = {
> >   .show = amdgpu_xgmi_show_attrs,
> >   };
> >
> > -struct kobj_type amdgpu_xgmi_hive_type = {
> > +static const struct kobj_type amdgpu_xgmi_hive_type = {
> >   .release = amdgpu_xgmi_hive_release,
> >   .sysfs_ops = &amdgpu_xgmi_hive_ops,
> >   .default_groups = amdgpu_xgmi_hive_groups,
> >
> > ---
> > base-commit: 033c40a89f55525139fd5b6342281b09b97d05bf
> > change-id: 20230216-kobj_type-amdgpu-4d3f0e1e05d4
> >
> > Best regards,
>


Re: [Intel-gfx] [PATCH 1/2] drm/i915/guc: Improve clean up of busyness stats worker

2023-02-17 Thread John Harrison

On 1/24/2023 16:55, Ceraolo Spurio, Daniele wrote:

On 1/11/2023 5:54 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

The stats worker thread management was mis-matched between
enable/disable call sites. Fix those up. Also, abstract the cancel
code into a helper function rather than replicating in multiple places.

Signed-off-by: John Harrison 
---
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 ---
  1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

index b436dd7f12e42..982364777d0c6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1435,19 +1435,25 @@ static void guc_init_engine_stats(struct 
intel_guc *guc)

  {
  struct intel_gt *gt = guc_to_gt(guc);
  intel_wakeref_t wakeref;
+    int ret;
    mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
   guc->timestamp.ping_delay);
  -    with_intel_runtime_pm(>->i915->runtime_pm, wakeref) {
-    int ret = guc_action_enable_usage_stats(guc);
+    with_intel_runtime_pm(>->i915->runtime_pm, wakeref)
+    ret = guc_action_enable_usage_stats(guc);
  -    if (ret)
-    drm_err(>->i915->drm,
-    "Failed to enable usage stats: %d!\n", ret);
+    if (ret) {
+    cancel_delayed_work_sync(&guc->timestamp.work);


Wouldn't it be easier to just call mod_delayed_work after the H2G if 
ret==0, instead of having it before and cancelling if we get a failure?


+    drm_err(>->i915->drm, "Failed to enable usage stats: 
%d!\n", ret);

  }
  }
  +static void guc_park_engine_stats(struct intel_guc *guc)
+{
+    cancel_delayed_work_sync(&guc->timestamp.work);
+}
+


Now you're asymmetric with the park/unpark, because on the park side 
you have this wrapper, while on the unpark side you directly call 
mod_delayed_work.
The point is that submission disable needs to also cancel the worker. 
But calling the actual busyness park function seems excessive - no need 
to do all the updating if we are about to reset the GuC or unload the 
driver.


Thinking about it more, calling this park_engine_stats is actually wrong 
given that engine stats and busyness are the same thing, so basically we 
would have two functions with the same name where one is a subset of the 
other. Is it simpler (and safe?) to just call the full busyness unpark 
from submission_disable? Or is it better to have a 
cancel/enable_busyness_worker() pair for all instances of turning the 
worker on or off?


John.




Daniele


  void intel_guc_busyness_park(struct intel_gt *gt)
  {
  struct intel_guc *guc = >->uc.guc;
@@ -1460,7 +1466,7 @@ void intel_guc_busyness_park(struct intel_gt *gt)
   * and causes an unclaimed register access warning. Cancel the 
worker

   * synchronously here.
   */
-    cancel_delayed_work_sync(&guc->timestamp.work);
+    guc_park_engine_stats(guc);
    /*
   * Before parking, we should sample engine busyness stats if we 
need to.
@@ -4409,11 +4415,11 @@ void intel_guc_submission_enable(struct 
intel_guc *guc)

  guc_init_global_schedule_policy(guc);
  }
  +/* Note: By the time we're here, GuC may have already been reset */
  void intel_guc_submission_disable(struct intel_guc *guc)
  {
  struct intel_gt *gt = guc_to_gt(guc);
-
-    /* Note: By the time we're here, GuC may have already been reset */
+    guc_park_engine_stats(guc);
    /* Disable and route to host */
  if (GRAPHICS_VER(gt->i915) >= 12)






Re: [PATCH] drm/amd/display: Modify mismatched function name

2023-02-17 Thread Alex Deucher
Applied.  Thanks!

Alex

On Fri, Feb 17, 2023 at 2:46 AM Jiapeng Chong
 wrote:
>
> No functional modification involved.
>
> drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_detection.c:1199: warning: 
> expecting prototype for dc_link_detect_connection_type(). Prototype was for 
> link_detect_connection_type() instead.
>
> Reported-by: Abaci Robot 
> Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4103
> Signed-off-by: Jiapeng Chong 
> ---
>  drivers/gpu/drm/amd/display/dc/link/link_detection.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c 
> b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
> index 38216c789d77..5394d8a6087a 100644
> --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
> +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
> @@ -1189,7 +1189,7 @@ static bool detect_link_and_local_sink(struct dc_link 
> *link,
>  }
>
>  /**
> - * dc_link_detect_connection_type() - Determine if there is a sink connected
> + * link_detect_connection_type() - Determine if there is a sink connected
>   *
>   * @type: Returned connection type
>   * Does not detect downstream devices, such as MST sinks
> --
> 2.20.1.7.g153144c
>


Re: [PATCH 3/3] drm/debugfs: remove dev->debugfs_list and debugfs_mutex

2023-02-17 Thread Christian König

Am 17.02.23 um 20:38 schrieb Daniel Vetter:

On Fri, Feb 17, 2023 at 11:01:18AM +0100, Stanislaw Gruszka wrote:

On Fri, Feb 17, 2023 at 10:22:25AM +0100, Christian König wrote:

Am 16.02.23 um 20:54 schrieb Daniel Vetter:

On Thu, Feb 16, 2023 at 07:08:49PM +0200, Jani Nikula wrote:

On Thu, 16 Feb 2023, Christian König  wrote:

Am 16.02.23 um 17:46 schrieb Jani Nikula:

On Thu, 16 Feb 2023, Christian König  wrote:

Am 16.02.23 um 12:33 schrieb Daniel Vetter:

On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:

The mutex was completely pointless in the first place since any
parallel adding of files to this list would result in random
behavior since the list is filled and consumed multiple times.

Completely drop that approach and just create the files directly.

This also re-adds the debugfs files to the render node directory and
removes drm_debugfs_late_register().

Signed-off-by: Christian König 
---
 drivers/gpu/drm/drm_debugfs.c | 32 +++
 drivers/gpu/drm/drm_drv.c |  3 ---
 drivers/gpu/drm/drm_internal.h|  5 -
 drivers/gpu/drm/drm_mode_config.c |  2 --
 include/drm/drm_device.h  | 15 ---
 5 files changed, 7 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 558e3a7271a5..a40288e67264 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
 void drm_debugfs_minor_register(struct drm_minor *minor)
 {
struct drm_device *dev = minor->dev;
-   struct drm_debugfs_entry *entry, *tmp;
if (dev->driver->debugfs_init)
dev->driver->debugfs_init(minor);
-
-   list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
-   debugfs_create_file(entry->file.name, 0444,
-   minor->debugfs_root, entry, 
&drm_debugfs_entry_fops);
-   list_del(&entry->list);
-   }
-}
-
-void drm_debugfs_late_register(struct drm_device *dev)
-{
-   struct drm_minor *minor = dev->primary;
-   struct drm_debugfs_entry *entry, *tmp;
-
-   if (!minor)
-   return;
-
-   list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
-   debugfs_create_file(entry->file.name, 0444,
-   minor->debugfs_root, entry, 
&drm_debugfs_entry_fops);
-   list_del(&entry->list);
-   }
 }
 int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
@@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const 
char *name,
entry->file.data = data;
entry->dev = dev;
-   mutex_lock(&dev->debugfs_mutex);
-   list_add(&entry->list, &dev->debugfs_list);
-   mutex_unlock(&dev->debugfs_mutex);
+   debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
+   &drm_debugfs_entry_fops);
+
+   /* TODO: This should probably only be a symlink */
+   if (dev->render)
+   debugfs_create_file(name, 0444, dev->render->debugfs_root,
+   entry, &drm_debugfs_entry_fops);

Nope. You are fundamentally missing the point of all this, which is:

- drivers create debugfs files whenever they want to, as long as it's
  _before_ drm_dev_register is called.

- drm_dev_register will set them all up.

This is necessary because otherwise you have the potential for some nice
oops and stuff when userspace tries to access these files before the
driver is ready.

Note that with sysfs all this infrastructure already exists, which is why
you can create sysfs files whenever you feel like, and things wont go
boom.

Well Yeah I've considered that, I just don't think it's a good idea for
debugfs.

debugfs is meant to be a helper for debugging things and that especially
includes the time between drm_dev_init() and drm_dev_register() because
that's where we probe the hardware and try to get it working.

Not having the debugfs files which allows for things like hardware
register access and reading internal state during that is a really and I
mean REALLY bad idea. This is essentially what we have those files for.

So you mean you want to have early debugfs so you can have some script
hammering the debugfs to get info out between init and register during
probe?

Well not hammering. What we usually do in bringup is to set firmware
timeout to infinity and the driver then sits and waits for the hw.

The tool used to access registers then goes directly through the PCI bar
at the moment, but that's essentially a bad idea for registers which you
grab a lock for to access (like index/data).


I just think registering debugfs before everything is ready is a recipe
for disaster. All of the debugfs needs to check all the conditions that
they need across all of the probe stages. It'll be difficult to g

Re: [PATCH v2 2/2] drm: document DRM_IOCTL_PRIME_HANDLE_TO_FD and PRIME_FD_TO_HANDLE

2023-02-17 Thread Daniel Vetter
On Fri, Feb 17, 2023 at 04:22:04PM +, Simon Ser wrote:
> v2: mention caps, note that the IOCTLs might fail, document that
> user-space needs a data structure to keep track of the
> handles (Daniel V.)
> 
> Signed-off-by: Simon Ser 
> Cc: Daniel Vetter 
> Cc: Pekka Paalanen 
> Cc: Daniel Stone 

On both patches:

Reviewed-by: Daniel Vetter 

> ---
>  include/uapi/drm/drm.h | 30 ++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 292e4778a2f4..a87ca2d4 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -1025,7 +1025,37 @@ extern "C" {
>  #define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock)
>  #define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock)
>  
> +/**
> + * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD.
> + *
> + * User-space sets &drm_prime_handle.handle with the GEM handle to export and
> + * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in
> + * &drm_prime_handle.fd.
> + *
> + * The export can fail for any driver-specific reason, e.g. because export is
> + * not supported for this specific GEM handle (but might be for others).
> + *
> + * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT.
> + */
>  #define DRM_IOCTL_PRIME_HANDLE_TO_FDDRM_IOWR(0x2d, struct 
> drm_prime_handle)
> +/**
> + * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle.
> + *
> + * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to
> + * import, and gets back a GEM handle in &drm_prime_handle.handle.
> + * &drm_prime_handle.flags is unused.
> + *
> + * If an existing GEM handle refers to the memory object backing the DMA-BUF,
> + * that GEM handle is returned. Therefore user-space which needs to handle
> + * arbitrary DMA-BUFs must have a user-space lookup data structure to 
> manually
> + * reference-count duplicated GEM handles. For more information see
> + * &DRM_IOCTL_GEM_CLOSE.
> + *
> + * The import can fail for any driver-specific reason, e.g. because import is
> + * only supported for DMA-BUFs allocated on this DRM device.
> + *
> + * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT.
> + */
>  #define DRM_IOCTL_PRIME_FD_TO_HANDLEDRM_IOWR(0x2e, struct 
> drm_prime_handle)
>  
>  #define DRM_IOCTL_AGP_ACQUIREDRM_IO(  0x30)
> -- 
> 2.39.2
> 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH 3/3] drm/debugfs: remove dev->debugfs_list and debugfs_mutex

2023-02-17 Thread Christian König

Am 17.02.23 um 20:42 schrieb Daniel Vetter:

On Fri, Feb 17, 2023 at 04:55:27PM +0100, Christian König wrote:

Am 17.02.23 um 13:37 schrieb Jani Nikula:

On Fri, 17 Feb 2023, Christian König  wrote:

If i915 have such structural problems then I strongly suggest to solve
them inside i915 and not make common code out of that.

All other things aside, that's just a completely unnecessary and
unhelpful remark.

Sorry, but why?

We have gone through the same problems on radeon and it was massively
painful, what I try here is to prevent others from using this bad design as
well. And yes I think devm_ and drmm_ is a bit questionable in that regard
as well.

The goal is not to make it as simple as possible to write a driver, but
rather as defensive as possible. In other words automatically releasing
memory when an object is destroyed might be helpful, but it isn't
automatically a good idea.

What can easily happen, for example, is that you run into use-after-free
situations when object references are released, e.g. a parent is freed
before its child.

I know that radeon/amd are going different paths on this, but I think it's
also very clear that you're not really representing the consensus here.
For smaller drivers especially there really isn't anyone arguing against
devm/drmm.


Which I completely agree on. It's just that we shouldn't promote it as 
"Hey this magically makes everything work in your very complex use case".


It can be a good tool to have such stuff which makes sense in a lot of 
use case, but everybody using it should always keep its downsides in 
mind as well.



Similar for uapi interfaces that just do the right thing and prevent
races. You're the very first one who argued this is a good thing to have.
kernfs/kobj/sysfs people spend endless amounts of engineering effort on trying to
build something that's impossible to get wrong, or at least get as close
to that as feasible.


Yeah, for kernfs/kobj/sysfs it does make complete sense because those 
files are actually sometimes waited on by userspace tools to appear.


I just find it extremely questionable for debugfs.

Regards,
Christian.


I mean the entire rust endeavour flies under that flag too.
-Daniel




Re: [PATCH] drm/fb-helper: Remove drm_fb_helper_unprepare() from drm_fb_helper_fini()

2023-02-17 Thread Daniel Vetter
On Fri, Feb 17, 2023 at 09:18:54AM +0100, Thomas Zimmermann wrote:
> Hi
> 
> Am 16.02.23 um 21:11 schrieb Daniel Vetter:
> > On Thu, Feb 16, 2023 at 03:06:20PM +0100, Thomas Zimmermann wrote:
> > > Move drm_fb_helper_unprepare() from drm_fb_helper_fini() into the
> > > calling fbdev implementation. Avoids a possible stale mutex with
> > > generic fbdev code.
> > > 
> > > As indicated by its name, drm_fb_helper_prepare() prepares struct
> > > drm_fb_helper before setting up the fbdev support with a call to
> > > drm_fb_helper_init(). In legacy fbdev emulation, this happens next
> > > to each other. If successful, drm_fb_helper_fini() later tears down
> > > the fbdev device and also unprepare via drm_fb_helper_unprepare().
> > > 
> > > Generic fbdev emulation prepares struct drm_fb_helper immediately
> > > after allocating the instance. It only calls drm_fb_helper_init()
> > > as part of processing a hotplug event. If the hotplug-handling fails,
> > > it runs drm_fb_helper_fini(). This unprepares the fb-helper instance
> > > and the next hotplug event runs on stale data.
> > > 
> > > Solve this by moving drm_fb_helper_unprepare() from drm_fb_helper_fini()
> > > into the fbdev implementations. Call it right before freeing the
> > > fb-helper instance.
> > > 
> > > Fixes: 4825797c36da ("drm/fb-helper: Introduce drm_fb_helper_unprepare()")
> > > Cc: Thomas Zimmermann 
> > > Cc: Javier Martinez Canillas 
> > > Cc: Maarten Lankhorst 
> > > Cc: Maxime Ripard 
> > > Cc: David Airlie 
> > > Cc: Daniel Vetter 
> > > Cc: dri-devel@lists.freedesktop.org
> > > 
> > > Signed-off-by: Thomas Zimmermann 
> > 
> > This reminds me of an old patch I just recently stumbled over again:
> > 
> > https://lore.kernel.org/dri-devel/Y3St2VHJ7jEmcNFw@phenom.ffwll.local/
> > 
> > Should I resurrect that one maybe and send it out? I think that also ties
> > a bit into your story here.
> 
> I don't think it will be necessary. I began to convert the existing fbdev
> emulation to make use of drm_client, which should resolve a number of
> problems. I expect to post this after the various trees have merged the
> recent changes to fbdev helpers.

The only version the patch is fixing is the client one, the old one is
unfixable (I think at least, hence just the comments). Note that the link
is pre-splitting, I do have a rebased version here.

I'll just send that out and head into vacations :-)
-Daniel

> 
> Best regards
> Thomas
> 
> > 
> > > ---
> > >   drivers/gpu/drm/armada/armada_fbdev.c  | 3 +++
> > >   drivers/gpu/drm/drm_fb_helper.c| 2 --
> > >   drivers/gpu/drm/drm_fbdev_generic.c| 2 ++
> > >   drivers/gpu/drm/exynos/exynos_drm_fbdev.c  | 3 ++-
> > >   drivers/gpu/drm/gma500/framebuffer.c   | 2 ++
> > >   drivers/gpu/drm/i915/display/intel_fbdev.c | 1 +
> > >   drivers/gpu/drm/msm/msm_fbdev.c| 2 ++
> > >   drivers/gpu/drm/omapdrm/omap_fbdev.c   | 2 ++
> > >   drivers/gpu/drm/radeon/radeon_fb.c | 2 ++
> > >   drivers/gpu/drm/tegra/fb.c | 1 +
> > >   10 files changed, 17 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/armada/armada_fbdev.c 
> > > b/drivers/gpu/drm/armada/armada_fbdev.c
> > > index 07e410c62b7a..0e44f53e9fa4 100644
> > > --- a/drivers/gpu/drm/armada/armada_fbdev.c
> > > +++ b/drivers/gpu/drm/armada/armada_fbdev.c
> > > @@ -147,6 +147,7 @@ int armada_fbdev_init(struct drm_device *dev)
> > >err_fb_setup:
> > >   drm_fb_helper_fini(fbh);
> > >err_fb_helper:
> > > + drm_fb_helper_unprepare(fbh);
> > >   priv->fbdev = NULL;
> > >   return ret;
> > >   }
> > > @@ -164,6 +165,8 @@ void armada_fbdev_fini(struct drm_device *dev)
> > >   if (fbh->fb)
> > >   fbh->fb->funcs->destroy(fbh->fb);
> > > + drm_fb_helper_unprepare(fbh);
> > > +
> > >   priv->fbdev = NULL;
> > >   }
> > >   }
> > > diff --git a/drivers/gpu/drm/drm_fb_helper.c 
> > > b/drivers/gpu/drm/drm_fb_helper.c
> > > index 28c428e9c530..a39998047f8a 100644
> > > --- a/drivers/gpu/drm/drm_fb_helper.c
> > > +++ b/drivers/gpu/drm/drm_fb_helper.c
> > > @@ -590,8 +590,6 @@ void drm_fb_helper_fini(struct drm_fb_helper 
> > > *fb_helper)
> > 
> > I think it would be good to update the kerneldoc of _init() and _fini()
> > here to mention each another like we usually do with these pairs. Same
> > with prepare/unprepare() although the latter rerfences _prepare() already.
> > 
> > >   }
> > >   mutex_unlock(&kernel_fb_helper_lock);
> > > - drm_fb_helper_unprepare(fb_helper);
> > > -
> > >   if (!fb_helper->client.funcs)
> > >   drm_client_release(&fb_helper->client);
> > >   }
> > > diff --git a/drivers/gpu/drm/drm_fbdev_generic.c 
> > > b/drivers/gpu/drm/drm_fbdev_generic.c
> > > index 365f80717fa1..4d6325e91565 100644
> > > --- a/drivers/gpu/drm/drm_fbdev_generic.c
> > > +++ b/drivers/gpu/drm/drm_fbdev_generic.c
> > > @@ -65,6 +65,8 @@ static void d

[PATCH] drm/fb-helper: Try to protect cleanup against delayed setup

2023-02-17 Thread Daniel Vetter
Some vague evidences suggests this can go wrong. Try to prevent it by
holding the right mutex and clearing ->deferred_setup to make sure we
later on don't accidentally try to re-register the fbdev when the
driver thought it had it all cleaned up already.

v2: I realized that this is fundamentally butchered, and CI complained
about lockdep splats. So limit the critical section again and just add
a few notes what the proper fix is.

References: 
https://intel-gfx-ci.01.org/tree/linux-next/next-20201215/fi-byt-j1900/igt@i915_pm_...@module-reload.html
Signed-off-by: Daniel Vetter 
Cc: Ville Syrjälä 
Cc: Chris Wilson 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/drm_fb_helper.c | 6 ++
 drivers/gpu/drm/drm_fbdev_generic.c | 5 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 3e17261a12b6..2415a2c7ca44 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -545,6 +545,9 @@ EXPORT_SYMBOL(drm_fb_helper_alloc_info);
  * A wrapper around unregister_framebuffer, to release the fb_info
  * framebuffer device. This must be called before releasing all resources for
  * @fb_helper by calling drm_fb_helper_fini().
+ *
+ * Note that this is fundamentally racy on hotunload because it doesn't handle
+ * open fbdev file descriptors at all. Use drm_fbdev_generic_setup() instead.
  */
 void drm_fb_helper_unregister_info(struct drm_fb_helper *fb_helper)
 {
@@ -558,6 +561,9 @@ EXPORT_SYMBOL(drm_fb_helper_unregister_info);
  * @fb_helper: driver-allocated fbdev helper, can be NULL
  *
  * This cleans up all remaining resources associated with @fb_helper.
+ *
+ * Note that this is fundamentally racy on hotunload because it doesn't handle
+ * open fbdev file descriptors at all. Use drm_fbdev_generic_setup() instead.
  */
 void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
 {
diff --git a/drivers/gpu/drm/drm_fbdev_generic.c 
b/drivers/gpu/drm/drm_fbdev_generic.c
index 365f80717fa1..1618109592ce 100644
--- a/drivers/gpu/drm/drm_fbdev_generic.c
+++ b/drivers/gpu/drm/drm_fbdev_generic.c
@@ -347,7 +347,12 @@ static void drm_fbdev_client_unregister(struct 
drm_client_dev *client)
 {
struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client);
 
+   mutex_lock(&fb_helper->lock);
+   fb_helper->deferred_setup = false;
+   mutex_unlock(&fb_helper->lock);
+
if (fb_helper->info) {
+   /* drm_fbdev_fb_destroy() takes care of cleanup */
drm_fb_helper_unregister_info(fb_helper);
} else {
drm_client_release(&fb_helper->client);
-- 
2.39.0



Re: [PATCH drm-next v2 03/16] maple_tree: split up MA_STATE() macro

2023-02-17 Thread Matthew Wilcox
On Fri, Feb 17, 2023 at 02:44:09PM +0100, Danilo Krummrich wrote:
> \#define SAMPLE_ITER(name, __mgr) \
>   struct sample_iter name = { \
>   .mas = __MA_STATE(&(__mgr)->mt, 0, 0),

This is usually called MA_STATE_INIT()

> #define sample_iter_for_each_range(it__, start__, end__) \
>   for ((it__).mas.index = start__, (it__).entry = mas_find(&(it__).mas, 
> end__ - 1); \
>(it__).entry; (it__).entry = mas_find(&(it__).mas, end__ - 1))

This is a bad iterator design.  It's usually best to do this:

struct sample *sample;
SAMPLE_ITERATOR(si, min);

sample_iter_for_each(&si, sample, max) {
frob(mgr, sample);
}

I don't mind splitting apart MA_STATE_INIT from MA_STATE, and if you
do that, we can also use it in VMA_ITERATOR.


Re: [PATCH 3/3] drm/debugfs: remove dev->debugfs_list and debugfs_mutex

2023-02-17 Thread Daniel Vetter
On Fri, Feb 17, 2023 at 04:55:27PM +0100, Christian König wrote:
> Am 17.02.23 um 13:37 schrieb Jani Nikula:
> > On Fri, 17 Feb 2023, Christian König  
> > wrote:
> > > If i915 have such structural problems then I strongly suggest to solve
> > > them inside i915 and not make common code out of that.
> > All other things aside, that's just a completely unnecessary and
> > unhelpful remark.
> 
> Sorry, but why?
> 
> We have gone through the same problems on radeon and it was massively
> painful, what I try here is to prevent others from using this bad design as
> well. And yes I think devm_ and drmm_ is a bit questionable in that regard
> as well.
> 
> The goal is not to make it as simple as possible to write a driver, but
> rather as defensive as possible. In other words automatically releasing
> memory when an object is destroyed might be helpful, but it isn't
> automatically a good idea.
> 
> What can easily happen for example is that you run into use after free
> situations on object reference decommissions, e.g. parent is freed before
> child for example.

I know that radeon/amd are going different paths on this, but I think it's
also very clear that you're not really representing the consensus here.
For smaller drivers especially there really isn't anyone arguing against
devm/drmm.

Similar for uapi interfaces that just do the right thing and prevent
races. You're the very first one who argued this is a good thing to have.
kernfs/kobj/sysfs people spend endless amounts of engineering effort on trying to
build something that's impossible to get wrong, or at least get as close
to that as feasible.

I mean the entire rust endeavour flies under that flag too.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH drm-next v2 04/16] maple_tree: add flag MT_FLAGS_LOCK_NONE

2023-02-17 Thread Matthew Wilcox
On Fri, Feb 17, 2023 at 02:44:10PM +0100, Danilo Krummrich wrote:
> Generic components making use of the maple tree (such as the
> DRM GPUVA Manager) delegate the responsibility of ensuring mutual
> exclusion to their users.
> 
> While such components could inherit the concept of an external lock,
> some users might just serialize the access to the component and hence to
> the internal maple tree.
> 
> In order to allow such use cases, add a new flag MT_FLAGS_LOCK_NONE to
> indicate not to do any internal lockdep checks.

I'm really against this change.

First, we really should check that users have their locking right.
It's bitten us so many times when they get it wrong.

Second, having a lock allows us to defragment the slab cache.  The
patches to do that haven't gone anywhere recently, but if we drop the
requirement now, we'll never be able to compact ranges of memory that
have slabs allocated to them.



Re: [PATCH 3/3] drm/debugfs: remove dev->debugfs_list and debugfs_mutex

2023-02-17 Thread Daniel Vetter
On Fri, Feb 17, 2023 at 11:01:18AM +0100, Stanislaw Gruszka wrote:
> On Fri, Feb 17, 2023 at 10:22:25AM +0100, Christian König wrote:
> > Am 16.02.23 um 20:54 schrieb Daniel Vetter:
> > > On Thu, Feb 16, 2023 at 07:08:49PM +0200, Jani Nikula wrote:
> > > > On Thu, 16 Feb 2023, Christian König  wrote:
> > > > > Am 16.02.23 um 17:46 schrieb Jani Nikula:
> > > > > > On Thu, 16 Feb 2023, Christian König  
> > > > > > wrote:
> > > > > > > Am 16.02.23 um 12:33 schrieb Daniel Vetter:
> > > > > > > > On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
> > > > > > > > > The mutex was completely pointless in the first place since 
> > > > > > > > > any
> > > > > > > > > parallel adding of files to this list would result in random
> > > > > > > > > behavior since the list is filled and consumed multiple times.
> > > > > > > > > 
> > > > > > > > > Completely drop that approach and just create the files 
> > > > > > > > > directly.
> > > > > > > > > 
> > > > > > > > > This also re-adds the debugfs files to the render node 
> > > > > > > > > directory and
> > > > > > > > > removes drm_debugfs_late_register().
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Christian König 
> > > > > > > > > ---
> > > > > > > > > drivers/gpu/drm/drm_debugfs.c | 32 
> > > > > > > > > +++
> > > > > > > > > drivers/gpu/drm/drm_drv.c |  3 ---
> > > > > > > > > drivers/gpu/drm/drm_internal.h|  5 -
> > > > > > > > > drivers/gpu/drm/drm_mode_config.c |  2 --
> > > > > > > > > include/drm/drm_device.h  | 15 ---
> > > > > > > > > 5 files changed, 7 insertions(+), 50 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/drivers/gpu/drm/drm_debugfs.c 
> > > > > > > > > b/drivers/gpu/drm/drm_debugfs.c
> > > > > > > > > index 558e3a7271a5..a40288e67264 100644
> > > > > > > > > --- a/drivers/gpu/drm/drm_debugfs.c
> > > > > > > > > +++ b/drivers/gpu/drm/drm_debugfs.c
> > > > > > > > > @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct 
> > > > > > > > > drm_device *dev)
> > > > > > > > > void drm_debugfs_minor_register(struct drm_minor *minor)
> > > > > > > > > {
> > > > > > > > >   struct drm_device *dev = minor->dev;
> > > > > > > > > - struct drm_debugfs_entry *entry, *tmp;
> > > > > > > > >   if (dev->driver->debugfs_init)
> > > > > > > > >   dev->driver->debugfs_init(minor);
> > > > > > > > > -
> > > > > > > > > - list_for_each_entry_safe(entry, tmp, 
> > > > > > > > > &dev->debugfs_list, list) {
> > > > > > > > > - debugfs_create_file(entry->file.name, 0444,
> > > > > > > > > - minor->debugfs_root, entry, 
> > > > > > > > > &drm_debugfs_entry_fops);
> > > > > > > > > - list_del(&entry->list);
> > > > > > > > > - }
> > > > > > > > > -}
> > > > > > > > > -
> > > > > > > > > -void drm_debugfs_late_register(struct drm_device *dev)
> > > > > > > > > -{
> > > > > > > > > - struct drm_minor *minor = dev->primary;
> > > > > > > > > - struct drm_debugfs_entry *entry, *tmp;
> > > > > > > > > -
> > > > > > > > > - if (!minor)
> > > > > > > > > - return;
> > > > > > > > > -
> > > > > > > > > - list_for_each_entry_safe(entry, tmp, 
> > > > > > > > > &dev->debugfs_list, list) {
> > > > > > > > > - debugfs_create_file(entry->file.name, 0444,
> > > > > > > > > - minor->debugfs_root, entry, 
> > > > > > > > > &drm_debugfs_entry_fops);
> > > > > > > > > - list_del(&entry->list);
> > > > > > > > > - }
> > > > > > > > > }
> > > > > > > > > int drm_debugfs_remove_files(const struct drm_info_list 
> > > > > > > > > *files, int count,
> > > > > > > > > @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct 
> > > > > > > > > drm_device *dev, const char *name,
> > > > > > > > >   entry->file.data = data;
> > > > > > > > >   entry->dev = dev;
> > > > > > > > > - mutex_lock(&dev->debugfs_mutex);
> > > > > > > > > - list_add(&entry->list, &dev->debugfs_list);
> > > > > > > > > - mutex_unlock(&dev->debugfs_mutex);
> > > > > > > > > + debugfs_create_file(name, 0444, 
> > > > > > > > > dev->primary->debugfs_root, entry,
> > > > > > > > > + &drm_debugfs_entry_fops);
> > > > > > > > > +
> > > > > > > > > + /* TODO: This should probably only be a symlink */
> > > > > > > > > + if (dev->render)
> > > > > > > > > + debugfs_create_file(name, 0444, 
> > > > > > > > > dev->render->debugfs_root,
> > > > > > > > > + entry, 
> > > > > > > > > &drm_debugfs_entry_fops);
> > > > > > > > Nope. You are fundamentally missing the point of all this, 
> > > > > > > > which is:
> > > > > > > > 
> > > > > > > > - drivers create debugfs files whenever they want to, as long 
> > > > > > > > as it's
> > > > > > > >  _before_ drm_dev_register is called.
> > > > > > > > 
> > > > > > > > - d

Re: [PATCH 1/2] drm/client: fix circular reference counting issue

2023-02-17 Thread Daniel Vetter
On Fri, 17 Feb 2023 at 13:06, Christian König
 wrote:
>
> Am 16.02.23 um 15:34 schrieb Daniel Vetter:
> > On Thu, Jan 26, 2023 at 03:30:31PM +0100, Thomas Zimmermann wrote:
> >> Hi
> >>
> >> Am 26.01.23 um 11:28 schrieb Christian König:
> >>> We reference dump buffers both by their handle as well as their
> >>> object. The problem is now that when anybody iterates over the DRM
> >>> framebuffers and exports the underlying GEM objects through DMA-buf
> >>> we run into a circular reference count situation.
> >>>
> >>> The result is that the fbdev handling holds the GEM handle preventing
> >>> the DMA-buf in the GEM object to be released. This DMA-buf in turn
> >>> holds a reference to the driver module which on unload would release
> >>> the fbdev.
> >>>
> >>> Break that loop by releasing the handle as soon as the DRM
> >>> framebuffer object is created. The DRM framebuffer and the DRM client
> >>> buffer structure still hold a reference to the underlying GEM object
> >>> preventing its destruction.
> >>>
> >>> Signed-off-by: Christian König 
> >>> Fixes: c76f0f7cb546 ("drm: Begin an API for in-kernel clients")
> >>> Cc: 
> >> I tested with Weston and Gnome in X11 and Wayland mode under simpledrm,
> >> which I started stopped from the console. No obvious problems.
> >>
> >> I heard that sway/wlroots has issues with drivers that don't support
> >> dma-buf. Maybe(!) that could be affected by this patch.
> > dma-buf export should still work. Also the loop is imo a red herring, I
> > think if you force unbind the driver then this should all get resolved
> > automatically.
> >
> > What is true is that once we start refcounting everything correctly then
> > there will be elevated module refcounts, which means you cannot use module
> > unloading to provoke a driver unbind, which would kick out all the
> > leftover references. You instead need to manually unbind the driver first,
> > which should drop all remaining references to zero (might need to kill
> > also any userspace), and only then can you unload the driver.
> >
> > But this confusion is extremely common, a lot of people think that just
> > holding a module reference is enough, we really should also hold a
> > drm_device reference for dma-buf ...
>
> Yeah, hot plug removal of amdgpu revealed a couple of those as well.
>
> Essentially what DMA-buf does with grabbing a module reference on the
> owner of a DMA-buf is a bad idea.
>
> Instead we should reference the device or component which is exporting
> the buffer, but since we don't have a common structure here it's more
> work to generalize that approach.

Well the device/component still needs to eventually hold a reference
on the module, or bad things can happen. But yeah dma-buf also holding
one but not a device/component reference is definitely bad.
-Daniel

>
> Christian.
>
> > -Daniel
> >
> >> Anyway, take my r-b, t-b tags.
> >>
> >> Reviewed-by: Thomas Zimmermann 
> >> Tested-by: Thomas Zimmermann 
> >>
> >> Thank you for fixing this bug.
> >>
> >> Best regards
> >> Thomas
> >>
> >>> ---
> >>>drivers/gpu/drm/drm_client.c | 33 -
> >>>include/drm/drm_client.h |  5 -
> >>>2 files changed, 20 insertions(+), 18 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
> >>> index 009e7b10455c..f6292ba0e6fc 100644
> >>> --- a/drivers/gpu/drm/drm_client.c
> >>> +++ b/drivers/gpu/drm/drm_client.c
> >>> @@ -243,21 +243,17 @@ void drm_client_dev_restore(struct drm_device *dev)
> >>>static void drm_client_buffer_delete(struct drm_client_buffer *buffer)
> >>>{
> >>> -   struct drm_device *dev = buffer->client->dev;
> >>> -
> >>> if (buffer->gem) {
> >>> drm_gem_vunmap_unlocked(buffer->gem, &buffer->map);
> >>> drm_gem_object_put(buffer->gem);
> >>> }
> >>> -   if (buffer->handle)
> >>> -   drm_mode_destroy_dumb(dev, buffer->handle, 
> >>> buffer->client->file);
> >>> -
> >>> kfree(buffer);
> >>>}
> >>>static struct drm_client_buffer *
> >>> -drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 
> >>> height, u32 format)
> >>> +drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 
> >>> height,
> >>> +u32 format, u32 *handle)
> >>>{
> >>> const struct drm_format_info *info = drm_format_info(format);
> >>> struct drm_mode_create_dumb dumb_args = { };
> >>> @@ -279,16 +275,15 @@ drm_client_buffer_create(struct drm_client_dev 
> >>> *client, u32 width, u32 height, u
> >>> if (ret)
> >>> goto err_delete;
> >>> -   buffer->handle = dumb_args.handle;
> >>> -   buffer->pitch = dumb_args.pitch;
> >>> -
> >>> obj = drm_gem_object_lookup(client->file, dumb_args.handle);
> >>> if (!obj)  {
> >>> ret = -ENOENT;
> >>> goto err_delete;
> >>> }
> >>> +   buffer->pitch = dumb_args.pitch;
> >>> buffer->gem = obj;
> >>> +   *handle = dumb_args.handle;
> >>>   

[PATCH] drm/i915/mtl: Add engine TLB invalidation

2023-02-17 Thread Matt Roper
MTL's primary GT can continue to use the same engine TLB invalidation
programming as past Xe_HP-based platforms.  However the media GT needs
some special handling:
 * Invalidation registers on the media GT are singleton registers
   (unlike the primary GT where they are still MCR).
 * Since the GSC is now exposed as an engine, there's a new register to
   use for TLB invalidation.  The offset is identical to the compute
   engine offset, but this is expected --- compute engines only exist on
   the primary GT while the GSC only exists on the media GT.
 * Although there's only a single GSC engine instance, it inexplicably
   uses bit 1 to request invalidations rather than bit 0.

Cc: Tvrtko Ursulin 
Cc: Daniele Ceraolo Spurio 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 52 ---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  1 +
 2 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f3a91e7f85f7..af8e158fbd84 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1166,6 +1166,11 @@ static int intel_engine_init_tlb_invalidation(struct 
intel_engine_cs *engine)
[COPY_ENGINE_CLASS].mcr_reg   = XEHP_BLT_TLB_INV_CR,
[COMPUTE_CLASS].mcr_reg   = XEHP_COMPCTX_TLB_INV_CR,
};
+   static const union intel_engine_tlb_inv_reg xelpmp_regs[] = {
+   [VIDEO_DECODE_CLASS].reg  = GEN12_VD_TLB_INV_CR,
+   [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
+   [OTHER_CLASS].reg = XELPMP_GSC_TLB_INV_CR,
+   };
struct drm_i915_private *i915 = engine->i915;
const unsigned int instance = engine->instance;
const unsigned int class = engine->class;
@@ -1185,19 +1190,28 @@ static int intel_engine_init_tlb_invalidation(struct 
intel_engine_cs *engine)
 * 12.00 -> 12.50 transition multi cast handling is required too.
 */
 
-   if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
-   GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
-   regs = xehp_regs;
-   num = ARRAY_SIZE(xehp_regs);
-   } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
-  GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
-   regs = gen12_regs;
-   num = ARRAY_SIZE(gen12_regs);
-   } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
-   regs = gen8_regs;
-   num = ARRAY_SIZE(gen8_regs);
-   } else if (GRAPHICS_VER(i915) < 8) {
-   return 0;
+   if (engine->gt->type == GT_MEDIA) {
+   if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) {
+   regs = xelpmp_regs;
+   num = ARRAY_SIZE(xelpmp_regs);
+   }
+   } else {
+   if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
+   GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) ||
+   GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
+   GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
+   regs = xehp_regs;
+   num = ARRAY_SIZE(xehp_regs);
+   } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
+  GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
+   regs = gen12_regs;
+   num = ARRAY_SIZE(gen12_regs);
+   } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) 
{
+   regs = gen8_regs;
+   num = ARRAY_SIZE(gen8_regs);
+   } else if (GRAPHICS_VER(i915) < 8) {
+   return 0;
+   }
}
 
if (gt_WARN_ONCE(engine->gt, !num,
@@ -1212,7 +1226,14 @@ static int intel_engine_init_tlb_invalidation(struct 
intel_engine_cs *engine)
 
reg = regs[class];
 
-   if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance == 1) {
+   if (class == OTHER_CLASS) {
+   /*
+* There's only a single GSC instance, but it uses register bit
+* 1 instead of either 0 or OTHER_GSC_INSTANCE.
+*/
+   GEM_WARN_ON(instance != OTHER_GSC_INSTANCE);
+   val = 1;
+   } else if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance 
== 1) {
reg.reg = GEN8_M2TCR;
val = 0;
} else {
@@ -1228,7 +1249,8 @@ static int intel_engine_init_tlb_invalidation(struct 
intel_engine_cs *engine)
if (GRAPHICS_VER(i915) >= 12 &&
(engine->class == VIDEO_DECODE_CLASS ||
 engine->class == VIDEO_ENHANCEMENT_CLASS ||
-engine->class == COMPUTE_CLASS))
+engine->class == COMPUTE_CLASS ||
+engine->class == OTHER_CLASS))
engine->tlb_inv.r

Re: [PATCH drm-next v2 03/16] maple_tree: split up MA_STATE() macro

2023-02-17 Thread Liam R. Howlett
* Danilo Krummrich  [230217 08:44]:
> Split up the MA_STATE() macro such that components using the maple tree
> can easily inherit from struct ma_state and build custom tree walk
> macros to hide their internals from users.
> 
> Example:
> 
> struct sample_iter {
>   struct ma_state mas;
>   struct sample_mgr *mgr;
>   struct sample_entry *entry;
> };
> 
> \#define SAMPLE_ITER(name, __mgr) \
>   struct sample_iter name = { \
>   .mas = __MA_STATE(&(__mgr)->mt, 0, 0),
>   .mgr = __mgr,
>   .entry = NULL,
>   }

I see this patch is to allow for anonymous maple states, this looks
good.

I've a lengthy comment about the iterator that I'm adding here to head
off anyone that may copy your example below.

> 
> \#define sample_iter_for_each_range(it__, start__, end__) \
>   for ((it__).mas.index = start__, (it__).entry = mas_find(&(it__).mas, 
> end__ - 1); \
>(it__).entry; (it__).entry = mas_find(&(it__).mas, end__ - 1))

I see you've added something like the above in your patch set as well.
I'd like to point out that the index isn't the only state information
that needs to be altered here, and in fact, this could go very wrong.

The maple state has a node and an offset within that node.  If you set
the index to lower than the current position of your iterator and call
mas_find() then what happens is somewhat undefined.  I expect you will
get the wrong value (most likely either the current value or the very
next one that the iterator is already pointing to).  I believe you have
been using a fresh maple state for each iterator in your patches, but I
haven't had a deep look into your code yet.

We have methods of resetting the iterator and set the range (mas_set()
and mas_set_range()) which are safe for what you are doing, but they
will start the walk from the root node to the index again.

So, if you know what you are doing is safe, then the way you have
written it will work, but it's worth mentioning that this could occur.

It is also worth pointing out that it would be much safer to use a
function to do the above so you get type safety.. and I was asked to add
this to the VMA interface by Linus [1], which is on its way upstream [2].

1. 
https://lore.kernel.org/linux-mm/CAHk-=wg9wqxbgkndkd2bqocnn73rdswuwsavbb7t-tekyke...@mail.gmail.com/
2. 
https://lore.kernel.org/linux-mm/20230120162650.984577-1-liam.howl...@oracle.com/

> 
> Signed-off-by: Danilo Krummrich 
> ---
>  include/linux/maple_tree.h | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
> index e594db58a0f1..ca04c900e51a 100644
> --- a/include/linux/maple_tree.h
> +++ b/include/linux/maple_tree.h
> @@ -424,8 +424,8 @@ struct ma_wr_state {
>  #define MA_ERROR(err) \
>   ((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
>  
> -#define MA_STATE(name, mt, first, end)   
> \
> - struct ma_state name = {\
> +#define __MA_STATE(mt, first, end)   \
> + {   \
>   .tree = mt, \
>   .index = first, \
>   .last = end,\
> @@ -435,6 +435,9 @@ struct ma_wr_state {
>   .alloc = NULL,  \
>   }
>  
> +#define MA_STATE(name, mt, first, end)   
> \
> + struct ma_state name = __MA_STATE(mt, first, end)
> +
>  #define MA_WR_STATE(name, ma_state, wr_entry)
> \
>   struct ma_wr_state name = { \
>   .mas = ma_state,\
> -- 
> 2.39.1
> 


Re: [PATCH v12 00/18] drm: Add Samsung MIPI DSIM bridge

2023-02-17 Thread Rasmus Villemoes
On 07/02/2023 10.09, Rasmus Villemoes wrote:

> I managed to get the whole chain lcdif -> mipi -> bridge -> dp-connector
> to probe with these settings
> 
[...]
> Now hotplug-detect doesn't work with the current sn65dsi86 driver, but
> that's a separate issue; when I boot with a monitor attached, its edid
> is correctly read out. But I still don't get any output, and the monitor
> says "no signal" - my naive attempt (which has worked fine in other
> cases) was to just dd /dev/urandom to /dev/fb0, so I'm clearly missing
> some important step.

No idea if it's important, but in the NXP kernel, there's a

display-subsystem {
compatible = "fsl,imx-display-subsystem";
ports = <&lcdif1_disp>,
<&lcdif2_disp>,
<&lcdif3_disp>;
};

node in imx8mp.dtsi, and when commenting out that node, the graphics
ceases to work, even if all the devices in the lcdif->mipi->bridge chain
actually probes. However, adding a corresponding node in mainline, which
does have a driver for that "fsl,imx-display-subsystem", makes no
difference; with or without that, I do get a /dev/fb0 device and the
whole chain probes, but again the monitor says no signal.

Rasmus



Re: [PATCH drm-next v2 04/16] maple_tree: add flag MT_FLAGS_LOCK_NONE

2023-02-17 Thread Liam R. Howlett
* Danilo Krummrich  [230217 08:44]:
> Generic components making use of the maple tree (such as the
> DRM GPUVA Manager) delegate the responsibility of ensuring mutual
> exclusion to their users.
> 
> While such components could inherit the concept of an external lock,
> some users might just serialize the access to the component and hence to
> the internal maple tree.
> 
> In order to allow such use cases, add a new flag MT_FLAGS_LOCK_NONE to
> indicate not to do any internal lockdep checks.
> 
> Signed-off-by: Danilo Krummrich 
> ---
>  include/linux/maple_tree.h | 20 +++-
>  lib/maple_tree.c   |  7 ---
>  2 files changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
> index ca04c900e51a..f795e5def8d0 100644
> --- a/include/linux/maple_tree.h
> +++ b/include/linux/maple_tree.h
> @@ -170,10 +170,11 @@ enum maple_type {
>  #define MT_FLAGS_USE_RCU 0x02
>  #define MT_FLAGS_HEIGHT_OFFSET   0x02
>  #define MT_FLAGS_HEIGHT_MASK 0x7C
> -#define MT_FLAGS_LOCK_MASK   0x300
> +#define MT_FLAGS_LOCK_MASK   0x700
>  #define MT_FLAGS_LOCK_IRQ0x100
>  #define MT_FLAGS_LOCK_BH 0x200
>  #define MT_FLAGS_LOCK_EXTERN 0x300
> +#define MT_FLAGS_LOCK_NONE   0x400

Please add this to the documentation above the flags as well.  We should
probably add enough context so that users don't just set this and then
use multiple writers.

>  
>  #define MAPLE_HEIGHT_MAX 31
>  
> @@ -559,11 +560,16 @@ static inline void mas_set(struct ma_state *mas, 
> unsigned long index)
>   mas_set_range(mas, index, index);
>  }
>  
> -static inline bool mt_external_lock(const struct maple_tree *mt)
> +static inline bool mt_lock_external(const struct maple_tree *mt)
>  {
>   return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_EXTERN;
>  }
>  
> +static inline bool mt_lock_none(const struct maple_tree *mt)
> +{
> + return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_NONE;
> +}
> +
>  /**
>   * mt_init_flags() - Initialise an empty maple tree with flags.
>   * @mt: Maple Tree
> @@ -577,7 +583,7 @@ static inline bool mt_external_lock(const struct 
> maple_tree *mt)
>  static inline void mt_init_flags(struct maple_tree *mt, unsigned int flags)
>  {
>   mt->ma_flags = flags;
> - if (!mt_external_lock(mt))
> + if (!mt_lock_external(mt) && !mt_lock_none(mt))
>   spin_lock_init(&mt->ma_lock);
>   rcu_assign_pointer(mt->ma_root, NULL);
>  }
> @@ -612,9 +618,11 @@ static inline void mt_clear_in_rcu(struct maple_tree *mt)
>   if (!mt_in_rcu(mt))
>   return;
>  
> - if (mt_external_lock(mt)) {
> + if (mt_lock_external(mt)) {
>   BUG_ON(!mt_lock_is_held(mt));
>   mt->ma_flags &= ~MT_FLAGS_USE_RCU;
> + } else if (mt_lock_none(mt)) {
> + mt->ma_flags &= ~MT_FLAGS_USE_RCU;
>   } else {
>   mtree_lock(mt);
>   mt->ma_flags &= ~MT_FLAGS_USE_RCU;
> @@ -631,9 +639,11 @@ static inline void mt_set_in_rcu(struct maple_tree *mt)
>   if (mt_in_rcu(mt))
>   return;
>  
> - if (mt_external_lock(mt)) {
> + if (mt_lock_external(mt)) {
>   BUG_ON(!mt_lock_is_held(mt));
>   mt->ma_flags |= MT_FLAGS_USE_RCU;
> + } else if (mt_lock_none(mt)) {
> + mt->ma_flags |= MT_FLAGS_USE_RCU;
>   } else {
>   mtree_lock(mt);
>   mt->ma_flags |= MT_FLAGS_USE_RCU;
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index 26e2045d3cda..f51c0fd4eaad 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -802,8 +802,8 @@ static inline void __rcu **ma_slots(struct maple_node 
> *mn, enum maple_type mt)
>  
>  static inline bool mt_locked(const struct maple_tree *mt)
>  {
> - return mt_external_lock(mt) ? mt_lock_is_held(mt) :
> - lockdep_is_held(&mt->ma_lock);
> + return mt_lock_external(mt) ? mt_lock_is_held(mt) :
> + mt_lock_none(mt) ? true : lockdep_is_held(&mt->ma_lock);

It might be better to just make this two return statements for clarity.

>  }
>  
>  static inline void *mt_slot(const struct maple_tree *mt,
> @@ -6120,7 +6120,8 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp)
>   return false;
>   }
>  
> - if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) {
> + if (gfpflags_allow_blocking(gfp) &&
> + !mt_lock_external(mas->tree) && !mt_lock_none(mas->tree)) {
>   mtree_unlock(mas->tree);
>   mas_alloc_nodes(mas, gfp);
>   mtree_lock(mas->tree);
> -- 
> 2.39.1
> 


[PATCH v2 0/3] Resolve warnings from AMDGPU

2023-02-17 Thread Arthur Grillo
Hi,

This series resolve some of the warnings that appear when compiling AMDGPU
with W=1.

Each patch is focused in a specific warning.

This is my First Patch for the GSoC Project Idea about increasing code
coverage of the DRM code[1].

Thanks for reviewing!

Best regards,
Arthur Grillo

[1]: https://www.x.org/wiki/DRMcoverage2023/#firstpatch

---

v1 -> v2: 
https://lore.kernel.org/all/20230213204923.111948-1-arthurgri...@riseup.net/

- Use dm_odm_combine_mode_disabled dm_odm_combine_mode_2to1 instead of an enum 
casting
- Maintain register read

---

Arthur Grillo (3):
  drm/amd/display: Fix implicit enum conversion
  drm/amd/display: Remove unused local variables
  drm/amd/display: Remove unused local variables and function

 .../amd/display/dc/dcn10/dcn10_link_encoder.c |  3 +-
 .../drm/amd/display/dc/dcn201/dcn201_dpp.c|  7 
 .../drm/amd/display/dc/dcn201/dcn201_hwseq.c  |  2 -
 .../gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c |  2 -
 .../gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c |  4 --
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.c|  3 --
 .../gpu/drm/amd/display/dc/dcn31/dcn31_apg.c  | 41 ---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c |  5 +--
 .../display/dc/dcn32/dcn32_resource_helpers.c |  4 --
 .../dc/dml/dcn20/display_mode_vba_20.c|  9 ++--
 .../dc/dml/dcn20/display_mode_vba_20v2.c  | 11 ++---
 .../dc/dml/dcn21/display_mode_vba_21.c| 12 +++---
 .../dc/dml/dcn31/display_rq_dlg_calc_31.c |  2 -
 .../dc/link/protocols/link_dp_capability.c|  4 --
 14 files changed, 19 insertions(+), 90 deletions(-)

-- 
2.39.2



Re: [PATCH v2 2/8] accel/qaic: Add uapi and core driver file

2023-02-17 Thread Jeffrey Hugo

On 2/16/2023 7:13 AM, Jacek Lawrynowicz wrote:

Hi,

On 06.02.2023 16:41, Jeffrey Hugo wrote:

Add the QAIC driver uapi file and core driver file that binds to the PCIe
device.  The core driver file also creates the accel device and manages
all the interconnections between the different parts of the driver.

The driver can be built as a module.  If so, it will be called "qaic.ko".

Signed-off-by: Jeffrey Hugo 
Reviewed-by: Carl Vanderlip 
---
  drivers/accel/qaic/qaic.h | 321 ++
  drivers/accel/qaic/qaic_drv.c | 771 ++
  include/uapi/drm/qaic_accel.h | 283 
  3 files changed, 1375 insertions(+)
  create mode 100644 drivers/accel/qaic/qaic.h
  create mode 100644 drivers/accel/qaic/qaic_drv.c
  create mode 100644 include/uapi/drm/qaic_accel.h

diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
new file mode 100644
index 000..3f7ea76
--- /dev/null
+++ b/drivers/accel/qaic/qaic.h
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights 
reserved.
+ */
+
+#ifndef QAICINTERNAL_H_


Please use guard macro that matches the file name: _QAIC_H_


Before moving to DRM/ACCEL, this conflicted with the uapi file. 
However, that is no longer the case, so yes, this should be changed. 
Will do.





+#define QAICINTERNAL_H_
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define QAIC_DBC_BASE  0x2
+#define QAIC_DBC_SIZE  0x1000
+
+#define QAIC_NO_PARTITION  -1
+
+#define QAIC_DBC_OFF(i)((i) * QAIC_DBC_SIZE + QAIC_DBC_BASE)
+
+#define to_qaic_bo(obj) container_of(obj, struct qaic_bo, base)
+
+extern bool poll_datapath;
+
+struct qaic_user {
+   /* Uniquely identifies this user for the device */
+   int handle;
+   struct kref ref_count;
+   /* Char device opened by this user */
+   struct qaic_drm_device  *qddev;
+   /* Node in list of users that opened this drm device */
+   struct list_headnode;
+   /* SRCU used to synchronize this user during cleanup */
+   struct srcu_struct  qddev_lock;
+   atomic_tchunk_id;
+};
+
+struct dma_bridge_chan {
+   /* Pointer to device strcut maintained by driver */
+   struct qaic_device  *qdev;
+   /* ID of this DMA bridge channel(DBC) */
+   unsigned intid;
+   /* Synchronizes access to xfer_list */
+   spinlock_t  xfer_lock;
+   /* Base address of request queue */
+   void*req_q_base;
+   /* Base address of response queue */
+   void*rsp_q_base;
+   /*
+* Base bus address of request queue. Response queue bus address can be
+* calculated by adding request queue size to this variable
+*/
+   dma_addr_t  dma_addr;
+   /* Total size of request and response queue in byte */
+   u32 total_size;
+   /* Capacity of request/response queue */
+   u32 nelem;
+   /* The user that opened this DBC */
+   struct qaic_user*usr;
+   /*
+* Request ID of next memory handle that goes in request queue. One
+* memory handle can enqueue more than one request elements, all
+* this requests that belong to same memory handle have same request ID
+*/
+   u16 next_req_id;
+   /* TRUE: DBC is in use; FALSE: DBC not in use */


Use standard "true"/"false" instead of custom "TRUE"/"FALSE" macros.
This applies here and in multiple other places in the driver.


I think you are getting at that the documentation could be confusing.  I 
don't appear to see custom macro use in the code.  Will try to clarify 
that here.



+   boolin_use;
+   /*
+* Base address of device registers. Used to read/write request and
+* response queue's head and tail pointer of this DBC.
+*/
+   void __iomem*dbc_base;
+   /* Head of list where each node is a memory handle queued in request 
queue */
+   struct list_headxfer_list;
+   /* Synchronizes DBC readers during cleanup */
+   struct srcu_struct  ch_lock;
+   /*
+* When this DBC is released, any thread waiting on this wait queue is
+* woken up
+*/
+   wait_queue_head_t   dbc_release;
+   /* Head of list where each node is a bo associated with this DBC */
+   struct list_headbo_lists;
+   /* The irq line for this DBC.  Used for polling */
+   unsigned intirq;
+   /* Polling work item to simulate interrupts */
+   struct work_struct  poll_work;
+};
+
+struct qaic_device {
+  

[PATCH v2 3/3] drm/amd/display: Remove unused local variables and function

2023-02-17 Thread Arthur Grillo
Remove a couple of local variables that are only set but never used,
also remove a static utility function that is never used in consequence
of the variable removal.

This decreases the number of -Wunused-but-set-variable warnings.

Signed-off-by: Arthur Grillo 
---
 .../gpu/drm/amd/display/dc/dcn31/dcn31_apg.c  | 41 ---
 1 file changed, 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c
index 24e9ff65434d..05aac3e444b4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c
@@ -72,40 +72,6 @@ static void apg31_disable(
REG_UPDATE(APG_CONTROL2, APG_ENABLE, 0);
 }
 
-static union audio_cea_channels speakers_to_channels(
-   struct audio_speaker_flags speaker_flags)
-{
-   union audio_cea_channels cea_channels = {0};
-
-   /* these are one to one */
-   cea_channels.channels.FL = speaker_flags.FL_FR;
-   cea_channels.channels.FR = speaker_flags.FL_FR;
-   cea_channels.channels.LFE = speaker_flags.LFE;
-   cea_channels.channels.FC = speaker_flags.FC;
-
-   /* if Rear Left and Right exist move RC speaker to channel 7
-* otherwise to channel 5
-*/
-   if (speaker_flags.RL_RR) {
-   cea_channels.channels.RL_RC = speaker_flags.RL_RR;
-   cea_channels.channels.RR = speaker_flags.RL_RR;
-   cea_channels.channels.RC_RLC_FLC = speaker_flags.RC;
-   } else {
-   cea_channels.channels.RL_RC = speaker_flags.RC;
-   }
-
-   /* FRONT Left Right Center and REAR Left Right Center are exclusive */
-   if (speaker_flags.FLC_FRC) {
-   cea_channels.channels.RC_RLC_FLC = speaker_flags.FLC_FRC;
-   cea_channels.channels.RRC_FRC = speaker_flags.FLC_FRC;
-   } else {
-   cea_channels.channels.RC_RLC_FLC = speaker_flags.RLC_RRC;
-   cea_channels.channels.RRC_FRC = speaker_flags.RLC_RRC;
-   }
-
-   return cea_channels;
-}
-
 static void apg31_se_audio_setup(
struct apg *apg,
unsigned int az_inst,
@@ -113,24 +79,17 @@ static void apg31_se_audio_setup(
 {
struct dcn31_apg *apg31 = DCN31_APG_FROM_APG(apg);
 
-   uint32_t speakers = 0;
-   uint32_t channels = 0;
-
ASSERT(audio_info);
/* This should not happen.it does so we don't get BSOD*/
if (audio_info == NULL)
return;
 
-   speakers = audio_info->flags.info.ALLSPEAKERS;
-   channels = speakers_to_channels(audio_info->flags.speaker_flags).all;
-
/* DisplayPort only allows for one audio stream with stream ID 0 */
REG_UPDATE(APG_CONTROL2, APG_DP_AUDIO_STREAM_ID, 0);
 
/* When running in "pair mode", pairs of audio channels have their own 
enable
 * this is for really old audio drivers */
REG_UPDATE(APG_DBG_GEN_CONTROL, APG_DBG_AUDIO_CHANNEL_ENABLE, 0xFF);
-   // REG_UPDATE(APG_DBG_GEN_CONTROL, APG_DBG_AUDIO_CHANNEL_ENABLE, 
channels);
 
/* Disable forced mem power off */
REG_UPDATE(APG_MEM_PWR, APG_MEM_PWR_FORCE, 0);
-- 
2.39.2



[PATCH v2 2/3] drm/amd/display: Remove unused local variables

2023-02-17 Thread Arthur Grillo
Remove local variables that were just set but were never used. This
decreases the number of -Wunused-but-set-variable warnings.

Signed-off-by: Arthur Grillo 
---
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c  | 3 +--
 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c | 7 ---
 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c   | 2 --
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c  | 2 --
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c  | 4 
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 3 ---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c  | 5 +
 .../gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c  | 4 
 .../drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c  | 2 --
 .../drm/amd/display/dc/link/protocols/link_dp_capability.c | 4 
 10 files changed, 2 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
index c4287147b853..ee08b545aaea 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
@@ -1219,7 +1219,6 @@ void 
dcn10_link_encoder_update_mst_stream_allocation_table(
const struct link_mst_stream_allocation_table *table)
 {
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
-   uint32_t value0 = 0;
uint32_t value1 = 0;
uint32_t value2 = 0;
uint32_t slots = 0;
@@ -1321,7 +1320,7 @@ void 
dcn10_link_encoder_update_mst_stream_allocation_table(
do {
udelay(10);
 
-   value0 = REG_READ(DP_MSE_SAT_UPDATE);
+   REG_READ(DP_MSE_SAT_UPDATE);
 
REG_GET(DP_MSE_SAT_UPDATE,
DP_MSE_SAT_UPDATE, &value1);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c 
b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c
index f50ab961bc17..a7268027a472 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c
@@ -185,13 +185,6 @@ static bool dpp201_get_optimal_number_of_taps(
struct scaler_data *scl_data,
const struct scaling_taps *in_taps)
 {
-   uint32_t pixel_width;
-
-   if (scl_data->viewport.width > scl_data->recout.width)
-   pixel_width = scl_data->recout.width;
-   else
-   pixel_width = scl_data->viewport.width;
-
if (scl_data->viewport.width  != scl_data->h_active &&
scl_data->viewport.height != scl_data->v_active &&
dpp->caps->dscl_data_proc_format == 
DSCL_DATA_PRCESSING_FIXED_FORMAT &&
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c
index 61bcfa03c4e7..1aeb04fbd89d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c
@@ -541,8 +541,6 @@ void dcn201_pipe_control_lock(
bool lock)
 {
struct dce_hwseq *hws = dc->hwseq;
-   struct hubp *hubp = NULL;
-   hubp = dc->res_pool->hubps[pipe->pipe_idx];
/* use TG master update lock to lock everything on the TG
 * therefore only top pipe need to lock
 */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c
index 95528e5ef89e..55e388c4c98b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c
@@ -123,7 +123,6 @@ void afmt3_se_audio_setup(
 {
struct dcn30_afmt *afmt3 = DCN30_AFMT_FROM_AFMT(afmt);
 
-   uint32_t speakers = 0;
uint32_t channels = 0;
 
ASSERT(audio_info);
@@ -131,7 +130,6 @@ void afmt3_se_audio_setup(
if (audio_info == NULL)
return;
 
-   speakers = audio_info->flags.info.ALLSPEAKERS;
channels = speakers_to_channels(audio_info->flags.speaker_flags).all;
 
/* setup the audio stream source select (audio -> dig mapping) */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
index dc3e8df706b3..e46bbe7ddcc9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
@@ -47,13 +47,9 @@ void hubp3_set_vm_system_aperture_settings(struct hubp *hubp,
 {
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
 
-   PHYSICAL_ADDRESS_LOC mc_vm_apt_default;
PHYSICAL_ADDRESS_LOC mc_vm_apt_low;
PHYSICAL_ADDRESS_LOC mc_vm_apt_high;
 
-   // The format of default addr is 48:12 of the 48 bit addr
-   mc_vm_apt_default.quad_part = apt->sys_default.quad_part >> 12;
-
// The format of high/low are 48:18 of the 48 bit addr
mc_vm_apt_low.quad_part = apt->sys_low.quad_part >> 18;
mc_vm_apt_high.quad_part = apt->sys_

[PATCH v2 1/3] drm/amd/display: Fix implicit enum conversion

2023-02-17 Thread Arthur Grillo
Make the enum conversion explicit to avoid the -Wenum-conversion warning,
such as:

drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn21/display_mode_vba_21.c:4109:88:
 warning: implicit conversion from ‘enum ’ to ‘enum 
odm_combine_mode’ [-Wenum-conversion]
 4109 | 
locals->ODMCombineEnablePerState[i][k] = true;
  | 
   ^

Signed-off-by: Arthur Grillo 
---
 .../amd/display/dc/dml/dcn20/display_mode_vba_20.c   |  9 +
 .../amd/display/dc/dml/dcn20/display_mode_vba_20v2.c | 11 ++-
 .../amd/display/dc/dml/dcn21/display_mode_vba_21.c   | 12 ++--
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
index d3b5b6fedf04..1b47249f01d8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
@@ -26,6 +26,7 @@
 #include "../display_mode_lib.h"
 #include "display_mode_vba_20.h"
 #include "../dml_inline_defs.h"
+#include "dml/display_mode_enums.h"
 
 /*
  * NOTE:
@@ -3897,14 +3898,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l

mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] 
/ 2
* (1 + 
mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
 
-   locals->ODMCombineEnablePerState[i][k] = false;
+   locals->ODMCombineEnablePerState[i][k] = 
dm_odm_combine_mode_disabled;
mode_lib->vba.PlaneRequiredDISPCLK = 
mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
if (mode_lib->vba.ODMCapability) {
if 
(locals->PlaneRequiredDISPCLKWithoutODMCombine > 
mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) {
-   
locals->ODMCombineEnablePerState[i][k] = true;
+   
locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;

mode_lib->vba.PlaneRequiredDISPCLK = 
mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
} else if (locals->HActive[k] > 
DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
-   
locals->ODMCombineEnablePerState[i][k] = true;
+   
locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;

mode_lib->vba.PlaneRequiredDISPCLK = 
mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
}
}
@@ -3957,7 +3958,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
locals->RequiredDISPCLK[i][j] = 0.0;
locals->DISPCLK_DPPCLK_Support[i][j] = true;
for (k = 0; k <= 
mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-   locals->ODMCombineEnablePerState[i][k] 
= false;
+   locals->ODMCombineEnablePerState[i][k] 
= dm_odm_combine_mode_disabled;
if (locals->SwathWidthYSingleDPP[k] <= 
locals->MaximumSwathWidth[k]) {
locals->NoOfDPP[i][j][k] = 1;
locals->RequiredDPPCLK[i][j][k] 
= locals->MinDPPCLKUsingSingleDPP[k]
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index edd098c7eb92..4781bf82eec6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -26,6 +26,7 @@
 #include "../display_mode_lib.h"
 #include "display_mode_vba_20v2.h"
 #include "../dml_inline_defs.h"
+#include "dml/display_mode_enums.h"
 
 /*
  * NOTE:
@@ -4008,17 +4009,17 @@ void 
dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode

mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] 
/ 2
* (1 + 
mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
 
-   locals->ODMCombineEnablePerState[i][k] = false;
+   locals->ODMCombineEnablePerState[i][k] = 
dm_odm_combine_mode_disabled;
mode_lib->vba.PlaneRequiredDISPCLK = 

Re: [PATCH v3 0/2] Don't use stolen memory or BAR mappings for ring buffers

2023-02-17 Thread John Harrison

On 2/17/2023 00:39, Hogander, Jouni wrote:

On Wed, 2023-02-15 at 17:10 -0800, john.c.harri...@intel.com wrote:

From: John Harrison 

Instruction from hardware arch is that stolen memory and BAR mappings
are unsafe for use as ring buffers. There can be issues with cache
aliasing due to the CPU access going to memory via the BAR. So, don't
do it.

Tested these patches for GPU Hang I was debugging. Seem to fix that one
as well:

Tested-by: Jouni Högander 
Sweet! Out of interest, which platform was that? And how reproducible 
was it? It would be interesting to know if an IGT was actually regularly 
showing the issue and we had just been ignoring it!


John.




v2: Dont use BAR mappings either.
Make conditional on LLC so as not to change platforms that don't need
to change (Daniele).
Add 'Fixes' tags (Tvrtko).
v3: Fix dumb typo.

Signed-off-by: John Harrison 


John Harrison (2):
   drm/i915: Don't use stolen memory for ring buffers with LLC
   drm/i915: Don't use BAR mappings for ring buffers with LLC

  drivers/gpu/drm/i915/gt/intel_ring.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)





Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-17 Thread Rob Clark
On Fri, Feb 17, 2023 at 8:03 AM Tvrtko Ursulin
 wrote:
>
>
> On 17/02/2023 14:55, Rob Clark wrote:
> > On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin
> >  wrote:
> >>
> >>
> >> On 16/02/2023 18:19, Rodrigo Vivi wrote:
> >>> On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote:
>  On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin
>   wrote:
> >
> > From: Tvrtko Ursulin 
> >
> > In i915 we have this concept of "wait boosting" where we give a 
> > priority boost
> > for instance to fences which are actively waited upon from userspace. 
> > This has
> its pros and cons and can certainly be discussed at length. However fact is
> > fact is
> > some workloads really like it.
> >
> > Problem is that with the arrival of drm syncobj and a new userspace 
> > waiting
> > entry point it added, the waitboost mechanism was bypassed. Hence I 
> > cooked up
> > this mini series really (really) quickly to see if some discussion can 
> > be had.
> >
> > It adds a concept of "wait count" to dma fence, which is incremented 
> > for every
> > explicit dma_fence_enable_sw_signaling and dma_fence_add_wait_callback 
> > (like
> > dma_fence_add_callback but from explicit/userspace wait paths).
> 
>  I was thinking about a similar thing, but in the context of dma_fence
>  (or rather sync_file) fd poll()ing.  How does the kernel differentiate
>  between "housekeeping" poll()ers that don't want to trigger boost but
>  simply know when to do cleanup, and waiters who are waiting with some
>  urgency.  I think we could use EPOLLPRI for this purpose.
> 
>  Not sure how that translates to waits via the syncobj.  But I think we
>  want to let userspace give some hint about urgent vs housekeeping
>  waits.
> >>>
> >>> Should the hint be on the waits, or should the hints be on the executed
> >>> context?
> >>>
> >>> In the end we need some way to quickly ramp-up the frequency to avoid
> >>> the execution bubbles.
> >>>
> >>> waitboost is trying to guess that, but in some cases it guess wrong
> >>> and waste power.
> >>
> >> Do we have a list of workloads which shows who benefits and who loses
> >> from the current implementation of waitboost?
> >>> btw, this is something that other drivers might need:
> >>>
> >>> https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883
> >>> Cc: Alex Deucher 
> >>
> >> I have several issues with the context hint if it would directly
> >> influence frequency selection in the "more power" direction.
> >>
> >> First of all, assume a context hint would replace the waitboost. Which
> >> applications would need to set it to restore the lost performance and
> >> how would they set it?
> >>
> >> Then I don't even think userspace necessarily knows. Think of a layer
> >> like OpenCL. It doesn't really know in advance the profile of
> >> submissions vs waits. It depends on the CPU vs GPU speed, so hardware
> >> generation, and the actual size of the workload which can be influenced
> >> by the application (or user) and not the library.
> >>
> >> The approach also lends itself well for the "arms race" where every
> >> application can say "Me me me, I am the most important workload there is!".
> >
> > since there is discussion happening in two places:
> >
> > https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433
> >
> > What I think you might want is a ctx boost_mask which lets an app or
> > driver disable certain boost signals/classes.  Where fence waits is
> > one class of boost, but hypothetical other signals like touchscreen
> > (or other) input events could be another class of boost.  A compute
> > workload might be interested in fence wait boosts but could care less
> > about input events.
>
> I think it can only be apps which could have any chance knowing whether
> their use of a library is latency sensitive or not. Which means new
> library extensions and their adoption. So I have some strong reservation
> that route is feasible.
>
> Or we tie with priority which many drivers do. Normal and above gets the
> boosting and what lowered itself does not (aka SCHED_IDLE/SCHED_BATCH).

yeah, that sounds reasonable.

> Related note is that we lack any external control of our scheduling
> decisions so we really do suck compared to other scheduling domains like
> CPU and IO etc.
>
> >> The last concern is for me shared with the proposal to expose deadlines
> >> or high priority waits as explicit uapi knobs. Both come under the "what
> >> application told us it will do" category vs what it actually does. So I
> >> think it is slightly weaker than basing decisions of waits.
> >>
> >> The current waitboost is a bit detached from that problem because when
> >> we waitboost for flips we _know_ it is an actual framebuffer in the flip
> >> chain. When we waitboost for waits we also know someone is waiting. We
> >> are not trusting userspace telling us this will be a buffer 

Re: [PATCH v29 4/7] drm/mediatek: add dma dev get function

2023-02-17 Thread Guillaume Ranquet
On Tue, 27 Dec 2022 09:10, "" wrote:
>This is a preparation for adding support for the ovl_adaptor sub driver
>Ovl_adaptor is a DRM sub driver, which doesn't have dma dev. Add
>dma_dev_get function for getting representative dma dev in ovl_adaptor.
>
>Signed-off-by: Nancy.Lin 
>Reviewed-by: AngeloGioachino Del Regno 
>
>Reviewed-by: CK Hu 
>Tested-by: AngeloGioacchino Del Regno 
>Tested-by: Bo-Chen Chen 
>---
> drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 15 +++
> drivers/gpu/drm/mediatek/mtk_drm_crtc.h |  1 +
> drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h |  8 
> 3 files changed, 24 insertions(+)
>
>diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c 
>b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
>index 112615817dcb..78e20f604158 100644
>--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
>+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
>@@ -58,6 +58,7 @@ struct mtk_drm_crtc {
> #endif
>
>   struct device   *mmsys_dev;
>+  struct device   *dma_dev;
>   struct mtk_mutex*mutex;
>   unsigned intddp_comp_nr;
>   struct mtk_ddp_comp **ddp_comp;
>@@ -865,6 +866,13 @@ static int mtk_drm_crtc_init_comp_planes(struct 
>drm_device *drm_dev,
>   return 0;
> }
>
>+struct device *mtk_drm_crtc_dma_dev_get(struct drm_crtc *crtc)
>+{
>+  struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
>+
>+  return mtk_crtc->dma_dev;
>+}

While testing out the HDMI patchset for i1200, I've ended up with a
panic here with crtc being NULL.

I've fixed the issue on my side by testing crtc prior doing anything
in that function.

Not sure this is the proper fix.

HTH,
Guillaume.

>+
> int mtk_drm_crtc_create(struct drm_device *drm_dev,
>   const enum mtk_ddp_comp_id *path, unsigned int path_len)
> {
>@@ -953,6 +961,13 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
>   return ret;
>   }
>
>+  /*
>+   * Default to use the first component as the dma dev.
>+   * In the case of ovl_adaptor sub driver, it needs to use the
>+   * dma_dev_get function to get representative dma dev.
>+   */
>+  mtk_crtc->dma_dev = mtk_ddp_comp_dma_dev_get(&priv->ddp_comp[path[0]]);
>+
>   ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, pipe);
>   if (ret < 0)
>   return ret;
>diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h 
>b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
>index cb9a36c48d4f..f5a6e80c5265 100644
>--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
>+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h
>@@ -22,5 +22,6 @@ int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct 
>drm_plane *plane,
>struct mtk_plane_state *state);
> void mtk_drm_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane,
>  struct drm_atomic_state *plane_state);
>+struct device *mtk_drm_crtc_dma_dev_get(struct drm_crtc *crtc);
>
> #endif /* MTK_DRM_CRTC_H */
>diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h 
>b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
>index 2d0052c23dcb..364f3f7f59fa 100644
>--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
>+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
>@@ -71,6 +71,7 @@ struct mtk_ddp_comp_funcs {
>   void (*bgclr_in_off)(struct device *dev);
>   void (*ctm_set)(struct device *dev,
>   struct drm_crtc_state *state);
>+  struct device * (*dma_dev_get)(struct device *dev);
> };
>
> struct mtk_ddp_comp {
>@@ -203,6 +204,13 @@ static inline void mtk_ddp_ctm_set(struct mtk_ddp_comp 
>*comp,
>   comp->funcs->ctm_set(comp->dev, state);
> }
>
>+static inline struct device *mtk_ddp_comp_dma_dev_get(struct mtk_ddp_comp 
>*comp)
>+{
>+  if (comp->funcs && comp->funcs->dma_dev_get)
>+  return comp->funcs->dma_dev_get(comp->dev);
>+  return comp->dev;
>+}
>+
> int mtk_ddp_comp_get_id(struct device_node *node,
>   enum mtk_ddp_comp_type comp_type);
> unsigned int mtk_drm_find_possible_crtc_by_comp(struct drm_device *drm,
>--
>2.18.0
>
>


Re: [PATCH v29 3/7] drm/mediatek: add ovl_adaptor support for MT8195

2023-02-17 Thread Guillaume Ranquet
On Tue, 27 Dec 2022 09:10, "" wrote:

Hi Nancy.

I've been using your patches lately to test out the HDMI series on
mt8195 and I have hit a scheduling bug.

>Add ovl_adaptor driver for MT8195.
>Ovl_adaptor is an encapsulated module and designed for simplified
>DRM control flow. This module is composed of 8 RDMAs, 4 MERGEs and
>an ETHDR. Two RDMAs merge into one layer, so this module support 4
>layers.
>
>Signed-off-by: Nancy.Lin 
>Reviewed-by: Chun-Kuang Hu 
>Reviewed-by: AngeloGioacchino Del Regno 
>
>Tested-by: AngeloGioacchino Del Regno 
>Tested-by: Bo-Chen Chen 
>---
> drivers/gpu/drm/mediatek/Makefile |   1 +
> drivers/gpu/drm/mediatek/mtk_disp_drv.h   |  26 +
> .../gpu/drm/mediatek/mtk_disp_ovl_adaptor.c   | 533 ++
> drivers/gpu/drm/mediatek/mtk_drm_drv.h|   1 +
> 4 files changed, 561 insertions(+)
> create mode 100644 drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
>
>diff --git a/drivers/gpu/drm/mediatek/Makefile 
>b/drivers/gpu/drm/mediatek/Makefile
>index 840f14436d3c..d4d193f60271 100644
>--- a/drivers/gpu/drm/mediatek/Makefile
>+++ b/drivers/gpu/drm/mediatek/Makefile
>@@ -6,6 +6,7 @@ mediatek-drm-y := mtk_disp_aal.o \
> mtk_disp_gamma.o \
> mtk_disp_merge.o \
> mtk_disp_ovl.o \
>+mtk_disp_ovl_adaptor.o \
> mtk_disp_rdma.o \
> mtk_drm_crtc.o \
> mtk_drm_ddp_comp.o \
>diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h 
>b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
>index 33e61a136bbc..654f8e257984 100644
>--- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h
>+++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
>@@ -7,6 +7,8 @@
> #define _MTK_DISP_DRV_H_
>
> #include 
>+#include 
>+#include 
> #include "mtk_drm_plane.h"
> #include "mtk_mdp_rdma.h"
>
>@@ -116,6 +118,30 @@ void mtk_rdma_unregister_vblank_cb(struct device *dev);
> void mtk_rdma_enable_vblank(struct device *dev);
> void mtk_rdma_disable_vblank(struct device *dev);
>
>+void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex);
>+void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex);
>+void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev,
>+   unsigned int next);
>+void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev,
>+  unsigned int next);
>+int mtk_ovl_adaptor_clk_enable(struct device *dev);
>+void mtk_ovl_adaptor_clk_disable(struct device *dev);
>+void mtk_ovl_adaptor_config(struct device *dev, unsigned int w,
>+  unsigned int h, unsigned int vrefresh,
>+  unsigned int bpc, struct cmdq_pkt *cmdq_pkt);
>+void mtk_ovl_adaptor_layer_config(struct device *dev, unsigned int idx,
>+struct mtk_plane_state *state,
>+struct cmdq_pkt *cmdq_pkt);
>+void mtk_ovl_adaptor_register_vblank_cb(struct device *dev, void 
>(*vblank_cb)(void *),
>+  void *vblank_cb_data);
>+void mtk_ovl_adaptor_unregister_vblank_cb(struct device *dev);
>+void mtk_ovl_adaptor_enable_vblank(struct device *dev);
>+void mtk_ovl_adaptor_disable_vblank(struct device *dev);
>+void mtk_ovl_adaptor_start(struct device *dev);
>+void mtk_ovl_adaptor_stop(struct device *dev);
>+unsigned int mtk_ovl_adaptor_layer_nr(struct device *dev);
>+struct device *mtk_ovl_adaptor_dma_dev_get(struct device *dev);
>+
> int mtk_mdp_rdma_clk_enable(struct device *dev);
> void mtk_mdp_rdma_clk_disable(struct device *dev);
> void mtk_mdp_rdma_start(struct device *dev, struct cmdq_pkt *cmdq_pkt);
>diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c 
>b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
>new file mode 100644
>index ..046217828ab3
>--- /dev/null
>+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
>@@ -0,0 +1,533 @@
>+// SPDX-License-Identifier: GPL-2.0-only
>+/*
>+ * Copyright (c) 2021 MediaTek Inc.
>+ */
>+
>+#include 
>+#include 
>+#include 
>+#include 
>+#include 
>+#include 
>+#include 
>+#include 
>+#include 
>+#include 
>+#include 
>+#include 
>+
>+#include "mtk_disp_drv.h"
>+#include "mtk_drm_crtc.h"
>+#include "mtk_drm_ddp_comp.h"
>+#include "mtk_drm_drv.h"
>+#include "mtk_ethdr.h"
>+
>+#define MTK_OVL_ADAPTOR_RDMA_MAX_WIDTH 1920
>+#define MTK_OVL_ADAPTOR_LAYER_NUM 4
>+
>+enum mtk_ovl_adaptor_comp_type {
>+  OVL_ADAPTOR_TYPE_RDMA = 0,
>+  OVL_ADAPTOR_TYPE_MERGE,
>+  OVL_ADAPTOR_TYPE_ETHDR,
>+  OVL_ADAPTOR_TYPE_NUM,
>+};
>+
>+enum mtk_ovl_adaptor_comp_id {
>+  OVL_ADAPTOR_MDP_RDMA0,
>+  OVL_ADAPTOR_MDP_RDMA1,
>+  OVL_ADAPTOR_MDP_RDMA2,
>+  OVL_ADAPTOR_MDP_RDMA3,
>+  OVL_ADAPTOR_MDP_RDMA4,
>+  OVL_ADAPTOR_MDP_RDMA5,
>+  OVL_ADAPTOR_MDP_RDMA6,
>+  OVL_ADAPTOR_MDP_RDMA7,
>+  OVL_ADAPTOR_MERGE0,
>+  OVL_ADAPTOR_MERGE1,
>+  OVL_ADAPTOR_MERGE2,
>+  OVL_ADAPTOR_MERGE3,
>+  OVL_ADAPTO

[PATCH v2 2/2] drm: document DRM_IOCTL_PRIME_HANDLE_TO_FD and PRIME_FD_TO_HANDLE

2023-02-17 Thread Simon Ser
v2: mention caps, note that the IOCTLs might fail, document that
user-space needs a data structure to keep track of the
handles (Daniel V.)

Signed-off-by: Simon Ser 
Cc: Daniel Vetter 
Cc: Pekka Paalanen 
Cc: Daniel Stone 
---
 include/uapi/drm/drm.h | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 292e4778a2f4..a87ca2d4 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -1025,7 +1025,37 @@ extern "C" {
 #define DRM_IOCTL_UNLOCK   DRM_IOW( 0x2b, struct drm_lock)
 #define DRM_IOCTL_FINISH   DRM_IOW( 0x2c, struct drm_lock)
 
+/**
+ * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD.
+ *
+ * User-space sets &drm_prime_handle.handle with the GEM handle to export and
+ * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in
+ * &drm_prime_handle.fd.
+ *
+ * The export can fail for any driver-specific reason, e.g. because export is
+ * not supported for this specific GEM handle (but might be for others).
+ *
+ * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT.
+ */
 #define DRM_IOCTL_PRIME_HANDLE_TO_FDDRM_IOWR(0x2d, struct drm_prime_handle)
+/**
+ * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle.
+ *
+ * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to
+ * import, and gets back a GEM handle in &drm_prime_handle.handle.
+ * &drm_prime_handle.flags is unused.
+ *
+ * If an existing GEM handle refers to the memory object backing the DMA-BUF,
+ * that GEM handle is returned. Therefore user-space which needs to handle
+ * arbitrary DMA-BUFs must have a user-space lookup data structure to manually
+ * reference-count duplicated GEM handles. For more information see
+ * &DRM_IOCTL_GEM_CLOSE.
+ *
+ * The import can fail for any driver-specific reason, e.g. because import is
+ * only supported for DMA-BUFs allocated on this DRM device.
+ *
+ * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT.
+ */
 #define DRM_IOCTL_PRIME_FD_TO_HANDLEDRM_IOWR(0x2e, struct drm_prime_handle)
 
 #define DRM_IOCTL_AGP_ACQUIRE  DRM_IO(  0x30)
-- 
2.39.2




[PATCH v2 1/2] drm: document expectations for GETFB2 handles

2023-02-17 Thread Simon Ser
There are two important details missing from the docs:

- If the memory object backing the FB already has a GEM handle,
  it's not re-used, a new one is generated.
- Aliased planes will return the same GEM handle.

v2: document how user-space can obtain DMA-BUF FDs without leaking
handles (Pekka)

Signed-off-by: Simon Ser 
Cc: Daniel Vetter 
Cc: Pekka Paalanen 
Cc: Daniel Stone 
Acked-by: Pekka Paalanen 
---
 include/uapi/drm/drm.h | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index c39fefb54613..292e4778a2f4 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -1117,8 +1117,13 @@ extern "C" {
  * struct as the output.
  *
  * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles
- * will be filled with GEM buffer handles. Planes are valid until one has a
- * zero handle -- this can be used to compute the number of planes.
+ * will be filled with GEM buffer handles. Fresh new GEM handles are always
+ * returned, even if another GEM handle referring to the same memory object
+ * already exists on the DRM file description. The caller is responsible for
+ * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same
+ * new handle will be returned for multiple planes in case they use the same
+ * memory object. Planes are valid until one has a zero handle -- this can be
+ * used to compute the number of planes.
  *
  * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid
  * until one has a zero &drm_mode_fb_cmd2.pitches.
@@ -1126,6 +1131,11 @@ extern "C" {
  * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set
  * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the
  * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier.
+ *
+ * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space
+ * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately
+ * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not
+ * double-close handles which are specified multiple times in the array.
  */
 #define DRM_IOCTL_MODE_GETFB2  DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
 
-- 
2.39.2




Re: [Intel-gfx] [PATCH] drm/i915/guc: avoid FIELD_PREP warning

2023-02-17 Thread Arnd Bergmann
On Fri, Feb 17, 2023, at 16:38, Andrzej Hajda wrote:
> On 17.02.2023 13:46, Arnd Bergmann wrote:
>> From: Arnd Bergmann 
>> 
>> With gcc-7 and earlier, there are lots of warnings like
>> 
>> In file included from :0:0:
>> In function '__guc_context_policy_add_priority.isra.66',
>>  inlined from '__guc_context_set_prio.isra.67' at 
>> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3292:3,
>>  inlined from 'guc_context_set_prio' at 
>> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3320:2:
>> include/linux/compiler_types.h:399:38: error: call to 
>> '__compiletime_assert_631' declared with attribute error: FIELD_PREP: mask 
>> is not constant
>>_compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
>>^
>> ...
>> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:2422:3: note: in expansion 
>> of macro 'FIELD_PREP'
>> FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
>> ^~
>> 
>> Make sure that GUC_KLV_0_KEY is an unsigned value to avoid the warning.
>
> Does it mean __builtin_constant_p in gcc7 returns 0 on signed constants?
> I guess there should be more similar errors.

No, it's not as simple as that, I'm not really sure what the underlying
problem is with the compiler, and this is the only file that triggered
this particular warning.

There are other cases where old compilers had the reverse problem,
where they sometimes report a variable to be __builtin_constant_p()==true
if there is a branch that assigns a constant to it.

I think here it happens because GUC_KLV_0_KEY and GUC_KLV_n_VALUE are
negative.

   Arnd


Re: [PATCH v2 0/8] QAIC accel driver

2023-02-17 Thread Jeffrey Hugo

On 2/8/2023 3:01 PM, Jeffrey Hugo wrote:

On 2/6/2023 8:41 AM, Jeffrey Hugo wrote:

Regarding the open userspace (see the documentation patch), the UMD and
compiler are a week or so away from being posted in the indicated repos.
Just need to polish some documentation.


An update to this, the compiler is now live on github at the link 
specified in the documentation patch.


The UMD is now posted.

-Jeff


Re: [PATCH] drm/i915/guc: avoid FIELD_PREP warning

2023-02-17 Thread Rodrigo Vivi
On Fri, Feb 17, 2023 at 01:46:50PM +0100, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> With gcc-7 and earlier, there are lots of warnings like
> 
> In file included from :0:0:
> In function '__guc_context_policy_add_priority.isra.66',
> inlined from '__guc_context_set_prio.isra.67' at 
> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3292:3,
> inlined from 'guc_context_set_prio' at 
> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3320:2:
> include/linux/compiler_types.h:399:38: error: call to 
> '__compiletime_assert_631' declared with attribute error: FIELD_PREP: mask is 
> not constant
>   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
>   ^
> ...
> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:2422:3: note: in expansion 
> of macro 'FIELD_PREP'
>FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
>^~
> 
> Make sure that GUC_KLV_0_KEY is an unsigned value to avoid the warning.
> 
> Fixes: 77b6f79df66e ("drm/i915/guc: Update to GuC version 69.0.3")
> Signed-off-by: Arnd Bergmann 
> ---
>  drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h 
> b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
> index 58012edd4eb0..4f4f53c42a9c 100644
> --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
> +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
> @@ -29,9 +29,9 @@
>   */
>  
>  #define GUC_KLV_LEN_MIN  1u
> -#define GUC_KLV_0_KEY	(0xffff << 16)
> -#define GUC_KLV_0_LEN	(0xffff << 0)
> -#define GUC_KLV_n_VALUE	(0xffffffff << 0)
> +#define GUC_KLV_0_KEY	(0xffffu << 16)
> +#define GUC_KLV_0_LEN	(0xffffu << 0)
> +#define GUC_KLV_n_VALUE	(0xffffffffu << 0)

what about changing them to GENMASK?

>  
>  /**
>   * DOC: GuC Self Config KLVs
> -- 
> 2.39.1
> 


Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-17 Thread Tvrtko Ursulin



On 17/02/2023 14:55, Rob Clark wrote:

On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin
 wrote:



On 16/02/2023 18:19, Rodrigo Vivi wrote:

On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote:

On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin
 wrote:


From: Tvrtko Ursulin 

In i915 we have this concept of "wait boosting" where we give a priority boost
for instance to fences which are actively waited upon from userspace. This has
it's pros and cons and can certainly be discussed at lenght. However fact is
some workloads really like it.

Problem is that with the arrival of drm syncobj and a new userspace waiting
entry point it added, the waitboost mechanism was bypassed. Hence I cooked up
this mini series really (really) quickly to see if some discussion can be had.

It adds a concept of "wait count" to dma fence, which is incremented for every
explicit dma_fence_enable_sw_signaling and dma_fence_add_wait_callback (like
dma_fence_add_callback but from explicit/userspace wait paths).


I was thinking about a similar thing, but in the context of dma_fence
(or rather sync_file) fd poll()ing.  How does the kernel differentiate
between "housekeeping" poll()ers that don't want to trigger boost but
simply know when to do cleanup, and waiters who are waiting with some
urgency.  I think we could use EPOLLPRI for this purpose.

Not sure how that translates to waits via the syncobj.  But I think we
want to let userspace give some hint about urgent vs housekeeping
waits.


Should the hint be on the waits, or should the hints be on the executed
context?

In the end we need some way to quickly ramp-up the frequency to avoid
the execution bubbles.

waitboost is trying to guess that, but in some cases it guess wrong
and waste power.


Do we have a list of workloads which shows who benefits and who loses
from the current implementation of waitboost?

btw, this is something that other drivers might need:

https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883
Cc: Alex Deucher 


I have several issues with the context hint if it would directly
influence frequency selection in the "more power" direction.

First of all, assume a context hint would replace the waitboost. Which
applications would need to set it to restore the lost performance and
how would they set it?

Then I don't even think userspace necessarily knows. Think of a layer
like OpenCL. It doesn't really know in advance the profile of
submissions vs waits. It depends on the CPU vs GPU speed, so hardware
generation, and the actual size of the workload which can be influenced
by the application (or user) and not the library.

The approach also lends itself well for the "arms race" where every
application can say "Me me me, I am the most important workload there is!".


since there is discussion happening in two places:

https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433

What I think you might want is a ctx boost_mask which lets an app or
driver disable certain boost signals/classes.  Where fence waits is
one class of boost, but hypothetical other signals like touchscreen
(or other) input events could be another class of boost.  A compute
workload might be interested in fence wait boosts but could care less
about input events.


I think it can only be apps which could have any chance knowing whether 
their use of a library is latency sensitive or not. Which means new 
library extensions and their adoption. So I have some strong reservation 
that route is feasible.


Or we tie with priority which many drivers do. Normal and above gets the 
boosting and what lowered itself does not (aka SCHED_IDLE/SCHED_BATCH).


Related note is that we lack any external control of our scheduling 
decisions so we really do suck compared to other scheduling domains like 
CPU and IO etc.



The last concern is for me shared with the proposal to expose deadlines
or high priority waits as explicit uapi knobs. Both come under the "what
application told us it will do" category vs what it actually does. So I
think it is slightly weaker than basing decisions of waits.

The current waitboost is a bit detached from that problem because when
we waitboost for flips we _know_ it is an actual framebuffer in the flip
chain. When we waitboost for waits we also know someone is waiting. We
are not trusting userspace telling us this will be a buffer in the flip
chain or that this is a context which will have a certain duty-cycle.

But yes, even if the input is truthful, latter is still only a
heuristics because nothing says all waits are important. AFAIU it just
happened to work well in the past.

I do understand I am effectively arguing for more heuristics, which may
sound a bit against the common wisdom. This is because in general I
think the logic to do the right thing, be it in the driver or in the
firmware, can work best if it has a holistic view. Simply put it needs
to have more inputs to the decisions it is making.

That is what my series is proposing - adding a co

Re: [PATCH drm-next v2 01/16] drm: execution context for GEM buffers

2023-02-17 Thread Christian König

Am 17.02.23 um 14:44 schrieb Danilo Krummrich:

From: Christian König 

This adds the infrastructure for an execution context for GEM buffers
which is similar to the existing TTMs execbuf util and intended to replace
it in the long term.

The basic functionality is that we abstract the necessary loop to lock
many different GEM buffers with automated deadlock and duplicate handling.

v2: drop xarray and use dynamic resized array instead, the locking
 overhead is unnecessary and measurable.


Question for Danilo and probably others: Does Nouveau make use of the 
duplicate tracking at some point?


Background is that I only have two or three use cases for this in 
radeon/amdgpu and would like to make it an optional feature.


Going to take a look at the rest of this series next week.

Regards,
Christian.



Signed-off-by: Christian König 
---
  Documentation/gpu/drm-mm.rst   |  12 ++
  drivers/gpu/drm/Kconfig|   6 +
  drivers/gpu/drm/Makefile   |   2 +
  drivers/gpu/drm/amd/amdgpu/Kconfig |   1 +
  drivers/gpu/drm/drm_exec.c | 295 +
  include/drm/drm_exec.h | 144 ++
  6 files changed, 460 insertions(+)
  create mode 100644 drivers/gpu/drm/drm_exec.c
  create mode 100644 include/drm/drm_exec.h

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index a79fd3549ff8..a52e6f4117d6 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -493,6 +493,18 @@ DRM Sync Objects
  .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
 :export:
  
+DRM Execution context

+=
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :doc: Overview
+
+.. kernel-doc:: include/drm/drm_exec.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :export:
+
  GPU Scheduler
  =
  
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig

index f42d4c6a19f2..1573d658fbb5 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -200,6 +200,12 @@ config DRM_TTM
  GPU memory types. Will be enabled automatically if a device driver
  uses it.
  
+config DRM_EXEC

+   tristate
+   depends on DRM
+   help
+ Execution context for command submissions
+
  config DRM_BUDDY
tristate
depends on DRM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index ab4460fcd63f..d40defbb0347 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -78,6 +78,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += 
drm_panel_orientation_quirks.o
  #
  # Memory-management helpers
  #
+#
+obj-$(CONFIG_DRM_EXEC) += drm_exec.o
  
  obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
  
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig

index 5341b6b242c3..279fb3bba810 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -11,6 +11,7 @@ config DRM_AMDGPU
select DRM_SCHED
select DRM_TTM
select DRM_TTM_HELPER
+   select DRM_EXEC
select POWER_SUPPLY
select HWMON
select I2C
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
new file mode 100644
index ..ed2106c22786
--- /dev/null
+++ b/drivers/gpu/drm/drm_exec.c
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+#include 
+#include 
+#include 
+
+/**
+ * DOC: Overview
+ *
+ * This component mainly abstracts the retry loop necessary for locking
+ * multiple GEM objects while preparing hardware operations (e.g. command
+ * submissions, page table updates etc..).
+ *
+ * If a contention is detected while locking a GEM object the cleanup procedure
+ * unlocks all previously locked GEM objects and locks the contended one first
+ * before locking any further objects.
+ *
+ * After an object is locked fences slots can optionally be reserved on the
+ * dma_resv object inside the GEM object.
+ *
+ * A typical usage pattern should look like this::
+ *
+ * struct drm_gem_object *obj;
+ * struct drm_exec exec;
+ * unsigned long index;
+ * int ret;
+ *
+ * drm_exec_init(&exec, true);
+ * drm_exec_while_not_all_locked(&exec) {
+ * ret = drm_exec_prepare_obj(&exec, boA, 1);
+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ *
+ * ret = drm_exec_lock(&exec, boB, 1);
+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ * }
+ *
+ * drm_exec_for_each_locked_object(&exec, index, obj) {
+ * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ);
+ * ...
+ * }
+ * drm_exec_fini(&exec);
+ *
+ * See struct dma_exec for more details.
+ */
+
+/* Dummy value used to initially enter the retry loop */
+#define DRM_EXEC_DUMMY (void*)~0
+
+/* Initialize the drm_exec_objects container */
+static void drm_exec_object

Re: [PATCH 3/3] drm/debugfs: remove dev->debugfs_list and debugfs_mutex

2023-02-17 Thread Christian König

Am 17.02.23 um 13:37 schrieb Jani Nikula:

On Fri, 17 Feb 2023, Christian König  wrote:

If i915 have such structural problems then I strongly suggest to solve
them inside i915 and not make common code out of that.

All other things aside, that's just a completely unnecessary and
unhelpful remark.


Sorry, but why?

We have gone through the same problems on radeon and it was massively 
painful, what I try here is to prevent others from using this bad design 
as well. And yes I think devm_ and drmm_ is a bit questionable in that 
regard as well.


The goal is not to make it as simple as possible to write a driver, but 
rather as defensive as possible. In other words automatically releasing 
memory when an object is destroyed might be helpful, but it isn't 
automatically a good idea.


What can easily happen for example is that you run into use after free 
situations on object reference decommissions, e.g. parent is freed 
before child for example.


Regards,
Christian.




BR,
Jani.






Re: [PATCH v3 2/2] drm/i915: Don't use BAR mappings for ring buffers with LLC

2023-02-17 Thread Ceraolo Spurio, Daniele




On 2/15/2023 5:11 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

Direction from hardware is that ring buffers should never be mapped
via the BAR on systems with LLC. There are too many caching pitfalls
due to the way BAR accesses are routed. So it is safest to just not
use it.

Signed-off-by: John Harrison 
Fixes: 9d80841ea4c9 ("drm/i915: Allow ringbuffers to be bound anywhere")
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Jani Nikula 
Cc: Rodrigo Vivi 
Cc: Tvrtko Ursulin 
Cc: intel-...@lists.freedesktop.org
Cc:  # v4.9+


I've double-checked the original patches to make sure the 4.9 fixes tag 
was correct for both (which dim confirmed), because if we backport this 
fix without the previous one then the driver would break. Also, the 
original patches were merged as part of the same series 
(https://patchwork.freedesktop.org/series/11278/), so we should be 
guaranteed that they're always there as a pair.


Reviewed-by: Daniele Ceraolo Spurio 

Daniele


---
  drivers/gpu/drm/i915/gt/intel_ring.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c 
b/drivers/gpu/drm/i915/gt/intel_ring.c
index fb1d2595392ed..fb99143be98e7 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -53,7 +53,7 @@ int intel_ring_pin(struct intel_ring *ring, struct 
i915_gem_ww_ctx *ww)
if (unlikely(ret))
goto err_unpin;
  
-	if (i915_vma_is_map_and_fenceable(vma)) {

+   if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) {
addr = (void __force *)i915_vma_pin_iomap(vma);
} else {
int type = i915_coherent_map_type(vma->vm->i915, vma->obj, 
false);
@@ -98,7 +98,7 @@ void intel_ring_unpin(struct intel_ring *ring)
return;
  
  	i915_vma_unset_ggtt_write(vma);

-   if (i915_vma_is_map_and_fenceable(vma))
+   if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915))
i915_vma_unpin_iomap(vma);
else
i915_gem_object_unpin_map(vma->obj);




Re: [Intel-gfx] [PATCH v3 1/2] drm/i915: Don't use stolen memory for ring buffers with LLC

2023-02-17 Thread Ceraolo Spurio, Daniele




On 2/15/2023 5:11 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

Direction from hardware is that stolen memory should never be used for
ring buffer allocations on platforms with LLC. There are too many
caching pitfalls due to the way stolen memory accesses are routed. So
it is safest to just not use it.

Signed-off-by: John Harrison 
Fixes: c58b735fc762 ("drm/i915: Allocate rings from stolen")
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Jani Nikula 
Cc: Rodrigo Vivi 
Cc: Tvrtko Ursulin 
Cc: intel-...@lists.freedesktop.org
Cc:  # v4.9+


Reviewed-by: Daniele Ceraolo Spurio 

Daniele


---
  drivers/gpu/drm/i915/gt/intel_ring.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c 
b/drivers/gpu/drm/i915/gt/intel_ring.c
index 15ec64d881c44..fb1d2595392ed 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -116,7 +116,7 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt 
*ggtt, int size)
  
  	obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE |

  I915_BO_ALLOC_PM_VOLATILE);
-   if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt))
+   if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt) && !HAS_LLC(i915))
obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(obj))
obj = i915_gem_object_create_internal(i915, size);




Re: [Intel-gfx] [PATCH] drm/i915/guc: avoid FIELD_PREP warning

2023-02-17 Thread Andrzej Hajda

On 17.02.2023 13:46, Arnd Bergmann wrote:

From: Arnd Bergmann 

With gcc-7 and earlier, there are lots of warnings like

In file included from :0:0:
In function '__guc_context_policy_add_priority.isra.66',
 inlined from '__guc_context_set_prio.isra.67' at 
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3292:3,
 inlined from 'guc_context_set_prio' at 
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3320:2:
include/linux/compiler_types.h:399:38: error: call to 
'__compiletime_assert_631' declared with attribute error: FIELD_PREP: mask is 
not constant
   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
   ^
...
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:2422:3: note: in expansion of 
macro 'FIELD_PREP'
FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
^~

Make sure that GUC_KLV_0_KEY is an unsigned value to avoid the warning.


Does it mean __builtin_constant_p in gcc7 returns 0 on signed constants?
I guess there should be more similar errors.

Regards
Andrzej




Fixes: 77b6f79df66e ("drm/i915/guc: Update to GuC version 69.0.3")
Signed-off-by: Arnd Bergmann 
---
  drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
index 58012edd4eb0..4f4f53c42a9c 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
@@ -29,9 +29,9 @@
   */
  
  #define GUC_KLV_LEN_MIN1u

-#define GUC_KLV_0_KEY  (0xffff << 16)
-#define GUC_KLV_0_LEN  (0xffff << 0)
-#define GUC_KLV_n_VALUE(0xffffffff << 0)
+#define GUC_KLV_0_KEY  (0xffffu << 16)
+#define GUC_KLV_0_LEN  (0xffffu << 0)
+#define GUC_KLV_n_VALUE(0xffffffffu << 0)
  
  /**

   * DOC: GuC Self Config KLVs




Re: [PATCH v11 6/9] drm/bridge: anx7625: Register Type C mode switches

2023-02-17 Thread Nícolas F . R . A . Prado
On Sat, Feb 04, 2023 at 09:30:37PM +0800, Pin-yen Lin wrote:
[..]
> --- a/drivers/gpu/drm/bridge/analogix/anx7625.c
> +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c
[..]
> +static void anx7625_set_crosspoint_switch(struct anx7625_data *ctx,
> +   enum typec_orientation orientation)
> +{
> + if (orientation == TYPEC_ORIENTATION_NORMAL) {
> + anx7625_reg_write(ctx, ctx->i2c.tcpc_client, TCPC_SWITCH_0,
> +   SW_SEL1_SSRX_RX1 | SW_SEL1_DPTX0_RX2);
> + anx7625_reg_write(ctx, ctx->i2c.tcpc_client, TCPC_SWITCH_1,
> +   SW_SEL2_SSTX_TX1 | SW_SEL2_DPTX1_TX2);

This seems inverted compared to the binding. Binding says 

  0, 1, 2, 3 in "data-lanes" maps to SSRX1, SSTX1, SSRX2, SSTX2, respectively.

But in anx7625_register_typec_switches(), lanes 0-1 mean orientation normal,
then in this logic, you set RX2 and TX2 to carry the DP signals. So the driver
is mapping lanes 0-1 to SSRX2/SSTX2 and lanes 2-3 to SSRX1/SSTX1, the opposite
from the binding.

Thanks,
Nícolas

> + } else if (orientation == TYPEC_ORIENTATION_REVERSE) {
> + anx7625_reg_write(ctx, ctx->i2c.tcpc_client, TCPC_SWITCH_0,
> +   SW_SEL1_SSRX_RX2 | SW_SEL1_DPTX0_RX1);
> + anx7625_reg_write(ctx, ctx->i2c.tcpc_client, TCPC_SWITCH_1,
> +   SW_SEL2_SSTX_TX2 | SW_SEL2_DPTX1_TX1);
> + }
> +}
> +
[..]
> +static int anx7625_register_typec_switches(struct device *dev, struct 
> anx7625_data *ctx)
> +{
[..]
> + ctx->port_data[i].orientation = (dp_lanes[0] / 2 == 0) ?
> + TYPEC_ORIENTATION_NORMAL : TYPEC_ORIENTATION_REVERSE;
[..]


Re: [PATCH 6/6] drm/tidss: Implement struct drm_plane_helper_funcs.atomic_enable

2023-02-17 Thread Thomas Zimmermann

Hi

Am 17.02.23 um 15:42 schrieb Tomi Valkeinen:

On 09/02/2023 17:41, Thomas Zimmermann wrote:

Enable the primary plane for tidss hardware via atomic_enable.
Atomic helpers invoke this callback only when the plane becomes
active.

Signed-off-by: Thomas Zimmermann 
---
  drivers/gpu/drm/tidss/tidss_plane.c | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/tidss/tidss_plane.c 
b/drivers/gpu/drm/tidss/tidss_plane.c

index 0b12405edb47..6bdd6e4a955a 100644
--- a/drivers/gpu/drm/tidss/tidss_plane.c
+++ b/drivers/gpu/drm/tidss/tidss_plane.c
@@ -124,6 +124,16 @@ static void tidss_plane_atomic_update(struct 
drm_plane *plane,

  hw_videoport = to_tidss_crtc(new_state->crtc)->hw_videoport;
  dispc_plane_setup(tidss->dispc, tplane->hw_plane_id, new_state, 
hw_videoport);

+}
+
+static void tidss_plane_atomic_enable(struct drm_plane *plane,
+  struct drm_atomic_state *state)
+{
+    struct drm_device *ddev = plane->dev;
+    struct tidss_device *tidss = to_tidss(ddev);
+    struct tidss_plane *tplane = to_tidss_plane(plane);
+
+    dev_dbg(ddev->dev, "%s\n", __func__);
  dispc_plane_enable(tidss->dispc, tplane->hw_plane_id, true);
  }
@@ -151,6 +161,7 @@ static void drm_plane_destroy(struct drm_plane 
*plane)

  static const struct drm_plane_helper_funcs tidss_plane_helper_funcs = {
  .atomic_check = tidss_plane_atomic_check,
  .atomic_update = tidss_plane_atomic_update,
+    .atomic_enable = tidss_plane_atomic_enable,
  .atomic_disable = tidss_plane_atomic_disable,
  };


I haven't tested this, but looks fine to me.

Reviewed-by: Tomi Valkeinen 

One thought, though, is that we still do dispc_plane_enable(false) in 
tidss_plane_atomic_update() when the plane is not visible. Not a 
problem, but it would be nice to only enable/disable the plane inside 
atomic_enable/disable.


Or maybe in cases like this the driver should only use atomic_update, 
and do all the enabling and disabling there...


I agree. Drivers that have complex enable/disable semantics should 
probably handle everything in atomic_update.


Enabling/disabling is currently connected to the plane's framebuffer. As 
you said, it would be nice if this could be tied to visibility instead. 
The patch would be trivial, but some drivers might not like the change. 
I guess we could do an RFC patch and gather opinions.


Best regards
Thomas



  Tomi



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev


OpenPGP_signature
Description: OpenPGP digital signature


Re: [PATCH 1/3] drm/msm/dpu: Read previously-uninitialized SSPP scaler version from hw

2023-02-17 Thread Dmitry Baryshkov
On Thu, 16 Feb 2023 at 23:46, Marijn Suijten
 wrote:
>
> On 2023-02-16 18:34:43, Dmitry Baryshkov wrote:
> > On 16/02/2023 10:31, Marijn Suijten wrote:
> > > On 2023-02-16 04:22:13, Dmitry Baryshkov wrote:
> > >> On Thu, 16 Feb 2023 at 01:02, Marijn Suijten
> > >>  wrote:
> > >>>
> > >>> DPU's catalog never assigned dpu_scaler_blk::version leading to
> > >>> initialization code in dpu_hw_setup_scaler3 to wander the wrong
> > >>> codepaths.  Instead of hardcoding the correct QSEED algorithm version,
> > >>> read it back from a hardware register.
> > >>>
> > >>> Note that this register is only available starting with QSEED3, where
> > >>> 0x1002 corresponds to QSEED3, 0x2004 to QSEED3LITE and 0x3000 to QSEED4.
> > >>
> > >> This is not purely accurate. 0x1003 (sdm845) also corresponds to QSEED3.
> > >> I'd say instead that there are several variations of QSEED3 scalers,
> > >> where starting from 0x2004 it is called QSEED3LITE and starting from
> > >> 0x3000 it is called QSEED4.
> > >
> > > Good catch, I'll update that.
> > >
> > >>> Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support")
> > >>> Signed-off-by: Marijn Suijten 
> > >>> ---
> > >>>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 2 --
> > >>>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c| 8 +++-
> > >>>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h| 3 +++
> > >>>   3 files changed, 10 insertions(+), 3 deletions(-)
> > >>>
> > >>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h 
> > >>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
> > >>> index ddab9caebb18..96ce1766f4a1 100644
> > >>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
> > >>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
> > >>> @@ -324,11 +324,9 @@ struct dpu_src_blk {
> > >>>   /**
> > >>>* struct dpu_scaler_blk: Scaler information
> > >>>* @info:   HW register and features supported by this sub-blk
> > >>> - * @version: qseed block revision
> > >>>*/
> > >>>   struct dpu_scaler_blk {
> > >>>  DPU_HW_SUBBLK_INFO;
> > >>> -   u32 version;
> > >>
> > >> No. Please keep the version in the scaler subblk.  It is a version of
> > >> the QSEED (scaler block), not the SSPP's version.
> > >
> > > You are right that the new variable in the parent (SSPP) block is
> > > nondescriptive and should have been named scaler_version.
> > >
> > > However.
> > >
> > > dpu_scaler_blk is only used as a const static struct in the catalog,
> > > meaning we cannot (should not!) store a runtime-read register value
> > > here.  Instead I followed your IRC suggestion to read the register in
> > > dpu_hw_sspp_init, but my original implementation called
> > > dpu_hw_get_scaler3_ver in _dpu_hw_sspp_setup_scaler3 where we already
> > > have access to the subblk_offset, allowing us to delete
> > > _dpu_hw_sspp_get_scaler3_ver.  Would you rather have that?  We don't
> > > need the register value anywhere else.
> >
> > After giving it another thought, let's follow the vendor's approach and
> > store the predefined scaler_version in hw catalog (in dpu_scaler_blk, as
> > it currently is). This way we can still drop all QSEED3/3LITE/4
> > crazyness, while keeping the data sane.
>
> You want to drop the descriptive #define's, and replace them with magic
> 0x1002/0x2004/0x3000 and whatever other values we know?

And nothing stops us from adding defines for 0x2004
(SCALER_VERSION_QSEED3LITE) and 0x3000 (SCALER_VERSION_QSEED4). I'm
not sure regarding 0x1002: whether it is used on msm8998 and/or sdm630
too or not.

What I want to remove is the duplication of the information. It was
too easy to miss that vig_mask has version1, while the dpu_caps has
version 2. We are going to replace dpu_caps with scaler_version, but
the problem of having the duplicate still exists. I might have
suggested settling on the dpu_caps.qseed_type or on the bit in
dpu_sspp_cfg.features, but it seems that 0x1002 is not represented
this way. Unless we define something like
DPU_SSPP_SCALER_QSEED3_SDM660.

> That seems
> impossible to port without reading back the register value, which we've
> only done for a handful of SoCs.  I hope I'm misunderstanding you?

Newer vendor dts files provide this value, see the
"qcom,sde-qseed-scalar-version" property.
For older platforms we'd have to read the register. See below

> After all the vendor approach (in a random 4.14 kernel I have open now)
> is to read the register value at runtime but their catalog is also
> dynamic and built at runtime based on version ranges and register reads,
> which sometimes is more sensible.  Ours is const.

In later techpacks (since 5.4) they have switched to the property in the DTS.

>
> > Then _dpu_hw_sspp_get_scaler3_ver() can also be dropped (or you can use
> > it as a safety guard while doing dpu_hw_sspp init).
>
> That (safety guard) is exactly what Abhinav requested against, since the
> kernel (and our catalog) should be trustworthy.  I'll let you two fight
> this out and come to a consensus before sending

Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-17 Thread Rob Clark
On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin
 wrote:
>
>
> On 16/02/2023 18:19, Rodrigo Vivi wrote:
> > On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote:
> >> On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin
> >>  wrote:
> >>>
> >>> From: Tvrtko Ursulin 
> >>>
> >>> In i915 we have this concept of "wait boosting" where we give a priority 
> >>> boost
> >>> for instance to fences which are actively waited upon from userspace. 
> >>> This has
> >>> it's pros and cons and can certainly be discussed at lenght. However fact 
> >>> is
> >>> some workloads really like it.
> >>>
> >>> Problem is that with the arrival of drm syncobj and a new userspace 
> >>> waiting
> >>> entry point it added, the waitboost mechanism was bypassed. Hence I 
> >>> cooked up
> >>> this mini series really (really) quickly to see if some discussion can be 
> >>> had.
> >>>
> >>> It adds a concept of "wait count" to dma fence, which is incremented for 
> >>> every
> >>> explicit dma_fence_enable_sw_signaling and dma_fence_add_wait_callback 
> >>> (like
> >>> dma_fence_add_callback but from explicit/userspace wait paths).
> >>
> >> I was thinking about a similar thing, but in the context of dma_fence
> >> (or rather sync_file) fd poll()ing.  How does the kernel differentiate
> >> between "housekeeping" poll()ers that don't want to trigger boost but
> >> simply know when to do cleanup, and waiters who are waiting with some
> >> urgency.  I think we could use EPOLLPRI for this purpose.
> >>
> >> Not sure how that translates to waits via the syncobj.  But I think we
> >> want to let userspace give some hint about urgent vs housekeeping
> >> waits.
> >
> > Should the hint be on the waits, or should the hints be on the executed
> > context?
> >
> > In the end we need some way to quickly ramp-up the frequency to avoid
> > the execution bubbles.
> >
> > waitboost is trying to guess that, but in some cases it guess wrong
> > and waste power.
>
> Do we have a list of workloads which shows who benefits and who loses
> from the current implementation of waitboost?
> > btw, this is something that other drivers might need:
> >
> > https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883
> > Cc: Alex Deucher 
>
> I have several issues with the context hint if it would directly
> influence frequency selection in the "more power" direction.
>
> First of all, assume a context hint would replace the waitboost. Which
> applications would need to set it to restore the lost performance and
> how would they set it?
>
> Then I don't even think userspace necessarily knows. Think of a layer
> like OpenCL. It doesn't really know in advance the profile of
> submissions vs waits. It depends on the CPU vs GPU speed, so hardware
> generation, and the actual size of the workload which can be influenced
> by the application (or user) and not the library.
>
> The approach also lends itself well for the "arms race" where every
> application can say "Me me me, I am the most important workload there is!".

since there is discussion happening in two places:

https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433

What I think you might want is a ctx boost_mask which lets an app or
driver disable certain boost signals/classes.  Where fence waits is
one class of boost, but hypothetical other signals like touchscreen
(or other) input events could be another class of boost.  A compute
workload might be interested in fence wait boosts but could care less
about input events.

> The last concern is for me shared with the proposal to expose deadlines
> or high priority waits as explicit uapi knobs. Both come under the "what
> application told us it will do" category vs what it actually does. So I
> think it is slightly weaker than basing decisions of waits.
>
> The current waitboost is a bit detached from that problem because when
> we waitboost for flips we _know_ it is an actual framebuffer in the flip
> chain. When we waitboost for waits we also know someone is waiting. We
> are not trusting userspace telling us this will be a buffer in the flip
> chain or that this is a context which will have a certain duty-cycle.
>
> But yes, even if the input is truthful, latter is still only a
> heuristics because nothing says all waits are important. AFAIU it just
> happened to work well in the past.
>
> I do understand I am effectively arguing for more heuristics, which may
> sound a bit against the common wisdom. This is because in general I
> think the logic to do the right thing, be it in the driver or in the
> firmware, can work best if it has a holistic view. Simply put it needs
> to have more inputs to the decisions it is making.
>
> That is what my series is proposing - adding a common signal of "someone
> in userspace is waiting". What happens with that signal needs not be
> defined (promised) in the uapi contract.
>
> Say you route it to SLPC logic. It doesn't need to do with it what
> legacy i915 is doing today. It just needs to do something which w

Re: [PATCH 6/6] drm/tidss: Implement struct drm_plane_helper_funcs.atomic_enable

2023-02-17 Thread Tomi Valkeinen

On 09/02/2023 17:41, Thomas Zimmermann wrote:

Enable the primary plane for tidss hardware via atomic_enable.
Atomic helpers invoke this callback only when the plane becomes
active.

Signed-off-by: Thomas Zimmermann 
---
  drivers/gpu/drm/tidss/tidss_plane.c | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/tidss/tidss_plane.c 
b/drivers/gpu/drm/tidss/tidss_plane.c
index 0b12405edb47..6bdd6e4a955a 100644
--- a/drivers/gpu/drm/tidss/tidss_plane.c
+++ b/drivers/gpu/drm/tidss/tidss_plane.c
@@ -124,6 +124,16 @@ static void tidss_plane_atomic_update(struct drm_plane 
*plane,
hw_videoport = to_tidss_crtc(new_state->crtc)->hw_videoport;
  
  	dispc_plane_setup(tidss->dispc, tplane->hw_plane_id, new_state, hw_videoport);

+}
+
+static void tidss_plane_atomic_enable(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+   struct drm_device *ddev = plane->dev;
+   struct tidss_device *tidss = to_tidss(ddev);
+   struct tidss_plane *tplane = to_tidss_plane(plane);
+
+   dev_dbg(ddev->dev, "%s\n", __func__);
  
  	dispc_plane_enable(tidss->dispc, tplane->hw_plane_id, true);

  }
@@ -151,6 +161,7 @@ static void drm_plane_destroy(struct drm_plane *plane)
  static const struct drm_plane_helper_funcs tidss_plane_helper_funcs = {
.atomic_check = tidss_plane_atomic_check,
.atomic_update = tidss_plane_atomic_update,
+   .atomic_enable = tidss_plane_atomic_enable,
.atomic_disable = tidss_plane_atomic_disable,
  };
  


I haven't tested this, but looks fine to me.

Reviewed-by: Tomi Valkeinen 

One thought, though, is that we still do dispc_plane_enable(false) in 
tidss_plane_atomic_update() when the plane is not visible. Not a 
problem, but it would be nice to only enable/disable the plane inside 
atomic_enable/disable.


Or maybe in cases like this the driver should only use atomic_update, 
and do all the enabling and disabling there...


 Tomi



Re: [PATCH 5/6] drm/tidss: Remove return values from dispc_plane_{setup,enable}()

2023-02-17 Thread Tomi Valkeinen

On 09/02/2023 17:41, Thomas Zimmermann wrote:

Calls to dispc_plane_setup() and dispc_plane_enable() cannot fail.
Remove the return value.

Signed-off-by: Thomas Zimmermann 
---
  drivers/gpu/drm/tidss/tidss_dispc.c | 12 
  drivers/gpu/drm/tidss/tidss_dispc.h |  8 
  drivers/gpu/drm/tidss/tidss_plane.c | 11 +--
  3 files changed, 9 insertions(+), 22 deletions(-)



Reviewed-by: Tomi Valkeinen 

 Tomi



Patch "drm: Disable dynamic debug as broken" has been added to the 6.1-stable tree

2023-02-17 Thread gregkh


This is a note to let you know that I've just added the patch titled

drm: Disable dynamic debug as broken

to the 6.1-stable tree which can be found at:

http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
 drm-disable-dynamic-debug-as-broken.patch
and it can be found in the queue-6.1 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let  know about it.


>From bb2ff6c27bc9e1da4d3ec5e7b1d6b9df1092cb5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= 
Date: Tue, 7 Feb 2023 16:33:37 +0200
Subject: drm: Disable dynamic debug as broken
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: Ville Syrjälä 

commit bb2ff6c27bc9e1da4d3ec5e7b1d6b9df1092cb5a upstream.

CONFIG_DRM_USE_DYNAMIC_DEBUG breaks debug prints for (at least modular)
drm drivers. The debug prints can be reinstated by manually frobbing
/sys/module/drm/parameters/debug after the fact, but at that point the
damage is done and all debugs from driver probe are lost. This makes
drivers totally undebuggable.

There's a more complete fix in progress [1], with further details, but
we need this fixed in stable kernels. Mark the feature as broken and
disable it by default, with hopes distros follow suit and disable it as
well.

[1] https://lore.kernel.org/r/20230125203743.564009-1-jim.cro...@gmail.com

Fixes: 84ec67288c10 ("drm_print: wrap drm_*_dbg in dyndbg descriptor factory 
macro")
Cc: Jim Cromie 
Cc: Greg Kroah-Hartman 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: dri-devel@lists.freedesktop.org
Cc:  # v6.1+
Signed-off-by: Ville Syrjälä 
Acked-by: Greg Kroah-Hartman 
Acked-by: Jim Cromie 
Acked-by: Maxime Ripard 
Signed-off-by: Jani Nikula 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20230207143337.2126678-1-jani.nik...@intel.com
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/gpu/drm/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 315cbdf61979..9abfb482b615 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -53,7 +53,8 @@ config DRM_DEBUG_MM
 
 config DRM_USE_DYNAMIC_DEBUG
bool "use dynamic debug to implement drm.debug"
-   default y
+   default n
+   depends on BROKEN
depends on DRM
depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE
depends on JUMP_LABEL
-- 
2.39.1



Patches currently in stable-queue which might be from 
ville.syrj...@linux.intel.com are

queue-6.1/drm-disable-dynamic-debug-as-broken.patch


Re: [PATCH 1/3] drm/suballoc: Introduce a generic suballocation manager

2023-02-17 Thread Thomas Hellström



On 2/17/23 14:18, Christian König wrote:

Am 17.02.23 um 14:10 schrieb Thomas Hellström:

[SNIP]


Any chance you could do a quick performance comparison? If not, 
anything against merging this without the amd / radeon changes 
until we can land a simpler allocator?


Only if you can stick the allocator inside Xe and not drm, cause 
this seems to be for a different use case than the allocators 
inside radeon/amdgpu.


Hmm. No, it's allocating in a ring-like fashion as well.  Let me put 
together a unit test for benchmarking. I think it would be a failure 
for the community to end up with three separate suballocators doing 
the exact same thing for the same problem, really.


Well exactly that's the point. Those allocators aren't the same 
because they handle different problems.


The allocator in radeon is simpler because it only had to deal with 
a limited number of fence timelines. The one in amdgpu is a bit more 
complex because of the added complexity for more fence timelines.


We could take the one from amdgpu and use it for radeon and others 
as well, but the allocator proposed here doesn't even remotely 
matches the requirements.


But again, what *are* those missing requirements exactly? What is the 
pathological case you see for the current code?


Well very low CPU overhead and don't do anything in a callback.


Well, dma_fence_wait_any() will IIRC register callbacks on all affected 
fences, although admittedly there is no actual allocator processing in them.






From what I can tell the amdgpu suballocator introduces excessive 
complexity to coalesce waits for fences from the same contexts, 
whereas the present code just frees from the fence callback if the 
fence wasn't already signaled.


And this is exactly the design we had previously which we removed 
after Dave stumbled over tons of problems with it.


So is the worry that those problems have spilled over in this code then? 
It's been pretty extensively tested, or is it you should never really 
use dma-fence callbacks?




The fence signalling code that fires that callback is typically always 
run anyway on scheduler fences.


The reason we had for not using the amdgpu suballocator as originally 
planned was that this complexity made it very hard for us to 
understand it and to fix issues we had with it.


Well what are those problems? The idea is actually not that hard 
to understand.


We hit memory corruption, and we spent substantially more time trying to 
debug it than to put together this patch, while never really 
understanding what  happened, nor why you don't see that with amdgpu.




We could simplify it massively for the cost of only waiting for the 
oldest fence if that helps.


Let me grab the latest version from amdgpu and give it a try again, but 
yes I think that to make it common code we'll need it simpler (and my 
personal wish would be to separate the allocator functionality a bit 
more from the fence waiting, which I guess should be OK if the fence 
waiting is vastly simplified).


/Thomas





Regards,
Christian.



Regards,

Thomas




[PATCH drm-next v2 14/16] drm/nouveau: implement uvmm for user mode bindings

2023-02-17 Thread Danilo Krummrich
uvmm provides the driver abstraction around the DRM GPU VA manager
connecting it to the nouveau infrastructure.

It handles the split and merge operations provided by the DRM GPU VA
manager for map operations colliding with existent mappings and takes
care of the driver specific locking around the DRM GPU VA manager.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/Kbuild  |1 +
 drivers/gpu/drm/nouveau/nouveau_abi16.c |7 +
 drivers/gpu/drm/nouveau/nouveau_bo.c|  147 +--
 drivers/gpu/drm/nouveau/nouveau_bo.h|2 +-
 drivers/gpu/drm/nouveau/nouveau_drm.c   |2 +
 drivers/gpu/drm/nouveau/nouveau_drv.h   |   48 +
 drivers/gpu/drm/nouveau/nouveau_gem.c   |   25 +-
 drivers/gpu/drm/nouveau/nouveau_mem.h   |5 +
 drivers/gpu/drm/nouveau/nouveau_prime.c |2 +-
 drivers/gpu/drm/nouveau/nouveau_uvmm.c  | 1090 +++
 drivers/gpu/drm/nouveau/nouveau_uvmm.h  |  110 +++
 11 files changed, 1378 insertions(+), 61 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_uvmm.c
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_uvmm.h

diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild
index 5e5617006da5..ee281bb76463 100644
--- a/drivers/gpu/drm/nouveau/Kbuild
+++ b/drivers/gpu/drm/nouveau/Kbuild
@@ -47,6 +47,7 @@ nouveau-y += nouveau_prime.o
 nouveau-y += nouveau_sgdma.o
 nouveau-y += nouveau_ttm.o
 nouveau-y += nouveau_vmm.o
+nouveau-y += nouveau_uvmm.o
 
 # DRM - modesetting
 nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c 
b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 82dab51d8aeb..36cc80eb0e20 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -261,6 +261,13 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
if (!drm->channel)
return nouveau_abi16_put(abi16, -ENODEV);
 
+   /* If uvmm wasn't initialized until now disable it completely to prevent
+* userspace from mixing up UAPIs.
+*
+* The client lock is already acquired by nouveau_abi16_get().
+*/
+   __nouveau_cli_uvmm_disable(cli);
+
device = &abi16->device;
engine = NV_DEVICE_HOST_RUNLIST_ENGINES_GR;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index bf6984c8754c..f3d73d6edd46 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -199,7 +199,7 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, int *align, 
u64 *size)
 
 struct nouveau_bo *
 nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
-u32 tile_mode, u32 tile_flags)
+u32 tile_mode, u32 tile_flags, bool internal)
 {
struct nouveau_drm *drm = cli->drm;
struct nouveau_bo *nvbo;
@@ -235,68 +235,103 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int 
*align, u32 domain,
nvbo->force_coherent = true;
}
 
-   if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
-   nvbo->kind = (tile_flags & 0xff00) >> 8;
-   if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
-   kfree(nvbo);
-   return ERR_PTR(-EINVAL);
+   nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG);
+   if (!nouveau_cli_uvmm(cli) || internal) {
+   /* for BO noVM allocs, don't assign kinds */
+   if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
+   nvbo->kind = (tile_flags & 0xff00) >> 8;
+   if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
+   kfree(nvbo);
+   return ERR_PTR(-EINVAL);
+   }
+
+   nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
+   } else if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+   nvbo->kind = (tile_flags & 0x7f00) >> 8;
+   nvbo->comp = (tile_flags & 0x0003) >> 16;
+   if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
+   kfree(nvbo);
+   return ERR_PTR(-EINVAL);
+   }
+   } else {
+   nvbo->zeta = (tile_flags & 0x0007);
}
+   nvbo->mode = tile_mode;
+
+   /* Determine the desirable target GPU page size for the buffer. 
*/
+   for (i = 0; i < vmm->page_nr; i++) {
+   /* Because we cannot currently allow VMM maps to fail
+* during buffer migration, we need to determine page
+* size for the buffer up-front, and pre-allocate its
+* page tables.
+*
+* Skip page sizes that can't support needed domain

[PATCH drm-next v2 12/16] drm/nouveau: chan: provide nouveau_channel_kill()

2023-02-17 Thread Danilo Krummrich
The new VM_BIND UAPI implementation introduced in subsequent commits
will allow asynchronous jobs processing push buffers and emitting fences.

If a job times out, we need a way to recover from this situation. For
now, simply kill the channel to unblock all hung up jobs and signal
userspace that the device is dead on the next EXEC or VM_BIND ioctl.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_chan.c | 14 +++---
 drivers/gpu/drm/nouveau/nouveau_chan.h |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 1068abe41024..6f47e997d9cf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -40,6 +40,14 @@ MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in 
VRAM");
 int nouveau_vram_pushbuf;
 module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400);
 
+void
+nouveau_channel_kill(struct nouveau_channel *chan)
+{
+   atomic_set(&chan->killed, 1);
+   if (chan->fence)
+   nouveau_fence_context_kill(chan->fence, -ENODEV);
+}
+
 static int
 nouveau_channel_killed(struct nvif_event *event, void *repv, u32 repc)
 {
@@ -47,9 +55,9 @@ nouveau_channel_killed(struct nvif_event *event, void *repv, 
u32 repc)
struct nouveau_cli *cli = (void *)chan->user.client;
 
NV_PRINTK(warn, cli, "channel %d killed!\n", chan->chid);
-   atomic_set(&chan->killed, 1);
-   if (chan->fence)
-   nouveau_fence_context_kill(chan->fence, -ENODEV);
+
+   if (unlikely(!atomic_read(&chan->killed)))
+   nouveau_channel_kill(chan);
 
return NVIF_EVENT_DROP;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h 
b/drivers/gpu/drm/nouveau/nouveau_chan.h
index e06a8ffed31a..e483f4a254da 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -65,6 +65,7 @@ int  nouveau_channel_new(struct nouveau_drm *, struct 
nvif_device *, bool priv,
 u32 vram, u32 gart, struct nouveau_channel **);
 void nouveau_channel_del(struct nouveau_channel **);
 int  nouveau_channel_idle(struct nouveau_channel *);
+void nouveau_channel_kill(struct nouveau_channel *);
 
 extern int nouveau_vram_pushbuf;
 
-- 
2.39.1



[PATCH drm-next v2 13/16] drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm

2023-02-17 Thread Danilo Krummrich
The new VM_BIND UAPI uses the DRM GPU VA manager to manage the VA space.
Hence, we need a way to manipulate the MMUs page tables without going
through the internal range allocator implemented by nvkm/vmm.

This patch adds a raw interface for nvkm/vmm to pass the responsibility
for managing the address space and the corresponding map/unmap/sparse
operations to the upper layers.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/include/nvif/if000c.h |  26 ++-
 drivers/gpu/drm/nouveau/include/nvif/vmm.h|  19 +-
 .../gpu/drm/nouveau/include/nvkm/subdev/mmu.h |  20 +-
 drivers/gpu/drm/nouveau/nouveau_svm.c |   2 +-
 drivers/gpu/drm/nouveau/nouveau_vmm.c |   4 +-
 drivers/gpu/drm/nouveau/nvif/vmm.c| 100 +++-
 .../gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c| 213 --
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 197 
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h |  25 ++
 .../drm/nouveau/nvkm/subdev/mmu/vmmgf100.c|  16 +-
 .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c|  16 +-
 .../gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c |  27 ++-
 12 files changed, 566 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h 
b/drivers/gpu/drm/nouveau/include/nvif/if000c.h
index 9c7ff56831c5..a5a182b3c28d 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h
@@ -3,7 +3,10 @@
 struct nvif_vmm_v0 {
__u8  version;
__u8  page_nr;
-   __u8  managed;
+#define NVIF_VMM_V0_TYPE_UNMANAGED 0x00
+#define NVIF_VMM_V0_TYPE_MANAGED   0x01
+#define NVIF_VMM_V0_TYPE_RAW   0x02
+   __u8  type;
__u8  pad03[5];
__u64 addr;
__u64 size;
@@ -17,6 +20,7 @@ struct nvif_vmm_v0 {
 #define NVIF_VMM_V0_UNMAP  0x04
 #define NVIF_VMM_V0_PFNMAP 0x05
 #define NVIF_VMM_V0_PFNCLR 0x06
+#define NVIF_VMM_V0_RAW0x07
 #define NVIF_VMM_V0_MTHD(i) ((i) + 
0x80)
 
 struct nvif_vmm_page_v0 {
@@ -66,6 +70,26 @@ struct nvif_vmm_unmap_v0 {
__u64 addr;
 };
 
+struct nvif_vmm_raw_v0 {
+   __u8 version;
+#define NVIF_VMM_RAW_V0_GET0x0
+#define NVIF_VMM_RAW_V0_PUT0x1
+#define NVIF_VMM_RAW_V0_MAP0x2
+#define NVIF_VMM_RAW_V0_UNMAP  0x3
+#define NVIF_VMM_RAW_V0_SPARSE 0x4
+   __u8  op;
+   __u8  sparse;
+   __u8  ref;
+   __u8  shift;
+   __u32 argc;
+   __u8  pad01[7];
+   __u64 addr;
+   __u64 size;
+   __u64 offset;
+   __u64 memory;
+   __u64 argv;
+};
+
 struct nvif_vmm_pfnmap_v0 {
__u8  version;
__u8  page;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/vmm.h 
b/drivers/gpu/drm/nouveau/include/nvif/vmm.h
index a2ee92201ace..0ecedd0ee0a5 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/vmm.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/vmm.h
@@ -4,6 +4,12 @@
 struct nvif_mem;
 struct nvif_mmu;
 
+enum nvif_vmm_type {
+   UNMANAGED,
+   MANAGED,
+   RAW,
+};
+
 enum nvif_vmm_get {
ADDR,
PTES,
@@ -30,8 +36,9 @@ struct nvif_vmm {
int page_nr;
 };
 
-int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass, bool 
managed,
- u64 addr, u64 size, void *argv, u32 argc, struct nvif_vmm *);
+int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass,
+ enum nvif_vmm_type, u64 addr, u64 size, void *argv, u32 argc,
+ struct nvif_vmm *);
 void nvif_vmm_dtor(struct nvif_vmm *);
 int nvif_vmm_get(struct nvif_vmm *, enum nvif_vmm_get, bool sparse,
 u8 page, u8 align, u64 size, struct nvif_vma *);
@@ -39,4 +46,12 @@ void nvif_vmm_put(struct nvif_vmm *, struct nvif_vma *);
 int nvif_vmm_map(struct nvif_vmm *, u64 addr, u64 size, void *argv, u32 argc,
 struct nvif_mem *, u64 offset);
 int nvif_vmm_unmap(struct nvif_vmm *, u64);
+
+int nvif_vmm_raw_get(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift);
+int nvif_vmm_raw_put(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift);
+int nvif_vmm_raw_map(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift,
+void *argv, u32 argc, struct nvif_mem *mem, u64 offset);
+int nvif_vmm_raw_unmap(struct nvif_vmm *vmm, u64 addr, u64 size,
+  u8 shift, bool sparse);
+int nvif_vmm_raw_sparse(struct nvif_vmm *vmm, u64 addr, u64 size, bool ref);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h 
b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index 70e7887ef4b4..2fd2f2433fc7 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -17,6 +17,7 @@ s

[PATCH drm-next v2 15/16] drm/nouveau: implement new VM_BIND UAPI

2023-02-17 Thread Danilo Krummrich
This commit provides the implementation for the new uapi motivated by the
Vulkan API. It allows user mode drivers (UMDs) to:

1) Initialize a GPU virtual address (VA) space via the new
   DRM_IOCTL_NOUVEAU_VM_INIT ioctl for UMDs to specify the portion of VA
   space managed by the kernel and userspace, respectively.

2) Allocate and free a VA space region as well as bind and unbind memory
   to the GPUs VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl.
   UMDs can request the named operations to be processed either
   synchronously or asynchronously. It supports DRM syncobjs
   (incl. timelines) as synchronization mechanism. The management of the
   GPU VA mappings is implemented with the DRM GPU VA manager.

3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl. The
   execution happens asynchronously. It supports DRM syncobj (incl.
   timelines) as synchronization mechanism. DRM GEM object locking is
   handled with drm_exec.

Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC, use the DRM
GPU scheduler for the asynchronous paths.

Signed-off-by: Danilo Krummrich 
---
 Documentation/gpu/driver-uapi.rst   |   3 +
 drivers/gpu/drm/nouveau/Kbuild  |   2 +
 drivers/gpu/drm/nouveau/Kconfig |   2 +
 drivers/gpu/drm/nouveau/nouveau_abi16.c |  16 +
 drivers/gpu/drm/nouveau/nouveau_abi16.h |   1 +
 drivers/gpu/drm/nouveau/nouveau_drm.c   |  24 +-
 drivers/gpu/drm/nouveau/nouveau_drv.h   |   9 +-
 drivers/gpu/drm/nouveau/nouveau_exec.c  | 322 
 drivers/gpu/drm/nouveau/nouveau_exec.h  |  39 ++
 drivers/gpu/drm/nouveau/nouveau_sched.c | 467 
 drivers/gpu/drm/nouveau/nouveau_sched.h |  96 +
 drivers/gpu/drm/nouveau/nouveau_uvmm.c  | 446 ++
 drivers/gpu/drm/nouveau/nouveau_uvmm.h  |  28 ++
 13 files changed, 1451 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_exec.c
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_exec.h
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_sched.c
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_sched.h

diff --git a/Documentation/gpu/driver-uapi.rst 
b/Documentation/gpu/driver-uapi.rst
index 9c7ca6e33a68..c08bcbb95fb3 100644
--- a/Documentation/gpu/driver-uapi.rst
+++ b/Documentation/gpu/driver-uapi.rst
@@ -13,4 +13,7 @@ drm/nouveau uAPI
 VM_BIND / EXEC uAPI
 ---
 
+.. kernel-doc:: drivers/gpu/drm/nouveau/nouveau_exec.c
+:doc: Overview
+
 .. kernel-doc:: include/uapi/drm/nouveau_drm.h
diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild
index ee281bb76463..cf6b3a80c0c8 100644
--- a/drivers/gpu/drm/nouveau/Kbuild
+++ b/drivers/gpu/drm/nouveau/Kbuild
@@ -47,6 +47,8 @@ nouveau-y += nouveau_prime.o
 nouveau-y += nouveau_sgdma.o
 nouveau-y += nouveau_ttm.o
 nouveau-y += nouveau_vmm.o
+nouveau-y += nouveau_exec.o
+nouveau-y += nouveau_sched.o
 nouveau-y += nouveau_uvmm.o
 
 # DRM - modesetting
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index a70bd65e1400..c52e8096cca4 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -10,6 +10,8 @@ config DRM_NOUVEAU
select DRM_KMS_HELPER
select DRM_TTM
select DRM_TTM_HELPER
+   select DRM_EXEC
+   select DRM_SCHED
select I2C
select I2C_ALGOBIT
select BACKLIGHT_CLASS_DEVICE if DRM_NOUVEAU_BACKLIGHT
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c 
b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 36cc80eb0e20..694777a58bca 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -35,6 +35,7 @@
 #include "nouveau_chan.h"
 #include "nouveau_abi16.h"
 #include "nouveau_vmm.h"
+#include "nouveau_sched.h"
 
 static struct nouveau_abi16 *
 nouveau_abi16(struct drm_file *file_priv)
@@ -125,6 +126,17 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
 {
struct nouveau_abi16_ntfy *ntfy, *temp;
 
+   /* When a client exits without waiting for it's queued up jobs to
+* finish it might happen that we fault the channel. This is due to
+* drm_file_free() calling drm_gem_release() before the postclose()
+* callback. Hence, we can't tear down this scheduler entity before
+* uvmm mappings are unmapped. Currently, we can't detect this case.
+*
+* However, this should be rare and harmless, since the channel isn't
+* needed anymore.
+*/
+   nouveau_sched_entity_fini(&chan->sched_entity);
+
/* wait for all activity to stop before cleaning up */
if (chan->chan)
nouveau_channel_idle(chan->chan);
@@ -311,6 +323,10 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
if (ret)
goto done;
 
+   ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched);
+   if (ret)
+   goto done;
+
init->channel = chan->chan->chid;
 
if (device->info.famil

[PATCH drm-next v2 09/16] drm/nouveau: bo: initialize GEM GPU VA interface

2023-02-17 Thread Danilo Krummrich
Initialize the GEM's DRM GPU VA manager interface in preparation for the
(u)vmm implementation, provided by subsequent commits, to make use of it.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index f3039c1f87c9..bf6984c8754c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -215,11 +215,14 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int 
*align, u32 domain,
nvbo = kzalloc(sizeof(struct nouveau_bo), GFP_KERNEL);
if (!nvbo)
return ERR_PTR(-ENOMEM);
+
INIT_LIST_HEAD(&nvbo->head);
INIT_LIST_HEAD(&nvbo->entry);
INIT_LIST_HEAD(&nvbo->vma_list);
nvbo->bo.bdev = &drm->ttm.bdev;
 
+   drm_gem_gpuva_init(&nvbo->bo.base);
+
/* This is confusing, and doesn't actually mean we want an uncached
 * mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated
 * into in nouveau_gem_new().
-- 
2.39.1



[PATCH drm-next v2 16/16] drm/nouveau: debugfs: implement DRM GPU VA debugfs

2023-02-17 Thread Danilo Krummrich
Provide the driver indirection iterating over all DRM GPU VA spaces to
enable the common 'gpuvas' debugfs file for dumping DRM GPU VA spaces.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_debugfs.c | 24 +++
 1 file changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c 
b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 2a36d1ca8fda..7f6ccc5d1d86 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -202,6 +202,29 @@ nouveau_debugfs_pstate_open(struct inode *inode, struct 
file *file)
return single_open(file, nouveau_debugfs_pstate_get, inode->i_private);
 }
 
+static int
+nouveau_debugfs_gpuva(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = (struct drm_info_node *) m->private;
+   struct nouveau_drm *drm = nouveau_drm(node->minor->dev);
+   struct nouveau_cli *cli;
+
+   mutex_lock(&drm->clients_lock);
+   list_for_each_entry(cli, &drm->clients, head) {
+   struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
+
+   if (!uvmm)
+   continue;
+
+   nouveau_uvmm_lock(uvmm);
+   drm_debugfs_gpuva_info(m, &uvmm->umgr);
+   nouveau_uvmm_unlock(uvmm);
+   }
+   mutex_unlock(&drm->clients_lock);
+
+   return 0;
+}
+
 static const struct file_operations nouveau_pstate_fops = {
.owner = THIS_MODULE,
.open = nouveau_debugfs_pstate_open,
@@ -213,6 +236,7 @@ static const struct file_operations nouveau_pstate_fops = {
 static struct drm_info_list nouveau_debugfs_list[] = {
{ "vbios.rom",  nouveau_debugfs_vbios_image, 0, NULL },
{ "strap_peek", nouveau_debugfs_strap_peek, 0, NULL },
+   DRM_DEBUGFS_GPUVA_INFO(nouveau_debugfs_gpuva, NULL),
 };
 #define NOUVEAU_DEBUGFS_ENTRIES ARRAY_SIZE(nouveau_debugfs_list)
 
-- 
2.39.1



[PATCH drm-next v2 02/16] drm/exec: fix memory leak in drm_exec_prepare_obj()

2023-02-17 Thread Danilo Krummrich
Don't call drm_gem_object_get() unconditionally.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/drm_exec.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
index ed2106c22786..5713a589a6a3 100644
--- a/drivers/gpu/drm/drm_exec.c
+++ b/drivers/gpu/drm/drm_exec.c
@@ -282,7 +282,6 @@ int drm_exec_prepare_obj(struct drm_exec *exec, struct 
drm_gem_object *obj,
goto error_unlock;
}
 
-   drm_gem_object_get(obj);
return 0;
 
 error_unlock:
-- 
2.39.1



[PATCH drm-next v2 11/16] drm/nouveau: fence: fail to emit when fence context is killed

2023-02-17 Thread Danilo Krummrich
The new VM_BIND UAPI implementation introduced in subsequent commits
will allow asynchronous jobs processing push buffers and emitting
fences.

If a fence context is killed, e.g. due to a channel fault, jobs which
are already queued for execution might still emit new fences. In such a
case a job would hang forever.

To fix that, fail to emit a new fence on a killed fence context with
-ENODEV to unblock the job.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_fence.c | 7 +++
 drivers/gpu/drm/nouveau/nouveau_fence.h | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c 
b/drivers/gpu/drm/nouveau/nouveau_fence.c
index ee5e9d40c166..62c70d9a32e6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -96,6 +96,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, 
int error)
if (nouveau_fence_signal(fence))
nvif_event_block(&fctx->event);
}
+   fctx->killed = 1;
spin_unlock_irqrestore(&fctx->lock, flags);
 }
 
@@ -226,6 +227,12 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct 
nouveau_channel *chan)
dma_fence_get(&fence->base);
spin_lock_irq(&fctx->lock);
 
+   if (unlikely(fctx->killed)) {
+   spin_unlock_irq(&fctx->lock);
+   dma_fence_put(&fence->base);
+   return -ENODEV;
+   }
+
if (nouveau_fence_update(chan, fctx))
nvif_event_block(&fctx->event);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h 
b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 0ca2bc85adf6..00a08699bb58 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -45,7 +45,7 @@ struct nouveau_fence_chan {
char name[32];
 
struct nvif_event event;
-   int notify_ref, dead;
+   int notify_ref, dead, killed;
 };
 
 struct nouveau_fence_priv {
-- 
2.39.1



[PATCH drm-next v2 10/16] drm/nouveau: move usercopy helpers to nouveau_drv.h

2023-02-17 Thread Danilo Krummrich
Move the usercopy helpers to a common driver header file to make it
usable for the new API added in subsequent commits.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_drv.h | 26 ++
 drivers/gpu/drm/nouveau/nouveau_gem.c | 26 --
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h 
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 81350e685b50..20a7f31b9082 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -130,6 +130,32 @@ nouveau_cli(struct drm_file *fpriv)
return fpriv ? fpriv->driver_priv : NULL;
 }
 
+static inline void
+u_free(void *addr)
+{
+   kvfree(addr);
+}
+
+static inline void *
+u_memcpya(uint64_t user, unsigned nmemb, unsigned size)
+{
+   void *mem;
+   void __user *userptr = (void __force __user *)(uintptr_t)user;
+
+   size *= nmemb;
+
+   mem = kvmalloc(size, GFP_KERNEL);
+   if (!mem)
+   return ERR_PTR(-ENOMEM);
+
+   if (copy_from_user(mem, userptr, size)) {
+   u_free(mem);
+   return ERR_PTR(-EFAULT);
+   }
+
+   return mem;
+}
+
 #include 
 #include 
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c 
b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 08689ced4f6a..4369c8dc8b5b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -613,32 +613,6 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
return 0;
 }
 
-static inline void
-u_free(void *addr)
-{
-   kvfree(addr);
-}
-
-static inline void *
-u_memcpya(uint64_t user, unsigned nmemb, unsigned size)
-{
-   void *mem;
-   void __user *userptr = (void __force __user *)(uintptr_t)user;
-
-   size *= nmemb;
-
-   mem = kvmalloc(size, GFP_KERNEL);
-   if (!mem)
-   return ERR_PTR(-ENOMEM);
-
-   if (copy_from_user(mem, userptr, size)) {
-   u_free(mem);
-   return ERR_PTR(-EFAULT);
-   }
-
-   return mem;
-}
-
 static int
 nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
struct drm_nouveau_gem_pushbuf *req,
-- 
2.39.1



[PATCH drm-next v2 03/16] maple_tree: split up MA_STATE() macro

2023-02-17 Thread Danilo Krummrich
Split up the MA_STATE() macro such that components using the maple tree
can easily inherit from struct ma_state and build custom tree walk
macros to hide their internals from users.

Example:

struct sample_iter {
struct ma_state mas;
struct sample_mgr *mgr;
struct sample_entry *entry;
};

\#define SAMPLE_ITER(name, __mgr) \
struct sample_iter name = { \
.mas = __MA_STATE(&(__mgr)->mt, 0, 0),
.mgr = __mgr,
.entry = NULL,
}

\#define sample_iter_for_each_range(it__, start__, end__) \
for ((it__).mas.index = start__, (it__).entry = mas_find(&(it__).mas, 
end__ - 1); \
 (it__).entry; (it__).entry = mas_find(&(it__).mas, end__ - 1))

Signed-off-by: Danilo Krummrich 
---
 include/linux/maple_tree.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index e594db58a0f1..ca04c900e51a 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -424,8 +424,8 @@ struct ma_wr_state {
 #define MA_ERROR(err) \
((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
 
-#define MA_STATE(name, mt, first, end) \
-   struct ma_state name = {\
+#define __MA_STATE(mt, first, end) \
+   {   \
.tree = mt, \
.index = first, \
.last = end,\
@@ -435,6 +435,9 @@ struct ma_wr_state {
.alloc = NULL,  \
}
 
+#define MA_STATE(name, mt, first, end) \
+   struct ma_state name = __MA_STATE(mt, first, end)
+
 #define MA_WR_STATE(name, ma_state, wr_entry)  \
struct ma_wr_state name = { \
.mas = ma_state,\
-- 
2.39.1



[PATCH drm-next v2 08/16] drm/nouveau: get vmm via nouveau_cli_vmm()

2023-02-17 Thread Danilo Krummrich
Provide a getter function for the client's current vmm context. Since
we'll add a new (u)vmm context for UMD bindings in subsequent commits,
this will keep the code clean.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c   | 2 +-
 drivers/gpu/drm/nouveau/nouveau_chan.c | 2 +-
 drivers/gpu/drm/nouveau/nouveau_drv.h  | 9 +
 drivers/gpu/drm/nouveau/nouveau_gem.c  | 6 +++---
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 288eebc70a67..f3039c1f87c9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -204,7 +204,7 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int 
*align, u32 domain,
struct nouveau_drm *drm = cli->drm;
struct nouveau_bo *nvbo;
struct nvif_mmu *mmu = &cli->mmu;
-   struct nvif_vmm *vmm = cli->svm.cli ? &cli->svm.vmm : &cli->vmm.vmm;
+   struct nvif_vmm *vmm = &nouveau_cli_vmm(cli)->vmm;
int i, pi = -1;
 
if (!*size) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
b/drivers/gpu/drm/nouveau/nouveau_chan.c
index e648ecd0c1a0..1068abe41024 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -148,7 +148,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct 
nvif_device *device,
 
chan->device = device;
chan->drm = drm;
-   chan->vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
+   chan->vmm = nouveau_cli_vmm(cli);
atomic_set(&chan->killed, 0);
 
/* allocate memory for dma push buffer */
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h 
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index b5de312a523f..81350e685b50 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -112,6 +112,15 @@ struct nouveau_cli_work {
struct dma_fence_cb cb;
 };
 
+static inline struct nouveau_vmm *
+nouveau_cli_vmm(struct nouveau_cli *cli)
+{
+   if (cli->svm.cli)
+   return &cli->svm;
+
+   return &cli->vmm;
+}
+
 void nouveau_cli_work_queue(struct nouveau_cli *, struct dma_fence *,
struct nouveau_cli_work *);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c 
b/drivers/gpu/drm/nouveau/nouveau_gem.c
index f77e44958037..08689ced4f6a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -103,7 +103,7 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct 
drm_file *file_priv)
struct nouveau_bo *nvbo = nouveau_gem_object(gem);
struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
struct device *dev = drm->dev->dev;
-   struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
+   struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
struct nouveau_vma *vma;
int ret;
 
@@ -180,7 +180,7 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct 
drm_file *file_priv)
struct nouveau_bo *nvbo = nouveau_gem_object(gem);
struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
struct device *dev = drm->dev->dev;
-   struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : & cli->vmm;
+   struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
struct nouveau_vma *vma;
int ret;
 
@@ -269,7 +269,7 @@ nouveau_gem_info(struct drm_file *file_priv, struct 
drm_gem_object *gem,
 {
struct nouveau_cli *cli = nouveau_cli(file_priv);
struct nouveau_bo *nvbo = nouveau_gem_object(gem);
-   struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
+   struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
struct nouveau_vma *vma;
 
if (is_power_of_2(nvbo->valid_domains))
-- 
2.39.1



[PATCH drm-next v2 00/16] [RFC] DRM GPUVA Manager & Nouveau VM_BIND UAPI

2023-02-17 Thread Danilo Krummrich
This patch series provides a new UAPI for the Nouveau driver in order to
support Vulkan features, such as sparse bindings and sparse residency.

Furthermore, with the DRM GPUVA manager it provides a new DRM core feature to
keep track of GPU virtual address (VA) mappings in a more generic way.

The DRM GPUVA manager is intended to help drivers implement userspace-manageable
GPU VA spaces in reference to the Vulkan API. In order to achieve this goal it
serves the following purposes in this context.

1) Provide infrastructure to track GPU VA allocations and mappings,
   making use of the maple_tree.

2) Generically connect GPU VA mappings to their backing buffers, in
   particular DRM GEM objects.

3) Provide a common implementation to perform more complex mapping
   operations on the GPU VA space. In particular splitting and merging
   of GPU VA mappings, e.g. for intersecting mapping requests or partial
   unmap requests.

The new VM_BIND Nouveau UAPI build on top of the DRM GPUVA manager, itself
providing the following new interfaces.

1) Initialize a GPU VA space via the new DRM_IOCTL_NOUVEAU_VM_INIT ioctl
   for UMDs to specify the portion of VA space managed by the kernel and
   userspace, respectively.

2) Allocate and free a VA space region as well as bind and unbind memory
   to the GPUs VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl.

3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl.

Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC, make use of the DRM
scheduler to queue jobs and support asynchronous processing with DRM syncobjs
as synchronization mechanism.

By default DRM_IOCTL_NOUVEAU_VM_BIND does synchronous processing,
DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only.

The new VM_BIND UAPI for Nouveau makes also use of drm_exec (execution context
for GEM buffers) by Christian König. Since the patch implementing drm_exec was
not yet merged into drm-next it is part of this series, as well as a small fix
for this patch, which was found while testing this series.

This patch series is also available at [1].

There is a Mesa NVK merge request by Dave Airlie [2] implementing the
corresponding userspace parts for this series.

The Vulkan CTS test suite passes the sparse binding and sparse residency test
cases for the new UAPI together with Dave's Mesa work.

There are also some test cases in the igt-gpu-tools project [3] for the new UAPI
and hence the DRM GPU VA manager. However, most of them are testing the DRM GPU
VA manager's logic through Nouveau's new UAPI and should be considered just as
helper for implementation.

However, I absolutely intend to change those test cases to proper kunit test
cases for the DRM GPUVA manager, once and if we agree on it's usefulness and
design.

[1] https://gitlab.freedesktop.org/nouvelles/kernel/-/tree/new-uapi-drm-next /
https://gitlab.freedesktop.org/nouvelles/kernel/-/merge_requests/1
[2] https://gitlab.freedesktop.org/nouveau/mesa/-/merge_requests/150/
[3] https://gitlab.freedesktop.org/dakr/igt-gpu-tools/-/tree/wip_nouveau_vm_bind

Changes in V2:
==
  Nouveau:
- Reworked the Nouveau VM_BIND UAPI to avoid memory allocations in fence
  signalling critical sections. Updates to the VA space are split up in 
three
  separate stages, where only the 2. stage executes in a fence signalling
  critical section:

1. update the VA space, allocate new structures and page tables
2. (un-)map the requested memory bindings
3. free structures and page tables

- Separated generic job scheduler code from specific job implementations.
- Separated the EXEC and VM_BIND implementation of the UAPI.
- Reworked the locking parts of the nvkm/vmm RAW interface, such that
  (un-)map operations can be executed in fence signalling critical sections.

  GPUVA Manager:
- made drm_gpuva_regions optional for users of the GPUVA manager
- allow NULL GEMs for drm_gpuva entries
- switched from drm_mm to maple_tree for tracking drm_gpuva / drm_gpuva_region
  entries
- provide callbacks for users to allocate custom drm_gpuva_op structures to
  allow inheritance
- added user bits to drm_gpuva_flags
- added a prefetch operation type in order to support generating prefetch
  operations in the same way other operations are generated
- hand the responsibility for mutual exclusion for a GEM's
  drm_gpuva list to the user; simplified corresponding (un-)link functions

  Maple Tree:
- I added two maple tree patches to the series, one to support custom tree
  walk macros and one to hand the locking responsibility to the user of the
  GPUVA manager without pre-defined lockdep checks.

TODO

  Maple Tree:
- Maple tree uses the 'unsigned long' type for node entries. While this
  works for 64bit, it's incompatible with the DRM GPUVA Manager on 32bit,
  since the DRM GPUVA Manager

[PATCH drm-next v2 01/16] drm: execution context for GEM buffers

2023-02-17 Thread Danilo Krummrich
From: Christian König 

This adds the infrastructure for an execution context for GEM buffers
which is similar to the existing TTM execbuf util and intended to replace
it in the long term.

The basic functionality is that we abstract the necessary loop to lock
many different GEM buffers with automated deadlock and duplicate handling.

v2: drop xarray and use dynamic resized array instead, the locking
overhead is unnecessary and measurable.

Signed-off-by: Christian König 
---
 Documentation/gpu/drm-mm.rst   |  12 ++
 drivers/gpu/drm/Kconfig|   6 +
 drivers/gpu/drm/Makefile   |   2 +
 drivers/gpu/drm/amd/amdgpu/Kconfig |   1 +
 drivers/gpu/drm/drm_exec.c | 295 +
 include/drm/drm_exec.h | 144 ++
 6 files changed, 460 insertions(+)
 create mode 100644 drivers/gpu/drm/drm_exec.c
 create mode 100644 include/drm/drm_exec.h

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index a79fd3549ff8..a52e6f4117d6 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -493,6 +493,18 @@ DRM Sync Objects
 .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
:export:
 
+DRM Execution context
+=
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :doc: Overview
+
+.. kernel-doc:: include/drm/drm_exec.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :export:
+
 GPU Scheduler
 =
 
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index f42d4c6a19f2..1573d658fbb5 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -200,6 +200,12 @@ config DRM_TTM
  GPU memory types. Will be enabled automatically if a device driver
  uses it.
 
+config DRM_EXEC
+   tristate
+   depends on DRM
+   help
+ Execution context for command submissions
+
 config DRM_BUDDY
tristate
depends on DRM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index ab4460fcd63f..d40defbb0347 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -78,6 +78,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += 
drm_panel_orientation_quirks.o
 #
 # Memory-management helpers
 #
+#
+obj-$(CONFIG_DRM_EXEC) += drm_exec.o
 
 obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig 
b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 5341b6b242c3..279fb3bba810 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -11,6 +11,7 @@ config DRM_AMDGPU
select DRM_SCHED
select DRM_TTM
select DRM_TTM_HELPER
+   select DRM_EXEC
select POWER_SUPPLY
select HWMON
select I2C
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
new file mode 100644
index ..ed2106c22786
--- /dev/null
+++ b/drivers/gpu/drm/drm_exec.c
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+#include 
+#include 
+#include 
+
+/**
+ * DOC: Overview
+ *
+ * This component mainly abstracts the retry loop necessary for locking
+ * multiple GEM objects while preparing hardware operations (e.g. command
+ * submissions, page table updates etc..).
+ *
+ * If a contention is detected while locking a GEM object the cleanup procedure
+ * unlocks all previously locked GEM objects and locks the contended one first
+ * before locking any further objects.
+ *
+ * After an object is locked fences slots can optionally be reserved on the
+ * dma_resv object inside the GEM object.
+ *
+ * A typical usage pattern should look like this::
+ *
+ * struct drm_gem_object *obj;
+ * struct drm_exec exec;
+ * unsigned long index;
+ * int ret;
+ *
+ * drm_exec_init(&exec, true);
+ * drm_exec_while_not_all_locked(&exec) {
+ * ret = drm_exec_prepare_obj(&exec, boA, 1);
+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ *
+ * ret = drm_exec_lock(&exec, boB, 1);
+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ * }
+ *
+ * drm_exec_for_each_locked_object(&exec, index, obj) {
+ * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ);
+ * ...
+ * }
+ * drm_exec_fini(&exec);
+ *
+ * See struct dma_exec for more details.
+ */
+
+/* Dummy value used to initially enter the retry loop */
+#define DRM_EXEC_DUMMY (void*)~0
+
+/* Initialize the drm_exec_objects container */
+static void drm_exec_objects_init(struct drm_exec_objects *container)
+{
+   container->objects = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+   /* If allocation here fails, just delay that till the first use */
+   container->max_objects = container->objects ?
+   PAGE_SIZE / sizeof(void *) : 0;
+   container->num_objects = 0;
+}
+
+/* Cleanup the drm_exec_objects container */
+static void drm_exec_ob

  1   2   >