Re: [PATCH drm-next v2 06/16] drm: debugfs: provide infrastructure to dump a DRM GPU VA space
Hi Danilo, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on 48075a66fca613477ac1969b576a93ef5db0164f] url: https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101 base: 48075a66fca613477ac1969b576a93ef5db0164f patch link: https://lore.kernel.org/r/20230217134820.14672-1-dakr%40redhat.com patch subject: [PATCH drm-next v2 06/16] drm: debugfs: provide infrastructure to dump a DRM GPU VA space config: mips-allyesconfig (https://download.01.org/0day-ci/archive/20230218/202302181014.l0sho3s1-...@intel.com/config) compiler: mips-linux-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/e1a1c9659baee305780e1ce50c05e53e1d14b245 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101 git checkout e1a1c9659baee305780e1ce50c05e53e1d14b245 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash drivers/gpu/drm/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot | Link: https://lore.kernel.org/oe-kbuild-all/202302181014.l0sho3s1-...@intel.com/ All warnings (new ones prefixed by >>): drivers/gpu/drm/drm_debugfs.c: In function 'drm_debugfs_gpuva_info': >> drivers/gpu/drm/drm_debugfs.c:228:28: warning: cast from pointer to integer >> of different size [-Wpointer-to-int-cast] 228 |(u64)va->gem.obj, va->gem.offset); |^ vim +228 drivers/gpu/drm/drm_debugfs.c 178 179 /** 180 * drm_debugfs_gpuva_info - dump the given DRM GPU VA space 181 * @m: 
pointer to the &seq_file to write 182 * @mgr: the &drm_gpuva_manager representing the GPU VA space 183 * 184 * Dumps the GPU VA regions and mappings of a given DRM GPU VA manager. 185 * 186 * For each DRM GPU VA space drivers should call this function from their 187 * &drm_info_list's show callback. 188 * 189 * Returns: 0 on success, -ENODEV if the &mgr is not initialized 190 */ 191 int drm_debugfs_gpuva_info(struct seq_file *m, 192 struct drm_gpuva_manager *mgr) 193 { 194 DRM_GPUVA_ITER(it, mgr); 195 DRM_GPUVA_REGION_ITER(__it, mgr); 196 197 if (!mgr->name) 198 return -ENODEV; 199 200 seq_printf(m, "DRM GPU VA space (%s)\n", mgr->name); 201 seq_puts (m, "\n"); 202 seq_puts (m, " VA regions | start | range | end| sparse\n"); 203 seq_puts (m, "\n"); 204 seq_printf(m, " VA space| 0x%016llx | 0x%016llx | 0x%016llx | -\n", 205 mgr->mm_start, mgr->mm_range, mgr->mm_start + mgr->mm_range); 206 seq_puts (m, "---\n"); 207 drm_gpuva_iter_for_each(__it) { 208 struct drm_gpuva_region *reg = __it.reg; 209 210 if (reg == &mgr->kernel_alloc_region) { 211 seq_printf(m, " kernel node | 0x%016llx | 0x%016llx | 0x%016llx | -\n", 212 reg->va.addr, reg->va.range, reg->va.addr + reg->va.range); 213 continue; 214 } 215 216 seq_printf(m, " | 0x%016llx | 0x%016llx | 0x%016llx | %s\n", 217 reg->va.addr, reg->va.range, reg->va.addr + reg->va.range, 218 reg->sparse ? "true" : "false"); 219 } 220 seq_puts(m, "\n"); 221 seq_puts(m, " VAs | start | range | end| object | object offset\n"); 222 seq_puts(m, "-\n"); 223 drm_gpuva_iter_for_each(it) { 224
[PATCH 1/2] drm/msm: drop unused ring variable in msm_ioctl_gem_submit()
The variable ring is not used by msm_parse_deps() and msm_ioctl_gem_submit() and thus can be dropped. Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/msm_gem_submit.c | 10 +++--- drivers/gpu/drm/msm/msm_gpu_trace.h | 10 -- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index ac8ed731f76d..a539eb31042f 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -550,8 +550,7 @@ static struct drm_syncobj **msm_parse_deps(struct msm_gem_submit *submit, struct drm_file *file, uint64_t in_syncobjs_addr, uint32_t nr_in_syncobjs, - size_t syncobj_stride, - struct msm_ringbuffer *ring) + size_t syncobj_stride) { struct drm_syncobj **syncobjs = NULL; struct drm_msm_gem_submit_syncobj syncobj_desc = {0}; @@ -722,7 +721,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_gem_submit *submit; struct msm_gpu *gpu = priv->gpu; struct msm_gpu_submitqueue *queue; - struct msm_ringbuffer *ring; struct msm_submit_post_dep *post_deps = NULL; struct drm_syncobj **syncobjs_to_reset = NULL; int out_fence_fd = -1; @@ -760,8 +758,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (!queue) return -ENOENT; - ring = gpu->rb[queue->ring_nr]; - if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { out_fence_fd = get_unused_fd_flags(O_CLOEXEC); if (out_fence_fd < 0) { @@ -774,7 +770,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (IS_ERR(submit)) return PTR_ERR(submit); - trace_msm_gpu_submit(pid_nr(submit->pid), ring->id, submit->ident, + trace_msm_gpu_submit(pid_nr(submit->pid), submit->ident, args->nr_bos, args->nr_cmds); ret = mutex_lock_interruptible(&queue->lock); @@ -803,7 +799,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, syncobjs_to_reset = msm_parse_deps(submit, file, args->in_syncobjs, args->nr_in_syncobjs, - args->syncobj_stride, ring); + args->syncobj_stride); if 
(IS_ERR(syncobjs_to_reset)) { ret = PTR_ERR(syncobjs_to_reset); goto out_unlock; diff --git a/drivers/gpu/drm/msm/msm_gpu_trace.h b/drivers/gpu/drm/msm/msm_gpu_trace.h index ac40d857bc45..12ef10f1de4c 100644 --- a/drivers/gpu/drm/msm/msm_gpu_trace.h +++ b/drivers/gpu/drm/msm/msm_gpu_trace.h @@ -9,24 +9,22 @@ #define TRACE_INCLUDE_FILE msm_gpu_trace TRACE_EVENT(msm_gpu_submit, - TP_PROTO(pid_t pid, u32 ringid, u32 id, u32 nr_bos, u32 nr_cmds), - TP_ARGS(pid, ringid, id, nr_bos, nr_cmds), + TP_PROTO(pid_t pid, u32 id, u32 nr_bos, u32 nr_cmds), + TP_ARGS(pid, id, nr_bos, nr_cmds), TP_STRUCT__entry( __field(pid_t, pid) __field(u32, id) - __field(u32, ringid) __field(u32, nr_cmds) __field(u32, nr_bos) ), TP_fast_assign( __entry->pid = pid; __entry->id = id; - __entry->ringid = ringid; __entry->nr_bos = nr_bos; __entry->nr_cmds = nr_cmds ), - TP_printk("id=%d pid=%d ring=%d bos=%d cmds=%d", - __entry->id, __entry->pid, __entry->ringid, + TP_printk("id=%d pid=%d bos=%d cmds=%d", + __entry->id, __entry->pid, __entry->nr_bos, __entry->nr_cmds) ); -- 2.39.1
[PATCH 2/2] drm/msm: simplify msm_parse_deps() and msm_parse_post_deps()
Simplify two functions msm_parse_deps() and msm_parse_post_deps(): extract single item parsing function and clean up error path. Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/msm_gem_submit.c | 196 +++ 1 file changed, 106 insertions(+), 90 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index a539eb31042f..c64907f0f249 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -546,6 +546,46 @@ struct msm_submit_post_dep { struct dma_fence_chain *chain; }; +static struct drm_syncobj *msm_parse_dep_one(struct msm_gem_submit *submit, +struct drm_file *file, +uint64_t address, +size_t syncobj_stride) +{ + struct drm_msm_gem_submit_syncobj syncobj_desc = {0}; + struct dma_fence *fence; + struct drm_syncobj *syncobj = NULL; + int ret; + + if (copy_from_user(&syncobj_desc, + u64_to_user_ptr(address), + min(syncobj_stride, sizeof(syncobj_desc + return ERR_PTR(-EFAULT); + + if (syncobj_desc.point && + !drm_core_check_feature(submit->dev, DRIVER_SYNCOBJ_TIMELINE)) + return ERR_PTR(-EOPNOTSUPP); + + if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS) + return ERR_PTR(-EINVAL); + + ret = drm_syncobj_find_fence(file, syncobj_desc.handle, +syncobj_desc.point, 0, &fence); + if (ret) + return ERR_PTR(ret); + + ret = drm_sched_job_add_dependency(&submit->base, fence); + if (ret) + return ERR_PTR(ret); + + if (syncobj_desc.flags & MSM_SUBMIT_SYNCOBJ_RESET) { + syncobj = drm_syncobj_find(file, syncobj_desc.handle); + if (!syncobj) + return ERR_PTR(-EINVAL); + } + + return syncobj; +} + static struct drm_syncobj **msm_parse_deps(struct msm_gem_submit *submit, struct drm_file *file, uint64_t in_syncobjs_addr, @@ -553,9 +593,8 @@ static struct drm_syncobj **msm_parse_deps(struct msm_gem_submit *submit, size_t syncobj_stride) { struct drm_syncobj **syncobjs = NULL; - struct drm_msm_gem_submit_syncobj syncobj_desc = {0}; - int ret = 0; - uint32_t i, j; + int ret; + int i; syncobjs = 
kcalloc(nr_in_syncobjs, sizeof(*syncobjs), GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); @@ -564,54 +603,26 @@ static struct drm_syncobj **msm_parse_deps(struct msm_gem_submit *submit, for (i = 0; i < nr_in_syncobjs; ++i) { uint64_t address = in_syncobjs_addr + i * syncobj_stride; - struct dma_fence *fence; + struct drm_syncobj *syncobj; - if (copy_from_user(&syncobj_desc, - u64_to_user_ptr(address), - min(syncobj_stride, sizeof(syncobj_desc { - ret = -EFAULT; - break; - } - - if (syncobj_desc.point && - !drm_core_check_feature(submit->dev, DRIVER_SYNCOBJ_TIMELINE)) { - ret = -EOPNOTSUPP; - break; + syncobj = msm_parse_dep_one(submit, file, address, syncobj_stride); + if (IS_ERR(syncobj)) { + ret = PTR_ERR(syncobj); + goto err; } - if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS) { - ret = -EINVAL; - break; - } - - ret = drm_syncobj_find_fence(file, syncobj_desc.handle, -syncobj_desc.point, 0, &fence); - if (ret) - break; - - ret = drm_sched_job_add_dependency(&submit->base, fence); - if (ret) - break; - - if (syncobj_desc.flags & MSM_SUBMIT_SYNCOBJ_RESET) { - syncobjs[i] = - drm_syncobj_find(file, syncobj_desc.handle); - if (!syncobjs[i]) { - ret = -EINVAL; - break; - } - } + syncobjs[i] = syncobj; } - if (ret) { - for (j = 0; j <= i; ++j) { - if (syncobjs[j]) - drm_syncobj_put(syncobjs[j]); - } - kfree(syncobjs); - return ERR_PTR(ret); - } return syncobjs; + +err: + w
[PATCH 0/2] drm/msm: rework msm_parse_deps() and msm_parse_post_deps()
As discussed in the review of [1], rework these two functions to separate single point parser and provide clean error path. Dependencies: [1] [1] https://lore.kernel.org/all/20230215235048.1166484-1-robdcl...@gmail.com Dmitry Baryshkov (2): drm/msm: drop unused ring variable in msm_ioctl_gem_submit() drm/msm: simplify msm_parse_deps() and msm_parse_post_deps() drivers/gpu/drm/msm/msm_gem_submit.c | 206 ++- drivers/gpu/drm/msm/msm_gpu_trace.h | 10 +- 2 files changed, 113 insertions(+), 103 deletions(-) -- 2.39.1
Re: [PATCH drm-next v2 13/16] drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm
Hi Danilo, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on 48075a66fca613477ac1969b576a93ef5db0164f] url: https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101 base: 48075a66fca613477ac1969b576a93ef5db0164f patch link: https://lore.kernel.org/r/20230217134820.14672-8-dakr%40redhat.com patch subject: [PATCH drm-next v2 13/16] drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm config: mips-allyesconfig (https://download.01.org/0day-ci/archive/20230218/202302180839.s0w26kcj-...@intel.com/config) compiler: mips-linux-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/b25c0bcfed93dd62ed732968d8987b92e10c4579 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101 git checkout b25c0bcfed93dd62ed732968d8987b92e10c4579 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash drivers/gpu/drm/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot | Link: https://lore.kernel.org/oe-kbuild-all/202302180839.s0w26kcj-...@intel.com/ All warnings (new ones prefixed by >>): In file included from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h:4, from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.h:5, from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c:22: drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c: In function 'nvkm_uvmm_mthd_raw_map': >> drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c:422:31: warning: cast to >> pointer from integer of 
different size [-Wint-to-pointer-cast] 422 | (void *)args->argv, args->argc); | ^ drivers/gpu/drm/nouveau/include/nvkm/core/memory.h:66:43: note: in definition of macro 'nvkm_memory_map' 66 | (p)->func->map((p),(o),(vm),(va),(av),(ac)) | ^~ vim +422 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c 388 389 static int 390 nvkm_uvmm_mthd_raw_map(struct nvkm_uvmm *uvmm, struct nvif_vmm_raw_v0 *args) 391 { 392 struct nvkm_client *client = uvmm->object.client; 393 struct nvkm_vmm *vmm = uvmm->vmm; 394 struct nvkm_vma vma = { 395 .addr = args->addr, 396 .size = args->size, 397 .used = true, 398 .mapref = false, 399 .no_comp = true, 400 }; 401 struct nvkm_memory *memory; 402 u64 handle = args->memory; 403 u8 refd; 404 int ret; 405 406 if (!nvkm_vmm_in_managed_range(vmm, args->addr, args->size)) 407 return -EINVAL; 408 409 ret = nvkm_uvmm_page_index(uvmm, args->size, args->shift, &refd); 410 if (ret) 411 return ret; 412 413 vma.page = vma.refd = refd; 414 415 memory = nvkm_umem_search(client, args->memory); 416 if (IS_ERR(memory)) { 417 VMM_DEBUG(vmm, "memory %016llx %ld\n", handle, PTR_ERR(memory)); 418 return PTR_ERR(memory); 419 } 420 421 ret = nvkm_memory_map(memory, args->offset, vmm, &vma, > 422(void *)args->argv, args->argc); 423 424 nvkm_memory_unref(&vma.memory); 425 nvkm_memory_unref(&memory); 426 return ret; 427 } 428 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests
Re: [PATCH drm-next v2 05/16] drm: manager to keep track of GPUs VA mappings
Hi Danilo, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on 48075a66fca613477ac1969b576a93ef5db0164f] url: https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101 base: 48075a66fca613477ac1969b576a93ef5db0164f patch link: https://lore.kernel.org/r/20230217134422.14116-6-dakr%40redhat.com patch subject: [PATCH drm-next v2 05/16] drm: manager to keep track of GPUs VA mappings config: mips-allyesconfig (https://download.01.org/0day-ci/archive/20230218/202302180805.b0ab40v5-...@intel.com/config) compiler: mips-linux-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/00132cc92b6745cfd51c0d5df4c246a848f2ceaa git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Danilo-Krummrich/drm-execution-context-for-GEM-buffers/20230217-215101 git checkout 00132cc92b6745cfd51c0d5df4c246a848f2ceaa # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash drivers/gpu/drm/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot | Link: https://lore.kernel.org/oe-kbuild-all/202302180805.b0ab40v5-...@intel.com/ All warnings (new ones prefixed by >>): >> drivers/gpu/drm/drm_gpuva_mgr.c:1383:5: warning: no previous prototype for >> 'drm_gpuva_sm_step' [-Wmissing-prototypes] 1383 | int drm_gpuva_sm_step(struct drm_gpuva_op *__op, void *priv) | ^ -- >> drivers/gpu/drm/drm_gpuva_mgr.c:529: warning: expecting prototype for >> drm_gpuva_remove_iter(). 
Prototype was for drm_gpuva_iter_remove() instead drivers/gpu/drm/drm_gpuva_mgr.c:549: warning: Excess function parameter 'addr' description in 'drm_gpuva_insert' drivers/gpu/drm/drm_gpuva_mgr.c:549: warning: Excess function parameter 'range' description in 'drm_gpuva_insert' drivers/gpu/drm/drm_gpuva_mgr.c:765: warning: Excess function parameter 'addr' description in 'drm_gpuva_region_insert' drivers/gpu/drm/drm_gpuva_mgr.c:765: warning: Excess function parameter 'range' description in 'drm_gpuva_region_insert' drivers/gpu/drm/drm_gpuva_mgr.c:1345: warning: Excess function parameter 'ops' description in 'drm_gpuva_sm_unmap' drivers/gpu/drm/drm_gpuva_mgr.c:1589: warning: Function parameter or member 'addr' not described in 'drm_gpuva_prefetch_ops_create' drivers/gpu/drm/drm_gpuva_mgr.c:1589: warning: Function parameter or member 'range' not described in 'drm_gpuva_prefetch_ops_create' drivers/gpu/drm/drm_gpuva_mgr.c:1589: warning: Excess function parameter 'req_addr' description in 'drm_gpuva_prefetch_ops_create' drivers/gpu/drm/drm_gpuva_mgr.c:1589: warning: Excess function parameter 'req_range' description in 'drm_gpuva_prefetch_ops_create' vim +/drm_gpuva_sm_step +1383 drivers/gpu/drm/drm_gpuva_mgr.c 1382 > 1383 int drm_gpuva_sm_step(struct drm_gpuva_op *__op, void *priv) 1384 { 1385 struct { 1386 struct drm_gpuva_manager *mgr; 1387 struct drm_gpuva_ops *ops; 1388 } *args = priv; 1389 struct drm_gpuva_manager *mgr = args->mgr; 1390 struct drm_gpuva_ops *ops = args->ops; 1391 struct drm_gpuva_op *op; 1392 1393 op = gpuva_op_alloc(mgr); 1394 if (unlikely(!op)) 1395 goto err; 1396 1397 memcpy(op, __op, sizeof(*op)); 1398 1399 if (op->op == DRM_GPUVA_OP_REMAP) { 1400 struct drm_gpuva_op_remap *__r = &__op->remap; 1401 struct drm_gpuva_op_remap *r = &op->remap; 1402 1403 r->unmap = kmemdup(__r->unmap, sizeof(*r->unmap), 1404 GFP_KERNEL); 1405 if (unlikely(!r->unmap)) 1406 goto err_free_op; 1407 1408 if (__r->prev) { 1409 r->prev = kmemdup(__r->prev, sizeof(*r->prev), 
1410GFP_KERNEL); 1411 if (unlikely(!r->prev)) 1412 goto err_free_unmap; 1413 } 1414 1415
[PATCH 1/2] drm/i915/guc: Improve GuC load error reporting
From: John Harrison There are multiple ways in which the GuC load can fail. The driver was reporting the status register as is, but not everyone can read the matrix unfiltered. So add decoding of the common error cases. Also, remove the comment about interrupt based load completion checking being not recommended. The interrupt was removed from the GuC firmware some time ago so it is no longer an option anyway. While at it, also abort the timeout if a known error code is reported. No need to keep waiting if the GuC has already given up the load. Signed-off-by: John Harrison --- .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h | 17 drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 95 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h| 4 +- 3 files changed, 95 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h index 8085fb1812748..750fe0c6d8529 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -21,6 +21,9 @@ enum intel_guc_load_status { INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02, INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03, INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04, + INTEL_GUC_LOAD_STATUS_HWCONFIG_START = 0x05, + INTEL_GUC_LOAD_STATUS_HWCONFIG_DONE= 0x06, + INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR = 0x07, INTEL_GUC_LOAD_STATUS_GDT_DONE = 0x10, INTEL_GUC_LOAD_STATUS_IDT_DONE = 0x20, INTEL_GUC_LOAD_STATUS_LAPIC_DONE = 0x30, @@ -38,4 +41,18 @@ enum intel_guc_load_status { INTEL_GUC_LOAD_STATUS_READY= 0xF0, }; +enum intel_bootrom_load_status { + INTEL_BOOTROM_STATUS_NO_KEY_FOUND = 0x13, + INTEL_BOOTROM_STATUS_AES_PROD_KEY_FOUND = 0x1A, + INTEL_BOOTROM_STATUS_RSA_FAILED = 0x50, + INTEL_BOOTROM_STATUS_PAVPC_FAILED = 0x73, + INTEL_BOOTROM_STATUS_WOPCM_FAILED = 0x74, + INTEL_BOOTROM_STATUS_LOADLOC_FAILED = 0x75, + INTEL_BOOTROM_STATUS_JUMP_PASSED = 0x76, + INTEL_BOOTROM_STATUS_JUMP_FAILED = 0x77, + 
INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED = 0x79, + INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT = 0x7a, + INTEL_BOOTROM_STATUS_EXCEPTION= 0x7E, +}; + #endif /* _ABI_GUC_ERRORS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 69133420c78b2..2f5942606913d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -88,31 +88,64 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, /* * Read the GuC status register (GUC_STATUS) and store it in the * specified location; then return a boolean indicating whether - * the value matches either of two values representing completion - * of the GuC boot process. + * the value matches either completion or a known failure code. * * This is used for polling the GuC status in a wait_for() * loop below. */ -static inline bool guc_ready(struct intel_uncore *uncore, u32 *status) +static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool *success) { u32 val = intel_uncore_read(uncore, GUC_STATUS); u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val); + u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, val); *status = val; - return uk_val == INTEL_GUC_LOAD_STATUS_READY; + *success = true; + switch (uk_val) { + case INTEL_GUC_LOAD_STATUS_READY: + return true; + + case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH: + case INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH: + case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE: + case INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR: + case INTEL_GUC_LOAD_STATUS_DPC_ERROR: + case INTEL_GUC_LOAD_STATUS_EXCEPTION: + case INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID: + case INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID: + case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + *success = false; + return true; + } + + switch (br_val) { + case INTEL_BOOTROM_STATUS_NO_KEY_FOUND: + case INTEL_BOOTROM_STATUS_RSA_FAILED: + case INTEL_BOOTROM_STATUS_PAVPC_FAILED: + case 
INTEL_BOOTROM_STATUS_WOPCM_FAILED: + case INTEL_BOOTROM_STATUS_LOADLOC_FAILED: + case INTEL_BOOTROM_STATUS_JUMP_FAILED: + case INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED: + case INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT: + case INTEL_BOOTROM_STATUS_EXCEPTION: + *
[PATCH 2/2] drm/i915/guc: Allow for very slow GuC loading
From: John Harrison A failure to load the GuC is occasionally observed where the GuC log actually showed that the GuC had loaded just fine. The implication being that the load just took ever so slightly longer than the 200ms timeout. Given that the actual time should be tens of milliseconds at the slowest, this should never happen. So far the issue has generally been caused by a bad IFWI resulting in low frequencies during boot (despite the KMD requesting max frequency). However, the issue seems to happen more often than one would like. So a) increase the timeout so that the user still gets a working system even in the case of slow load. And b) report the frequency during the load to see if that is the cause of the slow down. Signed-off-by: John Harrison --- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 37 +-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 2f5942606913d..72e003f50617d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -12,6 +12,7 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" +#include "gt/intel_rps.h" #include "intel_guc_fw.h" #include "intel_guc_print.h" #include "i915_drv.h" @@ -139,9 +140,12 @@ static int guc_wait_ucode(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_uncore *uncore = gt->uncore; + ktime_t before, after, delta; bool success; u32 status; - int ret; + int ret, count; + u64 delta_ms; + u32 before_freq; /* * Wait for the GuC to start up. @@ -159,13 +163,32 @@ static int guc_wait_ucode(struct intel_guc *guc) * issues to be resolved. In the meantime bump the timeout to * 200ms. Even at slowest clock, this should be sufficient. And * in the working case, a larger timeout makes no difference. 
+* +* IFWI updates have also been seen to cause sporadic failures due to +* the requested frequency not being granted and thus the firmware +* load is attempted at minimum frequency. That can lead to load times +* in the seconds range. However, there is a limit on how long an +* individual wait_for() can wait. So wrap it in a loop. */ - ret = wait_for(guc_load_done(uncore, &status, &success), 200); + before_freq = intel_rps_read_actual_frequency(&uncore->gt->rps); + before = ktime_get(); + for (count = 0; count < 20; count++) { + ret = wait_for(guc_load_done(uncore, &status, &success), 1000); + if (!ret || !success) + break; + + guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz\n", + count, intel_rps_read_actual_frequency(&uncore->gt->rps)); + } + after = ktime_get(); + delta = ktime_sub(after, before); + delta_ms = ktime_to_ms(delta); if (ret || !success) { u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status); u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status); - guc_info(guc, "load failed: status = 0x%08X, ret = %d\n", status, ret); + guc_info(guc, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz, ret = %d\n", +status, delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), ret); guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", REG_FIELD_GET(GS_MIA_IN_RESET, status), bootrom, ukernel, @@ -206,6 +229,14 @@ static int guc_wait_ucode(struct intel_guc *guc) /* Uncommon/unexpected error, see earlier status code print for details */ if (ret == 0) ret = -ENXIO; + } else if (delta_ms > 200) { + guc_warn(guc, "excessive init time: %lldms! 
[freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d]\n", +delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), +before_freq, status, count, ret); + } else { + guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", + delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), + before_freq, status, count, ret); } return ret; -- 2.39.1
[PATCH 0/2] Improvements to GuC load failure handling
From: John Harrison Add more decoding of the GuC load failures. Also include information about GT frequency to see if timeouts are due to a failure to boost the clocks. Finally, increase the timeout to accommodate situations where the clock boost does fail. Signed-off-by: John Harrison John Harrison (2): drm/i915/guc: Improve GuC load error reporting drm/i915/guc: Allow for very slow GuC loading .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h | 17 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 128 +++--- drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h| 4 +- 3 files changed, 127 insertions(+), 22 deletions(-) -- 2.39.1
Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits
On Fri, Feb 17, 2023 at 12:45 PM Rodrigo Vivi wrote: > > On Fri, Feb 17, 2023 at 09:00:49AM -0800, Rob Clark wrote: > > On Fri, Feb 17, 2023 at 8:03 AM Tvrtko Ursulin > > wrote: > > > > > > > > > On 17/02/2023 14:55, Rob Clark wrote: > > > > On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin > > > > wrote: > > > >> > > > >> > > > >> On 16/02/2023 18:19, Rodrigo Vivi wrote: > > > >>> On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote: > > > On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin > > > wrote: > > > > > > > > From: Tvrtko Ursulin > > > > > > > > In i915 we have this concept of "wait boosting" where we give a > > > > priority boost > > > > for instance to fences which are actively waited upon from > > > > userspace. This has > > > > it's pros and cons and can certainly be discussed at lenght. > > > > However fact is > > > > some workloads really like it. > > > > > > > > Problem is that with the arrival of drm syncobj and a new userspace > > > > waiting > > > > entry point it added, the waitboost mechanism was bypassed. Hence I > > > > cooked up > > > > this mini series really (really) quickly to see if some discussion > > > > can be had. > > > > > > > > It adds a concept of "wait count" to dma fence, which is > > > > incremented for every > > > > explicit dma_fence_enable_sw_signaling and > > > > dma_fence_add_wait_callback (like > > > > dma_fence_add_callback but from explicit/userspace wait paths). > > > > > > I was thinking about a similar thing, but in the context of dma_fence > > > (or rather sync_file) fd poll()ing. How does the kernel > > > differentiate > > > between "housekeeping" poll()ers that don't want to trigger boost but > > > simply know when to do cleanup, and waiters who are waiting with some > > > urgency. I think we could use EPOLLPRI for this purpose. > > > > > > Not sure how that translates to waits via the syncobj. But I think > > > we > > > want to let userspace give some hint about urgent vs housekeeping > > > waits. 
> > > >>> > > > >>> Should the hint be on the waits, or should the hints be on the > > > >>> executed > > > >>> context? > > > >>> > > > >>> In the end we need some way to quickly ramp-up the frequency to avoid > > > >>> the execution bubbles. > > > >>> > > > >>> waitboost is trying to guess that, but in some cases it guess wrong > > > >>> and waste power. > > > >> > > > >> Do we have a list of workloads which shows who benefits and who loses > > > >> from the current implementation of waitboost? > > > >>> btw, this is something that other drivers might need: > > > >>> > > > >>> https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883 > > > >>> Cc: Alex Deucher > > > >> > > > >> I have several issues with the context hint if it would directly > > > >> influence frequency selection in the "more power" direction. > > > >> > > > >> First of all, assume a context hint would replace the waitboost. Which > > > >> applications would need to set it to restore the lost performance and > > > >> how would they set it? > > > >> > > > >> Then I don't even think userspace necessarily knows. Think of a layer > > > >> like OpenCL. It doesn't really know in advance the profile of > > > >> submissions vs waits. It depends on the CPU vs GPU speed, so hardware > > > >> generation, and the actual size of the workload which can be influenced > > > >> by the application (or user) and not the library. > > > >> > > > >> The approach also lends itself well for the "arms race" where every > > > >> application can say "Me me me, I am the most important workload there > > > >> is!". > > > > > > > > since there is discussion happening in two places: > > > > > > > > https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433 > > > > > > > > What I think you might want is a ctx boost_mask which lets an app or > > > > driver disable certain boost signals/classes. 
Where fence waits is > > > > one class of boost, but hypothetical other signals like touchscreen > > > > (or other) input events could be another class of boost. A compute > > > > workload might be interested in fence wait boosts but could care less > > > > about input events. > > > > > > I think it can only be apps which could have any chance knowing whether > > > their use of a library is latency sensitive or not. Which means new > > > library extensions and their adoption. So I have some strong reservation > > > that route is feasible. > > > > > > Or we tie with priority which many drivers do. Normal and above gets the > > > boosting and what lowered itself does not (aka SCHED_IDLE/SCHED_BATCH). > > > > yeah, that sounds reasonable. > > > > on that gitlab-issue discussion Emma Anholt was against using the priority > to influence frequency since that should be more about latency. > > or we are talking about something different priority
[pull] amdgpu drm-next-6.3
Hi Dave, Daniel, Fixes for 6.3. The big change here is the splitting of dc_link.c into multiple smaller files. The following changes since commit 69ed0c5d44d72051b13e65384e9d9354c45d5e14: Revert "drm/amd/display: disable S/G display on DCN 3.1.4" (2023-02-03 15:42:42 -0500) are available in the Git repository at: https://gitlab.freedesktop.org/agd5f/linux.git tags/amd-drm-next-6.3-2023-02-17 for you to fetch changes up to 80c6d6804f31451848a3956a70c2bcb1f07cfcb0: drm/amd/display: disable SubVP + DRR to prevent underflow (2023-02-15 22:26:22 -0500) amd-drm-next-6.3-2023-02-17: amdgpu: - GC 11 fixes - Display fixes - Backlight cleanup - SMU13 fixes - SMU7 regression fix - GFX9 sw queues fix - AGP fix for GMC 11 - W1 warning fixes - S/G display fixes - Misc spelling fixes - Driver unload fix - DCN 3.1.4 fixes - Display code reorg fixes - Rotation fixes Alex Deucher (7): drm/amd/pm/smu7: move variables to where they are used drm/amdgpu/gmc11: fix system aperture set when AGP is enabled drm/amd/display: minor cleanup of vm_setup drm/amdgpu: add S/G display parameter Revert "drm/amd/display: disable S/G display on DCN 3.1.2/3" Revert "drm/amd/display: disable S/G display on DCN 2.1.0" Revert "drm/amd/display: disable S/G display on DCN 3.1.5" Alvin Lee (2): drm/amd/display: Set max vratio for prefetch to 7.9 for YUV420 MPO drm/amd/display: Fix prefetch vratio check Anthony Koo (1): drm/amd/display: [FW Promotion] Release 0.0.153.0 Aric Cyr (2): drm/amd/display: 3.2.222 drm/amd/display: Promote DAL to 3.2.223 Arnd Bergmann (2): drm/amdgpu: fix enum odm_combine_mode mismatch drm/amd/display: fix link_validation build failure Arthur Grillo (6): drm/amd/display: Turn global functions into static drm/amd/display: Add function prototypes to headers drm/amd/amdgpu: Add function prototypes to headers drm/amd/display: Add previously missing includes drm/amd/display: Fix excess arguments on kernel-doc drm/amd/display: Make variables declaration inside ifdef guard Aurabindo Pillai 
(3): drm/amd/display: Fix null pointer deref error on rotation drm/amd/display: fix k1 k2 divider programming for phantom streams drm/amd/display: disable SubVP + DRR to prevent underflow Ayush Gupta (1): drm/amd/display: temporary fix for page faulting Bhawanpreet Lakha (1): drm/amd/display: Add support for multiple overlay planes Charlene Liu (1): drm/amd/display: add NULL pointer check Colin Ian King (1): drm/amd/display: Fix spelling mistakes of function name in error message Daniel Miess (1): Revert "drm/amd/display: Correct bw_params population" Deepak R Varma (2): drm/amd/display: Remove duplicate/repeating expression drm/amd/display: Remove duplicate/repeating expressions Evan Quan (3): drm/amd/pm: add SMU 13.0.7 missing GetPptLimit message mapping drm/amd/pm: bump SMU 13.0.0 driver_if header version drm/amd/pm: bump SMU 13.0.7 driver_if header version Friedrich Vock (1): drm/amdgpu: Use the TGID for trace_amdgpu_vm_update_ptes Guilherme G. Piccoli (1): drm/amdgpu/fence: Fix oops due to non-matching drm_sched init/fini Hamza Mahfooz (2): drm/amd/display: fix read errors pertaining to dp_lttpr_status_show() drm/amd/display: don't call dc_interrupt_set() for disabled crtcs Hans de Goede (1): drm/amd/display: Drop CONFIG_BACKLIGHT_CLASS_DEVICE ifdefs Jack Xiao (1): drm/amd/amdgpu: fix warning during suspend Jane Jian (1): drm/amdgpu/smu: skip pptable init under sriov JesseZhang (1): amd/amdgpu: remove test ib on hw ring Jiapeng Chong (2): drm/amd/display: Remove the unused variable ds_port drm/amd/display: Remove the unused variable pre_connection_type Jingwen Zhu (1): drm/amd/display: avoid disable otg when dig was disabled Jonathan Gray (2): drm/amd/display: avoid unaligned access warnings drm/amd/pm: avoid unaligned access warnings Kenneth Feng (2): drm/amd/amdgpu: enable athub cg 11.0.3 drm/amd/amdgpu: implement mode2 reset on smu_v13_0_10 Kent Russell (2): drm/amdgpu: Fix incorrect filenames in sysfs comments drm/amdgpu: Add unique_id support for GC 
11.0.1/2 Leo (Hanghong) Ma (2): drm/amd/display: Add HDMI manufacturer OUI and device id read drm/amd/display: Fix FreeSync active bit issue Leo Li (1): drm/amd/display: Fail atomic_check early on normalize_zpos error Lijo Lazar (1): drm/amd/pm: Allocate dummy table only if needed Ma Jun (1): drm/amdgpu: Fix the warning info when unload or remove amdgpu Melissa Wen (7): drm/amd/display: fix cursor offset on rotation 180 drm/amd/display: ident braces in dcn30_acquire_post_bldn_3dl
[PATCH v2 2/2] drm/i915/guc: Fix missing return code checks in submission init
From: John Harrison The CI results for the 'fast request' patch set (enables error return codes for fire-and-forget H2G messages) hit an issue with the KMD sending context submission requests on an invalid context. That was caused by a fault injection probe failing the context creation of a kernel context. However, there was no return code checking on any of the kernel context registration paths. So the driver kept going and tried to use the kernel context for the record defaults process. This would not cause any actual problems. The invalid requests would be rejected by GuC and ultimately the start up sequence would correctly wedge due to the context creation failure. But fixing the issue correctly rather than ignoring it means we won't get CI complaining when the fast request patch lands and enables the extra error checking. So fix it by checking for errors and aborting as appropriate when creating kernel contexts. While at it, clean up some other submission init related failure cleanup paths. Also, rename guc_init_lrc_mapping to guc_init_submission as the former name hasn't been valid in a long time. 
v2: Add another wrapper to keep the flow balanced (Daniele) Signed-off-by: John Harrison --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 97 ++- .../gpu/drm/i915/gt/uc/intel_guc_submission.h | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 7 +- 3 files changed, 80 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index a04d7049a2c2f..88e881b100cf0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1441,7 +1441,7 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static void guc_init_engine_stats(struct intel_guc *guc) +static int guc_init_engine_stats(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); intel_wakeref_t wakeref; @@ -1454,6 +1454,13 @@ static void guc_init_engine_stats(struct intel_guc *guc) guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret)); else guc_enable_busyness_worker(guc); + + return ret; +} + +static void guc_fini_engine_stats(struct intel_guc *guc) +{ + guc_cancel_busyness_worker(guc); } void intel_guc_busyness_park(struct intel_gt *gt) @@ -4109,9 +4116,11 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = guc_submit_request; } -static inline void guc_kernel_context_pin(struct intel_guc *guc, - struct intel_context *ce) +static inline int guc_kernel_context_pin(struct intel_guc *guc, +struct intel_context *ce) { + int ret; + /* * Note: we purposefully do not check the returns below because * the registration can only fail if a reset is just starting. @@ -4119,16 +4128,24 @@ static inline void guc_kernel_context_pin(struct intel_guc *guc, * isn't happening and even it did this code would be run again. 
*/ - if (context_guc_id_invalid(ce)) - pin_guc_id(guc, ce); + if (context_guc_id_invalid(ce)) { + ret = pin_guc_id(guc, ce); + + if (ret < 0) + return ret; + } if (!test_bit(CONTEXT_GUC_INIT, &ce->flags)) guc_context_init(ce); - try_context_registration(ce, true); + ret = try_context_registration(ce, true); + if (ret) + unpin_guc_id(guc, ce); + + return ret; } -static inline void guc_init_lrc_mapping(struct intel_guc *guc) +static inline int guc_init_submission(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; @@ -4155,9 +4172,17 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) struct intel_context *ce; list_for_each_entry(ce, &engine->pinned_contexts_list, - pinned_contexts_link) - guc_kernel_context_pin(guc, ce); + pinned_contexts_link) { + int ret = guc_kernel_context_pin(guc, ce); + + if (ret) { + /* No point in trying to clean up as i915 will wedge on failure */ + return ret; + } + } } + + return 0; } static void guc_release(struct intel_engine_cs *engine) @@ -4400,31 +4425,57 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc) return ret; } -void intel_guc_submission_enable(struct intel_guc *guc) +static void guc_route_semaphores(struct intel_guc *guc, bool to_guc) { s
[PATCH v2 0/2] Clean up some GuC related failure paths
From: John Harrison Improve failure code handling during GuC initialisation. v2: Fix function naming and improve on/off balancing (review feedback from Daniele) Signed-off-by: John Harrison John Harrison (2): drm/i915/guc: Improve clean up of busyness stats worker drm/i915/guc: Fix missing return code checks in submission init .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 133 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.h | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 7 +- 3 files changed, 102 insertions(+), 40 deletions(-) -- 2.39.1
[PATCH v2 1/2] drm/i915/guc: Improve clean up of busyness stats worker
From: John Harrison The stats worker thread management was mis-matched between enable/disable call sites. Fix those up. Also, abstract the cancel/enable code into a helper function rather than replicating in multiple places. v2: Rename the helpers and wrap the enable as well as the cancel (review feedback from Daniele). Signed-off-by: John Harrison --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 38 +++ 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index be495e657d66b..a04d7049a2c2f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1352,6 +1352,16 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) return ns_to_ktime(total); } +static void guc_enable_busyness_worker(struct intel_guc *guc) +{ + mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay); +} + +static void guc_cancel_busyness_worker(struct intel_guc *guc) +{ + cancel_delayed_work_sync(&guc->timestamp.work); +} + static void __reset_guc_busyness_stats(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -1360,7 +1370,7 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) unsigned long flags; ktime_t unused; - cancel_delayed_work_sync(&guc->timestamp.work); + guc_cancel_busyness_worker(guc); spin_lock_irqsave(&guc->timestamp.lock, flags); @@ -1416,8 +1426,7 @@ static void guc_timestamp_ping(struct work_struct *wrk) intel_gt_reset_unlock(gt, srcu); - mod_delayed_work(system_highpri_wq, &guc->timestamp.work, -guc->timestamp.ping_delay); + guc_enable_busyness_worker(guc); } static int guc_action_enable_usage_stats(struct intel_guc *guc) @@ -1436,16 +1445,15 @@ static void guc_init_engine_stats(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); intel_wakeref_t wakeref; + int ret; - mod_delayed_work(system_highpri_wq, 
&guc->timestamp.work, -guc->timestamp.ping_delay); - - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { - int ret = guc_action_enable_usage_stats(guc); + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + ret = guc_action_enable_usage_stats(guc); - if (ret) - guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret)); - } + if (ret) + guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret)); + else + guc_enable_busyness_worker(guc); } void intel_guc_busyness_park(struct intel_gt *gt) @@ -1460,7 +1468,7 @@ void intel_guc_busyness_park(struct intel_gt *gt) * and causes an unclaimed register access warning. Cancel the worker * synchronously here. */ - cancel_delayed_work_sync(&guc->timestamp.work); + guc_cancel_busyness_worker(guc); /* * Before parking, we should sample engine busyness stats if we need to. @@ -1487,8 +1495,7 @@ void intel_guc_busyness_unpark(struct intel_gt *gt) spin_lock_irqsave(&guc->timestamp.lock, flags); guc_update_pm_timestamp(guc, &unused); spin_unlock_irqrestore(&guc->timestamp.lock, flags); - mod_delayed_work(system_highpri_wq, &guc->timestamp.work, -guc->timestamp.ping_delay); + guc_enable_busyness_worker(guc); } static inline bool @@ -4408,11 +4415,12 @@ void intel_guc_submission_enable(struct intel_guc *guc) guc_init_global_schedule_policy(guc); } +/* Note: By the time we're here, GuC may have already been reset */ void intel_guc_submission_disable(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - /* Note: By the time we're here, GuC may have already been reset */ + guc_cancel_busyness_worker(guc); /* Disable and route to host */ if (GRAPHICS_VER(gt->i915) >= 12) -- 2.39.1
Re: [PATCH v4 4/4] drm/msm/dpu: manage DPU resources if CTM is requested
On 13/02/2023 13:11, Kalyan Thota wrote: Allow modeset to be triggered during CTM enable/disable. In the modeset callbacks, DPU resources required for the CTM feature are managed appropriately. Signed-off-by: Kalyan Thota Reviewed-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/msm_atomic.c | 18 ++ drivers/gpu/drm/msm/msm_drv.c| 2 +- drivers/gpu/drm/msm/msm_drv.h| 1 + 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 1686fbb..e3e607c 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -179,6 +179,24 @@ static unsigned get_crtc_mask(struct drm_atomic_state *state) return mask; } +int msm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) +{ + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct drm_crtc *crtc; + int i; + I hope this can be gone for good if at some point we have CRTC resource allocation split from encoder resource alloc. + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + if ((old_crtc_state->ctm && !new_crtc_state->ctm) || + (!old_crtc_state->ctm && new_crtc_state->ctm)) { + new_crtc_state->mode_changed = true; + state->allow_modeset = true; + } + } + + return drm_atomic_helper_check(dev, state); +} + void msm_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 0759e2d..3221284 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -52,7 +52,7 @@ static const struct drm_mode_config_funcs mode_config_funcs = { .fb_create = msm_framebuffer_create, .output_poll_changed = drm_fb_helper_output_poll_changed, - .atomic_check = drm_atomic_helper_check, + .atomic_check = msm_atomic_check, .atomic_commit = drm_atomic_helper_commit, }; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index ea80846..7d0243a 
100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -209,6 +209,7 @@ int msm_atomic_init_pending_timer(struct msm_pending_timer *timer, struct msm_kms *kms, int crtc_idx); void msm_atomic_destroy_pending_timer(struct msm_pending_timer *timer); void msm_atomic_commit_tail(struct drm_atomic_state *state); +int msm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state); struct drm_atomic_state *msm_atomic_state_alloc(struct drm_device *dev); void msm_atomic_state_clear(struct drm_atomic_state *state); void msm_atomic_state_free(struct drm_atomic_state *state); -- With best wishes Dmitry
Re: [PATCH v13 13/13] drm/msm/disp/dpu: update dpu_enc crtc state on crtc enable/disable during self refresh
On 12/02/2023 18:28, Vinod Polimera wrote: Populate the encoder software structure to reflect the updated crtc appropriately during crtc enable/disable for a new commit while taking care of the self refresh transitions when crtc disable is triggered from the drm self refresh library. Signed-off-by: Vinod Polimera --- drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 29 + 1 file changed, 25 insertions(+), 4 deletions(-) Reviewed-by: Dmitry Baryshkov -- With best wishes Dmitry
Re: [PATCH v2 03/14] drm/msm/a6xx: Introduce GMU wrapper support
On 17.02.2023 22:44, Dmitry Baryshkov wrote: > On 17/02/2023 23:41, Konrad Dybcio wrote: >> >> >> On 17.02.2023 22:37, Dmitry Baryshkov wrote: >>> On 14/02/2023 19:31, Konrad Dybcio wrote: Some (particularly SMD_RPM, a.k.a non-RPMh) SoCs implement A6XX GPUs but don't implement the associated GMUs. This is due to the fact that the GMU directly pokes at RPMh. Sadly, this means we have to take care of enabling & scaling power rails, clocks and bandwidth ourselves. Reuse existing Adreno-common code and modify the deeply-GMU-infused A6XX code to facilitate these GPUs. This involves if-ing out lots of GMU callbacks and introducing a new type of GMU - GMU wrapper. This is essentially a register region which is convenient to model as a device. We'll use it for managing the GDSCs. >>> >>> Why do you call it a wrapper? >> That's what Qualcomm calls it.. The GMU-less GPUs have (almost) all the >> same GMU GX/CX registers as the real GMUs in this 'wrapper' region, so >> that lets us reuse some code with gmu_(read/write/rmw) calls. >> > > Ack. If you can add this to the commit message, it would be great. Sure! I spent so much time on this that I can't really tell what's obvious and what's not anymore, heh. Konrad >
Re: [PATCH v2 03/14] drm/msm/a6xx: Introduce GMU wrapper support
On 17/02/2023 23:41, Konrad Dybcio wrote: On 17.02.2023 22:37, Dmitry Baryshkov wrote: On 14/02/2023 19:31, Konrad Dybcio wrote: Some (particularly SMD_RPM, a.k.a non-RPMh) SoCs implement A6XX GPUs but don't implement the associated GMUs. This is due to the fact that the GMU directly pokes at RPMh. Sadly, this means we have to take care of enabling & scaling power rails, clocks and bandwidth ourselves. Reuse existing Adreno-common code and modify the deeply-GMU-infused A6XX code to facilitate these GPUs. This involves if-ing out lots of GMU callbacks and introducing a new type of GMU - GMU wrapper. This is essentially a register region which is convenient to model as a device. We'll use it for managing the GDSCs. Why do you call it a wrapper? That's what Qualcomm calls it.. The GMU-less GPUs have (almost) all the same GMU GX/CX registers as the real GMUs in this 'wrapper' region, so that lets us reuse some code with gmu_(read/write/rmw) calls. Ack. If you can add this to the commit message, it would be great. -- With best wishes Dmitry
Re: [PATCH v2 09/14] drm/msm/a6xx: Fix some A619 tunables
On 14/02/2023 19:31, Konrad Dybcio wrote: Adreno 619 expects some tunables to be set differently. Make up for it. Fixes: b7616b5c69e6 ("drm/msm/adreno: Add A619 support") Signed-off-by: Konrad Dybcio --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) Reviewed-by: Dmitry Baryshkov -- With best wishes Dmitry
Re: [PATCH v2 08/14] drm/msm/a6xx: Add A610 support
On 14/02/2023 19:31, Konrad Dybcio wrote: A610 is one of (if not the) lowest-tier SKUs in the A6XX family. It features no GMU, as it's implemented solely on SoCs with SMD_RPM. What's more interesting is that it does not feature a VDDGX line either, being powered solely by VDDCX and has an unfortunate hardware quirk that makes its reset line broken - after a couple of assert/ deassert cycles, it will hang for good and will not wake up again. This GPU requires mesa changes for proper rendering, and lots of them at that. The command streams are quite far away from any other A6XX GPU and hence it needs special care. This patch was validated both by running an (incomplete) downstream mesa with some hacks (frames rendered correctly, though some instructions made the GPU hangcheck which is expected - garbage in, garbage out) and by replaying RD traces captured with the downstream KGSL driver - no crashes there, ever. Add support for this GPU on the kernel side, which comes down to pretty simply adding A612 HWCG tables, altering a few values and adding a special case for handling the reset line. Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Minor nit below. --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 95 -- drivers/gpu/drm/msm/adreno/adreno_device.c | 13 +++ drivers/gpu/drm/msm/adreno/adreno_gpu.h| 8 +- 3 files changed, 106 insertions(+), 10 deletions(-) [skipped] @@ -1087,18 +1144,26 @@ static int hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804); gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4); - if (adreno_is_a640_family(adreno_gpu) || - adreno_is_a650_family(adreno_gpu)) + if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) { Keep this on two lines please. 
gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); - else + gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); + } else if (adreno_is_a610(adreno_gpu)) { + gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060); + gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16); + } else { gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x01c0); - gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); + gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); + } [skipped the rest] -- With best wishes Dmitry
Re: [PATCH v2 03/14] drm/msm/a6xx: Introduce GMU wrapper support
On 17.02.2023 22:37, Dmitry Baryshkov wrote: > On 14/02/2023 19:31, Konrad Dybcio wrote: >> Some (particularly SMD_RPM, a.k.a non-RPMh) SoCs implement A6XX GPUs >> but don't implement the associated GMUs. This is due to the fact that >> the GMU directly pokes at RPMh. Sadly, this means we have to take care >> of enabling & scaling power rails, clocks and bandwidth ourselves. >> >> Reuse existing Adreno-common code and modify the deeply-GMU-infused >> A6XX code to facilitate these GPUs. This involves if-ing out lots >> of GMU callbacks and introducing a new type of GMU - GMU wrapper. >> This is essentially a register region which is convenient to model >> as a device. We'll use it for managing the GDSCs. > > Why do you call it a wrapper? That's what Qualcomm calls it.. The GMU-less GPUs have (almost) all the same GMU GX/CX registers as the real GMUs in this 'wrapper' region, so that lets us reuse some code with gmu_(read/write/rmw) calls. > >> >> Signed-off-by: Konrad Dybcio >> --- >> drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 51 - >> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 198 +--- >> drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 + >> drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 14 +- >> drivers/gpu/drm/msm/adreno/adreno_gpu.h | 6 + >> 5 files changed, 233 insertions(+), 37 deletions(-) >> >> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c >> b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c >> index 90e636dcdd5b..5aa9f3ef41c2 100644 >> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c >> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c >> @@ -1474,6 +1474,7 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, >> struct platform_device *pdev, >> void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu) >> { >> + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; >> struct a6xx_gmu *gmu = &a6xx_gpu->gmu; >> struct platform_device *pdev = to_platform_device(gmu->dev); >> @@ -1493,10 +1494,12 @@ void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu) >> gmu->mmio = NULL; >> gmu->rscc = NULL; >> - 
a6xx_gmu_memory_free(gmu); >> + if (!adreno_has_gmu_wrapper(adreno_gpu)) { >> + a6xx_gmu_memory_free(gmu); >> - free_irq(gmu->gmu_irq, gmu); >> - free_irq(gmu->hfi_irq, gmu); >> + free_irq(gmu->gmu_irq, gmu); >> + free_irq(gmu->hfi_irq, gmu); >> + } >> /* Drop reference taken in of_find_device_by_node */ >> put_device(gmu->dev); >> @@ -1504,6 +1507,48 @@ void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu) >> gmu->initialized = false; >> } >> +int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node >> *node) >> +{ >> + struct platform_device *pdev = of_find_device_by_node(node); >> + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; >> + int ret; >> + >> + if (!pdev) >> + return -ENODEV; >> + >> + gmu->dev = &pdev->dev; >> + >> + of_dma_configure(gmu->dev, node, true); >> + >> + pm_runtime_enable(gmu->dev); >> + >> + /* Mark legacy for manual SPTPRAC control */ >> + gmu->legacy = true; >> + >> + /* Map the GMU registers */ >> + gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu"); >> + if (IS_ERR(gmu->mmio)) { >> + ret = PTR_ERR(gmu->mmio); >> + goto err_mmio; >> + } >> + >> + /* Get a link to the GX power domain to reset the GPU */ >> + gmu->gxpd = dev_pm_domain_attach_by_name(gmu->dev, "gx"); >> + >> + gmu->initialized = true; >> + >> + return 0; >> + >> +err_mmio: >> + iounmap(gmu->mmio); >> + ret = -ENODEV; >> + >> + /* Drop reference taken in of_find_device_by_node */ >> + put_device(gmu->dev); >> + >> + return ret; >> +} >> + >> int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) >> { >> struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; >> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> index 8855d798bbb3..72bf5c9f7ff1 100644 >> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> @@ -20,9 +20,11 @@ static inline bool _a6xx_check_idle(struct msm_gpu *gpu) >> struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); >> struct a6xx_gpu *a6xx_gpu = 
to_a6xx_gpu(adreno_gpu); >> - /* Check that the GMU is idle */ >> - if (!a6xx_gmu_isidle(&a6xx_gpu->gmu)) >> - return false; >> + if (!adreno_has_gmu_wrapper(adreno_gpu)) { >> + /* Check that the GMU is idle */ >> + if (!a6xx_gmu_isidle(&a6xx_gpu->gmu)) >> + return false; >> + } >> /* Check tha the CX master is idle */ >> if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) & >> @@ -612,13 +614,15 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool >> state) >> return; >> /* Disable SP clock before programming HWCG registers */ >> - gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); >> + if (!adreno_has_gmu_wrapper(adreno_gpu)) >> + gmu_rmw(gmu, REG_A6XX_GP
Re: [PATCH v2 03/14] drm/msm/a6xx: Introduce GMU wrapper support
On 14/02/2023 19:31, Konrad Dybcio wrote: Some (particularly SMD_RPM, a.k.a non-RPMh) SoCs implement A6XX GPUs but don't implement the associated GMUs. This is due to the fact that the GMU directly pokes at RPMh. Sadly, this means we have to take care of enabling & scaling power rails, clocks and bandwidth ourselves. Reuse existing Adreno-common code and modify the deeply-GMU-infused A6XX code to facilitate these GPUs. This involves if-ing out lots of GMU callbacks and introducing a new type of GMU - GMU wrapper. This is essentially a register region which is convenient to model as a device. We'll use it for managing the GDSCs. Why do you call it a wrapper? Signed-off-by: Konrad Dybcio --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 51 - drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 198 +--- drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 + drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 14 +- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 6 + 5 files changed, 233 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 90e636dcdd5b..5aa9f3ef41c2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -1474,6 +1474,7 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev, void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu) { + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; struct a6xx_gmu *gmu = &a6xx_gpu->gmu; struct platform_device *pdev = to_platform_device(gmu->dev); @@ -1493,10 +1494,12 @@ void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu) gmu->mmio = NULL; gmu->rscc = NULL; - a6xx_gmu_memory_free(gmu); + if (!adreno_has_gmu_wrapper(adreno_gpu)) { + a6xx_gmu_memory_free(gmu); - free_irq(gmu->gmu_irq, gmu); - free_irq(gmu->hfi_irq, gmu); + free_irq(gmu->gmu_irq, gmu); + free_irq(gmu->hfi_irq, gmu); + } /* Drop reference taken in of_find_device_by_node */ put_device(gmu->dev); @@ -1504,6 +1507,48 @@ void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu) 
gmu->initialized = false; } +int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) +{ + struct platform_device *pdev = of_find_device_by_node(node); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + int ret; + + if (!pdev) + return -ENODEV; + + gmu->dev = &pdev->dev; + + of_dma_configure(gmu->dev, node, true); + + pm_runtime_enable(gmu->dev); + + /* Mark legacy for manual SPTPRAC control */ + gmu->legacy = true; + + /* Map the GMU registers */ + gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu"); + if (IS_ERR(gmu->mmio)) { + ret = PTR_ERR(gmu->mmio); + goto err_mmio; + } + + /* Get a link to the GX power domain to reset the GPU */ + gmu->gxpd = dev_pm_domain_attach_by_name(gmu->dev, "gx"); + + gmu->initialized = true; + + return 0; + +err_mmio: + iounmap(gmu->mmio); + ret = -ENODEV; + + /* Drop reference taken in of_find_device_by_node */ + put_device(gmu->dev); + + return ret; +} + int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 8855d798bbb3..72bf5c9f7ff1 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -20,9 +20,11 @@ static inline bool _a6xx_check_idle(struct msm_gpu *gpu) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - /* Check that the GMU is idle */ - if (!a6xx_gmu_isidle(&a6xx_gpu->gmu)) - return false; + if (!adreno_has_gmu_wrapper(adreno_gpu)) { + /* Check that the GMU is idle */ + if (!a6xx_gmu_isidle(&a6xx_gpu->gmu)) + return false; + } /* Check tha the CX master is idle */ if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) & @@ -612,13 +614,15 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) return; /* Disable SP clock before programming HWCG registers */ - gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); + if (!adreno_has_gmu_wrapper(adreno_gpu)) + 
gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++) gpu_write(gpu, reg->offset, state ? reg->value : 0); /* Enable SP clock */ - gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1); + if (!adreno_has_gmu_wrapper(adreno_gpu)) + gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
Re: [PATCH v2 14/14] drm/msm/a6xx: Add A610 speedbin support
On 14/02/2023 19:31, Konrad Dybcio wrote: A610 is implemented on at least three SoCs: SM6115 (bengal), SM6125 (trinket) and SM6225 (khaje). Trinket does not support speed binning (only a single SKU exists) and we don't yet support khaje upstream. Hence, add a fuse mapping table for bengal to allow for per-chip frequency limiting. Signed-off-by: Konrad Dybcio --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 27 +++ 1 file changed, 27 insertions(+) Reviewed-by: Dmitry Baryshkov -- With best wishes Dmitry
Re: [PATCH v2 13/14] drm/msm/a6xx: Add A619_holi speedbin support
On 14/02/2023 19:31, Konrad Dybcio wrote: A619_holi is implemented on at least two SoCs: SM4350 (holi) and SM6375 (blair). This is what seems to be a first occurrence of this happening, but it's easy to overcome by guarding the SoC-specific fuse values with of_machine_is_compatible(). Do just that to enable frequency limiting on these SoCs. Signed-off-by: Konrad Dybcio --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 31 +++ 1 file changed, 31 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index ffe0fd431a76..94b4d93619ed 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -2094,6 +2094,34 @@ static u32 a618_get_speed_bin(u32 fuse) return UINT_MAX; } +static u32 a619_holi_get_speed_bin(u32 fuse) +{ + /* +* There are (at least) two SoCs implementing A619_holi: SM4350 (holi) +* and SM6375 (blair). Limit the fuse matching to the corresponding +* SoC to prevent bogus frequency setting (as improbable as it may be, +* given unexpected fuse values are.. unexpected! But still possible.) +*/ + + if (fuse == 0) + return 0; + + if (of_machine_is_compatible("qcom,sm4350")) { + if (fuse == 138) + return 1; + else if (fuse == 92) + return 2; + } else if (of_machine_is_compatible("qcom,sm6375")) { + if (fuse == 190) + return 1; + else if (fuse == 177) + return 2; Ugh. + } else + pr_warn("Unknown SoC implementing A619_holi!\n"); + + return UINT_MAX; +} + static u32 a619_get_speed_bin(u32 fuse) { if (fuse == 0) @@ -2153,6 +2181,9 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse) if (adreno_cmp_rev(ADRENO_REV(6, 1, 8, ANY_ID), rev)) val = a618_get_speed_bin(fuse); + else if (adreno_cmp_rev(ADRENO_REV(6, 1, 9, 1), rev)) I really think it begs to have && !of_find_property(dev->of_node, "qcom,gmu") here. 
+ val = a619_holi_get_speed_bin(fuse); + else if (adreno_cmp_rev(ADRENO_REV(6, 1, 9, ANY_ID), rev)) val = a619_get_speed_bin(fuse); -- With best wishes Dmitry
Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible
On 17/02/2023 21:23, Konrad Dybcio wrote: On 17.02.2023 22:20, Bryan O'Donoghue wrote: On 17/02/2023 21:16, Konrad Dybcio wrote: Correct, but QCM2290 is not supported upstream yet. SM6115 (a different SoC) however is, but it used the qcm2290 compatible as it was a convenient hack to get the DSI host ID recognized based on the (identical-to-qcm2290) base register without additional driver changes. We're now trying to untangle that mess.. Gand so what we want documented is: compatible = "qcom,qcs2290-dsi-ctrl", qcom,mdss-dsi-ctrl"; qcm* yes, this became documented with your original cleanup compatible = "qcom,sm6115-dsi-ctrl", qcom,mdss-dsi-ctrl"; and yes this became documented (well, in the DSI binding) in my other patch series and is finished being documented in this one with the old compatible = "qcom,dsi-ctrl-6g-qcm2290"; clanger continuing to be deprecated. correct, we still have to note it but keep it deprecated Konrad --- bod Cool. That maps to my understanding & the intention of the deprecation. --- bod
Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible
On 17.02.2023 22:20, Bryan O'Donoghue wrote: > On 17/02/2023 21:16, Konrad Dybcio wrote: >> Correct, but QCM2290 is not supported upstream yet. >> >> SM6115 (a different SoC) however is, but it used the qcm2290 compatible >> as it was a convenient hack to get the DSI host ID recognized based on >> the (identical-to-qcm2290) base register without additional driver changes. >> We're now trying to untangle that mess.. > > Gand so what we want documented is: > > compatible = "qcom,qcs2290-dsi-ctrl", qcom,mdss-dsi-ctrl"; qcm* yes, this became documented with your original cleanup > compatible = "qcom,sm6115-dsi-ctrl", qcom,mdss-dsi-ctrl"; and yes this became documented (well, in the DSI binding) in my other patch series and is finished being documented in this one > > with the old compatible = "qcom,dsi-ctrl-6g-qcm2290"; clanger continuing to > be deprecated. correct, we still have to note it but keep it deprecated Konrad > > --- > bod
Re: [PATCH v2 12/14] drm/msm/a6xx: Use "else if" in GPU speedbin rev matching
On 14/02/2023 19:31, Konrad Dybcio wrote: The GPU can only be one at a time. Turn a series of ifs into if + elseifs to save some CPU cycles. Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) -- With best wishes Dmitry
Re: [PATCH v2 07/14] drm/msm/a6xx: Add support for A619_holi
On 17.02.2023 22:19, Dmitry Baryshkov wrote: > On 14/02/2023 19:31, Konrad Dybcio wrote: >> A619_holi is a GMU-less variant of the already-supported A619 GPU. >> It's present on at least SM4350 (holi) and SM6375 (blair). No mesa >> changes are required. Add the required kernel-side support for it. >> >> Signed-off-by: Konrad Dybcio >> --- >> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 37 +- >> drivers/gpu/drm/msm/adreno/adreno_device.c | 13 >> drivers/gpu/drm/msm/adreno/adreno_gpu.h | 5 +++ >> 3 files changed, 47 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> index 75cf94b03c29..c168712a0dc4 100644 >> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> @@ -614,14 +614,14 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool >> state) >> return; >> /* Disable SP clock before programming HWCG registers */ >> - if (!adreno_has_gmu_wrapper(adreno_gpu)) >> + if ((!adreno_has_gmu_wrapper(adreno_gpu) || >> adreno_is_a619_holi(adreno_gpu))) > > Extra parenthesis made me interpret this incorrectly. Maybe you can remove > them and spit the condition onto two lines? Because my first interpretation > was: > if (!(has_gmu_wrapper || a619_holi)). Yeah, I agree this is confusing.. will fix. > > >> gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); >> for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++) >> gpu_write(gpu, reg->offset, state ? reg->value : 0); >> /* Enable SP clock */ >> - if (!adreno_has_gmu_wrapper(adreno_gpu)) >> + if ((!adreno_has_gmu_wrapper(adreno_gpu) || >> adreno_is_a619_holi(adreno_gpu))) >> gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1); >> gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? 
clock_cntl_on : 0); >> @@ -1007,7 +1007,12 @@ static int hw_init(struct msm_gpu *gpu) >> } >> /* Clear GBIF halt in case GX domain was not collapsed */ >> - if (a6xx_has_gbif(adreno_gpu)) { >> + if (adreno_is_a619_holi(adreno_gpu)) { >> + gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); >> + gpu_write(gpu, 0x18, 0); >> + /* Let's make extra sure that the GPU can access the memory.. */ >> + mb(); >> + } else if (a6xx_has_gbif(adreno_gpu)) { >> gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); >> gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0); >> /* Let's make extra sure that the GPU can access the memory.. */ >> @@ -1016,6 +1021,9 @@ static int hw_init(struct msm_gpu *gpu) >> gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); >> + if (adreno_is_a619_holi(adreno_gpu)) >> + a6xx_sptprac_enable(gmu); >> + >> /* >> * Disable the trusted memory range - we don't actually supported >> secure >> * memory rendering at this point in time and we don't want to block >> off >> @@ -1293,7 +1301,8 @@ static void a6xx_dump(struct msm_gpu *gpu) >> #define GBIF_CLIENT_HALT_MASK BIT(0) >> #define GBIF_ARB_HALT_MASK BIT(1) >> #define VBIF_RESET_ACK_TIMEOUT 100 >> -#define VBIF_RESET_ACK_MASK 0x00f0 >> +#define VBIF_RESET_ACK_MASK 0xF0 >> +#define GPR0_GBIF_HALT_REQUEST 0x1E0 >> static void a6xx_recover(struct msm_gpu *gpu) >> { >> @@ -1350,10 +1359,16 @@ static void a6xx_recover(struct msm_gpu *gpu) >> /* Software-reset the GPU */ >> if (adreno_has_gmu_wrapper(adreno_gpu)) { >> - /* Halt the GX side of GBIF */ >> - gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, GBIF_GX_HALT_MASK); >> - spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & >> - GBIF_GX_HALT_MASK); >> + if (adreno_is_a619_holi(adreno_gpu)) { >> + gpu_write(gpu, 0x18, GPR0_GBIF_HALT_REQUEST); >> + spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) & >> + (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK); >> + } else { >> + /* Halt the GX side of GBIF */ >> + gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, GBIF_GX_HALT_MASK); >> + 
spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & >> + GBIF_GX_HALT_MASK); >> + } >> /* Halt new client requests on GBIF */ >> gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); >> @@ -1763,6 +1778,9 @@ static int a6xx_pm_resume(struct msm_gpu *gpu) >> if (ret) >> return ret; >> + if (adreno_is_a619_holi(adreno_gpu)) >> + a6xx_sptprac_enable(gmu); >> + >> mutex_unlock(&a6xx_gpu->gmu.lock); >> msm_devfreq_resume(gpu); >> @@ -1795,6 +1813,9 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) >> mutex_lock(&a6xx_gpu->gmu.lock); >> + if (adreno_is_a619_holi(adreno_gpu)) >> + a6xx_sptprac_disable(gmu); >> + >> ret = clk_prepare_
Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible
On 17/02/2023 21:16, Konrad Dybcio wrote: Correct, but QCM2290 is not supported upstream yet. SM6115 (a different SoC) however is, but it used the qcm2290 compatible as it was a convenient hack to get the DSI host ID recognized based on the (identical-to-qcm2290) base register without additional driver changes. We're now trying to untangle that mess.. And so what we want documented is: compatible = "qcom,qcs2290-dsi-ctrl", qcom,mdss-dsi-ctrl"; compatible = "qcom,sm6115-dsi-ctrl", qcom,mdss-dsi-ctrl"; with the old compatible = "qcom,dsi-ctrl-6g-qcm2290"; clanger continuing to be deprecated. --- bod
Re: [PATCH v2 11/14] drm/msm/a6xx: Enable optional icc voting from OPP tables
On 14/02/2023 19:31, Konrad Dybcio wrote: On GMU-equipped GPUs, the GMU requests appropriate bandwidth votes for us. This is however not the case for the other GPUs. Add the dev_pm_opp_of_find_icc_paths() call to let the OPP framework handle bus voting as part of power level setting. Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index d6b38bfdb3b4..b08ed127f8c4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -2338,5 +2338,9 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a6xx_fault_handler); + ret = dev_pm_opp_of_find_icc_paths(&pdev->dev, NULL); + if (ret) + return ERR_PTR(ret); + return gpu; } -- With best wishes Dmitry
Re: [PATCH v2 07/14] drm/msm/a6xx: Add support for A619_holi
On 14/02/2023 19:31, Konrad Dybcio wrote: A619_holi is a GMU-less variant of the already-supported A619 GPU. It's present on at least SM4350 (holi) and SM6375 (blair). No mesa changes are required. Add the required kernel-side support for it. Signed-off-by: Konrad Dybcio --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 37 +- drivers/gpu/drm/msm/adreno/adreno_device.c | 13 drivers/gpu/drm/msm/adreno/adreno_gpu.h| 5 +++ 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 75cf94b03c29..c168712a0dc4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -614,14 +614,14 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) return; /* Disable SP clock before programming HWCG registers */ - if (!adreno_has_gmu_wrapper(adreno_gpu)) + if ((!adreno_has_gmu_wrapper(adreno_gpu) || adreno_is_a619_holi(adreno_gpu))) Extra parenthesis made me interpret this incorrectly. Maybe you can remove them and spit the condition onto two lines? Because my first interpretation was: if (!(has_gmu_wrapper || a619_holi)). gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++) gpu_write(gpu, reg->offset, state ? reg->value : 0); /* Enable SP clock */ - if (!adreno_has_gmu_wrapper(adreno_gpu)) + if ((!adreno_has_gmu_wrapper(adreno_gpu) || adreno_is_a619_holi(adreno_gpu))) gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1); gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0); @@ -1007,7 +1007,12 @@ static int hw_init(struct msm_gpu *gpu) } /* Clear GBIF halt in case GX domain was not collapsed */ - if (a6xx_has_gbif(adreno_gpu)) { + if (adreno_is_a619_holi(adreno_gpu)) { + gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); + gpu_write(gpu, 0x18, 0); + /* Let's make extra sure that the GPU can access the memory.. 
*/ + mb(); + } else if (a6xx_has_gbif(adreno_gpu)) { gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0); /* Let's make extra sure that the GPU can access the memory.. */ @@ -1016,6 +1021,9 @@ static int hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); + if (adreno_is_a619_holi(adreno_gpu)) + a6xx_sptprac_enable(gmu); + /* * Disable the trusted memory range - we don't actually supported secure * memory rendering at this point in time and we don't want to block off @@ -1293,7 +1301,8 @@ static void a6xx_dump(struct msm_gpu *gpu) #define GBIF_CLIENT_HALT_MASK BIT(0) #define GBIF_ARB_HALT_MASKBIT(1) #define VBIF_RESET_ACK_TIMEOUT100 -#define VBIF_RESET_ACK_MASK0x00f0 +#define VBIF_RESET_ACK_MASK0xF0 +#define GPR0_GBIF_HALT_REQUEST 0x1E0 static void a6xx_recover(struct msm_gpu *gpu) { @@ -1350,10 +1359,16 @@ static void a6xx_recover(struct msm_gpu *gpu) /* Software-reset the GPU */ if (adreno_has_gmu_wrapper(adreno_gpu)) { - /* Halt the GX side of GBIF */ - gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, GBIF_GX_HALT_MASK); - spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & - GBIF_GX_HALT_MASK); + if (adreno_is_a619_holi(adreno_gpu)) { + gpu_write(gpu, 0x18, GPR0_GBIF_HALT_REQUEST); + spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) & + (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK); + } else { + /* Halt the GX side of GBIF */ + gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, GBIF_GX_HALT_MASK); + spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & + GBIF_GX_HALT_MASK); + } /* Halt new client requests on GBIF */ gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); @@ -1763,6 +1778,9 @@ static int a6xx_pm_resume(struct msm_gpu *gpu) if (ret) return ret; + if (adreno_is_a619_holi(adreno_gpu)) + a6xx_sptprac_enable(gmu); + mutex_unlock(&a6xx_gpu->gmu.lock); msm_devfreq_resume(gpu); @@ -1795,6 +1813,9 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) mutex_lock(&a6xx_gpu->gmu.lock); + if 
(adreno_is_a619_holi(adreno_gpu)) + a6xx_sptprac_disable(gmu); + ret = clk_prepare_enable(gpu->ebi1_clk); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/adre
Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible
On 17.02.2023 22:13, Bryan O'Donoghue wrote: > On 17/02/2023 12:24, Krzysztof Kozlowski wrote: >> First, it would be nice to know what was the intention of Bryan's commit? > > Sorry I've been grazing this thread but, not responding. > > - qcom,dsi-ctrl-6g-qcm2290 > > is non-compliant with qcom,socid-dsi-ctrl which is our desired naming > convention, so that's what the deprecation is about i.e. moving this compat > to "qcom,qcm2290-dsi-ctrl" > > Actually I have the question why we are deciding to go with "sm6115" instead > of "qcm2290" ? > > The stamp on the package you receive from Thundercomm says "qcm2290" not > "sm6115" Correct, but QCM2290 is not supported upstream yet. SM6115 (a different SoC) however is, but it used the qcm2290 compatible as it was a convenient hack to get the DSI host ID recognized based on the (identical-to-qcm2290) base register without additional driver changes. We're now trying to untangle that mess.. Konrad > > ? > > --- > bod > >
Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible
On 17/02/2023 12:24, Krzysztof Kozlowski wrote: First, it would be nice to know what was the intention of Bryan's commit? Sorry I've been grazing this thread but, not responding. - qcom,dsi-ctrl-6g-qcm2290 is non-compliant with qcom,socid-dsi-ctrl which is our desired naming convention, so that's what the deprecation is about i.e. moving this compat to "qcom,qcm2290-dsi-ctrl" Actually I have the question why we are deciding to go with "sm6115" instead of "qcm2290" ? The stamp on the package you receive from Thundercomm says "qcm2290" not "sm6115" ? --- bod
Re: [PATCH v2 06/14] drm/msm/gpu: Use dev_pm_opp_set_rate for non-GMU GPUs
On 14/02/2023 19:31, Konrad Dybcio wrote: Currently we only utilize the OPP table connected to the GPU for getting (available) frequencies. We do however need to scale the voltage rail(s) accordingly to ensure that we aren't trying to run the GPU at 1GHz with a VDD_LOW vote, as that would result in an otherwise inexplainable hang. Tell the OPP framework that we want to scale the "core" clock and swap out the clk_set_rate to a dev_pm_opp_set_rate in msm_devfreq_target() to enable usage of required-opps and by extension proper voltage level/corner scaling. Signed-off-by: Konrad Dybcio --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 drivers/gpu/drm/msm/msm_gpu_devfreq.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index ce6b76c45b6f..15e405e4f977 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -1047,6 +1047,10 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, const char *gpu_name; u32 speedbin; + /* This can only be done here, or devm_pm_opp_set_supported_hw will WARN_ON() */ + if (!IS_ERR(devm_clk_get(dev, "core"))) + devm_pm_opp_set_clkname(dev, "core"); Can we instead move a call to a6xx_set_supported_hw() / check_speed_bin after the adreno_gpu_init() ? It will call msm_gpu_init, which in turn sets gpu->core_clk. Ideally you can call devm_pm_opp_set_clkname() from that function. Or maybe completely drop gpu->core_clk and always use devm_pm_opp_set_clk_rate(). 
+ adreno_gpu->funcs = funcs; adreno_gpu->info = adreno_info(config->rev); adreno_gpu->gmem = adreno_gpu->info->gmem; diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index e27dbf12b5e8..ea70c1c32d94 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -48,7 +48,7 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq, gpu->funcs->gpu_set_freq(gpu, opp, df->suspended); mutex_unlock(&df->lock); } else { - clk_set_rate(gpu->core_clk, *freq); + dev_pm_opp_set_rate(dev, *freq); This is not enough, there are calls to clk_set_rate(gpu->core_clk) in msm_gpu.c which are called from the suspend/resume path. } dev_pm_opp_put(opp); -- With best wishes Dmitry
Re: [PATCH v2 05/14] drm/msm/adreno: Disable has_cached_coherent for A610/A619_holi
On 14/02/2023 19:31, Konrad Dybcio wrote: These SKUs don't support the feature. Disable it to make the GPU stop crashing after almost each and every submission - the received data on the GPU end was simply incomplete and garbled, resulting in almost nothing being executed properly. Signed-off-by: Konrad Dybcio --- drivers/gpu/drm/msm/adreno/adreno_device.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 36f062c7582f..82757f005a1a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -540,7 +540,13 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) config.rev.minor, config.rev.patchid); priv->is_a2xx = config.rev.core == 2; - priv->has_cached_coherent = config.rev.core >= 6; + + if (config.rev.core >= 6) { + /* Exclude A610 and A619_holi */ + if (!(adreno_cmp_rev(ADRENO_REV(6, 1, 0, ANY_ID), config.rev) || + adreno_cmp_rev(ADRENO_REV(6, 1, 9, 1), config.rev))) + priv->has_cached_coherent = true; + } I'd suggest something like: if (config.rev.core >= 6 && !(info.quirks & ADRENO_QUIRK_NO_CACHE_COHERENT)) priv->has_cache_coherent = true; Let's keep all the information and quirks in a single place. gpu = info->init(drm); if (IS_ERR(gpu)) { -- With best wishes Dmitry
Re: [RFC PATCH 00/20] Initial Xe driver submission
Hi all, [I thought I've sent this out earlier this week, but alas got stuck, kinda bad timing now since I'm out next week but oh well] So xe is a quite substantial thing, and I think we need a clear plan how to land this or it will take forever, and managers will panic. Also I'm not a big fan of "Dave/me reviews everything", we defacto had that for amd's dc/dal and it was not fun. The idea here is how to get everything reviewed without having two people end up somewhat arbitrary as deciders. I've compiled a bunch of topics on what I think the important areas are, first code that should be consistent about new-style render drivers that are aimed for vk/compute userspace as the primary feature driver: - figure out consensus solution for fw scheduler and drm/sched frontend among interested driver parties (probably xe, amdgpu, nouveau, new panfrost) - for the interface itself it might be good to have the drm_gpu_scheduler as the single per-hw-engine driver api object (but internally a new structure), while renaming the current drm_gpu_scheduler to drm_gpu_sched_internal. That way I think we can address the main critique of the current xe scheduler plan - keep the drm_gpu_sched_internal : drm_sched_entity 1:1 relationship for fw scheduler - keep the driver api relationship of drm_gpu_scheduler : drm_sched_entity 1:n, the api functions simply iterate over a mutex protect list of internal schedulers. this should also help drivers with locking mistakes around setup/teardown and gpu reset. - drivers select with a flag or something between the current mode (where the drm_gpu_sched_internal is attached to the drm_gpu_scheduler api object) or the new fw scheduler mode (where drm_gpu_sched_internal is attached to the drm_sched_entity) - overall still no fundamental changes (like the current patches) to drm/sched data structures and algorithms. But unlike the current patches we keep the possibility open for eventual refactoring without having to again refactor all the drivers. 
Even better, we can delay such refactoring until we have a handful of real-word drivers test-driving this all so we know we actually do the right thing. This should allow us to address all the fairness/efficiency/whatever concerns that have been floating around without having to fix them all up upfront, before we actually know what needs to be fixed. - the generic scheduler code should also including the handling of endless compute contexts, with the minimal scaffolding for preempt-ctx fences (probably on the drm_sched_entity) and making sure drm/sched can cope with the lack of job completion fence. This is very minimal amounts of code, but it helps a lot for cross-driver review if this works the same (with the same locking and all that) for everyone. Ideally this gets extracted from amdkfd, but as long as it's going to be used by all drivers supporting endless/compute context going forward it's good enough. - I'm assuming this also means Matt Brost will include a patch to add himself as drm/sched reviewer in MAINTAINERS, or at least something like that - adopt the gem_exec/vma helpers. again we probably want consensus here among the same driver projects. I don't care whether these helpers specify the ioctl structs or not, but they absolutely need to enforce the overall locking scheme for all major structs and list (so vm and vma). - we also should have cross-driver consensus on async vm_bind support. I think everyone added in-syncobj support, the real fun is probably more in/out userspace memory fences (and personally I'm still not sure that's a good idea but ... *eh*). I think cross driver consensus on how this should work (ideally with helper support so people don't get it wrong in all the possible ways) would be best. - this also means some userptr integration and some consensus how userptr should work for vm_bind across drivers. I don't think allowing drivers to reinvent that wheel is a bright idea, there's just a bit too much to get wrong here. 
- for some of these the consensus might land on more/less shared code than what I sketched out above, the important part really is that we have consensus on these. Kinda similar to how the atomic kms infrastructure move a _lot_ more of the code back into drivers, because they really just needed the flexibility to program the hw correctly. Right now we definitely don't have enough shared code, for sure with i915-gem, but we also need to make sure we're not overcorrecting too badly (a bit of overcorrecting generally doesn't hurt). All the above will make sure that the driver overall is in concepts and design aligned with the overall community direction, but I think it'd still be good if someone outside of the intel gpu group reviews the driver code itself. Last time we had a huge driver submission (amd's DC/DAL) this fell on Dave&me, but this time around I think we have
Re: [PATCH v2 02/14] drm/msm/a6xx: Extend UBWC config
On 17.02.2023 21:46, Dmitry Baryshkov wrote: > On 14/02/2023 19:31, Konrad Dybcio wrote: >> Port setting min_access_length, ubwc_mode and upper_bit from downstream. >> Values were validated using downstream device trees for SM8[123]50 and >> left default (as per downstream) elsewhere. >> >> Signed-off-by: Konrad Dybcio >> --- >> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 29 +++ >> 1 file changed, 21 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> index c5f5d0bb3fdc..8855d798bbb3 100644 >> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c >> @@ -786,17 +786,25 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) >> static void a6xx_set_ubwc_config(struct msm_gpu *gpu) >> { >> struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); >> - u32 lower_bit = 2; >> + u32 lower_bit = 1; > > Any reason to change the default value here? > If it is to match chipsets you are adding, it might be worth splitting this > change to that patch. Not really now that I think about it, especially since the correct default value should be zero: -- part of msm-4.19 -- bit = adreno_dev->highest_bank_bit ? adreno_dev->highest_bank_bit - 13 : 0; lower_bit = bit & 0x3; upper_bit = (bit >> 0x2) & 1; where adreno_dev->highest_bank_bit is read from the dt property "qcom,highest-bank-bit" Anyway, I should be able to verify it for all the SoCs which we support. 
Konrad > >> + u32 upper_bit = 0; >> u32 amsbc = 0; >> u32 rgb565_predicator = 0; >> u32 uavflagprd_inv = 0; >> + u32 min_acc_len = 0; >> + u32 ubwc_mode = 0; >> /* a618 is using the hw default values */ >> if (adreno_is_a618(adreno_gpu)) >> return; >> - if (adreno_is_a640_family(adreno_gpu)) >> + if (adreno_is_a630(adreno_gpu)) >> + lower_bit = 2; >> + >> + if (adreno_is_a640_family(adreno_gpu)) { >> amsbc = 1; >> + lower_bit = 2; >> + } >> if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) { >> /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */ >> @@ -807,18 +815,23 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) >> } >> if (adreno_is_7c3(adreno_gpu)) { >> - lower_bit = 1; >> amsbc = 1; >> rgb565_predicator = 1; >> uavflagprd_inv = 2; >> } >> gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, >> - rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1); >> - gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1); >> - gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, >> - uavflagprd_inv << 4 | lower_bit << 1); >> - gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21); >> + rgb565_predicator << 11 | upper_bit << 10 | amsbc << 4 | >> + min_acc_len << 3 | lower_bit << 1 | ubwc_mode); >> + >> + gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, upper_bit << 4 | >> + min_acc_len << 3 | lower_bit << 1 | ubwc_mode); >> + >> + gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, upper_bit << 10 | >> + uavflagprd_inv << 4 | min_acc_len << 3 | >> + lower_bit << 1 | ubwc_mode); >> + >> + gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, min_acc_len << 23 | lower_bit >> << 21); >> } >> static int a6xx_cp_init(struct msm_gpu *gpu) >
Re: [PATCH v2 02/14] drm/msm/a6xx: Extend UBWC config
On 14/02/2023 19:31, Konrad Dybcio wrote: Port setting min_access_length, ubwc_mode and upper_bit from downstream. Values were validated using downstream device trees for SM8[123]50 and left default (as per downstream) elsewhere. Signed-off-by: Konrad Dybcio --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 29 +++ 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index c5f5d0bb3fdc..8855d798bbb3 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -786,17 +786,25 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) static void a6xx_set_ubwc_config(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - u32 lower_bit = 2; + u32 lower_bit = 1; Any reason to change the default value here? If it is to match chipsets you are adding, it might be worth splitting this change to that patch. + u32 upper_bit = 0; u32 amsbc = 0; u32 rgb565_predicator = 0; u32 uavflagprd_inv = 0; + u32 min_acc_len = 0; + u32 ubwc_mode = 0; /* a618 is using the hw default values */ if (adreno_is_a618(adreno_gpu)) return; - if (adreno_is_a640_family(adreno_gpu)) + if (adreno_is_a630(adreno_gpu)) + lower_bit = 2; + + if (adreno_is_a640_family(adreno_gpu)) { amsbc = 1; + lower_bit = 2; + } if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) { /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */ @@ -807,18 +815,23 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) } if (adreno_is_7c3(adreno_gpu)) { - lower_bit = 1; amsbc = 1; rgb565_predicator = 1; uavflagprd_inv = 2; } gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, - rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1); - gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1); - gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, - uavflagprd_inv << 4 | lower_bit << 1); - gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21); + rgb565_predicator << 11 | upper_bit << 10 | amsbc << 4 
| + min_acc_len << 3 | lower_bit << 1 | ubwc_mode); + + gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, upper_bit << 4 | + min_acc_len << 3 | lower_bit << 1 | ubwc_mode); + + gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, upper_bit << 10 | + uavflagprd_inv << 4 | min_acc_len << 3 | + lower_bit << 1 | ubwc_mode); + + gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, min_acc_len << 23 | lower_bit << 21); } static int a6xx_cp_init(struct msm_gpu *gpu) -- With best wishes Dmitry
Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits
On Fri, Feb 17, 2023 at 09:00:49AM -0800, Rob Clark wrote: > On Fri, Feb 17, 2023 at 8:03 AM Tvrtko Ursulin > wrote: > > > > > > On 17/02/2023 14:55, Rob Clark wrote: > > > On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin > > > wrote: > > >> > > >> > > >> On 16/02/2023 18:19, Rodrigo Vivi wrote: > > >>> On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote: > > On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin > > wrote: > > > > > > From: Tvrtko Ursulin > > > > > > In i915 we have this concept of "wait boosting" where we give a > > > priority boost > > > for instance to fences which are actively waited upon from userspace. > > > This has > > > it's pros and cons and can certainly be discussed at lenght. However > > > fact is > > > some workloads really like it. > > > > > > Problem is that with the arrival of drm syncobj and a new userspace > > > waiting > > > entry point it added, the waitboost mechanism was bypassed. Hence I > > > cooked up > > > this mini series really (really) quickly to see if some discussion > > > can be had. > > > > > > It adds a concept of "wait count" to dma fence, which is incremented > > > for every > > > explicit dma_fence_enable_sw_signaling and > > > dma_fence_add_wait_callback (like > > > dma_fence_add_callback but from explicit/userspace wait paths). > > > > I was thinking about a similar thing, but in the context of dma_fence > > (or rather sync_file) fd poll()ing. How does the kernel differentiate > > between "housekeeping" poll()ers that don't want to trigger boost but > > simply know when to do cleanup, and waiters who are waiting with some > > urgency. I think we could use EPOLLPRI for this purpose. > > > > Not sure how that translates to waits via the syncobj. But I think we > > want to let userspace give some hint about urgent vs housekeeping > > waits. > > >>> > > >>> Should the hint be on the waits, or should the hints be on the executed > > >>> context? 
> > >>> > > >>> In the end we need some way to quickly ramp-up the frequency to avoid > > >>> the execution bubbles. > > >>> > > >>> waitboost is trying to guess that, but in some cases it guess wrong > > >>> and waste power. > > >> > > >> Do we have a list of workloads which shows who benefits and who loses > > >> from the current implementation of waitboost? > > >>> btw, this is something that other drivers might need: > > >>> > > >>> https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883 > > >>> Cc: Alex Deucher > > >> > > >> I have several issues with the context hint if it would directly > > >> influence frequency selection in the "more power" direction. > > >> > > >> First of all, assume a context hint would replace the waitboost. Which > > >> applications would need to set it to restore the lost performance and > > >> how would they set it? > > >> > > >> Then I don't even think userspace necessarily knows. Think of a layer > > >> like OpenCL. It doesn't really know in advance the profile of > > >> submissions vs waits. It depends on the CPU vs GPU speed, so hardware > > >> generation, and the actual size of the workload which can be influenced > > >> by the application (or user) and not the library. > > >> > > >> The approach also lends itself well for the "arms race" where every > > >> application can say "Me me me, I am the most important workload there > > >> is!". > > > > > > since there is discussion happening in two places: > > > > > > https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433 > > > > > > What I think you might want is a ctx boost_mask which lets an app or > > > driver disable certain boost signals/classes. Where fence waits is > > > one class of boost, but hypothetical other signals like touchscreen > > > (or other) input events could be another class of boost. A compute > > > workload might be interested in fence wait boosts but could care less > > > about input events. 
> > > > I think it can only be apps which could have any chance knowing whether > > their use of a library is latency sensitive or not. Which means new > > library extensions and their adoption. So I have some strong reservation > > that route is feasible. > > > > Or we tie with priority which many drivers do. Normal and above gets the > > boosting and what lowered itself does not (aka SCHED_IDLE/SCHED_BATCH). > > yeah, that sounds reasonable. > on that gitlab-issue discussion Emma Anholt was against using the priority to influence frequency since that should be more about latency. or we are talking about something different priority here? > > Related note is that we lack any external control of our scheduling > > decisions so we really do suck compared to other scheduling domains like > > CPU and IO etc. > > > > >> The last concern is for me shared with the proposal to expose deadlines > > >> or high priority waits as explicit
Re: [PATCH v2 01/14] drm/msm/a6xx: De-staticize sptprac en/disable functions
On 14/02/2023 19:31, Konrad Dybcio wrote: These two will be reused by at least A619_holi in the non-gmu paths. De-staticize them to make it possible. Nit: 'remove static annotation' or something like that. Other than that: Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 4 ++-- drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index f3c9600221d4..90e636dcdd5b 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -354,7 +354,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) } /* Enable CPU control of SPTP power power collapse */ -static int a6xx_sptprac_enable(struct a6xx_gmu *gmu) +int a6xx_sptprac_enable(struct a6xx_gmu *gmu) { int ret; u32 val; @@ -376,7 +376,7 @@ static int a6xx_sptprac_enable(struct a6xx_gmu *gmu) } /* Disable CPU control of SPTP power power collapse */ -static void a6xx_sptprac_disable(struct a6xx_gmu *gmu) +void a6xx_sptprac_disable(struct a6xx_gmu *gmu) { u32 val; int ret; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index e034935b3986..ec28abdd327b 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -186,5 +186,7 @@ int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, int index); bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu); bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu); +void a6xx_sptprac_disable(struct a6xx_gmu *gmu); +int a6xx_sptprac_enable(struct a6xx_gmu *gmu); #endif -- With best wishes Dmitry
Re: [PATCH 2/2] drm/i915/guc: Fix missing return code checks in submission init
On 1/24/2023 17:01, Ceraolo Spurio, Daniele wrote: On 1/11/2023 5:54 PM, john.c.harri...@intel.com wrote: From: John Harrison The CI results for the 'fast request' patch set (enables error return codes for fire-and-forget H2G messages) hit an issue with the KMD sending context submission requests on an invalid context. That was caused by a fault injection probe failing the context creation of a kernel context. However, there was no return code checking on any of the kernel context registration paths. So the driver kept going and tried to use the kernel context for the record defaults process. This would not cause any actual problems. The invalid requests would be rejected by GuC and ultimately the start up sequence would correctly wedge due to the context creation failure. But fixing the issue correctly rather than ignoring it means we won't get CI complaining when the fast request patch lands and enables the extra error checking. So fix it by checking for errors and aborting as appropriate when creating kernel contexts. While at it, clean up some other submission init related failure cleanup paths. Also, rename guc_init_lrc_mapping to guc_init_submission as the former name hasn't been valid in a long time. 
Signed-off-by: John Harrison --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 ++- .../gpu/drm/i915/gt/uc/intel_guc_submission.h | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 7 +- 3 files changed, 75 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 982364777d0c6..dd856fd92945b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1431,7 +1431,7 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static void guc_init_engine_stats(struct intel_guc *guc) +static int guc_init_engine_stats(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); intel_wakeref_t wakeref; @@ -1447,6 +1447,8 @@ static void guc_init_engine_stats(struct intel_guc *guc) cancel_delayed_work_sync(&guc->timestamp.work); drm_err(>->i915->drm, "Failed to enable usage stats: %d!\n", ret); } + + return ret; } static void guc_park_engine_stats(struct intel_guc *guc) @@ -4108,9 +4110,11 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = guc_submit_request; } -static inline void guc_kernel_context_pin(struct intel_guc *guc, - struct intel_context *ce) +static inline int guc_kernel_context_pin(struct intel_guc *guc, + struct intel_context *ce) { + int ret; + /* * Note: we purposefully do not check the returns below because * the registration can only fail if a reset is just starting. @@ -4118,16 +4122,24 @@ static inline void guc_kernel_context_pin(struct intel_guc *guc, * isn't happening and even it did this code would be run again. */ - if (context_guc_id_invalid(ce)) - pin_guc_id(guc, ce); + if (context_guc_id_invalid(ce)) { + int ret = pin_guc_id(guc, ce); Why do you need a local ret variable inside this if statement, when you already have a function-level one? or is it just a cut & paste error? 
Yeah, copy/paste thing. + + if (ret < 0) + return ret; + } if (!test_bit(CONTEXT_GUC_INIT, &ce->flags)) guc_context_init(ce); - try_context_registration(ce, true); + ret = try_context_registration(ce, true); + if (ret) + unpin_guc_id(guc, ce); + + return ret; } -static inline void guc_init_lrc_mapping(struct intel_guc *guc) +static inline int guc_init_submission(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; @@ -4154,9 +4166,17 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) struct intel_context *ce; list_for_each_entry(ce, &engine->pinned_contexts_list, - pinned_contexts_link) - guc_kernel_context_pin(guc, ce); + pinned_contexts_link) { + int ret = guc_kernel_context_pin(guc, ce); + + if (ret) { + /* No point in trying to clean up as i915 will wedge on failure */ + return ret; + } + } } + + return 0; } static void guc_release(struct intel_engine_cs *engine) @@ -4400,30 +4420,57 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc) return ret; } -void intel_guc_submission_enable(struct intel_guc *guc) +static void guc_route_semaphores(struct intel_guc *guc, bool to_guc) { struct intel_gt *gt = guc_to_gt(guc); + u32 val; - /* Enable and route to GuC */ - if (GRAPHICS_VER(gt->i91
Re: [PATCH] drm/amdkfd: Make kobj_type structures constant
Applied. Thanks! Alex On Wed, Feb 15, 2023 at 8:09 PM Thomas Weißschuh wrote: > > Since commit ee6d3dd4ed48 ("driver core: make kobj_type constant.") > the driver core allows the usage of const struct kobj_type. > > Take advantage of this to constify the structure definitions to prevent > modification at runtime. > > Signed-off-by: Thomas Weißschuh > --- > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 > drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 10 +- > 2 files changed, 9 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > index 51b1683ac5c1..8d719f90db40 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > @@ -344,7 +344,7 @@ static const struct sysfs_ops kfd_procfs_ops = { > .show = kfd_procfs_show, > }; > > -static struct kobj_type procfs_type = { > +static const struct kobj_type procfs_type = { > .release = kfd_procfs_kobj_release, > .sysfs_ops = &kfd_procfs_ops, > }; > @@ -469,7 +469,7 @@ static const struct sysfs_ops procfs_queue_ops = { > .show = kfd_procfs_queue_show, > }; > > -static struct kobj_type procfs_queue_type = { > +static const struct kobj_type procfs_queue_type = { > .sysfs_ops = &procfs_queue_ops, > .default_groups = procfs_queue_groups, > }; > @@ -478,7 +478,7 @@ static const struct sysfs_ops procfs_stats_ops = { > .show = kfd_procfs_stats_show, > }; > > -static struct kobj_type procfs_stats_type = { > +static const struct kobj_type procfs_stats_type = { > .sysfs_ops = &procfs_stats_ops, > .release = kfd_procfs_kobj_release, > }; > @@ -487,7 +487,7 @@ static const struct sysfs_ops sysfs_counters_ops = { > .show = kfd_sysfs_counters_show, > }; > > -static struct kobj_type sysfs_counters_type = { > +static const struct kobj_type sysfs_counters_type = { > .sysfs_ops = &sysfs_counters_ops, > .release = kfd_procfs_kobj_release, > }; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > index 3fdaba56be6f..8e4124dcb6e4 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > @@ -278,7 +278,7 @@ static const struct sysfs_ops sysprops_ops = { > .show = sysprops_show, > }; > > -static struct kobj_type sysprops_type = { > +static const struct kobj_type sysprops_type = { > .release = kfd_topology_kobj_release, > .sysfs_ops = &sysprops_ops, > }; > @@ -318,7 +318,7 @@ static const struct sysfs_ops iolink_ops = { > .show = iolink_show, > }; > > -static struct kobj_type iolink_type = { > +static const struct kobj_type iolink_type = { > .release = kfd_topology_kobj_release, > .sysfs_ops = &iolink_ops, > }; > @@ -350,7 +350,7 @@ static const struct sysfs_ops mem_ops = { > .show = mem_show, > }; > > -static struct kobj_type mem_type = { > +static const struct kobj_type mem_type = { > .release = kfd_topology_kobj_release, > .sysfs_ops = &mem_ops, > }; > @@ -395,7 +395,7 @@ static const struct sysfs_ops cache_ops = { > .show = kfd_cache_show, > }; > > -static struct kobj_type cache_type = { > +static const struct kobj_type cache_type = { > .release = kfd_topology_kobj_release, > .sysfs_ops = &cache_ops, > }; > @@ -566,7 +566,7 @@ static const struct sysfs_ops node_ops = { > .show = node_show, > }; > > -static struct kobj_type node_type = { > +static const struct kobj_type node_type = { > .release = kfd_topology_kobj_release, > .sysfs_ops = &node_ops, > }; > > --- > base-commit: 033c40a89f55525139fd5b6342281b09b97d05bf > change-id: 20230216-kobj_type-amdkfd-abd9fe9ab060 > > Best regards, > -- > Thomas Weißschuh >
Re: [PATCH] drm/amdgpu: make kobj_type structures constant
Applied. Thanks! Alex On Thu, Feb 16, 2023 at 1:59 AM Christian König wrote: > > Am 16.02.23 um 02:07 schrieb Thomas Weißschuh: > > Since commit ee6d3dd4ed48 ("driver core: make kobj_type constant.") > > the driver core allows the usage of const struct kobj_type. > > > > Take advantage of this to constify the structure definitions to prevent > > modification at runtime. > > > > Signed-off-by: Thomas Weißschuh > > Reviewed-by: Christian König > > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 10 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- > > 2 files changed, 6 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c > > index 1bbd56029a4f..8e04952e5144 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c > > @@ -704,7 +704,7 @@ static void ip_hw_instance_release(struct kobject *kobj) > > kfree(ip_hw_instance); > > } > > > > -static struct kobj_type ip_hw_instance_ktype = { > > +static const struct kobj_type ip_hw_instance_ktype = { > > .release = ip_hw_instance_release, > > .sysfs_ops = &ip_hw_instance_sysfs_ops, > > .default_groups = ip_hw_instance_groups, > > @@ -723,7 +723,7 @@ static void ip_hw_id_release(struct kobject *kobj) > > kfree(ip_hw_id); > > } > > > > -static struct kobj_type ip_hw_id_ktype = { > > +static const struct kobj_type ip_hw_id_ktype = { > > .release = ip_hw_id_release, > > .sysfs_ops = &kobj_sysfs_ops, > > }; > > @@ -786,18 +786,18 @@ static const struct sysfs_ops ip_die_entry_sysfs_ops > > = { > > .show = ip_die_entry_attr_show, > > }; > > > > -static struct kobj_type ip_die_entry_ktype = { > > +static const struct kobj_type ip_die_entry_ktype = { > > .release = ip_die_entry_release, > > .sysfs_ops = &ip_die_entry_sysfs_ops, > > .default_groups = ip_die_entry_groups, > > }; > > > > -static struct kobj_type die_kobj_ktype = { > > +static const struct kobj_type 
die_kobj_ktype = { > > .release = die_kobj_release, > > .sysfs_ops = &kobj_sysfs_ops, > > }; > > > > -static struct kobj_type ip_discovery_ktype = { > > +static const struct kobj_type ip_discovery_ktype = { > > .release = ip_disc_release, > > .sysfs_ops = &kobj_sysfs_ops, > > }; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > > index 4b9e7b050ccd..6d13ce6ec9cc 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > > @@ -228,7 +228,7 @@ static const struct sysfs_ops amdgpu_xgmi_hive_ops = { > > .show = amdgpu_xgmi_show_attrs, > > }; > > > > -struct kobj_type amdgpu_xgmi_hive_type = { > > +static const struct kobj_type amdgpu_xgmi_hive_type = { > > .release = amdgpu_xgmi_hive_release, > > .sysfs_ops = &amdgpu_xgmi_hive_ops, > > .default_groups = amdgpu_xgmi_hive_groups, > > > > --- > > base-commit: 033c40a89f55525139fd5b6342281b09b97d05bf > > change-id: 20230216-kobj_type-amdgpu-4d3f0e1e05d4 > > > > Best regards, >
Re: [Intel-gfx] [PATCH 1/2] drm/i915/guc: Improve clean up of busyness stats worker
On 1/24/2023 16:55, Ceraolo Spurio, Daniele wrote: On 1/11/2023 5:54 PM, john.c.harri...@intel.com wrote: From: John Harrison The stats worker thread management was mis-matched between enable/disable call sites. Fix those up. Also, abstract the cancel code into a helper function rather than replicating in multiple places. Signed-off-by: John Harrison --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 --- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b436dd7f12e42..982364777d0c6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1435,19 +1435,25 @@ static void guc_init_engine_stats(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); intel_wakeref_t wakeref; + int ret; mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay); - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { - int ret = guc_action_enable_usage_stats(guc); + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + ret = guc_action_enable_usage_stats(guc); - if (ret) - drm_err(>->i915->drm, - "Failed to enable usage stats: %d!\n", ret); + if (ret) { + cancel_delayed_work_sync(&guc->timestamp.work); Wouldn't it be easier to just call mod_delayed_work after the H2G if ret==0, instead of having it before and cancelling if we get a failure? + drm_err(>->i915->drm, "Failed to enable usage stats: %d!\n", ret); } } +static void guc_park_engine_stats(struct intel_guc *guc) +{ + cancel_delayed_work_sync(&guc->timestamp.work); +} + Now you're asymmetric with the park/unpark, because on the park side you have this wrapper, while on the unpark side you directly call mod_delayed_work. The point is that submission disable needs to also cancel the worker. 
But calling the actual busyness park function seems excessive - no need to do all the updating if we are about to reset the GuC or unload the driver. Thinking about it more, calling this park_engine_stats is actually wrong given that engine stats and busyness are the same thing, so basically we would have two functions with the same name where one is a subset of the other. Is it simpler (and safe?) to just call the full busyness unpark from submission_disable? Or is it better to have a cancel/enable_busyness_worker() pair for all instances of turning the worker on or off? John. Daniele void intel_guc_busyness_park(struct intel_gt *gt) { struct intel_guc *guc = >->uc.guc; @@ -1460,7 +1466,7 @@ void intel_guc_busyness_park(struct intel_gt *gt) * and causes an unclaimed register access warning. Cancel the worker * synchronously here. */ - cancel_delayed_work_sync(&guc->timestamp.work); + guc_park_engine_stats(guc); /* * Before parking, we should sample engine busyness stats if we need to. @@ -4409,11 +4415,11 @@ void intel_guc_submission_enable(struct intel_guc *guc) guc_init_global_schedule_policy(guc); } +/* Note: By the time we're here, GuC may have already been reset */ void intel_guc_submission_disable(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - - /* Note: By the time we're here, GuC may have already been reset */ + guc_park_engine_stats(guc); /* Disable and route to host */ if (GRAPHICS_VER(gt->i915) >= 12)
Re: [PATCH] drm/amd/display: Modify mismatched function name
Applied. Thanks! Alex On Fri, Feb 17, 2023 at 2:46 AM Jiapeng Chong wrote: > > No functional modification involved. > > drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_detection.c:1199: warning: > expecting prototype for dc_link_detect_connection_type(). Prototype was for > link_detect_connection_type() instead. > > Reported-by: Abaci Robot > Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4103 > Signed-off-by: Jiapeng Chong > --- > drivers/gpu/drm/amd/display/dc/link/link_detection.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c > b/drivers/gpu/drm/amd/display/dc/link/link_detection.c > index 38216c789d77..5394d8a6087a 100644 > --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c > +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c > @@ -1189,7 +1189,7 @@ static bool detect_link_and_local_sink(struct dc_link > *link, > } > > /** > - * dc_link_detect_connection_type() - Determine if there is a sink connected > + * link_detect_connection_type() - Determine if there is a sink connected > * > * @type: Returned connection type > * Does not detect downstream devices, such as MST sinks > -- > 2.20.1.7.g153144c >
Re: [PATCH 3/3] drm/debugfs: remove dev->debugfs_list and debugfs_mutex
Am 17.02.23 um 20:38 schrieb Daniel Vetter: On Fri, Feb 17, 2023 at 11:01:18AM +0100, Stanislaw Gruszka wrote: On Fri, Feb 17, 2023 at 10:22:25AM +0100, Christian König wrote: Am 16.02.23 um 20:54 schrieb Daniel Vetter: On Thu, Feb 16, 2023 at 07:08:49PM +0200, Jani Nikula wrote: On Thu, 16 Feb 2023, Christian König wrote: Am 16.02.23 um 17:46 schrieb Jani Nikula: On Thu, 16 Feb 2023, Christian König wrote: Am 16.02.23 um 12:33 schrieb Daniel Vetter: On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote: The mutex was completely pointless in the first place since any parallel adding of files to this list would result in random behavior since the list is filled and consumed multiple times. Completely drop that approach and just create the files directly. This also re-adds the debugfs files to the render node directory and removes drm_debugfs_late_register(). Signed-off-by: Christian König --- drivers/gpu/drm/drm_debugfs.c | 32 +++ drivers/gpu/drm/drm_drv.c | 3 --- drivers/gpu/drm/drm_internal.h| 5 - drivers/gpu/drm/drm_mode_config.c | 2 -- include/drm/drm_device.h | 15 --- 5 files changed, 7 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 558e3a7271a5..a40288e67264 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev) void drm_debugfs_minor_register(struct drm_minor *minor) { struct drm_device *dev = minor->dev; - struct drm_debugfs_entry *entry, *tmp; if (dev->driver->debugfs_init) dev->driver->debugfs_init(minor); - - list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) { - debugfs_create_file(entry->file.name, 0444, - minor->debugfs_root, entry, &drm_debugfs_entry_fops); - list_del(&entry->list); - } -} - -void drm_debugfs_late_register(struct drm_device *dev) -{ - struct drm_minor *minor = dev->primary; - struct drm_debugfs_entry *entry, *tmp; - - if (!minor) - return; - - 
list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) { - debugfs_create_file(entry->file.name, 0444, - minor->debugfs_root, entry, &drm_debugfs_entry_fops); - list_del(&entry->list); - } } int drm_debugfs_remove_files(const struct drm_info_list *files, int count, @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name, entry->file.data = data; entry->dev = dev; - mutex_lock(&dev->debugfs_mutex); - list_add(&entry->list, &dev->debugfs_list); - mutex_unlock(&dev->debugfs_mutex); + debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry, + &drm_debugfs_entry_fops); + + /* TODO: This should probably only be a symlink */ + if (dev->render) + debugfs_create_file(name, 0444, dev->render->debugfs_root, + entry, &drm_debugfs_entry_fops); Nope. You are fundamentally missing the point of all this, which is: - drivers create debugfs files whenever they want to, as long as it's _before_ drm_dev_register is called. - drm_dev_register will set them all up. This is necessary because otherwise you have the potential for some nice oops and stuff when userspace tries to access these files before the driver is ready. Note that with sysfs all this infrastructure already exists, which is why you can create sysfs files whenever you feel like, and things wont go boom. Well Yeah I've considered that, I just don't think it's a good idea for debugfs. debugfs is meant to be a helper for debugging things and that especially includes the time between drm_dev_init() and drm_dev_register() because that's where we probe the hardware and try to get it working. Not having the debugfs files which allows for things like hardware register access and reading internal state during that is a really and I mean REALLY bad idea. This is essentially what we have those files for. So you mean you want to have early debugfs so you can have some script hammering the debugfs to get info out between init and register during probe? Well not hammering. 
What we usually do in bringup is to set firmware timeout to infinity and the driver then sits and waits for the hw. The tool used to access registers then goes directly through the PCI bar at the moment, but that's essentially a bad idea for registers which you grab a lock for to access (like index/data). I just think registering debugfs before everything is ready is a recipe for disaster. All of the debugfs needs to check all the conditions that they need across all of the probe stages. It'll be difficult to g
Re: [PATCH v2 2/2] drm: document DRM_IOCTL_PRIME_HANDLE_TO_FD and PRIME_FD_TO_HANDLE
On Fri, Feb 17, 2023 at 04:22:04PM +, Simon Ser wrote: > v2: mention caps, note that the IOCTLs might fail, document that > user-space needs a data structure to keep track of the > handles (Daniel V.) > > Signed-off-by: Simon Ser > Cc: Daniel Vetter > Cc: Pekka Paalanen > Cc: Daniel Stone On both patches: Reviewed-by: Daniel Vetter > --- > include/uapi/drm/drm.h | 30 ++ > 1 file changed, 30 insertions(+) > > diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h > index 292e4778a2f4..a87ca2d4 100644 > --- a/include/uapi/drm/drm.h > +++ b/include/uapi/drm/drm.h > @@ -1025,7 +1025,37 @@ extern "C" { > #define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock) > #define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) > > +/** > + * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD. > + * > + * User-space sets &drm_prime_handle.handle with the GEM handle to export and > + * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in > + * &drm_prime_handle.fd. > + * > + * The export can fail for any driver-specific reason, e.g. because export is > + * not supported for this specific GEM handle (but might be for others). > + * > + * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT. > + */ > #define DRM_IOCTL_PRIME_HANDLE_TO_FDDRM_IOWR(0x2d, struct > drm_prime_handle) > +/** > + * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle. > + * > + * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to > + * import, and gets back a GEM handle in &drm_prime_handle.handle. > + * &drm_prime_handle.flags is unused. > + * > + * If an existing GEM handle refers to the memory object backing the DMA-BUF, > + * that GEM handle is returned. Therefore user-space which needs to handle > + * arbitrary DMA-BUFs must have a user-space lookup data structure to > manually > + * reference-count duplicated GEM handles. For more information see > + * &DRM_IOCTL_GEM_CLOSE. 
> + * > + * The import can fail for any driver-specific reason, e.g. because import is > + * only supported for DMA-BUFs allocated on this DRM device. > + * > + * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT. > + */ > #define DRM_IOCTL_PRIME_FD_TO_HANDLEDRM_IOWR(0x2e, struct > drm_prime_handle) > > #define DRM_IOCTL_AGP_ACQUIREDRM_IO( 0x30) > -- > 2.39.2 > > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Re: [PATCH 3/3] drm/debugfs: remove dev->debugfs_list and debugfs_mutex
Am 17.02.23 um 20:42 schrieb Daniel Vetter: On Fri, Feb 17, 2023 at 04:55:27PM +0100, Christian König wrote: Am 17.02.23 um 13:37 schrieb Jani Nikula: On Fri, 17 Feb 2023, Christian König wrote: If i915 have such structural problems then I strongly suggest to solve them inside i915 and not make common code out of that. All other things aside, that's just a completely unnecessary and unhelpful remark. Sorry, but why? We have gone through the same problems on radeon and it was massively painful, what I try here is to prevent others from using this bad design as well. And yes I think devm_ and drmm_ is a bit questionable in that regard as well. The goal is not to make it as simple as possible to write a driver, but rather as defensive as possible. In other words automatically releasing memory when an object is destroyed might be helpful, but it isn't automatically a good idea. What can easily happen for example is that you run into use after free situations on object reference decommissions, e.g. parent is freed before child for example. I know that radeon/amd are going different paths on this, but I think it's also very clear that you're not really representing the consensus here. For smaller drivers especially there really isn't anyone arguing against devm/drmm. Which I completely agree on. It's just that we shouldn't promote it as "Hey this magically makes everything work in your very complex use case". It can be a good tool to have such stuff which makes sense in a lot of use case, but everybody using it should always keep its downsides in mind as well. Similar for uapi interfaces that just do the right thing and prevent races. You're the very first one who argued this is a good thing to have. kernfs/kobj/sysfs people spend endless amounts of engineer on trying to build something that's impossible to get wrong, or at least get as close to that as feasible. 
Yeah, for kernfs/kobj/sysfs it does make complete sense because those files are actually sometimes waited on by userspace tools to appear. I just find it extremely questionable for debugfs. Regards, Christian. I mean the entire rust endeavour flies under that flag too. -Daniel
Re: [PATCH] drm/fb-helper: Remove drm_fb_helper_unprepare() from drm_fb_helper_fini()
On Fri, Feb 17, 2023 at 09:18:54AM +0100, Thomas Zimmermann wrote: > Hi > > Am 16.02.23 um 21:11 schrieb Daniel Vetter: > > On Thu, Feb 16, 2023 at 03:06:20PM +0100, Thomas Zimmermann wrote: > > > Move drm_fb_helper_unprepare() from drm_fb_helper_fini() into the > > > calling fbdev implementation. Avoids a possible stale mutex with > > > generic fbdev code. > > > > > > As indicated by its name, drm_fb_helper_prepare() prepares struct > > > drm_fb_helper before setting up the fbdev support with a call to > > > drm_fb_helper_init(). In legacy fbdev emulation, this happens next > > > to each other. If successful, drm_fb_helper_fini() later tear down > > > the fbdev device and also unprepare via drm_fb_helper_unprepare(). > > > > > > Generic fbdev emulation prepares struct drm_fb_helper immediately > > > after allocating the instance. It only calls drm_fb_helper_init() > > > as part of processing a hotplug event. If the hotplug-handling fails, > > > it runs drm_fb_helper_fini(). This unprepares the fb-helper instance > > > and the next hotplug event runs on stale data. > > > > > > Solve this by moving drm_fb_helper_unprepare() from drm_fb_helper_fini() > > > into the fbdev implementations. Call it right before freeing the > > > fb-helper instance. > > > > > > Fixes: 4825797c36da ("drm/fb-helper: Introduce drm_fb_helper_unprepare()") > > > Cc: Thomas Zimmermann > > > Cc: Javier Martinez Canillas > > > Cc: Maarten Lankhorst > > > Cc: Maxime Ripard > > > Cc: David Airlie > > > Cc: Daniel Vetter > > > Cc: dri-devel@lists.freedesktop.org > > > > > > Signed-off-by: Thomas Zimmermann > > > > This reminds me of an old patch I just recently stumbled over again: > > > > https://lore.kernel.org/dri-devel/Y3St2VHJ7jEmcNFw@phenom.ffwll.local/ > > > > Should I resurrect that one maybe and send it out? I think that also ties > > a bit into your story here. > > I don't think it will be necessary. 
I began to convert the existing fbdev > emulation to make use of drm_client, which should resove a number of > problems. I expect to post this after the various trees have merged the > recent changes to fbdev helpers. The only version the patch is fixing is the client one, the old one is unfixable (I think at least, hence just the comments). Note that the link is pre-splitting, I do have a rebased version here. I'll just send that out and head into vacations :-) -Daniel > > Best regards > Thomas > > > > > > --- > > > drivers/gpu/drm/armada/armada_fbdev.c | 3 +++ > > > drivers/gpu/drm/drm_fb_helper.c| 2 -- > > > drivers/gpu/drm/drm_fbdev_generic.c| 2 ++ > > > drivers/gpu/drm/exynos/exynos_drm_fbdev.c | 3 ++- > > > drivers/gpu/drm/gma500/framebuffer.c | 2 ++ > > > drivers/gpu/drm/i915/display/intel_fbdev.c | 1 + > > > drivers/gpu/drm/msm/msm_fbdev.c| 2 ++ > > > drivers/gpu/drm/omapdrm/omap_fbdev.c | 2 ++ > > > drivers/gpu/drm/radeon/radeon_fb.c | 2 ++ > > > drivers/gpu/drm/tegra/fb.c | 1 + > > > 10 files changed, 17 insertions(+), 3 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/armada/armada_fbdev.c > > > b/drivers/gpu/drm/armada/armada_fbdev.c > > > index 07e410c62b7a..0e44f53e9fa4 100644 > > > --- a/drivers/gpu/drm/armada/armada_fbdev.c > > > +++ b/drivers/gpu/drm/armada/armada_fbdev.c > > > @@ -147,6 +147,7 @@ int armada_fbdev_init(struct drm_device *dev) > > >err_fb_setup: > > > drm_fb_helper_fini(fbh); > > >err_fb_helper: > > > + drm_fb_helper_unprepare(fbh); > > > priv->fbdev = NULL; > > > return ret; > > > } > > > @@ -164,6 +165,8 @@ void armada_fbdev_fini(struct drm_device *dev) > > > if (fbh->fb) > > > fbh->fb->funcs->destroy(fbh->fb); > > > + drm_fb_helper_unprepare(fbh); > > > + > > > priv->fbdev = NULL; > > > } > > > } > > > diff --git a/drivers/gpu/drm/drm_fb_helper.c > > > b/drivers/gpu/drm/drm_fb_helper.c > > > index 28c428e9c530..a39998047f8a 100644 > > > --- a/drivers/gpu/drm/drm_fb_helper.c > > > +++ b/drivers/gpu/drm/drm_fb_helper.c > > > 
@@ -590,8 +590,6 @@ void drm_fb_helper_fini(struct drm_fb_helper > > > *fb_helper) > > > > I think it would be good to update the kerneldoc of _init() and _fini() > > here to mention each another like we usually do with these pairs. Same > > with prepare/unprepare() although the latter rerfences _prepare() already. > > > > > } > > > mutex_unlock(&kernel_fb_helper_lock); > > > - drm_fb_helper_unprepare(fb_helper); > > > - > > > if (!fb_helper->client.funcs) > > > drm_client_release(&fb_helper->client); > > > } > > > diff --git a/drivers/gpu/drm/drm_fbdev_generic.c > > > b/drivers/gpu/drm/drm_fbdev_generic.c > > > index 365f80717fa1..4d6325e91565 100644 > > > --- a/drivers/gpu/drm/drm_fbdev_generic.c > > > +++ b/drivers/gpu/drm/drm_fbdev_generic.c > > > @@ -65,6 +65,8 @@ static void d
[PATCH] drm/fb-helper: Try to protect cleanup against delayed setup
Some vague evidences suggests this can go wrong. Try to prevent it by holding the right mutex and clearing ->deferred_setup to make sure we later on don't accidentally try to re-register the fbdev when the driver thought it had it all cleaned up already. v2: I realized that this is fundamentally butchered, and CI complained about lockdep splats. So limit the critical section again and just add a few notes what the proper fix is. References: https://intel-gfx-ci.01.org/tree/linux-next/next-20201215/fi-byt-j1900/igt@i915_pm_...@module-reload.html Signed-off-by: Daniel Vetter Cc: Ville Syrjälä Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter --- drivers/gpu/drm/drm_fb_helper.c | 6 ++ drivers/gpu/drm/drm_fbdev_generic.c | 5 + 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 3e17261a12b6..2415a2c7ca44 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -545,6 +545,9 @@ EXPORT_SYMBOL(drm_fb_helper_alloc_info); * A wrapper around unregister_framebuffer, to release the fb_info * framebuffer device. This must be called before releasing all resources for * @fb_helper by calling drm_fb_helper_fini(). + * + * Note that this is fundamentally racy on hotunload because it doen't handle + * open fbdev file descriptors at all. Use drm_fbdev_generic_setup() instead. */ void drm_fb_helper_unregister_info(struct drm_fb_helper *fb_helper) { @@ -558,6 +561,9 @@ EXPORT_SYMBOL(drm_fb_helper_unregister_info); * @fb_helper: driver-allocated fbdev helper, can be NULL * * This cleans up all remaining resources associated with @fb_helper. + * + * Note that this is fundamentally racy on hotunload because it doen't handle + * open fbdev file descriptors at all. Use drm_fbdev_generic_setup() instead. 
*/ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper) { diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c index 365f80717fa1..1618109592ce 100644 --- a/drivers/gpu/drm/drm_fbdev_generic.c +++ b/drivers/gpu/drm/drm_fbdev_generic.c @@ -347,7 +347,12 @@ static void drm_fbdev_client_unregister(struct drm_client_dev *client) { struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); + mutex_lock(&fb_helper->lock); + fb_helper->deferred_setup = false; + mutex_unlock(&fb_helper->lock); + if (fb_helper->info) { + /* drm_fbdev_fb_destroy() takes care of cleanup */ drm_fb_helper_unregister_info(fb_helper); } else { drm_client_release(&fb_helper->client); -- 2.39.0
Re: [PATCH drm-next v2 03/16] maple_tree: split up MA_STATE() macro
On Fri, Feb 17, 2023 at 02:44:09PM +0100, Danilo Krummrich wrote: > \#define SAMPLE_ITER(name, __mgr) \ > struct sample_iter name = { \ > .mas = __MA_STATE(&(__mgr)->mt, 0, 0), This is usually called MA_STATE_INIT() > #define sample_iter_for_each_range(it__, start__, end__) \ > for ((it__).mas.index = start__, (it__).entry = mas_find(&(it__).mas, > end__ - 1); \ >(it__).entry; (it__).entry = mas_find(&(it__).mas, end__ - 1)) This is a bad iterator design. It's usually best to do this: struct sample *sample; SAMPLE_ITERATOR(si, min); sample_iter_for_each(&si, sample, max) { frob(mgr, sample); } I don't mind splitting apart MA_STATE_INIT from MA_STATE, and if you do that, we can also use it in VMA_ITERATOR.
Re: [PATCH 3/3] drm/debugfs: remove dev->debugfs_list and debugfs_mutex
On Fri, Feb 17, 2023 at 04:55:27PM +0100, Christian König wrote: > Am 17.02.23 um 13:37 schrieb Jani Nikula: > > On Fri, 17 Feb 2023, Christian König > > wrote: > > > If i915 have such structural problems then I strongly suggest to solve > > > them inside i915 and not make common code out of that. > > All other things aside, that's just a completely unnecessary and > > unhelpful remark. > > Sorry, but why? > > We have gone through the same problems on radeon and it was massively > painful, what I try here is to prevent others from using this bad design as > well. And yes I think devm_ and drmm_ is a bit questionable in that regard > as well. > > The goal is not to make it as simple as possible to write a driver, but > rather as defensive as possible. In other words automatically releasing > memory when an object is destroyed might be helpful, but it isn't > automatically a good idea. > > What can easily happen for example is that you run into use after free > situations on object reference decommissions, e.g. parent is freed before > child for example. I know that radeon/amd are going different paths on this, but I think it's also very clear that you're not really representing the consensus here. For smaller drivers especially there really isn't anyone arguing against devm/drmm. Similar for uapi interfaces that just do the right thing and prevent races. You're the very first one who argued this is a good thing to have. kernfs/kobj/sysfs people spend endless amounts of engineering on trying to build something that's impossible to get wrong, or at least get as close to that as feasible. I mean the entire rust endeavour flies under that flag too. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Re: [PATCH drm-next v2 04/16] maple_tree: add flag MT_FLAGS_LOCK_NONE
On Fri, Feb 17, 2023 at 02:44:10PM +0100, Danilo Krummrich wrote: > Generic components making use of the maple tree (such as the > DRM GPUVA Manager) delegate the responsibility of ensuring mutual > exclusion to their users. > > While such components could inherit the concept of an external lock, > some users might just serialize the access to the component and hence to > the internal maple tree. > > In order to allow such use cases, add a new flag MT_FLAGS_LOCK_NONE to > indicate not to do any internal lockdep checks. I'm really against this change. First, we really should check that users have their locking right. It's bitten us so many times when they get it wrong. Second, having a lock allows us to defragment the slab cache. The patches to do that haven't gone anywhere recently, but if we drop the requirement now, we'll never be able to compact ranges of memory that have slabs allocated to them.
Re: [PATCH 3/3] drm/debugfs: remove dev->debugfs_list and debugfs_mutex
On Fri, Feb 17, 2023 at 11:01:18AM +0100, Stanislaw Gruszka wrote: > On Fri, Feb 17, 2023 at 10:22:25AM +0100, Christian König wrote: > > Am 16.02.23 um 20:54 schrieb Daniel Vetter: > > > On Thu, Feb 16, 2023 at 07:08:49PM +0200, Jani Nikula wrote: > > > > On Thu, 16 Feb 2023, Christian König wrote: > > > > > Am 16.02.23 um 17:46 schrieb Jani Nikula: > > > > > > On Thu, 16 Feb 2023, Christian König > > > > > > wrote: > > > > > > > Am 16.02.23 um 12:33 schrieb Daniel Vetter: > > > > > > > > On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote: > > > > > > > > > The mutex was completely pointless in the first place since > > > > > > > > > any > > > > > > > > > parallel adding of files to this list would result in random > > > > > > > > > behavior since the list is filled and consumed multiple times. > > > > > > > > > > > > > > > > > > Completely drop that approach and just create the files > > > > > > > > > directly. > > > > > > > > > > > > > > > > > > This also re-adds the debugfs files to the render node > > > > > > > > > directory and > > > > > > > > > removes drm_debugfs_late_register(). 
> > > > > > > > > > > > > > > > > > Signed-off-by: Christian König > > > > > > > > > --- > > > > > > > > > drivers/gpu/drm/drm_debugfs.c | 32 > > > > > > > > > +++ > > > > > > > > > drivers/gpu/drm/drm_drv.c | 3 --- > > > > > > > > > drivers/gpu/drm/drm_internal.h| 5 - > > > > > > > > > drivers/gpu/drm/drm_mode_config.c | 2 -- > > > > > > > > > include/drm/drm_device.h | 15 --- > > > > > > > > > 5 files changed, 7 insertions(+), 50 deletions(-) > > > > > > > > > > > > > > > > > > diff --git a/drivers/gpu/drm/drm_debugfs.c > > > > > > > > > b/drivers/gpu/drm/drm_debugfs.c > > > > > > > > > index 558e3a7271a5..a40288e67264 100644 > > > > > > > > > --- a/drivers/gpu/drm/drm_debugfs.c > > > > > > > > > +++ b/drivers/gpu/drm/drm_debugfs.c > > > > > > > > > @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct > > > > > > > > > drm_device *dev) > > > > > > > > > void drm_debugfs_minor_register(struct drm_minor *minor) > > > > > > > > > { > > > > > > > > > struct drm_device *dev = minor->dev; > > > > > > > > > - struct drm_debugfs_entry *entry, *tmp; > > > > > > > > > if (dev->driver->debugfs_init) > > > > > > > > > dev->driver->debugfs_init(minor); > > > > > > > > > - > > > > > > > > > - list_for_each_entry_safe(entry, tmp, > > > > > > > > > &dev->debugfs_list, list) { > > > > > > > > > - debugfs_create_file(entry->file.name, 0444, > > > > > > > > > - minor->debugfs_root, entry, > > > > > > > > > &drm_debugfs_entry_fops); > > > > > > > > > - list_del(&entry->list); > > > > > > > > > - } > > > > > > > > > -} > > > > > > > > > - > > > > > > > > > -void drm_debugfs_late_register(struct drm_device *dev) > > > > > > > > > -{ > > > > > > > > > - struct drm_minor *minor = dev->primary; > > > > > > > > > - struct drm_debugfs_entry *entry, *tmp; > > > > > > > > > - > > > > > > > > > - if (!minor) > > > > > > > > > - return; > > > > > > > > > - > > > > > > > > > - list_for_each_entry_safe(entry, tmp, > > > > > > > > > &dev->debugfs_list, list) { > > > > > > > > > - 
debugfs_create_file(entry->file.name, 0444, > > > > > > > > > - minor->debugfs_root, entry, > > > > > > > > > &drm_debugfs_entry_fops); > > > > > > > > > - list_del(&entry->list); > > > > > > > > > - } > > > > > > > > > } > > > > > > > > > int drm_debugfs_remove_files(const struct drm_info_list > > > > > > > > > *files, int count, > > > > > > > > > @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct > > > > > > > > > drm_device *dev, const char *name, > > > > > > > > > entry->file.data = data; > > > > > > > > > entry->dev = dev; > > > > > > > > > - mutex_lock(&dev->debugfs_mutex); > > > > > > > > > - list_add(&entry->list, &dev->debugfs_list); > > > > > > > > > - mutex_unlock(&dev->debugfs_mutex); > > > > > > > > > + debugfs_create_file(name, 0444, > > > > > > > > > dev->primary->debugfs_root, entry, > > > > > > > > > + &drm_debugfs_entry_fops); > > > > > > > > > + > > > > > > > > > + /* TODO: This should probably only be a symlink */ > > > > > > > > > + if (dev->render) > > > > > > > > > + debugfs_create_file(name, 0444, > > > > > > > > > dev->render->debugfs_root, > > > > > > > > > + entry, > > > > > > > > > &drm_debugfs_entry_fops); > > > > > > > > Nope. You are fundamentally missing the point of all this, > > > > > > > > which is: > > > > > > > > > > > > > > > > - drivers create debugfs files whenever they want to, as long > > > > > > > > as it's > > > > > > > > _before_ drm_dev_register is called. > > > > > > > > > > > > > > > > - d
Re: [PATCH 1/2] drm/client: fix circular reference counting issue
On Fri, 17 Feb 2023 at 13:06, Christian König wrote: > > Am 16.02.23 um 15:34 schrieb Daniel Vetter: > > On Thu, Jan 26, 2023 at 03:30:31PM +0100, Thomas Zimmermann wrote: > >> Hi > >> > >> Am 26.01.23 um 11:28 schrieb Christian König: > >>> We reference dump buffers both by their handle as well as their > >>> object. The problem is now that when anybody iterates over the DRM > >>> framebuffers and exports the underlying GEM objects through DMA-buf > >>> we run into a circular reference count situation. > >>> > >>> The result is that the fbdev handling holds the GEM handle preventing > >>> the DMA-buf in the GEM object to be released. This DMA-buf in turn > >>> holds a reference to the driver module which on unload would release > >>> the fbdev. > >>> > >>> Break that loop by releasing the handle as soon as the DRM > >>> framebuffer object is created. The DRM framebuffer and the DRM client > >>> buffer structure still hold a reference to the underlying GEM object > >>> preventing its destruction. > >>> > >>> Signed-off-by: Christian König > >>> Fixes: c76f0f7cb546 ("drm: Begin an API for in-kernel clients") > >>> Cc: > >> I tested with Weston and Gnome in X11 and Wayland mode under simpledrm, > >> which I started stopped from the console. No obvious problems. > >> > >> I heard that sway/wlroots has issues with drivers that don't support > >> dma-buf. Maybe(!) that could be affected by this patch. > > dma-buf export should still work. Also the loop is imo a red herring, I > > think if you force unbind the driver then this should all get resolved > > automatically. > > > > What is true is that once we start refcounting everything correctly then > > there will be elevated module refcounts, which means you cannot use module > > unloading to provoke a driver unbind, which would kick out all the > > leftover references. 
You instead need to manually unbind the driver first, > > which should drop all remaining references to zero (might need to kill > > also any userspace), and only then can you unload the driver. > > > > But this confusion is extremely common, a lot of people think that just > > holding a module reference is enough, we really should also hold a > > drm_device reference for dma-buf ... > > Yeah, hot plug removal of amdgpu revealed a couple of those as well. > > Essentially what DMA-buf does with grabbing a module reference on the > owner of a DMA-buf is a bad idea. > > Instead we should reference the device or component which is exporting > the buffer, but since we don't have a common structure here it's more > work to generalize that approach. Well the device/component still needs to eventually hold a reference on the module, or bad things can happen. But yeah dma-buf also holding one but not a device/component reference is definitely bad. -Daniel > > Christian. > > > -Daniel > > > >> Anyway, take my r-b, t-b tags. > >> > >> Reviewed-by: Thomas Zimmermann > >> Tested-by: Thomas Zimmermann > >> > >> Thank you for fixing this bug. 
> >> > >> Best regards > >> Thomas > >> > >>> --- > >>>drivers/gpu/drm/drm_client.c | 33 - > >>>include/drm/drm_client.h | 5 - > >>>2 files changed, 20 insertions(+), 18 deletions(-) > >>> > >>> diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c > >>> index 009e7b10455c..f6292ba0e6fc 100644 > >>> --- a/drivers/gpu/drm/drm_client.c > >>> +++ b/drivers/gpu/drm/drm_client.c > >>> @@ -243,21 +243,17 @@ void drm_client_dev_restore(struct drm_device *dev) > >>>static void drm_client_buffer_delete(struct drm_client_buffer *buffer) > >>>{ > >>> - struct drm_device *dev = buffer->client->dev; > >>> - > >>> if (buffer->gem) { > >>> drm_gem_vunmap_unlocked(buffer->gem, &buffer->map); > >>> drm_gem_object_put(buffer->gem); > >>> } > >>> - if (buffer->handle) > >>> - drm_mode_destroy_dumb(dev, buffer->handle, > >>> buffer->client->file); > >>> - > >>> kfree(buffer); > >>>} > >>>static struct drm_client_buffer * > >>> -drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 > >>> height, u32 format) > >>> +drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 > >>> height, > >>> +u32 format, u32 *handle) > >>>{ > >>> const struct drm_format_info *info = drm_format_info(format); > >>> struct drm_mode_create_dumb dumb_args = { }; > >>> @@ -279,16 +275,15 @@ drm_client_buffer_create(struct drm_client_dev > >>> *client, u32 width, u32 height, u > >>> if (ret) > >>> goto err_delete; > >>> - buffer->handle = dumb_args.handle; > >>> - buffer->pitch = dumb_args.pitch; > >>> - > >>> obj = drm_gem_object_lookup(client->file, dumb_args.handle); > >>> if (!obj) { > >>> ret = -ENOENT; > >>> goto err_delete; > >>> } > >>> + buffer->pitch = dumb_args.pitch; > >>> buffer->gem = obj; > >>> + *handle = dumb_args.handle; > >>>
[PATCH] drm/i915/mtl: Add engine TLB invalidation
MTL's primary GT can continue to use the same engine TLB invalidation programming as past Xe_HP-based platforms. However the media GT needs some special handling: * Invalidation registers on the media GT are singleton registers (unlike the primary GT where they are still MCR). * Since the GSC is now exposed as an engine, there's a new register to use for TLB invalidation. The offset is identical to the compute engine offset, but this is expected --- compute engines only exist on the primary GT while the GSC only exists on the media GT. * Although there's only a single GSC engine instance, it inexplicably uses bit 1 to request invalidations rather than bit 0. Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 52 --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f3a91e7f85f7..af8e158fbd84 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1166,6 +1166,11 @@ static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine) [COPY_ENGINE_CLASS].mcr_reg = XEHP_BLT_TLB_INV_CR, [COMPUTE_CLASS].mcr_reg = XEHP_COMPCTX_TLB_INV_CR, }; + static const union intel_engine_tlb_inv_reg xelpmp_regs[] = { + [VIDEO_DECODE_CLASS].reg = GEN12_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR, + [OTHER_CLASS].reg = XELPMP_GSC_TLB_INV_CR, + }; struct drm_i915_private *i915 = engine->i915; const unsigned int instance = engine->instance; const unsigned int class = engine->class; @@ -1185,19 +1190,28 @@ static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine) * 12.00 -> 12.50 transition multi cast handling is required too. 
*/ - if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) || - GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) { - regs = xehp_regs; - num = ARRAY_SIZE(xehp_regs); - } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) || - GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) { - regs = gen12_regs; - num = ARRAY_SIZE(gen12_regs); - } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { - regs = gen8_regs; - num = ARRAY_SIZE(gen8_regs); - } else if (GRAPHICS_VER(i915) < 8) { - return 0; + if (engine->gt->type == GT_MEDIA) { + if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) { + regs = xelpmp_regs; + num = ARRAY_SIZE(xelpmp_regs); + } + } else { + if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) || + GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) || + GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) || + GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) { + regs = xehp_regs; + num = ARRAY_SIZE(xehp_regs); + } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) || + GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); + } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { + regs = gen8_regs; + num = ARRAY_SIZE(gen8_regs); + } else if (GRAPHICS_VER(i915) < 8) { + return 0; + } } if (gt_WARN_ONCE(engine->gt, !num, @@ -1212,7 +1226,14 @@ static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine) reg = regs[class]; - if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance == 1) { + if (class == OTHER_CLASS) { + /* +* There's only a single GSC instance, but it uses register bit +* 1 instead of either 0 or OTHER_GSC_INSTANCE. 
+*/ + GEM_WARN_ON(instance != OTHER_GSC_INSTANCE); + val = 1; + } else if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance == 1) { reg.reg = GEN8_M2TCR; val = 0; } else { @@ -1228,7 +1249,8 @@ static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine) if (GRAPHICS_VER(i915) >= 12 && (engine->class == VIDEO_DECODE_CLASS || engine->class == VIDEO_ENHANCEMENT_CLASS || -engine->class == COMPUTE_CLASS)) +engine->class == COMPUTE_CLASS || +engine->class == OTHER_CLASS)) engine->tlb_inv.r
Re: [PATCH drm-next v2 03/16] maple_tree: split up MA_STATE() macro
* Danilo Krummrich [230217 08:44]: > Split up the MA_STATE() macro such that components using the maple tree > can easily inherit from struct ma_state and build custom tree walk > macros to hide their internals from users. > > Example: > > struct sample_iter { > struct ma_state mas; > struct sample_mgr *mgr; > struct sample_entry *entry; > }; > > \#define SAMPLE_ITER(name, __mgr) \ > struct sample_iter name = { \ > .mas = __MA_STATE(&(__mgr)->mt, 0, 0), > .mgr = __mgr, > .entry = NULL, > } I see this patch is to allow for anonymous maple states, this looks good. I've a lengthy comment about the iterator that I'm adding here to head off anyone that may copy your example below. > > \#define sample_iter_for_each_range(it__, start__, end__) \ > for ((it__).mas.index = start__, (it__).entry = mas_find(&(it__).mas, > end__ - 1); \ >(it__).entry; (it__).entry = mas_find(&(it__).mas, end__ - 1)) I see you've added something like the above in your patch set as well. I'd like to point out that the index isn't the only state information that needs to be altered here, and in fact, this could go very wrong. The maple state has a node and an offset within that node. If you set the index to lower than the current position of your iterator and call mas_find() then what happens is somewhat undefined. I expect you will get the wrong value (most likely either the current value or the very next one that the iterator is already pointing to). I believe you have been using a fresh maple state for each iterator in your patches, but I haven't had a deep look into your code yet. We have methods of resetting the iterator and set the range (mas_set() and mas_set_range()) which are safe for what you are doing, but they will start the walk from the root node to the index again. So, if you know what you are doing is safe, then the way you have written it will work, but it's worth mentioning that this could occur. 
It is also worth pointing out that it would be much safer to use a function to do the above so you get type safety.. and I was asked to add this to the VMA interface by Linus [1], which is on its way upstream [2]. 1. https://lore.kernel.org/linux-mm/CAHk-=wg9wqxbgkndkd2bqocnn73rdswuwsavbb7t-tekyke...@mail.gmail.com/ 2. https://lore.kernel.org/linux-mm/20230120162650.984577-1-liam.howl...@oracle.com/ > > Signed-off-by: Danilo Krummrich > --- > include/linux/maple_tree.h | 7 +-- > 1 file changed, 5 insertions(+), 2 deletions(-) > > diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h > index e594db58a0f1..ca04c900e51a 100644 > --- a/include/linux/maple_tree.h > +++ b/include/linux/maple_tree.h > @@ -424,8 +424,8 @@ struct ma_wr_state { > #define MA_ERROR(err) \ > ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) > > -#define MA_STATE(name, mt, first, end) > \ > - struct ma_state name = {\ > +#define __MA_STATE(mt, first, end) \ > + { \ > .tree = mt, \ > .index = first, \ > .last = end,\ > @@ -435,6 +435,9 @@ struct ma_wr_state { > .alloc = NULL, \ > } > > +#define MA_STATE(name, mt, first, end) > \ > + struct ma_state name = __MA_STATE(mt, first, end) > + > #define MA_WR_STATE(name, ma_state, wr_entry) > \ > struct ma_wr_state name = { \ > .mas = ma_state,\ > -- > 2.39.1 >
Re: [PATCH v12 00/18] drm: Add Samsung MIPI DSIM bridge
On 07/02/2023 10.09, Rasmus Villemoes wrote: > I managed to get the whole chain lcdif -> mipi -> bridge -> dp-connector > to probe with these settings > [...] > Now hotplug-detect doesn't work with the current sn65dsi86 driver, but > that's a separate issue; when I boot with a monitor attached, its edid > is correctly read out. But I still don't get any output, and the monitor > says "no signal" - my naive attempt (which has worked fine in other > cases) was to just dd /dev/urandom to /dev/fb0, so I'm clearly missing > some important step. No idea if it's important, but in the NXP kernel, there's a display-subsystem { compatible = "fsl,imx-display-subsystem"; ports = <&lcdif1_disp>, <&lcdif2_disp>, <&lcdif3_disp>; }; node in imx8mp.dtsi, and when commenting out that node, the graphics ceases to work, even if all the devices in the lcdif->mipi->bridge chain actually probes. However, adding a corresponding node in mainline, which does have a driver for that "fsl,imx-display-subsystem", makes no difference; with or without that, I do get a /dev/fb0 device and the whole chain probes, but again the monitor says no signal. Rasmus
Re: [PATCH drm-next v2 04/16] maple_tree: add flag MT_FLAGS_LOCK_NONE
* Danilo Krummrich [230217 08:44]: > Generic components making use of the maple tree (such as the > DRM GPUVA Manager) delegate the responsibility of ensuring mutual > exclusion to their users. > > While such components could inherit the concept of an external lock, > some users might just serialize the access to the component and hence to > the internal maple tree. > > In order to allow such use cases, add a new flag MT_FLAGS_LOCK_NONE to > indicate not to do any internal lockdep checks. > > Signed-off-by: Danilo Krummrich > --- > include/linux/maple_tree.h | 20 +++- > lib/maple_tree.c | 7 --- > 2 files changed, 19 insertions(+), 8 deletions(-) > > diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h > index ca04c900e51a..f795e5def8d0 100644 > --- a/include/linux/maple_tree.h > +++ b/include/linux/maple_tree.h > @@ -170,10 +170,11 @@ enum maple_type { > #define MT_FLAGS_USE_RCU 0x02 > #define MT_FLAGS_HEIGHT_OFFSET 0x02 > #define MT_FLAGS_HEIGHT_MASK 0x7C > -#define MT_FLAGS_LOCK_MASK 0x300 > +#define MT_FLAGS_LOCK_MASK 0x700 > #define MT_FLAGS_LOCK_IRQ0x100 > #define MT_FLAGS_LOCK_BH 0x200 > #define MT_FLAGS_LOCK_EXTERN 0x300 > +#define MT_FLAGS_LOCK_NONE 0x400 Please add this to the documentation above the flags as well. We should probably add enough context so that users don't just set this and then use multiple writers. > > #define MAPLE_HEIGHT_MAX 31 > > @@ -559,11 +560,16 @@ static inline void mas_set(struct ma_state *mas, > unsigned long index) > mas_set_range(mas, index, index); > } > > -static inline bool mt_external_lock(const struct maple_tree *mt) > +static inline bool mt_lock_external(const struct maple_tree *mt) > { > return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_EXTERN; > } > > +static inline bool mt_lock_none(const struct maple_tree *mt) > +{ > + return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_NONE; > +} > + > /** > * mt_init_flags() - Initialise an empty maple tree with flags. 
> * @mt: Maple Tree > @@ -577,7 +583,7 @@ static inline bool mt_external_lock(const struct > maple_tree *mt) > static inline void mt_init_flags(struct maple_tree *mt, unsigned int flags) > { > mt->ma_flags = flags; > - if (!mt_external_lock(mt)) > + if (!mt_lock_external(mt) && !mt_lock_none(mt)) > spin_lock_init(&mt->ma_lock); > rcu_assign_pointer(mt->ma_root, NULL); > } > @@ -612,9 +618,11 @@ static inline void mt_clear_in_rcu(struct maple_tree *mt) > if (!mt_in_rcu(mt)) > return; > > - if (mt_external_lock(mt)) { > + if (mt_lock_external(mt)) { > BUG_ON(!mt_lock_is_held(mt)); > mt->ma_flags &= ~MT_FLAGS_USE_RCU; > + } else if (mt_lock_none(mt)) { > + mt->ma_flags &= ~MT_FLAGS_USE_RCU; > } else { > mtree_lock(mt); > mt->ma_flags &= ~MT_FLAGS_USE_RCU; > @@ -631,9 +639,11 @@ static inline void mt_set_in_rcu(struct maple_tree *mt) > if (mt_in_rcu(mt)) > return; > > - if (mt_external_lock(mt)) { > + if (mt_lock_external(mt)) { > BUG_ON(!mt_lock_is_held(mt)); > mt->ma_flags |= MT_FLAGS_USE_RCU; > + } else if (mt_lock_none(mt)) { > + mt->ma_flags |= MT_FLAGS_USE_RCU; > } else { > mtree_lock(mt); > mt->ma_flags |= MT_FLAGS_USE_RCU; > diff --git a/lib/maple_tree.c b/lib/maple_tree.c > index 26e2045d3cda..f51c0fd4eaad 100644 > --- a/lib/maple_tree.c > +++ b/lib/maple_tree.c > @@ -802,8 +802,8 @@ static inline void __rcu **ma_slots(struct maple_node > *mn, enum maple_type mt) > > static inline bool mt_locked(const struct maple_tree *mt) > { > - return mt_external_lock(mt) ? mt_lock_is_held(mt) : > - lockdep_is_held(&mt->ma_lock); > + return mt_lock_external(mt) ? mt_lock_is_held(mt) : > + mt_lock_none(mt) ? true : lockdep_is_held(&mt->ma_lock); It might be better to just make this two return statements for clarity. 
> } > > static inline void *mt_slot(const struct maple_tree *mt, > @@ -6120,7 +6120,8 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp) > return false; > } > > - if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { > + if (gfpflags_allow_blocking(gfp) && > + !mt_lock_external(mas->tree) && !mt_lock_none(mas->tree)) { > mtree_unlock(mas->tree); > mas_alloc_nodes(mas, gfp); > mtree_lock(mas->tree); > -- > 2.39.1 >
[PATCH v2 0/3] Resolve warnings from AMDGPU
Hi, This series resolve some of the warnings that appear when compiling AMDGPU with W=1. Each patch is focused in a specific warning. This is my First Patch for the GSoC Project Idea about increasing code coverage of the DRM code[1]. Thanks for reviewing! Best regards, Arthur Grillo [1]: https://www.x.org/wiki/DRMcoverage2023/#firstpatch --- v1 -> v2: https://lore.kernel.org/all/20230213204923.111948-1-arthurgri...@riseup.net/ - Use dm_odm_combine_mode_disabled dm_odm_combine_mode_2to1 instead of an enum casting - Maintain register read --- Arthur Grillo (3): drm/amd/display: Fix implicit enum conversion drm/amd/display: Remove unused local variables drm/amd/display: Remove unused local variables and function .../amd/display/dc/dcn10/dcn10_link_encoder.c | 3 +- .../drm/amd/display/dc/dcn201/dcn201_dpp.c| 7 .../drm/amd/display/dc/dcn201/dcn201_hwseq.c | 2 - .../gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c | 2 - .../gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c | 4 -- .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 3 -- .../gpu/drm/amd/display/dc/dcn31/dcn31_apg.c | 41 --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +-- .../display/dc/dcn32/dcn32_resource_helpers.c | 4 -- .../dc/dml/dcn20/display_mode_vba_20.c| 9 ++-- .../dc/dml/dcn20/display_mode_vba_20v2.c | 11 ++--- .../dc/dml/dcn21/display_mode_vba_21.c| 12 +++--- .../dc/dml/dcn31/display_rq_dlg_calc_31.c | 2 - .../dc/link/protocols/link_dp_capability.c| 4 -- 14 files changed, 19 insertions(+), 90 deletions(-) -- 2.39.2
Re: [PATCH v2 2/8] accel/qaic: Add uapi and core driver file
On 2/16/2023 7:13 AM, Jacek Lawrynowicz wrote: Hi, On 06.02.2023 16:41, Jeffrey Hugo wrote: Add the QAIC driver uapi file and core driver file that binds to the PCIe device. The core driver file also creates the accel device and manages all the interconnections between the different parts of the driver. The driver can be built as a module. If so, it will be called "qaic.ko". Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip --- drivers/accel/qaic/qaic.h | 321 ++ drivers/accel/qaic/qaic_drv.c | 771 ++ include/uapi/drm/qaic_accel.h | 283 3 files changed, 1375 insertions(+) create mode 100644 drivers/accel/qaic/qaic.h create mode 100644 drivers/accel/qaic/qaic_drv.c create mode 100644 include/uapi/drm/qaic_accel.h diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h new file mode 100644 index 000..3f7ea76 --- /dev/null +++ b/drivers/accel/qaic/qaic.h @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef QAICINTERNAL_H_ Please use guard macro that matches the file name: _QAIC_H_ Before moving to DRM/ACCEL, this conflicted with the uapi file. However, that is no longer the case, so yes, this should be changed. Will do. 
+#define QAICINTERNAL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define QAIC_DBC_BASE 0x2 +#define QAIC_DBC_SIZE 0x1000 + +#define QAIC_NO_PARTITION -1 + +#define QAIC_DBC_OFF(i)((i) * QAIC_DBC_SIZE + QAIC_DBC_BASE) + +#define to_qaic_bo(obj) container_of(obj, struct qaic_bo, base) + +extern bool poll_datapath; + +struct qaic_user { + /* Uniquely identifies this user for the device */ + int handle; + struct kref ref_count; + /* Char device opened by this user */ + struct qaic_drm_device *qddev; + /* Node in list of users that opened this drm device */ + struct list_headnode; + /* SRCU used to synchronize this user during cleanup */ + struct srcu_struct qddev_lock; + atomic_tchunk_id; +}; + +struct dma_bridge_chan { + /* Pointer to device strcut maintained by driver */ + struct qaic_device *qdev; + /* ID of this DMA bridge channel(DBC) */ + unsigned intid; + /* Synchronizes access to xfer_list */ + spinlock_t xfer_lock; + /* Base address of request queue */ + void*req_q_base; + /* Base address of response queue */ + void*rsp_q_base; + /* +* Base bus address of request queue. Response queue bus address can be +* calculated by adding request queue size to this variable +*/ + dma_addr_t dma_addr; + /* Total size of request and response queue in byte */ + u32 total_size; + /* Capacity of request/response queue */ + u32 nelem; + /* The user that opened this DBC */ + struct qaic_user*usr; + /* +* Request ID of next memory handle that goes in request queue. One +* memory handle can enqueue more than one request elements, all +* this requests that belong to same memory handle have same request ID +*/ + u16 next_req_id; + /* TRUE: DBC is in use; FALSE: DBC not in use */ Use standard "true"/"false" instead of custom "TRUE"/"FALSE" macros. This applies here and in multiple other places in the driver. I think you are getting at that the documentation could be confusing. 
I don't appear to see custom macro use in the code. Will try to clarify that here. + boolin_use; + /* +* Base address of device registers. Used to read/write request and +* response queue's head and tail pointer of this DBC. +*/ + void __iomem*dbc_base; + /* Head of list where each node is a memory handle queued in request queue */ + struct list_headxfer_list; + /* Synchronizes DBC readers during cleanup */ + struct srcu_struct ch_lock; + /* +* When this DBC is released, any thread waiting on this wait queue is +* woken up +*/ + wait_queue_head_t dbc_release; + /* Head of list where each node is a bo associated with this DBC */ + struct list_headbo_lists; + /* The irq line for this DBC. Used for polling */ + unsigned intirq; + /* Polling work item to simulate interrupts */ + struct work_struct poll_work; +}; + +struct qaic_device { +
[PATCH v2 3/3] drm/amd/display: Remove unused local variables and function
Remove a couple of local variables that are only set but never used, also remove an static utility function that is never used in consequence of the variable removal. This decrease the number of -Wunused-but-set-variable warnings. Signed-off-by: Arthur Grillo --- .../gpu/drm/amd/display/dc/dcn31/dcn31_apg.c | 41 --- 1 file changed, 41 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c index 24e9ff65434d..05aac3e444b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c @@ -72,40 +72,6 @@ static void apg31_disable( REG_UPDATE(APG_CONTROL2, APG_ENABLE, 0); } -static union audio_cea_channels speakers_to_channels( - struct audio_speaker_flags speaker_flags) -{ - union audio_cea_channels cea_channels = {0}; - - /* these are one to one */ - cea_channels.channels.FL = speaker_flags.FL_FR; - cea_channels.channels.FR = speaker_flags.FL_FR; - cea_channels.channels.LFE = speaker_flags.LFE; - cea_channels.channels.FC = speaker_flags.FC; - - /* if Rear Left and Right exist move RC speaker to channel 7 -* otherwise to channel 5 -*/ - if (speaker_flags.RL_RR) { - cea_channels.channels.RL_RC = speaker_flags.RL_RR; - cea_channels.channels.RR = speaker_flags.RL_RR; - cea_channels.channels.RC_RLC_FLC = speaker_flags.RC; - } else { - cea_channels.channels.RL_RC = speaker_flags.RC; - } - - /* FRONT Left Right Center and REAR Left Right Center are exclusive */ - if (speaker_flags.FLC_FRC) { - cea_channels.channels.RC_RLC_FLC = speaker_flags.FLC_FRC; - cea_channels.channels.RRC_FRC = speaker_flags.FLC_FRC; - } else { - cea_channels.channels.RC_RLC_FLC = speaker_flags.RLC_RRC; - cea_channels.channels.RRC_FRC = speaker_flags.RLC_RRC; - } - - return cea_channels; -} - static void apg31_se_audio_setup( struct apg *apg, unsigned int az_inst, @@ -113,24 +79,17 @@ static void apg31_se_audio_setup( { struct dcn31_apg *apg31 = DCN31_APG_FROM_APG(apg); - 
uint32_t speakers = 0; - uint32_t channels = 0; - ASSERT(audio_info); /* This should not happen.it does so we don't get BSOD*/ if (audio_info == NULL) return; - speakers = audio_info->flags.info.ALLSPEAKERS; - channels = speakers_to_channels(audio_info->flags.speaker_flags).all; - /* DisplayPort only allows for one audio stream with stream ID 0 */ REG_UPDATE(APG_CONTROL2, APG_DP_AUDIO_STREAM_ID, 0); /* When running in "pair mode", pairs of audio channels have their own enable * this is for really old audio drivers */ REG_UPDATE(APG_DBG_GEN_CONTROL, APG_DBG_AUDIO_CHANNEL_ENABLE, 0xFF); - // REG_UPDATE(APG_DBG_GEN_CONTROL, APG_DBG_AUDIO_CHANNEL_ENABLE, channels); /* Disable forced mem power off */ REG_UPDATE(APG_MEM_PWR, APG_MEM_PWR_FORCE, 0); -- 2.39.2
[PATCH v2 2/3] drm/amd/display: Remove unused local variables
Remove local variables that were just set but were never used. This decrease the number of -Wunused-but-set-variable warnings. Signed-off-by: Arthur Grillo --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c | 3 +-- drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c | 7 --- drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c | 2 -- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c | 2 -- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c | 4 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 3 --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 + .../gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 4 .../drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c | 2 -- .../drm/amd/display/dc/link/protocols/link_dp_capability.c | 4 10 files changed, 2 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index c4287147b853..ee08b545aaea 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -1219,7 +1219,6 @@ void dcn10_link_encoder_update_mst_stream_allocation_table( const struct link_mst_stream_allocation_table *table) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); - uint32_t value0 = 0; uint32_t value1 = 0; uint32_t value2 = 0; uint32_t slots = 0; @@ -1321,7 +1320,7 @@ void dcn10_link_encoder_update_mst_stream_allocation_table( do { udelay(10); - value0 = REG_READ(DP_MSE_SAT_UPDATE); + REG_READ(DP_MSE_SAT_UPDATE); REG_GET(DP_MSE_SAT_UPDATE, DP_MSE_SAT_UPDATE, &value1); diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c index f50ab961bc17..a7268027a472 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c @@ -185,13 +185,6 @@ static bool dpp201_get_optimal_number_of_taps( struct scaler_data *scl_data, const 
struct scaling_taps *in_taps) { - uint32_t pixel_width; - - if (scl_data->viewport.width > scl_data->recout.width) - pixel_width = scl_data->recout.width; - else - pixel_width = scl_data->viewport.width; - if (scl_data->viewport.width != scl_data->h_active && scl_data->viewport.height != scl_data->v_active && dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT && diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c index 61bcfa03c4e7..1aeb04fbd89d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c @@ -541,8 +541,6 @@ void dcn201_pipe_control_lock( bool lock) { struct dce_hwseq *hws = dc->hwseq; - struct hubp *hubp = NULL; - hubp = dc->res_pool->hubps[pipe->pipe_idx]; /* use TG master update lock to lock everything on the TG * therefore only top pipe need to lock */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c index 95528e5ef89e..55e388c4c98b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c @@ -123,7 +123,6 @@ void afmt3_se_audio_setup( { struct dcn30_afmt *afmt3 = DCN30_AFMT_FROM_AFMT(afmt); - uint32_t speakers = 0; uint32_t channels = 0; ASSERT(audio_info); @@ -131,7 +130,6 @@ void afmt3_se_audio_setup( if (audio_info == NULL) return; - speakers = audio_info->flags.info.ALLSPEAKERS; channels = speakers_to_channels(audio_info->flags.speaker_flags).all; /* setup the audio stream source select (audio -> dig mapping) */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index dc3e8df706b3..e46bbe7ddcc9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -47,13 +47,9 @@ void hubp3_set_vm_system_aperture_settings(struct hubp *hubp, { struct 
dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); - PHYSICAL_ADDRESS_LOC mc_vm_apt_default; PHYSICAL_ADDRESS_LOC mc_vm_apt_low; PHYSICAL_ADDRESS_LOC mc_vm_apt_high; - // The format of default addr is 48:12 of the 48 bit addr - mc_vm_apt_default.quad_part = apt->sys_default.quad_part >> 12; - // The format of high/low are 48:18 of the 48 bit addr mc_vm_apt_low.quad_part = apt->sys_low.quad_part >> 18; mc_vm_apt_high.quad_part = apt->sys_
[PATCH v2 1/3] drm/amd/display: Fix implicit enum conversion
Make implicit enum conversion to avoid -Wenum-conversion warning, such as: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn21/display_mode_vba_21.c:4109:88: warning: implicit conversion from ‘enum ’ to ‘enum odm_combine_mode’ [-Wenum-conversion] 4109 | locals->ODMCombineEnablePerState[i][k] = true; | ^ Signed-off-by: Arthur Grillo --- .../amd/display/dc/dml/dcn20/display_mode_vba_20.c | 9 + .../amd/display/dc/dml/dcn20/display_mode_vba_20v2.c | 11 ++- .../amd/display/dc/dml/dcn21/display_mode_vba_21.c | 12 ++-- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c index d3b5b6fedf04..1b47249f01d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c @@ -26,6 +26,7 @@ #include "../display_mode_lib.h" #include "display_mode_vba_20.h" #include "../dml_inline_defs.h" +#include "dml/display_mode_enums.h" /* * NOTE: @@ -3897,14 +3898,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); - locals->ODMCombineEnablePerState[i][k] = false; + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; if (mode_lib->vba.ODMCapability) { if (locals->PlaneRequiredDISPCLKWithoutODMCombine > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { - locals->ODMCombineEnablePerState[i][k] = true; + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; } else if (locals->HActive[k] > DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) { - 
locals->ODMCombineEnablePerState[i][k] = true; + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; } } @@ -3957,7 +3958,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->RequiredDISPCLK[i][j] = 0.0; locals->DISPCLK_DPPCLK_Support[i][j] = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - locals->ODMCombineEnablePerState[i][k] = false; + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) { locals->NoOfDPP[i][j][k] = 1; locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index edd098c7eb92..4781bf82eec6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -26,6 +26,7 @@ #include "../display_mode_lib.h" #include "display_mode_vba_20v2.h" #include "../dml_inline_defs.h" +#include "dml/display_mode_enums.h" /* * NOTE: @@ -4008,17 +4009,17 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); - locals->ODMCombineEnablePerState[i][k] = false; + locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; mode_lib->vba.PlaneRequiredDISPCLK =
Re: [PATCH v3 0/2] Don't use stolen memory or BAR mappings for ring buffers
On 2/17/2023 00:39, Hogander, Jouni wrote: On Wed, 2023-02-15 at 17:10 -0800, john.c.harri...@intel.com wrote: From: John Harrison Instruction from hardware arch is that stolen memory and BAR mappings are unsafe for use as ring buffers. There can be issues with cache aliasing due to the CPU access going to memory via the BAR. So, don't do it. Tested these patches for GPU Hang I was debugging. Seem to fix that one as well: Tested-by: Jouni Högander Sweet! Out of interest, which platform was that? And how reproducible was it? It would be interesting to know if an IGT was actually regularly showing the issue and we had just been ignoring it! John. v2: Dont use BAR mappings either. Make conditional on LLC so as not to change platforms that don't need to change (Daniele). Add 'Fixes' tags (Tvrtko). v3: Fix dumb typo. Signed-off-by: John Harrison John Harrison (2): drm/i915: Don't use stolen memory for ring buffers with LLC drm/i915: Don't use BAR mappings for ring buffers with LLC drivers/gpu/drm/i915/gt/intel_ring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits
On Fri, Feb 17, 2023 at 8:03 AM Tvrtko Ursulin wrote: > > > On 17/02/2023 14:55, Rob Clark wrote: > > On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin > > wrote: > >> > >> > >> On 16/02/2023 18:19, Rodrigo Vivi wrote: > >>> On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote: > On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin > wrote: > > > > From: Tvrtko Ursulin > > > > In i915 we have this concept of "wait boosting" where we give a > > priority boost > > for instance to fences which are actively waited upon from userspace. > > This has > > it's pros and cons and can certainly be discussed at lenght. However > > fact is > > some workloads really like it. > > > > Problem is that with the arrival of drm syncobj and a new userspace > > waiting > > entry point it added, the waitboost mechanism was bypassed. Hence I > > cooked up > > this mini series really (really) quickly to see if some discussion can > > be had. > > > > It adds a concept of "wait count" to dma fence, which is incremented > > for every > > explicit dma_fence_enable_sw_signaling and dma_fence_add_wait_callback > > (like > > dma_fence_add_callback but from explicit/userspace wait paths). > > I was thinking about a similar thing, but in the context of dma_fence > (or rather sync_file) fd poll()ing. How does the kernel differentiate > between "housekeeping" poll()ers that don't want to trigger boost but > simply know when to do cleanup, and waiters who are waiting with some > urgency. I think we could use EPOLLPRI for this purpose. > > Not sure how that translates to waits via the syncobj. But I think we > want to let userspace give some hint about urgent vs housekeeping > waits. > >>> > >>> Should the hint be on the waits, or should the hints be on the executed > >>> context? > >>> > >>> In the end we need some way to quickly ramp-up the frequency to avoid > >>> the execution bubbles. > >>> > >>> waitboost is trying to guess that, but in some cases it guess wrong > >>> and waste power. 
> >> > >> Do we have a list of workloads which shows who benefits and who loses > >> from the current implementation of waitboost? > >>> btw, this is something that other drivers might need: > >>> > >>> https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883 > >>> Cc: Alex Deucher > >> > >> I have several issues with the context hint if it would directly > >> influence frequency selection in the "more power" direction. > >> > >> First of all, assume a context hint would replace the waitboost. Which > >> applications would need to set it to restore the lost performance and > >> how would they set it? > >> > >> Then I don't even think userspace necessarily knows. Think of a layer > >> like OpenCL. It doesn't really know in advance the profile of > >> submissions vs waits. It depends on the CPU vs GPU speed, so hardware > >> generation, and the actual size of the workload which can be influenced > >> by the application (or user) and not the library. > >> > >> The approach also lends itself well for the "arms race" where every > >> application can say "Me me me, I am the most important workload there is!". > > > > since there is discussion happening in two places: > > > > https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433 > > > > What I think you might want is a ctx boost_mask which lets an app or > > driver disable certain boost signals/classes. Where fence waits is > > one class of boost, but hypothetical other signals like touchscreen > > (or other) input events could be another class of boost. A compute > > workload might be interested in fence wait boosts but could care less > > about input events. > > I think it can only be apps which could have any chance knowing whether > their use of a library is latency sensitive or not. Which means new > library extensions and their adoption. So I have some strong reservation > that route is feasible. > > Or we tie with priority which many drivers do. 
Normal and above gets the > boosting and what lowered itself does not (aka SCHED_IDLE/SCHED_BATCH). yeah, that sounds reasonable. > Related note is that we lack any external control of our scheduling > decisions so we really do suck compared to other scheduling domains like > CPU and IO etc. > > >> The last concern is for me shared with the proposal to expose deadlines > >> or high priority waits as explicit uapi knobs. Both come under the "what > >> application told us it will do" category vs what it actually does. So I > >> think it is slightly weaker than basing decisions of waits. > >> > >> The current waitboost is a bit detached from that problem because when > >> we waitboost for flips we _know_ it is an actual framebuffer in the flip > >> chain. When we waitboost for waits we also know someone is waiting. We > >> are not trusting userspace telling us this will be a buffer
Re: [PATCH v29 4/7] drm/mediatek: add dma dev get function
On Tue, 27 Dec 2022 09:10, "" wrote: >This is a preparation for adding support for the ovl_adaptor sub driver >Ovl_adaptor is a DRM sub driver, which doesn't have dma dev. Add >dma_dev_get function for getting representative dma dev in ovl_adaptor. > >Signed-off-by: Nancy.Lin >Reviewed-by: AngeloGioachino Del Regno > >Reviewed-by: CK Hu >Tested-by: AngeloGioacchino Del Regno >Tested-by: Bo-Chen Chen >--- > drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 15 +++ > drivers/gpu/drm/mediatek/mtk_drm_crtc.h | 1 + > drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h | 8 > 3 files changed, 24 insertions(+) > >diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c >b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c >index 112615817dcb..78e20f604158 100644 >--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c >+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c >@@ -58,6 +58,7 @@ struct mtk_drm_crtc { > #endif > > struct device *mmsys_dev; >+ struct device *dma_dev; > struct mtk_mutex*mutex; > unsigned intddp_comp_nr; > struct mtk_ddp_comp **ddp_comp; >@@ -865,6 +866,13 @@ static int mtk_drm_crtc_init_comp_planes(struct >drm_device *drm_dev, > return 0; > } > >+struct device *mtk_drm_crtc_dma_dev_get(struct drm_crtc *crtc) >+{ >+ struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); >+ >+ return mtk_crtc->dma_dev; >+} While testing out the HDMI patchset for i1200, I've ended up with a panic here with crtc being NULL. I've fixed the issue on my side by testing crtc prior doing anything in that function. Not sure this is the proper fix. HTH, Guillaume. >+ > int mtk_drm_crtc_create(struct drm_device *drm_dev, > const enum mtk_ddp_comp_id *path, unsigned int path_len) > { >@@ -953,6 +961,13 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, > return ret; > } > >+ /* >+ * Default to use the first component as the dma dev. >+ * In the case of ovl_adaptor sub driver, it needs to use the >+ * dma_dev_get function to get representative dma dev. 
>+ */ >+ mtk_crtc->dma_dev = mtk_ddp_comp_dma_dev_get(&priv->ddp_comp[path[0]]); >+ > ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, pipe); > if (ret < 0) > return ret; >diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h >b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h >index cb9a36c48d4f..f5a6e80c5265 100644 >--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h >+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h >@@ -22,5 +22,6 @@ int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct >drm_plane *plane, >struct mtk_plane_state *state); > void mtk_drm_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane, > struct drm_atomic_state *plane_state); >+struct device *mtk_drm_crtc_dma_dev_get(struct drm_crtc *crtc); > > #endif /* MTK_DRM_CRTC_H */ >diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h >b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h >index 2d0052c23dcb..364f3f7f59fa 100644 >--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h >+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h >@@ -71,6 +71,7 @@ struct mtk_ddp_comp_funcs { > void (*bgclr_in_off)(struct device *dev); > void (*ctm_set)(struct device *dev, > struct drm_crtc_state *state); >+ struct device * (*dma_dev_get)(struct device *dev); > }; > > struct mtk_ddp_comp { >@@ -203,6 +204,13 @@ static inline void mtk_ddp_ctm_set(struct mtk_ddp_comp >*comp, > comp->funcs->ctm_set(comp->dev, state); > } > >+static inline struct device *mtk_ddp_comp_dma_dev_get(struct mtk_ddp_comp >*comp) >+{ >+ if (comp->funcs && comp->funcs->dma_dev_get) >+ return comp->funcs->dma_dev_get(comp->dev); >+ return comp->dev; >+} >+ > int mtk_ddp_comp_get_id(struct device_node *node, > enum mtk_ddp_comp_type comp_type); > unsigned int mtk_drm_find_possible_crtc_by_comp(struct drm_device *drm, >-- >2.18.0 > >
Re: [PATCH v29 3/7] drm/mediatek: add ovl_adaptor support for MT8195
On Tue, 27 Dec 2022 09:10, "" wrote: Hi Nancy. I've been using your patches lately to test out the HDMI series on mt8195 and I have hit a scheduling bug. >Add ovl_adaptor driver for MT8195. >Ovl_adaptor is an encapsulated module and designed for simplified >DRM control flow. This module is composed of 8 RDMAs, 4 MERGEs and >an ETHDR. Two RDMAs merge into one layer, so this module support 4 >layers. > >Signed-off-by: Nancy.Lin >Reviewed-by: Chun-Kuang Hu >Reviewed-by: AngeloGioacchino Del Regno > >Tested-by: AngeloGioacchino Del Regno >Tested-by: Bo-Chen Chen >--- > drivers/gpu/drm/mediatek/Makefile | 1 + > drivers/gpu/drm/mediatek/mtk_disp_drv.h | 26 + > .../gpu/drm/mediatek/mtk_disp_ovl_adaptor.c | 533 ++ > drivers/gpu/drm/mediatek/mtk_drm_drv.h| 1 + > 4 files changed, 561 insertions(+) > create mode 100644 drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c > >diff --git a/drivers/gpu/drm/mediatek/Makefile >b/drivers/gpu/drm/mediatek/Makefile >index 840f14436d3c..d4d193f60271 100644 >--- a/drivers/gpu/drm/mediatek/Makefile >+++ b/drivers/gpu/drm/mediatek/Makefile >@@ -6,6 +6,7 @@ mediatek-drm-y := mtk_disp_aal.o \ > mtk_disp_gamma.o \ > mtk_disp_merge.o \ > mtk_disp_ovl.o \ >+mtk_disp_ovl_adaptor.o \ > mtk_disp_rdma.o \ > mtk_drm_crtc.o \ > mtk_drm_ddp_comp.o \ >diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h >b/drivers/gpu/drm/mediatek/mtk_disp_drv.h >index 33e61a136bbc..654f8e257984 100644 >--- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h >+++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h >@@ -7,6 +7,8 @@ > #define _MTK_DISP_DRV_H_ > > #include >+#include >+#include > #include "mtk_drm_plane.h" > #include "mtk_mdp_rdma.h" > >@@ -116,6 +118,30 @@ void mtk_rdma_unregister_vblank_cb(struct device *dev); > void mtk_rdma_enable_vblank(struct device *dev); > void mtk_rdma_disable_vblank(struct device *dev); > >+void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex); >+void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex); >+void 
mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, >+ unsigned int next); >+void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, >+ unsigned int next); >+int mtk_ovl_adaptor_clk_enable(struct device *dev); >+void mtk_ovl_adaptor_clk_disable(struct device *dev); >+void mtk_ovl_adaptor_config(struct device *dev, unsigned int w, >+ unsigned int h, unsigned int vrefresh, >+ unsigned int bpc, struct cmdq_pkt *cmdq_pkt); >+void mtk_ovl_adaptor_layer_config(struct device *dev, unsigned int idx, >+struct mtk_plane_state *state, >+struct cmdq_pkt *cmdq_pkt); >+void mtk_ovl_adaptor_register_vblank_cb(struct device *dev, void >(*vblank_cb)(void *), >+ void *vblank_cb_data); >+void mtk_ovl_adaptor_unregister_vblank_cb(struct device *dev); >+void mtk_ovl_adaptor_enable_vblank(struct device *dev); >+void mtk_ovl_adaptor_disable_vblank(struct device *dev); >+void mtk_ovl_adaptor_start(struct device *dev); >+void mtk_ovl_adaptor_stop(struct device *dev); >+unsigned int mtk_ovl_adaptor_layer_nr(struct device *dev); >+struct device *mtk_ovl_adaptor_dma_dev_get(struct device *dev); >+ > int mtk_mdp_rdma_clk_enable(struct device *dev); > void mtk_mdp_rdma_clk_disable(struct device *dev); > void mtk_mdp_rdma_start(struct device *dev, struct cmdq_pkt *cmdq_pkt); >diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c >b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c >new file mode 100644 >index ..046217828ab3 >--- /dev/null >+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c >@@ -0,0 +1,533 @@ >+// SPDX-License-Identifier: GPL-2.0-only >+/* >+ * Copyright (c) 2021 MediaTek Inc. 
>+ */ >+ >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+ >+#include "mtk_disp_drv.h" >+#include "mtk_drm_crtc.h" >+#include "mtk_drm_ddp_comp.h" >+#include "mtk_drm_drv.h" >+#include "mtk_ethdr.h" >+ >+#define MTK_OVL_ADAPTOR_RDMA_MAX_WIDTH 1920 >+#define MTK_OVL_ADAPTOR_LAYER_NUM 4 >+ >+enum mtk_ovl_adaptor_comp_type { >+ OVL_ADAPTOR_TYPE_RDMA = 0, >+ OVL_ADAPTOR_TYPE_MERGE, >+ OVL_ADAPTOR_TYPE_ETHDR, >+ OVL_ADAPTOR_TYPE_NUM, >+}; >+ >+enum mtk_ovl_adaptor_comp_id { >+ OVL_ADAPTOR_MDP_RDMA0, >+ OVL_ADAPTOR_MDP_RDMA1, >+ OVL_ADAPTOR_MDP_RDMA2, >+ OVL_ADAPTOR_MDP_RDMA3, >+ OVL_ADAPTOR_MDP_RDMA4, >+ OVL_ADAPTOR_MDP_RDMA5, >+ OVL_ADAPTOR_MDP_RDMA6, >+ OVL_ADAPTOR_MDP_RDMA7, >+ OVL_ADAPTOR_MERGE0, >+ OVL_ADAPTOR_MERGE1, >+ OVL_ADAPTOR_MERGE2, >+ OVL_ADAPTOR_MERGE3, >+ OVL_ADAPTO
[PATCH v2 2/2] drm: document DRM_IOCTL_PRIME_HANDLE_TO_FD and PRIME_FD_TO_HANDLE
v2: mention caps, note that the IOCTLs might fail, document that user-space needs a data structure to keep track of the handles (Daniel V.) Signed-off-by: Simon Ser Cc: Daniel Vetter Cc: Pekka Paalanen Cc: Daniel Stone --- include/uapi/drm/drm.h | 30 ++ 1 file changed, 30 insertions(+) diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 292e4778a2f4..a87ca2d4 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -1025,7 +1025,37 @@ extern "C" { #define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock) #define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) +/** + * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD. + * + * User-space sets &drm_prime_handle.handle with the GEM handle to export and + * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in + * &drm_prime_handle.fd. + * + * The export can fail for any driver-specific reason, e.g. because export is + * not supported for this specific GEM handle (but might be for others). + * + * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT. + */ #define DRM_IOCTL_PRIME_HANDLE_TO_FDDRM_IOWR(0x2d, struct drm_prime_handle) +/** + * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle. + * + * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to + * import, and gets back a GEM handle in &drm_prime_handle.handle. + * &drm_prime_handle.flags is unused. + * + * If an existing GEM handle refers to the memory object backing the DMA-BUF, + * that GEM handle is returned. Therefore user-space which needs to handle + * arbitrary DMA-BUFs must have a user-space lookup data structure to manually + * reference-count duplicated GEM handles. For more information see + * &DRM_IOCTL_GEM_CLOSE. + * + * The import can fail for any driver-specific reason, e.g. because import is + * only supported for DMA-BUFs allocated on this DRM device. + * + * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT. 
+ */ #define DRM_IOCTL_PRIME_FD_TO_HANDLEDRM_IOWR(0x2e, struct drm_prime_handle) #define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30) -- 2.39.2
[PATCH v2 1/2] drm: document expectations for GETFB2 handles
There are two important details missing from the docs: - If the memory object backing the FB already has a GEM handle, it's not re-used, a new one is generated. - Aliased planes will return the same GEM handle. v2: document how user-space can obtain DMA-BUF FDs without leaking handles (Pekka) Signed-off-by: Simon Ser Cc: Daniel Vetter Cc: Pekka Paalanen Cc: Daniel Stone Acked-by: Pekka Paalanen --- include/uapi/drm/drm.h | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index c39fefb54613..292e4778a2f4 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -1117,8 +1117,13 @@ extern "C" { * struct as the output. * * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles - * will be filled with GEM buffer handles. Planes are valid until one has a - * zero handle -- this can be used to compute the number of planes. + * will be filled with GEM buffer handles. Fresh new GEM handles are always + * returned, even if another GEM handle referring to the same memory object + * already exists on the DRM file description. The caller is responsible for + * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same + * new handle will be returned for multiple planes in case they use the same + * memory object. Planes are valid until one has a zero handle -- this can be + * used to compute the number of planes. * * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid * until one has a zero &drm_mode_fb_cmd2.pitches. @@ -1126,6 +1131,11 @@ extern "C" { * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. 
+ * + * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space + * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately + * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not + * double-close handles which are specified multiple times in the array. */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) -- 2.39.2
Re: [Intel-gfx] [PATCH] drm/i915/guc: avoid FIELD_PREP warning
On Fri, Feb 17, 2023, at 16:38, Andrzej Hajda wrote: > On 17.02.2023 13:46, Arnd Bergmann wrote: >> From: Arnd Bergmann >> >> With gcc-7 and earlier, there are lots of warnings like >> >> In file included from :0:0: >> In function '__guc_context_policy_add_priority.isra.66', >> inlined from '__guc_context_set_prio.isra.67' at >> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3292:3, >> inlined from 'guc_context_set_prio' at >> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3320:2: >> include/linux/compiler_types.h:399:38: error: call to >> '__compiletime_assert_631' declared with attribute error: FIELD_PREP: mask >> is not constant >>_compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) >>^ >> ... >> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:2422:3: note: in expansion >> of macro 'FIELD_PREP' >> FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ >> ^~ >> >> Make sure that GUC_KLV_0_KEY is an unsigned value to avoid the warning. > > Does it mean __builtin_constant_p in gcc7 returns 0 on signed constants? > I guess there should be more similar errors. No, it's not as simple as that, I'm not really sure what the underlying problem is with the compiler, and this is the only file that triggered this particular warning. There are other cases where old compilers had the reverse problem, where they sometimes report a variable to be __builtin_constant_p()==true if there is a branch that assigns a constant to it. I think here it happens because GUC_KLV_0_KEY and GUC_KLV_n_VALUE are negative. Arnd
Re: [PATCH v2 0/8] QAIC accel driver
On 2/8/2023 3:01 PM, Jeffrey Hugo wrote: On 2/6/2023 8:41 AM, Jeffrey Hugo wrote: Regarding the open userspace (see the documentation patch), the UMD and compiler are a week or so away from being posted in the indicated repos. Just need to polish some documentation. An update to this, the compiler is now live on github at the link specified in the documentation patch. The UMD is now posted. -Jeff
Re: [PATCH] drm/i915/guc: avoid FIELD_PREP warning
On Fri, Feb 17, 2023 at 01:46:50PM +0100, Arnd Bergmann wrote: > From: Arnd Bergmann > > With gcc-7 and earlier, there are lots of warnings like > > In file included from <command-line>:0:0: > In function '__guc_context_policy_add_priority.isra.66', > inlined from '__guc_context_set_prio.isra.67' at > drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3292:3, > inlined from 'guc_context_set_prio' at > drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3320:2: > include/linux/compiler_types.h:399:38: error: call to > '__compiletime_assert_631' declared with attribute error: FIELD_PREP: mask is > not constant > _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) > ^ > ... > drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:2422:3: note: in expansion > of macro 'FIELD_PREP' >FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ >^~ > > Make sure that GUC_KLV_0_KEY is an unsigned value to avoid the warning. > > Fixes: 77b6f79df66e ("drm/i915/guc: Update to GuC version 69.0.3") > Signed-off-by: Arnd Bergmann > --- > drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h > b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h > index 58012edd4eb0..4f4f53c42a9c 100644 > --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h > +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h > @@ -29,9 +29,9 @@ > */ > > #define GUC_KLV_LEN_MIN 1u > -#define GUC_KLV_0_KEY (0xffff << 16) > -#define GUC_KLV_0_LEN (0xffff << 0) > -#define GUC_KLV_n_VALUE (0xffffffff << 0) > +#define GUC_KLV_0_KEY (0xffffu << 16) > +#define GUC_KLV_0_LEN (0xffffu << 0) > +#define GUC_KLV_n_VALUE (0xffffffffu << 0) what about changing them to GENMASK? > > /** > * DOC: GuC Self Config KLVs > -- > 2.39.1 >
Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits
On 17/02/2023 14:55, Rob Clark wrote: On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin wrote: On 16/02/2023 18:19, Rodrigo Vivi wrote: On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote: On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin wrote: From: Tvrtko Ursulin In i915 we have this concept of "wait boosting" where we give a priority boost for instance to fences which are actively waited upon from userspace. This has it's pros and cons and can certainly be discussed at lenght. However fact is some workloads really like it. Problem is that with the arrival of drm syncobj and a new userspace waiting entry point it added, the waitboost mechanism was bypassed. Hence I cooked up this mini series really (really) quickly to see if some discussion can be had. It adds a concept of "wait count" to dma fence, which is incremented for every explicit dma_fence_enable_sw_signaling and dma_fence_add_wait_callback (like dma_fence_add_callback but from explicit/userspace wait paths). I was thinking about a similar thing, but in the context of dma_fence (or rather sync_file) fd poll()ing. How does the kernel differentiate between "housekeeping" poll()ers that don't want to trigger boost but simply know when to do cleanup, and waiters who are waiting with some urgency. I think we could use EPOLLPRI for this purpose. Not sure how that translates to waits via the syncobj. But I think we want to let userspace give some hint about urgent vs housekeeping waits. Should the hint be on the waits, or should the hints be on the executed context? In the end we need some way to quickly ramp-up the frequency to avoid the execution bubbles. waitboost is trying to guess that, but in some cases it guess wrong and waste power. Do we have a list of workloads which shows who benefits and who loses from the current implementation of waitboost? 
btw, this is something that other drivers might need: https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883 Cc: Alex Deucher I have several issues with the context hint if it would directly influence frequency selection in the "more power" direction. First of all, assume a context hint would replace the waitboost. Which applications would need to set it to restore the lost performance and how would they set it? Then I don't even think userspace necessarily knows. Think of a layer like OpenCL. It doesn't really know in advance the profile of submissions vs waits. It depends on the CPU vs GPU speed, so hardware generation, and the actual size of the workload which can be influenced by the application (or user) and not the library. The approach also lends itself well for the "arms race" where every application can say "Me me me, I am the most important workload there is!". since there is discussion happening in two places: https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433 What I think you might want is a ctx boost_mask which lets an app or driver disable certain boost signals/classes. Where fence waits is one class of boost, but hypothetical other signals like touchscreen (or other) input events could be another class of boost. A compute workload might be interested in fence wait boosts but could care less about input events. I think it can only be apps which could have any chance knowing whether their use of a library is latency sensitive or not. Which means new library extensions and their adoption. So I have some strong reservation that route is feasible. Or we tie with priority which many drivers do. Normal and above gets the boosting and what lowered itself does not (aka SCHED_IDLE/SCHED_BATCH). Related note is that we lack any external control of our scheduling decisions so we really do suck compared to other scheduling domains like CPU and IO etc. 
The last concern is for me shared with the proposal to expose deadlines or high priority waits as explicit uapi knobs. Both come under the "what application told us it will do" category vs what it actually does. So I think it is slightly weaker than basing decisions of waits. The current waitboost is a bit detached from that problem because when we waitboost for flips we _know_ it is an actual framebuffer in the flip chain. When we waitboost for waits we also know someone is waiting. We are not trusting userspace telling us this will be a buffer in the flip chain or that this is a context which will have a certain duty-cycle. But yes, even if the input is truthful, latter is still only a heuristics because nothing says all waits are important. AFAIU it just happened to work well in the past. I do understand I am effectively arguing for more heuristics, which may sound a bit against the common wisdom. This is because in general I think the logic to do the right thing, be it in the driver or in the firmware, can work best if it has a holistic view. Simply put it needs to have more inputs to the decisions it is making. That is what my series is proposing - adding a co
Re: [PATCH drm-next v2 01/16] drm: execution context for GEM buffers
Am 17.02.23 um 14:44 schrieb Danilo Krummrich: From: Christian König This adds the infrastructure for an execution context for GEM buffers which is similar to the existinc TTMs execbuf util and intended to replace it in the long term. The basic functionality is that we abstracts the necessary loop to lock many different GEM buffers with automated deadlock and duplicate handling. v2: drop xarray and use dynamic resized array instead, the locking overhead is unecessary and measureable. Question for Danilo and probably others: Does Nouveau make use of the duplicate tracking at some point? Background is that I only have two or three use cases for this in radeon/amdgpu and would like to make it an optional feature. Going to take a look at the rest of this series next week. Regards, Christian. Signed-off-by: Christian König --- Documentation/gpu/drm-mm.rst | 12 ++ drivers/gpu/drm/Kconfig| 6 + drivers/gpu/drm/Makefile | 2 + drivers/gpu/drm/amd/amdgpu/Kconfig | 1 + drivers/gpu/drm/drm_exec.c | 295 + include/drm/drm_exec.h | 144 ++ 6 files changed, 460 insertions(+) create mode 100644 drivers/gpu/drm/drm_exec.c create mode 100644 include/drm/drm_exec.h diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index a79fd3549ff8..a52e6f4117d6 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -493,6 +493,18 @@ DRM Sync Objects .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c :export: +DRM Execution context += + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :doc: Overview + +.. kernel-doc:: include/drm/drm_exec.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :export: + GPU Scheduler = diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f42d4c6a19f2..1573d658fbb5 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -200,6 +200,12 @@ config DRM_TTM GPU memory types. Will be enabled automatically if a device driver uses it. 
+config DRM_EXEC + tristate + depends on DRM + help + Execution context for command submissions + config DRM_BUDDY tristate depends on DRM diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index ab4460fcd63f..d40defbb0347 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -78,6 +78,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o # # Memory-management helpers # +# +obj-$(CONFIG_DRM_EXEC) += drm_exec.o obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 5341b6b242c3..279fb3bba810 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -11,6 +11,7 @@ config DRM_AMDGPU select DRM_SCHED select DRM_TTM select DRM_TTM_HELPER + select DRM_EXEC select POWER_SUPPLY select HWMON select I2C diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c new file mode 100644 index ..ed2106c22786 --- /dev/null +++ b/drivers/gpu/drm/drm_exec.c @@ -0,0 +1,295 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +#include +#include +#include + +/** + * DOC: Overview + * + * This component mainly abstracts the retry loop necessary for locking + * multiple GEM objects while preparing hardware operations (e.g. command + * submissions, page table updates etc..). + * + * If a contention is detected while locking a GEM object the cleanup procedure + * unlocks all previously locked GEM objects and locks the contended one first + * before locking any further objects. + * + * After an object is locked fences slots can optionally be reserved on the + * dma_resv object inside the GEM object. 
+ * + * A typical usage pattern should look like this:: + * + * struct drm_gem_object *obj; + * struct drm_exec exec; + * unsigned long index; + * int ret; + * + * drm_exec_init(&exec, true); + * drm_exec_while_not_all_locked(&exec) { + * ret = drm_exec_prepare_obj(&exec, boA, 1); + * drm_exec_continue_on_contention(&exec); + * if (ret) + * goto error; + * + * ret = drm_exec_lock(&exec, boB, 1); + * drm_exec_continue_on_contention(&exec); + * if (ret) + * goto error; + * } + * + * drm_exec_for_each_locked_object(&exec, index, obj) { + * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ); + * ... + * } + * drm_exec_fini(&exec); + * + * See struct dma_exec for more details. + */ + +/* Dummy value used to initially enter the retry loop */ +#define DRM_EXEC_DUMMY (void*)~0 + +/* Initialize the drm_exec_objects container */ +static void drm_exec_object
Re: [PATCH 3/3] drm/debugfs: remove dev->debugfs_list and debugfs_mutex
Am 17.02.23 um 13:37 schrieb Jani Nikula: On Fri, 17 Feb 2023, Christian König wrote: If i915 have such structural problems then I strongly suggest to solve them inside i915 and not make common code out of that. All other things aside, that's just a completely unnecessary and unhelpful remark. Sorry, but why? We have gone through the same problems on radeon and it was massively painful, what I try here is to prevent others from using this bad design as well. And yes I think devm_ and drmm_ is a bit questionable in that regard as well. The goal is not to make it as simple as possible to write a driver, but rather as defensive as possible. In other words automatically releasing memory when an object is destroyed might be helpful, but it isn't automatically a good idea. What can easily happen for example is that you run into use after free situations on object reference decommissions, e.g. parent is freed before child for example. Regards, Christian. BR, Jani.
Re: [PATCH v3 2/2] drm/i915: Don't use BAR mappings for ring buffers with LLC
On 2/15/2023 5:11 PM, john.c.harri...@intel.com wrote: From: John Harrison Direction from hardware is that ring buffers should never be mapped via the BAR on systems with LLC. There are too many caching pitfalls due to the way BAR accesses are routed. So it is safest to just not use it. Signed-off-by: John Harrison Fixes: 9d80841ea4c9 ("drm/i915: Allow ringbuffers to be bound anywhere") Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: intel-...@lists.freedesktop.org Cc: # v4.9+ I've double-checked the original patches to make sure the 4.9 fixes tag was correct for both (which dim confirmed), because if we backport this fix without the previous one then the driver would break. Also, the original patches were merged as part of the same series (https://patchwork.freedesktop.org/series/11278/), so we should be guaranteed that they're always there as a pair. Reviewed-by: Daniele Ceraolo Spurio Daniele --- drivers/gpu/drm/i915/gt/intel_ring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index fb1d2595392ed..fb99143be98e7 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -53,7 +53,7 @@ int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww) if (unlikely(ret)) goto err_unpin; - if (i915_vma_is_map_and_fenceable(vma)) { + if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) { addr = (void __force *)i915_vma_pin_iomap(vma); } else { int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false); @@ -98,7 +98,7 @@ void intel_ring_unpin(struct intel_ring *ring) return; i915_vma_unset_ggtt_write(vma); - if (i915_vma_is_map_and_fenceable(vma)) + if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) i915_vma_unpin_iomap(vma); else i915_gem_object_unpin_map(vma->obj);
Re: [Intel-gfx] [PATCH v3 1/2] drm/i915: Don't use stolen memory for ring buffers with LLC
On 2/15/2023 5:11 PM, john.c.harri...@intel.com wrote: From: John Harrison Direction from hardware is that stolen memory should never be used for ring buffer allocations on platforms with LLC. There are too many caching pitfalls due to the way stolen memory accesses are routed. So it is safest to just not use it. Signed-off-by: John Harrison Fixes: c58b735fc762 ("drm/i915: Allocate rings from stolen") Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: intel-...@lists.freedesktop.org Cc: # v4.9+ Reviewed-by: Daniele Ceraolo Spurio Daniele --- drivers/gpu/drm/i915/gt/intel_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 15ec64d881c44..fb1d2595392ed 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -116,7 +116,7 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE | I915_BO_ALLOC_PM_VOLATILE); - if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt)) + if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt) && !HAS_LLC(i915)) obj = i915_gem_object_create_stolen(i915, size); if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size);
Re: [Intel-gfx] [PATCH] drm/i915/guc: avoid FIELD_PREP warning
On 17.02.2023 13:46, Arnd Bergmann wrote: From: Arnd Bergmann With gcc-7 and earlier, there are lots of warnings like In file included from <command-line>:0:0: In function '__guc_context_policy_add_priority.isra.66', inlined from '__guc_context_set_prio.isra.67' at drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3292:3, inlined from 'guc_context_set_prio' at drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3320:2: include/linux/compiler_types.h:399:38: error: call to '__compiletime_assert_631' declared with attribute error: FIELD_PREP: mask is not constant _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ... drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:2422:3: note: in expansion of macro 'FIELD_PREP' FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ ^~ Make sure that GUC_KLV_0_KEY is an unsigned value to avoid the warning. Does it mean __builtin_constant_p in gcc7 returns 0 on signed constants? I guess there should be more similar errors. Regards Andrzej Fixes: 77b6f79df66e ("drm/i915/guc: Update to GuC version 69.0.3") Signed-off-by: Arnd Bergmann --- drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index 58012edd4eb0..4f4f53c42a9c 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -29,9 +29,9 @@ */ #define GUC_KLV_LEN_MIN 1u -#define GUC_KLV_0_KEY (0xffff << 16) -#define GUC_KLV_0_LEN (0xffff << 0) -#define GUC_KLV_n_VALUE (0xffffffff << 0) +#define GUC_KLV_0_KEY (0xffffu << 16) +#define GUC_KLV_0_LEN (0xffffu << 0) +#define GUC_KLV_n_VALUE (0xffffffffu << 0) /** * DOC: GuC Self Config KLVs
Re: [PATCH v11 6/9] drm/bridge: anx7625: Register Type C mode switches
On Sat, Feb 04, 2023 at 09:30:37PM +0800, Pin-yen Lin wrote: [..] > --- a/drivers/gpu/drm/bridge/analogix/anx7625.c > +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c [..] > +static void anx7625_set_crosspoint_switch(struct anx7625_data *ctx, > + enum typec_orientation orientation) > +{ > + if (orientation == TYPEC_ORIENTATION_NORMAL) { > + anx7625_reg_write(ctx, ctx->i2c.tcpc_client, TCPC_SWITCH_0, > + SW_SEL1_SSRX_RX1 | SW_SEL1_DPTX0_RX2); > + anx7625_reg_write(ctx, ctx->i2c.tcpc_client, TCPC_SWITCH_1, > + SW_SEL2_SSTX_TX1 | SW_SEL2_DPTX1_TX2); This seems inverted compared to the binding. Binding says 0, 1, 2, 3 in "data-lanes" maps to SSRX1, SSTX1, SSRX2, SSTX2, respectively. But in anx7625_register_typec_switches(), lanes 0-1 mean orientation normal, then in this logic, you set RX2 and TX2 to carry the DP signals. So the driver is mapping lanes 0-1 to SSRX2/SSTX2 and lanes 2-3 to SSRX1/SSTX1, the opposite from the binding. Thanks, Nícolas > + } else if (orientation == TYPEC_ORIENTATION_REVERSE) { > + anx7625_reg_write(ctx, ctx->i2c.tcpc_client, TCPC_SWITCH_0, > + SW_SEL1_SSRX_RX2 | SW_SEL1_DPTX0_RX1); > + anx7625_reg_write(ctx, ctx->i2c.tcpc_client, TCPC_SWITCH_1, > + SW_SEL2_SSTX_TX2 | SW_SEL2_DPTX1_TX1); > + } > +} > + [..] > +static int anx7625_register_typec_switches(struct device *dev, struct > anx7625_data *ctx) > +{ [..] > + ctx->port_data[i].orientation = (dp_lanes[0] / 2 == 0) ? > + TYPEC_ORIENTATION_NORMAL : TYPEC_ORIENTATION_REVERSE; [..]
Re: [PATCH 6/6] drm/tidss: Implement struct drm_plane_helper_funcs.atomic_enable
Hi Am 17.02.23 um 15:42 schrieb Tomi Valkeinen: On 09/02/2023 17:41, Thomas Zimmermann wrote: Enable the primary plane for tidss hardware via atomic_enable. Atomic helpers invoke this callback only when the plane becomes active. Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/tidss/tidss_plane.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/tidss/tidss_plane.c b/drivers/gpu/drm/tidss/tidss_plane.c index 0b12405edb47..6bdd6e4a955a 100644 --- a/drivers/gpu/drm/tidss/tidss_plane.c +++ b/drivers/gpu/drm/tidss/tidss_plane.c @@ -124,6 +124,16 @@ static void tidss_plane_atomic_update(struct drm_plane *plane, hw_videoport = to_tidss_crtc(new_state->crtc)->hw_videoport; dispc_plane_setup(tidss->dispc, tplane->hw_plane_id, new_state, hw_videoport); +} + +static void tidss_plane_atomic_enable(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_device *ddev = plane->dev; + struct tidss_device *tidss = to_tidss(ddev); + struct tidss_plane *tplane = to_tidss_plane(plane); + + dev_dbg(ddev->dev, "%s\n", __func__); dispc_plane_enable(tidss->dispc, tplane->hw_plane_id, true); } @@ -151,6 +161,7 @@ static void drm_plane_destroy(struct drm_plane *plane) static const struct drm_plane_helper_funcs tidss_plane_helper_funcs = { .atomic_check = tidss_plane_atomic_check, .atomic_update = tidss_plane_atomic_update, + .atomic_enable = tidss_plane_atomic_enable, .atomic_disable = tidss_plane_atomic_disable, }; I haven't tested this, but looks fine to me. Reviewed-by: Tomi Valkeinen One thought, though, is that we still do dispc_plane_enable(false) in tidss_plane_atomic_update() when the plane is not visible. Not a problem, but it would be nice to only enable/disable the plane inside atomic_enable/disable. Or maybe in cases like this the driver should only use atomic_update, and do all the enabling and disabling there... I agree. Drivers that have complex enable/disable semantics should probably handle everything in atomic_update. 
Enabling/disabling is currently connected to the plane's framebuffer. As you said, it would be nice if this could be tied to visibility instead. The patch would be trivial, but some drivers might not like the change. I guess we could do an RFC patch and gather opinions. Best regards Thomas Tomi -- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Maxfeldstr. 5, 90409 Nürnberg, Germany (HRB 36809, AG Nürnberg) Geschäftsführer: Ivo Totev OpenPGP_signature Description: OpenPGP digital signature
Re: [PATCH 1/3] drm/msm/dpu: Read previously-uninitialized SSPP scaler version from hw
On Thu, 16 Feb 2023 at 23:46, Marijn Suijten wrote: > > On 2023-02-16 18:34:43, Dmitry Baryshkov wrote: > > On 16/02/2023 10:31, Marijn Suijten wrote: > > > On 2023-02-16 04:22:13, Dmitry Baryshkov wrote: > > >> On Thu, 16 Feb 2023 at 01:02, Marijn Suijten > > >> wrote: > > >>> > > >>> DPU's catalog never assigned dpu_scaler_blk::version leading to > > >>> initialization code in dpu_hw_setup_scaler3 to wander the wrong > > >>> codepaths. Instead of hardcoding the correct QSEED algorithm version, > > >>> read it back from a hardware register. > > >>> > > >>> Note that this register is only available starting with QSEED3, where > > >>> 0x1002 corresponds to QSEED3, 0x2004 to QSEED3LITE and 0x3000 to QSEED4. > > >> > > >> This is not purely accurate. 0x1003 (sdm845) also corresponds to QSEED3. > > >> I'd say instead that there are several variations of QSEED3 scalers, > > >> where starting from 0x2004 it is called QSEED3LITE and starting from > > >> 0x3000 it is called QSEED4. > > > > > > Good catch, I'll update that. 
> > > > > >>> Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") > > >>> Signed-off-by: Marijn Suijten > > >>> --- > > >>> drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 2 -- > > >>> drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c| 8 +++- > > >>> drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h| 3 +++ > > >>> 3 files changed, 10 insertions(+), 3 deletions(-) > > >>> > > >>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h > > >>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h > > >>> index ddab9caebb18..96ce1766f4a1 100644 > > >>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h > > >>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h > > >>> @@ -324,11 +324,9 @@ struct dpu_src_blk { > > >>> /** > > >>>* struct dpu_scaler_blk: Scaler information > > >>>* @info: HW register and features supported by this sub-blk > > >>> - * @version: qseed block revision > > >>>*/ > > >>> struct dpu_scaler_blk { > > >>> DPU_HW_SUBBLK_INFO; > > >>> - u32 version; > > >> > > >> No. Please keep the version in the scaler subblk. It is a version of > > >> the QSEED (scaler block), not the SSPP's version. > > > > > > You are right that the new variable in the parent (SSPP) block is > > > nondescriptive and should have been named scaler_version. > > > > > > However. > > > > > > dpu_scaler_blk is only used as a const static struct in the catalog, > > > meaning we cannot (should not!) store a runtime-read register value > > > here. Instead I followed your IRC suggestion to read the register in > > > dpu_hw_sspp_init, but my original implementation called > > > dpu_hw_get_scaler3_ver in _dpu_hw_sspp_setup_scaler3 where we already > > > have access to the subblk_offset, allowing us to delete > > > _dpu_hw_sspp_get_scaler3_ver. Would you rather have that? We don't > > > need the register value anywhere else. 
> > > > After giving it another thought, let's follow the vendor's approach and > > store the predefined scaler_version in hw catalog (in dpu_scaler_blk, as > > it currently is). This way we can still drop all QSEED3/3LITE/4 > > crazyness, while keeping the data sane. > > You want to drop the descriptive #define's, and replace them with magic > 0x1002/0x2004/0x3000 and whatever other values we know? And nothing stops us from adding defines for 0x2004 (SCALER_VERSION_QSEED3LITE) and 0x3000 (SCALER_VERSION_QSEED4). I'm not sure regarding 0x1002: whether it is used on msm8998 and/or sdm630 too or not. What I want to remove is the duplication of the information. It was too easy to miss that vig_mask has version1, while the dpu_caps has version 2. We are going to replace dpu_caps with scaler_version, but the problem of having the duplicate still exists. I might have suggested settling on the dpu_caps.qseed_type or on the bit in dpu_sspp_cfg.features, but it seems that 0x1002 is not represented this way. Unless we define something like DPU_SSPP_SCALER_QSEED3_SDM660. > That seems > impossible to port without reading back the register value, which we've > only done for a handful of SoCs. I hope I'm misunderstanding you? Newer vendor dts files provide this value, see the "qcom,sde-qseed-scalar-version" property. For older platforms we'd have to read the register. See below > After all the vendor approach (in a random 4.14 kernel I have open now) > is to read the register value at runtime but their catalog is also > dynamic and built at runtime based on version ranges and register reads, > which sometimes is more sensible. Ours is const. In later techpacks (since 5.4) they have switched to the property in the DTS. > > > Then _dpu_hw_sspp_get_scaler3_ver() can also be dropped (or you can use > > it as a safety guard while doing dpu_hw_sspp init). > > That (safety guard) is exactly what Abhinav requested against, since the > kernel (and our catalog) should be trustworthy. 
I'll let you two fight > this out and come to a consensus before sending
Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits
On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin wrote: > > > On 16/02/2023 18:19, Rodrigo Vivi wrote: > > On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote: > >> On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin > >> wrote: > >>> > >>> From: Tvrtko Ursulin > >>> > >>> In i915 we have this concept of "wait boosting" where we give a priority > >>> boost > >>> for instance to fences which are actively waited upon from userspace. > >>> This has > >>> it's pros and cons and can certainly be discussed at lenght. However fact > >>> is > >>> some workloads really like it. > >>> > >>> Problem is that with the arrival of drm syncobj and a new userspace > >>> waiting > >>> entry point it added, the waitboost mechanism was bypassed. Hence I > >>> cooked up > >>> this mini series really (really) quickly to see if some discussion can be > >>> had. > >>> > >>> It adds a concept of "wait count" to dma fence, which is incremented for > >>> every > >>> explicit dma_fence_enable_sw_signaling and dma_fence_add_wait_callback > >>> (like > >>> dma_fence_add_callback but from explicit/userspace wait paths). > >> > >> I was thinking about a similar thing, but in the context of dma_fence > >> (or rather sync_file) fd poll()ing. How does the kernel differentiate > >> between "housekeeping" poll()ers that don't want to trigger boost but > >> simply know when to do cleanup, and waiters who are waiting with some > >> urgency. I think we could use EPOLLPRI for this purpose. > >> > >> Not sure how that translates to waits via the syncobj. But I think we > >> want to let userspace give some hint about urgent vs housekeeping > >> waits. > > > > Should the hint be on the waits, or should the hints be on the executed > > context? > > > > In the end we need some way to quickly ramp-up the frequency to avoid > > the execution bubbles. > > > > waitboost is trying to guess that, but in some cases it guess wrong > > and waste power. 
> > Do we have a list of workloads which shows who benefits and who loses > from the current implementation of waitboost? > > btw, this is something that other drivers might need: > > > > https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883 > > Cc: Alex Deucher > > I have several issues with the context hint if it would directly > influence frequency selection in the "more power" direction. > > First of all, assume a context hint would replace the waitboost. Which > applications would need to set it to restore the lost performance and > how would they set it? > > Then I don't even think userspace necessarily knows. Think of a layer > like OpenCL. It doesn't really know in advance the profile of > submissions vs waits. It depends on the CPU vs GPU speed, so hardware > generation, and the actual size of the workload which can be influenced > by the application (or user) and not the library. > > The approach also lends itself well for the "arms race" where every > application can say "Me me me, I am the most important workload there is!". since there is discussion happening in two places: https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433 What I think you might want is a ctx boost_mask which lets an app or driver disable certain boost signals/classes. Where fence waits is one class of boost, but hypothetical other signals like touchscreen (or other) input events could be another class of boost. A compute workload might be interested in fence wait boosts but could care less about input events. > The last concern is for me shared with the proposal to expose deadlines > or high priority waits as explicit uapi knobs. Both come under the "what > application told us it will do" category vs what it actually does. So I > think it is slightly weaker than basing decisions of waits. > > The current waitboost is a bit detached from that problem because when > we waitboost for flips we _know_ it is an actual framebuffer in the flip > chain. 
When we waitboost for waits we also know someone is waiting. We > are not trusting userspace telling us this will be a buffer in the flip > chain or that this is a context which will have a certain duty-cycle. > > But yes, even if the input is truthful, latter is still only a > heuristics because nothing says all waits are important. AFAIU it just > happened to work well in the past. > > I do understand I am effectively arguing for more heuristics, which may > sound a bit against the common wisdom. This is because in general I > think the logic to do the right thing, be it in the driver or in the > firmware, can work best if it has a holistic view. Simply put it needs > to have more inputs to the decisions it is making. > > That is what my series is proposing - adding a common signal of "someone > in userspace is waiting". What happens with that signal needs not be > defined (promised) in the uapi contract. > > Say you route it to SLPC logic. It doesn't need to do with it what > legacy i915 is doing today. It just needs to do something which w
Re: [PATCH 6/6] drm/tidss: Implement struct drm_plane_helper_funcs.atomic_enable
On 09/02/2023 17:41, Thomas Zimmermann wrote: Enable the primary plane for tidss hardware via atomic_enable. Atomic helpers invoke this callback only when the plane becomes active. Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/tidss/tidss_plane.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/tidss/tidss_plane.c b/drivers/gpu/drm/tidss/tidss_plane.c index 0b12405edb47..6bdd6e4a955a 100644 --- a/drivers/gpu/drm/tidss/tidss_plane.c +++ b/drivers/gpu/drm/tidss/tidss_plane.c @@ -124,6 +124,16 @@ static void tidss_plane_atomic_update(struct drm_plane *plane, hw_videoport = to_tidss_crtc(new_state->crtc)->hw_videoport; dispc_plane_setup(tidss->dispc, tplane->hw_plane_id, new_state, hw_videoport); +} + +static void tidss_plane_atomic_enable(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_device *ddev = plane->dev; + struct tidss_device *tidss = to_tidss(ddev); + struct tidss_plane *tplane = to_tidss_plane(plane); + + dev_dbg(ddev->dev, "%s\n", __func__); dispc_plane_enable(tidss->dispc, tplane->hw_plane_id, true); } @@ -151,6 +161,7 @@ static void drm_plane_destroy(struct drm_plane *plane) static const struct drm_plane_helper_funcs tidss_plane_helper_funcs = { .atomic_check = tidss_plane_atomic_check, .atomic_update = tidss_plane_atomic_update, + .atomic_enable = tidss_plane_atomic_enable, .atomic_disable = tidss_plane_atomic_disable, }; I haven't tested this, but looks fine to me. Reviewed-by: Tomi Valkeinen One thought, though, is that we still do dispc_plane_enable(false) in tidss_plane_atomic_update() when the plane is not visible. Not a problem, but it would be nice to only enable/disable the plane inside atomic_enable/disable. Or maybe in cases like this the driver should only use atomic_update, and do all the enabling and disabling there... Tomi
Re: [PATCH 5/6] drm/tidss: Remove return values from dispc_plane_{setup,enable}()
On 09/02/2023 17:41, Thomas Zimmermann wrote: Calls to dispc_plane_setup() and dispc_plane_enable() cannot fail. Remove the return value. Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/tidss/tidss_dispc.c | 12 drivers/gpu/drm/tidss/tidss_dispc.h | 8 drivers/gpu/drm/tidss/tidss_plane.c | 11 +-- 3 files changed, 9 insertions(+), 22 deletions(-) Reviewed-by: Tomi Valkeinen Tomi
Patch "drm: Disable dynamic debug as broken" has been added to the 6.1-stable tree
This is a note to let you know that I've just added the patch titled drm: Disable dynamic debug as broken to the 6.1-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: drm-disable-dynamic-debug-as-broken.patch and it can be found in the queue-6.1 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let me know about it. >From bb2ff6c27bc9e1da4d3ec5e7b1d6b9df1092cb5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Feb 2023 16:33:37 +0200 Subject: drm: Disable dynamic debug as broken MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From: Ville Syrjälä commit bb2ff6c27bc9e1da4d3ec5e7b1d6b9df1092cb5a upstream. CONFIG_DRM_USE_DYNAMIC_DEBUG breaks debug prints for (at least modular) drm drivers. The debug prints can be reinstated by manually frobbing /sys/module/drm/parameters/debug after the fact, but at that point the damage is done and all debugs from driver probe are lost. This makes drivers totally undebuggable. There's a more complete fix in progress [1], with further details, but we need this fixed in stable kernels. Mark the feature as broken and disable it by default, with hopes distros follow suit and disable it as well. 
[1] https://lore.kernel.org/r/20230125203743.564009-1-jim.cro...@gmail.com Fixes: 84ec67288c10 ("drm_print: wrap drm_*_dbg in dyndbg descriptor factory macro") Cc: Jim Cromie Cc: Greg Kroah-Hartman Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v6.1+ Signed-off-by: Ville Syrjälä Acked-by: Greg Kroah-Hartman Acked-by: Jim Cromie Acked-by: Maxime Ripard Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230207143337.2126678-1-jani.nik...@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 315cbdf61979..9abfb482b615 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -53,7 +53,8 @@ config DRM_DEBUG_MM config DRM_USE_DYNAMIC_DEBUG bool "use dynamic debug to implement drm.debug" - default y + default n + depends on BROKEN depends on DRM depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE depends on JUMP_LABEL -- 2.39.1 Patches currently in stable-queue which might be from ville.syrj...@linux.intel.com are queue-6.1/drm-disable-dynamic-debug-as-broken.patch
Re: [PATCH 1/3] drm/suballoc: Introduce a generic suballocation manager
On 2/17/23 14:18, Christian König wrote: Am 17.02.23 um 14:10 schrieb Thomas Hellström: [SNIP] Any chance you could do a quick performance comparison? If not, anything against merging this without the amd / radeon changes until we can land a simpler allocator? Only if you can stick the allocator inside Xe and not drm, cause this seems to be for a different use case than the allocators inside radeon/amdgpu. Hmm. No It's allocating in a ring-like fashion as well. Let me put together a unit test for benchmaking. I think it would be a failure for the community to end up with three separate suballocators doing the exact same thing for the same problem, really. Well exactly that's the point. Those allocators aren't the same because they handle different problems. The allocator in radeon is simpler because it only had to deal with a limited number of fence timelines. The one in amdgpu is a bit more complex because of the added complexity for more fence timelines. We could take the one from amdgpu and use it for radeon and others as well, but the allocator proposed here doesn't even remotely matches the requirements. But again, what *are* those missing requirements exactly? What is the pathological case you see for the current code? Well very low CPU overhead and don't do anything in a callback. Well, dma_fence_wait_any() will IIRC register callbacks on all affected fences, although admittedly there is no actual allocator processing in them. From what I can tell the amdgpu suballocator introduces excessive complexity to coalesce waits for fences from the same contexts, whereas the present code just frees from the fence callback if the fence wasn't already signaled. And this is exactly the design we had previously which we removed after Dave stumbled over tons of problems with it. So is the worry that those problems have spilled over in this code then? It's been pretty extensively tested, or is it you should never really use dma-fence callbacks? 
The fence signalling code that fires that callback is typically always run anyway on scheduler fences. The reason we had for not using the amdgpu suballocator as originally planned was that this complexity made it very hard for us to understand it and to fix issues we had with it. Well what are those problems? The idea is actually not that hard to understand. We hit memory corruption, and we spent substantially more time trying to debug it than to put together this patch, while never really understanding what happened, nor why you don't see that with amdgpu. We could simplify it massively for the cost of only waiting for the oldest fence if that helps. Let me grab the latest version from amdgpu and give it a try again, but yes I think that to make it common code we'll need it simpler (and my personal wish would be to separate the allocator functionality a bit more from the fence waiting, which I guess should be OK if the fence waiting is vastly simplified). /Thomas Regards, Christian. Regards, Thomas
[PATCH drm-next v2 14/16] drm/nouveau: implement uvmm for user mode bindings
uvmm provides the driver abstraction around the DRM GPU VA manager connecting it to the nouveau infrastructure. It handles the split and merge operations provided by the DRM GPU VA manager for map operations colliding with existent mappings and takes care of the driver specific locking around the DRM GPU VA manager. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/Kbuild |1 + drivers/gpu/drm/nouveau/nouveau_abi16.c |7 + drivers/gpu/drm/nouveau/nouveau_bo.c| 147 +-- drivers/gpu/drm/nouveau/nouveau_bo.h|2 +- drivers/gpu/drm/nouveau/nouveau_drm.c |2 + drivers/gpu/drm/nouveau/nouveau_drv.h | 48 + drivers/gpu/drm/nouveau/nouveau_gem.c | 25 +- drivers/gpu/drm/nouveau/nouveau_mem.h |5 + drivers/gpu/drm/nouveau/nouveau_prime.c |2 +- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 1090 +++ drivers/gpu/drm/nouveau/nouveau_uvmm.h | 110 +++ 11 files changed, 1378 insertions(+), 61 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nouveau_uvmm.c create mode 100644 drivers/gpu/drm/nouveau/nouveau_uvmm.h diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild index 5e5617006da5..ee281bb76463 100644 --- a/drivers/gpu/drm/nouveau/Kbuild +++ b/drivers/gpu/drm/nouveau/Kbuild @@ -47,6 +47,7 @@ nouveau-y += nouveau_prime.o nouveau-y += nouveau_sgdma.o nouveau-y += nouveau_ttm.o nouveau-y += nouveau_vmm.o +nouveau-y += nouveau_uvmm.o # DRM - modesetting nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 82dab51d8aeb..36cc80eb0e20 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -261,6 +261,13 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (!drm->channel) return nouveau_abi16_put(abi16, -ENODEV); + /* If uvmm wasn't initialized until now disable it completely to prevent +* userspace from mixing up UAPIs. +* +* The client lock is already acquired by nouveau_abi16_get(). 
+*/ + __nouveau_cli_uvmm_disable(cli); + device = &abi16->device; engine = NV_DEVICE_HOST_RUNLIST_ENGINES_GR; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index bf6984c8754c..f3d73d6edd46 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -199,7 +199,7 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, int *align, u64 *size) struct nouveau_bo * nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, -u32 tile_mode, u32 tile_flags) +u32 tile_mode, u32 tile_flags, bool internal) { struct nouveau_drm *drm = cli->drm; struct nouveau_bo *nvbo; @@ -235,68 +235,103 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, nvbo->force_coherent = true; } - if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) { - nvbo->kind = (tile_flags & 0xff00) >> 8; - if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) { - kfree(nvbo); - return ERR_PTR(-EINVAL); + nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG); + if (!nouveau_cli_uvmm(cli) || internal) { + /* for BO noVM allocs, don't assign kinds */ + if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) { + nvbo->kind = (tile_flags & 0xff00) >> 8; + if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) { + kfree(nvbo); + return ERR_PTR(-EINVAL); + } + + nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind; + } else if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) { + nvbo->kind = (tile_flags & 0x7f00) >> 8; + nvbo->comp = (tile_flags & 0x0003) >> 16; + if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) { + kfree(nvbo); + return ERR_PTR(-EINVAL); + } + } else { + nvbo->zeta = (tile_flags & 0x0007); } + nvbo->mode = tile_mode; + + /* Determine the desirable target GPU page size for the buffer. */ + for (i = 0; i < vmm->page_nr; i++) { + /* Because we cannot currently allow VMM maps to fail +* during buffer migration, we need to determine page +* size for the buffer up-front, and pre-allocate its +* page tables. 
+* +* Skip page sizes that can't support needed domain
[PATCH drm-next v2 12/16] drm/nouveau: chan: provide nouveau_channel_kill()
The new VM_BIND UAPI implementation introduced in subsequent commits will allow asynchronous jobs processing push buffers and emitting fences. If a job times out, we need a way to recover from this situation. For now, simply kill the channel to unblock all hung up jobs and signal userspace that the device is dead on the next EXEC or VM_BIND ioctl. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_chan.c | 14 +++--- drivers/gpu/drm/nouveau/nouveau_chan.h | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 1068abe41024..6f47e997d9cf 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -40,6 +40,14 @@ MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM"); int nouveau_vram_pushbuf; module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400); +void +nouveau_channel_kill(struct nouveau_channel *chan) +{ + atomic_set(&chan->killed, 1); + if (chan->fence) + nouveau_fence_context_kill(chan->fence, -ENODEV); +} + static int nouveau_channel_killed(struct nvif_event *event, void *repv, u32 repc) { @@ -47,9 +55,9 @@ nouveau_channel_killed(struct nvif_event *event, void *repv, u32 repc) struct nouveau_cli *cli = (void *)chan->user.client; NV_PRINTK(warn, cli, "channel %d killed!\n", chan->chid); - atomic_set(&chan->killed, 1); - if (chan->fence) - nouveau_fence_context_kill(chan->fence, -ENODEV); + + if (unlikely(!atomic_read(&chan->killed))) + nouveau_channel_kill(chan); return NVIF_EVENT_DROP; } diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h index e06a8ffed31a..e483f4a254da 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.h +++ b/drivers/gpu/drm/nouveau/nouveau_chan.h @@ -65,6 +65,7 @@ int nouveau_channel_new(struct nouveau_drm *, struct nvif_device *, bool priv, u32 vram, u32 gart, struct nouveau_channel **); void nouveau_channel_del(struct 
nouveau_channel **); int nouveau_channel_idle(struct nouveau_channel *); +void nouveau_channel_kill(struct nouveau_channel *); extern int nouveau_vram_pushbuf; -- 2.39.1
[PATCH drm-next v2 13/16] drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm
The new VM_BIND UAPI uses the DRM GPU VA manager to manage the VA space. Hence, we a need a way to manipulate the MMUs page tables without going through the internal range allocator implemented by nvkm/vmm. This patch adds a raw interface for nvkm/vmm to pass the resposibility for managing the address space and the corresponding map/unmap/sparse operations to the upper layers. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/include/nvif/if000c.h | 26 ++- drivers/gpu/drm/nouveau/include/nvif/vmm.h| 19 +- .../gpu/drm/nouveau/include/nvkm/subdev/mmu.h | 20 +- drivers/gpu/drm/nouveau/nouveau_svm.c | 2 +- drivers/gpu/drm/nouveau/nouveau_vmm.c | 4 +- drivers/gpu/drm/nouveau/nvif/vmm.c| 100 +++- .../gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c| 213 -- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 197 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 25 ++ .../drm/nouveau/nvkm/subdev/mmu/vmmgf100.c| 16 +- .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c| 16 +- .../gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c | 27 ++- 12 files changed, 566 insertions(+), 99 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h b/drivers/gpu/drm/nouveau/include/nvif/if000c.h index 9c7ff56831c5..a5a182b3c28d 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h @@ -3,7 +3,10 @@ struct nvif_vmm_v0 { __u8 version; __u8 page_nr; - __u8 managed; +#define NVIF_VMM_V0_TYPE_UNMANAGED 0x00 +#define NVIF_VMM_V0_TYPE_MANAGED 0x01 +#define NVIF_VMM_V0_TYPE_RAW 0x02 + __u8 type; __u8 pad03[5]; __u64 addr; __u64 size; @@ -17,6 +20,7 @@ struct nvif_vmm_v0 { #define NVIF_VMM_V0_UNMAP 0x04 #define NVIF_VMM_V0_PFNMAP 0x05 #define NVIF_VMM_V0_PFNCLR 0x06 +#define NVIF_VMM_V0_RAW0x07 #define NVIF_VMM_V0_MTHD(i) ((i) + 0x80) struct nvif_vmm_page_v0 { @@ -66,6 +70,26 @@ struct nvif_vmm_unmap_v0 { __u64 addr; }; +struct nvif_vmm_raw_v0 { + __u8 version; +#define NVIF_VMM_RAW_V0_GET0x0 +#define NVIF_VMM_RAW_V0_PUT0x1 +#define 
NVIF_VMM_RAW_V0_MAP0x2 +#define NVIF_VMM_RAW_V0_UNMAP 0x3 +#define NVIF_VMM_RAW_V0_SPARSE 0x4 + __u8 op; + __u8 sparse; + __u8 ref; + __u8 shift; + __u32 argc; + __u8 pad01[7]; + __u64 addr; + __u64 size; + __u64 offset; + __u64 memory; + __u64 argv; +}; + struct nvif_vmm_pfnmap_v0 { __u8 version; __u8 page; diff --git a/drivers/gpu/drm/nouveau/include/nvif/vmm.h b/drivers/gpu/drm/nouveau/include/nvif/vmm.h index a2ee92201ace..0ecedd0ee0a5 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/vmm.h +++ b/drivers/gpu/drm/nouveau/include/nvif/vmm.h @@ -4,6 +4,12 @@ struct nvif_mem; struct nvif_mmu; +enum nvif_vmm_type { + UNMANAGED, + MANAGED, + RAW, +}; + enum nvif_vmm_get { ADDR, PTES, @@ -30,8 +36,9 @@ struct nvif_vmm { int page_nr; }; -int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass, bool managed, - u64 addr, u64 size, void *argv, u32 argc, struct nvif_vmm *); +int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass, + enum nvif_vmm_type, u64 addr, u64 size, void *argv, u32 argc, + struct nvif_vmm *); void nvif_vmm_dtor(struct nvif_vmm *); int nvif_vmm_get(struct nvif_vmm *, enum nvif_vmm_get, bool sparse, u8 page, u8 align, u64 size, struct nvif_vma *); @@ -39,4 +46,12 @@ void nvif_vmm_put(struct nvif_vmm *, struct nvif_vma *); int nvif_vmm_map(struct nvif_vmm *, u64 addr, u64 size, void *argv, u32 argc, struct nvif_mem *, u64 offset); int nvif_vmm_unmap(struct nvif_vmm *, u64); + +int nvif_vmm_raw_get(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift); +int nvif_vmm_raw_put(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift); +int nvif_vmm_raw_map(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift, +void *argv, u32 argc, struct nvif_mem *mem, u64 offset); +int nvif_vmm_raw_unmap(struct nvif_vmm *vmm, u64 addr, u64 size, + u8 shift, bool sparse); +int nvif_vmm_raw_sparse(struct nvif_vmm *vmm, u64 addr, u64 size, bool ref); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h 
b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index 70e7887ef4b4..2fd2f2433fc7 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -17,6 +17,7 @@ s
[PATCH drm-next v2 15/16] drm/nouveau: implement new VM_BIND UAPI
This commit provides the implementation for the new uapi motivated by the Vulkan API. It allows user mode drivers (UMDs) to: 1) Initialize a GPU virtual address (VA) space via the new DRM_IOCTL_NOUVEAU_VM_INIT ioctl for UMDs to specify the portion of VA space managed by the kernel and userspace, respectively. 2) Allocate and free a VA space region as well as bind and unbind memory to the GPUs VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl. UMDs can request the named operations to be processed either synchronously or asynchronously. It supports DRM syncobjs (incl. timelines) as synchronization mechanism. The management of the GPU VA mappings is implemented with the DRM GPU VA manager. 3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl. The execution happens asynchronously. It supports DRM syncobj (incl. timelines) as synchronization mechanism. DRM GEM object locking is handled with drm_exec. Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC, use the DRM GPU scheduler for the asynchronous paths. 
Signed-off-by: Danilo Krummrich --- Documentation/gpu/driver-uapi.rst | 3 + drivers/gpu/drm/nouveau/Kbuild | 2 + drivers/gpu/drm/nouveau/Kconfig | 2 + drivers/gpu/drm/nouveau/nouveau_abi16.c | 16 + drivers/gpu/drm/nouveau/nouveau_abi16.h | 1 + drivers/gpu/drm/nouveau/nouveau_drm.c | 24 +- drivers/gpu/drm/nouveau/nouveau_drv.h | 9 +- drivers/gpu/drm/nouveau/nouveau_exec.c | 322 drivers/gpu/drm/nouveau/nouveau_exec.h | 39 ++ drivers/gpu/drm/nouveau/nouveau_sched.c | 467 drivers/gpu/drm/nouveau/nouveau_sched.h | 96 + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 446 ++ drivers/gpu/drm/nouveau/nouveau_uvmm.h | 28 ++ 13 files changed, 1451 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nouveau_exec.c create mode 100644 drivers/gpu/drm/nouveau/nouveau_exec.h create mode 100644 drivers/gpu/drm/nouveau/nouveau_sched.c create mode 100644 drivers/gpu/drm/nouveau/nouveau_sched.h diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst index 9c7ca6e33a68..c08bcbb95fb3 100644 --- a/Documentation/gpu/driver-uapi.rst +++ b/Documentation/gpu/driver-uapi.rst @@ -13,4 +13,7 @@ drm/nouveau uAPI VM_BIND / EXEC uAPI --- +.. kernel-doc:: drivers/gpu/drm/nouveau/nouveau_exec.c +:doc: Overview + .. 
kernel-doc:: include/uapi/drm/nouveau_drm.h diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild index ee281bb76463..cf6b3a80c0c8 100644 --- a/drivers/gpu/drm/nouveau/Kbuild +++ b/drivers/gpu/drm/nouveau/Kbuild @@ -47,6 +47,8 @@ nouveau-y += nouveau_prime.o nouveau-y += nouveau_sgdma.o nouveau-y += nouveau_ttm.o nouveau-y += nouveau_vmm.o +nouveau-y += nouveau_exec.o +nouveau-y += nouveau_sched.o nouveau-y += nouveau_uvmm.o # DRM - modesetting diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index a70bd65e1400..c52e8096cca4 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -10,6 +10,8 @@ config DRM_NOUVEAU select DRM_KMS_HELPER select DRM_TTM select DRM_TTM_HELPER + select DRM_EXEC + select DRM_SCHED select I2C select I2C_ALGOBIT select BACKLIGHT_CLASS_DEVICE if DRM_NOUVEAU_BACKLIGHT diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 36cc80eb0e20..694777a58bca 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -35,6 +35,7 @@ #include "nouveau_chan.h" #include "nouveau_abi16.h" #include "nouveau_vmm.h" +#include "nouveau_sched.h" static struct nouveau_abi16 * nouveau_abi16(struct drm_file *file_priv) @@ -125,6 +126,17 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, { struct nouveau_abi16_ntfy *ntfy, *temp; + /* When a client exits without waiting for it's queued up jobs to +* finish it might happen that we fault the channel. This is due to +* drm_file_free() calling drm_gem_release() before the postclose() +* callback. Hence, we can't tear down this scheduler entity before +* uvmm mappings are unmapped. Currently, we can't detect this case. +* +* However, this should be rare and harmless, since the channel isn't +* needed anymore. 
+*/ + nouveau_sched_entity_fini(&chan->sched_entity); + /* wait for all activity to stop before cleaning up */ if (chan->chan) nouveau_channel_idle(chan->chan); @@ -311,6 +323,10 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (ret) goto done; + ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched); + if (ret) + goto done; + init->channel = chan->chan->chid; if (device->info.famil
[PATCH drm-next v2 09/16] drm/nouveau: bo: initialize GEM GPU VA interface
Initialize the GEM's DRM GPU VA manager interface in preparation for the (u)vmm implementation, provided by subsequent commits, to make use of it. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_bo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index f3039c1f87c9..bf6984c8754c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -215,11 +215,14 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, nvbo = kzalloc(sizeof(struct nouveau_bo), GFP_KERNEL); if (!nvbo) return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nvbo->head); INIT_LIST_HEAD(&nvbo->entry); INIT_LIST_HEAD(&nvbo->vma_list); nvbo->bo.bdev = &drm->ttm.bdev; + drm_gem_gpuva_init(&nvbo->bo.base); + /* This is confusing, and doesn't actually mean we want an uncached * mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated * into in nouveau_gem_new(). -- 2.39.1
[PATCH drm-next v2 16/16] drm/nouveau: debugfs: implement DRM GPU VA debugfs
Provide the driver indirection iterating over all DRM GPU VA spaces to enable the common 'gpuvas' debugfs file for dumping DRM GPU VA spaces. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_debugfs.c | 24 +++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 2a36d1ca8fda..7f6ccc5d1d86 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -202,6 +202,29 @@ nouveau_debugfs_pstate_open(struct inode *inode, struct file *file) return single_open(file, nouveau_debugfs_pstate_get, inode->i_private); } +static int +nouveau_debugfs_gpuva(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct nouveau_drm *drm = nouveau_drm(node->minor->dev); + struct nouveau_cli *cli; + + mutex_lock(&drm->clients_lock); + list_for_each_entry(cli, &drm->clients, head) { + struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli); + + if (!uvmm) + continue; + + nouveau_uvmm_lock(uvmm); + drm_debugfs_gpuva_info(m, &uvmm->umgr); + nouveau_uvmm_unlock(uvmm); + } + mutex_unlock(&drm->clients_lock); + + return 0; +} + static const struct file_operations nouveau_pstate_fops = { .owner = THIS_MODULE, .open = nouveau_debugfs_pstate_open, @@ -213,6 +236,7 @@ static const struct file_operations nouveau_pstate_fops = { static struct drm_info_list nouveau_debugfs_list[] = { { "vbios.rom", nouveau_debugfs_vbios_image, 0, NULL }, { "strap_peek", nouveau_debugfs_strap_peek, 0, NULL }, + DRM_DEBUGFS_GPUVA_INFO(nouveau_debugfs_gpuva, NULL), }; #define NOUVEAU_DEBUGFS_ENTRIES ARRAY_SIZE(nouveau_debugfs_list) -- 2.39.1
[PATCH drm-next v2 02/16] drm/exec: fix memory leak in drm_exec_prepare_obj()
Don't call drm_gem_object_get() unconditionally. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/drm_exec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c index ed2106c22786..5713a589a6a3 100644 --- a/drivers/gpu/drm/drm_exec.c +++ b/drivers/gpu/drm/drm_exec.c @@ -282,7 +282,6 @@ int drm_exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj, goto error_unlock; } - drm_gem_object_get(obj); return 0; error_unlock: -- 2.39.1
[PATCH drm-next v2 11/16] drm/nouveau: fence: fail to emit when fence context is killed
The new VM_BIND UAPI implementation introduced in subsequent commits will allow asynchronous jobs processing push buffers and emitting fences. If a fence context is killed, e.g. due to a channel fault, jobs which are already queued for execution might still emit new fences. In such a case a job would hang forever. To fix that, fail to emit a new fence on a killed fence context with -ENODEV to unblock the job. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_fence.c | 7 +++ drivers/gpu/drm/nouveau/nouveau_fence.h | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index ee5e9d40c166..62c70d9a32e6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -96,6 +96,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) if (nouveau_fence_signal(fence)) nvif_event_block(&fctx->event); } + fctx->killed = 1; spin_unlock_irqrestore(&fctx->lock, flags); } @@ -226,6 +227,12 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) dma_fence_get(&fence->base); spin_lock_irq(&fctx->lock); + if (unlikely(fctx->killed)) { + spin_unlock_irq(&fctx->lock); + dma_fence_put(&fence->base); + return -ENODEV; + } + if (nouveau_fence_update(chan, fctx)) nvif_event_block(&fctx->event); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 0ca2bc85adf6..00a08699bb58 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -45,7 +45,7 @@ struct nouveau_fence_chan { char name[32]; struct nvif_event event; - int notify_ref, dead; + int notify_ref, dead, killed; }; struct nouveau_fence_priv { -- 2.39.1
[PATCH drm-next v2 10/16] drm/nouveau: move usercopy helpers to nouveau_drv.h
Move the usercopy helpers to a common driver header file to make it usable for the new API added in subsequent commits. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_drv.h | 26 ++ drivers/gpu/drm/nouveau/nouveau_gem.c | 26 -- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 81350e685b50..20a7f31b9082 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -130,6 +130,32 @@ nouveau_cli(struct drm_file *fpriv) return fpriv ? fpriv->driver_priv : NULL; } +static inline void +u_free(void *addr) +{ + kvfree(addr); +} + +static inline void * +u_memcpya(uint64_t user, unsigned nmemb, unsigned size) +{ + void *mem; + void __user *userptr = (void __force __user *)(uintptr_t)user; + + size *= nmemb; + + mem = kvmalloc(size, GFP_KERNEL); + if (!mem) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(mem, userptr, size)) { + u_free(mem); + return ERR_PTR(-EFAULT); + } + + return mem; +} + #include #include diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 08689ced4f6a..4369c8dc8b5b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -613,32 +613,6 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, return 0; } -static inline void -u_free(void *addr) -{ - kvfree(addr); -} - -static inline void * -u_memcpya(uint64_t user, unsigned nmemb, unsigned size) -{ - void *mem; - void __user *userptr = (void __force __user *)(uintptr_t)user; - - size *= nmemb; - - mem = kvmalloc(size, GFP_KERNEL); - if (!mem) - return ERR_PTR(-ENOMEM); - - if (copy_from_user(mem, userptr, size)) { - u_free(mem); - return ERR_PTR(-EFAULT); - } - - return mem; -} - static int nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, struct drm_nouveau_gem_pushbuf *req, -- 2.39.1
[PATCH drm-next v2 03/16] maple_tree: split up MA_STATE() macro
Split up the MA_STATE() macro such that components using the maple tree can easily inherit from struct ma_state and build custom tree walk macros to hide their internals from users. Example: struct sample_iter { struct ma_state mas; struct sample_mgr *mgr; struct sample_entry *entry; }; \#define SAMPLE_ITER(name, __mgr) \ struct sample_iter name = { \ .mas = __MA_STATE(&(__mgr)->mt, 0, 0), .mgr = __mgr, .entry = NULL, } \#define sample_iter_for_each_range(it__, start__, end__) \ for ((it__).mas.index = start__, (it__).entry = mas_find(&(it__).mas, end__ - 1); \ (it__).entry; (it__).entry = mas_find(&(it__).mas, end__ - 1)) Signed-off-by: Danilo Krummrich --- include/linux/maple_tree.h | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index e594db58a0f1..ca04c900e51a 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -424,8 +424,8 @@ struct ma_wr_state { #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) -#define MA_STATE(name, mt, first, end) \ - struct ma_state name = {\ +#define __MA_STATE(mt, first, end) \ + { \ .tree = mt, \ .index = first, \ .last = end,\ @@ -435,6 +435,9 @@ struct ma_wr_state { .alloc = NULL, \ } +#define MA_STATE(name, mt, first, end) \ + struct ma_state name = __MA_STATE(mt, first, end) + #define MA_WR_STATE(name, ma_state, wr_entry) \ struct ma_wr_state name = { \ .mas = ma_state,\ -- 2.39.1
[PATCH drm-next v2 08/16] drm/nouveau: get vmm via nouveau_cli_vmm()
Provide a getter function for the client's current vmm context. Since we'll add a new (u)vmm context for UMD bindings in subsequent commits, this will keep the code clean. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- drivers/gpu/drm/nouveau/nouveau_chan.c | 2 +- drivers/gpu/drm/nouveau/nouveau_drv.h | 9 + drivers/gpu/drm/nouveau/nouveau_gem.c | 6 +++--- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 288eebc70a67..f3039c1f87c9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -204,7 +204,7 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, struct nouveau_drm *drm = cli->drm; struct nouveau_bo *nvbo; struct nvif_mmu *mmu = &cli->mmu; - struct nvif_vmm *vmm = cli->svm.cli ? &cli->svm.vmm : &cli->vmm.vmm; + struct nvif_vmm *vmm = &nouveau_cli_vmm(cli)->vmm; int i, pi = -1; if (!*size) { diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index e648ecd0c1a0..1068abe41024 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -148,7 +148,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, chan->device = device; chan->drm = drm; - chan->vmm = cli->svm.cli ? 
&cli->svm : &cli->vmm; + chan->vmm = nouveau_cli_vmm(cli); atomic_set(&chan->killed, 0); /* allocate memory for dma push buffer */ diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index b5de312a523f..81350e685b50 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -112,6 +112,15 @@ struct nouveau_cli_work { struct dma_fence_cb cb; }; +static inline struct nouveau_vmm * +nouveau_cli_vmm(struct nouveau_cli *cli) +{ + if (cli->svm.cli) + return &cli->svm; + + return &cli->vmm; +} + void nouveau_cli_work_queue(struct nouveau_cli *, struct dma_fence *, struct nouveau_cli_work *); diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index f77e44958037..08689ced4f6a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -103,7 +103,7 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct device *dev = drm->dev->dev; - struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm; + struct nouveau_vmm *vmm = nouveau_cli_vmm(cli); struct nouveau_vma *vma; int ret; @@ -180,7 +180,7 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct device *dev = drm->dev->dev; - struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : & cli->vmm; + struct nouveau_vmm *vmm = nouveau_cli_vmm(cli); struct nouveau_vma *vma; int ret; @@ -269,7 +269,7 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); - struct nouveau_vmm *vmm = cli->svm.cli ? 
&cli->svm : &cli->vmm; + struct nouveau_vmm *vmm = nouveau_cli_vmm(cli); struct nouveau_vma *vma; if (is_power_of_2(nvbo->valid_domains)) -- 2.39.1
[PATCH drm-next v2 00/16] [RFC] DRM GPUVA Manager & Nouveau VM_BIND UAPI
This patch series provides a new UAPI for the Nouveau driver in order to support Vulkan features, such as sparse bindings and sparse residency. Furthermore, with the DRM GPUVA manager it provides a new DRM core feature to keep track of GPU virtual address (VA) mappings in a more generic way. The DRM GPUVA manager is intended to help drivers implement userspace-manageable GPU VA spaces in reference to the Vulkan API. In order to achieve this goal it serves the following purposes in this context. 1) Provide infrastructure to track GPU VA allocations and mappings, making use of the maple_tree. 2) Generically connect GPU VA mappings to their backing buffers, in particular DRM GEM objects. 3) Provide a common implementation to perform more complex mapping operations on the GPU VA space. In particular splitting and merging of GPU VA mappings, e.g. for intersecting mapping requests or partial unmap requests. The new VM_BIND Nouveau UAPI builds on top of the DRM GPUVA manager, itself providing the following new interfaces. 1) Initialize a GPU VA space via the new DRM_IOCTL_NOUVEAU_VM_INIT ioctl for UMDs to specify the portion of VA space managed by the kernel and userspace, respectively. 2) Allocate and free a VA space region as well as bind and unbind memory to the GPUs VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl. 3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl. Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC, make use of the DRM scheduler to queue jobs and support asynchronous processing with DRM syncobjs as synchronization mechanism. By default DRM_IOCTL_NOUVEAU_VM_BIND does synchronous processing, DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only. The new VM_BIND UAPI for Nouveau also makes use of drm_exec (execution context for GEM buffers) by Christian König. 
Since the patch implementing drm_exec was not yet merged into drm-next it is part of this series, as well as a small fix for this patch, which was found while testing this series. This patch series is also available at [1]. There is a Mesa NVK merge request by Dave Airlie [2] implementing the corresponding userspace parts for this series. The Vulkan CTS test suite passes the sparse binding and sparse residency test cases for the new UAPI together with Dave's Mesa work. There are also some test cases in the igt-gpu-tools project [3] for the new UAPI and hence the DRM GPU VA manager. However, most of them are testing the DRM GPU VA manager's logic through Nouveau's new UAPI and should be considered just as helpers for the implementation. However, I absolutely intend to change those test cases to proper kunit test cases for the DRM GPUVA manager, once and if we agree on its usefulness and design. [1] https://gitlab.freedesktop.org/nouvelles/kernel/-/tree/new-uapi-drm-next / https://gitlab.freedesktop.org/nouvelles/kernel/-/merge_requests/1 [2] https://gitlab.freedesktop.org/nouveau/mesa/-/merge_requests/150/ [3] https://gitlab.freedesktop.org/dakr/igt-gpu-tools/-/tree/wip_nouveau_vm_bind Changes in V2: == Nouveau: - Reworked the Nouveau VM_BIND UAPI to avoid memory allocations in fence signalling critical sections. Updates to the VA space are split up in three separate stages, where only the 2. stage executes in a fence signalling critical section: 1. update the VA space, allocate new structures and page tables 2. (un-)map the requested memory bindings 3. free structures and page tables - Separated generic job scheduler code from specific job implementations. - Separated the EXEC and VM_BIND implementation of the UAPI. - Reworked the locking parts of the nvkm/vmm RAW interface, such that (un-)map operations can be executed in fence signalling critical sections. 
GPUVA Manager: - made drm_gpuva_regions optional for users of the GPUVA manager - allow NULL GEMs for drm_gpuva entries - switched from drm_mm to maple_tree to track drm_gpuva / drm_gpuva_region entries - provide callbacks for users to allocate custom drm_gpuva_op structures to allow inheritance - added user bits to drm_gpuva_flags - added a prefetch operation type in order to support generating prefetch operations in the same way other operations are generated - hand the responsibility for mutual exclusion for a GEM's drm_gpuva list to the user; simplified corresponding (un-)link functions Maple Tree: - I added two maple tree patches to the series, one to support custom tree walk macros and one to hand the locking responsibility to the user of the GPUVA manager without pre-defined lockdep checks. TODO Maple Tree: - Maple tree uses the 'unsigned long' type for node entries. While this works for 64bit, it's incompatible with the DRM GPUVA Manager on 32bit, since the DRM GPUVA Manager
[PATCH drm-next v2 01/16] drm: execution context for GEM buffers
From: Christian König This adds the infrastructure for an execution context for GEM buffers which is similar to the existing TTMs execbuf util and intended to replace it in the long term. The basic functionality is that we abstract the necessary loop to lock many different GEM buffers with automated deadlock and duplicate handling. v2: drop xarray and use dynamic resized array instead, the locking overhead is unnecessary and measurable. Signed-off-by: Christian König --- Documentation/gpu/drm-mm.rst | 12 ++ drivers/gpu/drm/Kconfig| 6 + drivers/gpu/drm/Makefile | 2 + drivers/gpu/drm/amd/amdgpu/Kconfig | 1 + drivers/gpu/drm/drm_exec.c | 295 + include/drm/drm_exec.h | 144 ++ 6 files changed, 460 insertions(+) create mode 100644 drivers/gpu/drm/drm_exec.c create mode 100644 include/drm/drm_exec.h diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index a79fd3549ff8..a52e6f4117d6 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -493,6 +493,18 @@ DRM Sync Objects .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c :export: +DRM Execution context += + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :doc: Overview + +.. kernel-doc:: include/drm/drm_exec.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :export: + GPU Scheduler = diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f42d4c6a19f2..1573d658fbb5 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -200,6 +200,12 @@ config DRM_TTM GPU memory types. Will be enabled automatically if a device driver uses it. 
+config DRM_EXEC + tristate + depends on DRM + help + Execution context for command submissions + config DRM_BUDDY tristate depends on DRM diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index ab4460fcd63f..d40defbb0347 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -78,6 +78,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o # # Memory-management helpers # +# +obj-$(CONFIG_DRM_EXEC) += drm_exec.o obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 5341b6b242c3..279fb3bba810 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -11,6 +11,7 @@ config DRM_AMDGPU select DRM_SCHED select DRM_TTM select DRM_TTM_HELPER + select DRM_EXEC select POWER_SUPPLY select HWMON select I2C diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c new file mode 100644 index ..ed2106c22786 --- /dev/null +++ b/drivers/gpu/drm/drm_exec.c @@ -0,0 +1,295 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +#include +#include +#include + +/** + * DOC: Overview + * + * This component mainly abstracts the retry loop necessary for locking + * multiple GEM objects while preparing hardware operations (e.g. command + * submissions, page table updates etc..). + * + * If a contention is detected while locking a GEM object the cleanup procedure + * unlocks all previously locked GEM objects and locks the contended one first + * before locking any further objects. + * + * After an object is locked fences slots can optionally be reserved on the + * dma_resv object inside the GEM object. 
+ * + * A typical usage pattern should look like this:: + * + * struct drm_gem_object *obj; + * struct drm_exec exec; + * unsigned long index; + * int ret; + * + * drm_exec_init(&exec, true); + * drm_exec_while_not_all_locked(&exec) { + * ret = drm_exec_prepare_obj(&exec, boA, 1); + * drm_exec_continue_on_contention(&exec); + * if (ret) + * goto error; + * + * ret = drm_exec_lock(&exec, boB, 1); + * drm_exec_continue_on_contention(&exec); + * if (ret) + * goto error; + * } + * + * drm_exec_for_each_locked_object(&exec, index, obj) { + * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ); + * ... + * } + * drm_exec_fini(&exec); + * + * See struct dma_exec for more details. + */ + +/* Dummy value used to initially enter the retry loop */ +#define DRM_EXEC_DUMMY (void*)~0 + +/* Initialize the drm_exec_objects container */ +static void drm_exec_objects_init(struct drm_exec_objects *container) +{ + container->objects = kmalloc(PAGE_SIZE, GFP_KERNEL); + + /* If allocation here fails, just delay that till the first use */ + container->max_objects = container->objects ? + PAGE_SIZE / sizeof(void *) : 0; + container->num_objects = 0; +} + +/* Cleanup the drm_exec_objects container */ +static void drm_exec_ob