[PATCH] drm/amdgpu: protect ring overrun
Wait for the oldest sequence on the ring to be signaled in order to make sure there will be no command overrun. Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 7 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 17 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 - drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 8 +++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++- 6 files changed, 51 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7531527067df..5462ea83d8b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -200,6 +200,13 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) return -EINVAL; seq = ++ring->fence_drv.sync_seq; + if ((abs(seq - ring->fence_drv.num_fences_mask) > + ring->fence_drv.num_fences_mask) && + (amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + MAX_KIQ_REG_WAIT) < 1)) +return -ETIME; + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..7087333681f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -681,7 +681,14 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) } amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + amdgpu_ring_undo(ring); + amdgpu_device_wb_free(adev, reg_val_offs); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -730,7 +737,13 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) spin_lock_irqsave(&kiq->ring_lock, flags); 
amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_write; + } + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 8c10084f44ef..12d181ac7e78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -60,7 +60,13 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, ref, mask); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq; + } + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5b1549f167b0..650b7a67d3bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4068,7 +4068,14 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + reg_val_offs * 4)); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq); + if (r) { + amdgpu_ring_undo(ring); + amdgpu_device_wb_free(adev, reg_val_offs); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 30b75d79efdb..71430f2a2374 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -427,7 +427,13 
@@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); kiq->pmf->kiq_invalidate_tlbs(ring,
RE: [PATCH] drm/amdgpu: protect kiq overrun
Hi Christian, OK, I got it. The real maximum number of submissions that fit in the kiq ring buffer is 1024. If we use the num_fences_mask value, the maximum number of submissions will be reduced to 512. Do you think that is OK? Best Regards Yintian Tao -----Original Message----- From: Koenig, Christian Sent: 2020年4月23日 2:43 To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: protect kiq overrun Am 22.04.20 um 16:50 schrieb Yintian Tao: > Wait for the oldest sequence on the kiq ring to be signaled in order > to make sure there will be no kiq overrun. > > v2: remove the unused variable and correct > the kiq max_sub_num value First of all this should probably be added to the fence handling code and not the kiq code. Then you are kind of duplicating some of the functionality we have in the ring handling here. Probably better to avoid this, see amdgpu_fence_driver_init_ring() as well. That's also why I suggested to use the num_fences_mask value. Regards, Christian. > > Signed-off-by: Yintian Tao > --- > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 6 > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 7 + > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 + > 8 files changed, 71 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > index 691c89705bcd..fac8b9713dfc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > @@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void > *mqd, >mec, pipe, queue_id); > > spin_lock(&adev->gfx.kiq.ring_lock); > + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); > + if (r) { > + pr_err("critical bug! 
too many kiq submission\n"); > + goto out_unlock; > + } > + > r = amdgpu_ring_alloc(kiq_ring, 7); > if (r) { > pr_err("Failed to alloc KIQ (%d).\n", r); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > index df841c2ac5e7..fd42c126510f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > @@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void > *mqd, >mec, pipe, queue_id); > > spin_lock(&adev->gfx.kiq.ring_lock); > + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); > + if (r) { > + pr_err("critical bug! too many kiq submissions\n"); > + goto out_unlock; > + } > + > r = amdgpu_ring_alloc(kiq_ring, 7); > if (r) { > pr_err("Failed to alloc KIQ (%d).\n", r); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index a721b0e0ff69..84e66c45df37 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, >AMDGPU_RING_PRIO_DEFAULT); > if (r) > dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); > + else > + kiq->max_sub_num = (ring->ring_size / 4) / > + (ring->funcs->align_mask + 1); > > return r; > } > @@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device > *adev, > return 0; > } > > +int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) { > + uint32_t seq = 0; > + signed long r = 0; > + > + seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num); > + if (seq > kiq->max_sub_num) { > + r = amdgpu_fence_wait_polling(&kiq->ring, seq, > + MAX_KIQ_REG_WAIT); > + return r < 1 ? 
-ETIME : 0; > + } > + > + return 0; > +} > + > uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) > { > signed long r, cnt = 0; > @@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, > uint32_t reg) > BUG_ON(!ring->funcs->emit_rreg); > > spin_lock_irqsave(&kiq->ring_lock, flags); > + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); > + if (r) { > + spin_unlock_irqrestore(&kiq->ring_lock, flags); > + goto failed_kiq_read; > + } > + > if (amdgpu_device_wb_get(adev, &reg_val_offs)) { > spin_unlock_irqrestore(&kiq->ring_lock, flags); > pr_err("critical bug! too many kiq readers\n"); @@ -728,6 > +752,12 >
[PATCH v2] drm/amdkfd: Track GPU memory utilization per process
Track GPU VRAM usage on a per process basis and report it through sysfs. v2: - Handle AMDGPU BO-specific details in amdgpu_amdkfd_gpuvm_free_memory_of_gpu(). - Return size of VRAM BO being freed from amdgpu_amdkfd_gpuvm_free_memory_of_gpu(). - Do not consider imported memory for VRAM usage calculations. Signed-off-by: Mukul Joshi --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 3 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 17 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 +++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 57 --- 5 files changed, 81 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index d065c50582eb..a501026e829c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -65,6 +65,7 @@ struct kgd_mem { struct amdgpu_sync sync; bool aql_queue; + bool is_imported; }; /* KFD Memory Eviction */ @@ -219,7 +220,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( void *vm, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem); + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 0768b7eb7683..fe6615a06cd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1277,7 +1277,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem) + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size) { struct amdkfd_process_info *process_info = mem->process_info; unsigned long bo_size = 
mem->bo->tbo.mem.size; @@ -1340,6 +1340,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( kfree(mem->bo->tbo.sg); } + /* Update the size of the BO being freed if it was allocated from +* VRAM +*/ + if (size && (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM)) + *size = bo_size; + /* Free the BO*/ amdgpu_bo_unref(&mem->bo); mutex_destroy(&mem->lock); @@ -1694,6 +1700,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, (*mem)->process_info = avm->process_info; add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); amdgpu_sync_create(&(*mem)->sync); + (*mem)->is_imported = true; return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f8fa03a12add..aac2cdb65eb5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1322,6 +1322,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, goto err_free; } + /* Update the VRAM usage count */ + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) + pdd->vram_usage += args->size; + mutex_unlock(&p->mutex); args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); @@ -1337,7 +1341,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL); err_unlock: mutex_unlock(&p->mutex); return err; @@ -1351,6 +1355,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, void *mem; struct kfd_dev *dev; int ret; + uint64_t size = 0; + bool is_imported = 0; dev = kfd_device_by_id(GET_GPU_ID(args->handle)); if (!dev) @@ -1372,8 +1378,10 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, goto err_unlock; } + is_imported = ((struct kgd_mem *)mem)->is_imported; + ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, - (struct kgd_mem *)mem); + (struct kgd_mem *)mem, &size); /* If freeing the buffer failed, leave the 
handle in place for * clean-up during process tear-down. @@ -1382,6 +1390,9 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, kfd_process_device_remove_obj_handle( pdd, GET_IDR_HANDLE(args->handle)); + if (!is_imported) + pdd->vram_usage -= s
[pull] amdgpu drm-fixes-5.7
Hi Dave, Daniel, Fixes for 5.7. The following changes since commit 4da858c086433cd012c0bb16b5921f6fafe3f803: Merge branch 'linux-5.7' of git://github.com/skeggsb/linux into drm-fixes (2020-04-16 15:40:02 +1000) are available in the Git repository at: git://people.freedesktop.org/~agd5f/linux tags/amd-drm-fixes-5.7-2020-04-22 for you to fetch changes up to 7daec99fdcde7b01595134a3d8f385bc1009f1d8: drm/amdgpu/display: give aux i2c buses more meaningful names (2020-04-17 17:31:38 -0400) amd-drm-fixes-5.7-2020-04-22: amdgpu: - Fix resume issue on renoir - Thermal fix for older CI dGPUs - Fix some fallout from dropping drm load/unload callbacks Alex Deucher (2): drm/amdgpu/display: fix aux registration (v2) drm/amdgpu/display: give aux i2c buses more meaningful names Prike Liang (1): drm/amd/powerplay: fix resume failed as smu table initialize early exit Sandeep Raghuraman (1): drm/amdgpu: Correctly initialize thermal controller for GPUs with Powerplay table v0 (e.g Hawaii) drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 ++-- .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c| 15 - .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.h| 3 ++- .../gpu/drm/amd/powerplay/hwmgr/processpptables.c | 26 ++ drivers/gpu/drm/amd/powerplay/renoir_ppt.c | 7 +- 5 files changed, 55 insertions(+), 10 deletions(-) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 07/19] drm/amd/display: fix rn soc bb update
From: Dmytro Laktyushkin Currently RN SOC bounding box update assumes we will get at least 2 clock states from SMU. This isn't always true and because of special casing on first clock state we end up with low disp, dpp, dsc and phy clocks. This change removes the special casing allowing the first state to acquire correct clocks. Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Yang Acked-by: Aurabindo Pillai Acked-by: Tony Cheng --- .../drm/amd/display/dc/dcn21/dcn21_resource.c | 71 --- 1 file changed, 28 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 78e6259b4ac9..8fcb03e65fdb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1379,64 +1379,49 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param { struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool); struct clk_limit_table *clk_table = &bw_params->clk_table; - unsigned int i, j, k; - int closest_clk_lvl; + struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; + unsigned int i, j, closest_clk_lvl; // Default clock levels are used for diags, which may lead to overclocking. 
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) && !IS_DIAG_DC(dc->ctx->dce_environment)) { + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { dcn2_1_ip.max_num_otg = pool->base.res_cap->num_timing_generator; dcn2_1_ip.max_num_dpp = pool->base.pipe_count; dcn2_1_soc.num_chans = bw_params->num_channels; - /* Vmin: leave lowest DCN clocks, override with dcfclk, fclk, memclk from fuse */ - dcn2_1_soc.clock_limits[0].state = 0; - dcn2_1_soc.clock_limits[0].dcfclk_mhz = clk_table->entries[0].dcfclk_mhz; - dcn2_1_soc.clock_limits[0].fabricclk_mhz = clk_table->entries[0].fclk_mhz; - dcn2_1_soc.clock_limits[0].socclk_mhz = clk_table->entries[0].socclk_mhz; - dcn2_1_soc.clock_limits[0].dram_speed_mts = clk_table->entries[0].memclk_mhz * 2; - - /* -* Other levels: find closest DCN clocks that fit the given clock limit using dcfclk -* as indicator -*/ - - closest_clk_lvl = -1; - /* index currently being filled */ - k = 1; - for (i = 1; i < clk_table->num_entries; i++) { - /* loop backwards, skip duplicate state*/ - for (j = dcn2_1_soc.num_states - 1; j >= k; j--) { + ASSERT(clk_table->num_entries); + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn2_1_soc.num_states - 1; j >= 0; j--) { if ((unsigned int) dcn2_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { closest_clk_lvl = j; break; } } - /* if found a lvl that fits, use the DCN clks from it, if not, go to next clk limit*/ - if (closest_clk_lvl != -1) { - dcn2_1_soc.clock_limits[k].state = i; - dcn2_1_soc.clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - dcn2_1_soc.clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn2_1_soc.clock_limits[k].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn2_1_soc.clock_limits[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; - - dcn2_1_soc.clock_limits[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn2_1_soc.clock_limits[k].dppclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn2_1_soc.clock_limits[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn2_1_soc.clock_limits[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn2_1_soc.clock_limits[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; - k++; - } + clock_limit
[PATCH 15/19] drm/amd/display: Add set backlight to hw sequencer.
From: Yongqiang Sun [Why & How] Add set backlight to hw sequencer, dmu communication will be handled in hw sequencer for new asics. Signed-off-by: Yongqiang Sun Reviewed-by: Anthony Koo Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 35 ++- .../display/dc/dce110/dce110_hw_sequencer.c | 34 +- .../display/dc/dce110/dce110_hw_sequencer.h | 4 +++ .../gpu/drm/amd/display/dc/dcn10/dcn10_init.c | 1 + .../gpu/drm/amd/display/dc/dcn20/dcn20_init.c | 1 + .../gpu/drm/amd/display/dc/dcn21/dcn21_init.c | 1 + .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 4 +++ 7 files changed, 55 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index a54b3e05f66b..67c5342cf89a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2509,35 +2509,21 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t frame_ramp) { struct dc *dc = link->ctx->dc; - struct abm *abm = get_abm_from_stream_res(link); - struct dmcu *dmcu = dc->res_pool->dmcu; - unsigned int controller_id = 0; - bool fw_set_brightness = true; int i; - DC_LOGGER_INIT(link->ctx->logger); - - if (abm == NULL || (abm->funcs->set_backlight_level_pwm == NULL)) - return false; - - if (dmcu) - fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu); + DC_LOGGER_INIT(link->ctx->logger); DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", backlight_pwm_u16_16, backlight_pwm_u16_16); if (dc_is_embedded_signal(link->connector_signal)) { + struct pipe_ctx *pipe_ctx = NULL; + for (i = 0; i < MAX_PIPES; i++) { if (dc->current_state->res_ctx.pipe_ctx[i].stream) { if (dc->current_state->res_ctx. 
pipe_ctx[i].stream->link == link) { - /* DMCU -1 for all controller id values, -* therefore +1 here -*/ - controller_id = - dc->current_state-> - res_ctx.pipe_ctx[i].stream_res.tg->inst + - 1; + pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; /* Disable brightness ramping when the display is blanked * as it can hang the DMCU @@ -2547,13 +2533,14 @@ bool dc_link_set_backlight_level(const struct dc_link *link, } } } - abm->funcs->set_backlight_level_pwm( - abm, + + if (pipe_ctx == NULL) + ASSERT(false); + + dc->hwss.set_backlight_level( + pipe_ctx, backlight_pwm_u16_16, - frame_ramp, - controller_id, - link->panel_cntl->inst, - fw_set_brightness); + frame_ramp); } return true; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 9cd130c8894a..30469026c642 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2714,6 +2714,37 @@ void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.xfm, attributes); } +bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx, + uint32_t backlight_pwm_u16_16, + uint32_t frame_ramp) +{ + struct dc_link *link = pipe_ctx->stream->link; + struct dc *dc = link->ctx->dc; + struct abm *abm = pipe_ctx->stream_res.abm; + struct dmcu *dmcu = dc->res_pool->dmcu; + bool fw_set_brightness = true; + /* DMCU -1 for all controller id values, +* therefore +1 here +*/ + uint32_t controller_id = pipe_ctx->stream_res.tg->inst + 1; + + if (abm == NULL || (abm->funcs->set_backlight_level_pwm == NULL)) + return false; + + if (dmcu) + fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu); + + abm->funcs->set_backlight_level_pwm( + abm, + backlight_pwm_u16_16, + frame_ramp
[PATCH 09/19] drm/amd/display: Change viewport limit to 12 for DCN2
From: Sung Lee [WHY & HOW] Viewport limit was set to 16 pixels due to an issue with MPO on small viewports. This restriction does not apply and the viewport limit can now be lowered. Signed-off-by: Sung Lee Reviewed-by: Dmytro Laktyushkin Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 12f5c6881cd0..1a01c038632b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1064,8 +1064,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) calculate_viewport(pipe_ctx); - if (pipe_ctx->plane_res.scl_data.viewport.height < 16 || - pipe_ctx->plane_res.scl_data.viewport.width < 16) { + if (pipe_ctx->plane_res.scl_data.viewport.height < 12 || + pipe_ctx->plane_res.scl_data.viewport.width < 12) { if (store_h_border_left) { restore_border_left_from_dst(pipe_ctx, store_h_border_left); -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 17/19] drm/amd/display: Use cursor locking to prevent flip delays
From: Aric Cyr [Why] Current locking scheme for cursor can result in a flip missing its vsync, deferring it for one or more vsyncs. Result is a potential for stuttering when cursor is moved. [How] Use cursor update lock so that flips are not blocked while cursor is being programmed. Signed-off-by: Aric Cyr Reviewed-by: Nicholas Kazlauskas Acked-by: Aurabindo Pillai --- .../gpu/drm/amd/display/dc/core/dc_stream.c | 40 ++- .../display/dc/dce110/dce110_hw_sequencer.c | 1 + .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 10 + .../amd/display/dc/dcn10/dcn10_hw_sequencer.h | 1 + .../gpu/drm/amd/display/dc/dcn10/dcn10_init.c | 1 + .../gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 15 +++ .../gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h | 20 +++--- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 14 ++- .../gpu/drm/amd/display/dc/dcn20/dcn20_init.c | 1 + .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c | 1 + .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h | 3 +- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 4 ++ .../gpu/drm/amd/display/dc/dcn21/dcn21_init.c | 1 + .../drm/amd/display/dc/dcn21/dcn21_resource.c | 4 ++ drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 16 .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 1 + 16 files changed, 88 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 8c20e9e907b2..4f0e7203dba4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -231,34 +231,6 @@ struct dc_stream_status *dc_stream_get_status( return dc_stream_get_status_from_state(dc->current_state, stream); } -static void delay_cursor_until_vupdate(struct pipe_ctx *pipe_ctx, struct dc *dc) -{ -#if defined(CONFIG_DRM_AMD_DC_DCN) - unsigned int vupdate_line; - unsigned int lines_to_vupdate, us_to_vupdate, vpos, nvpos; - struct dc_stream_state *stream = pipe_ctx->stream; - unsigned int us_per_line; - - if 
(!dc->hwss.get_vupdate_offset_from_vsync) - return; - - vupdate_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx); - if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos)) - return; - - if (vpos >= vupdate_line) - return; - - us_per_line = - stream->timing.h_total * 1 / stream->timing.pix_clk_100hz; - lines_to_vupdate = vupdate_line - vpos; - us_to_vupdate = lines_to_vupdate * us_per_line; - - /* 70 us is a conservative estimate of cursor update time*/ - if (us_to_vupdate < 70) - udelay(us_to_vupdate); -#endif -} /** * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address @@ -298,9 +270,7 @@ bool dc_stream_set_cursor_attributes( if (!pipe_to_program) { pipe_to_program = pipe_ctx; - - delay_cursor_until_vupdate(pipe_ctx, dc); - dc->hwss.pipe_control_lock(dc, pipe_to_program, true); + dc->hwss.cursor_lock(dc, pipe_to_program, true); } dc->hwss.set_cursor_attribute(pipe_ctx); @@ -309,7 +279,7 @@ bool dc_stream_set_cursor_attributes( } if (pipe_to_program) - dc->hwss.pipe_control_lock(dc, pipe_to_program, false); + dc->hwss.cursor_lock(dc, pipe_to_program, false); return true; } @@ -349,16 +319,14 @@ bool dc_stream_set_cursor_position( if (!pipe_to_program) { pipe_to_program = pipe_ctx; - - delay_cursor_until_vupdate(pipe_ctx, dc); - dc->hwss.pipe_control_lock(dc, pipe_to_program, true); + dc->hwss.cursor_lock(dc, pipe_to_program, true); } dc->hwss.set_cursor_position(pipe_ctx); } if (pipe_to_program) - dc->hwss.pipe_control_lock(dc, pipe_to_program, false); + dc->hwss.cursor_lock(dc, pipe_to_program, false); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 30469026c642..6bd8d4e1c294 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2767,6 +2767,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { .disable_plane 
= dce110_power_down_fe, .pipe_control_lock = dce_pipe_control_lock, .interdependent_update_lock = NULL, + .cursor_lock = dce_pipe_control_lock, .prepare_bandwidth = dce110_prepare_bandwidth, .optimize_bandwidth = dce110_optimize_bandwidth, .set_drr = set_drr, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.
[PATCH 19/19] drm/amd/display: 3.2.83.1
From: Aric Cyr Update firmware blobs Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 8957429c6a24..17075f99bc54 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -42,7 +42,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.83" +#define DC_VER "3.2.83.1" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 16/19] drm/amd/display: Internal refactoring to abstract color caps
From: Krunoslav Kovac [Why&How] modules/color calculates various colour operations which are translated to abstracted HW. DCE 5-12 had almost no important changes, but starting with DCN1, every new generation comes with fairly major differences in color pipeline. We would hack it with some DCN checks, but a better approach is to abstract color pipe capabilities so modules/DM can decide mapping to HW block based on logical capabilities, Signed-off-by: Krunoslav Kovac Reviewed-by: Aric Cyr Acked-by: Anthony Koo Acked-by: Aurabindo Pillai --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 7 +-- drivers/gpu/drm/amd/display/dc/dc.h | 45 ++- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 34 ++ .../drm/amd/display/dc/dcn20/dcn20_resource.c | 35 ++- .../drm/amd/display/dc/dcn21/dcn21_resource.c | 35 ++- .../amd/display/modules/color/color_gamma.c | 31 ++--- .../amd/display/modules/color/color_gamma.h | 4 +- 7 files changed, 178 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 838f35668f12..4dfb6b55bb2e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -239,7 +239,8 @@ static int __set_output_tf(struct dc_transfer_func *func, * instead to simulate this. */ gamma->type = GAMMA_CUSTOM; - res = mod_color_calculate_degamma_params(func, gamma, true); + res = mod_color_calculate_degamma_params(NULL, func, + gamma, true); } else { /* * Assume sRGB. The actual mapping will depend on whether the @@ -271,7 +272,7 @@ static int __set_input_tf(struct dc_transfer_func *func, __drm_lut_to_dc_gamma(lut, gamma, false); - res = mod_color_calculate_degamma_params(func, gamma, true); + res = mod_color_calculate_degamma_params(NULL, func, gamma, true); dc_gamma_release(&gamma); return res ? 
0 : -ENOMEM; @@ -485,7 +486,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, dc_plane_state->in_transfer_func->tf = tf; if (tf != TRANSFER_FUNCTION_SRGB && - !mod_color_calculate_degamma_params( + !mod_color_calculate_degamma_params(NULL, dc_plane_state->in_transfer_func, NULL, false)) return -ENOMEM; } else { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 99c8e40049e6..b4aeb5d8a818 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -98,6 +98,49 @@ struct dc_plane_cap { } max_downscale_factor; }; +// Color management caps (DPP and MPC) +struct rom_curve_caps { + uint16_t srgb : 1; + uint16_t bt2020 : 1; + uint16_t gamma2_2 : 1; + uint16_t pq : 1; + uint16_t hlg : 1; +}; + +struct dpp_color_caps { + uint16_t dcn_arch : 1; // all DCE generations treated the same + // input lut is different than most LUTs, just plain 256-entry lookup + uint16_t input_lut_shared : 1; // shared with DGAM + uint16_t icsc : 1; + uint16_t dgam_ram : 1; + uint16_t post_csc : 1; // before gamut remap + uint16_t gamma_corr : 1; + + // hdr_mult and gamut remap always available in DPP (in that order) + // 3d lut implies shaper LUT, + // it may be shared with MPC - check MPC:shared_3d_lut flag + uint16_t hw_3d_lut : 1; + uint16_t ogam_ram : 1; // blnd gam + uint16_t ocsc : 1; + struct rom_curve_caps dgam_rom_caps; + struct rom_curve_caps ogam_rom_caps; +}; + +struct mpc_color_caps { + uint16_t gamut_remap : 1; + uint16_t ogam_ram : 1; + uint16_t ocsc : 1; + uint16_t num_3dluts : 3; //3d lut always assumes a preceding shaper LUT + uint16_t shared_3d_lut:1; //can be in either DPP or MPC, but single instance + + struct rom_curve_caps ogam_rom_caps; +}; + +struct dc_color_caps { + struct dpp_color_caps dpp; + struct mpc_color_caps mpc; +}; + struct dc_caps { uint32_t max_streams; uint32_t max_links; @@ -120,9 +163,9 @@ struct dc_caps { bool psp_setup_panel_mode; bool 
extended_aux_timeout_support; bool dmcub_support; - bool hw_3d_lut; enum dp_protocol_version max_dp_protocol_version; struct dc_plane_cap planes[MAX_PLANES]; + struct dc_color_caps color; }; struct dc_bug_wa { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 43116749af9f..6d506c37fc71 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++
[PATCH 13/19] drm/amd/display: Add panel cntl id for set backlight level.
From: Yongqiang Sun [Why & How] Add panel cntl instance when calling set backlight. Signed-off-by: Yongqiang Sun Reviewed-by: Anthony Koo Acked-by: Aurabindo Pillai Acked-by: Tony Cheng --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 +++- drivers/gpu/drm/amd/display/dc/core/dc_link.c| 3 ++- drivers/gpu/drm/amd/display/dc/dce/dce_abm.c | 15 +-- drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c| 16 ++-- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c| 9 ++--- .../gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 9 ++--- drivers/gpu/drm/amd/display/dc/inc/hw/abm.h | 5 +++-- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 1 + 8 files changed, 40 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 0f7810571be3..ad817bd74586 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2210,7 +2210,9 @@ static void commit_planes_do_stream_update(struct dc *dc, if (should_program_abm) { if (*stream_update->abm_level == ABM_LEVEL_IMMEDIATE_DISABLE) { - pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm); + pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable( + pipe_ctx->stream_res.abm, + pipe_ctx->stream->link->panel_cntl->inst); } else { pipe_ctx->stream_res.abm->funcs->set_abm_level( pipe_ctx->stream_res.abm, stream->abm_level); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 9c4686edcf3e..a54b3e05f66b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2552,6 +2552,7 @@ bool dc_link_set_backlight_level(const struct dc_link *link, backlight_pwm_u16_16, frame_ramp, controller_id, + link->panel_cntl->inst, fw_set_brightness); } @@ -2564,7 +2565,7 @@ bool dc_link_set_abm_disable(const struct dc_link *link) bool success = false; if (abm) - success = abm->funcs->set_abm_immediate_disable(abm); + 
success = abm->funcs->set_abm_immediate_disable(abm, link->panel_cntl->inst); return success; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index 4dae9efebb6f..c15e60fb5ebc 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -55,7 +55,7 @@ #define MCP_DISABLE_ABM_IMMEDIATELY 255 -static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id) +static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id, uint32_t panel_inst) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); uint32_t rampingBoundary = 0x; @@ -201,7 +201,8 @@ static void dmcu_set_backlight_level( struct dce_abm *abm_dce, uint32_t backlight_pwm_u16_16, uint32_t frame_ramp, - uint32_t controller_id) + uint32_t controller_id, + uint32_t panel_id) { unsigned int backlight_8_bit = 0; uint32_t s2; @@ -213,7 +214,7 @@ static void dmcu_set_backlight_level( // Take MSB of fractional part since backlight is not max backlight_8_bit = (backlight_pwm_u16_16 >> 8) & 0xFF; - dce_abm_set_pipe(&abm_dce->base, controller_id); + dce_abm_set_pipe(&abm_dce->base, controller_id, panel_id); /* waitDMCUReadyForCmd */ REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, @@ -331,14 +332,14 @@ static bool dce_abm_set_level(struct abm *abm, uint32_t level) return true; } -static bool dce_abm_immediate_disable(struct abm *abm) +static bool dce_abm_immediate_disable(struct abm *abm, uint32_t panel_inst) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); if (abm->dmcu_is_running == false) return true; - dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY); + dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY, panel_inst); abm->stored_backlight_registers.BL_PWM_CNTL = REG_READ(BL_PWM_CNTL); @@ -420,6 +421,7 @@ static bool dce_abm_set_backlight_level_pwm( unsigned int backlight_pwm_u16_16, unsigned int frame_ramp, unsigned int controller_id, + unsigned int p
[PATCH 05/19] drm/amd/display: Add DML variable for future asics
From: Joshua Aberback Signed-off-by: Joshua Aberback Reviewed-by: Dmytro Laktyushkin Reviewed-by: Jun Lei Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 1 + drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 1 + drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 687010c17324..b2ecb174a93f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -121,6 +121,7 @@ struct _vcs_dpi_soc_bounding_box_st { }; struct _vcs_dpi_ip_params_st { + bool use_min_dcfclk; bool gpuvm_enable; bool hostvm_enable; unsigned int gpuvm_max_page_table_levels; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 6b525c52124c..6e4e8a452e66 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -280,6 +280,7 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib) ip_params_st *ip = &mode_lib->vba.ip; // IP Parameters + mode_lib->vba.UseMinimumRequiredDCFCLK = ip->use_min_dcfclk; mode_lib->vba.MaxNumDPP = ip->max_num_dpp; mode_lib->vba.MaxNumOTG = ip->max_num_otg; mode_lib->vba.MaxNumHDMIFRLOutputs = ip->max_num_hdmi_frl_outputs; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 5d82fc5a7ed7..a1884ffe63ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -898,6 +898,7 @@ struct vba_vars_st { bool dummystring[DC__NUM_DPP__MAX]; double BPP; enum odm_combine_policy ODMCombinePolicy; + bool UseMinimumRequiredDCFCLK; }; bool CalculateMinAndMaxPrefetchMode( -- 2.17.1 ___ amd-gfx 
mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 10/19] drm/amd/display: Defer cursor update around VUPDATE for all ASIC
From: Nicholas Kazlauskas [Why] Fixes the following scenario: - Flip has been prepared sometime during the frame, update pending - Cursor update happens right when VUPDATE would happen - OPTC lock acquired, VUPDATE is blocked until next frame - Flip is delayed potentially infinitely With the igt@kms_cursor_legacy cursor-vs-flip-legacy test we can observe nearly *13* frames of delay for some flips on Navi. [How] Apply the Raven workaround generically. When close enough to VUPDATE block cursor updates from occurring from the dc_stream_set_cursor_* helpers. This could perhaps be a little smarter by checking if there were pending updates or flips earlier in the frame on the HUBP side before applying the delay, but this should be fine for now. This fixes the kms_cursor_legacy test. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Aric Cyr Acked-by: Aurabindo Pillai --- .../gpu/drm/amd/display/dc/core/dc_stream.c | 28 +-- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 6ddbb00ed37a..8c20e9e907b2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -239,24 +239,24 @@ static void delay_cursor_until_vupdate(struct pipe_ctx *pipe_ctx, struct dc *dc) struct dc_stream_state *stream = pipe_ctx->stream; unsigned int us_per_line; - if (stream->ctx->asic_id.chip_family == FAMILY_RV && - ASICREV_IS_RAVEN(stream->ctx->asic_id.hw_internal_rev)) { + if (!dc->hwss.get_vupdate_offset_from_vsync) + return; - vupdate_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx); - if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos)) - return; + vupdate_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx); + if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos)) + return; - if (vpos >= vupdate_line) - return; + if (vpos >= vupdate_line) + return; - us_per_line = stream->timing.h_total * 1 / 
stream->timing.pix_clk_100hz; - lines_to_vupdate = vupdate_line - vpos; - us_to_vupdate = lines_to_vupdate * us_per_line; + us_per_line = + stream->timing.h_total * 1 / stream->timing.pix_clk_100hz; + lines_to_vupdate = vupdate_line - vpos; + us_to_vupdate = lines_to_vupdate * us_per_line; - /* 70 us is a conservative estimate of cursor update time*/ - if (us_to_vupdate < 70) - udelay(us_to_vupdate); - } + /* 70 us is a conservative estimate of cursor update time*/ + if (us_to_vupdate < 70) + udelay(us_to_vupdate); #endif } -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 14/19] drm/amd/display: Add dummy p-state latency bounding box override
From: Joshua Aberback [Why] For debugging, it can be useful to be able to modify the dummy p-state latency, this will make it easier to do so. Signed-off-by: Joshua Aberback Reviewed-by: Wesley Chalmers Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 7 +++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 84e5056521a3..99c8e40049e6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -478,6 +478,7 @@ struct dc_bounding_box_overrides { int urgent_latency_ns; int percent_of_ideal_drambw; int dram_clock_change_latency_ns; + int dummy_clock_change_latency_ns; /* This forces a hard min on the DCFCLK we use * for DML. Unlike the debug option for forcing * DCFCLK, this override affects watermark calculations diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 3a8a4c54738a..b7e4d0c2432c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -3467,6 +3467,13 @@ void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st bb->dram_clock_change_latency_us = dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; } + + if ((int)(bb->dummy_pstate_latency_us * 1000) + != dc->bb_overrides.dummy_clock_change_latency_ns + && dc->bb_overrides.dummy_clock_change_latency_ns) { + bb->dummy_pstate_latency_us = + dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; + } } static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb( -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 08/19] drm/amd/display: Fail validation if building scaling params fails
From: Sung Lee [WHY & HOW] If building scaling parameters fails, validation should also fail. Signed-off-by: Sung Lee Reviewed-by: Dmytro Laktyushkin Acked-by: Aurabindo Pillai --- .../drm/amd/display/dc/dcn20/dcn20_resource.c| 16 ++-- .../drm/amd/display/dc/dcn20/dcn20_resource.h| 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index f41fc322d50a..3a8a4c54738a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1936,7 +1936,7 @@ bool dcn20_split_stream_for_odm( return true; } -void dcn20_split_stream_for_mpc( +bool dcn20_split_stream_for_mpc( struct resource_context *res_ctx, const struct resource_pool *pool, struct pipe_ctx *primary_pipe, @@ -1965,8 +1965,11 @@ void dcn20_split_stream_for_mpc( secondary_pipe->top_pipe = primary_pipe; ASSERT(primary_pipe->plane_state); - resource_build_scaling_params(primary_pipe); - resource_build_scaling_params(secondary_pipe); + if (!resource_build_scaling_params(primary_pipe) || + !resource_build_scaling_params(secondary_pipe)) + return false; + + return true; } void dcn20_populate_dml_writeback_from_context( @@ -2796,9 +2799,10 @@ bool dcn20_fast_validate_bw( goto validate_fail; dcn20_build_mapped_resource(dc, context, pipe->stream); } else - dcn20_split_stream_for_mpc( - &context->res_ctx, dc->res_pool, - pipe, hsplit_pipe); + if (!dcn20_split_stream_for_mpc( + &context->res_ctx, dc->res_pool, + pipe, hsplit_pipe)) + goto validate_fail; pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx; } } else if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h index 9d5bff9455fd..578265ccbf5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h +++ 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h @@ -129,7 +129,7 @@ void dcn20_release_dsc(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc); bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx); -void dcn20_split_stream_for_mpc( +bool dcn20_split_stream_for_mpc( struct resource_context *res_ctx, const struct resource_pool *pool, struct pipe_ctx *primary_pipe, -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 18/19] drm/amd/display: 3.2.83
From: Aric Cyr Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index b4aeb5d8a818..8957429c6a24 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -42,7 +42,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.82" +#define DC_VER "3.2.83" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 12/19] drm/amd/display: Pass command instead of header into DMUB service
From: Nicholas Kazlauskas [Why] We read memory that we shouldn't be touching if the struct isn't a full union dmub_rb_cmd. [How] Fix up all the callers and functions that take in the dmub_cmd_header to use the dmub_rb_cmd instead. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Tony Cheng Acked-by: Aurabindo Pillai --- .../drm/amd/display/dc/bios/command_table2.c | 62 +-- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 2 +- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 3 +- drivers/gpu/drm/amd/display/dc/dc_helper.c| 6 +- drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 10 +-- drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 8 +-- .../gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 33 ++ .../gpu/drm/amd/display/dmub/inc/dmub_rb.h| 6 +- .../gpu/drm/amd/display/dmub/inc/dmub_srv.h | 3 +- .../gpu/drm/amd/display/dmub/src/dmub_srv.c | 2 +- 10 files changed, 80 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 8edc2506d49e..bed91572f82a 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -113,13 +113,19 @@ static void encoder_control_dmcub( struct dc_dmub_srv *dmcub, struct dig_encoder_stream_setup_parameters_v1_5 *dig) { - struct dmub_rb_cmd_digx_encoder_control encoder_control = { 0 }; + union dmub_rb_cmd cmd; - encoder_control.header.type = DMUB_CMD__VBIOS; - encoder_control.header.sub_type = DMUB_CMD__VBIOS_DIGX_ENCODER_CONTROL; - encoder_control.encoder_control.dig.stream_param = *dig; + memset(&cmd, 0, sizeof(cmd)); - dc_dmub_srv_cmd_queue(dmcub, &encoder_control.header); + cmd.digx_encoder_control.header.type = DMUB_CMD__VBIOS; + cmd.digx_encoder_control.header.sub_type = + DMUB_CMD__VBIOS_DIGX_ENCODER_CONTROL; + cmd.digx_encoder_control.header.payload_bytes = + sizeof(cmd.digx_encoder_control) - + sizeof(cmd.digx_encoder_control.header); + 
cmd.digx_encoder_control.encoder_control.dig.stream_param = *dig; + + dc_dmub_srv_cmd_queue(dmcub, &cmd); dc_dmub_srv_cmd_execute(dmcub); dc_dmub_srv_wait_idle(dmcub); } @@ -238,14 +244,19 @@ static void transmitter_control_dmcub( struct dc_dmub_srv *dmcub, struct dig_transmitter_control_parameters_v1_6 *dig) { - struct dmub_rb_cmd_dig1_transmitter_control transmitter_control; + union dmub_rb_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); - transmitter_control.header.type = DMUB_CMD__VBIOS; - transmitter_control.header.sub_type = + cmd.dig1_transmitter_control.header.type = DMUB_CMD__VBIOS; + cmd.dig1_transmitter_control.header.sub_type = DMUB_CMD__VBIOS_DIG1_TRANSMITTER_CONTROL; - transmitter_control.transmitter_control.dig = *dig; + cmd.dig1_transmitter_control.header.payload_bytes = + sizeof(cmd.dig1_transmitter_control) - + sizeof(cmd.dig1_transmitter_control.header); + cmd.dig1_transmitter_control.transmitter_control.dig = *dig; - dc_dmub_srv_cmd_queue(dmcub, &transmitter_control.header); + dc_dmub_srv_cmd_queue(dmcub, &cmd); dc_dmub_srv_cmd_execute(dmcub); dc_dmub_srv_wait_idle(dmcub); } @@ -339,13 +350,18 @@ static void set_pixel_clock_dmcub( struct dc_dmub_srv *dmcub, struct set_pixel_clock_parameter_v1_7 *clk) { - struct dmub_rb_cmd_set_pixel_clock pixel_clock = { 0 }; + union dmub_rb_cmd cmd; - pixel_clock.header.type = DMUB_CMD__VBIOS; - pixel_clock.header.sub_type = DMUB_CMD__VBIOS_SET_PIXEL_CLOCK; - pixel_clock.pixel_clock.clk = *clk; + memset(&cmd, 0, sizeof(cmd)); - dc_dmub_srv_cmd_queue(dmcub, &pixel_clock.header); + cmd.set_pixel_clock.header.type = DMUB_CMD__VBIOS; + cmd.set_pixel_clock.header.sub_type = DMUB_CMD__VBIOS_SET_PIXEL_CLOCK; + cmd.set_pixel_clock.header.payload_bytes = + sizeof(cmd.set_pixel_clock) - + sizeof(cmd.set_pixel_clock.header); + cmd.set_pixel_clock.pixel_clock.clk = *clk; + + dc_dmub_srv_cmd_queue(dmcub, &cmd); dc_dmub_srv_cmd_execute(dmcub); dc_dmub_srv_wait_idle(dmcub); } @@ -705,13 +721,19 @@ static void 
enable_disp_power_gating_dmcub( struct dc_dmub_srv *dmcub, struct enable_disp_power_gating_parameters_v2_1 *pwr) { - struct dmub_rb_cmd_enable_disp_power_gating power_gating; + union dmub_rb_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); - power_gating.header.type = DMUB_CMD__VBIOS; - power_gating.header.sub_type = DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING; - power_gating.power_gating.pwr = *pwr; + cmd.enable_disp_power_gating.header.type = DMUB_CMD__V
[PATCH 11/19] drm/amd/display: Update downspread percent to match spreadsheet for DCN2.1
From: Sung Lee [WHY] The downspread percentage was copied over from a previous version of the display_mode_lib spreadsheet. This value has been updated, and the previous value is too high to allow for such modes as 4K120hz. The new value is sufficient for such modes. [HOW] Update the value in dcn21_resource to match the spreadsheet. Signed-off-by: Sung Lee Reviewed-by: Yongqiang Sun Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 8fcb03e65fdb..802372f09dc7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -286,7 +286,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .dram_channel_width_bytes = 4, .fabric_datapath_to_dcn_data_return_bytes = 32, .dcn_downspread_percent = 0.5, - .downspread_percent = 0.5, + .downspread_percent = 0.38, .dram_page_open_time_ns = 50.0, .dram_rw_turnaround_time_ns = 17.5, .dram_return_buffer_per_channel_bytes = 8192, -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 01/19] drm/amd/display: 3.2.82
From: Aric Cyr Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 5432ca1657b1..84e5056521a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -42,7 +42,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.81" +#define DC_VER "3.2.82" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 02/19] drm/amd/display: Do not disable pipe split if mode is not supported
From: Sung Lee [WHY] If mode is not supported, pipe split should not be disabled. This may cause more modes to fail. [HOW] Check for mode support before disabling pipe split. This commit was previously reverted as it was thought to have problems, but those issues have been resolved. Signed-off-by: Sung Lee Reviewed-by: Yongqiang Sun Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 63044ae06327..f41fc322d50a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2623,19 +2623,24 @@ int dcn20_validate_apply_pipe_split_flags( /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */ if (avoid_split) { + int max_mpc_comb = context->bw_ctx.dml.vba.maxMpcComb; + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++) - if (context->bw_ctx.dml.vba.NoOfDPP[vlevel][0][pipe_idx] == 1) + if (context->bw_ctx.dml.vba.NoOfDPP[vlevel][0][pipe_idx] == 1 && + context->bw_ctx.dml.vba.ModeSupport[vlevel][0]) break; /* Impossible to not split this pipe */ if (vlevel > context->bw_ctx.dml.soc.num_states) vlevel = vlevel_split; + else + max_mpc_comb = 0; pipe_idx++; } - context->bw_ctx.dml.vba.maxMpcComb = 0; + context->bw_ctx.dml.vba.maxMpcComb = max_mpc_comb; } /* Split loop sets which pipe should be split based on dml outputs and dc flags */ -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 03/19] drm/amd/display: Fix DMUB meta offset for new load method
From: Nicholas Kazlauskas [Why] The new metadata offset is located at the end of the firmware binary without any additional padding. Firmware state is currently larger than 1024 bytes so new firmware state will hang when trying to access any data above 1024 bytes. [How] Specify the correct offset based on legacy vs new loading method. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Yongqiang Sun Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 0a1a851741c5..a6e403227872 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -96,25 +96,27 @@ dmub_get_fw_meta_info(const struct dmub_srv_region_params *params) const union dmub_fw_meta *meta; const uint8_t *blob = NULL; uint32_t blob_size = 0; + uint32_t meta_offset = 0; if (params->fw_bss_data) { /* Legacy metadata region. */ blob = params->fw_bss_data; blob_size = params->bss_data_size; + meta_offset = DMUB_FW_META_OFFSET; } else if (params->fw_inst_const) { /* Combined metadata region. */ blob = params->fw_inst_const; blob_size = params->inst_const_size; + meta_offset = 0; } if (!blob || !blob_size) return NULL; - if (blob_size < sizeof(union dmub_fw_meta) + DMUB_FW_META_OFFSET) + if (blob_size < sizeof(union dmub_fw_meta) + meta_offset) return NULL; - meta = (const union dmub_fw_meta *)(blob + blob_size - - DMUB_FW_META_OFFSET - + meta = (const union dmub_fw_meta *)(blob + blob_size - meta_offset - sizeof(union dmub_fw_meta)); if (meta->info.magic_value != DMUB_FW_META_MAGIC) -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 00/19] DC Patches 20 April 2020
This DC patchset brings improvements in multiple areas. In summary, we highlight: * DC version 3.2.83.1 * Enhancements and refactoring in DMUB * Improvements for DCN2, backlight and others __ Anthony Koo (1): drm/amd/display: clean up some header paths Aric Cyr (4): drm/amd/display: 3.2.82 drm/amd/display: Use cursor locking to prevent flip delays drm/amd/display: 3.2.83 drm/amd/display: 3.2.83.1 Dmytro Laktyushkin (2): drm/amd/display: check if REFCLK_CNTL register is present drm/amd/display: fix rn soc bb update Joshua Aberback (2): drm/amd/display: Add DML variable for future asics drm/amd/display: Add dummy p-state latency bounding box override Krunoslav Kovac (1): drm/amd/display: Internal refactoring to abstract color caps Nicholas Kazlauskas (3): drm/amd/display: Fix DMUB meta offset for new load method drm/amd/display: Defer cursor update around VUPDATE for all ASIC drm/amd/display: Pass command instead of header into DMUB service Sung Lee (4): drm/amd/display: Do not disable pipe split if mode is not supported drm/amd/display: Fail validation if building scaling params fails drm/amd/display: Change viewport limit to 12 for DCN2 drm/amd/display: Update downspread percent to match spreadsheet for DCN2.1 Yongqiang Sun (2): drm/amd/display: Add panel cntl id for set backlight level. drm/amd/display: Add set backlight to hw sequencer. 
.../amd/display/amdgpu_dm/amdgpu_dm_color.c | 7 +- .../drm/amd/display/dc/bios/command_table2.c | 62 ++ drivers/gpu/drm/amd/display/dc/core/dc.c | 4 +- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 36 ++ .../gpu/drm/amd/display/dc/core/dc_resource.c | 4 +- .../gpu/drm/amd/display/dc/core/dc_stream.c | 40 +-- drivers/gpu/drm/amd/display/dc/dc.h | 48 +++- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 2 +- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 5 +- drivers/gpu/drm/amd/display/dc/dc_helper.c| 6 +- drivers/gpu/drm/amd/display/dc/dce/dce_abm.c | 15 ++- drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 28 +++-- drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 12 +- .../display/dc/dce110/dce110_hw_sequencer.c | 35 +- .../display/dc/dce110/dce110_hw_sequencer.h | 4 + .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 19 ++- .../amd/display/dc/dcn10/dcn10_hw_sequencer.h | 1 + .../gpu/drm/amd/display/dc/dcn10/dcn10_init.c | 2 + .../gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 15 +++ .../gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h | 20 +++- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 48 +++- .../drm/amd/display/dc/dcn20/dcn20_hwseq.c| 12 +- .../gpu/drm/amd/display/dc/dcn20/dcn20_init.c | 2 + .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c | 1 + .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h | 3 +- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 71 +-- .../drm/amd/display/dc/dcn20/dcn20_resource.h | 2 +- .../gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 33 -- .../gpu/drm/amd/display/dc/dcn21/dcn21_init.c | 2 + .../drm/amd/display/dc/dcn21/dcn21_resource.c | 112 +++--- .../amd/display/dc/dml/display_mode_structs.h | 1 + .../drm/amd/display/dc/dml/display_mode_vba.c | 1 + .../drm/amd/display/dc/dml/display_mode_vba.h | 1 + drivers/gpu/drm/amd/display/dc/inc/hw/abm.h | 5 +- drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 16 +++ .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 5 + .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 6 +- .../gpu/drm/amd/display/dmub/inc/dmub_rb.h| 6 +- 
.../gpu/drm/amd/display/dmub/inc/dmub_srv.h | 3 +- .../gpu/drm/amd/display/dmub/inc/dmub_types.h | 11 ++ .../gpu/drm/amd/display/dmub/src/dmub_srv.c | 10 +- .../amd/display/modules/color/color_gamma.c | 31 - .../amd/display/modules/color/color_gamma.h | 4 +- 43 files changed, 523 insertions(+), 228 deletions(-) -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 04/19] drm/amd/display: clean up some header paths
From: Anthony Koo [Why] Some include paths don't need to be relative, and some types are missing. [How] Make some changes to headers and modify the include paths. Signed-off-by: Anthony Koo Reviewed-by: Tony Cheng Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 2 +- drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 2 +- drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 4 ++-- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 5 - drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h | 11 +++ 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 754b6077539c..855431483699 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -27,7 +27,7 @@ #define _DMUB_DC_SRV_H_ #include "os_types.h" -#include "../dmub/inc/dmub_cmd.h" +#include "dmub/inc/dmub_cmd.h" struct dmub_srv; struct dmub_cmd_header; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index a19f359e45d7..992d869188c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -27,7 +27,7 @@ #include "dce_abm.h" #include "dc.h" #include "dc_dmub_srv.h" -#include "../../dmub/inc/dmub_srv.h" +#include "dmub/inc/dmub_srv.h" #include "core_types.h" #include "dm_services.h" #include "reg_helper.h" diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 3b8a49e8e665..7b32e5d60ed6 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -26,8 +26,8 @@ #include "dmub_psr.h" #include "dc.h" #include "dc_dmub_srv.h" -#include "../../dmub/inc/dmub_srv.h" -#include "../../dmub/inc/dmub_gpint_cmd.h" +#include "dmub/inc/dmub_srv.h" +#include "dmub/inc/dmub_gpint_cmd.h" #include "core_types.h" #define MAX_PIPES 6 diff --git 
a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 7c7a3561b6aa..6b48285446c3 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -215,11 +215,6 @@ struct dmub_rb_cmd_dpphy_init { uint8_t reserved[60]; }; -struct dmub_psr_debug_flags { - uint8_t visual_confirm : 1; - uint8_t reserved : 7; -}; - struct dmub_cmd_psr_copy_settings_data { uint16_t psr_level; uint8_t dpp_inst; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h index 41d524b0db2f..bed5b023a396 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h @@ -49,6 +49,12 @@ extern "C" { #define dmub_udelay(microseconds) udelay(microseconds) #endif +/* Maximum number of streams on any ASIC. */ +#define DMUB_MAX_STREAMS 6 + +/* Maximum number of planes on any ASIC. */ +#define DMUB_MAX_PLANES 6 + union dmub_addr { struct { uint32_t low_part; @@ -57,6 +63,11 @@ union dmub_addr { uint64_t quad_part; }; +struct dmub_psr_debug_flags { + uint8_t visual_confirm : 1; + uint8_t reserved : 7; +}; + #if defined(__cplusplus) } #endif -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 06/19] drm/amd/display: check if REFCLK_CNTL register is present
From: Dmytro Laktyushkin Check before programming the register since it isn't present on all IPs using this code. Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Bernstein Acked-by: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 6ad4ed7da629..bd2ccf8eb9cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2304,7 +2304,8 @@ void dcn20_fpga_init_hw(struct dc *dc) REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, 2); REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1); - REG_WRITE(REFCLK_CNTL, 0); + if (REG(REFCLK_CNTL)) + REG_WRITE(REFCLK_CNTL, 0); // -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: Init data to avoid oops while reading pp_num_states.
On Wed, Apr 22, 2020 at 9:13 AM limingyu wrote: > > For chip like CHIP_OLAND with si enabled(amdgpu.si_support=1), > the amdgpu will expose pp_num_states to the /sys directory. > In this moment, read the pp_num_states file will execute the > amdgpu_get_pp_num_states func. In our case, the data hasn't > been initialized, so the kernel will access some illegal > address, trigger the segmentation fault and system will reboot soon: > > uos@uos-PC:~$ cat /sys/devices/pci\:00/\:00\:00.0/\:01\:00 > .0/pp_num_states > > Message from syslogd@uos-PC at Apr 22 09:26:20 ... > kernel:[ 82.154129] Internal error: Oops: 9604 [#1] SMP > > This patch aims to fix this problem, avoid that reading file > triggers the kernel segmentation fault. > > Signed-off-by: limingyu > Signed-off-by: zhoubinbin Applied. Thanks! Alex > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > index abe94a55ecad..17de9dc60ea1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > @@ -444,8 +444,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device > *dev, > ret = smu_get_power_num_states(&adev->smu, &data); > if (ret) > return ret; > - } else if (adev->powerplay.pp_funcs->get_pp_num_states) > + } else if (adev->powerplay.pp_funcs->get_pp_num_states) { > amdgpu_dpm_get_pp_num_states(adev, &data); > + } else > + memset(&data, 0, sizeof(data)); > > pm_runtime_mark_last_busy(ddev->dev); > pm_runtime_put_autosuspend(ddev->dev); > -- > 2.20.1 > > > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm: amdgpu: fix kernel-doc struct warning
On Tue, Apr 21, 2020 at 10:34 AM Christian König wrote: > > Am 21.04.20 um 16:33 schrieb Christian König: > > Am 20.04.20 um 03:50 schrieb Randy Dunlap: > >> Fix a kernel-doc warning of missing struct field description: > >> > >> ../drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:92: warning: Function > >> parameter or member 'vm' not described in 'amdgpu_vm_eviction_lock' > > > > Can't we just document the function parameter instead? Should only be > > one IIRC. > > On the other hand forget that, the format doesn't match a proper > kernel-doc for a function anyway. > > Reviewed-by: Christian König > Applied. Thanks! Alex > > > > Thanks, > > Christian. > > > >> > >> Fixes: a269e44989f3 ("drm/amdgpu: Avoid reclaim fs while eviction lock") > >> Signed-off-by: Randy Dunlap > >> Cc: Signed-off-by: Alex Sierra > >> Cc: Felix Kuehling > >> Cc: Christian König > >> Cc: Alex Deucher > >> Cc: David (ChunMing) Zhou > >> Cc: amd-gfx@lists.freedesktop.org > >> --- > >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |2 +- > >> 1 file changed, 1 insertion(+), 1 deletion(-) > >> > >> --- lnx-57-rc2.orig/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > >> +++ lnx-57-rc2/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > >> @@ -82,7 +82,7 @@ struct amdgpu_prt_cb { > >> struct dma_fence_cb cb; > >> }; > >> -/** > >> +/* > >>* vm eviction_lock can be taken in MMU notifiers. Make sure no > >> reclaim-FS > >>* happens while holding this lock anywhere to prevent deadlocks when > >>* an MMU notifier runs in reclaim-FS context. > > > > ___ > > amd-gfx mailing list > > amd-gfx@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/amd-gfx > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm: amd/display: fix Kconfig help text
On Wed, Apr 22, 2020 at 10:00 AM Harry Wentland wrote: > > On 2020-04-21 7:34 p.m., Randy Dunlap wrote: > > From: Randy Dunlap > > > > Fix help text: indent one tab + 2 spaces; end a sentence with a > > period; and collapse short lines of text to one line. > > > > Fixes: 23c61b4599c4 ("drm/amd: Fix Kconfig indentation") > > Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") > > Signed-off-by: Randy Dunlap > > Cc: Harry Wentland > > Cc: Alex Deucher > > Cc: Krzysztof Kozlowski > > Reviewed-by: Harry Wentland > Applied. Thanks! Alex > Harry > > > --- > > drivers/gpu/drm/amd/display/Kconfig |8 ++-- > > 1 file changed, 2 insertions(+), 6 deletions(-) > > > > --- linux-next-20200421.orig/drivers/gpu/drm/amd/display/Kconfig > > +++ linux-next-20200421/drivers/gpu/drm/amd/display/Kconfig > > @@ -21,16 +21,12 @@ config DRM_AMD_DC_HDCP > > bool "Enable HDCP support in DC" > > depends on DRM_AMD_DC > > help > > - Choose this option > > - if you want to support > > - HDCP authentication > > + Choose this option if you want to support HDCP authentication. > > > > config DEBUG_KERNEL_DC > > bool "Enable kgdb break in DC" > > depends on DRM_AMD_DC > > help > > - Choose this option > > - if you want to hit > > - kdgb_break in assert. > > + Choose this option if you want to hit kdgb_break in assert. > > > > endmenu > > > ___ > dri-devel mailing list > dri-de...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm 0/5] Adjust hmm_range_fault() API
On 4/21/20 5:21 PM, Jason Gunthorpe wrote: From: Jason Gunthorpe The API is a bit complicated for the uses we actually have, and discussions for simplifying have come up a number of times. This small series removes the customizable pfn format and simplifies the return code of hmm_range_fault(). All the drivers are adjusted to process in the simplified format. I would appreciate tested-by's for the two drivers, thanks! For nouveau you can add: Tested-by: Ralph Campbell ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: protect kiq overrun
Am 22.04.20 um 16:50 schrieb Yintian Tao: Wait for the oldest sequence on the kiq ring to be signaled in order to make sure there will be no kiq overrun. v2: remove unused the variable and correct kiq max_sub_num value First of all this should probably be added to the fence handling code and not the kiq code. Then you are kind of duplicating some of the functionality we have in the ring handling here. Probably better to avoid this, see amdgpu_fence_driver_init_ring() as well. That's also why I suggested to use the num_fences_mask value. Regards, Christian. Signed-off-by: Yintian Tao --- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 6 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 7 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 + 8 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..fac8b9713dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! 
too many kiq submission\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..fd42c126510f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! too many kiq submissions\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..84e66c45df37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, AMDGPU_RING_PRIO_DEFAULT); if (r) dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + else + kiq->max_sub_num = (ring->ring_size / 4) / + (ring->funcs->align_mask + 1); return r; } @@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, return 0; } +int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) +{ + uint32_t seq = 0; + signed long r = 0; + + seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num); + if (seq > kiq->max_sub_num) { + r = amdgpu_fence_wait_polling(&kiq->ring, seq, + MAX_KIQ_REG_WAIT); + return r < 1 ? 
-ETIME : 0; + } + + return 0; +} + uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; @@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + if (amdgpu_device_wb_get(adev, ®_val_offs)) { spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug! too many kiq readers\n"); @@ -728,6 +752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) BUG_ON(!ring->funcs->emit_wreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_write; + } + amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); amdgpu_fence_emit_polling(ring, &seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ee698
Re: [PATCH] drm/amdgpu: protect kiq overrun
Can't we check the rptr write back to determine the available room in the ring_alloc ? We removed that a couple of years ago. One reason for this is that checking the rptr each time is quite a huge overhead since it is an MMIO register. The other reason is that the rptr is no longer a secure way of figuring out if an engine has processed all data. Some engines like the CP are pipelined and the rptr is only the beginning of the pipeline, subsequent pipeline steps might read from the ring buffer again. If you want to figure out the processing status of ring allocations you need to take a look at the fences. Regards, Christian. Am 22.04.20 um 17:43 schrieb Liu, Shaoyun: [AMD Official Use Only - Internal Distribution Only] I always has an impression for each submission, once the ring be allocated , before the fence be signed , this ring space will always be reserved . If this can not be guaranteed , it sound a big issue to me . Can't we check the rptr write back to determine the available room in the ring_alloc ? Regards Shaoyun.liu -Original Message- From: Koenig, Christian Sent: Wednesday, April 22, 2020 10:57 AM To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: protect kiq overrun The amdgpu_ring_alloc() function checks if the requested number of DW don't exceed the maximum submission size. It does NOT check if there is enough room on the ring. That would require MMIO access and that is what we want to avoid. Regards, Christian. Am 22.04.20 um 16:54 schrieb Liu, Shaoyun: [AMD Official Use Only - Internal Distribution Only] I think each kiq operation will call ring_alloc for the package space , why not just check whether this allocation is succeed or not ? 
Shaoyun.liu -Original Message- From: amd-gfx On Behalf Of Yintian Tao Sent: Wednesday, April 22, 2020 10:50 AM To: Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian Subject: [PATCH] drm/amdgpu: protect kiq overrun Wait for the oldest sequence on the kiq ring to be signaled in order to make sure there will be no kiq overrun. v2: remove unused the variable and correct kiq max_sub_num value Signed-off-by: Yintian Tao --- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 6 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 7 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 + 8 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..fac8b9713dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! too many kiq submission\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..fd42c126510f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! 
too many kiq submissions\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..84e66c45df37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, AMDGPU_RING_PRIO_DEFAULT); if (r) dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + else + kiq->max_sub_num = (ring->ring_size / 4) / + (ring->funcs->align_mask + 1); return r; } @@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
Re: [PATCH] amdgpu: fixes memleak issue when init failed
Am 22.04.20 um 17:51 schrieb Ruhl, Michael J: -Original Message- From: dri-devel On Behalf Of Bernard Zhao Sent: Tuesday, April 21, 2020 7:17 AM To: Alex Deucher ; Christian König ; David (ChunMing) Zhou ; David Airlie ; Daniel Vetter ; Tom St Denis ; Ori Messinger ; Sam Ravnborg ; Bernard Zhao ; amd-gfx@lists.freedesktop.org; dri- de...@lists.freedesktop.org; linux-ker...@vger.kernel.org Cc: opensource.ker...@vivo.com Subject: [PATCH] amdgpu: fixes memleak issue when init failed VRAM manager and DRM MM when init failed, there is no operaction to free kzalloc memory & remove device file. This will lead to memleak & cause stability issue. Signed-off-by: Bernard Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 24 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 82a3299e53c0..4c5fb153e6b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -175,30 +175,44 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_total); if (ret) { DRM_ERROR("Failed to create device file mem_info_vram_total\n"); - return ret; + goto VRAM_TOTAL_FAIL; } ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_total); Have you looked at the DEVICE_ATTR mechanism? Yeah, I've thought about that as well. But didn't had time to look into detail if that could be applied here or not. Regards, Christian. It is set up to add device files. You won't get the granularity of each file, but it has a lot more automatic-ness to setting this stuff up. 
Mike if (ret) { DRM_ERROR("Failed to create device file mem_info_vis_vram_total\n"); - return ret; + goto VIS_VRAM_TOTA_FAIL; } ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_used); if (ret) { DRM_ERROR("Failed to create device file mem_info_vram_used\n"); - return ret; + goto VRAM_USED_FAIL; } ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_used); if (ret) { DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n"); - return ret; + goto VIS_VRAM_USED_FAIL; } ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_vendor); if (ret) { DRM_ERROR("Failed to create device file mem_info_vram_vendor\n"); - return ret; + goto VRAM_VERDOR_FAIL; } return 0; + +VRAM_VERDOR_FAIL: + device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used); +VIS_VRAM_USED_FAIL: + device_remove_file(adev->dev, &dev_attr_mem_info_vram_used); +RVAM_USED_FAIL: + device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_total); +VIS_VRAM_TOTA_FAIL: + device_remove_file(adev->dev, &dev_attr_mem_info_vram_total); +VRAM_TOTAL_FAIL: + kfree(mgr); + man->priv = NULL; + + return ret; } /** -- 2.26.2 ___ dri-devel mailing list dri-de...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm 5/5] mm/hmm: remove the customizable pfn format from hmm_range_fault
[+Philip Yang] Am 2020-04-21 um 8:21 p.m. schrieb Jason Gunthorpe: > From: Jason Gunthorpe > > Presumably the intent here was that hmm_range_fault() could put the data > into some HW specific format and thus avoid some work. However, nothing > actually does that, and it isn't clear how anything actually could do that > as hmm_range_fault() provides CPU addresses which must be DMA mapped. > > Perhaps there is some special HW that does not need DMA mapping, but we > don't have any examples of this, and the theoretical performance win of > avoiding an extra scan over the pfns array doesn't seem worth the > complexity. Plus pfns needs to be scanned anyhow to sort out any > DEVICE_PRIVATE pages. > > This version replaces the uint64_t with an usigned long containing a pfn > and fix flags. On input flags is filled with the HMM_PFN_REQ_* values, on > successful output it is filled with HMM_PFN_* values, describing the state > of the pages. > > amdgpu is simple to convert, it doesn't use snapshot and doesn't use > per-page flags. > > nouveau uses only 16 hmm_pte entries at most (ie fits in a few cache > lines), and it sweeps over its pfns array a couple of times anyhow. > > Signed-off-by: Jason Gunthorpe > Signed-off-by: Christoph Hellwig Hi Jason, I pointed out a typo in the documentation inline. Other than that, the series is Acked-by: Felix Kuehling I'll try to build it and run some basic tests later. 
> --- > Documentation/vm/hmm.rst| 26 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 35 ++ > drivers/gpu/drm/nouveau/nouveau_dmem.c | 60 +++-- > drivers/gpu/drm/nouveau/nouveau_dmem.h | 4 +- > drivers/gpu/drm/nouveau/nouveau_svm.c | 52 > include/linux/hmm.h | 99 ++- > mm/hmm.c| 160 +++- > 7 files changed, 204 insertions(+), 232 deletions(-) > > diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst > index 9924f2caa0184c..73a9b8c858e5d9 100644 > --- a/Documentation/vm/hmm.rst > +++ b/Documentation/vm/hmm.rst > @@ -185,9 +185,6 @@ The usage pattern is:: >range.start = ...; >range.end = ...; >range.pfns = ...; > - range.flags = ...; > - range.values = ...; > - range.pfn_shift = ...; > >if (!mmget_not_zero(interval_sub->notifier.mm)) >return -EFAULT; > @@ -229,15 +226,10 @@ The hmm_range struct has 2 fields, default_flags and > pfn_flags_mask, that specif > fault or snapshot policy for the whole range instead of having to set them > for each entry in the pfns array. > > -For instance, if the device flags for range.flags are:: > +For instance if the device driver wants pages for a range with at least read > +permission, it sets:: > > -range.flags[HMM_PFN_VALID] = (1 << 63); > -range.flags[HMM_PFN_WRITE] = (1 << 62); > - > -and the device driver wants pages for a range with at least read permission, > -it sets:: > - > -range->default_flags = (1 << 63); > +range->default_flags = HMM_PFN_REQ_VALID; This should be HMM_PFN_REQ_FAULT. > range->pfn_flags_mask = 0; > > and calls hmm_range_fault() as described above. This will fill fault all > pages > @@ -246,18 +238,18 @@ in the range with at least read permission. > Now let's say the driver wants to do the same except for one page in the > range for > which it wants to have write permission. 
Now driver set:: > > -range->default_flags = (1 << 63); > -range->pfn_flags_mask = (1 << 62); > -range->pfns[index_of_write] = (1 << 62); > +range->default_flags = HMM_PFN_REQ_VALID; HMM_PFN_REQ_FAULT Regards, Felix > +range->pfn_flags_mask = HMM_PFN_REQ_WRITE; > +range->pfns[index_of_write] = HMM_PFN_REQ_WRITE; > > With this, HMM will fault in all pages with at least read (i.e., valid) and > for the > address == range->start + (index_of_write << PAGE_SHIFT) it will fault with > write permission i.e., if the CPU pte does not have write permission set > then HMM > will call handle_mm_fault(). > > -Note that HMM will populate the pfns array with write permission for any page > -that is mapped with CPU write permission no matter what values are set > -in default_flags or pfn_flags_mask. > +After hmm_range_fault completes the flag bits are set to the current state of > +the page tables, ie HMM_PFN_VALID | HMM_PFN_WRITE will be set if the page is > +writable. > > > Represent and manage device memory from core kernel point of view > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 449083f9f8a2bf..bcfa8c26647d5e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -766,17 +766,6 @@ struct amdgpu_ttm_tt { > }; > > #ifdef CONFIG_DRM_AMDGPU_USERPTR > -/* flags used by HMM internal, not related to CPU/GPU PTE flags */ > -static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { > -
RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
Hi Shaoyun Yes, you are right. It is the rare corner case. Best Regards Yintian Tao -Original Message- From: Liu, Shaoyun Sent: 2020年4月22日 23:51 To: Tao, Yintian ; Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg [AMD Official Use Only - Internal Distribution Only] OK, I see, the submission it self be signaled so the ring space for this submission will be re-use by other submission , but the CPU still not read the out put value yet. Thanks Shaoyun.liu -Original Message- From: Tao, Yintian Sent: Wednesday, April 22, 2020 11:47 AM To: Tao, Yintian ; Liu, Shaoyun ; Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Add more Especially for the multi-VF environment, we have to wait through msleep() instead udeay. Because the max udelay time is 15VF * 6ms(world-switch) = 90ms. -Original Message- From: amd-gfx On Behalf Of Tao, Yintian Sent: 2020年4月22日 23:43 To: Liu, Shaoyun ; Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun No, the second patch can't solve this rare case because only Slot-D is signaled and the Slot-A can be overwritten. The second patch think the sequence is signaled the Slot-A buffer can be freed. if you store the output value in each ring buffer itself , each kiq operation will be atomic and self contain . [yttao]: If we wan to really make the kiq operation be atomic then we have to do the things below: spin_lock_irqsave(&kiq->ring_lock, flags); . Fulfill the command buffer . 
if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_write; might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); /* here will break atomic and we need directly use udealy*/ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); } spin_lock_irqrestore(&kiq->ring_lock, flags); Best Regards Yintian Tao -Original Message- From: Liu, Shaoyun Sent: 2020年4月22日 23:35 To: Tao, Yintian ; Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg [AMD Official Use Only - Internal Distribution Only] This is the issue you try to solve with your second patch (protect kiq overrun) . For current patch , if you store the output value in each ring buffer itself , each kiq operation will be atomic and self contain . Shaoyun.liu -Original Message- From: Tao, Yintian Sent: Wednesday, April 22, 2020 11:00 AM To: Koenig, Christian ; Liu, Shaoyun ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun There is one rare corner case which will raise problem when using ring buffer to store value. It is assumed there are only total four slots at KIQ ring buffer. And these four slots are fulfilled with command to read registers. Slot-A Slot-B Slot-C Slot-D And they are waiting for the sequence fences to be signaled. Here, there is one new command to write register to be submitted 1. Slot-A under msleep not to read register 2. Slot-B under msleep not to read register 3. Slot-C under msleep not to read register. 4. Slot-D happen to find the sequence signaled and here the new write command will overwrite the Slot-A contents. 
Best Regards Yintian Tao -Original Message- From: Koenig, Christian Sent: 2020年4月22日 22:52 To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun, the ring buffer is usually filled with command and not read results. Allocating extra space would only work if we use the special NOP command and that is way more complicated and fragile than just using the wb functions which where made for this stuff. Regards, Christian. Am 22.04.20 um 16:48 schrieb Liu, Shaoyun: > [AMD Official Use Only - Internal Distribution Only] > > Hi ,Yintian & Christian > I still don't understand why we need this complicated change here . Why can > not just allocate few more extra space in the ring for each read and use the > space to store the output value ? > > Regards > Shaoyun.liu > > > -Original Message- > From: amd-gfx On Behalf Of > Christian König > Sent: Wednesday, April 22, 2020 8:42 AM > To: Tao, Yintian ; Liu, Monk ; > Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read > reg > > Am 22.04.20 um 14:36 schrieb Yintian Tao: >> According to the current
RE: [PATCH] amdgpu: fixes memleak issue when init failed
>-Original Message- >From: dri-devel On Behalf Of >Bernard Zhao >Sent: Tuesday, April 21, 2020 7:17 AM >To: Alex Deucher ; Christian König >; David (ChunMing) Zhou >; David Airlie ; Daniel Vetter >; Tom St Denis ; Ori Messinger >; Sam Ravnborg ; Bernard >Zhao ; amd-gfx@lists.freedesktop.org; dri- >de...@lists.freedesktop.org; linux-ker...@vger.kernel.org >Cc: opensource.ker...@vivo.com >Subject: [PATCH] amdgpu: fixes memleak issue when init failed > >VRAM manager and DRM MM when init failed, there is no operaction >to free kzalloc memory & remove device file. >This will lead to memleak & cause stability issue. > >Signed-off-by: Bernard Zhao >--- > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 24 > > 1 file changed, 19 insertions(+), 5 deletions(-) > >diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >index 82a3299e53c0..4c5fb153e6b4 100644 >--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >@@ -175,30 +175,44 @@ static int amdgpu_vram_mgr_init(struct >ttm_mem_type_manager *man, > ret = device_create_file(adev->dev, >&dev_attr_mem_info_vram_total); > if (ret) { > DRM_ERROR("Failed to create device file >mem_info_vram_total\n"); >- return ret; >+ goto VRAM_TOTAL_FAIL; > } > ret = device_create_file(adev->dev, >&dev_attr_mem_info_vis_vram_total); Have you looked at the DEVICE_ATTR mechanism? It is set up to add device files. You won't get the granularity of each file, but it has a lot more automatic-ness to setting this stuff up. 
Mike > if (ret) { > DRM_ERROR("Failed to create device file >mem_info_vis_vram_total\n"); >- return ret; >+ goto VIS_VRAM_TOTA_FAIL; > } > ret = device_create_file(adev->dev, >&dev_attr_mem_info_vram_used); > if (ret) { > DRM_ERROR("Failed to create device file >mem_info_vram_used\n"); >- return ret; >+ goto VRAM_USED_FAIL; > } > ret = device_create_file(adev->dev, >&dev_attr_mem_info_vis_vram_used); > if (ret) { > DRM_ERROR("Failed to create device file >mem_info_vis_vram_used\n"); >- return ret; >+ goto VIS_VRAM_USED_FAIL; > } > ret = device_create_file(adev->dev, >&dev_attr_mem_info_vram_vendor); > if (ret) { > DRM_ERROR("Failed to create device file >mem_info_vram_vendor\n"); >- return ret; >+ goto VRAM_VERDOR_FAIL; > } > > return 0; >+ >+VRAM_VERDOR_FAIL: >+ device_remove_file(adev->dev, >&dev_attr_mem_info_vis_vram_used); >+VIS_VRAM_USED_FAIL: >+ device_remove_file(adev->dev, &dev_attr_mem_info_vram_used); >+RVAM_USED_FAIL: >+ device_remove_file(adev->dev, >&dev_attr_mem_info_vis_vram_total); >+VIS_VRAM_TOTA_FAIL: >+ device_remove_file(adev->dev, &dev_attr_mem_info_vram_total); >+VRAM_TOTAL_FAIL: >+ kfree(mgr); >+ man->priv = NULL; >+ >+ return ret; > } > > /** >-- >2.26.2 > >___ >dri-devel mailing list >dri-de...@lists.freedesktop.org >https://lists.freedesktop.org/mailman/listinfo/dri-devel ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
[AMD Official Use Only - Internal Distribution Only] OK, I see, the submission it self be signaled so the ring space for this submission will be re-use by other submission , but the CPU still not read the out put value yet. Thanks Shaoyun.liu -Original Message- From: Tao, Yintian Sent: Wednesday, April 22, 2020 11:47 AM To: Tao, Yintian ; Liu, Shaoyun ; Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Add more Especially for the multi-VF environment, we have to wait through msleep() instead udeay. Because the max udelay time is 15VF * 6ms(world-switch) = 90ms. -Original Message- From: amd-gfx On Behalf Of Tao, Yintian Sent: 2020年4月22日 23:43 To: Liu, Shaoyun ; Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun No, the second patch can't solve this rare case because only Slot-D is signaled and the Slot-A can be overwritten. The second patch think the sequence is signaled the Slot-A buffer can be freed. if you store the output value in each ring buffer itself , each kiq operation will be atomic and self contain . [yttao]: If we wan to really make the kiq operation be atomic then we have to do the things below: spin_lock_irqsave(&kiq->ring_lock, flags); . Fulfill the command buffer . 
if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_write; might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); /* here will break atomic and we need directly use udealy*/ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); } spin_lock_irqrestore(&kiq->ring_lock, flags); Best Regards Yintian Tao -Original Message- From: Liu, Shaoyun Sent: 2020年4月22日 23:35 To: Tao, Yintian ; Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg [AMD Official Use Only - Internal Distribution Only] This is the issue you try to solve with your second patch (protect kiq overrun) . For current patch , if you store the output value in each ring buffer itself , each kiq operation will be atomic and self contain . Shaoyun.liu -Original Message- From: Tao, Yintian Sent: Wednesday, April 22, 2020 11:00 AM To: Koenig, Christian ; Liu, Shaoyun ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun There is one rare corner case which will raise problem when using ring buffer to store value. It is assumed there are only total four slots at KIQ ring buffer. And these four slots are fulfilled with command to read registers. Slot-A Slot-B Slot-C Slot-D And they are waiting for the sequence fences to be signaled. Here, there is one new command to write register to be submitted 1. Slot-A under msleep not to read register 2. Slot-B under msleep not to read register 3. Slot-C under msleep not to read register. 4. Slot-D happen to find the sequence signaled and here the new write command will overwrite the Slot-A contents. 
Best Regards Yintian Tao -Original Message- From: Koenig, Christian Sent: 2020年4月22日 22:52 To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun, the ring buffer is usually filled with command and not read results. Allocating extra space would only work if we use the special NOP command and that is way more complicated and fragile than just using the wb functions which where made for this stuff. Regards, Christian. Am 22.04.20 um 16:48 schrieb Liu, Shaoyun: > [AMD Official Use Only - Internal Distribution Only] > > Hi ,Yintian & Christian > I still don't understand why we need this complicated change here . Why can > not just allocate few more extra space in the ring for each read and use the > space to store the output value ? > > Regards > Shaoyun.liu > > > -Original Message- > From: amd-gfx On Behalf Of > Christian König > Sent: Wednesday, April 22, 2020 8:42 AM > To: Tao, Yintian ; Liu, Monk ; > Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read > reg > > Am 22.04.20 um 14:36 schrieb Yintian Tao: >> According to the current kiq read register method, there will be race >> condition when using KIQ to read register if multiple clients want to >> read at same time just like the expample below: >> 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the >> seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll >> the seqno-1 5. the k
RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
To add more: especially for the multi-VF environment, we have to wait through msleep() instead of udelay(), because the max udelay time is 15VF * 6ms(world-switch) = 90ms. -Original Message- From: amd-gfx On Behalf Of Tao, Yintian Sent: 2020年4月22日 23:43 To: Liu, Shaoyun ; Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun No, the second patch can't solve this rare case because only Slot-D is signaled and Slot-A can be overwritten. The second patch assumes that once the sequence is signaled, the Slot-A buffer can be freed. if you store the output value in each ring buffer itself , each kiq operation will be atomic and self contain . [yttao]: If we really want to make the kiq operation atomic then we have to do the things below: spin_lock_irqsave(&kiq->ring_lock, flags); . Fulfill the command buffer . if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_write; might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); /* here will break atomic and we need directly use udelay */ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); } spin_lock_irqrestore(&kiq->ring_lock, flags); Best Regards Yintian Tao -Original Message- From: Liu, Shaoyun Sent: 2020年4月22日 23:35 To: Tao, Yintian ; Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg [AMD Official Use Only - Internal Distribution Only] This is the issue you try to solve with your second patch (protect kiq overrun) . For current patch , if you store the output value in each ring buffer itself , each kiq operation will be atomic and self contain . 
Shaoyun.liu -Original Message- From: Tao, Yintian Sent: Wednesday, April 22, 2020 11:00 AM To: Koenig, Christian ; Liu, Shaoyun ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun There is one rare corner case which will raise problem when using ring buffer to store value. It is assumed there are only total four slots at KIQ ring buffer. And these four slots are fulfilled with command to read registers. Slot-A Slot-B Slot-C Slot-D And they are waiting for the sequence fences to be signaled. Here, there is one new command to write register to be submitted 1. Slot-A under msleep not to read register 2. Slot-B under msleep not to read register 3. Slot-C under msleep not to read register. 4. Slot-D happen to find the sequence signaled and here the new write command will overwrite the Slot-A contents. Best Regards Yintian Tao -Original Message- From: Koenig, Christian Sent: 2020年4月22日 22:52 To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun, the ring buffer is usually filled with command and not read results. Allocating extra space would only work if we use the special NOP command and that is way more complicated and fragile than just using the wb functions which where made for this stuff. Regards, Christian. Am 22.04.20 um 16:48 schrieb Liu, Shaoyun: > [AMD Official Use Only - Internal Distribution Only] > > Hi ,Yintian & Christian > I still don't understand why we need this complicated change here . Why can > not just allocate few more extra space in the ring for each read and use the > space to store the output value ? 
> > Regards > Shaoyun.liu > > > -Original Message- > From: amd-gfx On Behalf Of > Christian König > Sent: Wednesday, April 22, 2020 8:42 AM > To: Tao, Yintian ; Liu, Monk ; > Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read > reg > > Am 22.04.20 um 14:36 schrieb Yintian Tao: >> According to the current kiq read register method, there will be race >> condition when using KIQ to read register if multiple clients want to >> read at same time just like the expample below: >> 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the >> seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll >> the seqno-1 5. the kiq complete these two read operation 6. client-A >> to read the register at the wb buffer and >> get REG-1 value >> >> Therefore, use amdgpu_device_wb_get() to request reg_val_offs for >> each kiq read register. >> >> v2: fix the error remove >> >> Signed-off-by: Yintian Tao >> --- >>drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 19 ++--- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - >>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- >>
RE: [PATCH] drm/amdgpu: protect kiq overrun
[AMD Official Use Only - Internal Distribution Only] I always had the impression that, for each submission, once the ring space is allocated it will always stay reserved until the fence is signaled. If this cannot be guaranteed, it sounds like a big issue to me. Can't we check the rptr write-back to determine the available room in ring_alloc? Regards Shaoyun.liu -Original Message- From: Koenig, Christian Sent: Wednesday, April 22, 2020 10:57 AM To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: protect kiq overrun The amdgpu_ring_alloc() function checks that the requested number of DW doesn't exceed the maximum submission size. It does NOT check if there is enough room on the ring. That would require MMIO access and that is what we want to avoid. Regards, Christian. Am 22.04.20 um 16:54 schrieb Liu, Shaoyun: > [AMD Official Use Only - Internal Distribution Only] > > I think each kiq operation will call ring_alloc for the package space , > why not just check whether this allocation is succeed or not ? > > Shaoyun.liu > > -Original Message- > From: amd-gfx On Behalf Of > Yintian Tao > Sent: Wednesday, April 22, 2020 10:50 AM > To: Koenig, Christian ; Liu, Monk > ; Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian > Subject: [PATCH] drm/amdgpu: protect kiq overrun > > Wait for the oldest sequence on the kiq ring to be signaled in order to make > sure there will be no kiq overrun. 
> > v2: remove unused the variable and correct > kiq max_sub_num value > > Signed-off-by: Yintian Tao > --- > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 6 > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 7 + > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 + > 8 files changed, 71 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > index 691c89705bcd..fac8b9713dfc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > @@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void > *mqd, >mec, pipe, queue_id); > > spin_lock(&adev->gfx.kiq.ring_lock); > + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); > + if (r) { > + pr_err("critical bug! too many kiq submission\n"); > + goto out_unlock; > + } > + > r = amdgpu_ring_alloc(kiq_ring, 7); > if (r) { > pr_err("Failed to alloc KIQ (%d).\n", r); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > index df841c2ac5e7..fd42c126510f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > @@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void > *mqd, >mec, pipe, queue_id); > > spin_lock(&adev->gfx.kiq.ring_lock); > + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); > + if (r) { > + pr_err("critical bug! 
too many kiq submissions\n"); > + goto out_unlock; > + } > + > r = amdgpu_ring_alloc(kiq_ring, 7); > if (r) { > pr_err("Failed to alloc KIQ (%d).\n", r); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index a721b0e0ff69..84e66c45df37 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, >AMDGPU_RING_PRIO_DEFAULT); > if (r) > dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); > + else > + kiq->max_sub_num = (ring->ring_size / 4) / > + (ring->funcs->align_mask + 1); > > return r; > } > @@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device > *adev, > return 0; > } > > +int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) { > + uint32_t seq = 0; > + signed long r = 0; > + > + seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num); > + if (seq > kiq->max_sub_num) { > + r = amdgpu_fence_wait_polling(&kiq->ring, seq, > + MAX_KIQ_REG_WAIT); > + return r < 1 ? -ETIME : 0; > + } > + > + return 0; > +} > + > uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { > signed long r, cnt = 0; > @@ -6
RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
Hi Shaoyun No, the second patch can't solve this rare case because only Slot-D is signaled and the Slot-A can be overwritten. The second patch think the sequence is signaled the Slot-A buffer can be freed. if you store the output value in each ring buffer itself , each kiq operation will be atomic and self contain . [yttao]: If we wan to really make the kiq operation be atomic then we have to do the things below: spin_lock_irqsave(&kiq->ring_lock, flags); . Fulfill the command buffer . if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_write; might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); /* here will break atomic and we need directly use udealy*/ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); } spin_lock_irqrestore(&kiq->ring_lock, flags); Best Regards Yintian Tao -Original Message- From: Liu, Shaoyun Sent: 2020年4月22日 23:35 To: Tao, Yintian ; Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg [AMD Official Use Only - Internal Distribution Only] This is the issue you try to solve with your second patch (protect kiq overrun) . For current patch , if you store the output value in each ring buffer itself , each kiq operation will be atomic and self contain . Shaoyun.liu -Original Message- From: Tao, Yintian Sent: Wednesday, April 22, 2020 11:00 AM To: Koenig, Christian ; Liu, Shaoyun ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun There is one rare corner case which will raise problem when using ring buffer to store value. It is assumed there are only total four slots at KIQ ring buffer. And these four slots are fulfilled with command to read registers. Slot-A Slot-B Slot-C Slot-D And they are waiting for the sequence fences to be signaled. 
Here, there is one new command to write register to be submitted 1. Slot-A under msleep not to read register 2. Slot-B under msleep not to read register 3. Slot-C under msleep not to read register. 4. Slot-D happen to find the sequence signaled and here the new write command will overwrite the Slot-A contents. Best Regards Yintian Tao -Original Message- From: Koenig, Christian Sent: 2020年4月22日 22:52 To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun, the ring buffer is usually filled with command and not read results. Allocating extra space would only work if we use the special NOP command and that is way more complicated and fragile than just using the wb functions which where made for this stuff. Regards, Christian. Am 22.04.20 um 16:48 schrieb Liu, Shaoyun: > [AMD Official Use Only - Internal Distribution Only] > > Hi ,Yintian & Christian > I still don't understand why we need this complicated change here . Why can > not just allocate few more extra space in the ring for each read and use the > space to store the output value ? > > Regards > Shaoyun.liu > > > -Original Message- > From: amd-gfx On Behalf Of > Christian König > Sent: Wednesday, April 22, 2020 8:42 AM > To: Tao, Yintian ; Liu, Monk ; > Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read > reg > > Am 22.04.20 um 14:36 schrieb Yintian Tao: >> According to the current kiq read register method, there will be race >> condition when using KIQ to read register if multiple clients want to >> read at same time just like the expample below: >> 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the >> seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll >> the seqno-1 5. the kiq complete these two read operation 6. 
client-A >> to read the register at the wb buffer and >> get REG-1 value >> >> Therefore, use amdgpu_device_wb_get() to request reg_val_offs for >> each kiq read register. >> >> v2: fix the error remove >> >> Signed-off-by: Yintian Tao >> --- >>drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 19 ++--- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - >>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- >>drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++--- >>drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 7 +++--- >>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 >>7 files changed, 41 insertions(+), 27 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 4e1d4cfe7a9f..7ee5a4da398a 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >
RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
[AMD Official Use Only - Internal Distribution Only] This is the issue you try to solve with your second patch (protect kiq overrun) . For current patch , if you store the output value in each ring buffer itself , each kiq operation will be atomic and self contain . Shaoyun.liu -Original Message- From: Tao, Yintian Sent: Wednesday, April 22, 2020 11:00 AM To: Koenig, Christian ; Liu, Shaoyun ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun There is one rare corner case which will raise problem when using ring buffer to store value. It is assumed there are only total four slots at KIQ ring buffer. And these four slots are fulfilled with command to read registers. Slot-A Slot-B Slot-C Slot-D And they are waiting for the sequence fences to be signaled. Here, there is one new command to write register to be submitted 1. Slot-A under msleep not to read register 2. Slot-B under msleep not to read register 3. Slot-C under msleep not to read register. 4. Slot-D happen to find the sequence signaled and here the new write command will overwrite the Slot-A contents. Best Regards Yintian Tao -Original Message- From: Koenig, Christian Sent: 2020年4月22日 22:52 To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun, the ring buffer is usually filled with command and not read results. Allocating extra space would only work if we use the special NOP command and that is way more complicated and fragile than just using the wb functions which where made for this stuff. Regards, Christian. Am 22.04.20 um 16:48 schrieb Liu, Shaoyun: > [AMD Official Use Only - Internal Distribution Only] > > Hi ,Yintian & Christian > I still don't understand why we need this complicated change here . 
Why can > not just allocate few more extra space in the ring for each read and use the > space to store the output value ? > > Regards > Shaoyun.liu > > > -Original Message- > From: amd-gfx On Behalf Of > Christian König > Sent: Wednesday, April 22, 2020 8:42 AM > To: Tao, Yintian ; Liu, Monk ; > Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read > reg > > Am 22.04.20 um 14:36 schrieb Yintian Tao: >> According to the current kiq read register method, there will be race >> condition when using KIQ to read register if multiple clients want to >> read at same time just like the expample below: >> 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the >> seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll >> the seqno-1 5. the kiq complete these two read operation 6. client-A >> to read the register at the wb buffer and >> get REG-1 value >> >> Therefore, use amdgpu_device_wb_get() to request reg_val_offs for >> each kiq read register. >> >> v2: fix the error remove >> >> Signed-off-by: Yintian Tao >> --- >>drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 19 ++--- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - >>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- >>drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++--- >>drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 7 +++--- >>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 >>7 files changed, 41 insertions(+), 27 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 4e1d4cfe7a9f..7ee5a4da398a 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct >> amdgpu_cs_parser *p, >>/* >> * Writeback >> */ >> -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for >> amdgpu-owned rings. 
*/ >> +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for >> amdgpu-owned rings. */ >> >>struct amdgpu_wb { >> struct amdgpu_bo*wb_obj; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> index ea576b4260a4..d5a59d7c48d6 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct >> amdgpu_device *adev, >> >> spin_lock_init(&kiq->ring_lock); >> >> -r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); >> -if (r) >> -return r; >> - >> ring->adev = NULL; >> ring->ring_obj = NULL; >> ring->use_doorbell = true; >> @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device >> *adev, >> >>void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) >>{ >> -amdgpu_device_wb_fre
Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
Yeah, we discussed that approach as well. But both Yintian and I agreed that this is the simpler solution. Regards, Christian. Am 22.04.20 um 16:59 schrieb Liu, Shaoyun: [AMD Official Use Only - Internal Distribution Only] Usually doesn't means we can not do it . I feel this proposal is the simplest and clean . But anyway this is just my suggestion. Regards Shaoyun.liu -Original Message- From: Koenig, Christian Sent: Wednesday, April 22, 2020 10:52 AM To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun, the ring buffer is usually filled with command and not read results. Allocating extra space would only work if we use the special NOP command and that is way more complicated and fragile than just using the wb functions which where made for this stuff. Regards, Christian. Am 22.04.20 um 16:48 schrieb Liu, Shaoyun: [AMD Official Use Only - Internal Distribution Only] Hi ,Yintian & Christian I still don't understand why we need this complicated change here . Why can not just allocate few more extra space in the ring for each read and use the space to store the output value ? Regards Shaoyun.liu -Original Message- From: amd-gfx On Behalf Of Christian König Sent: Wednesday, April 22, 2020 8:42 AM To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Am 22.04.20 um 14:36 schrieb Yintian Tao: According to the current kiq read register method, there will be race condition when using KIQ to read register if multiple clients want to read at same time just like the expample below: 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. 
client-A to read the register at the wb buffer and get REG-1 value Therefore, use amdgpu_device_wb_get() to request reg_val_offs for each kiq read register. v2: fix the error remove Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 19 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 7 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 7 files changed, 41 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e1d4cfe7a9f..7ee5a4da398a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. 
*/ struct amdgpu_wb { struct amdgpu_bo*wb_obj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ea576b4260a4..d5a59d7c48d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, spin_lock_init(&kiq->ring_lock); - r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); - if (r) - return r; - ring->adev = NULL; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) { - amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); amdgpu_ring_fini(ring); } @@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; unsigned long flags; - uint32_t seq; + uint32_t seq, reg_val_offs = 0, value = 0; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); + if (amdgpu_device_wb_get(adev, ®_val_offs)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + pr_err("critical bug! too more kiq readers\n"); Typo here, this should probably read "too many kiq readers". But I don't think we need the error message here anyway, the failed_kiq_read label also prints an error. With that fixed the patch is Reviewed-by: Christian König . Thanks, Christian. + goto failed_kiq_read; + } amdgpu_ring_allo
RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
Hi Shaoyun There is one rare corner case which will cause a problem when using the ring buffer to store the value. Assume there are only four slots in total in the KIQ ring buffer, and these four slots are filled with commands to read registers: Slot-A Slot-B Slot-C Slot-D. They are all waiting for their sequence fences to be signaled. Now there is one new command to write a register to be submitted: 1. Slot-A is in msleep and has not yet read the register value 2. Slot-B is in msleep and has not yet read the register value 3. Slot-C is in msleep and has not yet read the register value. 4. Slot-D happens to find its sequence signaled, and at this point the new write command will overwrite the Slot-A contents. Best Regards Yintian Tao -Original Message- From: Koenig, Christian Sent: 2020年4月22日 22:52 To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun, the ring buffer is usually filled with commands and not read results. Allocating extra space would only work if we use the special NOP command and that is way more complicated and fragile than just using the wb functions which were made for this stuff. Regards, Christian. Am 22.04.20 um 16:48 schrieb Liu, Shaoyun: > [AMD Official Use Only - Internal Distribution Only] > > Hi ,Yintian & Christian > I still don't understand why we need this complicated change here . Why can > not just allocate few more extra space in the ring for each read and use the > space to store the output value ? 
> > Regards > Shaoyun.liu > > > -Original Message- > From: amd-gfx On Behalf Of > Christian König > Sent: Wednesday, April 22, 2020 8:42 AM > To: Tao, Yintian ; Liu, Monk ; > Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read > reg > > Am 22.04.20 um 14:36 schrieb Yintian Tao: >> According to the current kiq read register method, there will be race >> condition when using KIQ to read register if multiple clients want to >> read at same time just like the expample below: >> 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the >> seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll >> the seqno-1 5. the kiq complete these two read operation 6. client-A >> to read the register at the wb buffer and >> get REG-1 value >> >> Therefore, use amdgpu_device_wb_get() to request reg_val_offs for >> each kiq read register. >> >> v2: fix the error remove >> >> Signed-off-by: Yintian Tao >> --- >>drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 19 ++--- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - >>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- >>drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++--- >>drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 7 +++--- >>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 >>7 files changed, 41 insertions(+), 27 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 4e1d4cfe7a9f..7ee5a4da398a 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct >> amdgpu_cs_parser *p, >>/* >> * Writeback >> */ >> -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for >> amdgpu-owned rings. */ >> +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for >> amdgpu-owned rings. 
*/ >> >>struct amdgpu_wb { >> struct amdgpu_bo*wb_obj; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> index ea576b4260a4..d5a59d7c48d6 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct >> amdgpu_device *adev, >> >> spin_lock_init(&kiq->ring_lock); >> >> -r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); >> -if (r) >> -return r; >> - >> ring->adev = NULL; >> ring->ring_obj = NULL; >> ring->use_doorbell = true; >> @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device >> *adev, >> >>void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) >>{ >> -amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); >> amdgpu_ring_fini(ring); >>} >> >> @@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, >> uint32_t reg) >>{ >> signed long r, cnt = 0; >> unsigned long flags; >> -uint32_t seq; >> +uint32_t seq, reg_val_offs = 0, value = 0; >> struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> struct amdgpu_ring *ring = &kiq->ring; >> >> BUG_ON(!ring->funcs->emit_rreg); >> >> spin_lock_irqsave(&kiq->ring_lock, flags); >> +if (amdgpu
RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
[AMD Official Use Only - Internal Distribution Only] Usually doesn't means we can not do it . I feel this proposal is the simplest and clean . But anyway this is just my suggestion. Regards Shaoyun.liu -Original Message- From: Koenig, Christian Sent: Wednesday, April 22, 2020 10:52 AM To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Hi Shaoyun, the ring buffer is usually filled with command and not read results. Allocating extra space would only work if we use the special NOP command and that is way more complicated and fragile than just using the wb functions which where made for this stuff. Regards, Christian. Am 22.04.20 um 16:48 schrieb Liu, Shaoyun: > [AMD Official Use Only - Internal Distribution Only] > > Hi ,Yintian & Christian > I still don't understand why we need this complicated change here . Why can > not just allocate few more extra space in the ring for each read and use the > space to store the output value ? > > Regards > Shaoyun.liu > > > -Original Message- > From: amd-gfx On Behalf Of > Christian König > Sent: Wednesday, April 22, 2020 8:42 AM > To: Tao, Yintian ; Liu, Monk ; > Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read > reg > > Am 22.04.20 um 14:36 schrieb Yintian Tao: >> According to the current kiq read register method, there will be race >> condition when using KIQ to read register if multiple clients want to >> read at same time just like the expample below: >> 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the >> seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll >> the seqno-1 5. the kiq complete these two read operation 6. client-A >> to read the register at the wb buffer and >> get REG-1 value >> >> Therefore, use amdgpu_device_wb_get() to request reg_val_offs for >> each kiq read register. 
>> >> v2: fix the error remove >> >> Signed-off-by: Yintian Tao >> --- >>drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 19 ++--- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - >>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- >>drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++--- >>drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 7 +++--- >>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 >>7 files changed, 41 insertions(+), 27 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 4e1d4cfe7a9f..7ee5a4da398a 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct >> amdgpu_cs_parser *p, >>/* >> * Writeback >> */ >> -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for >> amdgpu-owned rings. */ >> +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for >> amdgpu-owned rings. */ >> >>struct amdgpu_wb { >> struct amdgpu_bo*wb_obj; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> index ea576b4260a4..d5a59d7c48d6 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >> @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct >> amdgpu_device *adev, >> >> spin_lock_init(&kiq->ring_lock); >> >> -r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); >> -if (r) >> -return r; >> - >> ring->adev = NULL; >> ring->ring_obj = NULL; >> ring->use_doorbell = true; >> @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device >> *adev, >> >>void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) >>{ >> -amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); >> amdgpu_ring_fini(ring); >>} >> >> @@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, >> uint32_t reg) >>{ >> signed long r, cnt = 0; >> unsigned long flags; >> 
-uint32_t seq; >> +uint32_t seq, reg_val_offs = 0, value = 0; >> struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> struct amdgpu_ring *ring = &kiq->ring; >> >> BUG_ON(!ring->funcs->emit_rreg); >> >> spin_lock_irqsave(&kiq->ring_lock, flags); >> +if (amdgpu_device_wb_get(adev, &reg_val_offs)) { >> +spin_unlock_irqrestore(&kiq->ring_lock, flags); >> +pr_err("critical bug! too more kiq readers\n"); > Typo here, this should probably read "too many kiq readers". > > But I don't think we need the error message here anyway, the failed_kiq_read > label also prints an error. > > With that fixed the patch is Reviewed-by: Christian König > . > > Thanks, > Christian. > >> +g
Re: [PATCH] drm/amdgpu: protect kiq overrun
The amdgpu_ring_alloc() function checks if the requested number of DW don't exceed the maximum submission size. It does NOT check if there is enough room on the ring. That would require MMIO access and that is what we want to avoid. Regards, Christian. Am 22.04.20 um 16:54 schrieb Liu, Shaoyun: [AMD Official Use Only - Internal Distribution Only] I think each kiq operation will call ring_alloc for the package space , why not just check whether this allocation is succeed or not ? Shaoyun.liu -Original Message- From: amd-gfx On Behalf Of Yintian Tao Sent: Wednesday, April 22, 2020 10:50 AM To: Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian Subject: [PATCH] drm/amdgpu: protect kiq overrun Wait for the oldest sequence on the kiq ring to be signaled in order to make sure there will be no kiq overrun. v2: remove unused the variable and correct kiq max_sub_num value Signed-off-by: Yintian Tao --- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 6 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 7 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 + 8 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..fac8b9713dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! 
too many kiq submission\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..fd42c126510f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! too many kiq submissions\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..84e66c45df37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, AMDGPU_RING_PRIO_DEFAULT); if (r) dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + else + kiq->max_sub_num = (ring->ring_size / 4) / + (ring->funcs->align_mask + 1); return r; } @@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, return 0; } +int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) { + uint32_t seq = 0; + signed long r = 0; + + seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num); + if (seq > kiq->max_sub_num) { + r = amdgpu_fence_wait_polling(&kiq->ring, seq, + MAX_KIQ_REG_WAIT); + return r < 1 ? 
-ETIME : 0; + } + + return 0; +} + uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; @@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug! too many kiq readers\n"); @@ -728,6 +752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) BUG_ON(!ring->funcs->emit_wreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.k
RE: [PATCH] drm/amdgpu: protect kiq overrun
[AMD Official Use Only - Internal Distribution Only] I think each kiq operation will call ring_alloc for the package space , why not just check whether this allocation is succeed or not ? Shaoyun.liu -Original Message- From: amd-gfx On Behalf Of Yintian Tao Sent: Wednesday, April 22, 2020 10:50 AM To: Koenig, Christian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian Subject: [PATCH] drm/amdgpu: protect kiq overrun Wait for the oldest sequence on the kiq ring to be signaled in order to make sure there will be no kiq overrun. v2: remove unused the variable and correct kiq max_sub_num value Signed-off-by: Yintian Tao --- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 6 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 7 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 + 8 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..fac8b9713dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! 
too many kiq submission\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..fd42c126510f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! too many kiq submissions\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..84e66c45df37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, AMDGPU_RING_PRIO_DEFAULT); if (r) dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + else + kiq->max_sub_num = (ring->ring_size / 4) / + (ring->funcs->align_mask + 1); return r; } @@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, return 0; } +int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) { + uint32_t seq = 0; + signed long r = 0; + + seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num); + if (seq > kiq->max_sub_num) { + r = amdgpu_fence_wait_polling(&kiq->ring, seq, + MAX_KIQ_REG_WAIT); + return r < 1 ? 
-ETIME : 0; + } + + return 0; +} + uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; @@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug! too many kiq readers\n"); @@ -728,6 +752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) BUG_ON(!ring->funcs->emit_wreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_write; + } + amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); amdgpu_fence_emit_polling(ring, &seq); diff --git a/drivers/gpu/drm/amd
Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
Hi Shaoyun, the ring buffer is usually filled with command and not read results. Allocating extra space would only work if we use the special NOP command and that is way more complicated and fragile than just using the wb functions which were made for this stuff. Regards, Christian. Am 22.04.20 um 16:48 schrieb Liu, Shaoyun: [AMD Official Use Only - Internal Distribution Only] Hi ,Yintian & Christian I still don't understand why we need this complicated change here . Why can not just allocate few more extra space in the ring for each read and use the space to store the output value ? Regards Shaoyun.liu -Original Message- From: amd-gfx On Behalf Of Christian König Sent: Wednesday, April 22, 2020 8:42 AM To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Am 22.04.20 um 14:36 schrieb Yintian Tao: According to the current kiq read register method, there will be race condition when using KIQ to read register if multiple clients want to read at same time just like the example below: 1. client-A start to read REG-0 through KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. client-A to read the register at the wb buffer and get REG-1 value Therefore, use amdgpu_device_wb_get() to request reg_val_offs for each kiq read register. 
v2: fix the error remove Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 19 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 7 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 7 files changed, 41 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e1d4cfe7a9f..7ee5a4da398a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */ struct amdgpu_wb { struct amdgpu_bo*wb_obj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ea576b4260a4..d5a59d7c48d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, spin_lock_init(&kiq->ring_lock); - r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); - if (r) - return r; - ring->adev = NULL; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) { - amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); amdgpu_ring_fini(ring); } @@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; unsigned long flags; - uint32_t seq; + uint32_t seq, reg_val_offs = 0, value = 0; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); 
spin_lock_irqsave(&kiq->ring_lock, flags); + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + pr_err("critical bug! too more kiq readers\n"); Typo here, this should probably read "too many kiq readers". But I don't think we need the error message here anyway, the failed_kiq_read label also prints an error. With that fixed the patch is Reviewed-by: Christian König . Thanks, Christian. + goto failed_kiq_read; + } amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_rreg(ring, reg); + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -707,7 +707,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) if (cnt > MAX_KIQ_REG_TRY) goto failed_kiq_read; - return adev->wb.wb[kiq->reg_val_offs]; + mb(); + value = adev->wb.wb[reg_val_offs]; + amdgpu_device_wb_free(adev, reg_val_offs); + return value; failed_kiq_read: pr_err("failed to read reg:%x\n", reg); diff --git a/d
[PATCH] drm/amdgpu: protect kiq overrun
Wait for the oldest sequence on the kiq ring to be signaled in order to make sure there will be no kiq overrun. v2: remove unused the variable and correct kiq max_sub_num value Signed-off-by: Yintian Tao --- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 6 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 7 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 + 8 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..fac8b9713dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! too many kiq submission\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..fd42c126510f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! 
too many kiq submissions\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..84e66c45df37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, AMDGPU_RING_PRIO_DEFAULT); if (r) dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + else + kiq->max_sub_num = (ring->ring_size / 4) / + (ring->funcs->align_mask + 1); return r; } @@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, return 0; } +int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) +{ + uint32_t seq = 0; + signed long r = 0; + + seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num); + if (seq > kiq->max_sub_num) { + r = amdgpu_fence_wait_polling(&kiq->ring, seq, + MAX_KIQ_REG_WAIT); + return r < 1 ? -ETIME : 0; + } + + return 0; +} + uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; @@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug! 
too many kiq readers\n"); @@ -728,6 +752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) BUG_ON(!ring->funcs->emit_wreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_write; + } + amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); amdgpu_fence_emit_polling(ring, &seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ee698f0246d8..1ee59a927bd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -103,6 +103,7 @@ struct amdgpu_kiq { struct amdgpu_ring ring; struct amdgpu_irq_src irq; const struct kiq_pm4_funcs *pmf; + uint32_tmax_sub_num; }; /* @@ -387,4 +388,6 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_dev
RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
[AMD Official Use Only - Internal Distribution Only] Hi ,Yintian & Christian I still don't understand why we need this complicated change here . Why can not just allocate few more extra space in the ring for each read and use the space to store the output value ? Regards Shaoyun.liu -Original Message- From: amd-gfx On Behalf Of Christian König Sent: Wednesday, April 22, 2020 8:42 AM To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg Am 22.04.20 um 14:36 schrieb Yintian Tao: > According to the current kiq read register method, there will be race > condition when using KIQ to read register if multiple clients want to > read at same time just like the expample below: > 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the > seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll > the seqno-1 5. the kiq complete these two read operation 6. client-A > to read the register at the wb buffer and > get REG-1 value > > Therefore, use amdgpu_device_wb_get() to request reg_val_offs for each > kiq read register. 
> > v2: fix the error remove > > Signed-off-by: Yintian Tao > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 19 ++--- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++--- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 7 +++--- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 > 7 files changed, 41 insertions(+), 27 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 4e1d4cfe7a9f..7ee5a4da398a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct > amdgpu_cs_parser *p, > /* >* Writeback >*/ > -#define AMDGPU_MAX_WB 128/* Reserve at most 128 WB slots for > amdgpu-owned rings. */ > +#define AMDGPU_MAX_WB 256/* Reserve at most 256 WB slots for > amdgpu-owned rings. */ > > struct amdgpu_wb { > struct amdgpu_bo*wb_obj; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index ea576b4260a4..d5a59d7c48d6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device > *adev, > > spin_lock_init(&kiq->ring_lock); > > - r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); > - if (r) > - return r; > - > ring->adev = NULL; > ring->ring_obj = NULL; > ring->use_doorbell = true; > @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device > *adev, > > void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) > { > - amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); > amdgpu_ring_fini(ring); > } > > @@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, > uint32_t reg) > { > signed long r, cnt = 0; > unsigned long flags; > - uint32_t seq; > + uint32_t seq, reg_val_offs = 0, 
value = 0; > struct amdgpu_kiq *kiq = &adev->gfx.kiq; > struct amdgpu_ring *ring = &kiq->ring; > > BUG_ON(!ring->funcs->emit_rreg); > > spin_lock_irqsave(&kiq->ring_lock, flags); > + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { > + spin_unlock_irqrestore(&kiq->ring_lock, flags); > + pr_err("critical bug! too more kiq readers\n"); Typo here, this should probably read "too many kiq readers". But I don't think we need the error message here anyway, the failed_kiq_read label also prints an error. With that fixed the patch is Reviewed-by: Christian König . Thanks, Christian. > + goto failed_kiq_read; > + } > amdgpu_ring_alloc(ring, 32); > - amdgpu_ring_emit_rreg(ring, reg); > + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); > amdgpu_fence_emit_polling(ring, &seq); > amdgpu_ring_commit(ring); > spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -707,7 +707,10 > @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) > if (cnt > MAX_KIQ_REG_TRY) > goto failed_kiq_read; > > - return adev->wb.wb[kiq->reg_val_offs]; > + mb(); > + value = adev->wb.wb[reg_val_offs]; > + amdgpu_device_wb_free(adev, reg_val_offs); > + return value; > > failed_kiq_read: > pr_err("failed to read reg:%x\n", reg); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index 634746829024..ee698f0246d8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ b/drivers/gpu/drm/amd/amdg
[PATCH] drm/amdgpu: protect kiq overrun
Wait for the oldest to be signaled to make sure there will be no kiq overrun. Signed-off-by: Yintian Tao --- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 8 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 8 - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 7 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 + 8 files changed, 73 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..72a5d7e15494 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -311,7 +311,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v10_compute_mqd *m; - uint32_t mec, pipe; + uint32_t mec, pipe, seq = 0; int r; m = get_mqd(mqd); @@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! 
too many kiq submission\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..332f72b2d334 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -309,7 +309,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v9_mqd *m; - uint32_t mec, pipe; + uint32_t mec, pipe, seq = 0; int r; m = get_mqd(mqd); @@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, mec, pipe, queue_id); spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + pr_err("critical bug! too many kiq submissions\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..387b1a8ed4df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, AMDGPU_RING_PRIO_DEFAULT); if (r) dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + else + kiq->max_sub_num = (ring->ring_size / 4) / + ring->funcs->align_mask; return r; } @@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, return 0; } +int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) +{ + uint32_t seq = 0; + signed long r = 0; + + seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num); + if (seq > kiq->max_sub_num) { + r = amdgpu_fence_wait_polling(&kiq->ring, seq, + MAX_KIQ_REG_WAIT); + return r < 1 ? 
-ETIME : 0; + } + + return 0; +} + uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; @@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_read; + } + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug! too many kiq readers\n"); @@ -728,6 +752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) BUG_ON(!ring->funcs->emit_wreg); spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + goto failed_kiq_write; + } + amdgpu_ring_alloc(ring, 32);
Re: [PATCH] drm: amd/display: fix Kconfig help text
On 2020-04-21 7:34 p.m., Randy Dunlap wrote: > From: Randy Dunlap > > Fix help text: indent one tab + 2 spaces; end a sentence with a > period; and collapse short lines of text to one line. > > Fixes: 23c61b4599c4 ("drm/amd: Fix Kconfig indentation") > Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") > Signed-off-by: Randy Dunlap > Cc: Harry Wentland > Cc: Alex Deucher > Cc: Krzysztof Kozlowski Reviewed-by: Harry Wentland Harry > --- > drivers/gpu/drm/amd/display/Kconfig |8 ++-- > 1 file changed, 2 insertions(+), 6 deletions(-) > > --- linux-next-20200421.orig/drivers/gpu/drm/amd/display/Kconfig > +++ linux-next-20200421/drivers/gpu/drm/amd/display/Kconfig > @@ -21,16 +21,12 @@ config DRM_AMD_DC_HDCP > bool "Enable HDCP support in DC" > depends on DRM_AMD_DC > help > - Choose this option > - if you want to support > - HDCP authentication > + Choose this option if you want to support HDCP authentication. > > config DEBUG_KERNEL_DC > bool "Enable kgdb break in DC" > depends on DRM_AMD_DC > help > - Choose this option > - if you want to hit > - kdgb_break in assert. > + Choose this option if you want to hit kdgb_break in assert. > > endmenu > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: put the audio codec into suspend state before gpu reset V2
On Tue, Apr 21, 2020 at 10:42 PM Evan Quan wrote: > > At default, the autosuspend delay of audio controller is 3S. If the > gpu reset is triggered within 3S(after audio controller idle), > the audio controller may be unable into suspended state. Then > the sudden gpu reset will cause some audio errors. The change > here is targeted to resolve this. > > However if the audio controller is in use when the gpu reset > triggered, this change may be still not enough to put the > audio controller into suspend state. Under this case, the > gpu reset will still proceed but there will be a warning > message printed("failed to suspend display audio"). > > V2: limit this for BACO and mode1 reset only > > Change-Id: I33d85e6fcad1882eb33f9cde8916d57be8d5a87a > Signed-off-by: Evan Quan > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 70 ++ > 1 file changed, 70 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 2d4b78d96426..70f43b1aed78 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -69,6 +69,7 @@ > > #include > #include > +#include > > MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); > MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); > @@ -4146,6 +4147,59 @@ static void amdgpu_device_unlock_adev(struct > amdgpu_device *adev) > mutex_unlock(&adev->lock_reset); > } > > +static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) > +{ > + struct pci_dev *p = NULL; > + > + p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), > + adev->pdev->bus->number, 1); > + if (p) { > + pm_runtime_enable(&(p->dev)); > + pm_runtime_resume(&(p->dev)); > + } > +} > + > +static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) > +{ > + enum amd_reset_method reset_method; > + struct pci_dev *p = NULL; > + unsigned long end_jiffies; > + > + /* > +* For now, only BACO and mode1 reset are confirmed > +* to suffer the 
audio issue without proper suspended. > +*/ > + reset_method = amdgpu_asic_reset_method(adev); > + if ((reset_method != AMD_RESET_METHOD_BACO) && > +(reset_method != AMD_RESET_METHOD_MODE1)) > + return -EINVAL; > + > + p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), > + adev->pdev->bus->number, 1); > + if (!p) > + return -ENODEV; > + > + /* > +* 3S is the audio controller default autosuspend delay setting. > +* 4S used here is guaranteed to cover that. > +*/ Instead of hardcoding 3S, we should probably use pm_runtime_autosuspend_expiration() to query how much time is left and then use that. That way this will work even if userspace has changed the delay. With that fixed: Reviewed-by: Alex Deucher Alex > + end_jiffies = msecs_to_jiffies(4000) + jiffies; > + while (!pm_runtime_status_suspended(&(p->dev))) { > + if (!pm_runtime_suspend(&(p->dev))) > + break; > + > + if (time_after(jiffies, end_jiffies)) { > + dev_warn(adev->dev, "failed to suspend display > audio\n"); > + /* TODO: abort the succeeding gpu reset? */ > + return -ETIMEDOUT; > + } > + } > + > + pm_runtime_disable(&(p->dev)); > + > + return 0; > +} > + > /** > * amdgpu_device_gpu_recover - reset the asic and recover scheduler > * > @@ -4170,6 +4224,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device > *adev, > bool use_baco = > (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? > true : false; > + bool audio_suspended = false; > > /* > * Flush RAM to disk so that after reboot > @@ -4227,6 +4282,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device > *adev, > return 0; > } > > + /* > +* Try to put the audio codec into suspend state > +* before gpu reset started. > +* > +* Due to the power domain of the graphics device > +* is shared with AZ power domain. Without this, > +* we may change the audio hardware from behind > +* the audio driver's back. That will trigger > +* some audio codec errors. 
> +*/ > + if (!amdgpu_device_suspend_display_audio(tmp_adev)) > + audio_suspended = true; > + > amdgpu_ras_set_error_query_ready(tmp_adev, false); > > cancel_delayed_work_sync(&tmp_adev->delayed_init_work); > @@ -4339,6 +4407,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device > *adev, >
[PATCH] drm/amdgpu: Init data to avoid oops while reading pp_num_states.
For chips like CHIP_OLAND with SI support enabled (amdgpu.si_support=1), amdgpu exposes pp_num_states in the /sys directory. At that point, reading the pp_num_states file executes the amdgpu_get_pp_num_states function. In our case, the data hasn't been initialized, so the kernel accesses an illegal address, triggers a segmentation fault, and the system reboots soon after: uos@uos-PC:~$ cat /sys/devices/pci\:00/\:00\:00.0/\:01\:00 .0/pp_num_states Message from syslogd@uos-PC at Apr 22 09:26:20 ... kernel:[ 82.154129] Internal error: Oops: 9604 [#1] SMP This patch fixes the problem so that reading the file no longer triggers the kernel segmentation fault. Signed-off-by: limingyu Signed-off-by: zhoubinbin --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index abe94a55ecad..17de9dc60ea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -444,8 +444,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, ret = smu_get_power_num_states(&adev->smu, &data); if (ret) return ret; - } else if (adev->powerplay.pp_funcs->get_pp_num_states) + } else if (adev->powerplay.pp_funcs->get_pp_num_states) { amdgpu_dpm_get_pp_num_states(adev, &data); + } else + memset(&data, 0, sizeof(data)); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); -- 2.20.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm 5/5] mm/hmm: remove the customizable pfn format from hmm_range_fault
On Wed, Apr 22, 2020 at 08:03:29AM +0200, Christoph Hellwig wrote: > > > On Tue, Apr 21, 2020 at 09:21:46PM -0300, Jason Gunthorpe wrote: > > +void nouveau_hmm_convert_pfn(struct nouveau_drm *drm, struct hmm_range > > *range, > > +u64 *ioctl_addr) > > { > > unsigned long i, npages; > > > > + /* > > +* The ioctl_addr prepared here is passed through nvif_object_ioctl() > > +* to an eventual DMA map on some call chain like: > > +*nouveau_svm_fault(): > > +* args.i.m.method = NVIF_VMM_V0_PFNMAP > > +* nouveau_range_fault() > > +* nvif_object_ioctl() > > +*client->driver->ioctl() > > +* struct nvif_driver nvif_driver_nvkm: > > +* .ioctl = nvkm_client_ioctl > > +*nvkm_ioctl() > > +* nvkm_ioctl_path() > > +* nvkm_ioctl_v0[type].func(..) > > +* nvkm_ioctl_mthd() > > +*nvkm_object_mthd() > > +* struct nvkm_object_func nvkm_uvmm: > > +* .mthd = nvkm_uvmm_mthd > > +*nvkm_uvmm_mthd() > > +* nvkm_uvmm_mthd_pfnmap() > > +* nvkm_vmm_pfn_map() > > +* nvkm_vmm_ptes_get_map() > > +*func == gp100_vmm_pgt_pfn > > +* struct nvkm_vmm_desc_func gp100_vmm_desc_spt: > > +* .pfn = gp100_vmm_pgt_pfn > > +* nvkm_vmm_iter() > > +* REF_PTES == func == gp100_vmm_pgt_pfn() > > +*dma_map_page() > > +* > > +* This is all just encoding the internal hmm reprensetation into a > > +* different nouveau internal representation. > > +*/ > > Nice callchain from hell.. Unfortunately such "code listings" tend to > get out of date very quickly, so I'm not sure it is worth keeping in > the code. What would be really worthile is consolidating the two > different sets of defines (NVIF_VMM_PFNMAP_V0_ vs NVKM_VMM_PFN_) > to make the code a little easier to follow. I was mainly concerned that this function is using hmm properly, becuase it sure looks like it is just forming the CPU physical address into a HW specific data. 
But it turns out it is just internal data for some other code, and the dma_map is impossibly far away. It took forever to find, so I figured I'd leave a hint for the next poor soul that has to look at this. Also, I think it shows there is no 'performance' argument here: if this path needs more performance, the above should be cleaned up before we abuse hmm_range_fault. Put it in the commit message instead? > > npages = (range->end - range->start) >> PAGE_SHIFT; > > for (i = 0; i < npages; ++i) { > > struct page *page; > > > > + if (!(range->hmm_pfns[i] & HMM_PFN_VALID)) { > > + ioctl_addr[i] = 0; > > continue; > > + } > > Can't we rely on the caller pre-zeroing the array? This ends up as args.phys in nouveau_svm_fault - I didn't see a zeroing? I think it makes sense that this routine fully sets the output array and does not assume pre-initialization. > > + page = hmm_pfn_to_page(range->hmm_pfns[i]); > > + if (is_device_private_page(page)) > > + ioctl_addr[i] = nouveau_dmem_page_addr(page) | > > + NVIF_VMM_PFNMAP_V0_V | > > + NVIF_VMM_PFNMAP_V0_VRAM; > > + else > > + ioctl_addr[i] = page_to_phys(page) | > > + NVIF_VMM_PFNMAP_V0_V | > > + NVIF_VMM_PFNMAP_V0_HOST; > > + if (range->hmm_pfns[i] & HMM_PFN_WRITE) > > + ioctl_addr[i] |= NVIF_VMM_PFNMAP_V0_W; > > Now that this routine isn't really device memory specific any more, I > wonder if it should move to nouveau_svm.c. Yes, if we expose nouveau_dmem_page_addr(), I will try it. Thanks, Jason ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
Am 22.04.20 um 14:36 schrieb Yintian Tao: According to the current kiq read register method, there will be race condition when using KIQ to read register if multiple clients want to read at same time just like the expample below: 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. client-A to read the register at the wb buffer and get REG-1 value Therefore, use amdgpu_device_wb_get() to request reg_val_offs for each kiq read register. v2: fix the error remove Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 19 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 7 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 7 files changed, 41 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e1d4cfe7a9f..7ee5a4da398a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. 
*/ struct amdgpu_wb { struct amdgpu_bo*wb_obj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ea576b4260a4..d5a59d7c48d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, spin_lock_init(&kiq->ring_lock); - r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); - if (r) - return r; - ring->adev = NULL; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) { - amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); amdgpu_ring_fini(ring); } @@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; unsigned long flags; - uint32_t seq; + uint32_t seq, reg_val_offs = 0, value = 0; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + pr_err("critical bug! too more kiq readers\n"); Typo here, this should probably read "too many kiq readers". But I don't think we need the error message here anyway; the failed_kiq_read label also prints an error. With that fixed, the patch is Reviewed-by: Christian König. Thanks, Christian. 
+ goto failed_kiq_read; + } amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_rreg(ring, reg); + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -707,7 +707,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) if (cnt > MAX_KIQ_REG_TRY) goto failed_kiq_read; - return adev->wb.wb[kiq->reg_val_offs]; + mb(); + value = adev->wb.wb[reg_val_offs]; + amdgpu_device_wb_free(adev, reg_val_offs); + return value; failed_kiq_read: pr_err("failed to read reg:%x\n", reg); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 634746829024..ee698f0246d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -103,7 +103,6 @@ struct amdgpu_kiq { struct amdgpu_ring ring; struct amdgpu_irq_src irq; const struct kiq_pm4_funcs *pmf; - uint32_treg_val_offs; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index f61664ee4940..137d3d2b46e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -181,7 +181,8 @@ struct amdgpu_ring_funcs { void (*end_use)(struct amdgpu_ring *ring); void (*emit_switch_buffer) (struct amdgpu_ring *ring); void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); - void (*emit_r
RE: [PATCH] drm/amdgpu: refine kiq access register
Hi Christian Thanks, I got it. I will send another patch for the KIQ overrun problem Best Regards Yintian Tao -Original Message- From: Koenig, Christian Sent: 2020年4月22日 20:33 To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 14:20 schrieb Tao, Yintian: > Hi Christian > > > Please see inline commetns. > -Original Message- > From: Koenig, Christian > Sent: 2020年4月22日 19:57 > To: Tao, Yintian ; Liu, Monk ; > Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: refine kiq access register > > Am 22.04.20 um 13:49 schrieb Tao, Yintian: >> Hi Christian >> >> >> Can you help answer the questions below? Thanks in advance. >> -Original Message- >> From: Koenig, Christian >> Sent: 2020年4月22日 19:03 >> To: Tao, Yintian ; Liu, Monk ; >> Kuehling, Felix >> Cc: amd-gfx@lists.freedesktop.org >> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register >> >> Am 22.04.20 um 11:29 schrieb Yintian Tao: >>> According to the current kiq access register method, there will be >>> race condition when using KIQ to read register if multiple clients >>> want to read at same time just like the expample below: >>> 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the >>> seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll >>> the seqno-1 5. the kiq complete these two read operation 6. client-A >>> to read the register at the wb buffer and >>> get REG-1 value >>> >>> And if there are multiple clients to frequently write registers >>> through KIQ which may raise the KIQ ring buffer overwritten problem. >>> >>> Therefore, allocate fixed number wb slot for rreg use and limit the >>> submit number which depends on the kiq ring_size in order to prevent >>> the overwritten problem. >>> >>> v2: directly use amdgpu_device_wb_get() for each read instead >>>of to reserve fixde number slot. 
>>>if there is no enough kiq ring buffer or rreg slot then >>>directly print error log and return instead of busy waiting >> I would split that into three patches. One for each problem we have here: >> >> 1. Fix kgd_hiq_mqd_load() and maybe other occasions to use >> spin_lock_irqsave(). >> [yttao]: Do you mean that we need to use spin_lock_irqsave for the functions >> just like kgd_hiq_mqd_load()? > Yes, I strongly think so. > > See when you have one spin lock you either need always need to lock it with > irqs disabled or never. > > In other words we always need to either use spin_lock() or > spin_lock_irqsave(), but never mix them with the same lock. > > The only exception to this rule is when you take multiple locks, e.g. > you can do: > > spin_lock_irqsave(&a, flags); > spin_lock(&b, flags); > spin_lock(&c, flags); > > spin_unlock_irqsave(&a, flags); > > Here you don't need to use spin_lock_irqsave for b and c. But we rarely have > that case in the code. > [yttao]: thanks , I got it. I will submit another patch for it. > >> 2. Prevent the overrung of the KIQ. Please drop the approach with the >> atomic here. Instead just add a amdgpu_fence_wait_polling() into >> amdgpu_fence_emit_polling() as I discussed with Monk. >> [yttao]: Sorry, I can't get your original idea for the >> amdgpu_fence_wait_polling(). Can you give more details about it? Thanks in >> advance. >> >> "That is actually only a problem because the KIQ uses polling waits. >> >> See amdgpu_fence_emit() waits for the oldest possible fence to be signaled >> before emitting a new one. >> >> I suggest that we do the same in amdgpu_fence_emit_polling(). A one liner >> like the following should be enough: >> >> amdgpu_fence_wait_polling(ring, seq - ring->fence_drv.num_fences_mask, >> timeout);" >> [yttao]: there is no usage of num_fences_mask at kiq fence polling, the >> num_fences_mask is only effective at dma_fence architecture. >> If I understand correctly, do you want the protype code below? 
>> If the protype code is wrong, can you help give one sample? Thanks in >> advance. >> >> int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) { >> uint32_t seq; >> >> if (!s) >> return -EINVAL; >> +amdgpu_fence_wait_polling(ring, seq, timeout); >> seq = ++ring->fence_drv.sync_seq; > Your understanding sounds more or less correct. The code should look > something like this: > > seq = ++ring->fence_drv.sync_seq; > amdgpu_fence_wait_polling(ring, seq - > number_of_allowed_submissions_to_the_kiq, timeout); > [yttao]: whether we need directly wait at the first just like below? > Otherwise, amdgpu_ring_emit_wreg may overwrite the KIQ ring buffer. There should always be room for at least one more submission. As long as we always submit a fence checking the free room there should be fine. Regards, Christian. > + amdgpu_fence_wait_polling(ring, seq - >
[PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
With the current KIQ register read method, there is a race condition when multiple clients read registers through the KIQ at the same time, as in the example below: 1. client-A starts to read REG-0 through KIQ 2. client-A polls seqno-0 3. client-B starts to read REG-1 through KIQ 4. client-B polls seqno-1 5. the KIQ completes these two read operations 6. client-A reads the register value from the wb buffer and gets the REG-1 value Therefore, use amdgpu_device_wb_get() to request a reg_val_offs for each KIQ register read. v2: fix the error remove Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 19 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 7 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 7 files changed, 41 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e1d4cfe7a9f..7ee5a4da398a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. 
*/ struct amdgpu_wb { struct amdgpu_bo*wb_obj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ea576b4260a4..d5a59d7c48d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, spin_lock_init(&kiq->ring_lock); - r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); - if (r) - return r; - ring->adev = NULL; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) { - amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); amdgpu_ring_fini(ring); } @@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; unsigned long flags; - uint32_t seq; + uint32_t seq, reg_val_offs = 0, value = 0; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + pr_err("critical bug! 
too more kiq readers\n"); + goto failed_kiq_read; + } amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_rreg(ring, reg); + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -707,7 +707,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) if (cnt > MAX_KIQ_REG_TRY) goto failed_kiq_read; - return adev->wb.wb[kiq->reg_val_offs]; + mb(); + value = adev->wb.wb[reg_val_offs]; + amdgpu_device_wb_free(adev, reg_val_offs); + return value; failed_kiq_read: pr_err("failed to read reg:%x\n", reg); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 634746829024..ee698f0246d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -103,7 +103,6 @@ struct amdgpu_kiq { struct amdgpu_ring ring; struct amdgpu_irq_src irq; const struct kiq_pm4_funcs *pmf; - uint32_treg_val_offs; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index f61664ee4940..137d3d2b46e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -181,7 +181,8 @@ struct amdgpu_ring_funcs { void (*end_use)(struct amdgpu_ring *ring); void (*emit_switch_buffer) (struct amdgpu_ring *ring); void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); - void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); + void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg, + uint32_t reg_val_offs); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg
[PATCH] drm/amdgpu: request reg_val_offs each kiq read reg
With the current KIQ register read method, there is a race condition when multiple clients read registers through the KIQ at the same time, as in the example below: 1. client-A starts to read REG-0 through KIQ 2. client-A polls seqno-0 3. client-B starts to read REG-1 through KIQ 4. client-B polls seqno-1 5. the KIQ completes these two read operations 6. client-A reads the register value from the wb buffer and gets the REG-1 value Therefore, use amdgpu_device_wb_get() to request a reg_val_offs for each KIQ register read. Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 19 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 7 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 7 files changed, 41 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e1d4cfe7a9f..7ee5a4da398a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. 
*/ struct amdgpu_wb { struct amdgpu_bo*wb_obj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ea576b4260a4..d5a59d7c48d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, spin_lock_init(&kiq->ring_lock); - r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); - if (r) - return r; - ring->adev = NULL; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) { - amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); amdgpu_ring_fini(ring); } @@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; unsigned long flags; - uint32_t seq; + uint32_t seq, reg_val_offs = 0, value = 0; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + pr_err("critical bug! 
too more kiq readers\n"); + goto failed_kiq_read; + } amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_rreg(ring, reg); + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -707,7 +707,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) if (cnt > MAX_KIQ_REG_TRY) goto failed_kiq_read; - return adev->wb.wb[kiq->reg_val_offs]; + mb(); + value = adev->wb.wb[reg_val_offs]; + amdgpu_device_wb_free(adev, reg_val_offs); + return value; failed_kiq_read: pr_err("failed to read reg:%x\n", reg); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 634746829024..ee698f0246d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -103,7 +103,6 @@ struct amdgpu_kiq { struct amdgpu_ring ring; struct amdgpu_irq_src irq; const struct kiq_pm4_funcs *pmf; - uint32_treg_val_offs; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index f61664ee4940..137d3d2b46e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -181,7 +181,8 @@ struct amdgpu_ring_funcs { void (*end_use)(struct amdgpu_ring *ring); void (*emit_switch_buffer) (struct amdgpu_ring *ring); void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); - void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); + void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg, + uint32_t reg_val_offs); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
Re: [PATCH] drm/amdgpu: refine kiq access register
Am 22.04.20 um 14:20 schrieb Tao, Yintian: Hi Christian Please see inline commetns. -Original Message- From: Koenig, Christian Sent: 2020年4月22日 19:57 To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 13:49 schrieb Tao, Yintian: Hi Christian Can you help answer the questions below? Thanks in advance. -Original Message- From: Koenig, Christian Sent: 2020年4月22日 19:03 To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 11:29 schrieb Yintian Tao: According to the current kiq access register method, there will be race condition when using KIQ to read register if multiple clients want to read at same time just like the expample below: 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. client-A to read the register at the wb buffer and get REG-1 value And if there are multiple clients to frequently write registers through KIQ which may raise the KIQ ring buffer overwritten problem. Therefore, allocate fixed number wb slot for rreg use and limit the submit number which depends on the kiq ring_size in order to prevent the overwritten problem. v2: directly use amdgpu_device_wb_get() for each read instead of to reserve fixde number slot. if there is no enough kiq ring buffer or rreg slot then directly print error log and return instead of busy waiting I would split that into three patches. One for each problem we have here: 1. Fix kgd_hiq_mqd_load() and maybe other occasions to use spin_lock_irqsave(). [yttao]: Do you mean that we need to use spin_lock_irqsave for the functions just like kgd_hiq_mqd_load()? Yes, I strongly think so. See when you have one spin lock you either need always need to lock it with irqs disabled or never. 
In other words we always need to either use spin_lock() or spin_lock_irqsave(), but never mix them with the same lock. The only exception to this rule is when you take multiple locks, e.g. you can do: spin_lock_irqsave(&a, flags); spin_lock(&b, flags); spin_lock(&c, flags); spin_unlock_irqsave(&a, flags); Here you don't need to use spin_lock_irqsave for b and c. But we rarely have that case in the code. [yttao]: thanks , I got it. I will submit another patch for it. 2. Prevent the overrung of the KIQ. Please drop the approach with the atomic here. Instead just add a amdgpu_fence_wait_polling() into amdgpu_fence_emit_polling() as I discussed with Monk. [yttao]: Sorry, I can't get your original idea for the amdgpu_fence_wait_polling(). Can you give more details about it? Thanks in advance. "That is actually only a problem because the KIQ uses polling waits. See amdgpu_fence_emit() waits for the oldest possible fence to be signaled before emitting a new one. I suggest that we do the same in amdgpu_fence_emit_polling(). A one liner like the following should be enough: amdgpu_fence_wait_polling(ring, seq - ring->fence_drv.num_fences_mask, timeout);" [yttao]: there is no usage of num_fences_mask at kiq fence polling, the num_fences_mask is only effective at dma_fence architecture. If I understand correctly, do you want the protype code below? If the protype code is wrong, can you help give one sample? Thanks in advance. int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) { uint32_t seq; if (!s) return -EINVAL; + amdgpu_fence_wait_polling(ring, seq, timeout); seq = ++ring->fence_drv.sync_seq; Your understanding sounds more or less correct. The code should look something like this: seq = ++ring->fence_drv.sync_seq; amdgpu_fence_wait_polling(ring, seq - number_of_allowed_submissions_to_the_kiq, timeout); [yttao]: whether we need directly wait at the first just like below? Otherwise, amdgpu_ring_emit_wreg may overwrite the KIQ ring buffer. 
There should always be room for at least one more submission. As long as we always submit a fence checking the free room there should be fine. Regards, Christian. + amdgpu_fence_wait_polling(ring, seq - number_of_allowed_submissions_to_the_kiq, timeout); spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); amdgpu_fence_emit_polling(ring, &seq); /* wait */ amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); I just used num_fences_mask as number_of_allowed_submissions_to_the_kiq because it is probably a good value to start with. But you could give that as parameter as well if you think that makes more sense. amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_ad
RE: [PATCH] drm/amdgpu: refine kiq access register
Hi Christian Please see inline commetns. -Original Message- From: Koenig, Christian Sent: 2020年4月22日 19:57 To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 13:49 schrieb Tao, Yintian: > Hi Christian > > > Can you help answer the questions below? Thanks in advance. > -Original Message- > From: Koenig, Christian > Sent: 2020年4月22日 19:03 > To: Tao, Yintian ; Liu, Monk ; > Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: refine kiq access register > > Am 22.04.20 um 11:29 schrieb Yintian Tao: >> According to the current kiq access register method, there will be >> race condition when using KIQ to read register if multiple clients >> want to read at same time just like the expample below: >> 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the >> seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll >> the seqno-1 5. the kiq complete these two read operation 6. client-A >> to read the register at the wb buffer and >> get REG-1 value >> >> And if there are multiple clients to frequently write registers >> through KIQ which may raise the KIQ ring buffer overwritten problem. >> >> Therefore, allocate fixed number wb slot for rreg use and limit the >> submit number which depends on the kiq ring_size in order to prevent >> the overwritten problem. >> >> v2: directly use amdgpu_device_wb_get() for each read instead >> of to reserve fixde number slot. >> if there is no enough kiq ring buffer or rreg slot then >> directly print error log and return instead of busy waiting > I would split that into three patches. One for each problem we have here: > > 1. Fix kgd_hiq_mqd_load() and maybe other occasions to use > spin_lock_irqsave(). > [yttao]: Do you mean that we need to use spin_lock_irqsave for the functions > just like kgd_hiq_mqd_load()? Yes, I strongly think so. 
See when you have one spin lock you either always need to lock it with irqs disabled or never. In other words we always need to either use spin_lock() or spin_lock_irqsave(), but never mix them with the same lock. The only exception to this rule is when you take multiple locks, e.g. you can do: spin_lock_irqsave(&a, flags); spin_lock(&b); spin_lock(&c); spin_unlock_irqrestore(&a, flags); Here you don't need to use spin_lock_irqsave for b and c. But we rarely have that case in the code. [yttao]: thanks, I got it. I will submit another patch for it. > 2. Prevent the overrun of the KIQ. Please drop the approach with the > atomic here. Instead just add an amdgpu_fence_wait_polling() into > amdgpu_fence_emit_polling() as I discussed with Monk. > [yttao]: Sorry, I can't get your original idea for the > amdgpu_fence_wait_polling(). Can you give more details about it? Thanks in > advance. > > "That is actually only a problem because the KIQ uses polling waits. > > See amdgpu_fence_emit() waits for the oldest possible fence to be signaled > before emitting a new one. > > I suggest that we do the same in amdgpu_fence_emit_polling(). A one liner > like the following should be enough: > > amdgpu_fence_wait_polling(ring, seq - ring->fence_drv.num_fences_mask, > timeout);" > [yttao]: there is no usage of num_fences_mask at kiq fence polling, the > num_fences_mask is only effective at dma_fence architecture. > If I understand correctly, do you want the prototype code below? > If the prototype code is wrong, can you help give one sample? Thanks in advance. > > int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) { > uint32_t seq; > > if (!s) > return -EINVAL; > + amdgpu_fence_wait_polling(ring, seq, timeout); > seq = ++ring->fence_drv.sync_seq; Your understanding sounds more or less correct. 
The code should look something like this: seq = ++ring->fence_drv.sync_seq; amdgpu_fence_wait_polling(ring, seq - number_of_allowed_submissions_to_the_kiq, timeout); [yttao]: do we need to wait directly at the very start, just like below? Otherwise, amdgpu_ring_emit_wreg may overwrite the KIQ ring buffer. + amdgpu_fence_wait_polling(ring, seq - number_of_allowed_submissions_to_the_kiq, timeout); spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); amdgpu_fence_emit_polling(ring, &seq); /* wait */ amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); I just used num_fences_mask as number_of_allowed_submissions_to_the_kiq because it is probably a good value to start with. But you could give that as parameter as well if you think that makes more sense. > amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, > seq, 0); > > *s = seq; > >
Re: [PATCH] drm/amdgpu: refine kiq access register
Am 22.04.20 um 13:49 schrieb Tao, Yintian: Hi Christian Can you help answer the questions below? Thanks in advance. -Original Message- From: Koenig, Christian Sent: 2020年4月22日 19:03 To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 11:29 schrieb Yintian Tao: According to the current kiq access register method, there will be race condition when using KIQ to read register if multiple clients want to read at same time just like the expample below: 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. client-A to read the register at the wb buffer and get REG-1 value And if there are multiple clients to frequently write registers through KIQ which may raise the KIQ ring buffer overwritten problem. Therefore, allocate fixed number wb slot for rreg use and limit the submit number which depends on the kiq ring_size in order to prevent the overwritten problem. v2: directly use amdgpu_device_wb_get() for each read instead of to reserve fixde number slot. if there is no enough kiq ring buffer or rreg slot then directly print error log and return instead of busy waiting I would split that into three patches. One for each problem we have here: 1. Fix kgd_hiq_mqd_load() and maybe other occasions to use spin_lock_irqsave(). [yttao]: Do you mean that we need to use spin_lock_irqsave for the functions just like kgd_hiq_mqd_load()? Yes, I strongly think so. See when you have one spin lock you either need always need to lock it with irqs disabled or never. In other words we always need to either use spin_lock() or spin_lock_irqsave(), but never mix them with the same lock. The only exception to this rule is when you take multiple locks, e.g. 
you can do: spin_lock_irqsave(&a, flags); spin_lock(&b, flags); spin_lock(&c, flags); spin_unlock_irqsave(&a, flags); Here you don't need to use spin_lock_irqsave for b and c. But we rarely have that case in the code. 2. Prevent the overrung of the KIQ. Please drop the approach with the atomic here. Instead just add a amdgpu_fence_wait_polling() into amdgpu_fence_emit_polling() as I discussed with Monk. [yttao]: Sorry, I can't get your original idea for the amdgpu_fence_wait_polling(). Can you give more details about it? Thanks in advance. "That is actually only a problem because the KIQ uses polling waits. See amdgpu_fence_emit() waits for the oldest possible fence to be signaled before emitting a new one. I suggest that we do the same in amdgpu_fence_emit_polling(). A one liner like the following should be enough: amdgpu_fence_wait_polling(ring, seq - ring->fence_drv.num_fences_mask, timeout);" [yttao]: there is no usage of num_fences_mask at kiq fence polling, the num_fences_mask is only effective at dma_fence architecture. If I understand correctly, do you want the protype code below? If the protype code is wrong, can you help give one sample? Thanks in advance. int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) { uint32_t seq; if (!s) return -EINVAL; + amdgpu_fence_wait_polling(ring, seq, timeout); seq = ++ring->fence_drv.sync_seq; Your understanding sounds more or less correct. The code should look something like this: seq = ++ring->fence_drv.sync_seq; amdgpu_fence_wait_polling(ring, seq - number_of_allowed_submissions_to_the_kiq, timeout); I just used num_fences_mask as number_of_allowed_submissions_to_the_kiq because it is probably a good value to start with. But you could give that as parameter as well if you think that makes more sense. amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, ¦ seq, 0); *s = seq; return 0; } 3. Use amdgpu_device_wb_get() each time we need to submit a read. [yttao]: yes, I will do it. Thanks, Christian. 
Regards, Christian. Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 13 ++- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 13 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 83 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 ++- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 35 +--- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 13 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 13 ++- 12 files changed, 167 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/am
RE: [PATCH] drm/amdgpu: refine kiq access register
Hi Christian Can you help answer the questions below? Thanks in advance. -Original Message- From: Koenig, Christian Sent: 2020年4月22日 19:03 To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 11:29 schrieb Yintian Tao: > According to the current kiq access register method, there will be > race condition when using KIQ to read register if multiple clients > want to read at same time just like the expample below: > 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the > seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll > the seqno-1 5. the kiq complete these two read operation 6. client-A > to read the register at the wb buffer and > get REG-1 value > > And if there are multiple clients to frequently write registers > through KIQ which may raise the KIQ ring buffer overwritten problem. > > Therefore, allocate fixed number wb slot for rreg use and limit the > submit number which depends on the kiq ring_size in order to prevent > the overwritten problem. > > v2: directly use amdgpu_device_wb_get() for each read instead > of to reserve fixde number slot. > if there is no enough kiq ring buffer or rreg slot then > directly print error log and return instead of busy waiting I would split that into three patches. One for each problem we have here: 1. Fix kgd_hiq_mqd_load() and maybe other occasions to use spin_lock_irqsave(). [yttao]: Do you mean that we need to use spin_lock_irqsave for the functions just like kgd_hiq_mqd_load()? 2. Prevent the overrung of the KIQ. Please drop the approach with the atomic here. Instead just add a amdgpu_fence_wait_polling() into amdgpu_fence_emit_polling() as I discussed with Monk. [yttao]: Sorry, I can't get your original idea for the amdgpu_fence_wait_polling(). Can you give more details about it? Thanks in advance. "That is actually only a problem because the KIQ uses polling waits. 
See amdgpu_fence_emit() waits for the oldest possible fence to be signaled before emitting a new one. I suggest that we do the same in amdgpu_fence_emit_polling(). A one liner like the following should be enough: amdgpu_fence_wait_polling(ring, seq - ring->fence_drv.num_fences_mask, timeout);" [yttao]: there is no usage of num_fences_mask at kiq fence polling, the num_fences_mask is only effective at dma_fence architecture. If I understand correctly, do you want the protype code below? If the protype code is wrong, can you help give one sample? Thanks in advance. int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) { uint32_t seq; if (!s) return -EINVAL; + amdgpu_fence_wait_polling(ring, seq, timeout); seq = ++ring->fence_drv.sync_seq; amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, ¦ seq, 0); *s = seq; return 0; } 3. Use amdgpu_device_wb_get() each time we need to submit a read. [yttao]: yes, I will do it. Regards, Christian. > > Signed-off-by: Yintian Tao > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +- > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 13 ++- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 13 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 83 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 ++- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 8 +- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 +- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 35 +--- > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 13 ++- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 13 ++- > 12 files changed, 167 insertions(+), 48 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 4e1d4cfe7a9f..1157c1a0b888 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct > amdgpu_cs_parser *p, > /* >* Writeback >*/ > 
-#define AMDGPU_MAX_WB 128/* Reserve at most 128 WB slots for > amdgpu-owned rings. */ > +#define AMDGPU_MAX_WB 256/* Reserve at most 256 WB slots for > amdgpu-owned rings. */ > > struct amdgpu_wb { > struct amdgpu_bo*wb_obj; > @@ -1028,6 +1028,12 @@ bool amdgpu_device_has_dc_support(struct > amdgpu_device *adev); > > int emu_soc_asic_init(struct amdgpu_device *adev); > > +int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read, > + unsigned long *flags); > +void amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq, unsigned long > +*flags); > + > +void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs); > +void amdgpu_g
Re: [PATCH] drm/amdgpu: refine kiq access register
Am 22.04.20 um 11:29 schrieb Yintian Tao: According to the current kiq access register method, there will be a race condition when using KIQ to read a register if multiple clients want to read at the same time, just like the example below: 1. client-A start to read REG-0 through KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. client-A to read the register at the wb buffer and get REG-1 value And if multiple clients frequently write registers through KIQ, this may raise the KIQ ring buffer overwritten problem. Therefore, allocate fixed number wb slot for rreg use and limit the submit number which depends on the kiq ring_size in order to prevent the overwritten problem. v2: directly use amdgpu_device_wb_get() for each read instead of reserving a fixed number of slots. if there is not enough kiq ring buffer or rreg slot then directly print error log and return instead of busy waiting I would split that into three patches. One for each problem we have here: 1. Fix kgd_hiq_mqd_load() and maybe other occasions to use spin_lock_irqsave(). 2. Prevent the overrun of the KIQ. Please drop the approach with the atomic here. Instead just add an amdgpu_fence_wait_polling() into amdgpu_fence_emit_polling() as I discussed with Monk. 3. Use amdgpu_device_wb_get() each time we need to submit a read. Regards, Christian. 
Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 13 ++- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 13 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 83 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 ++- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 35 +--- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 13 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 13 ++- 12 files changed, 167 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e1d4cfe7a9f..1157c1a0b888 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */ struct amdgpu_wb { struct amdgpu_bo*wb_obj; @@ -1028,6 +1028,12 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); int emu_soc_asic_init(struct amdgpu_device *adev); +int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read, + unsigned long *flags); +void amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq, unsigned long *flags); + +void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs); +void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs); /* * Registers read & write functions. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..a65d6a1abc04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -309,9 +309,11 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, uint32_t doorbell_off) { struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v10_compute_mqd *m; uint32_t mec, pipe; + unsigned long flags = 0; int r; m = get_mqd(mqd); @@ -324,13 +326,19 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", mec, pipe, queue_id); - spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_lock(kiq, false, &flags); + if (r) { + pr_err("failed to lock kiq\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); goto out_unlock; } + amdgpu_gfx_kiq_consume(kiq, NULL); amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ @@ -350,8 +358,9 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); am
[PATCH] drm/amdgpu: refine kiq access register
According to the current kiq access register method, there will be race condition when using KIQ to read register if multiple clients want to read at same time just like the expample below: 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. client-A to read the register at the wb buffer and get REG-1 value And if there are multiple clients to frequently write registers through KIQ which may raise the KIQ ring buffer overwritten problem. Therefore, allocate fixed number wb slot for rreg use and limit the submit number which depends on the kiq ring_size in order to prevent the overwritten problem. v2: directly use amdgpu_device_wb_get() for each read instead of to reserve fixde number slot. if there is no enough kiq ring buffer or rreg slot then directly print error log and return instead of busy waiting Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 13 ++- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 13 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 83 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 ++- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 35 +--- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 13 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 13 ++- 12 files changed, 167 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e1d4cfe7a9f..1157c1a0b888 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */ struct amdgpu_wb { struct amdgpu_bo*wb_obj; @@ -1028,6 +1028,12 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); int emu_soc_asic_init(struct amdgpu_device *adev); +int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read, + unsigned long *flags); +void amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq, unsigned long *flags); + +void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs); +void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs); /* * Registers read & write functions. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..a65d6a1abc04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -309,9 +309,11 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, uint32_t doorbell_off) { struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v10_compute_mqd *m; uint32_t mec, pipe; + unsigned long flags = 0; int r; m = get_mqd(mqd); @@ -324,13 +326,19 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", mec, pipe, queue_id); - spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_lock(kiq, false, &flags); + if (r) { + pr_err("failed to lock kiq\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); goto out_unlock; } + amdgpu_gfx_kiq_consume(kiq, NULL); amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ @@ -350,8 +358,9 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); 
amdgpu_ring_commit(kiq_ring); + amdgpu_gfx_kiq_restore(kiq, NULL); out_unlock: - spin_unlock(&adev->gfx.kiq.ring_lock); + amdgpu_gfx_kiq_unlock(&adev->gfx.kiq, &flags); release_queue(kgd); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..4435bd716edd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@
RE: [PATCH] drm/amdgpu: refine kiq access register
Hi Christian Please see inline comments. -Original Message- From: Koenig, Christian Sent: 2020年4月22日 16:23 To: Tao, Yintian ; Liu, Monk ; Liu, Shaoyun ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 10:06 schrieb Tao, Yintian: > Hi Christian > > Please see inline comments > > -Original Message- > From: Koenig, Christian > Sent: 2020年4月22日 15:54 > To: Tao, Yintian ; Liu, Monk ; > Liu, Shaoyun ; Kuehling, Felix > > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: refine kiq access register > > Am 22.04.20 um 09:49 schrieb Tao, Yintian: >> Hi Christian >> >> >> Please see inline comments. >> -Original Message- >> From: Christian König >> Sent: 2020年4月22日 15:40 >> To: Tao, Yintian ; Koenig, Christian >> ; Liu, Monk ; Liu, >> Shaoyun ; Kuehling, Felix >> >> Cc: amd-gfx@lists.freedesktop.org >> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register >> >> Am 22.04.20 um 09:35 schrieb Tao, Yintian: >>> Hi Christian >>> >>> BUG_ON(in_interrupt()); >>> That won't work like this. The KIQ is also used in interrupt context in the >>> driver, that's why we used spin_lock_irqsave(). >>> [yttao]: According to the current drm-next code, I have not find where to >>> access register through KIQ. >>> And you need to wait for the free kiq ring buffer space if >>> there is no free kiq ring buffer, here, you wait at interrupt context is >>> illegal. >> Waiting in atomic context is illegal as well, but we don't have much other >> choice. >> [yttao]: no, there is no sleep in atomic context at my patch. > I'm not talking about a sleeping, but busy waiting. > >> We just need to make sure that waiting never happens by making the buffers >> large enough and if it still happens print and error. >> [yttao]: this is not the good choice because KMD need to protect it instead >> of hoping user not frequently invoke KIQ acess. > The only other choice we have is busy waiting, e.g. 
loop until we get a free > slot. > [yttao]: Yes, now may patch use msleep() to busy waiting. Or you means need > to use udelay()? If we use udelay(), it will be the nightmare under multi-VF. > Because it is assumed that there are 16VF within world-switch > 6ms, the bad situation is that one VF will udelay(16*6ms = 96ms) to get one > free slot. You can't use msleep() here since sleeping in atomic or interrupt context is forbidden. The trick is that in atomic context the CPU can't switch to a different process, so we have a very limited number of concurrent KIQ reads which can happen. With a MAX_WB of 256 we can easily have 128 CPUs and don't run into problems. [yttao]: fine, this is a good idea. But it seems current drm-next code, KIQ access still use msleep to wait the fence which is not correct according to your comments. I think we need submit another patch to add one more condition "in_atomic()" to prevent it but this function cannot know about held spinlocks in non-preemptible kernels. Regards, Christian. > > > Regards, > Christian. > >>> And I would either say that we should use the trick with the NOP to reserve >>> space on the ring buffer or call amdgpu_device_wb_get() for each read. >>> amdgpu_device_wb_get() also uses find_first_zero_bit() and should work >>> equally well. >>> [yttao]: sorry, can you give me more details about how to use NOP to >>> reserve space? I will use amdgpu_device_wb_get() for the read operation. >> We could use the NOP PM4 command as Felix suggested, this command has >> a >> header+length and says that the next X dw should be ignore on the >> header+ring >> buffer. >> >> But I think using amdgpu_device_wb_get() is better anyway. >> [yttao]: yes, I agreed with amdgpu_device_wb_get() method because it >> will fix prevent potential read race condition but NOP method will >> not prevent it >> >> Regards, >> Christian. 
>> >>> >>> -Original Message- >>> From: Koenig, Christian >>> Sent: 2020年4月22日 15:23 >>> To: Tao, Yintian ; Liu, Monk >>> ; Liu, Shaoyun ; Kuehling, >>> Felix >>> Cc: amd-gfx@lists.freedesktop.org >>> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register >>> BUG_ON(in_interrupt()); >>> That won't work like this. The KIQ is also used in interrupt context in the >>> driver, that's why we used spin_lock_irqsave(). >>> >>> And I would either say that we should use the trick with the NOP to reserve >>> space on the ring buffer or call amdgpu_device_wb_get() for each read. >>> amdgpu_device_wb_get() also uses find_first_zero_bit() and should work >>> equally well. >>> >>> You also don't need to worry to much about overflowing the wb area. >>> Since we run in an atomic context we can have at most the number of CPU in >>> the system + interrupt context here. >>> >>> Regards, >>> Christian. >>> >>> Am 22.04.20 um 09:11 schrieb Tao, Yintian: Add Felix and Shaoyun -
Re: [PATCH] drm/amdgpu: refine kiq access register
Am 22.04.20 um 10:06 schrieb Tao, Yintian: Hi Christian Please see inline comments -Original Message- From: Koenig, Christian Sent: 2020年4月22日 15:54 To: Tao, Yintian ; Liu, Monk ; Liu, Shaoyun ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 09:49 schrieb Tao, Yintian: Hi Christian Please see inline comments. -Original Message- From: Christian König Sent: 2020年4月22日 15:40 To: Tao, Yintian ; Koenig, Christian ; Liu, Monk ; Liu, Shaoyun ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 09:35 schrieb Tao, Yintian: Hi Christian BUG_ON(in_interrupt()); That won't work like this. The KIQ is also used in interrupt context in the driver, that's why we used spin_lock_irqsave(). [yttao]: According to the current drm-next code, I have not find where to access register through KIQ. And you need to wait for the free kiq ring buffer space if there is no free kiq ring buffer, here, you wait at interrupt context is illegal. Waiting in atomic context is illegal as well, but we don't have much other choice. [yttao]: no, there is no sleep in atomic context at my patch. I'm not talking about a sleeping, but busy waiting. We just need to make sure that waiting never happens by making the buffers large enough and if it still happens print and error. [yttao]: this is not the good choice because KMD need to protect it instead of hoping user not frequently invoke KIQ acess. The only other choice we have is busy waiting, e.g. loop until we get a free slot. [yttao]: Yes, now may patch use msleep() to busy waiting. Or you means need to use udelay()? If we use udelay(), it will be the nightmare under multi-VF. Because it is assumed that there are 16VF within world-switch 6ms, the bad situation is that one VF will udelay(16*6ms = 96ms) to get one free slot. 
You can't use msleep() here since sleeping in atomic or interrupt context is forbidden. The trick is that in atomic context the CPU can't switch to a different process, so we have a very limited number of concurrent KIQ reads which can happen. With a MAX_WB of 256 we can easily have 128 CPUs and don't run into problems. Regards, Christian. Regards, Christian. And I would either say that we should use the trick with the NOP to reserve space on the ring buffer or call amdgpu_device_wb_get() for each read. amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally well. [yttao]: sorry, can you give me more details about how to use NOP to reserve space? I will use amdgpu_device_wb_get() for the read operation. We could use the NOP PM4 command as Felix suggested, this command has a header+length and says that the next X dw should be ignored on the ring buffer. But I think using amdgpu_device_wb_get() is better anyway. [yttao]: yes, I agreed with amdgpu_device_wb_get() method because it will prevent the potential read race condition but NOP method will not prevent it Regards, Christian. -Original Message- From: Koenig, Christian Sent: 2020年4月22日 15:23 To: Tao, Yintian ; Liu, Monk ; Liu, Shaoyun ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register BUG_ON(in_interrupt()); That won't work like this. The KIQ is also used in interrupt context in the driver, that's why we used spin_lock_irqsave(). And I would either say that we should use the trick with the NOP to reserve space on the ring buffer or call amdgpu_device_wb_get() for each read. amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally well. You also don't need to worry too much about overflowing the wb area. Since we run in an atomic context we can have at most the number of CPUs in the system + interrupt context here. Regards, Christian. 
Am 22.04.20 um 09:11 schrieb Tao, Yintian: Add Felix and Shaoyun -Original Message- From: Yintian Tao Sent: 2020年4月22日 12:42 To: Koenig, Christian ; Liu, Monk Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian Subject: [PATCH] drm/amdgpu: refine kiq access register According to the current kiq access register method, there will be race condition when using KIQ to read register if multiple clients want to read at same time just like the expample below: 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. client-A to read the register at the wb buffer and get REG-1 value And if there are multiple clients to frequently write registers through KIQ which may raise the KIQ ring buffer overwritten problem. Therefore, allocate fixed number wb slot for rreg use and limit the submit number which depends on the kiq ring_size in order to prevent the overwritten p
RE: [PATCH] drm/amdgpu: refine kiq access register
Hi Christian Please see inline comments -Original Message- From: Koenig, Christian Sent: 2020年4月22日 15:54 To: Tao, Yintian ; Liu, Monk ; Liu, Shaoyun ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 09:49 schrieb Tao, Yintian: > Hi Christian > > > Please see inline comments. > -Original Message- > From: Christian König > Sent: 2020年4月22日 15:40 > To: Tao, Yintian ; Koenig, Christian > ; Liu, Monk ; Liu, Shaoyun > ; Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: refine kiq access register > > Am 22.04.20 um 09:35 schrieb Tao, Yintian: >> Hi Christian >> >> >>> BUG_ON(in_interrupt()); >> That won't work like this. The KIQ is also used in interrupt context in the >> driver, that's why we used spin_lock_irqsave(). >> [yttao]: According to the current drm-next code, I have not find where to >> access register through KIQ. >> And you need to wait for the free kiq ring buffer space if >> there is no free kiq ring buffer, here, you wait at interrupt context is >> illegal. > Waiting in atomic context is illegal as well, but we don't have much other > choice. > [yttao]: no, there is no sleep in atomic context at my patch. I'm not talking about a sleeping, but busy waiting. > We just need to make sure that waiting never happens by making the buffers > large enough and if it still happens print and error. > [yttao]: this is not the good choice because KMD need to protect it instead > of hoping user not frequently invoke KIQ acess. The only other choice we have is busy waiting, e.g. loop until we get a free slot. [yttao]: Yes, now may patch use msleep() to busy waiting. Or you means need to use udelay()? If we use udelay(), it will be the nightmare under multi-VF. Because it is assumed that there are 16VF within world-switch 6ms, the bad situation is that one VF will udelay(16*6ms = 96ms) to get one free slot. Regards, Christian. 
> >> And I would either say that we should use the trick with the NOP to reserve >> space on the ring buffer or call amdgpu_device_wb_get() for each read. >> amdgpu_device_wb_get() also uses find_first_zero_bit() and should work >> equally well. >> [yttao]: sorry, can you give me more details about how to use NOP to reserve >> space? I will use amdgpu_device_wb_get() for the read operation. > We could use the NOP PM4 command as Felix suggested, this command has > a > header+length and says that the next X dw should be ignore on the ring > buffer. > > But I think using amdgpu_device_wb_get() is better anyway. > [yttao]: yes, I agreed with amdgpu_device_wb_get() method because it > will fix prevent potential read race condition but NOP method will not > prevent it > > Regards, > Christian. > >> >> >> -Original Message- >> From: Koenig, Christian >> Sent: 2020年4月22日 15:23 >> To: Tao, Yintian ; Liu, Monk ; >> Liu, Shaoyun ; Kuehling, Felix >> >> Cc: amd-gfx@lists.freedesktop.org >> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register >> >>> BUG_ON(in_interrupt()); >> That won't work like this. The KIQ is also used in interrupt context in the >> driver, that's why we used spin_lock_irqsave(). >> >> And I would either say that we should use the trick with the NOP to reserve >> space on the ring buffer or call amdgpu_device_wb_get() for each read. >> amdgpu_device_wb_get() also uses find_first_zero_bit() and should work >> equally well. >> >> You also don't need to worry to much about overflowing the wb area. >> Since we run in an atomic context we can have at most the number of CPU in >> the system + interrupt context here. >> >> Regards, >> Christian. 
>> >> Am 22.04.20 um 09:11 schrieb Tao, Yintian: >>> Add Felix and Shaoyun >>> >>> -Original Message- >>> From: Yintian Tao >>> Sent: 2020年4月22日 12:42 >>> To: Koenig, Christian ; Liu, Monk >>> >>> Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian >>> >>> Subject: [PATCH] drm/amdgpu: refine kiq access register >>> >>> According to the current kiq access register method, there will be race >>> condition when using KIQ to read register if multiple clients want to read >>> at same time just like the expample below: >>> 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the seqno-0 3. >>> client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. >>> the kiq complete these two read operation 6. client-A to read the register >>> at the wb buffer and >>> get REG-1 value >>> >>> And if there are multiple clients to frequently write registers through KIQ >>> which may raise the KIQ ring buffer overwritten problem. >>> >>> Therefore, allocate fixed number wb slot for rreg use and limit the submit >>> number which depends on the kiq ring_size in order to prevent the >>> overwritten problem. >>> >>> Signed-off-by: Yintian Tao >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +-
Re: [PATCH] drm/amdgpu: refine kiq access register
Am 22.04.20 um 09:49 schrieb Tao, Yintian: Hi Christian Please see inline comments. -Original Message- From: Christian König Sent: 2020年4月22日 15:40 To: Tao, Yintian ; Koenig, Christian ; Liu, Monk ; Liu, Shaoyun ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 09:35 schrieb Tao, Yintian: Hi Christian BUG_ON(in_interrupt()); That won't work like this. The KIQ is also used in interrupt context in the driver, that's why we used spin_lock_irqsave(). [yttao]: According to the current drm-next code, I have not find where to access register through KIQ. And you need to wait for the free kiq ring buffer space if there is no free kiq ring buffer, here, you wait at interrupt context is illegal. Waiting in atomic context is illegal as well, but we don't have much other choice. [yttao]: no, there is no sleep in atomic context at my patch. I'm not talking about a sleeping, but busy waiting. We just need to make sure that waiting never happens by making the buffers large enough and if it still happens print and error. [yttao]: this is not the good choice because KMD need to protect it instead of hoping user not frequently invoke KIQ acess. The only other choice we have is busy waiting, e.g. loop until we get a free slot. Regards, Christian. And I would either say that we should use the trick with the NOP to reserve space on the ring buffer or call amdgpu_device_wb_get() for each read. amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally well. [yttao]: sorry, can you give me more details about how to use NOP to reserve space? I will use amdgpu_device_wb_get() for the read operation. We could use the NOP PM4 command as Felix suggested, this command has a header+length and says that the next X dw should be ignore on the ring buffer. But I think using amdgpu_device_wb_get() is better anyway. 
[yttao]: yes, I agreed with amdgpu_device_wb_get() method because it will fix prevent potential read race condition but NOP method will not prevent it Regards, Christian. -Original Message- From: Koenig, Christian Sent: 2020年4月22日 15:23 To: Tao, Yintian ; Liu, Monk ; Liu, Shaoyun ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register BUG_ON(in_interrupt()); That won't work like this. The KIQ is also used in interrupt context in the driver, that's why we used spin_lock_irqsave(). And I would either say that we should use the trick with the NOP to reserve space on the ring buffer or call amdgpu_device_wb_get() for each read. amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally well. You also don't need to worry to much about overflowing the wb area. Since we run in an atomic context we can have at most the number of CPU in the system + interrupt context here. Regards, Christian. Am 22.04.20 um 09:11 schrieb Tao, Yintian: Add Felix and Shaoyun -Original Message- From: Yintian Tao Sent: 2020年4月22日 12:42 To: Koenig, Christian ; Liu, Monk Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian Subject: [PATCH] drm/amdgpu: refine kiq access register According to the current kiq access register method, there will be race condition when using KIQ to read register if multiple clients want to read at same time just like the expample below: 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. client-A to read the register at the wb buffer and get REG-1 value And if there are multiple clients to frequently write registers through KIQ which may raise the KIQ ring buffer overwritten problem. Therefore, allocate fixed number wb slot for rreg use and limit the submit number which depends on the kiq ring_size in order to prevent the overwritten problem. 
Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 12 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 129 -- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 34 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 12 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 +- 12 files changed, 211 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e1d4cfe7a9f..4530e0de4257 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgp
RE: [PATCH] drm/amdgpu: refine kiq access register
Hi Christian Please see inline comments. -Original Message- From: Christian König Sent: 2020年4月22日 15:40 To: Tao, Yintian ; Koenig, Christian ; Liu, Monk ; Liu, Shaoyun ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register Am 22.04.20 um 09:35 schrieb Tao, Yintian: > Hi Christian > > >> BUG_ON(in_interrupt()); > That won't work like this. The KIQ is also used in interrupt context in the > driver, that's why we used spin_lock_irqsave(). > [yttao]: According to the current drm-next code, I have not find where to > access register through KIQ. > And you need to wait for the free kiq ring buffer space if > there is no free kiq ring buffer, here, you wait at interrupt context is > illegal. Waiting in atomic context is illegal as well, but we don't have much other choice. [yttao]: no, there is no sleep in atomic context at my patch. We just need to make sure that waiting never happens by making the buffers large enough and if it still happens print and error. [yttao]: this is not the good choice because KMD need to protect it instead of hoping user not frequently invoke KIQ acess. > And I would either say that we should use the trick with the NOP to reserve > space on the ring buffer or call amdgpu_device_wb_get() for each read. > amdgpu_device_wb_get() also uses find_first_zero_bit() and should work > equally well. > [yttao]: sorry, can you give me more details about how to use NOP to reserve > space? I will use amdgpu_device_wb_get() for the read operation. We could use the NOP PM4 command as Felix suggested, this command has a header+length and says that the next X dw should be ignore on the ring buffer. But I think using amdgpu_device_wb_get() is better anyway. [yttao]: yes, I agreed with amdgpu_device_wb_get() method because it will fix prevent potential read race condition but NOP method will not prevent it Regards, Christian. 
> > > > -Original Message- > From: Koenig, Christian > Sent: 2020年4月22日 15:23 > To: Tao, Yintian ; Liu, Monk ; Liu, > Shaoyun ; Kuehling, Felix > Cc: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: refine kiq access register > >> BUG_ON(in_interrupt()); > That won't work like this. The KIQ is also used in interrupt context in the > driver, that's why we used spin_lock_irqsave(). > > And I would either say that we should use the trick with the NOP to reserve > space on the ring buffer or call amdgpu_device_wb_get() for each read. > amdgpu_device_wb_get() also uses find_first_zero_bit() and should work > equally well. > > You also don't need to worry to much about overflowing the wb area. > Since we run in an atomic context we can have at most the number of CPU in > the system + interrupt context here. > > Regards, > Christian. > > Am 22.04.20 um 09:11 schrieb Tao, Yintian: >> Add Felix and Shaoyun >> >> -Original Message- >> From: Yintian Tao >> Sent: 2020年4月22日 12:42 >> To: Koenig, Christian ; Liu, Monk >> >> Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian >> Subject: [PATCH] drm/amdgpu: refine kiq access register >> >> According to the current kiq access register method, there will be race >> condition when using KIQ to read register if multiple clients want to read >> at same time just like the expample below: >> 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the seqno-0 3. >> client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the >> kiq complete these two read operation 6. client-A to read the register at >> the wb buffer and >> get REG-1 value >> >> And if there are multiple clients to frequently write registers through KIQ >> which may raise the KIQ ring buffer overwritten problem. >> >> Therefore, allocate fixed number wb slot for rreg use and limit the submit >> number which depends on the kiq ring_size in order to prevent the >> overwritten problem. 
>> >> Signed-off-by: Yintian Tao >> --- >>drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +- >>.../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 12 +- >>.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 +- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 129 -- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +- >>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 +- >>drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 +- >>drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 8 +- >>drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 +- >>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 34 +++-- >>drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 12 +- >>drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 +- >>12 files changed, 211 insertions(+), 48 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 4e1d4cfe7a9f..4530e0de4257 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/driv
Re: [PATCH] drm/amdgpu: refine kiq access register
Am 22.04.20 um 09:35 schrieb Tao, Yintian: Hi Christian BUG_ON(in_interrupt()); That won't work like this. The KIQ is also used in interrupt context in the driver, that's why we used spin_lock_irqsave(). [yttao]: According to the current drm-next code, I have not find where to access register through KIQ. And you need to wait for the free kiq ring buffer space if there is no free kiq ring buffer, here, you wait at interrupt context is illegal. Waiting in atomic context is illegal as well, but we don't have much other choice. We just need to make sure that waiting never happens by making the buffers large enough and if it still happens print and error. And I would either say that we should use the trick with the NOP to reserve space on the ring buffer or call amdgpu_device_wb_get() for each read. amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally well. [yttao]: sorry, can you give me more details about how to use NOP to reserve space? I will use amdgpu_device_wb_get() for the read operation. We could use the NOP PM4 command as Felix suggested, this command has a header+length and says that the next X dw should be ignore on the ring buffer. But I think using amdgpu_device_wb_get() is better anyway. Regards, Christian. -Original Message- From: Koenig, Christian Sent: 2020年4月22日 15:23 To: Tao, Yintian ; Liu, Monk ; Liu, Shaoyun ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register BUG_ON(in_interrupt()); That won't work like this. The KIQ is also used in interrupt context in the driver, that's why we used spin_lock_irqsave(). And I would either say that we should use the trick with the NOP to reserve space on the ring buffer or call amdgpu_device_wb_get() for each read. amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally well. You also don't need to worry to much about overflowing the wb area. 
Since we run in an atomic context we can have at most the number of CPU in the system + interrupt context here. Regards, Christian. Am 22.04.20 um 09:11 schrieb Tao, Yintian: Add Felix and Shaoyun -Original Message- From: Yintian Tao Sent: 2020年4月22日 12:42 To: Koenig, Christian ; Liu, Monk Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian Subject: [PATCH] drm/amdgpu: refine kiq access register According to the current kiq access register method, there will be race condition when using KIQ to read register if multiple clients want to read at same time just like the example below: 1. client-A start to read REG-0 through KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. client-A to read the register at the wb buffer and get REG-1 value And if there are multiple clients to frequently write registers through KIQ which may raise the KIQ ring buffer overwritten problem. Therefore, allocate fixed number wb slot for rreg use and limit the submit number which depends on the kiq ring_size in order to prevent the overwritten problem. 
Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 12 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 129 -- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 34 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 12 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 +- 12 files changed, 211 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e1d4cfe7a9f..4530e0de4257 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */ struct amdgpu_wb { struct amdgpu_bo*wb_obj; @@ -1028,6 +1028,11 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); int emu_soc_asic_init(struct amdgpu_device *adev); +int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read); void +amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq); + +void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs); +void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs); /* * Registers read & w
RE: [PATCH] drm/amdgpu: refine kiq access register
Hi Christian > BUG_ON(in_interrupt()); That won't work like this. The KIQ is also used in interrupt context in the driver, that's why we used spin_lock_irqsave(). [yttao]: According to the current drm-next code, I have not find where to access register through KIQ. And you need to wait for the free kiq ring buffer space if there is no free kiq ring buffer, here, you wait at interrupt context is illegal. And I would either say that we should use the trick with the NOP to reserve space on the ring buffer or call amdgpu_device_wb_get() for each read. amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally well. [yttao]: sorry, can you give me more details about how to use NOP to reserve space? I will use amdgpu_device_wb_get() for the read operation. -Original Message- From: Koenig, Christian Sent: 2020年4月22日 15:23 To: Tao, Yintian ; Liu, Monk ; Liu, Shaoyun ; Kuehling, Felix Cc: amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: refine kiq access register > BUG_ON(in_interrupt()); That won't work like this. The KIQ is also used in interrupt context in the driver, that's why we used spin_lock_irqsave(). And I would either say that we should use the trick with the NOP to reserve space on the ring buffer or call amdgpu_device_wb_get() for each read. amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally well. You also don't need to worry to much about overflowing the wb area. Since we run in an atomic context we can have at most the number of CPU in the system + interrupt context here. Regards, Christian. 
Am 22.04.20 um 09:11 schrieb Tao, Yintian: > Add Felix and Shaoyun > > -Original Message- > From: Yintian Tao > Sent: 2020年4月22日 12:42 > To: Koenig, Christian ; Liu, Monk > > Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian > Subject: [PATCH] drm/amdgpu: refine kiq access register > > According to the current kiq access register method, there will be race > condition when using KIQ to read register if multiple clients want to read at > same time just like the example below: > 1. client-A start to read REG-0 through KIQ 2. client-A poll the seqno-0 3. > client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the > kiq complete these two read operation 6. client-A to read the register at the > wb buffer and > get REG-1 value > > And if there are multiple clients to frequently write registers through KIQ > which may raise the KIQ ring buffer overwritten problem. > > Therefore, allocate fixed number wb slot for rreg use and limit the submit > number which depends on the kiq ring_size in order to prevent the overwritten > problem. 
> > Signed-off-by: Yintian Tao > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +- > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 12 +- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 129 -- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 +- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 8 +- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 +- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 34 +++-- > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 12 +- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 +- > 12 files changed, 211 insertions(+), 48 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 4e1d4cfe7a9f..4530e0de4257 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct > amdgpu_cs_parser *p, > /* >* Writeback >*/ > -#define AMDGPU_MAX_WB 128/* Reserve at most 128 WB slots for > amdgpu-owned rings. */ > +#define AMDGPU_MAX_WB 256/* Reserve at most 256 WB slots for > amdgpu-owned rings. */ > > struct amdgpu_wb { > struct amdgpu_bo*wb_obj; > @@ -1028,6 +1028,11 @@ bool amdgpu_device_has_dc_support(struct > amdgpu_device *adev); > > int emu_soc_asic_init(struct amdgpu_device *adev); > > +int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read); void > +amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq); > + > +void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs); > +void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs); > /* >* Registers read & write functions. 
>*/ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > index 691c89705bcd..034c9f416499 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > @@ -309,6 +309,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void > *mqd, >
Re: [PATCH] amdgpu: fixes memleak issue when init failed
Am 22.04.20 um 02:56 schrieb 赵军奎: 发件人:"Christian König" 发送日期:2020-04-21 22:53:47 收件人:"赵军奎" 抄送人:Alex Deucher ,"David (ChunMing) Zhou" ,David Airlie ,Daniel Vetter ,Tom St Denis ,Ori Messinger ,Sam Ravnborg ,amd-gfx@lists.freedesktop.org,dri-de...@lists.freedesktop.org,linux-ker...@vger.kernel.org,opensource.ker...@vivo.com 主题:Re: [PATCH] amdgpu: fixes memleak issue when init failed>Am 21.04.20 um 15:39 schrieb 赵军奎: 发件人:"Christian König" 发送日期:2020-04-21 21:02:27 收件人:"赵军奎" 抄送人:Alex Deucher ,"David (ChunMing) Zhou" ,David Airlie ,Daniel Vetter ,Tom St Denis ,Ori Messinger ,Sam Ravnborg ,amd-gfx@lists.freedesktop.org,dri-de...@lists.freedesktop.org,linux-ker...@vger.kernel.org,opensource.ker...@vivo.com 主题:Re: [PATCH] amdgpu: fixes memleak issue when init failed>Am 21.04.20 um 14:09 schrieb 赵军奎: From: "Christian König" Date: 2020-04-21 19:22:49 To: Bernard Zhao ,Alex Deucher ,"David (ChunMing) Zhou" ,David Airlie ,Daniel Vetter ,Tom St Denis ,Ori Messinger ,Sam Ravnborg ,amd-gfx@lists.freedesktop.org,dri-de...@lists.freedesktop.org,linux-ker...@vger.kernel.org Cc: opensource.ker...@vivo.com Subject: Re: [PATCH] amdgpu: fixes memleak issue when init failed>Am 21.04.20 um 13:17 schrieb Bernard Zhao: VRAM manager and DRM MM when init failed, there is no operaction to free kzalloc memory & remove device file. This will lead to memleak & cause stability issue. NAK, failure to create sysfs nodes are not critical. Christian. OK, get it. By the way, should i modify this patch to just handle in error branch, or that it is also unnecessary? What you can do is to drop the "return ret" if anything with the sysfs nodes goes wrong and instead print the error code. Emmm, for this part, i am not sure, my modify first print the error, secone release not free memory, and last return error, make everything clear to the system. I think it`s the same with what you mentioned, is there something that I misunderstood? Yes, maybe an example makes it more clear what to do here. 
Currently we print an error and return when something with the sysfs files goes wrong: if (ret) { DRM_ERROR("Failed to create device file mem_info_vram_total\n"); return ret; } But what we should do instead is just to print an error and continue and in the end return success status: if (ret) DRM_ERROR("Failed to create device file mem_info_vram_total (%d)\n", ret); ... return 0; Regards, Christian. Emmm, i am still confused about two points: 1 Does that mean there is no failed case in this function? Well the kzalloc can still fail. 2 There is no need to free the kzalloc space(no possibility of memory leak )? Correct, yes. Regards, Christian. Regards, Bernard It's really annoying that loading, unloading and loading the driver again sometimes fails because we have a bug in the sysfs files cleanup. We certainly should fix those bugs as well, but they are just not critical for correct driver functionality. Regards, Christian. Regards, Bernard Signed-off-by: Bernard Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 24 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 82a3299e53c0..4c5fb153e6b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -175,30 +175,44 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_total); if (ret) { DRM_ERROR("Failed to create device file mem_info_vram_total\n"); - return ret; + goto VRAM_TOTAL_FAIL; } ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_total); if (ret) { DRM_ERROR("Failed to create device file mem_info_vis_vram_total\n"); - return ret; + goto VIS_VRAM_TOTA_FAIL; } ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_used); if (ret) { DRM_ERROR("Failed to create device file mem_info_vram_used\n"); - return ret; + goto VRAM_USED_FAIL; } ret = 
device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_used); if (ret) { DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n"); - return ret; + goto VIS_VRAM_USED_FAIL; } ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_vendor); if (ret) { DRM_ERROR("Failed to create device file mem_info_vram_vendor\n"); - return ret; + goto VRAM_VERDOR_FAIL; } return 0; + +VRAM_VERDOR_FAIL: + device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used); +VIS_VRAM_USED_FAIL: + device_remove_file(adev->dev, &dev_attr_mem_info_vram_used)
Re: [PATCH] drm/amdgpu: refine kiq access register
BUG_ON(in_interrupt()); That won't work like this. The KIQ is also used in interrupt context in the driver, that's why we used spin_lock_irqsave(). And I would either say that we should use the trick with the NOP to reserve space on the ring buffer or call amdgpu_device_wb_get() for each read. amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally well. You also don't need to worry too much about overflowing the wb area. Since we run in an atomic context we can have at most the number of CPU in the system + interrupt context here. Regards, Christian. Am 22.04.20 um 09:11 schrieb Tao, Yintian: Add Felix and Shaoyun -Original Message- From: Yintian Tao Sent: 2020年4月22日 12:42 To: Koenig, Christian ; Liu, Monk Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian Subject: [PATCH] drm/amdgpu: refine kiq access register According to the current kiq access register method, there will be race condition when using KIQ to read register if multiple clients want to read at same time just like the example below: 1. client-A start to read REG-0 through KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. client-A to read the register at the wb buffer and get REG-1 value And if there are multiple clients to frequently write registers through KIQ which may raise the KIQ ring buffer overwritten problem. Therefore, allocate fixed number wb slot for rreg use and limit the submit number which depends on the kiq ring_size in order to prevent the overwritten problem. 
Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 12 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 129 -- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 34 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 12 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 +- 12 files changed, 211 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e1d4cfe7a9f..4530e0de4257 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */ struct amdgpu_wb { struct amdgpu_bo*wb_obj; @@ -1028,6 +1028,11 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); int emu_soc_asic_init(struct amdgpu_device *adev); +int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read); void +amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq); + +void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs); +void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs); /* * Registers read & write functions. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..034c9f416499 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -309,6 +309,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, uint32_t doorbell_off) { struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v10_compute_mqd *m; uint32_t mec, pipe; @@ -324,13 +325,19 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", mec, pipe, queue_id); - spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_lock(kiq, false); + if (r) { + pr_err("failed to lock kiq\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); goto out_unlock; } + amdgpu_gfx_kiq_consume(kiq, NULL); amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ @@ -350,8 +357,9 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_
RE: [PATCH] drm/amdgpu: refine kiq access register
Add Felix and Shaoyun -Original Message- From: Yintian Tao Sent: 2020年4月22日 12:42 To: Koenig, Christian ; Liu, Monk Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian Subject: [PATCH] drm/amdgpu: refine kiq access register According to the current kiq access register method, there will be race condition when using KIQ to read register if multiple clients want to read at same time just like the expample below: 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll the seqno-1 5. the kiq complete these two read operation 6. client-A to read the register at the wb buffer and get REG-1 value And if there are multiple clients to frequently write registers through KIQ which may raise the KIQ ring buffer overwritten problem. Therefore, allocate fixed number wb slot for rreg use and limit the submit number which depends on the kiq ring_size in order to prevent the overwritten problem. Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 12 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 129 -- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 34 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 12 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 +- 12 files changed, 211 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e1d4cfe7a9f..4530e0de4257 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */ struct amdgpu_wb { struct amdgpu_bo*wb_obj; @@ -1028,6 +1028,11 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); int emu_soc_asic_init(struct amdgpu_device *adev); +int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read); void +amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq); + +void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs); +void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs); /* * Registers read & write functions. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..034c9f416499 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -309,6 +309,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, uint32_t doorbell_off) { struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v10_compute_mqd *m; uint32_t mec, pipe; @@ -324,13 +325,19 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", mec, pipe, queue_id); - spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_gfx_kiq_lock(kiq, false); + if (r) { + pr_err("failed to lock kiq\n"); + goto out_unlock; + } + r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); goto out_unlock; } + amdgpu_gfx_kiq_consume(kiq, NULL); amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ @@ -350,8 +357,9 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); amdgpu_ring_commit(kiq_ring); + amdgpu_gfx_kiq_restore(kiq, NULL); out_unlock: - 
spin_unlock(&adev->gfx.kiq.ring_lock); + amdgpu_gfx_kiq_unlock(&adev->gfx.kiq); release_queue(kgd); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..f243d9990ced 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -307,6 +307,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, uint32_t doorbell_off) {