[PATCH] drm/amdgpu: protect ring overrun

2020-04-22 Thread Yintian Tao
Wait for the oldest sequence on the ring
to be signaled in order to make sure there
will be no command overrun.
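
For reference, a minimal sketch of the idea (illustration only, not the
kernel code): the fence slot array holds num_fences_mask + 1 entries, so
a new sequence may only be emitted once the fence that would reuse its
slot has signaled. Unsigned subtraction keeps the comparison correct
across 32-bit wrap-around.

    #include <stdint.h>
    #include <stdbool.h>

    /* Sketch only: returns true when emitting sync_seq would not
     * overrun the window of num_fences_mask + 1 outstanding fences. */
    static bool ring_has_room(uint32_t sync_seq, uint32_t last_signaled,
                              uint32_t num_fences_mask)
    {
            /* unsigned subtraction handles sequence wrap-around */
            return (uint32_t)(sync_seq - last_signaled) <= num_fences_mask;
    }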

Signed-off-by: Yintian Tao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 17 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  8 +++-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  9 -
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  8 +++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  8 +++-
 6 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7531527067df..5462ea83d8b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -200,6 +200,13 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, 
uint32_t *s)
return -EINVAL;
 
seq = ++ring->fence_drv.sync_seq;
+   if ((abs(seq - ring->fence_drv.num_fences_mask) >
+   ring->fence_drv.num_fences_mask) &&
+   (amdgpu_fence_wait_polling(ring,
+  seq - ring->fence_drv.num_fences_mask,
+  MAX_KIQ_REG_WAIT) < 1))
+   return -ETIME;
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, 0);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..7087333681f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -681,7 +681,14 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
}
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   amdgpu_ring_undo(ring);
+   amdgpu_device_wb_free(adev, reg_val_offs);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_read;
+   }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
@@ -730,7 +737,13 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t 
reg, uint32_t v)
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   amdgpu_ring_undo(ring);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_write;
+   }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 8c10084f44ef..12d181ac7e78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -60,7 +60,13 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device 
*adev,
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   amdgpu_ring_undo(ring);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq;
+   }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5b1549f167b0..650b7a67d3bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4068,7 +4068,14 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct 
amdgpu_device *adev)
reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
reg_val_offs * 4));
-   amdgpu_fence_emit_polling(ring, &seq);
+   r = amdgpu_fence_emit_polling(ring, &seq);
+   if (r) {
+   amdgpu_ring_undo(ring);
+   amdgpu_device_wb_free(adev, reg_val_offs);
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_read;
+   }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 30b75d79efdb..71430f2a2374 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -427,7 +427,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
kiq->pmf->kiq_invalidate_tlbs(ring,

RE: [PATCH] drm/amdgpu: protect kiq overrun

2020-04-22 Thread Tao, Yintian
Hi Christian,

Ok, I got it. The real maximum number of commands that can be
submitted to the kiq ring buffer is 1024. If we use the
num_fences_mask value, the maximum number of submissions will be
reduced to 512; do you think that is ok?
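
Illustrative arithmetic only (the ring size and align_mask below are
assumptions, not values from the patch): with

    max_sub_num = (ring_size / 4) / (align_mask + 1)

a 32 KiB ring is 8192 dwords; with align_mask = 7 (8-dword packets)
that gives 8192 / 8 = 1024 slots, while a fence window of
num_fences_mask + 1 = 512 entries would cap in-flight submissions at
half the ring, which is the halving described above.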


Best Regards
Yintian Tao
-Original Message-
From: Koenig, Christian  
Sent: 23 April 2020 2:43
To: Tao, Yintian ; Liu, Monk ; Kuehling, 
Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: protect kiq overrun

Am 22.04.20 um 16:50 schrieb Yintian Tao:
> Wait for the oldest sequence on the kiq ring to be signaled in order 
> to make sure there will be no kiq overrun.
>
> v2: remove the unused variable and correct
>  kiq max_sub_num value

First of all this should probably be added to the fence handling code and not 
the kiq code.

Then you are kind of duplicating some of the functionality we have in the ring 
handling here. Probably better to avoid this, see
amdgpu_fence_driver_init_ring() as well. That's also why I suggested to use the 
num_fences_mask value.

Regards,
Christian.

>
> Signed-off-by: Yintian Tao 
> ---
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  6 
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  6 
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 30 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  6 
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  6 
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  7 +
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  7 +
>   8 files changed, 71 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 691c89705bcd..fac8b9713dfc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void 
> *mqd,
>mec, pipe, queue_id);
>   
>   spin_lock(&adev->gfx.kiq.ring_lock);
> + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
> + if (r) {
> + pr_err("critical bug! too many kiq submissions\n");
> + goto out_unlock;
> + }
> +
>   r = amdgpu_ring_alloc(kiq_ring, 7);
>   if (r) {
>   pr_err("Failed to alloc KIQ (%d).\n", r); diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index df841c2ac5e7..fd42c126510f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void 
> *mqd,
>mec, pipe, queue_id);
>   
>   spin_lock(&adev->gfx.kiq.ring_lock);
> + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
> + if (r) {
> + pr_err("critical bug! too many kiq submissions\n");
> + goto out_unlock;
> + }
> +
>   r = amdgpu_ring_alloc(kiq_ring, 7);
>   if (r) {
>   pr_err("Failed to alloc KIQ (%d).\n", r); diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index a721b0e0ff69..84e66c45df37 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
>AMDGPU_RING_PRIO_DEFAULT);
>   if (r)
>   dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
> + else
> + kiq->max_sub_num = (ring->ring_size / 4) /
> + (ring->funcs->align_mask + 1);
>   
>   return r;
>   }
> @@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device 
> *adev,
>   return 0;
>   }
>   
> +int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) {
> + uint32_t seq = 0;
> + signed long r = 0;
> +
> + seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num);
> + if (seq > kiq->max_sub_num) {
> + r = amdgpu_fence_wait_polling(&kiq->ring, seq,
> +   MAX_KIQ_REG_WAIT);
> + return r < 1 ? -ETIME : 0;
> + }
> +
> + return 0;
> +}
> +
>   uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>   {
>   signed long r, cnt = 0;
> @@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
> uint32_t reg)
>   BUG_ON(!ring->funcs->emit_rreg);
>   
>   spin_lock_irqsave(&kiq->ring_lock, flags);
> + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
> + if (r) {
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> + goto failed_kiq_read;
> + }
> +
>   if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
>   spin_unlock_irqrestore(&kiq->ring_lock, flags);
>   pr_err("critical bug! too many kiq readers\n"); @@ -728,6 
> +752,12 
>

[PATCH v2] drm/amdkfd: Track GPU memory utilization per process

2020-04-22 Thread Mukul Joshi
Track GPU VRAM usage on a per process basis and report it through
sysfs.

v2: 
   - Handle AMDGPU BO-specific details in 
 amdgpu_amdkfd_gpuvm_free_memory_of_gpu().
   - Return size of VRAM BO being freed from 
 amdgpu_amdkfd_gpuvm_free_memory_of_gpu().
   - Do not consider imported memory for VRAM
 usage calculations.
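
The kfd_process.c sysfs plumbing is not shown in this digest; a
hypothetical sketch of the read side is below (the attr_vram field and
the callback name are assumptions, not the patch's actual code):

    /* Hypothetical: expose pdd->vram_usage through a sysfs attribute */
    static ssize_t kfd_procfs_show_vram(struct kobject *kobj,
                                        struct attribute *attr, char *buf)
    {
            struct kfd_process_device *pdd =
                    container_of(attr, struct kfd_process_device, attr_vram);

            return snprintf(buf, PAGE_SIZE, "%llu\n",
                            (unsigned long long)pdd->vram_usage);
    }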

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  9 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 17 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  7 +++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 57 ---
 5 files changed, 81 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index d065c50582eb..a501026e829c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -65,6 +65,7 @@ struct kgd_mem {
struct amdgpu_sync sync;
 
bool aql_queue;
+   bool is_imported;
 };
 
 /* KFD Memory Eviction */
@@ -219,7 +220,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
void *vm, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags);
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-   struct kgd_dev *kgd, struct kgd_mem *mem);
+   struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 0768b7eb7683..fe6615a06cd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1277,7 +1277,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 }
 
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-   struct kgd_dev *kgd, struct kgd_mem *mem)
+   struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
 {
struct amdkfd_process_info *process_info = mem->process_info;
unsigned long bo_size = mem->bo->tbo.mem.size;
@@ -1340,6 +1340,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
kfree(mem->bo->tbo.sg);
}
 
+   /* Update the size of the BO being freed if it was allocated from
+* VRAM
+*/
+   if (size && (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM))
+   *size = bo_size;
+
/* Free the BO*/
amdgpu_bo_unref(&mem->bo);
mutex_destroy(&mem->lock);
@@ -1694,6 +1700,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
(*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
amdgpu_sync_create(&(*mem)->sync);
+   (*mem)->is_imported = true;
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f8fa03a12add..aac2cdb65eb5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1322,6 +1322,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
goto err_free;
}
 
+   /* Update the VRAM usage count */
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+   pdd->vram_usage += args->size;
+
mutex_unlock(&p->mutex);
 
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
@@ -1337,7 +1341,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
return 0;
 
 err_free:
-   amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+   amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, 
NULL);
 err_unlock:
mutex_unlock(&p->mutex);
return err;
@@ -1351,6 +1355,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file 
*filep,
void *mem;
struct kfd_dev *dev;
int ret;
+   uint64_t size = 0;
+   bool is_imported = 0;
 
dev = kfd_device_by_id(GET_GPU_ID(args->handle));
if (!dev)
@@ -1372,8 +1378,10 @@ static int kfd_ioctl_free_memory_of_gpu(struct file 
*filep,
goto err_unlock;
}
 
+   is_imported = ((struct kgd_mem *)mem)->is_imported;
+
ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
-   (struct kgd_mem *)mem);
+   (struct kgd_mem *)mem, &size);
 
/* If freeing the buffer failed, leave the handle in place for
 * clean-up during process tear-down.
@@ -1382,6 +1390,9 @@ static int kfd_ioctl_free_memory_of_gpu(struct file 
*filep,
kfd_process_device_remove_obj_handle(
pdd, GET_IDR_HANDLE(args->handle));
 
+   if (!is_imported)
+   pdd->vram_usage -= s

[pull] amdgpu drm-fixes-5.7

2020-04-22 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 5.7.

The following changes since commit 4da858c086433cd012c0bb16b5921f6fafe3f803:

  Merge branch 'linux-5.7' of git://github.com/skeggsb/linux into drm-fixes 
(2020-04-16 15:40:02 +1000)

are available in the Git repository at:

  git://people.freedesktop.org/~agd5f/linux tags/amd-drm-fixes-5.7-2020-04-22

for you to fetch changes up to 7daec99fdcde7b01595134a3d8f385bc1009f1d8:

  drm/amdgpu/display: give aux i2c buses more meaningful names (2020-04-17 
17:31:38 -0400)


amd-drm-fixes-5.7-2020-04-22:

amdgpu:
- Fix resume issue on renoir
- Thermal fix for older CI dGPUs
- Fix some fallout from dropping drm load/unload callbacks


Alex Deucher (2):
  drm/amdgpu/display: fix aux registration (v2)
  drm/amdgpu/display: give aux i2c buses more meaningful names

Prike Liang (1):
  drm/amd/powerplay: fix resume failed as smu table initialize early exit

Sandeep Raghuraman (1):
  drm/amdgpu: Correctly initialize thermal controller for GPUs with 
Powerplay table v0 (e.g Hawaii)

 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  | 14 ++--
 .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c| 15 -
 .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.h|  3 ++-
 .../gpu/drm/amd/powerplay/hwmgr/processpptables.c  | 26 ++
 drivers/gpu/drm/amd/powerplay/renoir_ppt.c |  7 +-
 5 files changed, 55 insertions(+), 10 deletions(-)


[PATCH 07/19] drm/amd/display: fix rn soc bb update

2020-04-22 Thread Aurabindo Pillai
From: Dmytro Laktyushkin 

Currently RN SOC bounding box update assumes we will get at least
2 clock states from SMU. This isn't always true and because of special
casing on first clock state we end up with low disp, dpp, dsc and phy
clocks.

This change removes the special casing allowing the first state to
acquire correct clocks.

Signed-off-by: Dmytro Laktyushkin 
Reviewed-by: Eric Yang 
Acked-by: Aurabindo Pillai 
Acked-by: Tony Cheng 
---
 .../drm/amd/display/dc/dcn21/dcn21_resource.c | 71 ---
 1 file changed, 28 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index 78e6259b4ac9..8fcb03e65fdb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -1379,64 +1379,49 @@ static void update_bw_bounding_box(struct dc *dc, 
struct clk_bw_params *bw_param
 {
struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool);
struct clk_limit_table *clk_table = &bw_params->clk_table;
-   unsigned int i, j, k;
-   int closest_clk_lvl;
+   struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
+   unsigned int i, j, closest_clk_lvl;
 
// Default clock levels are used for diags, which may lead to 
overclocking.
-   if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) && 
!IS_DIAG_DC(dc->ctx->dce_environment)) {
+   if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
dcn2_1_ip.max_num_otg = 
pool->base.res_cap->num_timing_generator;
dcn2_1_ip.max_num_dpp = pool->base.pipe_count;
dcn2_1_soc.num_chans = bw_params->num_channels;
 
-   /* Vmin: leave lowest DCN clocks, override with dcfclk, fclk, 
memclk from fuse */
-   dcn2_1_soc.clock_limits[0].state = 0;
-   dcn2_1_soc.clock_limits[0].dcfclk_mhz = 
clk_table->entries[0].dcfclk_mhz;
-   dcn2_1_soc.clock_limits[0].fabricclk_mhz = 
clk_table->entries[0].fclk_mhz;
-   dcn2_1_soc.clock_limits[0].socclk_mhz = 
clk_table->entries[0].socclk_mhz;
-   dcn2_1_soc.clock_limits[0].dram_speed_mts = 
clk_table->entries[0].memclk_mhz * 2;
-
-   /*
-* Other levels: find closest DCN clocks that fit the given 
clock limit using dcfclk
-* as indicator
-*/
-
-   closest_clk_lvl = -1;
-   /* index currently being filled */
-   k = 1;
-   for (i = 1; i < clk_table->num_entries; i++) {
-   /* loop backwards, skip duplicate state*/
-   for (j = dcn2_1_soc.num_states - 1; j >= k; j--) {
+   ASSERT(clk_table->num_entries);
+   for (i = 0; i < clk_table->num_entries; i++) {
+   /* loop backwards*/
+   for (closest_clk_lvl = 0, j = dcn2_1_soc.num_states - 
1; j >= 0; j--) {
if ((unsigned int) 
dcn2_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
closest_clk_lvl = j;
break;
}
}
 
-   /* if found a lvl that fits, use the DCN clks from it, 
if not, go to next clk limit*/
-   if (closest_clk_lvl != -1) {
-   dcn2_1_soc.clock_limits[k].state = i;
-   dcn2_1_soc.clock_limits[k].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
-   dcn2_1_soc.clock_limits[k].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
-   dcn2_1_soc.clock_limits[k].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
-   dcn2_1_soc.clock_limits[k].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2;
-
-   dcn2_1_soc.clock_limits[k].dispclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-   dcn2_1_soc.clock_limits[k].dppclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-   
dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
-   dcn2_1_soc.clock_limits[k].dscclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
-   dcn2_1_soc.clock_limits[k].dtbclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
-   dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
-   dcn2_1_soc.clock_limits[k].phyclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
-   k++;
-   }
+   clock_limit

[PATCH 15/19] drm/amd/display: Add set backlight to hw sequencer.

2020-04-22 Thread Aurabindo Pillai
From: Yongqiang Sun 

[Why & How]
Add set backlight to the hw sequencer; DMCU communication will
be handled in the hw sequencer for new ASICs.

Signed-off-by: Yongqiang Sun 
Reviewed-by: Anthony Koo 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 35 ++-
 .../display/dc/dce110/dce110_hw_sequencer.c   | 34 +-
 .../display/dc/dce110/dce110_hw_sequencer.h   |  4 +++
 .../gpu/drm/amd/display/dc/dcn10/dcn10_init.c |  1 +
 .../gpu/drm/amd/display/dc/dcn20/dcn20_init.c |  1 +
 .../gpu/drm/amd/display/dc/dcn21/dcn21_init.c |  1 +
 .../gpu/drm/amd/display/dc/inc/hw_sequencer.h |  4 +++
 7 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index a54b3e05f66b..67c5342cf89a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -2509,35 +2509,21 @@ bool dc_link_set_backlight_level(const struct dc_link 
*link,
uint32_t frame_ramp)
 {
struct dc  *dc = link->ctx->dc;
-   struct abm *abm = get_abm_from_stream_res(link);
-   struct dmcu *dmcu = dc->res_pool->dmcu;
-   unsigned int controller_id = 0;
-   bool fw_set_brightness = true;
int i;
-   DC_LOGGER_INIT(link->ctx->logger);
-
-   if (abm == NULL || (abm->funcs->set_backlight_level_pwm == NULL))
-   return false;
-
-   if (dmcu)
-   fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu);
 
+   DC_LOGGER_INIT(link->ctx->logger);
DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n",
backlight_pwm_u16_16, backlight_pwm_u16_16);
 
if (dc_is_embedded_signal(link->connector_signal)) {
+   struct pipe_ctx *pipe_ctx = NULL;
+
for (i = 0; i < MAX_PIPES; i++) {
if (dc->current_state->res_ctx.pipe_ctx[i].stream) {
if (dc->current_state->res_ctx.
pipe_ctx[i].stream->link
== link) {
-   /* DMCU -1 for all controller id values,
-* therefore +1 here
-*/
-   controller_id =
-   dc->current_state->
-   
res_ctx.pipe_ctx[i].stream_res.tg->inst +
-   1;
+   pipe_ctx = 
&dc->current_state->res_ctx.pipe_ctx[i];
 
/* Disable brightness ramping when the 
display is blanked
 * as it can hang the DMCU
@@ -2547,13 +2533,14 @@ bool dc_link_set_backlight_level(const struct dc_link 
*link,
}
}
}
-   abm->funcs->set_backlight_level_pwm(
-   abm,
+
+   if (pipe_ctx == NULL)
+   ASSERT(false);
+
+   dc->hwss.set_backlight_level(
+   pipe_ctx,
backlight_pwm_u16_16,
-   frame_ramp,
-   controller_id,
-   link->panel_cntl->inst,
-   fw_set_brightness);
+   frame_ramp);
}
 
return true;
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 9cd130c8894a..30469026c642 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -2714,6 +2714,37 @@ void dce110_set_cursor_attribute(struct pipe_ctx 
*pipe_ctx)
pipe_ctx->plane_res.xfm, attributes);
 }
 
+bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx,
+   uint32_t backlight_pwm_u16_16,
+   uint32_t frame_ramp)
+{
+   struct dc_link *link = pipe_ctx->stream->link;
+   struct dc  *dc = link->ctx->dc;
+   struct abm *abm = pipe_ctx->stream_res.abm;
+   struct dmcu *dmcu = dc->res_pool->dmcu;
+   bool fw_set_brightness = true;
+   /* DMCU -1 for all controller id values,
+* therefore +1 here
+*/
+   uint32_t controller_id = pipe_ctx->stream_res.tg->inst + 1;
+
+   if (abm == NULL || (abm->funcs->set_backlight_level_pwm == NULL))
+   return false;
+
+   if (dmcu)
+   fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu);
+
+   abm->funcs->set_backlight_level_pwm(
+   abm,
+   backlight_pwm_u16_16,
+   frame_ramp

[PATCH 09/19] drm/amd/display: Change viewport limit to 12 for DCN2

2020-04-22 Thread Aurabindo Pillai
From: Sung Lee 

[WHY & HOW]
Viewport limit was set to 16 pixels due to an issue with MPO
on small viewports. That restriction no longer applies, so the
viewport limit can now be lowered.

Signed-off-by: Sung Lee 
Reviewed-by: Dmytro Laktyushkin 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 12f5c6881cd0..1a01c038632b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1064,8 +1064,8 @@ bool resource_build_scaling_params(struct pipe_ctx 
*pipe_ctx)
 
calculate_viewport(pipe_ctx);
 
-   if (pipe_ctx->plane_res.scl_data.viewport.height < 16 ||
-   pipe_ctx->plane_res.scl_data.viewport.width < 16) {
+   if (pipe_ctx->plane_res.scl_data.viewport.height < 12 ||
+   pipe_ctx->plane_res.scl_data.viewport.width < 12) {
if (store_h_border_left) {
restore_border_left_from_dst(pipe_ctx,
store_h_border_left);
-- 
2.17.1



[PATCH 17/19] drm/amd/display: Use cursor locking to prevent flip delays

2020-04-22 Thread Aurabindo Pillai
From: Aric Cyr 

[Why]
The current locking scheme for the cursor can result in a flip missing
its vsync, deferring it for one or more vsyncs. The result is
potential stutter when the cursor is moved.

[How]
Use cursor update lock so that flips are not blocked while cursor
is being programmed.

Signed-off-by: Aric Cyr 
Reviewed-by: Nicholas Kazlauskas 
Acked-by: Aurabindo Pillai 
---
 .../gpu/drm/amd/display/dc/core/dc_stream.c   | 40 ++-
 .../display/dc/dce110/dce110_hw_sequencer.c   |  1 +
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 10 +
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.h |  1 +
 .../gpu/drm/amd/display/dc/dcn10/dcn10_init.c |  1 +
 .../gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c  | 15 +++
 .../gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h  | 20 +++---
 .../drm/amd/display/dc/dcn10/dcn10_resource.c | 14 ++-
 .../gpu/drm/amd/display/dc/dcn20/dcn20_init.c |  1 +
 .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c  |  1 +
 .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h  |  3 +-
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |  4 ++
 .../gpu/drm/amd/display/dc/dcn21/dcn21_init.c |  1 +
 .../drm/amd/display/dc/dcn21/dcn21_resource.c |  4 ++
 drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h   | 16 
 .../gpu/drm/amd/display/dc/inc/hw_sequencer.h |  1 +
 16 files changed, 88 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 8c20e9e907b2..4f0e7203dba4 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -231,34 +231,6 @@ struct dc_stream_status *dc_stream_get_status(
return dc_stream_get_status_from_state(dc->current_state, stream);
 }
 
-static void delay_cursor_until_vupdate(struct pipe_ctx *pipe_ctx, struct dc 
*dc)
-{
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-   unsigned int vupdate_line;
-   unsigned int lines_to_vupdate, us_to_vupdate, vpos, nvpos;
-   struct dc_stream_state *stream = pipe_ctx->stream;
-   unsigned int us_per_line;
-
-   if (!dc->hwss.get_vupdate_offset_from_vsync)
-   return;
-
-   vupdate_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
-   if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos))
-   return;
-
-   if (vpos >= vupdate_line)
-   return;
-
-   us_per_line =
-   stream->timing.h_total * 10000 / stream->timing.pix_clk_100hz;
-   lines_to_vupdate = vupdate_line - vpos;
-   us_to_vupdate = lines_to_vupdate * us_per_line;
-
-   /* 70 us is a conservative estimate of cursor update time*/
-   if (us_to_vupdate < 70)
-   udelay(us_to_vupdate);
-#endif
-}
 
 /**
  * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor 
surface address
@@ -298,9 +270,7 @@ bool dc_stream_set_cursor_attributes(
 
if (!pipe_to_program) {
pipe_to_program = pipe_ctx;
-
-   delay_cursor_until_vupdate(pipe_ctx, dc);
-   dc->hwss.pipe_control_lock(dc, pipe_to_program, true);
+   dc->hwss.cursor_lock(dc, pipe_to_program, true);
}
 
dc->hwss.set_cursor_attribute(pipe_ctx);
@@ -309,7 +279,7 @@ bool dc_stream_set_cursor_attributes(
}
 
if (pipe_to_program)
-   dc->hwss.pipe_control_lock(dc, pipe_to_program, false);
+   dc->hwss.cursor_lock(dc, pipe_to_program, false);
 
return true;
 }
@@ -349,16 +319,14 @@ bool dc_stream_set_cursor_position(
 
if (!pipe_to_program) {
pipe_to_program = pipe_ctx;
-
-   delay_cursor_until_vupdate(pipe_ctx, dc);
-   dc->hwss.pipe_control_lock(dc, pipe_to_program, true);
+   dc->hwss.cursor_lock(dc, pipe_to_program, true);
}
 
dc->hwss.set_cursor_position(pipe_ctx);
}
 
if (pipe_to_program)
-   dc->hwss.pipe_control_lock(dc, pipe_to_program, false);
+   dc->hwss.cursor_lock(dc, pipe_to_program, false);
 
return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 30469026c642..6bd8d4e1c294 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -2767,6 +2767,7 @@ static const struct hw_sequencer_funcs dce110_funcs = {
.disable_plane = dce110_power_down_fe,
.pipe_control_lock = dce_pipe_control_lock,
.interdependent_update_lock = NULL,
+   .cursor_lock = dce_pipe_control_lock,
.prepare_bandwidth = dce110_prepare_bandwidth,
.optimize_bandwidth = dce110_optimize_bandwidth,
.set_drr = set_drr,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.

[PATCH 19/19] drm/amd/display: 3.2.83.1

2020-04-22 Thread Aurabindo Pillai
From: Aric Cyr 

Update firmware blobs

Signed-off-by: Aric Cyr 
Reviewed-by: Aric Cyr 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 8957429c6a24..17075f99bc54 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -42,7 +42,7 @@
 #include "inc/hw/dmcu.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.2.83"
+#define DC_VER "3.2.83.1"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.17.1



[PATCH 16/19] drm/amd/display: Internal refactoring to abstract color caps

2020-04-22 Thread Aurabindo Pillai
From: Krunoslav Kovac 

[Why&How]
modules/color calculates various color operations which are translated
to abstracted HW. DCE 5-12 had almost no important changes, but
starting with DCN1, every new generation comes with fairly major
differences in the color pipeline. We could hack around this with DCN
checks, but a better approach is to abstract the color pipe
capabilities so modules/DM can decide the mapping to HW blocks based
on logical capabilities.
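
A hypothetical consumer of the new caps (field names are taken from the
struct definitions in the diff below; the policy itself is illustrative
and not part of this patch):

    /* Hypothetical DM/module-side mapping decision */
    if (dc->caps.color.dpp.hw_3d_lut) {
            /* program shaper + 3D LUT in the DPP */
    } else if (dc->caps.color.mpc.num_3dluts > 0) {
            /* fall back to an MPC 3D LUT instance */
    } else {
            /* no HW 3D LUT: approximate with gamma RAM/ROM stages */
    }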

Signed-off-by: Krunoslav Kovac 
Reviewed-by: Aric Cyr 
Acked-by: Anthony Koo 
Acked-by: Aurabindo Pillai 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   |  7 +--
 drivers/gpu/drm/amd/display/dc/dc.h   | 45 ++-
 .../drm/amd/display/dc/dcn10/dcn10_resource.c | 34 ++
 .../drm/amd/display/dc/dcn20/dcn20_resource.c | 35 ++-
 .../drm/amd/display/dc/dcn21/dcn21_resource.c | 35 ++-
 .../amd/display/modules/color/color_gamma.c   | 31 ++---
 .../amd/display/modules/color/color_gamma.h   |  4 +-
 7 files changed, 178 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 838f35668f12..4dfb6b55bb2e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -239,7 +239,8 @@ static int __set_output_tf(struct dc_transfer_func *func,
 * instead to simulate this.
 */
gamma->type = GAMMA_CUSTOM;
-   res = mod_color_calculate_degamma_params(func, gamma, true);
+   res = mod_color_calculate_degamma_params(NULL, func,
+   gamma, true);
} else {
/*
 * Assume sRGB. The actual mapping will depend on whether the
@@ -271,7 +272,7 @@ static int __set_input_tf(struct dc_transfer_func *func,
 
__drm_lut_to_dc_gamma(lut, gamma, false);
 
-   res = mod_color_calculate_degamma_params(func, gamma, true);
+   res = mod_color_calculate_degamma_params(NULL, func, gamma, true);
dc_gamma_release(&gamma);
 
return res ? 0 : -ENOMEM;
@@ -485,7 +486,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state 
*crtc,
dc_plane_state->in_transfer_func->tf = tf;
 
if (tf != TRANSFER_FUNCTION_SRGB &&
-   !mod_color_calculate_degamma_params(
+   !mod_color_calculate_degamma_params(NULL,
dc_plane_state->in_transfer_func, NULL, false))
return -ENOMEM;
} else {
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 99c8e40049e6..b4aeb5d8a818 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -98,6 +98,49 @@ struct dc_plane_cap {
} max_downscale_factor;
 };
 
+// Color management caps (DPP and MPC)
+struct rom_curve_caps {
+   uint16_t srgb : 1;
+   uint16_t bt2020 : 1;
+   uint16_t gamma2_2 : 1;
+   uint16_t pq : 1;
+   uint16_t hlg : 1;
+};
+
+struct dpp_color_caps {
+   uint16_t dcn_arch : 1; // all DCE generations treated the same
+   // input lut is different than most LUTs, just plain 256-entry lookup
+   uint16_t input_lut_shared : 1; // shared with DGAM
+   uint16_t icsc : 1;
+   uint16_t dgam_ram : 1;
+   uint16_t post_csc : 1; // before gamut remap
+   uint16_t gamma_corr : 1;
+
+   // hdr_mult and gamut remap always available in DPP (in that order)
+   // 3d lut implies shaper LUT,
+   // it may be shared with MPC - check MPC:shared_3d_lut flag
+   uint16_t hw_3d_lut : 1;
+   uint16_t ogam_ram : 1; // blnd gam
+   uint16_t ocsc : 1;
+   struct rom_curve_caps dgam_rom_caps;
+   struct rom_curve_caps ogam_rom_caps;
+};
+
+struct mpc_color_caps {
+   uint16_t gamut_remap : 1;
+   uint16_t ogam_ram : 1;
+   uint16_t ocsc : 1;
+   uint16_t num_3dluts : 3; //3d lut always assumes a preceding shaper LUT
+   uint16_t shared_3d_lut:1; //can be in either DPP or MPC, but single 
instance
+
+   struct rom_curve_caps ogam_rom_caps;
+};
+
+struct dc_color_caps {
+   struct dpp_color_caps dpp;
+   struct mpc_color_caps mpc;
+};
+
 struct dc_caps {
uint32_t max_streams;
uint32_t max_links;
@@ -120,9 +163,9 @@ struct dc_caps {
bool psp_setup_panel_mode;
bool extended_aux_timeout_support;
bool dmcub_support;
-   bool hw_3d_lut;
enum dp_protocol_version max_dp_protocol_version;
struct dc_plane_cap planes[MAX_PLANES];
+   struct dc_color_caps color;
 };
 
 struct dc_bug_wa {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 43116749af9f..6d506c37fc71 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ 

[PATCH 13/19] drm/amd/display: Add panel cntl id for set backlight level.

2020-04-22 Thread Aurabindo Pillai
From: Yongqiang Sun 

[Why & How]
Add panel cntl instance when calling set backlight.

Signed-off-by: Yongqiang Sun 
Reviewed-by: Anthony Koo 
Acked-by: Aurabindo Pillai 
Acked-by: Tony Cheng 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c |  4 +++-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c|  3 ++-
 drivers/gpu/drm/amd/display/dc/dce/dce_abm.c | 15 +--
 drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c| 16 ++--
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c|  9 ++---
 .../gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c   |  9 ++---
 drivers/gpu/drm/amd/display/dc/inc/hw/abm.h  |  5 +++--
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h  |  1 +
 8 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 0f7810571be3..ad817bd74586 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2210,7 +2210,9 @@ static void commit_planes_do_stream_update(struct dc *dc,
 
if (should_program_abm) {
if (*stream_update->abm_level == 
ABM_LEVEL_IMMEDIATE_DISABLE) {
-   
pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm);
+   
pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(
+   
pipe_ctx->stream_res.abm,
+   
pipe_ctx->stream->link->panel_cntl->inst);
} else {

pipe_ctx->stream_res.abm->funcs->set_abm_level(

pipe_ctx->stream_res.abm, stream->abm_level);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 9c4686edcf3e..a54b3e05f66b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -2552,6 +2552,7 @@ bool dc_link_set_backlight_level(const struct dc_link 
*link,
backlight_pwm_u16_16,
frame_ramp,
controller_id,
+   link->panel_cntl->inst,
fw_set_brightness);
}
 
@@ -2564,7 +2565,7 @@ bool dc_link_set_abm_disable(const struct dc_link *link)
bool success = false;
 
if (abm)
-   success = abm->funcs->set_abm_immediate_disable(abm);
+   success = abm->funcs->set_abm_immediate_disable(abm, 
link->panel_cntl->inst);
 
return success;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index 4dae9efebb6f..c15e60fb5ebc 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -55,7 +55,7 @@
 
 #define MCP_DISABLE_ABM_IMMEDIATELY 255
 
-static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id)
+static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id, uint32_t 
panel_inst)
 {
struct dce_abm *abm_dce = TO_DCE_ABM(abm);
uint32_t rampingBoundary = 0xFFFF;
@@ -201,7 +201,8 @@ static void dmcu_set_backlight_level(
struct dce_abm *abm_dce,
uint32_t backlight_pwm_u16_16,
uint32_t frame_ramp,
-   uint32_t controller_id)
+   uint32_t controller_id,
+   uint32_t panel_id)
 {
unsigned int backlight_8_bit = 0;
uint32_t s2;
@@ -213,7 +214,7 @@ static void dmcu_set_backlight_level(
// Take MSB of fractional part since backlight is not max
backlight_8_bit = (backlight_pwm_u16_16 >> 8) & 0xFF;
 
-   dce_abm_set_pipe(&abm_dce->base, controller_id);
+   dce_abm_set_pipe(&abm_dce->base, controller_id, panel_id);
 
/* waitDMCUReadyForCmd */
REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT,
@@ -331,14 +332,14 @@ static bool dce_abm_set_level(struct abm *abm, uint32_t 
level)
return true;
 }
 
-static bool dce_abm_immediate_disable(struct abm *abm)
+static bool dce_abm_immediate_disable(struct abm *abm, uint32_t panel_inst)
 {
struct dce_abm *abm_dce = TO_DCE_ABM(abm);
 
if (abm->dmcu_is_running == false)
return true;
 
-   dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY);
+   dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY, panel_inst);
 
abm->stored_backlight_registers.BL_PWM_CNTL =
REG_READ(BL_PWM_CNTL);
@@ -420,6 +421,7 @@ static bool dce_abm_set_backlight_level_pwm(
unsigned int backlight_pwm_u16_16,
unsigned int frame_ramp,
unsigned int controller_id,
+   unsigned int p

[PATCH 05/19] drm/amd/display: Add DML variable for future asics

2020-04-22 Thread Aurabindo Pillai
From: Joshua Aberback 

Signed-off-by: Joshua Aberback 
Reviewed-by: Dmytro Laktyushkin 
Reviewed-by: Jun Lei 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 1 +
 drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 1 +
 drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h | 1 +
 3 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index 687010c17324..b2ecb174a93f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -121,6 +121,7 @@ struct _vcs_dpi_soc_bounding_box_st {
 };
 
 struct _vcs_dpi_ip_params_st {
+   bool use_min_dcfclk;
bool gpuvm_enable;
bool hostvm_enable;
unsigned int gpuvm_max_page_table_levels;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 6b525c52124c..6e4e8a452e66 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -280,6 +280,7 @@ static void fetch_ip_params(struct display_mode_lib 
*mode_lib)
ip_params_st *ip = &mode_lib->vba.ip;
 
// IP Parameters
+   mode_lib->vba.UseMinimumRequiredDCFCLK = ip->use_min_dcfclk;
mode_lib->vba.MaxNumDPP = ip->max_num_dpp;
mode_lib->vba.MaxNumOTG = ip->max_num_otg;
mode_lib->vba.MaxNumHDMIFRLOutputs = ip->max_num_hdmi_frl_outputs;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 5d82fc5a7ed7..a1884ffe63ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -898,6 +898,7 @@ struct vba_vars_st {
bool dummystring[DC__NUM_DPP__MAX];
double BPP;
enum odm_combine_policy ODMCombinePolicy;
+   bool UseMinimumRequiredDCFCLK;
 };
 
 bool CalculateMinAndMaxPrefetchMode(
-- 
2.17.1



[PATCH 10/19] drm/amd/display: Defer cursor update around VUPDATE for all ASIC

2020-04-22 Thread Aurabindo Pillai
From: Nicholas Kazlauskas 

[Why]
Fixes the following scenario:

- Flip has been prepared sometime during the frame, update pending
- Cursor update happens right when VUPDATE would happen
- OPTC lock acquired, VUPDATE is blocked until next frame
- Flip is delayed potentially infinitely

With the igt@kms_cursor_legacy cursor-vs-flip-legacy test we can
observe nearly *13* frames of delay for some flips on Navi.

[How]
Apply the Raven workaround generically. When close enough to VUPDATE
block cursor updates from occurring from the dc_stream_set_cursor_*
helpers.

This could perhaps be a little smarter by checking if there were
pending updates or flips earlier in the frame on the HUBP side before
applying the delay, but this should be fine for now.

This fixes the kms_cursor_legacy test.
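
For a sense of scale (illustrative numbers, not from the patch): 4K@60
CTA timing has h_total = 4400 and a 594 MHz pixel clock, i.e.
pix_clk_100hz = 5940000, so

    us_per_line = 4400 * 10000 / 5940000  ~=  7 us/line

meaning a cursor update landing a few lines before VUPDATE spins well
under the 70 us budget the helper allows.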

Signed-off-by: Nicholas Kazlauskas 
Reviewed-by: Aric Cyr 
Acked-by: Aurabindo Pillai 
---
 .../gpu/drm/amd/display/dc/core/dc_stream.c   | 28 +--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 6ddbb00ed37a..8c20e9e907b2 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -239,24 +239,24 @@ static void delay_cursor_until_vupdate(struct pipe_ctx 
*pipe_ctx, struct dc *dc)
struct dc_stream_state *stream = pipe_ctx->stream;
unsigned int us_per_line;
 
-   if (stream->ctx->asic_id.chip_family == FAMILY_RV &&
-   ASICREV_IS_RAVEN(stream->ctx->asic_id.hw_internal_rev)) 
{
+   if (!dc->hwss.get_vupdate_offset_from_vsync)
+   return;
 
-   vupdate_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
-   if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos))
-   return;
+   vupdate_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
+   if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos))
+   return;
 
-   if (vpos >= vupdate_line)
-   return;
+   if (vpos >= vupdate_line)
+   return;
 
-   us_per_line = stream->timing.h_total * 10000 / 
stream->timing.pix_clk_100hz;
-   lines_to_vupdate = vupdate_line - vpos;
-   us_to_vupdate = lines_to_vupdate * us_per_line;
+   us_per_line =
+   stream->timing.h_total * 10000 / stream->timing.pix_clk_100hz;
+   lines_to_vupdate = vupdate_line - vpos;
+   us_to_vupdate = lines_to_vupdate * us_per_line;
 
-   /* 70 us is a conservative estimate of cursor update time*/
-   if (us_to_vupdate < 70)
-   udelay(us_to_vupdate);
-   }
+   /* 70 us is a conservative estimate of cursor update time*/
+   if (us_to_vupdate < 70)
+   udelay(us_to_vupdate);
 #endif
 }
 
-- 
2.17.1



[PATCH 14/19] drm/amd/display: Add dummy p-state latency bounding box override

2020-04-22 Thread Aurabindo Pillai
From: Joshua Aberback 

[Why]

For debugging, it can be useful to be able to modify the dummy
p-state latency, this will make it easier to do so.

Signed-off-by: Joshua Aberback 
Reviewed-by: Wesley Chalmers 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/dc.h   | 1 +
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 7 +++
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 84e5056521a3..99c8e40049e6 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -478,6 +478,7 @@ struct dc_bounding_box_overrides {
int urgent_latency_ns;
int percent_of_ideal_drambw;
int dram_clock_change_latency_ns;
+   int dummy_clock_change_latency_ns;
/* This forces a hard min on the DCFCLK we use
 * for DML.  Unlike the debug option for forcing
 * DCFCLK, this override affects watermark calculations
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 3a8a4c54738a..b7e4d0c2432c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -3467,6 +3467,13 @@ void dcn20_patch_bounding_box(struct dc *dc, struct 
_vcs_dpi_soc_bounding_box_st
bb->dram_clock_change_latency_us =
dc->bb_overrides.dram_clock_change_latency_ns / 
1000.0;
}
+
+   if ((int)(bb->dummy_pstate_latency_us * 1000)
+   != 
dc->bb_overrides.dummy_clock_change_latency_ns
+   && dc->bb_overrides.dummy_clock_change_latency_ns) {
+   bb->dummy_pstate_latency_us =
+   dc->bb_overrides.dummy_clock_change_latency_ns 
/ 1000.0;
+   }
 }
 
 static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb(
-- 
2.17.1



[PATCH 08/19] drm/amd/display: Fail validation if building scaling params fails

2020-04-22 Thread Aurabindo Pillai
From: Sung Lee 

[WHY & HOW]
If building scaling parameters fails, validation
should also fail.

Signed-off-by: Sung Lee 
Reviewed-by: Dmytro Laktyushkin 
Acked-by: Aurabindo Pillai 
---
 .../drm/amd/display/dc/dcn20/dcn20_resource.c| 16 ++--
 .../drm/amd/display/dc/dcn20/dcn20_resource.h|  2 +-
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index f41fc322d50a..3a8a4c54738a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1936,7 +1936,7 @@ bool dcn20_split_stream_for_odm(
return true;
 }
 
-void dcn20_split_stream_for_mpc(
+bool dcn20_split_stream_for_mpc(
struct resource_context *res_ctx,
const struct resource_pool *pool,
struct pipe_ctx *primary_pipe,
@@ -1965,8 +1965,11 @@ void dcn20_split_stream_for_mpc(
secondary_pipe->top_pipe = primary_pipe;
 
ASSERT(primary_pipe->plane_state);
-   resource_build_scaling_params(primary_pipe);
-   resource_build_scaling_params(secondary_pipe);
+   if (!resource_build_scaling_params(primary_pipe) ||
+   !resource_build_scaling_params(secondary_pipe))
+   return false;
+
+   return true;
 }
 
 void dcn20_populate_dml_writeback_from_context(
@@ -2796,9 +2799,10 @@ bool dcn20_fast_validate_bw(
goto validate_fail;
dcn20_build_mapped_resource(dc, 
context, pipe->stream);
} else
-   dcn20_split_stream_for_mpc(
-   &context->res_ctx, dc->res_pool,
-   pipe, hsplit_pipe);
+   if (!dcn20_split_stream_for_mpc(
+   &context->res_ctx, 
dc->res_pool,
+   pipe, hsplit_pipe))
+   goto validate_fail;
pipe_split_from[hsplit_pipe->pipe_idx] = 
pipe_idx;
}
} else if (hsplit_pipe && hsplit_pipe->plane_state == 
pipe->plane_state) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
index 9d5bff9455fd..578265ccbf5b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
@@ -129,7 +129,7 @@ void dcn20_release_dsc(struct resource_context *res_ctx,
const struct resource_pool *pool,
struct display_stream_compressor **dsc);
 bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx);
-void dcn20_split_stream_for_mpc(
+bool dcn20_split_stream_for_mpc(
struct resource_context *res_ctx,
const struct resource_pool *pool,
struct pipe_ctx *primary_pipe,
-- 
2.17.1



[PATCH 18/19] drm/amd/display: 3.2.83

2020-04-22 Thread Aurabindo Pillai
From: Aric Cyr 

Signed-off-by: Aric Cyr 
Reviewed-by: Aric Cyr 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index b4aeb5d8a818..8957429c6a24 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -42,7 +42,7 @@
 #include "inc/hw/dmcu.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.2.82"
+#define DC_VER "3.2.83"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.17.1



[PATCH 12/19] drm/amd/display: Pass command instead of header into DMUB service

2020-04-22 Thread Aurabindo Pillai
From: Nicholas Kazlauskas 

[Why]
We read memory that we shouldn't be touching if the struct isn't
a full union dmub_rb_cmd.

[How]
Fix up all the callers and functions that take in the dmub_cmd_header
to use the dmub_rb_cmd instead.
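
A simplified, self-contained repro of the over-read (types invented for
the illustration, not the real DMUB definitions):

    #include <string.h>

    struct cmd_header { unsigned int type; };
    struct small_cmd  { struct cmd_header h; unsigned int arg; };      /*  8 bytes */
    union  rb_cmd     { struct cmd_header h; unsigned char raw[64]; }; /* 64 bytes */

    /* The queue always copies a full ring-buffer entry... */
    static void rb_push(union rb_cmd *slot, const struct cmd_header *h)
    {
            /* ...so it reads 64 bytes through an 8-byte object when the
             * caller only built a small_cmd: an out-of-bounds read.
             * Taking a full union rb_cmd from callers removes the
             * mismatch. */
            memcpy(slot, h, sizeof(*slot));
    }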

Signed-off-by: Nicholas Kazlauskas 
Reviewed-by: Tony Cheng 
Acked-by: Aurabindo Pillai 
---
 .../drm/amd/display/dc/bios/command_table2.c  | 62 +--
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  |  2 +-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  |  3 +-
 drivers/gpu/drm/amd/display/dc/dc_helper.c|  6 +-
 drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 10 +--
 drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c |  8 +--
 .../gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 33 ++
 .../gpu/drm/amd/display/dmub/inc/dmub_rb.h|  6 +-
 .../gpu/drm/amd/display/dmub/inc/dmub_srv.h   |  3 +-
 .../gpu/drm/amd/display/dmub/src/dmub_srv.c   |  2 +-
 10 files changed, 80 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c 
b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index 8edc2506d49e..bed91572f82a 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -113,13 +113,19 @@ static void encoder_control_dmcub(
struct dc_dmub_srv *dmcub,
struct dig_encoder_stream_setup_parameters_v1_5 *dig)
 {
-   struct dmub_rb_cmd_digx_encoder_control encoder_control = { 0 };
+   union dmub_rb_cmd cmd;
 
-   encoder_control.header.type = DMUB_CMD__VBIOS;
-   encoder_control.header.sub_type = DMUB_CMD__VBIOS_DIGX_ENCODER_CONTROL;
-   encoder_control.encoder_control.dig.stream_param = *dig;
+   memset(&cmd, 0, sizeof(cmd));
 
-   dc_dmub_srv_cmd_queue(dmcub, &encoder_control.header);
+   cmd.digx_encoder_control.header.type = DMUB_CMD__VBIOS;
+   cmd.digx_encoder_control.header.sub_type =
+   DMUB_CMD__VBIOS_DIGX_ENCODER_CONTROL;
+   cmd.digx_encoder_control.header.payload_bytes =
+   sizeof(cmd.digx_encoder_control) -
+   sizeof(cmd.digx_encoder_control.header);
+   cmd.digx_encoder_control.encoder_control.dig.stream_param = *dig;
+
+   dc_dmub_srv_cmd_queue(dmcub, &cmd);
dc_dmub_srv_cmd_execute(dmcub);
dc_dmub_srv_wait_idle(dmcub);
 }
@@ -238,14 +244,19 @@ static void transmitter_control_dmcub(
struct dc_dmub_srv *dmcub,
struct dig_transmitter_control_parameters_v1_6 *dig)
 {
-   struct dmub_rb_cmd_dig1_transmitter_control transmitter_control;
+   union dmub_rb_cmd cmd;
+
+   memset(&cmd, 0, sizeof(cmd));
 
-   transmitter_control.header.type = DMUB_CMD__VBIOS;
-   transmitter_control.header.sub_type =
+   cmd.dig1_transmitter_control.header.type = DMUB_CMD__VBIOS;
+   cmd.dig1_transmitter_control.header.sub_type =
DMUB_CMD__VBIOS_DIG1_TRANSMITTER_CONTROL;
-   transmitter_control.transmitter_control.dig = *dig;
+   cmd.dig1_transmitter_control.header.payload_bytes =
+   sizeof(cmd.dig1_transmitter_control) -
+   sizeof(cmd.dig1_transmitter_control.header);
+   cmd.dig1_transmitter_control.transmitter_control.dig = *dig;
 
-   dc_dmub_srv_cmd_queue(dmcub, &transmitter_control.header);
+   dc_dmub_srv_cmd_queue(dmcub, &cmd);
dc_dmub_srv_cmd_execute(dmcub);
dc_dmub_srv_wait_idle(dmcub);
 }
@@ -339,13 +350,18 @@ static void set_pixel_clock_dmcub(
struct dc_dmub_srv *dmcub,
struct set_pixel_clock_parameter_v1_7 *clk)
 {
-   struct dmub_rb_cmd_set_pixel_clock pixel_clock = { 0 };
+   union dmub_rb_cmd cmd;
 
-   pixel_clock.header.type = DMUB_CMD__VBIOS;
-   pixel_clock.header.sub_type = DMUB_CMD__VBIOS_SET_PIXEL_CLOCK;
-   pixel_clock.pixel_clock.clk = *clk;
+   memset(&cmd, 0, sizeof(cmd));
 
-   dc_dmub_srv_cmd_queue(dmcub, &pixel_clock.header);
+   cmd.set_pixel_clock.header.type = DMUB_CMD__VBIOS;
+   cmd.set_pixel_clock.header.sub_type = DMUB_CMD__VBIOS_SET_PIXEL_CLOCK;
+   cmd.set_pixel_clock.header.payload_bytes =
+   sizeof(cmd.set_pixel_clock) -
+   sizeof(cmd.set_pixel_clock.header);
+   cmd.set_pixel_clock.pixel_clock.clk = *clk;
+
+   dc_dmub_srv_cmd_queue(dmcub, &cmd);
dc_dmub_srv_cmd_execute(dmcub);
dc_dmub_srv_wait_idle(dmcub);
 }
@@ -705,13 +721,19 @@ static void enable_disp_power_gating_dmcub(
struct dc_dmub_srv *dmcub,
struct enable_disp_power_gating_parameters_v2_1 *pwr)
 {
-   struct dmub_rb_cmd_enable_disp_power_gating power_gating;
+   union dmub_rb_cmd cmd;
+
+   memset(&cmd, 0, sizeof(cmd));
 
-   power_gating.header.type = DMUB_CMD__VBIOS;
-   power_gating.header.sub_type = DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING;
-   power_gating.power_gating.pwr = *pwr;
+   cmd.enable_disp_power_gating.header.type = DMUB_CMD__V

[PATCH 11/19] drm/amd/display: Update downspread percent to match spreadsheet for DCN2.1

2020-04-22 Thread Aurabindo Pillai
From: Sung Lee 

[WHY]
The downspread percentage was copied over from a previous version
of the display_mode_lib spreadsheet. This value has been updated,
and the previous value is too high to allow for such modes as
4K120hz. The new value is sufficient for such modes.

[HOW]
Update the value in dcn21_resource to match the spreadsheet.

Signed-off-by: Sung Lee 
Reviewed-by: Yongqiang Sun 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index 8fcb03e65fdb..802372f09dc7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -286,7 +286,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
.dram_channel_width_bytes = 4,
.fabric_datapath_to_dcn_data_return_bytes = 32,
.dcn_downspread_percent = 0.5,
-   .downspread_percent = 0.5,
+   .downspread_percent = 0.38,
.dram_page_open_time_ns = 50.0,
.dram_rw_turnaround_time_ns = 17.5,
.dram_return_buffer_per_channel_bytes = 8192,
-- 
2.17.1



[PATCH 01/19] drm/amd/display: 3.2.82

2020-04-22 Thread Aurabindo Pillai
From: Aric Cyr 

Signed-off-by: Aric Cyr 
Reviewed-by: Aric Cyr 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 5432ca1657b1..84e5056521a3 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -42,7 +42,7 @@
 #include "inc/hw/dmcu.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.2.81"
+#define DC_VER "3.2.82"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.17.1



[PATCH 02/19] drm/amd/display: Do not disable pipe split if mode is not supported

2020-04-22 Thread Aurabindo Pillai
From: Sung Lee 

[WHY]
If mode is not supported, pipe split should not be disabled.
This may cause more modes to fail.

[HOW]
Check for mode support before disabling pipe split.

This commit was previously reverted as it was thought to
have problems, but those issues have been resolved.

Signed-off-by: Sung Lee 
Reviewed-by: Yongqiang Sun 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 63044ae06327..f41fc322d50a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -2623,19 +2623,24 @@ int dcn20_validate_apply_pipe_split_flags(
 
/* Avoid split loop looks for lowest voltage level that allows most 
unsplit pipes possible */
if (avoid_split) {
+   int max_mpc_comb = context->bw_ctx.dml.vba.maxMpcComb;
+
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
if (!context->res_ctx.pipe_ctx[i].stream)
continue;
 
for (vlevel_split = vlevel; vlevel <= 
context->bw_ctx.dml.soc.num_states; vlevel++)
-   if 
(context->bw_ctx.dml.vba.NoOfDPP[vlevel][0][pipe_idx] == 1)
+   if 
(context->bw_ctx.dml.vba.NoOfDPP[vlevel][0][pipe_idx] == 1 &&
+   
context->bw_ctx.dml.vba.ModeSupport[vlevel][0])
break;
/* Impossible to not split this pipe */
if (vlevel > context->bw_ctx.dml.soc.num_states)
vlevel = vlevel_split;
+   else
+   max_mpc_comb = 0;
pipe_idx++;
}
-   context->bw_ctx.dml.vba.maxMpcComb = 0;
+   context->bw_ctx.dml.vba.maxMpcComb = max_mpc_comb;
}
 
/* Split loop sets which pipe should be split based on dml outputs and 
dc flags */
-- 
2.17.1



[PATCH 03/19] drm/amd/display: Fix DMUB meta offset for new load method

2020-04-22 Thread Aurabindo Pillai
From: Nicholas Kazlauskas 

[Why]
The new metadata offset is located at the end of the firmware binary
without any additional padding.

Firmware state is currently larger than 1024 bytes, so the new firmware
state will hang when trying to access any data above 1024 bytes.

[How]
Specify the correct offset based on legacy vs new loading method.
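
For reference, a minimal sketch of the resulting addressing (illustrative
only; legacy_load is a hypothetical flag standing in for the fw_bss_data
check in the diff below):

	/* blob layout: [ firmware data ... | dmub_fw_meta | pad (legacy only) ] */
	uint32_t meta_offset = legacy_load ? DMUB_FW_META_OFFSET : 0;
	const union dmub_fw_meta *meta;

	meta = (const union dmub_fw_meta *)(blob + blob_size - meta_offset -
					    sizeof(union dmub_fw_meta));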

Signed-off-by: Nicholas Kazlauskas 
Reviewed-by: Yongqiang Sun 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c 
b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 0a1a851741c5..a6e403227872 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -96,25 +96,27 @@ dmub_get_fw_meta_info(const struct dmub_srv_region_params 
*params)
const union dmub_fw_meta *meta;
const uint8_t *blob = NULL;
uint32_t blob_size = 0;
+   uint32_t meta_offset = 0;
 
if (params->fw_bss_data) {
/* Legacy metadata region. */
blob = params->fw_bss_data;
blob_size = params->bss_data_size;
+   meta_offset = DMUB_FW_META_OFFSET;
} else if (params->fw_inst_const) {
/* Combined metadata region. */
blob = params->fw_inst_const;
blob_size = params->inst_const_size;
+   meta_offset = 0;
}
 
if (!blob || !blob_size)
return NULL;
 
-   if (blob_size < sizeof(union dmub_fw_meta) + DMUB_FW_META_OFFSET)
+   if (blob_size < sizeof(union dmub_fw_meta) + meta_offset)
return NULL;
 
-   meta = (const union dmub_fw_meta *)(blob + blob_size -
-   DMUB_FW_META_OFFSET -
+   meta = (const union dmub_fw_meta *)(blob + blob_size - meta_offset -
sizeof(union dmub_fw_meta));
 
if (meta->info.magic_value != DMUB_FW_META_MAGIC)
-- 
2.17.1



[PATCH 00/19] DC Patches 20 April 2020

2020-04-22 Thread Aurabindo Pillai
This DC patchset brings improvements in multiple areas. In summary, we 
highlight:

* DC version 3.2.83.1
* Enhancements and refactoring in DMUB
* Improvements for DCN2, backlight and others

__

Anthony Koo (1):
  drm/amd/display: clean up some header paths

Aric Cyr (4):
  drm/amd/display: 3.2.82
  drm/amd/display: Use cursor locking to prevent flip delays
  drm/amd/display: 3.2.83
  drm/amd/display: 3.2.83.1

Dmytro Laktyushkin (2):
  drm/amd/display: check if REFCLK_CNTL register is present
  drm/amd/display: fix rn soc bb update

Joshua Aberback (2):
  drm/amd/display: Add DML variable for future asics
  drm/amd/display: Add dummy p-state latency bounding box override

Krunoslav Kovac (1):
  drm/amd/display: Internal refactoring to abstract color caps

Nicholas Kazlauskas (3):
  drm/amd/display: Fix DMUB meta offset for new load method
  drm/amd/display: Defer cursor update around VUPDATE for all ASIC
  drm/amd/display: Pass command instead of header into DMUB service

Sung Lee (4):
  drm/amd/display: Do not disable pipe split if mode is not supported
  drm/amd/display: Fail validation if building scaling params fails
  drm/amd/display: Change viewport limit to 12 for DCN2
  drm/amd/display: Update downspread percent to match spreadsheet for
DCN2.1

Yongqiang Sun (2):
  drm/amd/display: Add panel cntl id for set backlight level.
  drm/amd/display: Add set backlight to hw sequencer.

 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   |   7 +-
 .../drm/amd/display/dc/bios/command_table2.c  |  62 ++
 drivers/gpu/drm/amd/display/dc/core/dc.c  |   4 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c |  36 ++
 .../gpu/drm/amd/display/dc/core/dc_resource.c |   4 +-
 .../gpu/drm/amd/display/dc/core/dc_stream.c   |  40 +--
 drivers/gpu/drm/amd/display/dc/dc.h   |  48 +++-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  |   2 +-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  |   5 +-
 drivers/gpu/drm/amd/display/dc/dc_helper.c|   6 +-
 drivers/gpu/drm/amd/display/dc/dce/dce_abm.c  |  15 ++-
 drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c |  28 +++--
 drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c |  12 +-
 .../display/dc/dce110/dce110_hw_sequencer.c   |  35 +-
 .../display/dc/dce110/dce110_hw_sequencer.h   |   4 +
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c |  19 ++-
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.h |   1 +
 .../gpu/drm/amd/display/dc/dcn10/dcn10_init.c |   2 +
 .../gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c  |  15 +++
 .../gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h  |  20 +++-
 .../drm/amd/display/dc/dcn10/dcn10_resource.c |  48 +++-
 .../drm/amd/display/dc/dcn20/dcn20_hwseq.c|  12 +-
 .../gpu/drm/amd/display/dc/dcn20/dcn20_init.c |   2 +
 .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c  |   1 +
 .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h  |   3 +-
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |  71 +--
 .../drm/amd/display/dc/dcn20/dcn20_resource.h |   2 +-
 .../gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c |  33 --
 .../gpu/drm/amd/display/dc/dcn21/dcn21_init.c |   2 +
 .../drm/amd/display/dc/dcn21/dcn21_resource.c | 112 +++---
 .../amd/display/dc/dml/display_mode_structs.h |   1 +
 .../drm/amd/display/dc/dml/display_mode_vba.c |   1 +
 .../drm/amd/display/dc/dml/display_mode_vba.h |   1 +
 drivers/gpu/drm/amd/display/dc/inc/hw/abm.h   |   5 +-
 drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h   |  16 +++
 .../gpu/drm/amd/display/dc/inc/hw_sequencer.h |   5 +
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |   6 +-
 .../gpu/drm/amd/display/dmub/inc/dmub_rb.h|   6 +-
 .../gpu/drm/amd/display/dmub/inc/dmub_srv.h   |   3 +-
 .../gpu/drm/amd/display/dmub/inc/dmub_types.h |  11 ++
 .../gpu/drm/amd/display/dmub/src/dmub_srv.c   |  10 +-
 .../amd/display/modules/color/color_gamma.c   |  31 -
 .../amd/display/modules/color/color_gamma.h   |   4 +-
 43 files changed, 523 insertions(+), 228 deletions(-)

-- 
2.17.1



[PATCH 04/19] drm/amd/display: clean up some header paths

2020-04-22 Thread Aurabindo Pillai
From: Anthony Koo 

[Why]
Some include paths don't need to be relative,
and some types are missing.

[How]
Make some changes to the headers and modify the include paths.

Signed-off-by: Anthony Koo 
Reviewed-by: Tony Cheng 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  |  2 +-
 drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c |  2 +-
 drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c |  4 ++--
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |  5 -
 drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h | 11 +++
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h 
b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index 754b6077539c..855431483699 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -27,7 +27,7 @@
 #define _DMUB_DC_SRV_H_
 
 #include "os_types.h"
-#include "../dmub/inc/dmub_cmd.h"
+#include "dmub/inc/dmub_cmd.h"
 
 struct dmub_srv;
 struct dmub_cmd_header;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c 
b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
index a19f359e45d7..992d869188c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
@@ -27,7 +27,7 @@
 #include "dce_abm.h"
 #include "dc.h"
 #include "dc_dmub_srv.h"
-#include "../../dmub/inc/dmub_srv.h"
+#include "dmub/inc/dmub_srv.h"
 #include "core_types.h"
 #include "dm_services.h"
 #include "reg_helper.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c 
b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
index 3b8a49e8e665..7b32e5d60ed6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -26,8 +26,8 @@
 #include "dmub_psr.h"
 #include "dc.h"
 #include "dc_dmub_srv.h"
-#include "../../dmub/inc/dmub_srv.h"
-#include "../../dmub/inc/dmub_gpint_cmd.h"
+#include "dmub/inc/dmub_srv.h"
+#include "dmub/inc/dmub_gpint_cmd.h"
 #include "core_types.h"
 
 #define MAX_PIPES 6
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h 
b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 7c7a3561b6aa..6b48285446c3 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -215,11 +215,6 @@ struct dmub_rb_cmd_dpphy_init {
uint8_t reserved[60];
 };
 
-struct dmub_psr_debug_flags {
-   uint8_t visual_confirm : 1;
-   uint8_t reserved : 7;
-};
-
 struct dmub_cmd_psr_copy_settings_data {
uint16_t psr_level;
uint8_t dpp_inst;
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h 
b/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h
index 41d524b0db2f..bed5b023a396 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h
@@ -49,6 +49,12 @@ extern "C" {
 #define dmub_udelay(microseconds) udelay(microseconds)
 #endif
 
+/* Maximum number of streams on any ASIC. */
+#define DMUB_MAX_STREAMS 6
+
+/* Maximum number of planes on any ASIC. */
+#define DMUB_MAX_PLANES 6
+
 union dmub_addr {
struct {
uint32_t low_part;
@@ -57,6 +63,11 @@ union dmub_addr {
uint64_t quad_part;
 };
 
+struct dmub_psr_debug_flags {
+   uint8_t visual_confirm : 1;
+   uint8_t reserved : 7;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.17.1



[PATCH 06/19] drm/amd/display: check if REFCLK_CNTL register is present

2020-04-22 Thread Aurabindo Pillai
From: Dmytro Laktyushkin 

Check before programming the register since it isn't present on
all IPs using this code.

Signed-off-by: Dmytro Laktyushkin 
Reviewed-by: Eric Bernstein 
Acked-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 6ad4ed7da629..bd2ccf8eb9cf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -2304,7 +2304,8 @@ void dcn20_fpga_init_hw(struct dc *dc)
 
REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, 2);
REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1);
-   REG_WRITE(REFCLK_CNTL, 0);
+   if (REG(REFCLK_CNTL))
+   REG_WRITE(REFCLK_CNTL, 0);
//
 
 
-- 
2.17.1



Re: [PATCH] drm/amdgpu: Init data to avoid oops while reading pp_num_states.

2020-04-22 Thread Alex Deucher
On Wed, Apr 22, 2020 at 9:13 AM limingyu  wrote:
>
> For chips like CHIP_OLAND with si enabled (amdgpu.si_support=1),
> the amdgpu driver will expose pp_num_states in the /sys directory.
> At that point, reading the pp_num_states file will execute the
> amdgpu_get_pp_num_states func. In our case, the data hasn't
> been initialized, so the kernel will access an illegal
> address, trigger a segmentation fault, and the system will reboot soon:
>
> uos@uos-PC:~$ cat /sys/devices/pci\:00/\:00\:00.0/\:01\:00
> .0/pp_num_states
>
> Message from syslogd@uos-PC at Apr 22 09:26:20 ...
>  kernel:[   82.154129] Internal error: Oops: 9604 [#1] SMP
>
> This patch aims to fix this problem and avoid a kernel
> segmentation fault when reading the file.
>
> Signed-off-by: limingyu 
> Signed-off-by: zhoubinbin 

Applied.  Thanks!

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index abe94a55ecad..17de9dc60ea1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -444,8 +444,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device 
> *dev,
> ret = smu_get_power_num_states(&adev->smu, &data);
> if (ret)
> return ret;
> -   } else if (adev->powerplay.pp_funcs->get_pp_num_states)
> +   } else if (adev->powerplay.pp_funcs->get_pp_num_states) {
> amdgpu_dpm_get_pp_num_states(adev, &data);
> +   } else
> +   memset(&data, 0, sizeof(data));
>
> pm_runtime_mark_last_busy(ddev->dev);
> pm_runtime_put_autosuspend(ddev->dev);
> --
> 2.20.1
>


Re: [PATCH] drm: amdgpu: fix kernel-doc struct warning

2020-04-22 Thread Alex Deucher
On Tue, Apr 21, 2020 at 10:34 AM Christian König
 wrote:
>
> On 21.04.20 at 16:33, Christian König wrote:
> > On 20.04.20 at 03:50, Randy Dunlap wrote:
> >> Fix a kernel-doc warning of missing struct field description:
> >>
> >> ../drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:92: warning: Function
> >> parameter or member 'vm' not described in 'amdgpu_vm_eviction_lock'
> >
> > Can't we just document the function parameter instead? Should only be
> > one IIRC.
>
> On the other hand forget that, the format doesn't match a proper
> kernel-doc for a function anyway.
>
> Reviewed-by: Christian König 
>

Applied.  Thanks!

Alex

> >
> > Thanks,
> > Christian.
> >
> >>
> >> Fixes: a269e44989f3 ("drm/amdgpu: Avoid reclaim fs while eviction lock")
> >> Signed-off-by: Randy Dunlap 
> >> Cc: Signed-off-by: Alex Sierra 
> >> Cc: Felix Kuehling 
> >> Cc: Christian König 
> >> Cc: Alex Deucher 
> >> Cc: David (ChunMing) Zhou 
> >> Cc: amd-gfx@lists.freedesktop.org
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |2 +-
> >>   1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> --- lnx-57-rc2.orig/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> +++ lnx-57-rc2/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> @@ -82,7 +82,7 @@ struct amdgpu_prt_cb {
> >>   struct dma_fence_cb cb;
> >>   };
> >>   -/**
> >> +/*
> >>* vm eviction_lock can be taken in MMU notifiers. Make sure no
> >> reclaim-FS
> >>* happens while holding this lock anywhere to prevent deadlocks when
> >>* an MMU notifier runs in reclaim-FS context.
> >


Re: [PATCH] drm: amd/display: fix Kconfig help text

2020-04-22 Thread Alex Deucher
On Wed, Apr 22, 2020 at 10:00 AM Harry Wentland  wrote:
>
> On 2020-04-21 7:34 p.m., Randy Dunlap wrote:
> > From: Randy Dunlap 
> >
> > Fix help text: indent one tab + 2 spaces; end a sentence with a
> > period; and collapse short lines of text to one line.
> >
> > Fixes: 23c61b4599c4 ("drm/amd: Fix Kconfig indentation")
> > Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)")
> > Signed-off-by: Randy Dunlap 
> > Cc: Harry Wentland 
> > Cc: Alex Deucher 
> > Cc: Krzysztof Kozlowski 
>
> Reviewed-by: Harry Wentland 
>

Applied.  Thanks!

Alex

> Harry
>
> > ---
> >  drivers/gpu/drm/amd/display/Kconfig |8 ++--
> >  1 file changed, 2 insertions(+), 6 deletions(-)
> >
> > --- linux-next-20200421.orig/drivers/gpu/drm/amd/display/Kconfig
> > +++ linux-next-20200421/drivers/gpu/drm/amd/display/Kconfig
> > @@ -21,16 +21,12 @@ config DRM_AMD_DC_HDCP
> >   bool "Enable HDCP support in DC"
> >   depends on DRM_AMD_DC
> >   help
> > -  Choose this option
> > -  if you want to support
> > -  HDCP authentication
> > +   Choose this option if you want to support HDCP authentication.
> >
> >  config DEBUG_KERNEL_DC
> >   bool "Enable kgdb break in DC"
> >   depends on DRM_AMD_DC
> >   help
> > -   Choose this option
> > -   if you want to hit
> > -   kdgb_break in assert.
> > +   Choose this option if you want to hit kdgb_break in assert.
> >
> >  endmenu
> >


Re: [PATCH hmm 0/5] Adjust hmm_range_fault() API

2020-04-22 Thread Ralph Campbell



On 4/21/20 5:21 PM, Jason Gunthorpe wrote:

From: Jason Gunthorpe 

The API is a bit complicated for the uses we actually have, and
discussions about simplifying it have come up a number of times.

This small series removes the customizable pfn format and simplifies the
return code of hmm_range_fault().

All the drivers are adjusted to process the simplified format.
I would appreciate Tested-by's for the two drivers, thanks!
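
As a rough usage sketch after this series (hedged: flag and field names are
taken from the documentation update in patch 5 and may differ slightly from
any specific driver; notifier setup and retry handling are elided):

	unsigned long hmm_pfns[16];
	struct hmm_range range = {
		.notifier = &interval_sub->notifier,
		.start = addr,
		.end = addr + 16 * PAGE_SIZE,
		.hmm_pfns = hmm_pfns,
		.default_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
	};
	int ret;

	/* simplified return code: 0 on success or a negative errno */
	ret = hmm_range_fault(&range);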


For nouveau you can add:
Tested-by: Ralph Campbell 


Re: [PATCH] drm/amdgpu: protect kiq overrun

2020-04-22 Thread Christian König

On 22.04.20 at 16:50, Yintian Tao wrote:

Wait for the oldest sequence on the kiq ring
to be signaled in order to make sure there
will be no kiq overrun.

v2: remove the unused variable and correct
 the kiq max_sub_num value


First of all, this should probably be added to the fence handling code 
and not the kiq code.


Then you are kind of duplicating some of the functionality we have in 
the ring handling here. Probably better to avoid this; see 
amdgpu_fence_driver_init_ring() as well. That's also why I suggested 
using the num_fences_mask value.
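
As an illustration only, such a check in the fence code could look roughly
like this (hypothetical helper, not part of the patch; it reuses
num_fences_mask from amdgpu_fence_driver_init_ring() and elides sequence
wraparound handling):

static int amdgpu_fence_wait_for_slot(struct amdgpu_ring *ring,
				      signed long timeout)
{
	uint32_t sync_seq = ring->fence_drv.sync_seq;
	signed long r;

	/* fewer fences emitted than there are slots: nothing to wait for */
	if (sync_seq <= ring->fence_drv.num_fences_mask)
		return 0;

	/* wait for the fence whose slot the next emit would reuse */
	r = amdgpu_fence_wait_polling(ring,
				      sync_seq - ring->fence_drv.num_fences_mask,
				      timeout);
	return r < 1 ? -ETIME : 0;
}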


Regards,
Christian.



Signed-off-by: Yintian Tao 
---
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  6 
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  6 
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 30 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  6 
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  6 
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  7 +
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  7 +
  8 files changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..fac8b9713dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
  
  	spin_lock(&adev->gfx.kiq.ring_lock);

+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submission\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2ac5e7..fd42c126510f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
  
  	spin_lock(&adev->gfx.kiq.ring_lock);

+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submissions\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..84e66c45df37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 AMDGPU_RING_PRIO_DEFAULT);
if (r)
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+   else
+   kiq->max_sub_num = (ring->ring_size / 4) /
+   (ring->funcs->align_mask + 1);
  
  	return r;

  }
@@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
return 0;
  }
  
+int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq)

+{
+   uint32_t seq = 0;
+   signed long r = 0;
+
+   seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num);
+   if (seq > kiq->max_sub_num) {
+   r = amdgpu_fence_wait_polling(&kiq->ring, seq,
+ MAX_KIQ_REG_WAIT);
+   return r < 1 ? -ETIME : 0;
+   }
+
+   return 0;
+}
+
  uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
  {
signed long r, cnt = 0;
@@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
BUG_ON(!ring->funcs->emit_rreg);
  
  	spin_lock_irqsave(&kiq->ring_lock, flags);

+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_read;
+   }
+
if (amdgpu_device_wb_get(adev, ®_val_offs)) {
spin_unlock_irqrestore(&kiq->ring_lock, flags);
pr_err("critical bug! too many kiq readers\n");
@@ -728,6 +752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t 
reg, uint32_t v)
BUG_ON(!ring->funcs->emit_wreg);
  
  	spin_lock_irqsave(&kiq->ring_lock, flags);

+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_write;
+   }
+
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
amdgpu_fence_emit_polling(ring, &seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index ee698

Re: [PATCH] drm/amdgpu: protect kiq overrun

2020-04-22 Thread Christian König

Can't we check the rptr write-back to determine the available room in the
ring_alloc?

We removed that a couple of years ago.

One reason for this is that checking the rptr each time incurs quite a lot 
of overhead since it is an MMIO register.

The other reason is that the rptr is no longer a reliable way of figuring 
out if an engine has processed all data.

Some engines like the CP are pipelined, and the rptr only marks the 
beginning of the pipeline; subsequent pipeline steps might read from the 
ring buffer again.


If you want to figure out the processing status of ring allocations you 
need to take a look at the fences.


Regards,
Christian.
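
For illustration, checking completion through the fence rather than the
rptr looks roughly like this (sketch only, not from any patch in this
thread; amdgpu_fence_wait_polling() returns the remaining timeout, so a
result below 1 means a timeout):

	uint32_t last_seq = ring->fence_drv.sync_seq;	/* last emitted fence */
	signed long r;

	r = amdgpu_fence_wait_polling(ring, last_seq, MAX_KIQ_REG_WAIT);
	if (r < 1)
		DRM_ERROR("fence %u not signaled, ring contents still in flight\n",
			  last_seq);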

On 22.04.20 at 17:43, Liu, Shaoyun wrote:

[AMD Official Use Only - Internal Distribution Only]

I always had the impression that for each submission, once the ring space 
is allocated and before the fence is signaled, that ring space will always 
be reserved. If this cannot be guaranteed, it sounds like a big issue to me. 
Can't we check the rptr write-back to determine the available room in 
ring_alloc?

Regards
Shaoyun.liu
-Original Message-
From: Koenig, Christian 
Sent: Wednesday, April 22, 2020 10:57 AM
To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk 
; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: protect kiq overrun

The amdgpu_ring_alloc() function checks that the requested number of DWs 
doesn't exceed the maximum submission size.

It does NOT check if there is enough room on the ring. That would require MMIO 
access and that is what we want to avoid.

Regards,
Christian.

On 22.04.20 at 16:54, Liu, Shaoyun wrote:

[AMD Official Use Only - Internal Distribution Only]

I think each kiq operation will call ring_alloc for the packet space, so why 
not just check whether this allocation succeeds or not?

Shaoyun.liu

-Original Message-
From: amd-gfx  On Behalf Of
Yintian Tao
Sent: Wednesday, April 22, 2020 10:50 AM
To: Koenig, Christian ; Liu, Monk
; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian 
Subject: [PATCH] drm/amdgpu: protect kiq overrun

Wait for the oldest sequence on the kiq ring to be signaled in order to make 
sure there will be no kiq overrun.

v2: remove the unused variable and correct
  the kiq max_sub_num value

Signed-off-by: Yintian Tao 
---
   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  6 
   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  6 
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 30 +++
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  6 
   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  6 
   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  7 +
   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  7 +
   8 files changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..fac8b9713dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
   
   	spin_lock(&adev->gfx.kiq.ring_lock);

+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submission\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r); diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2ac5e7..fd42c126510f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
   
   	spin_lock(&adev->gfx.kiq.ring_lock);

+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submissions\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r); diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..84e66c45df37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 AMDGPU_RING_PRIO_DEFAULT);
if (r)
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+   else
+   kiq->max_sub_num = (ring->ring_size / 4) /
+   (ring->funcs->align_mask + 1);
   
   	return r;

   }
@@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,

Re: [PATCH] amdgpu: fixes memleak issue when init failed

2020-04-22 Thread Christian König

On 22.04.20 at 17:51, Ruhl, Michael J wrote:

-Original Message-
From: dri-devel  On Behalf Of
Bernard Zhao
Sent: Tuesday, April 21, 2020 7:17 AM
To: Alex Deucher ; Christian König
; David (ChunMing) Zhou
; David Airlie ; Daniel Vetter
; Tom St Denis ; Ori Messinger
; Sam Ravnborg ; Bernard
Zhao ; amd-gfx@lists.freedesktop.org; dri-
de...@lists.freedesktop.org; linux-ker...@vger.kernel.org
Cc: opensource.ker...@vivo.com
Subject: [PATCH] amdgpu: fixes memleak issue when init failed

When VRAM manager and DRM MM init fails, there is no operation
to free the kzalloc'd memory or remove the device files.
This will lead to a memory leak and cause stability issues.

Signed-off-by: Bernard Zhao 
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 24

1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 82a3299e53c0..4c5fb153e6b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -175,30 +175,44 @@ static int amdgpu_vram_mgr_init(struct
ttm_mem_type_manager *man,
ret = device_create_file(adev->dev,
&dev_attr_mem_info_vram_total);
if (ret) {
DRM_ERROR("Failed to create device file
mem_info_vram_total\n");
-   return ret;
+   goto VRAM_TOTAL_FAIL;
}
ret = device_create_file(adev->dev,
&dev_attr_mem_info_vis_vram_total);

Have you looked at the DEVICE_ATTR mechanism?


Yeah, I've thought about that as well, but I didn't have time to look into 
whether that could be applied here or not.


Regards,
Christian.



It is set up to add device files.  You won't get the granularity of each file,
but it has a lot more automatic-ness to setting this stuff up.

Mike


if (ret) {
DRM_ERROR("Failed to create device file
mem_info_vis_vram_total\n");
-   return ret;
+   goto VIS_VRAM_TOTAL_FAIL;
}
ret = device_create_file(adev->dev,
&dev_attr_mem_info_vram_used);
if (ret) {
DRM_ERROR("Failed to create device file
mem_info_vram_used\n");
-   return ret;
+   goto VRAM_USED_FAIL;
}
ret = device_create_file(adev->dev,
&dev_attr_mem_info_vis_vram_used);
if (ret) {
DRM_ERROR("Failed to create device file
mem_info_vis_vram_used\n");
-   return ret;
+   goto VIS_VRAM_USED_FAIL;
}
ret = device_create_file(adev->dev,
&dev_attr_mem_info_vram_vendor);
if (ret) {
DRM_ERROR("Failed to create device file
mem_info_vram_vendor\n");
-   return ret;
+   goto VRAM_VENDOR_FAIL;
}

return 0;
+
+VRAM_VENDOR_FAIL:
+   device_remove_file(adev->dev,
&dev_attr_mem_info_vis_vram_used);
+VIS_VRAM_USED_FAIL:
+   device_remove_file(adev->dev, &dev_attr_mem_info_vram_used);
+VRAM_USED_FAIL:
+   device_remove_file(adev->dev,
&dev_attr_mem_info_vis_vram_total);
+VIS_VRAM_TOTAL_FAIL:
+   device_remove_file(adev->dev, &dev_attr_mem_info_vram_total);
+VRAM_TOTAL_FAIL:
+   kfree(mgr);
+   man->priv = NULL;
+
+   return ret;
}

/**
--
2.26.2



Re: [PATCH hmm 5/5] mm/hmm: remove the customizable pfn format from hmm_range_fault

2020-04-22 Thread Felix Kuehling
[+Philip Yang]

On 2020-04-21 at 8:21 p.m., Jason Gunthorpe wrote:
> From: Jason Gunthorpe 
>
> Presumably the intent here was that hmm_range_fault() could put the data
> into some HW specific format and thus avoid some work. However, nothing
> actually does that, and it isn't clear how anything actually could do that
> as hmm_range_fault() provides CPU addresses which must be DMA mapped.
>
> Perhaps there is some special HW that does not need DMA mapping, but we
> don't have any examples of this, and the theoretical performance win of
> avoiding an extra scan over the pfns array doesn't seem worth the
> complexity. Plus pfns needs to be scanned anyhow to sort out any
> DEVICE_PRIVATE pages.
>
> This version replaces the uint64_t with an unsigned long containing a pfn
> and fixed flags. On input, flags is filled with the HMM_PFN_REQ_* values; on
> successful output it is filled with HMM_PFN_* values, describing the state
> of the pages.
>
> amdgpu is simple to convert, it doesn't use snapshot and doesn't use
> per-page flags.
>
> nouveau uses only 16 hmm_pte entries at most (ie fits in a few cache
> lines), and it sweeps over its pfns array a couple of times anyhow.
>
> Signed-off-by: Jason Gunthorpe 
> Signed-off-by: Christoph Hellwig 

Hi Jason,

I pointed out a typo in the documentation inline. Other than that, the
series is

Acked-by: Felix Kuehling 

I'll try to build it and run some basic tests later.


> ---
>  Documentation/vm/hmm.rst|  26 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  35 ++
>  drivers/gpu/drm/nouveau/nouveau_dmem.c  |  60 +++--
>  drivers/gpu/drm/nouveau/nouveau_dmem.h  |   4 +-
>  drivers/gpu/drm/nouveau/nouveau_svm.c   |  52 
>  include/linux/hmm.h |  99 ++-
>  mm/hmm.c| 160 +++-
>  7 files changed, 204 insertions(+), 232 deletions(-)
>
> diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
> index 9924f2caa0184c..73a9b8c858e5d9 100644
> --- a/Documentation/vm/hmm.rst
> +++ b/Documentation/vm/hmm.rst
> @@ -185,9 +185,6 @@ The usage pattern is::
>range.start = ...;
>range.end = ...;
>range.pfns = ...;
> -  range.flags = ...;
> -  range.values = ...;
> -  range.pfn_shift = ...;
>  
>if (!mmget_not_zero(interval_sub->notifier.mm))
>return -EFAULT;
> @@ -229,15 +226,10 @@ The hmm_range struct has 2 fields, default_flags and 
> pfn_flags_mask, that specif
>  fault or snapshot policy for the whole range instead of having to set them
>  for each entry in the pfns array.
>  
> -For instance, if the device flags for range.flags are::
> +For instance if the device driver wants pages for a range with at least read
> +permission, it sets::
>  
> -range.flags[HMM_PFN_VALID] = (1 << 63);
> -range.flags[HMM_PFN_WRITE] = (1 << 62);
> -
> -and the device driver wants pages for a range with at least read permission,
> -it sets::
> -
> -range->default_flags = (1 << 63);
> +range->default_flags = HMM_PFN_REQ_VALID;

This should be HMM_PFN_REQ_FAULT.


>  range->pfn_flags_mask = 0;
>  
>  and calls hmm_range_fault() as described above. This will fill fault all 
> pages
> @@ -246,18 +238,18 @@ in the range with at least read permission.
>  Now let's say the driver wants to do the same except for one page in the 
> range for
>  which it wants to have write permission. Now driver set::
>  
> -range->default_flags = (1 << 63);
> -range->pfn_flags_mask = (1 << 62);
> -range->pfns[index_of_write] = (1 << 62);
> +range->default_flags = HMM_PFN_REQ_VALID;

HMM_PFN_REQ_FAULT

Regards,
  Felix


> +range->pfn_flags_mask = HMM_PFN_REQ_WRITE;
> +range->pfns[index_of_write] = HMM_PFN_REQ_WRITE;
>  
>  With this, HMM will fault in all pages with at least read (i.e., valid) and 
> for the
>  address == range->start + (index_of_write << PAGE_SHIFT) it will fault with
>  write permission i.e., if the CPU pte does not have write permission set 
> then HMM
>  will call handle_mm_fault().
>  
> -Note that HMM will populate the pfns array with write permission for any page
> -that is mapped with CPU write permission no matter what values are set
> -in default_flags or pfn_flags_mask.
> +After hmm_range_fault completes the flag bits are set to the current state of
> +the page tables, ie HMM_PFN_VALID | HMM_PFN_WRITE will be set if the page is
> +writable.
>  
>  
>  Represent and manage device memory from core kernel point of view
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 449083f9f8a2bf..bcfa8c26647d5e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -766,17 +766,6 @@ struct amdgpu_ttm_tt {
>  };
>  
>  #ifdef CONFIG_DRM_AMDGPU_USERPTR
> -/* flags used by HMM internal, not related to CPU/GPU PTE flags */
> -static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
> -

RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Tao, Yintian
Hi Shaoyun


Yes, you are right. It is a rare corner case.


Best Regards
Yintian Tao

-Original Message-
From: Liu, Shaoyun  
Sent: April 22, 2020 23:51
To: Tao, Yintian ; Koenig, Christian 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

[AMD Official Use Only - Internal Distribution Only]

OK, I see: the submission itself is signaled, so the ring space for this 
submission will be re-used by another submission, but the CPU has still not 
read the output value yet.

Thanks
Shaoyun.liu

-Original Message-
From: Tao, Yintian 
Sent: Wednesday, April 22, 2020 11:47 AM
To: Tao, Yintian ; Liu, Shaoyun ; 
Koenig, Christian ; Liu, Monk ; 
Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Adding more:

Especially for the multi-VF environment, we have to wait through msleep() 
instead of udelay(), because the max udelay time would be 
15 VFs * 6 ms (world switch) = 90 ms.


-Original Message-
From: amd-gfx  On Behalf Of Tao, Yintian
Sent: April 22, 2020 23:43
To: Liu, Shaoyun ; Koenig, Christian 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi  Shaoyun



No, the second patch can't solve this rare case, because only Slot-D is 
signaled and Slot-A can be overwritten. 
The second patch assumes that once the sequence is signaled, the Slot-A 
buffer can be freed.

if you store the output value in the ring buffer itself, each kiq operation 
will be atomic and self-contained.
[yttao]: If we want to really make the kiq operation atomic, then we have to 
do the following:
spin_lock_irqsave(&kiq->ring_lock, flags);
...
/* fulfill the command buffer */
...
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
	goto failed_kiq_write;

might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
	msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); /* sleeping here breaks atomicity,
					       * so we would have to use udelay directly */
	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
spin_unlock_irqrestore(&kiq->ring_lock, flags);


Best Regards
Yintian Tao
-Original Message-
From: Liu, Shaoyun 
Sent: April 22, 2020 23:35
To: Tao, Yintian ; Koenig, Christian 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

[AMD Official Use Only - Internal Distribution Only]

This is the issue you are trying to solve with your second patch (protect 
kiq overrun). For the current patch, if you store the output value in the 
ring buffer itself, each kiq operation will be atomic and self-contained.

Shaoyun.liu

-Original Message-
From: Tao, Yintian 
Sent: Wednesday, April 22, 2020 11:00 AM
To: Koenig, Christian ; Liu, Shaoyun 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi  Shaoyun


There is one rare corner case which will raise a problem when using the 
ring buffer to store values.

Assume there are only four slots in total in the KIQ ring buffer,

and these four slots are filled with commands to read registers: Slot-A, 
Slot-B, Slot-C, Slot-D.

They are all waiting for their sequence fences to be signaled. Now a new 
command to write a register is submitted:

1. Slot-A is under msleep and has not yet read its register.
2. Slot-B is under msleep and has not yet read its register.
3. Slot-C is under msleep and has not yet read its register.
4. Slot-D happens to find its sequence signaled, and here the new write 
command will overwrite the Slot-A contents.
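
For clarity, this is the shape of the per-read fix under discussion (a
sketch based on the patch description, not the exact diff): each reader
requests its own write-back slot, so a signaled-but-unread result cannot
be clobbered when the ring slot gets reused.

	uint32_t reg_val_offs, value;

	if (amdgpu_device_wb_get(adev, &reg_val_offs))
		return ~0;	/* no free write-back slot */

	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
	/* commit the ring and wait for the fence here */
	value = adev->wb.wb[reg_val_offs];
	amdgpu_device_wb_free(adev, reg_val_offs);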


Best Regards
Yintian Tao

-Original Message-
From: Koenig, Christian 
Sent: April 22, 2020 22:52
To: Liu, Shaoyun ; Tao, Yintian ; 
Liu, Monk ; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi Shaoyun,

the ring buffer is usually filled with commands, not read results.

Allocating extra space would only work if we use the special NOP command, 
and that is way more complicated and fragile than just using the wb 
functions, which were made for this stuff.

Regards,
Christian.

On 22.04.20 at 16:48, Liu, Shaoyun wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi, Yintian & Christian,
> I still don't understand why we need this complicated change here. Why 
> can we not just allocate a bit more extra space in the ring for each read 
> and use that space to store the output value?
>
> Regards
> Shaoyun.liu
> 
>
> -Original Message-
> From: amd-gfx  On Behalf Of 
> Christian König
> Sent: Wednesday, April 22, 2020 8:42 AM
> To: Tao, Yintian ; Liu, Monk ; 
> Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read 
> reg
>
> On 22.04.20 at 14:36, Yintian Tao wrote:
>> According to the current 

RE: [PATCH] amdgpu: fixes memleak issue when init failed

2020-04-22 Thread Ruhl, Michael J
>-Original Message-
>From: dri-devel  On Behalf Of
>Bernard Zhao
>Sent: Tuesday, April 21, 2020 7:17 AM
>To: Alex Deucher ; Christian König
>; David (ChunMing) Zhou
>; David Airlie ; Daniel Vetter
>; Tom St Denis ; Ori Messinger
>; Sam Ravnborg ; Bernard
>Zhao ; amd-gfx@lists.freedesktop.org; dri-
>de...@lists.freedesktop.org; linux-ker...@vger.kernel.org
>Cc: opensource.ker...@vivo.com
>Subject: [PATCH] amdgpu: fixes memleak issue when init failed
>
>When VRAM manager and DRM MM init fails, there is no operation
>to free the kzalloc'd memory or remove the device files.
>This will lead to a memory leak and cause stability issues.
>
>Signed-off-by: Bernard Zhao 
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 24
>
> 1 file changed, 19 insertions(+), 5 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>index 82a3299e53c0..4c5fb153e6b4 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>@@ -175,30 +175,44 @@ static int amdgpu_vram_mgr_init(struct
>ttm_mem_type_manager *man,
>   ret = device_create_file(adev->dev,
>&dev_attr_mem_info_vram_total);
>   if (ret) {
>   DRM_ERROR("Failed to create device file
>mem_info_vram_total\n");
>-  return ret;
>+  goto VRAM_TOTAL_FAIL;
>   }
>   ret = device_create_file(adev->dev,
>&dev_attr_mem_info_vis_vram_total);

Have you looked at the DEVICE_ATTR mechanism?

It is set up to add device files.  You won't get the granularity of each file,
but it has a lot more automatic-ness to setting this stuff up.

Mike
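
A hedged sketch of that route (the attribute names are reused from the
patch; the goto target is hypothetical and would kfree(mgr) and clear
man->priv): one group registration replaces the five device_create_file()
calls, and the core unwinds automatically on failure.

static struct attribute *amdgpu_vram_mgr_attrs[] = {
	&dev_attr_mem_info_vram_total.attr,
	&dev_attr_mem_info_vis_vram_total.attr,
	&dev_attr_mem_info_vram_used.attr,
	&dev_attr_mem_info_vis_vram_used.attr,
	&dev_attr_mem_info_vram_vendor.attr,
	NULL
};

static const struct attribute_group amdgpu_vram_mgr_attr_group = {
	.attrs = amdgpu_vram_mgr_attrs
};

/* in amdgpu_vram_mgr_init(): */
ret = sysfs_create_group(&adev->dev->kobj, &amdgpu_vram_mgr_attr_group);
if (ret)
	goto err_free_mgr;	/* single unwind point */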

>   if (ret) {
>   DRM_ERROR("Failed to create device file
>mem_info_vis_vram_total\n");
>-  return ret;
>+  goto VIS_VRAM_TOTAL_FAIL;
>   }
>   ret = device_create_file(adev->dev,
>&dev_attr_mem_info_vram_used);
>   if (ret) {
>   DRM_ERROR("Failed to create device file
>mem_info_vram_used\n");
>-  return ret;
>+  goto VRAM_USED_FAIL;
>   }
>   ret = device_create_file(adev->dev,
>&dev_attr_mem_info_vis_vram_used);
>   if (ret) {
>   DRM_ERROR("Failed to create device file
>mem_info_vis_vram_used\n");
>-  return ret;
>+  goto VIS_VRAM_USED_FAIL;
>   }
>   ret = device_create_file(adev->dev,
>&dev_attr_mem_info_vram_vendor);
>   if (ret) {
>   DRM_ERROR("Failed to create device file
>mem_info_vram_vendor\n");
>-  return ret;
>+  goto VRAM_VENDOR_FAIL;
>   }
>
>   return 0;
>+
>+VRAM_VENDOR_FAIL:
>+  device_remove_file(adev->dev,
>&dev_attr_mem_info_vis_vram_used);
>+VIS_VRAM_USED_FAIL:
>+  device_remove_file(adev->dev, &dev_attr_mem_info_vram_used);
>+VRAM_USED_FAIL:
>+  device_remove_file(adev->dev,
>&dev_attr_mem_info_vis_vram_total);
>+VIS_VRAM_TOTAL_FAIL:
>+  device_remove_file(adev->dev, &dev_attr_mem_info_vram_total);
>+VRAM_TOTAL_FAIL:
>+  kfree(mgr);
>+  man->priv = NULL;
>+
>+  return ret;
> }
>
> /**
>--
>2.26.2
>


RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Liu, Shaoyun
[AMD Official Use Only - Internal Distribution Only]

OK, I see: the submission itself is signaled, so the ring space for this 
submission will be re-used by another submission, but the CPU has still not 
read the output value yet.

Thanks 
Shaoyun.liu

-Original Message-
From: Tao, Yintian  
Sent: Wednesday, April 22, 2020 11:47 AM
To: Tao, Yintian ; Liu, Shaoyun ; 
Koenig, Christian ; Liu, Monk ; 
Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Adding more:

Especially for the multi-VF environment, we have to wait through msleep() 
instead of udelay(), because the max udelay time would be 
15 VFs * 6 ms (world switch) = 90 ms.


-Original Message-
From: amd-gfx  On Behalf Of Tao, Yintian
Sent: April 22, 2020 23:43
To: Liu, Shaoyun ; Koenig, Christian 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi  Shaoyun



No, the second patch can't solve this rare case, because only Slot-D is 
signaled and Slot-A can be overwritten. 
The second patch assumes that once the sequence is signaled, the Slot-A 
buffer can be freed.

if you store the output value in the ring buffer itself, each kiq operation 
will be atomic and self-contained.
[yttao]: If we want to really make the kiq operation atomic, then we have to 
do the following:
spin_lock_irqsave(&kiq->ring_lock, flags);
...
/* fulfill the command buffer */
...
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
	goto failed_kiq_write;

might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
	msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); /* sleeping here breaks atomicity,
					       * so we would have to use udelay directly */
	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
spin_unlock_irqrestore(&kiq->ring_lock, flags);


Best Regards
Yintian Tao
-Original Message-
From: Liu, Shaoyun 
Sent: April 22, 2020 23:35
To: Tao, Yintian ; Koenig, Christian 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

[AMD Official Use Only - Internal Distribution Only]

This is the issue you are trying to solve with your second patch (protect 
kiq overrun). For the current patch, if you store the output value in the 
ring buffer itself, each kiq operation will be atomic and self-contained.

Shaoyun.liu

-Original Message-
From: Tao, Yintian 
Sent: Wednesday, April 22, 2020 11:00 AM
To: Koenig, Christian ; Liu, Shaoyun 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi  Shaoyun


There is one rare corner case which will raise a problem when using the 
ring buffer to store values.

Assume there are only four slots in total in the KIQ ring buffer,

and these four slots are filled with commands to read registers: Slot-A, 
Slot-B, Slot-C, Slot-D.

They are all waiting for their sequence fences to be signaled. Now a new 
command to write a register is submitted:

1. Slot-A is under msleep and has not yet read its register.
2. Slot-B is under msleep and has not yet read its register.
3. Slot-C is under msleep and has not yet read its register.
4. Slot-D happens to find its sequence signaled, and here the new write 
command will overwrite the Slot-A contents.


Best Regards
Yintian Tao

-Original Message-
From: Koenig, Christian 
Sent: April 22, 2020 22:52
To: Liu, Shaoyun ; Tao, Yintian ; 
Liu, Monk ; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi Shaoyun,

the ring buffer is usually filled with commands, not read results.

Allocating extra space would only work if we use the special NOP command, 
and that is way more complicated and fragile than just using the wb 
functions, which were made for this stuff.

Regards,
Christian.

On 22.04.20 at 16:48, Liu, Shaoyun wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi, Yintian & Christian,
> I still don't understand why we need this complicated change here. Why 
> can we not just allocate a bit more extra space in the ring for each read 
> and use that space to store the output value?
>
> Regards
> Shaoyun.liu
> 
>
> -Original Message-
> From: amd-gfx  On Behalf Of 
> Christian König
> Sent: Wednesday, April 22, 2020 8:42 AM
> To: Tao, Yintian ; Liu, Monk ; 
> Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read 
> reg
>
> On 22.04.20 at 14:36, Yintian Tao wrote:
>> According to the current kiq read register method, there will be a race
>> condition when using KIQ to read registers if multiple clients want to
>> read at the same time, just like the example below:
>> 1. client-A starts to read REG-0 through KIQ
>> 2. client-A polls the seqno-0
>> 3. client-B starts to read REG-1 through KIQ
>> 4. client-B polls the seqno-1
>> 5. the k

RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Tao, Yintian
Adding more:

Especially for the multi-VF environment, we have to wait through msleep() 
instead of udelay(), because the max udelay time would be 
15 VFs * 6 ms (world switch) = 90 ms.


-Original Message-
From: amd-gfx  On Behalf Of Tao, Yintian
Sent: April 22, 2020 23:43
To: Liu, Shaoyun ; Koenig, Christian 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi  Shaoyun



No, the second patch can't solve this rare case, because only Slot-D is 
signaled and Slot-A can be overwritten. 
The second patch assumes that once the sequence is signaled, the Slot-A 
buffer can be freed.

if you store the output value in the ring buffer itself, each kiq operation 
will be atomic and self-contained.
[yttao]: If we want to really make the kiq operation atomic, then we have to 
do the following:
spin_lock_irqsave(&kiq->ring_lock, flags);
...
/* fulfill the command buffer */
...
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
	goto failed_kiq_write;

might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
	msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); /* sleeping here breaks atomicity,
					       * so we would have to use udelay directly */
	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
spin_unlock_irqrestore(&kiq->ring_lock, flags);


Best Regards
Yintian Tao
-Original Message-
From: Liu, Shaoyun 
Sent: April 22, 2020 23:35
To: Tao, Yintian ; Koenig, Christian 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

[AMD Official Use Only - Internal Distribution Only]

This is the issue you are trying to solve with your second patch (protect 
kiq overrun). For the current patch, if you store the output value in the 
ring buffer itself, each kiq operation will be atomic and self-contained.

Shaoyun.liu

-Original Message-
From: Tao, Yintian 
Sent: Wednesday, April 22, 2020 11:00 AM
To: Koenig, Christian ; Liu, Shaoyun 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi  Shaoyun


There is one rare corner case which will raise a problem when using the 
ring buffer to store values.

Assume there are only four slots in total in the KIQ ring buffer,

and these four slots are filled with commands to read registers: Slot-A, 
Slot-B, Slot-C, Slot-D.

They are all waiting for their sequence fences to be signaled. Now a new 
command to write a register is submitted:

1. Slot-A is under msleep and has not yet read its register.
2. Slot-B is under msleep and has not yet read its register.
3. Slot-C is under msleep and has not yet read its register.
4. Slot-D happens to find its sequence signaled, and here the new write 
command will overwrite the Slot-A contents.


Best Regards
Yintian Tao

-Original Message-
From: Koenig, Christian 
Sent: April 22, 2020 22:52
To: Liu, Shaoyun ; Tao, Yintian ; 
Liu, Monk ; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi Shaoyun,

the ring buffer is usually filled with commands, not read results.

Allocating extra space would only work if we use the special NOP command, 
and that is way more complicated and fragile than just using the wb 
functions, which were made for this stuff.

Regards,
Christian.

On 22.04.20 at 16:48, Liu, Shaoyun wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi, Yintian & Christian,
> I still don't understand why we need this complicated change here. Why 
> can we not just allocate a bit more extra space in the ring for each read 
> and use that space to store the output value?
>
> Regards
> Shaoyun.liu
> 
>
> -Original Message-
> From: amd-gfx  On Behalf Of 
> Christian König
> Sent: Wednesday, April 22, 2020 8:42 AM
> To: Tao, Yintian ; Liu, Monk ; 
> Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read 
> reg
>
> On 22.04.20 at 14:36, Yintian Tao wrote:
>> According to the current kiq read register method, there will be a race
>> condition when using KIQ to read registers if multiple clients want to
>> read at the same time, just like the example below:
>> 1. client-A starts to read REG-0 through KIQ
>> 2. client-A polls the seqno-0
>> 3. client-B starts to read REG-1 through KIQ
>> 4. client-B polls the seqno-1
>> 5. the kiq completes these two read operations
>> 6. client-A reads the register at the wb buffer and
>>    gets the REG-1 value
>>
>> Therefore, use amdgpu_device_wb_get() to request reg_val_offs for 
>> each kiq read register.
>>
>> v2: fix the error remove
>>
>> Signed-off-by: Yintian Tao 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  2 +-
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 19 ++---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -
>>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
>>  

RE: [PATCH] drm/amdgpu: protect kiq overrun

2020-04-22 Thread Liu, Shaoyun
[AMD Official Use Only - Internal Distribution Only]

I always had the impression that for each submission, once the ring space 
is allocated and before the fence is signaled, that ring space will always 
be reserved. If this cannot be guaranteed, it sounds like a big issue to me. 
Can't we check the rptr write-back to determine the available room in 
ring_alloc?

Regards
Shaoyun.liu
-Original Message-
From: Koenig, Christian  
Sent: Wednesday, April 22, 2020 10:57 AM
To: Liu, Shaoyun ; Tao, Yintian ; 
Liu, Monk ; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: protect kiq overrun

The amdgpu_ring_alloc() function checks that the requested number of DWs 
doesn't exceed the maximum submission size.

It does NOT check if there is enough room on the ring. That would require MMIO 
access and that is what we want to avoid.

Regards,
Christian.

Am 22.04.20 um 16:54 schrieb Liu, Shaoyun:
> [AMD Official Use Only - Internal Distribution Only]
>
> I think each kiq operation will call ring_alloc for the packet space, so 
> why not just check whether this allocation succeeds or not?
>
> Shaoyun.liu
>
> -Original Message-
> From: amd-gfx  On Behalf Of 
> Yintian Tao
> Sent: Wednesday, April 22, 2020 10:50 AM
> To: Koenig, Christian ; Liu, Monk 
> ; Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian 
> Subject: [PATCH] drm/amdgpu: protect kiq overrun
>
> Wait for the oldest sequence on the kiq ring to be signaled in order to make 
> sure there will be no kiq overrun.
>
> v2: remove the unused variable and correct
>  the kiq max_sub_num value
>
> Signed-off-by: Yintian Tao 
> ---
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  6 
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  6 
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 30 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  6 
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  6 
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  7 +
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  7 +
>   8 files changed, 71 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 691c89705bcd..fac8b9713dfc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void 
> *mqd,
>mec, pipe, queue_id);
>   
>   spin_lock(&adev->gfx.kiq.ring_lock);
> + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
> + if (r) {
> + pr_err("critical bug! too many kiq submission\n");
> + goto out_unlock;
> + }
> +
>   r = amdgpu_ring_alloc(kiq_ring, 7);
>   if (r) {
>   pr_err("Failed to alloc KIQ (%d).\n", r); diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index df841c2ac5e7..fd42c126510f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void 
> *mqd,
>mec, pipe, queue_id);
>   
>   spin_lock(&adev->gfx.kiq.ring_lock);
> + r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
> + if (r) {
> + pr_err("critical bug! too many kiq submissions\n");
> + goto out_unlock;
> + }
> +
>   r = amdgpu_ring_alloc(kiq_ring, 7);
>   if (r) {
>   pr_err("Failed to alloc KIQ (%d).\n", r); diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index a721b0e0ff69..84e66c45df37 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
>AMDGPU_RING_PRIO_DEFAULT);
>   if (r)
>   dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
> + else
> + kiq->max_sub_num = (ring->ring_size / 4) /
> + (ring->funcs->align_mask + 1);
>   
>   return r;
>   }
> @@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device 
> *adev,
>   return 0;
>   }
>   
> +int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) {
> + uint32_t seq = 0;
> + signed long r = 0;
> +
> + seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num);
> + if (seq > kiq->max_sub_num) {
> + r = amdgpu_fence_wait_polling(&kiq->ring, seq,
> +   MAX_KIQ_REG_WAIT);
> + return r < 1 ? -ETIME : 0;
> + }
> +
> + return 0;
> +}
> +
>   uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)  {
>   signed long r, cnt = 0;
> @@ -6

RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Tao, Yintian
Hi  Shaoyun



No, the second patch can't solve this rare case, because only Slot-D is 
signaled while Slot-A can still be overwritten.
The second patch assumes that once the sequence is signaled, the Slot-A 
buffer can be freed.

> if you store the output value in each ring buffer itself, each kiq 
> operation will be atomic and self-contained.
[yttao]: If we want to really make the kiq operation atomic, then we have to 
do the things below:

spin_lock_irqsave(&kiq->ring_lock, flags);
...
/* fill the command buffer */
...
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
	goto failed_kiq_write;

might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
	msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); /* this would break atomicity;
	                                       * we would need to use udelay()
	                                       * directly instead */
	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
spin_unlock_irqrestore(&kiq->ring_lock, flags);


Best Regards
Yintian Tao 
-Original Message-
From: Liu, Shaoyun  
Sent: April 22, 2020 23:35
To: Tao, Yintian ; Koenig, Christian 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

[AMD Official Use Only - Internal Distribution Only]

This is the issue you try to solve with your second patch (protect kiq 
overrun). For the current patch, if you store the output value in each ring 
buffer itself, each kiq operation will be atomic and self-contained. 

Shaoyun.liu

-Original Message-
From: Tao, Yintian 
Sent: Wednesday, April 22, 2020 11:00 AM
To: Koenig, Christian ; Liu, Shaoyun 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi  Shaoyun


There is one rare corner case which will raise a problem when using the ring 
buffer to store values.

Assume there are only four slots in total in the KIQ ring buffer.

And these four slots are filled with commands to read registers: Slot-A, 
Slot-B, Slot-C, Slot-D.

And they are waiting for their sequence fences to be signaled when one new 
command to write a register is submitted:

1. Slot-A is under msleep, not yet reading its register.
2. Slot-B is under msleep, not yet reading its register.
3. Slot-C is under msleep, not yet reading its register.
4. Slot-D happens to find its sequence signaled, and the new write command 
then overwrites the Slot-A contents.


Best Regards
Yintian Tao

-Original Message-
From: Koenig, Christian 
Sent: April 22, 2020 22:52
To: Liu, Shaoyun ; Tao, Yintian ; 
Liu, Monk ; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi Shaoyun,

the ring buffer is usually filled with commands, not read results.

Allocating extra space would only work if we use the special NOP command, and 
that is way more complicated and fragile than just using the wb functions 
which were made for this stuff.

Regards,
Christian.

Am 22.04.20 um 16:48 schrieb Liu, Shaoyun:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi Yintian & Christian,
> I still don't understand why we need this complicated change here. Why not 
> just allocate a little extra space in the ring for each read and use that 
> space to store the output value?
>
> Regards
> Shaoyun.liu
> 
>
> -Original Message-
> From: amd-gfx  On Behalf Of 
> Christian König
> Sent: Wednesday, April 22, 2020 8:42 AM
> To: Tao, Yintian ; Liu, Monk ; 
> Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read 
> reg
>
> Am 22.04.20 um 14:36 schrieb Yintian Tao:
>> According to the current kiq read register method, there will be a race 
>> condition when using KIQ to read a register if multiple clients want to 
>> read at the same time, just like the example below:
>> 1. client-A starts to read REG-0 through KIQ
>> 2. client-A polls the seqno-0
>> 3. client-B starts to read REG-1 through KIQ
>> 4. client-B polls the seqno-1
>> 5. the kiq completes these two read operations
>> 6. client-A reads the register at the wb buffer and
>>    gets REG-1 value
>>
>> Therefore, use amdgpu_device_wb_get() to request reg_val_offs for 
>> each kiq read register.
>>
>> v2: fix the error remove
>>
>> Signed-off-by: Yintian Tao 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  2 +-
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 19 ++---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -
>>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
>>drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  7 +++---
>>drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  7 +++---
>>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 
>>7 files changed, 41 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 4e1d4cfe7a9f..7ee5a4da398a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>

RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Liu, Shaoyun
[AMD Official Use Only - Internal Distribution Only]

This is the issue you try to solve with your second patch (protect kiq 
overrun). For the current patch, if you store the output value in each ring 
buffer itself, each kiq operation will be atomic and self-contained. 

Shaoyun.liu

-Original Message-
From: Tao, Yintian  
Sent: Wednesday, April 22, 2020 11:00 AM
To: Koenig, Christian ; Liu, Shaoyun 
; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi  Shaoyun


There is one rare corner case which will raise a problem when using the ring 
buffer to store values.

Assume there are only four slots in total in the KIQ ring buffer.

And these four slots are filled with commands to read registers: Slot-A, 
Slot-B, Slot-C, Slot-D.

And they are waiting for their sequence fences to be signaled when one new 
command to write a register is submitted:

1. Slot-A is under msleep, not yet reading its register.
2. Slot-B is under msleep, not yet reading its register.
3. Slot-C is under msleep, not yet reading its register.
4. Slot-D happens to find its sequence signaled, and the new write command 
then overwrites the Slot-A contents.


Best Regards
Yintian Tao

-Original Message-
From: Koenig, Christian 
Sent: April 22, 2020 22:52
To: Liu, Shaoyun ; Tao, Yintian ; 
Liu, Monk ; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi Shaoyun,

the ring buffer is usually filled with commands, not read results.

Allocating extra space would only work if we use the special NOP command, and 
that is way more complicated and fragile than just using the wb functions 
which were made for this stuff.

Regards,
Christian.

Am 22.04.20 um 16:48 schrieb Liu, Shaoyun:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi Yintian & Christian,
> I still don't understand why we need this complicated change here. Why not 
> just allocate a little extra space in the ring for each read and use that 
> space to store the output value?
>
> Regards
> Shaoyun.liu
> 
>
> -Original Message-
> From: amd-gfx  On Behalf Of 
> Christian König
> Sent: Wednesday, April 22, 2020 8:42 AM
> To: Tao, Yintian ; Liu, Monk ; 
> Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read 
> reg
>
> Am 22.04.20 um 14:36 schrieb Yintian Tao:
>> According to the current kiq read register method, there will be a race 
>> condition when using KIQ to read a register if multiple clients want to 
>> read at the same time, just like the example below:
>> 1. client-A starts to read REG-0 through KIQ
>> 2. client-A polls the seqno-0
>> 3. client-B starts to read REG-1 through KIQ
>> 4. client-B polls the seqno-1
>> 5. the kiq completes these two read operations
>> 6. client-A reads the register at the wb buffer and
>>    gets REG-1 value
>>
>> Therefore, use amdgpu_device_wb_get() to request reg_val_offs for 
>> each kiq read register.
>>
>> v2: fix the error remove
>>
>> Signed-off-by: Yintian Tao 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  2 +-
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 19 ++---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -
>>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
>>drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  7 +++---
>>drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  7 +++---
>>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 
>>7 files changed, 41 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 4e1d4cfe7a9f..7ee5a4da398a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
>> amdgpu_cs_parser *p,
>>/*
>> * Writeback
>> */
>> -#define AMDGPU_MAX_WB 128   /* Reserve at most 128 WB slots for 
>> amdgpu-owned rings. */
>> +#define AMDGPU_MAX_WB 256   /* Reserve at most 256 WB slots for 
>> amdgpu-owned rings. */
>>
>>struct amdgpu_wb {
>>  struct amdgpu_bo*wb_obj;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index ea576b4260a4..d5a59d7c48d6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct 
>> amdgpu_device *adev,
>>
>>  spin_lock_init(&kiq->ring_lock);
>>
>> -r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
>> -if (r)
>> -return r;
>> -
>>  ring->adev = NULL;
>>  ring->ring_obj = NULL;
>>  ring->use_doorbell = true;
>> @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device 
>> *adev,
>>
>>void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
>>{
>> -amdgpu_device_wb_fre

Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Christian König
Yeah, we discussed that approach as well. But both Yintian and I agreed 
that this is the simpler solution.


Regards,
Christian.

Am 22.04.20 um 16:59 schrieb Liu, Shaoyun:

[AMD Official Use Only - Internal Distribution Only]

"Usually" doesn't mean we cannot do it. I feel this proposal is the simplest 
and cleanest. But anyway, this is just my suggestion.

Regards
Shaoyun.liu

-Original Message-
From: Koenig, Christian 
Sent: Wednesday, April 22, 2020 10:52 AM
To: Liu, Shaoyun ; Tao, Yintian ; Liu, Monk 
; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi Shaoyun,

the ring buffer is usually filled with commands, not read results.

Allocating extra space would only work if we use the special NOP command, and 
that is way more complicated and fragile than just using the wb functions 
which were made for this stuff.

Regards,
Christian.

Am 22.04.20 um 16:48 schrieb Liu, Shaoyun:

[AMD Official Use Only - Internal Distribution Only]

Hi Yintian & Christian,
I still don't understand why we need this complicated change here. Why not 
just allocate a little extra space in the ring for each read and use that 
space to store the output value?

Regards
Shaoyun.liu
 


-Original Message-
From: amd-gfx  On Behalf Of
Christian König
Sent: Wednesday, April 22, 2020 8:42 AM
To: Tao, Yintian ; Liu, Monk ;
Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read
reg

Am 22.04.20 um 14:36 schrieb Yintian Tao:

According to the current kiq read register method, there will be a race
condition when using KIQ to read a register if multiple clients want to
read at the same time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
   gets REG-1 value

Therefore, use amdgpu_device_wb_get() to request reg_val_offs for
each kiq read register.

v2: fix the error remove

Signed-off-by: Yintian Tao 
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 19 ++---
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  7 +++---
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  7 +++---
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 
7 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e1d4cfe7a9f..7ee5a4da398a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
amdgpu_cs_parser *p,
/*
 * Writeback
 */
-#define AMDGPU_MAX_WB 128  /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256  /* Reserve at most 256 WB slots for 
amdgpu-owned rings. */

struct amdgpu_wb {

struct amdgpu_bo*wb_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ea576b4260a4..d5a59d7c48d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct
amdgpu_device *adev,

	spin_lock_init(&kiq->ring_lock);

-	r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);

-   if (r)
-   return r;
-
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device
*adev,

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)

{
-   amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
amdgpu_ring_fini(ring);
}

@@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)

{
signed long r, cnt = 0;
unsigned long flags;
-   uint32_t seq;
+   uint32_t seq, reg_val_offs = 0, value = 0;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);

+   if (amdgpu_device_wb_get(adev, ®_val_offs)) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   pr_err("critical bug! too more kiq readers\n");

Typo here, this should probably read  "too many kiq readers".

But I don't think we need the error message here anyway, the failed_kiq_read 
label also prints an error.

With that fixed the patch is Reviewed-by: Christian König 
.

Thanks,
Christian.


+   goto failed_kiq_read;
+   }
amdgpu_ring_allo

RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Tao, Yintian
Hi  Shaoyun


There is one rare corner case which will raise a problem when using the ring 
buffer to store values.

Assume there are only four slots in total in the KIQ ring buffer.

And these four slots are filled with commands to read registers: Slot-A, 
Slot-B, Slot-C, Slot-D.

And they are waiting for their sequence fences to be signaled when one new 
command to write a register is submitted:

1. Slot-A is under msleep, not yet reading its register.
2. Slot-B is under msleep, not yet reading its register.
3. Slot-C is under msleep, not yet reading its register.
4. Slot-D happens to find its sequence signaled, and the new write command 
then overwrites the Slot-A contents.
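
A toy model of the wrap-around described above (hypothetical slot count, a 
user-space illustration only, not driver code):

#include <stdio.h>
#include <stdint.h>

/* With NSLOTS ring slots, sequence N and sequence N + NSLOTS land in the
 * same slot, so a new submission can clobber Slot-A while the Slot-A
 * reader is still in msleep(). */
#define NSLOTS 4

int main(void)
{
	uint32_t sync_seq = 0;

	for (int i = 0; i < NSLOTS + 1; i++) {
		uint32_t seq = ++sync_seq;
		printf("cmd seq %u -> slot %c\n", seq,
		       'A' + (int)((seq - 1) % NSLOTS));
	}
	/* seq 5 reuses Slot-A although the seq-1 reader may still poll */
	return 0;
}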


Best Regards
Yintian Tao

-Original Message-
From: Koenig, Christian  
Sent: April 22, 2020 22:52
To: Liu, Shaoyun ; Tao, Yintian ; 
Liu, Monk ; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi Shaoyun,

the ring buffer is usually filled with commands, not read results.

Allocating extra space would only work if we use the special NOP command, and 
that is way more complicated and fragile than just using the wb functions 
which were made for this stuff.

Regards,
Christian.

Am 22.04.20 um 16:48 schrieb Liu, Shaoyun:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi Yintian & Christian,
> I still don't understand why we need this complicated change here. Why not 
> just allocate a little extra space in the ring for each read and use that 
> space to store the output value?
>
> Regards
> Shaoyun.liu
> 
>
> -Original Message-
> From: amd-gfx  On Behalf Of 
> Christian König
> Sent: Wednesday, April 22, 2020 8:42 AM
> To: Tao, Yintian ; Liu, Monk ; 
> Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read 
> reg
>
> Am 22.04.20 um 14:36 schrieb Yintian Tao:
>> According to the current kiq read register method, there will be a race 
>> condition when using KIQ to read a register if multiple clients want to 
>> read at the same time, just like the example below:
>> 1. client-A starts to read REG-0 through KIQ
>> 2. client-A polls the seqno-0
>> 3. client-B starts to read REG-1 through KIQ
>> 4. client-B polls the seqno-1
>> 5. the kiq completes these two read operations
>> 6. client-A reads the register at the wb buffer and
>>    gets REG-1 value
>>
>> Therefore, use amdgpu_device_wb_get() to request reg_val_offs for 
>> each kiq read register.
>>
>> v2: fix the error remove
>>
>> Signed-off-by: Yintian Tao 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  2 +-
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 19 ++---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -
>>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
>>drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  7 +++---
>>drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  7 +++---
>>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 
>>7 files changed, 41 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 4e1d4cfe7a9f..7ee5a4da398a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
>> amdgpu_cs_parser *p,
>>/*
>> * Writeback
>> */
>> -#define AMDGPU_MAX_WB 128   /* Reserve at most 128 WB slots for 
>> amdgpu-owned rings. */
>> +#define AMDGPU_MAX_WB 256   /* Reserve at most 256 WB slots for 
>> amdgpu-owned rings. */
>>
>>struct amdgpu_wb {
>>  struct amdgpu_bo*wb_obj;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index ea576b4260a4..d5a59d7c48d6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct 
>> amdgpu_device *adev,
>>
>>  spin_lock_init(&kiq->ring_lock);
>>
>> -r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
>> -if (r)
>> -return r;
>> -
>>  ring->adev = NULL;
>>  ring->ring_obj = NULL;
>>  ring->use_doorbell = true;
>> @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device 
>> *adev,
>>
>>void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
>>{
>> -amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
>>  amdgpu_ring_fini(ring);
>>}
>>
>> @@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
>> uint32_t reg)
>>{
>>  signed long r, cnt = 0;
>>  unsigned long flags;
>> -uint32_t seq;
>> +uint32_t seq, reg_val_offs = 0, value = 0;
>>  struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>  struct amdgpu_ring *ring = &kiq->ring;
>>
>>  BUG_ON(!ring->funcs->emit_rreg);
>>
>>  spin_lock_irqsave(&kiq->ring_lock, flags);
>> +if (amdgpu

RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Liu, Shaoyun
[AMD Official Use Only - Internal Distribution Only]

"Usually" doesn't mean we cannot do it. I feel this proposal is the simplest 
and cleanest. But anyway, this is just my suggestion. 

Regards
Shaoyun.liu

-Original Message-
From: Koenig, Christian  
Sent: Wednesday, April 22, 2020 10:52 AM
To: Liu, Shaoyun ; Tao, Yintian ; 
Liu, Monk ; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Hi Shaoyun,

the ring buffer is usually filled with commands, not read results.

Allocating extra space would only work if we use the special NOP command, and 
that is way more complicated and fragile than just using the wb functions 
which were made for this stuff.

Regards,
Christian.

Am 22.04.20 um 16:48 schrieb Liu, Shaoyun:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi Yintian & Christian,
> I still don't understand why we need this complicated change here. Why not 
> just allocate a little extra space in the ring for each read and use that 
> space to store the output value?
>
> Regards
> Shaoyun.liu
> 
>
> -Original Message-
> From: amd-gfx  On Behalf Of 
> Christian König
> Sent: Wednesday, April 22, 2020 8:42 AM
> To: Tao, Yintian ; Liu, Monk ; 
> Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read 
> reg
>
> Am 22.04.20 um 14:36 schrieb Yintian Tao:
>> According to the current kiq read register method, there will be a race 
>> condition when using KIQ to read a register if multiple clients want to 
>> read at the same time, just like the example below:
>> 1. client-A starts to read REG-0 through KIQ
>> 2. client-A polls the seqno-0
>> 3. client-B starts to read REG-1 through KIQ
>> 4. client-B polls the seqno-1
>> 5. the kiq completes these two read operations
>> 6. client-A reads the register at the wb buffer and
>>    gets REG-1 value
>>
>> Therefore, use amdgpu_device_wb_get() to request reg_val_offs for 
>> each kiq read register.
>>
>> v2: fix the error remove
>>
>> Signed-off-by: Yintian Tao 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  2 +-
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 19 ++---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -
>>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
>>drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  7 +++---
>>drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  7 +++---
>>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 
>>7 files changed, 41 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 4e1d4cfe7a9f..7ee5a4da398a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
>> amdgpu_cs_parser *p,
>>/*
>> * Writeback
>> */
>> -#define AMDGPU_MAX_WB 128   /* Reserve at most 128 WB slots for 
>> amdgpu-owned rings. */
>> +#define AMDGPU_MAX_WB 256   /* Reserve at most 256 WB slots for 
>> amdgpu-owned rings. */
>>
>>struct amdgpu_wb {
>>  struct amdgpu_bo*wb_obj;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index ea576b4260a4..d5a59d7c48d6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct 
>> amdgpu_device *adev,
>>
>>  spin_lock_init(&kiq->ring_lock);
>>
>> -r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
>> -if (r)
>> -return r;
>> -
>>  ring->adev = NULL;
>>  ring->ring_obj = NULL;
>>  ring->use_doorbell = true;
>> @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device 
>> *adev,
>>
>>void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
>>{
>> -amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
>>  amdgpu_ring_fini(ring);
>>}
>>
>> @@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
>> uint32_t reg)
>>{
>>  signed long r, cnt = 0;
>>  unsigned long flags;
>> -uint32_t seq;
>> +uint32_t seq, reg_val_offs = 0, value = 0;
>>  struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>  struct amdgpu_ring *ring = &kiq->ring;
>>
>>  BUG_ON(!ring->funcs->emit_rreg);
>>
>>  spin_lock_irqsave(&kiq->ring_lock, flags);
>> +if (amdgpu_device_wb_get(adev, ®_val_offs)) {
>> +spin_unlock_irqrestore(&kiq->ring_lock, flags);
>> +pr_err("critical bug! too more kiq readers\n");
> Typo here, this should probably read  "too many kiq readers".
>
> But I don't think we need the error message here anyway, the failed_kiq_read 
> label also prints an error.
>
> With that fixed the patch is Reviewed-by: Christian König 
> .
>
> Thanks,
> Christian.
>
>> +g

Re: [PATCH] drm/amdgpu: protect kiq overrun

2020-04-22 Thread Christian König
The amdgpu_ring_alloc() function checks that the requested number of DWs 
doesn't exceed the maximum submission size.


It does NOT check if there is enough room on the ring. That would 
require MMIO access and that is what we want to avoid.


Regards,
Christian.
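
For reference, the check in question looks roughly like this (a sketch of 
amdgpu_ring_alloc() from amdgpu_ring.c around this time frame, reproduced 
from memory; details may differ):

/* Sketch: amdgpu_ring_alloc() only validates the request against the
 * per-submission maximum (ring->max_dw). It never reads the rptr, so it
 * cannot tell how much room is actually free on the ring. */
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
{
	/* Align the requested size so the commit path can pad safely. */
	ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;

	/* Maximum-submission-size check only, not a free-space check. */
	if (WARN_ON_ONCE(ndw > ring->max_dw))
		return -ENOMEM;

	ring->count_dw = ndw;
	ring->wptr_old = ring->wptr;

	if (ring->funcs->begin_use)
		ring->funcs->begin_use(ring);

	return 0;
}

That is why a successful allocation says nothing about a possible overrun.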

Am 22.04.20 um 16:54 schrieb Liu, Shaoyun:

[AMD Official Use Only - Internal Distribution Only]

I think each kiq operation will call ring_alloc for the packet space; why not 
just check whether that allocation succeeds or not?

Shaoyun.liu

-Original Message-
From: amd-gfx  On Behalf Of Yintian Tao
Sent: Wednesday, April 22, 2020 10:50 AM
To: Koenig, Christian ; Liu, Monk ; 
Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian 
Subject: [PATCH] drm/amdgpu: protect kiq overrun

Wait for the oldest sequence on the kiq ring to be signaled in order to make 
sure there will be no kiq overrun.

v2: remove the unused variable and correct
 kiq max_sub_num value

Signed-off-by: Yintian Tao 
---
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  6 
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  6 
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 30 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  6 
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  6 
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  7 +
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  7 +
  8 files changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..fac8b9713dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
  
  	spin_lock(&adev->gfx.kiq.ring_lock);

+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submission\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2ac5e7..fd42c126510f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
  
  	spin_lock(&adev->gfx.kiq.ring_lock);

+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submissions\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..84e66c45df37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 AMDGPU_RING_PRIO_DEFAULT);
if (r)
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+   else
+   kiq->max_sub_num = (ring->ring_size / 4) /
+   (ring->funcs->align_mask + 1);
  
  	return r;

  }
@@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
return 0;
  }
  
+int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) {

+   uint32_t seq = 0;
+   signed long r = 0;
+
+   seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num);
+   if (seq > kiq->max_sub_num) {
+   r = amdgpu_fence_wait_polling(&kiq->ring, seq,
+ MAX_KIQ_REG_WAIT);
+   return r < 1 ? -ETIME : 0;
+   }
+
+   return 0;
+}
+
  uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)  {
signed long r, cnt = 0;
@@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
BUG_ON(!ring->funcs->emit_rreg);
  
  	spin_lock_irqsave(&kiq->ring_lock, flags);

+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_read;
+   }
+
if (amdgpu_device_wb_get(adev, ®_val_offs)) {
spin_unlock_irqrestore(&kiq->ring_lock, flags);
pr_err("critical bug! too many kiq readers\n"); @@ -728,6 
+752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
BUG_ON(!ring->funcs->emit_wreg);
  
  	spin_lock_irqsave(&kiq->ring_lock, flags);

+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.k

RE: [PATCH] drm/amdgpu: protect kiq overrun

2020-04-22 Thread Liu, Shaoyun
[AMD Official Use Only - Internal Distribution Only]

I think each kiq operation will call ring_alloc for the packet space; why not 
just check whether that allocation succeeds or not?  

Shaoyun.liu 

-Original Message-
From: amd-gfx  On Behalf Of Yintian Tao
Sent: Wednesday, April 22, 2020 10:50 AM
To: Koenig, Christian ; Liu, Monk ; 
Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian 
Subject: [PATCH] drm/amdgpu: protect kiq overrun

Wait for the oldest sequence on the kiq ring to be signaled in order to make 
sure there will be no kiq overrun.

v2: remove the unused variable and correct
kiq max_sub_num value

Signed-off-by: Yintian Tao 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  6 
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  6 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 30 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  6 
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  6 
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  7 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  7 +
 8 files changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..fac8b9713dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
 
spin_lock(&adev->gfx.kiq.ring_lock);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submission\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2ac5e7..fd42c126510f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
 
spin_lock(&adev->gfx.kiq.ring_lock);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submissions\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..84e66c45df37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 AMDGPU_RING_PRIO_DEFAULT);
if (r)
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+   else
+   kiq->max_sub_num = (ring->ring_size / 4) /
+   (ring->funcs->align_mask + 1);
 
return r;
 }
@@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
return 0;
 }
 
+int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq) {
+   uint32_t seq = 0;
+   signed long r = 0;
+
+   seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num);
+   if (seq > kiq->max_sub_num) {
+   r = amdgpu_fence_wait_polling(&kiq->ring, seq,
+ MAX_KIQ_REG_WAIT);
+   return r < 1 ? -ETIME : 0;
+   }
+
+   return 0;
+}
+
 uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)  {
signed long r, cnt = 0;
@@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
BUG_ON(!ring->funcs->emit_rreg);
 
spin_lock_irqsave(&kiq->ring_lock, flags);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_read;
+   }
+
if (amdgpu_device_wb_get(adev, ®_val_offs)) {
spin_unlock_irqrestore(&kiq->ring_lock, flags);
pr_err("critical bug! too many kiq readers\n"); @@ -728,6 
+752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, 
uint32_t v)
BUG_ON(!ring->funcs->emit_wreg);
 
spin_lock_irqsave(&kiq->ring_lock, flags);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_write;
+   }
+
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
amdgpu_fence_emit_polling(ring, &seq); diff --git 
a/drivers/gpu/drm/amd

Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Christian König

Hi Shaoyun,

the ring buffer is usually filled with commands, not read results.

Allocating extra space would only work if we use the special NOP command, 
and that is way more complicated and fragile than just using the wb 
functions which were made for this stuff.


Regards,
Christian.
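
The wb pattern referred to here, in the shape the patch below uses it (a 
sketch only, condensed from the hunks quoted further down):

/* Per-read writeback slot: grab a private wb slot, let the KIQ write the
 * register value there, read it back and release the slot.
 * reg_val_offs indexes into adev->wb.wb[]. */
uint32_t reg_val_offs, value;

if (amdgpu_device_wb_get(adev, &reg_val_offs))
	goto failed_kiq_read;		/* no free writeback slot */

/* ... emit the rreg packet with reg_val_offs, poll the fence ... */

mb();					/* order the CPU read after the GPU write */
value = adev->wb.wb[reg_val_offs];
amdgpu_device_wb_free(adev, reg_val_offs);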

Am 22.04.20 um 16:48 schrieb Liu, Shaoyun:

[AMD Official Use Only - Internal Distribution Only]

Hi Yintian & Christian,
I still don't understand why we need this complicated change here. Why not 
just allocate a little extra space in the ring for each read and use that 
space to store the output value?

Regards
Shaoyun.liu



-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Wednesday, April 22, 2020 8:42 AM
To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Am 22.04.20 um 14:36 schrieb Yintian Tao:

According to the current kiq read register method, there will be a race
condition when using KIQ to read a register if multiple clients want to
read at the same time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
   gets REG-1 value

Therefore, use amdgpu_device_wb_get() to request reg_val_offs for each
kiq read register.

v2: fix the error remove

Signed-off-by: Yintian Tao 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  2 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 19 ++---
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  7 +++---
   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  7 +++---
   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 
   7 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e1d4cfe7a9f..7ee5a4da398a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
amdgpu_cs_parser *p,
   /*
* Writeback
*/
-#define AMDGPU_MAX_WB 128  /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256  /* Reserve at most 256 WB slots for 
amdgpu-owned rings. */
   
   struct amdgpu_wb {

struct amdgpu_bo*wb_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ea576b4260a4..d5a59d7c48d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device
*adev,
   
   	spin_lock_init(&kiq->ring_lock);
   
-	r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);

-   if (r)
-   return r;
-
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device
*adev,
   
   void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)

   {
-   amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
amdgpu_ring_fini(ring);
   }
   
@@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)

   {
signed long r, cnt = 0;
unsigned long flags;
-   uint32_t seq;
+   uint32_t seq, reg_val_offs = 0, value = 0;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
   
   	BUG_ON(!ring->funcs->emit_rreg);
   
   	spin_lock_irqsave(&kiq->ring_lock, flags);

+   if (amdgpu_device_wb_get(adev, ®_val_offs)) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   pr_err("critical bug! too more kiq readers\n");

Typo here, this should probably read  "too many kiq readers".

But I don't think we need the error message here anyway, the failed_kiq_read 
label also prints an error.

With that fixed the patch is Reviewed-by: Christian König 
.

Thanks,
Christian.


+   goto failed_kiq_read;
+   }
amdgpu_ring_alloc(ring, 32);
-   amdgpu_ring_emit_rreg(ring, reg);
+   amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -707,7 +707,10
@@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
if (cnt > MAX_KIQ_REG_TRY)
goto failed_kiq_read;
   
-	return adev->wb.wb[kiq->reg_val_offs];

+   mb();
+   value = adev->wb.wb[reg_val_offs];
+   amdgpu_device_wb_free(adev, reg_val_offs);
+   return value;
   
   failed_kiq_read:

pr_err("failed to read reg:%x\n", reg); diff --git
a/d

[PATCH] drm/amdgpu: protect kiq overrun

2020-04-22 Thread Yintian Tao
Wait for the oldest sequence on the kiq ring
to be signaled in order to make sure there
will be no kiq overrun.

v2: remove the unused variable and correct
kiq max_sub_num value

Signed-off-by: Yintian Tao 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  6 
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  6 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 30 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  6 
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  6 
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  7 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  7 +
 8 files changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..fac8b9713dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
 
spin_lock(&adev->gfx.kiq.ring_lock);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submission\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2ac5e7..fd42c126510f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
 
spin_lock(&adev->gfx.kiq.ring_lock);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submissions\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..84e66c45df37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 AMDGPU_RING_PRIO_DEFAULT);
if (r)
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+   else
+   kiq->max_sub_num = (ring->ring_size / 4) /
+   (ring->funcs->align_mask + 1);
 
return r;
 }
@@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
return 0;
 }
 
+int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq)
+{
+   uint32_t seq = 0;
+   signed long r = 0;
+
+   seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num);
+   if (seq > kiq->max_sub_num) {
+   r = amdgpu_fence_wait_polling(&kiq->ring, seq,
+ MAX_KIQ_REG_WAIT);
+   return r < 1 ? -ETIME : 0;
+   }
+
+   return 0;
+}
+
 uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
signed long r, cnt = 0;
@@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
BUG_ON(!ring->funcs->emit_rreg);
 
spin_lock_irqsave(&kiq->ring_lock, flags);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_read;
+   }
+
if (amdgpu_device_wb_get(adev, ®_val_offs)) {
spin_unlock_irqrestore(&kiq->ring_lock, flags);
pr_err("critical bug! too many kiq readers\n");
@@ -728,6 +752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t 
reg, uint32_t v)
BUG_ON(!ring->funcs->emit_wreg);
 
spin_lock_irqsave(&kiq->ring_lock, flags);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_write;
+   }
+
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
amdgpu_fence_emit_polling(ring, &seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index ee698f0246d8..1ee59a927bd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -103,6 +103,7 @@ struct amdgpu_kiq {
struct amdgpu_ring  ring;
struct amdgpu_irq_src   irq;
const struct kiq_pm4_funcs *pmf;
+   uint32_tmax_sub_num;
 };
 
 /*
@@ -387,4 +388,6 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_dev

RE: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Liu, Shaoyun
[AMD Official Use Only - Internal Distribution Only]

Hi Yintian & Christian,
I still don't understand why we need this complicated change here. Why not 
just allocate a little extra space in the ring for each read and use that 
space to store the output value?

Regards
Shaoyun.liu
   

-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Wednesday, April 22, 2020 8:42 AM
To: Tao, Yintian ; Liu, Monk ; Kuehling, 
Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

Am 22.04.20 um 14:36 schrieb Yintian Tao:
> According to the current kiq read register method, there will be a race 
> condition when using KIQ to read a register if multiple clients want to 
> read at the same time, just like the example below:
> 1. client-A starts to read REG-0 through KIQ
> 2. client-A polls the seqno-0
> 3. client-B starts to read REG-1 through KIQ
> 4. client-B polls the seqno-1
> 5. the kiq completes these two read operations
> 6. client-A reads the register at the wb buffer and
>    gets REG-1 value
>
> Therefore, use amdgpu_device_wb_get() to request reg_val_offs for each 
> kiq read register.
>
> v2: fix the error remove
>
> Signed-off-by: Yintian Tao 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 19 ++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  7 +++---
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  7 +++---
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 
>   7 files changed, 41 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4e1d4cfe7a9f..7ee5a4da398a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
> amdgpu_cs_parser *p,
>   /*
>* Writeback
>*/
> -#define AMDGPU_MAX_WB 128/* Reserve at most 128 WB slots for 
> amdgpu-owned rings. */
> +#define AMDGPU_MAX_WB 256/* Reserve at most 256 WB slots for 
> amdgpu-owned rings. */
>   
>   struct amdgpu_wb {
>   struct amdgpu_bo*wb_obj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index ea576b4260a4..d5a59d7c48d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device 
> *adev,
>   
>   spin_lock_init(&kiq->ring_lock);
>   
> - r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
> - if (r)
> - return r;
> -
>   ring->adev = NULL;
>   ring->ring_obj = NULL;
>   ring->use_doorbell = true;
> @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device 
> *adev,
>   
>   void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
>   {
> - amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
>   amdgpu_ring_fini(ring);
>   }
>   
> @@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
> uint32_t reg)
>   {
>   signed long r, cnt = 0;
>   unsigned long flags;
> - uint32_t seq;
> + uint32_t seq, reg_val_offs = 0, value = 0;
>   struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>   struct amdgpu_ring *ring = &kiq->ring;
>   
>   BUG_ON(!ring->funcs->emit_rreg);
>   
>   spin_lock_irqsave(&kiq->ring_lock, flags);
> + if (amdgpu_device_wb_get(adev, ®_val_offs)) {
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> + pr_err("critical bug! too more kiq readers\n");

Typo here, this should probably read  "too many kiq readers".

But I don't think we need the error message here anyway, the failed_kiq_read 
label also prints an error.

With that fixed the patch is Reviewed-by: Christian König 
.

Thanks,
Christian.

> + goto failed_kiq_read;
> + }
>   amdgpu_ring_alloc(ring, 32);
> - amdgpu_ring_emit_rreg(ring, reg);
> + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
>   amdgpu_fence_emit_polling(ring, &seq);
>   amdgpu_ring_commit(ring);
>   spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -707,7 +707,10 
> @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>   if (cnt > MAX_KIQ_REG_TRY)
>   goto failed_kiq_read;
>   
> - return adev->wb.wb[kiq->reg_val_offs];
> + mb();
> + value = adev->wb.wb[reg_val_offs];
> + amdgpu_device_wb_free(adev, reg_val_offs);
> + return value;
>   
>   failed_kiq_read:
>   pr_err("failed to read reg:%x\n", reg); diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 634746829024..ee698f0246d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdg

[PATCH] drm/amdgpu: protect kiq overrun

2020-04-22 Thread Yintian Tao
Wait for the oldest sequence to be signaled to make sure
there will be no kiq overrun.

Signed-off-by: Yintian Tao 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  8 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  8 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 30 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  6 
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  6 
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  7 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  7 +
 8 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..72a5d7e15494 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -311,7 +311,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
struct v10_compute_mqd *m;
-   uint32_t mec, pipe;
+   uint32_t mec, pipe, seq = 0;
int r;
 
m = get_mqd(mqd);
@@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
 
spin_lock(&adev->gfx.kiq.ring_lock);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submission\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2ac5e7..332f72b2d334 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -309,7 +309,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
struct v9_mqd *m;
-   uint32_t mec, pipe;
+   uint32_t mec, pipe, seq = 0;
int r;
 
m = get_mqd(mqd);
@@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
 mec, pipe, queue_id);
 
spin_lock(&adev->gfx.kiq.ring_lock);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   pr_err("critical bug! too many kiq submissions\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..387b1a8ed4df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 AMDGPU_RING_PRIO_DEFAULT);
if (r)
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+   else
+   kiq->max_sub_num = (ring->ring_size / 4) /
+   ring->funcs->align_mask;
 
return r;
 }
@@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
return 0;
 }
 
+int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq)
+{
+   uint32_t seq = 0;
+   signed long r = 0;
+
+   seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num);
+   if (seq > kiq->max_sub_num) {
+   r = amdgpu_fence_wait_polling(&kiq->ring, seq,
+ MAX_KIQ_REG_WAIT);
+   return r < 1 ? -ETIME : 0;
+   }
+
+   return 0;
+}
+
 uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
signed long r, cnt = 0;
@@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
BUG_ON(!ring->funcs->emit_rreg);
 
spin_lock_irqsave(&kiq->ring_lock, flags);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_read;
+   }
+
if (amdgpu_device_wb_get(adev, ®_val_offs)) {
spin_unlock_irqrestore(&kiq->ring_lock, flags);
pr_err("critical bug! too many kiq readers\n");
@@ -728,6 +752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t 
reg, uint32_t v)
BUG_ON(!ring->funcs->emit_wreg);
 
spin_lock_irqsave(&kiq->ring_lock, flags);
+   r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+   if (r) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   goto failed_kiq_write;
+   }
+
amdgpu_ring_alloc(ring, 32);
 

Re: [PATCH] drm: amd/display: fix Kconfig help text

2020-04-22 Thread Harry Wentland
On 2020-04-21 7:34 p.m., Randy Dunlap wrote:
> From: Randy Dunlap 
> 
> Fix help text: indent one tab + 2 spaces; end a sentence with a
> period; and collapse short lines of text to one line.
> 
> Fixes: 23c61b4599c4 ("drm/amd: Fix Kconfig indentation")
> Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)")
> Signed-off-by: Randy Dunlap 
> Cc: Harry Wentland 
> Cc: Alex Deucher 
> Cc: Krzysztof Kozlowski 

Reviewed-by: Harry Wentland 

Harry

> ---
>  drivers/gpu/drm/amd/display/Kconfig |8 ++--
>  1 file changed, 2 insertions(+), 6 deletions(-)
> 
> --- linux-next-20200421.orig/drivers/gpu/drm/amd/display/Kconfig
> +++ linux-next-20200421/drivers/gpu/drm/amd/display/Kconfig
> @@ -21,16 +21,12 @@ config DRM_AMD_DC_HDCP
>   bool "Enable HDCP support in DC"
>   depends on DRM_AMD_DC
>   help
> -  Choose this option
> -  if you want to support
> -  HDCP authentication
> +   Choose this option if you want to support HDCP authentication.
>  
>  config DEBUG_KERNEL_DC
>   bool "Enable kgdb break in DC"
>   depends on DRM_AMD_DC
>   help
> -   Choose this option
> -   if you want to hit
> -   kdgb_break in assert.
> +   Choose this option if you want to hit kdgb_break in assert.
>  
>  endmenu
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: put the audio codec into suspend state before gpu reset V2

2020-04-22 Thread Alex Deucher
On Tue, Apr 21, 2020 at 10:42 PM Evan Quan  wrote:
>
> By default, the autosuspend delay of the audio controller is 3S. If the
> gpu reset is triggered within 3S (after the audio controller goes idle),
> the audio controller may be unable to enter the suspended state. Then
> the sudden gpu reset will cause some audio errors. The change
> here is targeted at resolving this.
>
> However, if the audio controller is in use when the gpu reset is
> triggered, this change may still not be enough to put the
> audio controller into the suspended state. In that case, the
> gpu reset will still proceed, but there will be a warning
> message printed ("failed to suspend display audio").
>
> V2: limit this for BACO and mode1 reset only
>
> Change-Id: I33d85e6fcad1882eb33f9cde8916d57be8d5a87a
> Signed-off-by: Evan Quan 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 70 ++
>  1 file changed, 70 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 2d4b78d96426..70f43b1aed78 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -69,6 +69,7 @@
>
>  #include 
>  #include 
> +#include 
>
>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> @@ -4146,6 +4147,59 @@ static void amdgpu_device_unlock_adev(struct 
> amdgpu_device *adev)
> mutex_unlock(&adev->lock_reset);
>  }
>
> +static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
> +{
> +   struct pci_dev *p = NULL;
> +
> +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> +   adev->pdev->bus->number, 1);
> +   if (p) {
> +   pm_runtime_enable(&(p->dev));
> +   pm_runtime_resume(&(p->dev));
> +   }
> +}
> +
> +static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
> +{
> +   enum amd_reset_method reset_method;
> +   struct pci_dev *p = NULL;
> +   unsigned long end_jiffies;
> +
> +   /*
> +* For now, only BACO and mode1 reset are confirmed
> +* to suffer the audio issue without proper suspended.
> +*/
> +   reset_method = amdgpu_asic_reset_method(adev);
> +   if ((reset_method != AMD_RESET_METHOD_BACO) &&
> +(reset_method != AMD_RESET_METHOD_MODE1))
> +   return -EINVAL;
> +
> +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> +   adev->pdev->bus->number, 1);
> +   if (!p)
> +   return -ENODEV;
> +
> +   /*
> +* 3S is the audio controller default autosuspend delay setting.
> +* 4S used here is guaranteed to cover that.
> +*/

Instead of hardcoding 3S, we should probably use
pm_runtime_autosuspend_expiration() to query how much time is left and
then use that.  That way this will work even if userspace has changed
the delay.  With that fixed:
Reviewed-by: Alex Deucher 

Alex
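
A sketch of that suggestion (assuming pm_runtime_autosuspend_expiration(), 
which returns the pending autosuspend expiration in nanoseconds on the 
ktime_get_mono_fast_ns() clock, or 0 if none is pending; the exact 
integration is left to the author):

/* Sketch only: derive the wait budget from the device's real
 * autosuspend expiration instead of hardcoding 4 seconds. */
u64 expires, now;
unsigned long end_jiffies;

expires = pm_runtime_autosuspend_expiration(&p->dev);
now = ktime_get_mono_fast_ns();
if (expires > now)
	/* wait until the pending autosuspend would fire, plus 1s slack */
	end_jiffies = jiffies + nsecs_to_jiffies(expires - now) + HZ;
else
	/* nothing pending (or already expired): short grace period */
	end_jiffies = jiffies + msecs_to_jiffies(100);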


> +   end_jiffies = msecs_to_jiffies(4000) + jiffies;
> +   while (!pm_runtime_status_suspended(&(p->dev))) {
> +   if (!pm_runtime_suspend(&(p->dev)))
> +   break;
> +
> +   if (time_after(jiffies, end_jiffies)) {
> +   dev_warn(adev->dev, "failed to suspend display 
> audio\n");
> +   /* TODO: abort the succeeding gpu reset? */
> +   return -ETIMEDOUT;
> +   }
> +   }
> +
> +   pm_runtime_disable(&(p->dev));
> +
> +   return 0;
> +}
> +
>  /**
>   * amdgpu_device_gpu_recover - reset the asic and recover scheduler
>   *
> @@ -4170,6 +4224,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
> bool use_baco =
> (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
> true : false;
> +   bool audio_suspended = false;
>
> /*
>  * Flush RAM to disk so that after reboot
> @@ -4227,6 +4282,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
> return 0;
> }
>
> +   /*
> +* Try to put the audio codec into the suspended state
> +* before the gpu reset starts.
> +*
> +* The power domain of the graphics device is shared
> +* with the AZ power domain. Without this, we may
> +* change the audio hardware from behind the audio
> +* driver's back and trigger some audio codec errors.
> +*/
> +   if (!amdgpu_device_suspend_display_audio(tmp_adev))
> +   audio_suspended = true;
> +
> amdgpu_ras_set_error_query_ready(tmp_adev, false);
>
> cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
> @@ -4339,6 +4407,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
>   

[PATCH] drm/amdgpu: Init data to avoid oops while reading pp_num_states.

2020-04-22 Thread limingyu
For chips like CHIP_OLAND with si enabled (amdgpu.si_support=1),
amdgpu will expose pp_num_states in the /sys directory.
At that point, reading the pp_num_states file will execute the
amdgpu_get_pp_num_states func. In our case, the data hasn't
been initialized, so the kernel will access an illegal
address, trigger a segmentation fault, and the system will reboot soon after:

uos@uos-PC:~$ cat /sys/devices/pci\:00/\:00\:00.0/\:01\:00
.0/pp_num_states

Message from syslogd@uos-PC at Apr 22 09:26:20 ...
 kernel:[   82.154129] Internal error: Oops: 9604 [#1] SMP

This patch aims to fix this problem so that reading the file
no longer triggers a kernel segmentation fault.
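
For context, a minimal sketch of the failure mode (paraphrased, not the exact
driver code; it assumes the pp_states_info layout of a nums counter plus a
fixed-size states[] array):

	struct pp_states_info data;	/* stack garbage when no backend fills it */
	int i, buf_len = 0;

	/* The loop bound comes from uninitialized memory, so the walk can
	 * run far past data.states[] and fault. */
	for (i = 0; i < data.nums; i++)
		buf_len += sprintf(buf + buf_len, "%d\n", data.states[i]);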

Signed-off-by: limingyu 
Signed-off-by: zhoubinbin 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index abe94a55ecad..17de9dc60ea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -444,8 +444,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
ret = smu_get_power_num_states(&adev->smu, &data);
if (ret)
return ret;
-   } else if (adev->powerplay.pp_funcs->get_pp_num_states)
+   } else if (adev->powerplay.pp_funcs->get_pp_num_states) {
amdgpu_dpm_get_pp_num_states(adev, &data);
+   } else
+   memset(&data, 0, sizeof(data));
 
pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
-- 
2.20.1





Re: [PATCH hmm 5/5] mm/hmm: remove the customizable pfn format from hmm_range_fault

2020-04-22 Thread Jason Gunthorpe
On Wed, Apr 22, 2020 at 08:03:29AM +0200, Christoph Hellwig wrote:
> 
> 
> On Tue, Apr 21, 2020 at 09:21:46PM -0300, Jason Gunthorpe wrote:
> > +void nouveau_hmm_convert_pfn(struct nouveau_drm *drm, struct hmm_range 
> > *range,
> > +u64 *ioctl_addr)
> >  {
> > unsigned long i, npages;
> >  
> > +   /*
> > +* The ioctl_addr prepared here is passed through nvif_object_ioctl()
> > +* to an eventual DMA map on some call chain like:
> > +*nouveau_svm_fault():
> > +*  args.i.m.method = NVIF_VMM_V0_PFNMAP
> > +*  nouveau_range_fault()
> > +*   nvif_object_ioctl()
> > +*client->driver->ioctl()
> > +*   struct nvif_driver nvif_driver_nvkm:
> > +* .ioctl = nvkm_client_ioctl
> > +*nvkm_ioctl()
> > +* nvkm_ioctl_path()
> > +*   nvkm_ioctl_v0[type].func(..)
> > +*   nvkm_ioctl_mthd()
> > +*nvkm_object_mthd()
> > +*   struct nvkm_object_func nvkm_uvmm:
> > +* .mthd = nvkm_uvmm_mthd
> > +*nvkm_uvmm_mthd()
> > +* nvkm_uvmm_mthd_pfnmap()
> > +*  nvkm_vmm_pfn_map()
> > +*   nvkm_vmm_ptes_get_map()
> > +*func == gp100_vmm_pgt_pfn
> > +* struct nvkm_vmm_desc_func gp100_vmm_desc_spt:
> > +*   .pfn = gp100_vmm_pgt_pfn
> > +*  nvkm_vmm_iter()
> > +*   REF_PTES == func == gp100_vmm_pgt_pfn()
> > +*dma_map_page()
> > +*
> > +* This is all just encoding the internal hmm representation into a
> > +* different nouveau internal representation.
> > +*/
> 
> Nice callchain from hell..  Unfortunately such "code listings" tend to
> get out of date very quickly, so I'm not sure it is worth keeping in
> the code.  What would be really worthwhile is consolidating the two
> different sets of defines (NVIF_VMM_PFNMAP_V0_ vs NVKM_VMM_PFN_)
> to make the code a little easier to follow.

I was mainly concerned with whether this function is using hmm properly,
because it sure looks like it is just forming the CPU physical address
into HW-specific data. But it turns out it is just internal data
for some other code, and the dma_map is impossibly far away.

It took forever to find, so I figured I'd leave a hint for the next poor
soul that has to look at this..

Also, I think it shows there is no 'performance' argument here: if
this path needs more performance, the above should be cleaned up
before we abuse hmm_range_fault.

Put it in the commit message instead?

> > npages = (range->end - range->start) >> PAGE_SHIFT;
> > for (i = 0; i < npages; ++i) {
> > struct page *page;
> >  
> > +   if (!(range->hmm_pfns[i] & HMM_PFN_VALID)) {
> > +   ioctl_addr[i] = 0;
> > continue;
> > +   }
> 
> Can't we rely on the caller pre-zeroing the array?

This ends up as args.phys in nouveau_svm_fault - I didn't see a
zeroing?

I think it makes sense that this routine fully sets the output array
and does not assume it is pre-initialized.

> > +   page = hmm_pfn_to_page(range->hmm_pfns[i]);
> > +   if (is_device_private_page(page))
> > +   ioctl_addr[i] = nouveau_dmem_page_addr(page) |
> > +   NVIF_VMM_PFNMAP_V0_V |
> > +   NVIF_VMM_PFNMAP_V0_VRAM;
> > +   else
> > +   ioctl_addr[i] = page_to_phys(page) |
> > +   NVIF_VMM_PFNMAP_V0_V |
> > +   NVIF_VMM_PFNMAP_V0_HOST;
> > +   if (range->hmm_pfns[i] & HMM_PFN_WRITE)
> > +   ioctl_addr[i] |= NVIF_VMM_PFNMAP_V0_W;
> 
> Now that this routine isn't really device memory specific any more, I
> wonder if it should move to nouveau_svm.c.

Yes, if we expose nouveau_dmem_page_addr(), I will try it

Thanks,
Jason


Re: [PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Christian König

Am 22.04.20 um 14:36 schrieb Yintian Tao:

According to the current kiq read register method,
there will be a race condition when using KIQ to read
a register if multiple clients want to read at the same
time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
gets the REG-1 value

Therefore, use amdgpu_device_wb_get() to request reg_val_offs
for each kiq register read.

v2: fix the erroneous remove

Signed-off-by: Yintian Tao 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 19 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  7 +++---
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  7 +++---
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 
  7 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e1d4cfe7a9f..7ee5a4da398a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
amdgpu_cs_parser *p,
  /*
   * Writeback
   */
-#define AMDGPU_MAX_WB 128  /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256  /* Reserve at most 256 WB slots for 
amdgpu-owned rings. */
  
  struct amdgpu_wb {

struct amdgpu_bo*wb_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ea576b4260a4..d5a59d7c48d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
  
  	spin_lock_init(&kiq->ring_lock);
  
-	r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);

-   if (r)
-   return r;
-
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
  
  void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)

  {
-   amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
amdgpu_ring_fini(ring);
  }
  
@@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)

  {
signed long r, cnt = 0;
unsigned long flags;
-   uint32_t seq;
+   uint32_t seq, reg_val_offs = 0, value = 0;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
  
  	BUG_ON(!ring->funcs->emit_rreg);
  
  	spin_lock_irqsave(&kiq->ring_lock, flags);

+   if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   pr_err("critical bug! too more kiq readers\n");


Typo here, this should probably read  "too many kiq readers".

But I don't think we need the error message here anyway, the 
failed_kiq_read label also prints an error.


With that fixed the patch is Reviewed-by: Christian König 
.


Thanks,
Christian.


+   goto failed_kiq_read;
+   }
amdgpu_ring_alloc(ring, 32);
-   amdgpu_ring_emit_rreg(ring, reg);
+   amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -707,7 +707,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
if (cnt > MAX_KIQ_REG_TRY)
goto failed_kiq_read;
  
-	return adev->wb.wb[kiq->reg_val_offs];

+   mb();
+   value = adev->wb.wb[reg_val_offs];
+   amdgpu_device_wb_free(adev, reg_val_offs);
+   return value;
  
  failed_kiq_read:

pr_err("failed to read reg:%x\n", reg);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 634746829024..ee698f0246d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -103,7 +103,6 @@ struct amdgpu_kiq {
struct amdgpu_ring  ring;
struct amdgpu_irq_src   irq;
const struct kiq_pm4_funcs *pmf;
-   uint32_treg_val_offs;
  };
  
  /*

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index f61664ee4940..137d3d2b46e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -181,7 +181,8 @@ struct amdgpu_ring_funcs {
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
-   void (*emit_r

RE: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Tao, Yintian
Hi  Christian

Thanks, I got it. I will send another patch for the KIQ overrun problem

Best Regards
Yintian Tao
-Original Message-
From: Koenig, Christian  
Sent: 2020年4月22日 20:33
To: Tao, Yintian ; Liu, Monk ; Kuehling, 
Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

Am 22.04.20 um 14:20 schrieb Tao, Yintian:
> Hi  Christian
>
>
> Please see inline commetns.
> -Original Message-
> From: Koenig, Christian 
> Sent: 2020年4月22日 19:57
> To: Tao, Yintian ; Liu, Monk ; 
> Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register
>
> Am 22.04.20 um 13:49 schrieb Tao, Yintian:
>> Hi  Christian
>>
>>
>> Can you help answer the questions below? Thanks in advance.
>> -Original Message-
>> From: Koenig, Christian 
>> Sent: 2020年4月22日 19:03
>> To: Tao, Yintian ; Liu, Monk ; 
>> Kuehling, Felix 
>> Cc: amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register
>>
>> Am 22.04.20 um 11:29 schrieb Yintian Tao:
>>> According to the current kiq access register method, there will be a
>>> race condition when using KIQ to read a register if multiple clients
>>> want to read at the same time, just like the example below:
>>> 1. client-A starts to read REG-0 through KIQ 2. client-A polls the
>>> seqno-0 3. client-B starts to read REG-1 through KIQ 4. client-B polls
>>> the seqno-1 5. the kiq completes these two read operations 6. client-A
>>> reads the register at the wb buffer and
>>>   gets the REG-1 value
>>>
>>> And if multiple clients frequently write registers
>>> through KIQ, that may raise the KIQ ring buffer overwrite problem.
>>>
>>> Therefore, allocate a fixed number of wb slots for rreg use and limit
>>> the submit number, which depends on the kiq ring_size, in order to
>>> prevent the overwrite problem.
>>>
>>> v2: directly use amdgpu_device_wb_get() for each read instead
>>>of reserving a fixed number of slots.
>>>if there is not enough kiq ring buffer or rreg slot then
>>>directly print an error log and return instead of busy waiting
>> I would split that into three patches. One for each problem we have here:
>>
>> 1. Fix kgd_hiq_mqd_load() and maybe other occasions to use 
>> spin_lock_irqsave().
>> [yttao]: Do you mean that we need to use spin_lock_irqsave for the functions 
>> just like kgd_hiq_mqd_load()?
> Yes, I strongly think so.
>
> See, when you have one spin lock you always need to either lock it with
> irqs disabled or never.
>
> In other words we always need to either use spin_lock() or 
> spin_lock_irqsave(), but never mix them with the same lock.
>
> The only exception to this rule is when you take multiple locks, e.g.
> you can do:
>
> spin_lock_irqsave(&a, flags);
> spin_lock(&b);
> spin_lock(&c);
> 
> spin_unlock(&c);
> spin_unlock(&b);
> spin_unlock_irqrestore(&a, flags);
>
> Here you don't need to use spin_lock_irqsave for b and c. But we rarely have 
> that case in the code.
> [yttao]: thanks, I got it. I will submit another patch for it.
>
>> 2. Prevent the overrun of the KIQ. Please drop the approach with the 
>> atomic here. Instead just add an amdgpu_fence_wait_polling() into
>> amdgpu_fence_emit_polling() as I discussed with Monk.
>> [yttao]: Sorry, I can't get your original idea for the 
>> amdgpu_fence_wait_polling(). Can you give more details about it? Thanks in 
>> advance.
>>
>> "That is actually only a problem because the KIQ uses polling waits.
>>
>> See amdgpu_fence_emit() waits for the oldest possible fence to be signaled 
>> before emitting a new one.
>>
>> I suggest that we do the same in amdgpu_fence_emit_polling(). A one liner 
>> like the following should be enough:
>>
>> amdgpu_fence_wait_polling(ring, seq - ring->fence_drv.num_fences_mask, 
>> timeout);"
>> [yttao]: there is no usage of num_fences_mask in kiq fence polling; the 
>> num_fences_mask is only effective in the dma_fence architecture.
>>  If I understand correctly, do you want the prototype code below? 
>> If the prototype code is wrong, can you help give one sample? Thanks in 
>> advance.
>>
>> int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) {
>>   uint32_t seq;
>>
>>   if (!s)
>>   return -EINVAL;
>> +amdgpu_fence_wait_polling(ring, seq, timeout);
>>   seq = ++ring->fence_drv.sync_seq;
> Your understanding sounds more or less correct. The code should look 
> something like this:
>
> seq = ++ring->fence_drv.sync_seq;
> amdgpu_fence_wait_polling(ring, seq -
> number_of_allowed_submissions_to_the_kiq, timeout);
> [yttao]: do we need to wait directly at the start, just like below? 
> Otherwise, amdgpu_ring_emit_wreg may overwrite the KIQ ring buffer.

There should always be room for at least one more submission.

As long as we always submit a fence that checks the free room, we should be fine.

Regards,
Christian.

> + amdgpu_fence_wait_polling(ring, seq - 
>

[PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Yintian Tao
According to the current kiq read register method,
there will be a race condition when using KIQ to read
a register if multiple clients want to read at the same
time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
   gets the REG-1 value

Therefore, use amdgpu_device_wb_get() to request reg_val_offs
for each kiq register read.

v2: fix the erroneous remove

Signed-off-by: Yintian Tao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 19 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  7 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  7 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 
 7 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e1d4cfe7a9f..7ee5a4da398a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
amdgpu_cs_parser *p,
 /*
  * Writeback
  */
-#define AMDGPU_MAX_WB 128  /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256  /* Reserve at most 256 WB slots for 
amdgpu-owned rings. */
 
 struct amdgpu_wb {
struct amdgpu_bo*wb_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ea576b4260a4..d5a59d7c48d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 
spin_lock_init(&kiq->ring_lock);
 
-   r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
-   if (r)
-   return r;
-
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 
 void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
 {
-   amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
amdgpu_ring_fini(ring);
 }
 
@@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
 {
signed long r, cnt = 0;
unsigned long flags;
-   uint32_t seq;
+   uint32_t seq, reg_val_offs = 0, value = 0;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
 
BUG_ON(!ring->funcs->emit_rreg);
 
spin_lock_irqsave(&kiq->ring_lock, flags);
+   if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   pr_err("critical bug! too more kiq readers\n");
+   goto failed_kiq_read;
+   }
amdgpu_ring_alloc(ring, 32);
-   amdgpu_ring_emit_rreg(ring, reg);
+   amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -707,7 +707,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
if (cnt > MAX_KIQ_REG_TRY)
goto failed_kiq_read;
 
-   return adev->wb.wb[kiq->reg_val_offs];
+   mb();
+   value = adev->wb.wb[reg_val_offs];
+   amdgpu_device_wb_free(adev, reg_val_offs);
+   return value;
 
 failed_kiq_read:
pr_err("failed to read reg:%x\n", reg);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 634746829024..ee698f0246d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -103,7 +103,6 @@ struct amdgpu_kiq {
struct amdgpu_ring  ring;
struct amdgpu_irq_src   irq;
const struct kiq_pm4_funcs *pmf;
-   uint32_treg_val_offs;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index f61664ee4940..137d3d2b46e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -181,7 +181,8 @@ struct amdgpu_ring_funcs {
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
-   void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
+   void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs);
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg

[PATCH] drm/amdgpu: request reg_val_offs each kiq read reg

2020-04-22 Thread Yintian Tao
According to the current kiq read register method,
there will be a race condition when using KIQ to read
a register if multiple clients want to read at the same
time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
   gets the REG-1 value

Therefore, use amdgpu_device_wb_get() to request reg_val_offs
for each kiq register read.

Signed-off-by: Yintian Tao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 19 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  7 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  7 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 27 
 7 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e1d4cfe7a9f..7ee5a4da398a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
amdgpu_cs_parser *p,
 /*
  * Writeback
  */
-#define AMDGPU_MAX_WB 128  /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256  /* Reserve at most 256 WB slots for 
amdgpu-owned rings. */
 
 struct amdgpu_wb {
struct amdgpu_bo*wb_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ea576b4260a4..d5a59d7c48d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 
spin_lock_init(&kiq->ring_lock);
 
-   r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
-   if (r)
-   return r;
-
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 
 void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
 {
-   amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
amdgpu_ring_fini(ring);
 }
 
@@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
 {
signed long r, cnt = 0;
unsigned long flags;
-   uint32_t seq;
+   uint32_t seq, reg_val_offs = 0, value = 0;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
 
BUG_ON(!ring->funcs->emit_rreg);
 
spin_lock_irqsave(&kiq->ring_lock, flags);
+   if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+   spin_unlock_irqrestore(&kiq->ring_lock, flags);
+   pr_err("critical bug! too more kiq readers\n");
+   goto failed_kiq_read;
+   }
amdgpu_ring_alloc(ring, 32);
-   amdgpu_ring_emit_rreg(ring, reg);
+   amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -707,7 +707,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
if (cnt > MAX_KIQ_REG_TRY)
goto failed_kiq_read;
 
-   return adev->wb.wb[kiq->reg_val_offs];
+   mb();
+   value = adev->wb.wb[reg_val_offs];
+   amdgpu_device_wb_free(adev, reg_val_offs);
+   return value;
 
 failed_kiq_read:
pr_err("failed to read reg:%x\n", reg);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 634746829024..ee698f0246d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -103,7 +103,6 @@ struct amdgpu_kiq {
struct amdgpu_ring  ring;
struct amdgpu_irq_src   irq;
const struct kiq_pm4_funcs *pmf;
-   uint32_treg_val_offs;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index f61664ee4940..137d3d2b46e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -181,7 +181,8 @@ struct amdgpu_ring_funcs {
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
-   void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
+   void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs);
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,

Re: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Christian König

Am 22.04.20 um 14:20 schrieb Tao, Yintian:

Hi  Christian


Please see inline commetns.
-Original Message-
From: Koenig, Christian 
Sent: 2020年4月22日 19:57
To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

Am 22.04.20 um 13:49 schrieb Tao, Yintian:

Hi  Christian


Can you help answer the questions below? Thanks in advance.
-Original Message-
From: Koenig, Christian 
Sent: 2020年4月22日 19:03
To: Tao, Yintian ; Liu, Monk ;
Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

Am 22.04.20 um 11:29 schrieb Yintian Tao:

According to the current kiq access register method, there will be a
race condition when using KIQ to read a register if multiple clients
want to read at the same time, just like the example below:
1. client-A starts to read REG-0 through KIQ 2. client-A polls the
seqno-0 3. client-B starts to read REG-1 through KIQ 4. client-B polls
the seqno-1 5. the kiq completes these two read operations 6. client-A
reads the register at the wb buffer and
 gets the REG-1 value

And if multiple clients frequently write registers
through KIQ, that may raise the KIQ ring buffer overwrite problem.

Therefore, allocate a fixed number of wb slots for rreg use and limit the
submit number, which depends on the kiq ring_size, in order to prevent
the overwrite problem.

v2: directly use amdgpu_device_wb_get() for each read instead
   of reserving a fixed number of slots.
   if there is not enough kiq ring buffer or rreg slot then
   directly print an error log and return instead of busy waiting

I would split that into three patches. One for each problem we have here:

1. Fix kgd_hiq_mqd_load() and maybe other occasions to use spin_lock_irqsave().
[yttao]: Do you mean that we need to use spin_lock_irqsave for the functions 
just like kgd_hiq_mqd_load()?

Yes, I strongly think so.

See, when you have one spin lock you always need to either lock it with
irqs disabled or never.

In other words we always need to either use spin_lock() or spin_lock_irqsave(), 
but never mix them with the same lock.

The only exception to this rule is when you take multiple locks, e.g.
you can do:

spin_lock_irqsave(&a, flags);
spin_lock(&b);
spin_lock(&c);

spin_unlock(&c);
spin_unlock(&b);
spin_unlock_irqrestore(&a, flags);

Here you don't need to use spin_lock_irqsave for b and c. But we rarely have 
that case in the code.
[yttao]: thanks, I got it. I will submit another patch for it.


2. Prevent the overrun of the KIQ. Please drop the approach with the
atomic here. Instead just add an amdgpu_fence_wait_polling() into
amdgpu_fence_emit_polling() as I discussed with Monk.
[yttao]: Sorry, I can't get your original idea for the 
amdgpu_fence_wait_polling(). Can you give more details about it? Thanks in 
advance.

"That is actually only a problem because the KIQ uses polling waits.

See amdgpu_fence_emit() waits for the oldest possible fence to be signaled 
before emitting a new one.

I suggest that we do the same in amdgpu_fence_emit_polling(). A one liner like 
the following should be enough:

amdgpu_fence_wait_polling(ring, seq - ring->fence_drv.num_fences_mask, 
timeout);"
[yttao]: there is no usage of num_fences_mask in kiq fence polling; the 
num_fences_mask is only effective in the dma_fence architecture.
If I understand correctly, do you want the prototype code below? 
If the prototype code is wrong, can you help give one sample? Thanks in advance.

int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) {
  uint32_t seq;

  if (!s)
  return -EINVAL;
+   amdgpu_fence_wait_polling(ring, seq, timeout);
  seq = ++ring->fence_drv.sync_seq;

Your understanding sounds more or less correct. The code should look something 
like this:

seq = ++ring->fence_drv.sync_seq;
amdgpu_fence_wait_polling(ring, seq -
number_of_allowed_submissions_to_the_kiq, timeout);
[yttao]: do we need to wait directly at the start, just like below? Otherwise, 
amdgpu_ring_emit_wreg may overwrite the KIQ ring buffer.


There should always be room for at least one more submission.

As long as we always submit a fence that checks the free room, we should 
be fine.


Regards,
Christian.


+   amdgpu_fence_wait_polling(ring, seq - 
number_of_allowed_submissions_to_the_kiq, timeout);
spin_lock_irqsave(&kiq->ring_lock, flags);
 amdgpu_ring_alloc(ring, 32);
 amdgpu_ring_emit_wreg(ring, reg, v);
 amdgpu_fence_emit_polling(ring, &seq); /* wait */
 amdgpu_ring_commit(ring);
 spin_unlock_irqrestore(&kiq->ring_lock, flags);

I just used num_fences_mask as number_of_allowed_submissions_to_the_kiq
because it is probably a good value to start with.

But you could give that as a parameter as well if you think that makes more sense.


  amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_ad

RE: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Tao, Yintian
Hi  Christian


Please see inline commetns.
-Original Message-
From: Koenig, Christian  
Sent: 2020年4月22日 19:57
To: Tao, Yintian ; Liu, Monk ; Kuehling, 
Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

Am 22.04.20 um 13:49 schrieb Tao, Yintian:
> Hi  Christian
>
>
> Can you help answer the questions below? Thanks in advance.
> -Original Message-
> From: Koenig, Christian 
> Sent: 2020年4月22日 19:03
> To: Tao, Yintian ; Liu, Monk ; 
> Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register
>
> Am 22.04.20 um 11:29 schrieb Yintian Tao:
>> According to the current kiq access register method, there will be a
>> race condition when using KIQ to read a register if multiple clients
>> want to read at the same time, just like the example below:
>> 1. client-A starts to read REG-0 through KIQ 2. client-A polls the
>> seqno-0 3. client-B starts to read REG-1 through KIQ 4. client-B polls
>> the seqno-1 5. the kiq completes these two read operations 6. client-A
>> reads the register at the wb buffer and
>>  gets the REG-1 value
>>
>> And if multiple clients frequently write registers
>> through KIQ, that may raise the KIQ ring buffer overwrite problem.
>>
>> Therefore, allocate a fixed number of wb slots for rreg use and limit
>> the submit number, which depends on the kiq ring_size, in order to
>> prevent the overwrite problem.
>>
>> v2: directly use amdgpu_device_wb_get() for each read instead
>>   of reserving a fixed number of slots.
>>   if there is not enough kiq ring buffer or rreg slot then
>>   directly print an error log and return instead of busy waiting
> I would split that into three patches. One for each problem we have here:
>
> 1. Fix kgd_hiq_mqd_load() and maybe other occasions to use 
> spin_lock_irqsave().
> [yttao]: Do you mean that we need to use spin_lock_irqsave for the functions 
> just like kgd_hiq_mqd_load()?

Yes, I strongly think so.

See, when you have one spin lock you always need to either lock it with
irqs disabled or never.

In other words we always need to either use spin_lock() or spin_lock_irqsave(), 
but never mix them with the same lock.

The only exception to this rule is when you take multiple locks, e.g. 
you can do:

spin_lock_irqsave(&a, flags);
spin_lock(&b);
spin_lock(&c);

spin_unlock(&c);
spin_unlock(&b);
spin_unlock_irqrestore(&a, flags);

Here you don't need to use spin_lock_irqsave for b and c. But we rarely have 
that case in the code.
[yttao]: thanks, I got it. I will submit another patch for it.

> 2. Prevent the overrun of the KIQ. Please drop the approach with the 
> atomic here. Instead just add an amdgpu_fence_wait_polling() into
> amdgpu_fence_emit_polling() as I discussed with Monk.
> [yttao]: Sorry, I can't get your original idea for the 
> amdgpu_fence_wait_polling(). Can you give more details about it? Thanks in 
> advance.
>
> "That is actually only a problem because the KIQ uses polling waits.
>
> See amdgpu_fence_emit() waits for the oldest possible fence to be signaled 
> before emitting a new one.
>
> I suggest that we do the same in amdgpu_fence_emit_polling(). A one liner 
> like the following should be enough:
>
> amdgpu_fence_wait_polling(ring, seq - ring->fence_drv.num_fences_mask, 
> timeout);"
> [yttao]: there is no usage of num_fences_mask in kiq fence polling; the 
> num_fences_mask is only effective in the dma_fence architecture.
>   If I understand correctly, do you want the prototype code below? 
> If the prototype code is wrong, can you help give one sample? Thanks in advance.
>
> int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) {
>  uint32_t seq;
>
>  if (!s)
>  return -EINVAL;
> + amdgpu_fence_wait_polling(ring, seq, timeout);
>  seq = ++ring->fence_drv.sync_seq;

Your understanding sounds more or less correct. The code should look something 
like this:

seq = ++ring->fence_drv.sync_seq;
amdgpu_fence_wait_polling(ring, seq -
number_of_allowed_submissions_to_the_kiq, timeout);
[yttao]: do we need to wait directly at the start, just like below? Otherwise, 
amdgpu_ring_emit_wreg may overwrite the KIQ ring buffer.
+   amdgpu_fence_wait_polling(ring, seq - 
number_of_allowed_submissions_to_the_kiq, timeout);
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
amdgpu_fence_emit_polling(ring, &seq); /* wait */
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);

I just used num_fences_mask as number_of_allowed_submissions_to_the_kiq
because it is probably a good value to start with.

But you could give that as a parameter as well if you think that makes more sense.

>  amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>  ¦  seq, 0);
>
>  *s = seq;
>
> 

Re: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Christian König

Am 22.04.20 um 13:49 schrieb Tao, Yintian:

Hi  Christian


Can you help answer the questions below? Thanks in advance.
-Original Message-
From: Koenig, Christian 
Sent: 2020年4月22日 19:03
To: Tao, Yintian ; Liu, Monk ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

Am 22.04.20 um 11:29 schrieb Yintian Tao:

According to the current kiq access register method, there will be a
race condition when using KIQ to read a register if multiple clients
want to read at the same time, just like the example below:
1. client-A starts to read REG-0 through KIQ 2. client-A polls the
seqno-0 3. client-B starts to read REG-1 through KIQ 4. client-B polls
the seqno-1 5. the kiq completes these two read operations 6. client-A
reads the register at the wb buffer and
 gets the REG-1 value

And if multiple clients frequently write registers
through KIQ, that may raise the KIQ ring buffer overwrite problem.

Therefore, allocate a fixed number of wb slots for rreg use and limit the
submit number, which depends on the kiq ring_size, in order to prevent
the overwrite problem.

v2: directly use amdgpu_device_wb_get() for each read instead
  of reserving a fixed number of slots.
  if there is not enough kiq ring buffer or rreg slot then
  directly print an error log and return instead of busy waiting

I would split that into three patches. One for each problem we have here:

1. Fix kgd_hiq_mqd_load() and maybe other occasions to use spin_lock_irqsave().
[yttao]: Do you mean that we need to use spin_lock_irqsave for the functions 
just like kgd_hiq_mqd_load()?


Yes, I strongly think so.

See, when you have one spin lock you always need to either lock it
with irqs disabled or never.


In other words we always need to either use spin_lock() or 
spin_lock_irqsave(), but never mix them with the same lock.


The only exception to this rule is when you take multiple locks, e.g. 
you can do:


spin_lock_irqsave(&a, flags);
spin_lock(&b);
spin_lock(&c);

spin_unlock(&c);
spin_unlock(&b);
spin_unlock_irqrestore(&a, flags);

Here you don't need to use spin_lock_irqsave for b and c. But we rarely 
have that case in the code.



2. Prevent the overrun of the KIQ. Please drop the approach with the atomic 
here. Instead just add an amdgpu_fence_wait_polling() into
amdgpu_fence_emit_polling() as I discussed with Monk.
[yttao]: Sorry, I can't get your original idea for the 
amdgpu_fence_wait_polling(). Can you give more details about it? Thanks in 
advance.

"That is actually only a problem because the KIQ uses polling waits.

See amdgpu_fence_emit() waits for the oldest possible fence to be signaled 
before emitting a new one.

I suggest that we do the same in amdgpu_fence_emit_polling(). A one liner like 
the following should be enough:

amdgpu_fence_wait_polling(ring, seq - ring->fence_drv.num_fences_mask, 
timeout);"
[yttao]: there is no usage of num_fences_mask in kiq fence polling; the 
num_fences_mask is only effective in the dma_fence architecture.
If I understand correctly, do you want the prototype code below? 
If the prototype code is wrong, can you help give one sample? Thanks in advance.

int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
{
 uint32_t seq;

 if (!s)
 return -EINVAL;
+   amdgpu_fence_wait_polling(ring, seq, timeout);
 seq = ++ring->fence_drv.sync_seq;


Your understanding sounds more or less correct. The code should look 
something like this:


seq = ++ring->fence_drv.sync_seq;
amdgpu_fence_wait_polling(ring, seq - 
number_of_allowed_submissions_to_the_kiq, timeout);


I just used num_fences_mask as number_of_allowed_submissions_to_the_kiq 
because it is probably a good value to start with.


But you could give that as a parameter as well if you think that makes 
more sense.



 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
                        seq, 0);

 *s = seq;

 return 0;
}




3. Use amdgpu_device_wb_get() each time we need to submit a read.
[yttao]: yes, I will do it.


Thanks,
Christian.



Regards,
Christian.


Signed-off-by: Yintian Tao 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  8 +-
   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 13 ++-
   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 13 ++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 83 +++
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  5 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  | 13 ++-
   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|  8 +-
   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  8 +-
   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 35 +---
   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 13 ++-
   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 13 ++-
   12 files changed, 167 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/am

RE: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Tao, Yintian
Hi  Christian


Can you help answer the questions below? Thanks in advance.
-Original Message-
From: Koenig, Christian  
Sent: 2020年4月22日 19:03
To: Tao, Yintian ; Liu, Monk ; Kuehling, 
Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

Am 22.04.20 um 11:29 schrieb Yintian Tao:
> According to the current kiq access register method, there will be a
> race condition when using KIQ to read a register if multiple clients
> want to read at the same time, just like the example below:
> 1. client-A starts to read REG-0 through KIQ 2. client-A polls the
> seqno-0 3. client-B starts to read REG-1 through KIQ 4. client-B polls
> the seqno-1 5. the kiq completes these two read operations 6. client-A
> reads the register at the wb buffer and
> gets the REG-1 value
>
> And if multiple clients frequently write registers
> through KIQ, that may raise the KIQ ring buffer overwrite problem.
>
> Therefore, allocate a fixed number of wb slots for rreg use and limit
> the submit number, which depends on the kiq ring_size, in order to
> prevent the overwrite problem.
>
> v2: directly use amdgpu_device_wb_get() for each read instead
>  of reserving a fixed number of slots.
>  if there is not enough kiq ring buffer or rreg slot then
>  directly print an error log and return instead of busy waiting

I would split that into three patches. One for each problem we have here:

1. Fix kgd_hiq_mqd_load() and maybe other occasions to use spin_lock_irqsave().
[yttao]: Do you mean that we need to use spin_lock_irqsave for the functions 
just like kgd_hiq_mqd_load()?

2. Prevent the overrun of the KIQ. Please drop the approach with the atomic 
here. Instead just add an amdgpu_fence_wait_polling() into
amdgpu_fence_emit_polling() as I discussed with Monk.
[yttao]: Sorry, I can't get your original idea for the 
amdgpu_fence_wait_polling(). Can you give more details about it? Thanks in 
advance.

"That is actually only a problem because the KIQ uses polling waits.

See amdgpu_fence_emit() waits for the oldest possible fence to be signaled 
before emitting a new one.

I suggest that we do the same in amdgpu_fence_emit_polling(). A one liner like 
the following should be enough:

amdgpu_fence_wait_polling(ring, seq - ring->fence_drv.num_fences_mask, 
timeout);"
[yttao]: there is no usage of num_fences_mask in kiq fence polling; the 
num_fences_mask is only effective in the dma_fence architecture.
If I understand correctly, do you want the prototype code below? 
If the prototype code is wrong, can you help give one sample? Thanks in advance.

int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) 
{
uint32_t seq;

if (!s)
return -EINVAL;
+   amdgpu_fence_wait_polling(ring, seq, timeout); 
seq = ++ring->fence_drv.sync_seq;
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
                       seq, 0);

*s = seq;

return 0;
}




3. Use amdgpu_device_wb_get() each time we need to submit a read.
[yttao]: yes, I will do it.

Regards,
Christian.

>
> Signed-off-by: Yintian Tao 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  8 +-
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 13 ++-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 13 ++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 83 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  5 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  | 13 ++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|  8 +-
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  8 +-
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 35 +---
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 13 ++-
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 13 ++-
>   12 files changed, 167 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4e1d4cfe7a9f..1157c1a0b888 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
> amdgpu_cs_parser *p,
>   /*
>* Writeback
>*/
> -#define AMDGPU_MAX_WB 128/* Reserve at most 128 WB slots for 
> amdgpu-owned rings. */
> +#define AMDGPU_MAX_WB 256/* Reserve at most 256 WB slots for 
> amdgpu-owned rings. */
>   
>   struct amdgpu_wb {
>   struct amdgpu_bo*wb_obj;
> @@ -1028,6 +1028,12 @@ bool amdgpu_device_has_dc_support(struct 
> amdgpu_device *adev);
>   
>   int emu_soc_asic_init(struct amdgpu_device *adev);
>   
> +int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read,
> + unsigned long *flags);
> +void amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq, unsigned long 
> +*flags);
> +
> +void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs); 
> +void amdgpu_g

Re: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Christian König

Am 22.04.20 um 11:29 schrieb Yintian Tao:

According to the current kiq access register method,
there will be a race condition when using KIQ to read
a register if multiple clients want to read at the same
time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
gets the REG-1 value

And if multiple clients frequently write
registers through KIQ, that may raise the KIQ ring
buffer overwrite problem.

Therefore, allocate a fixed number of wb slots for rreg
use and limit the submit number, which depends on the
kiq ring_size, in order to prevent the overwrite problem.

v2: directly use amdgpu_device_wb_get() for each read instead
 of reserving a fixed number of slots.
 if there is not enough kiq ring buffer or rreg slot then
 directly print an error log and return instead of busy waiting


I would split that into three patches. One for each problem we have here:

1. Fix kgd_hiq_mqd_load() and maybe other occasions to use 
spin_lock_irqsave().


2. Prevent the overrun of the KIQ. Please drop the approach with the 
atomic here. Instead just add an amdgpu_fence_wait_polling() into 
amdgpu_fence_emit_polling() as I discussed with Monk.


3. Use amdgpu_device_wb_get() each time we need to submit a read.

Regards,
Christian.



Signed-off-by: Yintian Tao 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  8 +-
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 13 ++-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 13 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 83 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  | 13 ++-
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|  8 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  8 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 35 +---
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 13 ++-
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 13 ++-
  12 files changed, 167 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e1d4cfe7a9f..1157c1a0b888 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
amdgpu_cs_parser *p,
  /*
   * Writeback
   */
-#define AMDGPU_MAX_WB 128  /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256  /* Reserve at most 256 WB slots for 
amdgpu-owned rings. */
  
  struct amdgpu_wb {

struct amdgpu_bo*wb_obj;
@@ -1028,6 +1028,12 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device 
*adev);
  
  int emu_soc_asic_init(struct amdgpu_device *adev);
  
+int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read,

+   unsigned long *flags);
+void amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq, unsigned long *flags);
+
+void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs);
+void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs);
  /*
   * Registers read & write functions.
   */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..a65d6a1abc04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -309,9 +309,11 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
uint32_t doorbell_off)
  {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
+   struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
+   unsigned long flags = 0;
int r;
  
  	m = get_mqd(mqd);

@@ -324,13 +326,19 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void 
*mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
 mec, pipe, queue_id);
  
-	spin_lock(&adev->gfx.kiq.ring_lock);

+   r = amdgpu_gfx_kiq_lock(kiq, false, &flags);
+   if (r) {
+   pr_err("failed to lock kiq\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
goto out_unlock;
}
  
+	amdgpu_gfx_kiq_consume(kiq, NULL);

amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
amdgpu_ring_write(kiq_ring,
  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
@@ -350,8 +358,9 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
am

[PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Yintian Tao
According to the current kiq access register method,
there will be a race condition when using KIQ to read
a register if multiple clients want to read at the same
time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
   gets the REG-1 value

And if multiple clients frequently write
registers through KIQ, that may raise the KIQ ring
buffer overwrite problem.

Therefore, allocate a fixed number of wb slots for rreg
use and limit the submit number, which depends on the
kiq ring_size, in order to prevent the overwrite problem.

v2: directly use amdgpu_device_wb_get() for each read instead
of reserving a fixed number of slots.
if there is not enough kiq ring buffer or rreg slot then
directly print an error log and return instead of busy waiting

Signed-off-by: Yintian Tao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  8 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 13 ++-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 13 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 83 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  | 13 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|  8 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  8 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 35 +---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 13 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 13 ++-
 12 files changed, 167 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e1d4cfe7a9f..1157c1a0b888 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
amdgpu_cs_parser *p,
 /*
  * Writeback
  */
-#define AMDGPU_MAX_WB 128  /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256  /* Reserve at most 256 WB slots for 
amdgpu-owned rings. */
 
 struct amdgpu_wb {
struct amdgpu_bo*wb_obj;
@@ -1028,6 +1028,12 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device 
*adev);
 
 int emu_soc_asic_init(struct amdgpu_device *adev);
 
+int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read,
+   unsigned long *flags);
+void amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq, unsigned long *flags);
+
+void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs);
+void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs);
 /*
  * Registers read & write functions.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..a65d6a1abc04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -309,9 +309,11 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
uint32_t doorbell_off)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
+   struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
+   unsigned long flags = 0;
int r;
 
m = get_mqd(mqd);
@@ -324,13 +326,19 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void 
*mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
 mec, pipe, queue_id);
 
-   spin_lock(&adev->gfx.kiq.ring_lock);
+   r = amdgpu_gfx_kiq_lock(kiq, false, &flags);
+   if (r) {
+   pr_err("failed to lock kiq\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
goto out_unlock;
}
 
+   amdgpu_gfx_kiq_consume(kiq, NULL);
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
amdgpu_ring_write(kiq_ring,
  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
@@ -350,8 +358,9 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
amdgpu_ring_commit(kiq_ring);
 
+   amdgpu_gfx_kiq_restore(kiq, NULL);
 out_unlock:
-   spin_unlock(&adev->gfx.kiq.ring_lock);
+   amdgpu_gfx_kiq_unlock(&adev->gfx.kiq, &flags);
release_queue(kgd);
 
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2ac5e7..4435bd716edd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@

RE: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Tao, Yintian
Hi  Christian

Please see inline comments.

-Original Message-
From: Koenig, Christian  
Sent: 2020年4月22日 16:23
To: Tao, Yintian ; Liu, Monk ; Liu, 
Shaoyun ; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

Am 22.04.20 um 10:06 schrieb Tao, Yintian:
> Hi  Christian
>
> Please see inline comments
>
> -Original Message-
> From: Koenig, Christian 
> Sent: 2020年4月22日 15:54
> To: Tao, Yintian ; Liu, Monk ; 
> Liu, Shaoyun ; Kuehling, Felix 
> 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register
>
> Am 22.04.20 um 09:49 schrieb Tao, Yintian:
>> Hi Christian
>>
>>
>> Please see inline comments.
>> -Original Message-
>> From: Christian König 
>> Sent: 2020年4月22日 15:40
>> To: Tao, Yintian ; Koenig, Christian 
>> ; Liu, Monk ; Liu, 
>> Shaoyun ; Kuehling, Felix 
>> 
>> Cc: amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register
>>
>> Am 22.04.20 um 09:35 schrieb Tao, Yintian:
>>> Hi  Christian
>>>
>>>
 BUG_ON(in_interrupt());
>>> That won't work like this. The KIQ is also used in interrupt context in the 
>>> driver, that's why we used spin_lock_irqsave().
>>> [yttao]: According to the current drm-next code, I have not found where
>>> registers are accessed through KIQ in interrupt context.
>>> And you need to wait for free kiq ring buffer space if
>>> there is no free kiq ring buffer; waiting in interrupt context here is
>>> illegal.
>> Waiting in atomic context is illegal as well, but we don't have much other 
>> choice.
>> [yttao]: no, there is no sleep in atomic context in my patch.
> I'm not talking about sleeping, but busy waiting.
>
>> We just need to make sure that waiting never happens by making the buffers 
>> large enough, and if it still happens print an error.
>> [yttao]: this is not a good choice because the KMD needs to protect it instead 
>> of hoping the user does not frequently invoke KIQ access.
> The only other choice we have is busy waiting, e.g. loop until we get a free 
> slot.
> [yttao]: Yes, now my patch uses msleep() for busy waiting. Or do you mean we 
> need to use udelay()? If we use udelay(), it will be a nightmare under multi-VF.
>   Because it is assumed that there are 16 VFs with a 6ms 
> world-switch, the bad situation is that one VF will udelay(16*6ms = 96ms) to 
> get one free slot.

You can't use msleep() here since sleeping in atomic or interrupt context is 
forbidden.

The trick is that in atomic context the CPU can't switch to a different 
process, so we have a very limited number of concurrent KIQ reads which can 
happen.

With a MAX_WB of 256 we can easily have 128 CPUs and don't run into problems.
[yttao]: fine, this is a good idea. But it seems that in the current drm-next 
code, KIQ access still uses msleep to wait for the fence, which is not correct 
according to your comments.
I think we need to submit another patch to add one more condition, 
"in_atomic()", to prevent it, but this function cannot know about held spinlocks 
in non-preemptible kernels.

Regards,
Christian.
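
A back-of-envelope check of the estimate above, under the assumption (not
stated in the thread) that each CPU contributes at most one process-context
plus one interrupt-context KIQ read at a time:

#include <assert.h>			/* C11 static_assert */

#define ASSUMED_MAX_CPUS	128
#define KIQ_READS_PER_CPU	2	/* one process + one IRQ context */
#define ASSUMED_AMDGPU_MAX_WB	256

/* 128 CPUs * 2 contexts = 256 concurrent readers, exactly covered */
static_assert(ASSUMED_MAX_CPUS * KIQ_READS_PER_CPU <= ASSUMED_AMDGPU_MAX_WB,
	      "wb slots must cover worst-case concurrent KIQ readers");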

>
>
> Regards,
> Christian.
>
>>> And I would either say that we should use the trick with the NOP to reserve 
>>> space on the ring buffer or call amdgpu_device_wb_get() for each read. 
>>> amdgpu_device_wb_get() also uses find_first_zero_bit() and should work 
>>> equally well.
>>> [yttao]: sorry, can you give me more details about how to use NOP to 
>>> reserve space? I will use amdgpu_device_wb_get() for the read operation.
>> We could use the NOP PM4 command as Felix suggested: this command has a
>> header+length and says that the next X dw should be ignored on the ring
>> buffer.
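
A rough sketch of the NOP trick as described here (not of what the series
finally merged): a type-3 NOP header makes the CP skip the following dwords,
so they can serve as scratch space. The helper name and the count encoding
(payload dwords minus one) are assumptions to verify against the PM4 spec:

/* Hypothetical helper: reserve ndw CP-ignored dwords on the ring. */
static void kiq_ring_reserve_scratch(struct amdgpu_ring *ring,
				     unsigned int ndw, u64 *scratch_gpu_addr)
{
	/* header+length; assumed count encoding: payload dwords - 1 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, ndw - 1));
	/* the CP skips the next ndw dwords; remember where they live */
	*scratch_gpu_addr = ring->gpu_addr +
			    (ring->wptr & ring->buf_mask) * 4;
	ring->wptr += ndw;	/* leave the skipped dwords as scratch space */
}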
>>
>> But I think using amdgpu_device_wb_get() is better anyway.
>> [yttao]: yes, I agree with the amdgpu_device_wb_get() method because it
>> will prevent the potential read race condition, but the NOP method will
>> not prevent it.
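
A sketch of the per-read writeback idea both sides converge on here: each KIQ
read owns a private wb slot, so concurrent readers can no longer consume each
other's values. Locking and most error handling are trimmed for brevity, and
the exact emit signatures should be treated as assumptions of the sketch:

static uint32_t kiq_rreg_private_slot(struct amdgpu_device *adev, uint32_t reg)
{
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	uint32_t reg_val_offs = 0, seq, val;

	if (amdgpu_device_wb_get(adev, &reg_val_offs))
		return ~0;	/* no free slot left */

	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);	/* CP writes here */
	amdgpu_fence_emit_polling(ring, &seq);
	amdgpu_ring_commit(ring);

	amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	val = adev->wb.wb[reg_val_offs];	/* private slot, race-free */
	amdgpu_device_wb_free(adev, reg_val_offs);
	return val;
}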
>>
>> Regards,
>> Christian.
>>
>>>
>>> -Original Message-
>>> From: Koenig, Christian 
>>> Sent: 22 April 2020 15:23
>>> To: Tao, Yintian ; Liu, Monk 
>>> ; Liu, Shaoyun ; Kuehling, 
>>> Felix 
>>> Cc: amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register
>>>
 BUG_ON(in_interrupt());
>>> That won't work like this. The KIQ is also used in interrupt context in the 
>>> driver, that's why we used spin_lock_irqsave().
>>>
>>> And I would either say that we should use the trick with the NOP to reserve 
>>> space on the ring buffer or call amdgpu_device_wb_get() for each read. 
>>> amdgpu_device_wb_get() also uses find_first_zero_bit() and should work 
>>> equally well.
>>>
>>> You also don't need to worry too much about overflowing the wb area.
>>> Since we run in an atomic context we can have at most the number of CPUs in
>>> the system + interrupt context here.
>>>
>>> Regards,
>>> Christian.
>>>
On 22.04.20 at 09:11, Tao, Yintian wrote:
 Add Felix and Shaoyun

 -

Re: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Christian König

On 22.04.20 at 10:06, Tao, Yintian wrote:

Hi  Christian

Please see inline comments

-Original Message-
From: Koenig, Christian 
Sent: 22 April 2020 15:54
To: Tao, Yintian ; Liu, Monk ; Liu, Shaoyun 
; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

On 22.04.20 at 09:49, Tao, Yintian wrote:

Hi Christian


Please see inline comments.
-Original Message-
From: Christian König 
Sent: 22 April 2020 15:40
To: Tao, Yintian ; Koenig, Christian
; Liu, Monk ; Liu, Shaoyun
; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

On 22.04.20 at 09:35, Tao, Yintian wrote:

Hi  Christian



BUG_ON(in_interrupt());

That won't work like this. The KIQ is also used in interrupt context in the 
driver, that's why we used spin_lock_irqsave().
[yttao]: According to the current drm-next code, I have not found where
registers are accessed through KIQ in interrupt context.
And you need to wait for free kiq ring buffer space if
there is no free kiq ring buffer; waiting here in interrupt context is
illegal.

Waiting in atomic context is illegal as well, but we don't have much other
choice.
[yttao]: no, there is no sleep in atomic context in my patch.

I'm not talking about sleeping, but about busy waiting.


We just need to make sure that waiting never happens by making the buffers
large enough, and if it still happens, print an error.
[yttao]: this is not a good choice because the KMD needs to protect against
this instead of hoping users will not frequently invoke KIQ access.

The only other choice we have is busy waiting, e.g. loop until we get a free
slot.
[yttao]: Yes, right now my patch uses msleep() for the busy waiting. Or do you
mean we need to use udelay()? If we use udelay(), it will be a nightmare under
multi-VF: since it is assumed that there are 16 VFs with a 6 ms world switch,
in the worst case one VF will udelay(16 * 6 ms = 96 ms) to get one
free slot.


You can't use msleep() here since sleeping in atomic or interrupt 
context is forbidden.


The trick is that in atomic context the CPU can't switch to a different 
process, so we have a very limited number of concurrent KIQ reads which 
can happen.


With a MAX_WB of 256 we can easily have 128 CPUs and don't run into 
problems.


Regards,
Christian.




Regards,
Christian.


And I would either say that we should use the trick with the NOP to reserve 
space on the ring buffer or call amdgpu_device_wb_get() for each read. 
amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally 
well.
[yttao]: sorry, can you give me more details about how to use NOP to reserve 
space? I will use amdgpu_device_wb_get() for the read operation.

We could use the NOP PM4 command as Felix suggested: this command has a
header+length and says that the next X dw should be ignored on the ring
buffer.

But I think using amdgpu_device_wb_get() is better anyway.
[yttao]: yes, I agree with the amdgpu_device_wb_get() method because it
will prevent the potential read race condition, but the NOP method will not
prevent it.

Regards,
Christian.



-Original Message-
From: Koenig, Christian 
Sent: 22 April 2020 15:23
To: Tao, Yintian ; Liu, Monk ;
Liu, Shaoyun ; Kuehling, Felix

Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register


BUG_ON(in_interrupt());

That won't work like this. The KIQ is also used in interrupt context in the 
driver, that's why we used spin_lock_irqsave().

And I would either say that we should use the trick with the NOP to reserve 
space on the ring buffer or call amdgpu_device_wb_get() for each read. 
amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally 
well.

You also don't need to worry too much about overflowing the wb area.
Since we run in an atomic context we can have at most the number of CPUs in the
system + interrupt context here.

Regards,
Christian.

On 22.04.20 at 09:11, Tao, Yintian wrote:

Add Felix and Shaoyun

-Original Message-
From: Yintian Tao 
Sent: 22 April 2020 12:42
To: Koenig, Christian ; Liu, Monk

Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian

Subject: [PATCH] drm/amdgpu: refine kiq access register

According to the current kiq access register method, there will be a race
condition when using KIQ to read registers if multiple clients want to read
at the same time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
   gets the REG-1 value

And if multiple clients frequently write registers through KIQ, that may
raise the KIQ ring buffer overwritten problem.

Therefore, allocate a fixed number of wb slots for rreg use and limit the
submit number, which depends on the kiq ring_size, in order to prevent the
overwritten problem.
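
As a worked example of "limit the submit number which depends on the kiq
ring_size": if every KIQ register access reserves a fixed budget of ring
dwords, the ring bounds how many submissions can be outstanding before it
wraps. The 32-dword budget below is illustrative, not a value from the patch:

/* Hypothetical bound on in-flight KIQ submissions before the ring wraps. */
static inline unsigned int kiq_max_inflight(unsigned int ring_size_dw)
{
	const unsigned int dw_per_submit = 32;	/* assumed per-access budget */

	return ring_size_dw / dw_per_submit;	/* e.g. 1024 / 32 = 32 */
}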

RE: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Tao, Yintian
Hi  Christian

Please see inline comments

-Original Message-
From: Koenig, Christian  
Sent: 22 April 2020 15:54
To: Tao, Yintian ; Liu, Monk ; Liu, 
Shaoyun ; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

On 22.04.20 at 09:49, Tao, Yintian wrote:
> Hi Christian
>
>
> Please see inline comments.
> -Original Message-
> From: Christian König 
> Sent: 22 April 2020 15:40
> To: Tao, Yintian ; Koenig, Christian 
> ; Liu, Monk ; Liu, Shaoyun 
> ; Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register
>
> On 22.04.20 at 09:35, Tao, Yintian wrote:
>> Hi  Christian
>>
>>
>>> BUG_ON(in_interrupt());
>> That won't work like this. The KIQ is also used in interrupt context in the 
>> driver, that's why we used spin_lock_irqsave().
>> [yttao]: According to the current drm-next code, I have not found where
>> registers are accessed through KIQ in interrupt context.
>> And you need to wait for free kiq ring buffer space if
>> there is no free kiq ring buffer; waiting here in interrupt context is
>> illegal.
> Waiting in atomic context is illegal as well, but we don't have much other
> choice.
> [yttao]: no, there is no sleep in atomic context in my patch.

I'm not talking about sleeping, but about busy waiting.

> We just need to make sure that waiting never happens by making the buffers
> large enough, and if it still happens, print an error.
> [yttao]: this is not a good choice because the KMD needs to protect against
> this instead of hoping users will not frequently invoke KIQ access.

The only other choice we have is busy waiting, e.g. loop until we get a free
slot.
[yttao]: Yes, right now my patch uses msleep() for the busy waiting. Or do you
mean we need to use udelay()? If we use udelay(), it will be a nightmare under
multi-VF: since it is assumed that there are 16 VFs with a 6 ms world switch,
in the worst case one VF will udelay(16 * 6 ms = 96 ms) to get one
free slot.


Regards,
Christian.

>
>> And I would either say that we should use the trick with the NOP to reserve 
>> space on the ring buffer or call amdgpu_device_wb_get() for each read. 
>> amdgpu_device_wb_get() also uses find_first_zero_bit() and should work 
>> equally well.
>> [yttao]: sorry, can you give me more details about how to use NOP to reserve 
>> space? I will use amdgpu_device_wb_get() for the read operation.
> We could use the NOP PM4 command as Felix suggested: this command has a
> header+length and says that the next X dw should be ignored on the ring
> buffer.
>
> But I think using amdgpu_device_wb_get() is better anyway.
> [yttao]: yes, I agree with the amdgpu_device_wb_get() method because it
> will prevent the potential read race condition, but the NOP method will not
> prevent it.
>
> Regards,
> Christian.
>
>>
>>
>> -Original Message-
>> From: Koenig, Christian 
>> Sent: 22 April 2020 15:23
>> To: Tao, Yintian ; Liu, Monk ; 
>> Liu, Shaoyun ; Kuehling, Felix 
>> 
>> Cc: amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register
>>
>>> BUG_ON(in_interrupt());
>> That won't work like this. The KIQ is also used in interrupt context in the 
>> driver, that's why we used spin_lock_irqsave().
>>
>> And I would either say that we should use the trick with the NOP to reserve 
>> space on the ring buffer or call amdgpu_device_wb_get() for each read. 
>> amdgpu_device_wb_get() also uses find_first_zero_bit() and should work 
>> equally well.
>>
>> You also don't need to worry too much about overflowing the wb area.
>> Since we run in an atomic context we can have at most the number of CPUs in
>> the system + interrupt context here.
>>
>> Regards,
>> Christian.
>>
>> On 22.04.20 at 09:11, Tao, Yintian wrote:
>>> Add Felix and Shaoyun
>>>
>>> -Original Message-
>>> From: Yintian Tao 
>>> Sent: 22 April 2020 12:42
>>> To: Koenig, Christian ; Liu, Monk 
>>> 
>>> Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian 
>>> 
>>> Subject: [PATCH] drm/amdgpu: refine kiq access register
>>>
>>> According to the current kiq access register method, there will be a race
>>> condition when using KIQ to read registers if multiple clients want to read
>>> at the same time, just like the example below:
>>> 1. client-A starts to read REG-0 through KIQ
>>> 2. client-A polls the seqno-0
>>> 3. client-B starts to read REG-1 through KIQ
>>> 4. client-B polls the seqno-1
>>> 5. the kiq completes these two read operations
>>> 6. client-A reads the register at the wb buffer and
>>>    gets the REG-1 value
>>>
>>> And if multiple clients frequently write registers through KIQ, that may
>>> raise the KIQ ring buffer overwritten problem.
>>>
>>> Therefore, allocate a fixed number of wb slots for rreg use and limit the
>>> submit number, which depends on the kiq ring_size, in order to prevent the
>>> overwritten problem.
>>>
>>> Signed-off-by: Yintian Tao 
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   7 +-

Re: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Christian König

On 22.04.20 at 09:49, Tao, Yintian wrote:

Hi Christian


Please see inline comments.
-Original Message-
From: Christian König 
Sent: 22 April 2020 15:40
To: Tao, Yintian ; Koenig, Christian ; Liu, Monk 
; Liu, Shaoyun ; Kuehling, Felix 

Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

On 22.04.20 at 09:35, Tao, Yintian wrote:

Hi  Christian



BUG_ON(in_interrupt());

That won't work like this. The KIQ is also used in interrupt context in the 
driver, that's why we used spin_lock_irqsave().
[yttao]: According to the current drm-next code, I have not found where
registers are accessed through KIQ in interrupt context.
And you need to wait for free kiq ring buffer space if
there is no free kiq ring buffer; waiting here in interrupt context is
illegal.

Waiting in atomic context is illegal as well, but we don't have much other
choice.
[yttao]: no, there is no sleep in atomic context in my patch.


I'm not talking about sleeping, but about busy waiting.


We just need to make sure that waiting never happens by making the buffers
large enough, and if it still happens, print an error.
[yttao]: this is not a good choice because the KMD needs to protect against
this instead of hoping users will not frequently invoke KIQ access.


The only other choice we have is busy waiting, e.g. loop until we get a 
free slot.


Regards,
Christian.




And I would either say that we should use the trick with the NOP to reserve 
space on the ring buffer or call amdgpu_device_wb_get() for each read. 
amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally 
well.
[yttao]: sorry, can you give me more details about how to use NOP to reserve 
space? I will use amdgpu_device_wb_get() for the read operation.

We could use the NOP PM4 command as Felix suggested: this command has a
header+length and says that the next X dw should be ignored on the ring
buffer.

But I think using amdgpu_device_wb_get() is better anyway.
[yttao]: yes, I agree with the amdgpu_device_wb_get() method because it will
prevent the potential read race condition, but the NOP method will not prevent it.

Regards,
Christian.




-Original Message-
From: Koenig, Christian 
Sent: 22 April 2020 15:23
To: Tao, Yintian ; Liu, Monk ; Liu, Shaoyun 
; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register


BUG_ON(in_interrupt());

That won't work like this. The KIQ is also used in interrupt context in the 
driver, that's why we used spin_lock_irqsave().

And I would either say that we should use the trick with the NOP to reserve 
space on the ring buffer or call amdgpu_device_wb_get() for each read. 
amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally 
well.

You also don't need to worry too much about overflowing the wb area.
Since we run in an atomic context we can have at most the number of CPUs in the
system + interrupt context here.

Regards,
Christian.

On 22.04.20 at 09:11, Tao, Yintian wrote:

Add Felix and Shaoyun

-Original Message-
From: Yintian Tao 
Sent: 22 April 2020 12:42
To: Koenig, Christian ; Liu, Monk

Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian 
Subject: [PATCH] drm/amdgpu: refine kiq access register

According to the current kiq access register method, there will be a race
condition when using KIQ to read registers if multiple clients want to read
at the same time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
   gets the REG-1 value

And if multiple clients frequently write registers through KIQ, that may
raise the KIQ ring buffer overwritten problem.

Therefore, allocate a fixed number of wb slots for rreg use and limit the
submit number, which depends on the kiq ring_size, in order to prevent the
overwritten problem.

Signed-off-by: Yintian Tao 
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   7 +-
.../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  12 +-
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  12 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 129 --
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |   6 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |   6 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  13 +-
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|   8 +-
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |   8 +-
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  34 +++--
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  12 +-
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  12 +-
12 files changed, 211 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e1d4cfe7a9f..4530e0de4257 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgp

RE: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Tao, Yintian
Hi Christian


Please see inline comments.
-Original Message-
From: Christian König  
Sent: 22 April 2020 15:40
To: Tao, Yintian ; Koenig, Christian 
; Liu, Monk ; Liu, Shaoyun 
; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

On 22.04.20 at 09:35, Tao, Yintian wrote:
> Hi  Christian
>
>
>> BUG_ON(in_interrupt());
> That won't work like this. The KIQ is also used in interrupt context in the 
> driver, that's why we used spin_lock_irqsave().
> [yttao]: According to the current drm-next code, I have not found where
> registers are accessed through KIQ in interrupt context.
> And you need to wait for free kiq ring buffer space if
> there is no free kiq ring buffer; waiting here in interrupt context is
> illegal.

Waiting in atomic context is illegal as well, but we don't have much other
choice.
[yttao]: no, there is no sleep in atomic context in my patch.

We just need to make sure that waiting never happens by making the buffers
large enough, and if it still happens, print an error.
[yttao]: this is not a good choice because the KMD needs to protect against
this instead of hoping users will not frequently invoke KIQ access.

> And I would either say that we should use the trick with the NOP to reserve 
> space on the ring buffer or call amdgpu_device_wb_get() for each read. 
> amdgpu_device_wb_get() also uses find_first_zero_bit() and should work 
> equally well.
> [yttao]: sorry, can you give me more details about how to use NOP to reserve 
> space? I will use amdgpu_device_wb_get() for the read operation.

We could use the NOP PM4 command as Felix suggested: this command has a
header+length and says that the next X dw should be ignored on the ring
buffer.

But I think using amdgpu_device_wb_get() is better anyway.
[yttao]: yes, I agree with the amdgpu_device_wb_get() method because it will
prevent the potential read race condition, but the NOP method will not prevent it.

Regards,
Christian.

>
>
>
> -Original Message-
> From: Koenig, Christian 
> Sent: 22 April 2020 15:23
> To: Tao, Yintian ; Liu, Monk ; Liu, 
> Shaoyun ; Kuehling, Felix 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: refine kiq access register
>
>> BUG_ON(in_interrupt());
> That won't work like this. The KIQ is also used in interrupt context in the 
> driver, that's why we used spin_lock_irqsave().
>
> And I would either say that we should use the trick with the NOP to reserve 
> space on the ring buffer or call amdgpu_device_wb_get() for each read. 
> amdgpu_device_wb_get() also uses find_first_zero_bit() and should work 
> equally well.
>
> You also don't need to worry too much about overflowing the wb area.
> Since we run in an atomic context we can have at most the number of CPUs in
> the system + interrupt context here.
>
> Regards,
> Christian.
>
> On 22.04.20 at 09:11, Tao, Yintian wrote:
>> Add Felix and Shaoyun
>>
>> -Original Message-
>> From: Yintian Tao 
>> Sent: 22 April 2020 12:42
>> To: Koenig, Christian ; Liu, Monk
>> 
>> Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian 
>> Subject: [PATCH] drm/amdgpu: refine kiq access register
>>
>> According to the current kiq access register method, there will be a race
>> condition when using KIQ to read registers if multiple clients want to read
>> at the same time, just like the example below:
>> 1. client-A starts to read REG-0 through KIQ
>> 2. client-A polls the seqno-0
>> 3. client-B starts to read REG-1 through KIQ
>> 4. client-B polls the seqno-1
>> 5. the kiq completes these two read operations
>> 6. client-A reads the register at the wb buffer and
>>    gets the REG-1 value
>>
>> And if multiple clients frequently write registers through KIQ, that may
>> raise the KIQ ring buffer overwritten problem.
>>
>> Therefore, allocate a fixed number of wb slots for rreg use and limit the
>> submit number, which depends on the kiq ring_size, in order to prevent the
>> overwritten problem.
>>
>> Signed-off-by: Yintian Tao 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   7 +-
>>.../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  12 +-
>>.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  12 +-
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 129 --
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |   6 +-
>>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |   6 +-
>>drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  13 +-
>>drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|   8 +-
>>drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |   8 +-
>>drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  34 +++--
>>drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  12 +-
>>drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  12 +-
>>12 files changed, 211 insertions(+), 48 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 4e1d4cfe7a9f..4530e0de4257 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/driv

Re: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Christian König

On 22.04.20 at 09:35, Tao, Yintian wrote:

Hi  Christian



BUG_ON(in_interrupt());

That won't work like this. The KIQ is also used in interrupt context in the 
driver, that's why we used spin_lock_irqsave().
[yttao]: According to the current drm-next code, I have not found where
registers are accessed through KIQ in interrupt context.
And you need to wait for free kiq ring buffer space if
there is no free kiq ring buffer; waiting here in interrupt context is
illegal.


Waiting in atomic context is illegal as well, but we don't have much 
other choice.


We just need to make sure that waiting never happens by making the
buffers large enough, and if it still happens, print an error.



And I would either say that we should use the trick with the NOP to reserve 
space on the ring buffer or call amdgpu_device_wb_get() for each read. 
amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally 
well.
[yttao]: sorry, can you give me more details about how to use NOP to reserve 
space? I will use amdgpu_device_wb_get() for the read operation.


We could use the NOP PM4 command as Felix suggested: this command has a
header+length and says that the next X dw should be ignored on the ring
buffer.


But I think using amdgpu_device_wb_get() is better anyway.

Regards,
Christian.





-Original Message-
From: Koenig, Christian 
Sent: 22 April 2020 15:23
To: Tao, Yintian ; Liu, Monk ; Liu, Shaoyun 
; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register


BUG_ON(in_interrupt());

That won't work like this. The KIQ is also used in interrupt context in the 
driver, that's why we used spin_lock_irqsave().

And I would either say that we should use the trick with the NOP to reserve 
space on the ring buffer or call amdgpu_device_wb_get() for each read. 
amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally 
well.

You also don't need to worry too much about overflowing the wb area.
Since we run in an atomic context we can have at most the number of CPUs in the
system + interrupt context here.

Regards,
Christian.

On 22.04.20 at 09:11, Tao, Yintian wrote:

Add Felix and Shaoyun

-Original Message-
From: Yintian Tao 
Sent: 22 April 2020 12:42
To: Koenig, Christian ; Liu, Monk

Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian 
Subject: [PATCH] drm/amdgpu: refine kiq access register

According to the current kiq access register method, there will be a race
condition when using KIQ to read registers if multiple clients want to read
at the same time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
   gets the REG-1 value

And if multiple clients frequently write registers through KIQ, that may
raise the KIQ ring buffer overwritten problem.

Therefore, allocate a fixed number of wb slots for rreg use and limit the
submit number, which depends on the kiq ring_size, in order to prevent the
overwritten problem.

Signed-off-by: Yintian Tao 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   7 +-
   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  12 +-
   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  12 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 129 --
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |   6 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |   6 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  13 +-
   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|   8 +-
   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |   8 +-
   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  34 +++--
   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  12 +-
   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  12 +-
   12 files changed, 211 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e1d4cfe7a9f..4530e0de4257 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
amdgpu_cs_parser *p,
   /*
* Writeback
*/
-#define AMDGPU_MAX_WB 128  /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256  /* Reserve at most 256 WB slots for 
amdgpu-owned rings. */
   
   struct amdgpu_wb {

struct amdgpu_bo*wb_obj;
@@ -1028,6 +1028,11 @@ bool amdgpu_device_has_dc_support(struct
amdgpu_device *adev);
   
   int emu_soc_asic_init(struct amdgpu_device *adev);
   
+int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read);
+void amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq);
+
+void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs);
+void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs);
   /*
* Registers read & w

RE: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Tao, Yintian
Hi  Christian


> BUG_ON(in_interrupt());
That won't work like this. The KIQ is also used in interrupt context in the 
driver, that's why we used spin_lock_irqsave().
[yttao]: According to the current drm-next code, I have not found where
registers are accessed through KIQ in interrupt context.
And you need to wait for free kiq ring buffer space if
there is no free kiq ring buffer; waiting here in interrupt context is
illegal.

And I would either say that we should use the trick with the NOP to reserve 
space on the ring buffer or call amdgpu_device_wb_get() for each read. 
amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally 
well.
[yttao]: sorry, can you give me more details about how to use NOP to reserve 
space? I will use amdgpu_device_wb_get() for the read operation.



-Original Message-
From: Koenig, Christian  
Sent: 22 April 2020 15:23
To: Tao, Yintian ; Liu, Monk ; Liu, 
Shaoyun ; Kuehling, Felix 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: refine kiq access register

> BUG_ON(in_interrupt());
That won't work like this. The KIQ is also used in interrupt context in the 
driver, that's why we used spin_lock_irqsave().

And I would either say that we should use the trick with the NOP to reserve 
space on the ring buffer or call amdgpu_device_wb_get() for each read. 
amdgpu_device_wb_get() also uses find_first_zero_bit() and should work equally 
well.

You also don't need to worry too much about overflowing the wb area.
Since we run in an atomic context we can have at most the number of CPUs in the
system + interrupt context here.

Regards,
Christian.

On 22.04.20 at 09:11, Tao, Yintian wrote:
> Add Felix and Shaoyun
>
> -Original Message-
> From: Yintian Tao 
> Sent: 22 April 2020 12:42
> To: Koenig, Christian ; Liu, Monk 
> 
> Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian 
> Subject: [PATCH] drm/amdgpu: refine kiq access register
>
> According to the current kiq access register method, there will be a race
> condition when using KIQ to read registers if multiple clients want to read
> at the same time, just like the example below:
> 1. client-A starts to read REG-0 through KIQ
> 2. client-A polls the seqno-0
> 3. client-B starts to read REG-1 through KIQ
> 4. client-B polls the seqno-1
> 5. the kiq completes these two read operations
> 6. client-A reads the register at the wb buffer and
>    gets the REG-1 value
>
> And if multiple clients frequently write registers through KIQ, that may
> raise the KIQ ring buffer overwritten problem.
>
> Therefore, allocate a fixed number of wb slots for rreg use and limit the
> submit number, which depends on the kiq ring_size, in order to prevent the
> overwritten problem.
>
> Signed-off-by: Yintian Tao 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   7 +-
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  12 +-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  12 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 129 --
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |   6 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |   6 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  13 +-
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|   8 +-
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |   8 +-
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  34 +++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  12 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  12 +-
>   12 files changed, 211 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4e1d4cfe7a9f..4530e0de4257 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
> amdgpu_cs_parser *p,
>   /*
>* Writeback
>*/
> -#define AMDGPU_MAX_WB 128/* Reserve at most 128 WB slots for 
> amdgpu-owned rings. */
> +#define AMDGPU_MAX_WB 256/* Reserve at most 256 WB slots for 
> amdgpu-owned rings. */
>   
>   struct amdgpu_wb {
>   struct amdgpu_bo*wb_obj;
> @@ -1028,6 +1028,11 @@ bool amdgpu_device_has_dc_support(struct 
> amdgpu_device *adev);
>   
>   int emu_soc_asic_init(struct amdgpu_device *adev);
>   
> +int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read);
> +void amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq);
> +
> +void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs); 
> +void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs);
>   /*
>* Registers read & write functions.
>*/
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 691c89705bcd..034c9f416499 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -309,6 +309,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void 
> *mqd,
>  

Re: [PATCH] amdgpu: fixes memleak issue when init failed

2020-04-22 Thread Christian König

On 22.04.20 at 02:56, 赵军奎 wrote:

From: "Christian König" 
Sent: 2020-04-21 22:53:47
To: "赵军奎" 
Cc: Alex Deucher ,"David (ChunMing) Zhou" ,David Airlie 
,Daniel Vetter ,Tom St Denis ,Ori Messinger 
,Sam Ravnborg 
,amd-gfx@lists.freedesktop.org,dri-de...@lists.freedesktop.org,linux-ker...@vger.kernel.org,opensource.ker...@vivo.com
Subject: Re: [PATCH] amdgpu: fixes memleak issue when init failed

On 21.04.20 at 15:39, 赵军奎 wrote:

From: "Christian König" 
Sent: 2020-04-21 21:02:27
To: "赵军奎" 
Cc: Alex Deucher ,"David (ChunMing) Zhou" ,David Airlie 
,Daniel Vetter ,Tom St Denis ,Ori Messinger 
,Sam Ravnborg 
,amd-gfx@lists.freedesktop.org,dri-de...@lists.freedesktop.org,linux-ker...@vger.kernel.org,opensource.ker...@vivo.com
Subject: Re: [PATCH] amdgpu: fixes memleak issue when init failed

On 21.04.20 at 14:09, 赵军奎 wrote:

From: "Christian König" 
Date: 2020-04-21 19:22:49
To:  Bernard Zhao ,Alex Deucher ,"David (ChunMing) Zhou" 
,David Airlie ,Daniel Vetter ,Tom St Denis 
,Ori Messinger ,Sam Ravnborg 
,amd-gfx@lists.freedesktop.org,dri-de...@lists.freedesktop.org,linux-ker...@vger.kernel.org
Cc:  opensource.ker...@vivo.com
Subject: Re: [PATCH] amdgpu: fixes memleak issue when init failed

On 21.04.20 at 13:17, Bernard Zhao wrote:

When VRAM manager and DRM MM init fails, there is no operation
to free the kzalloc'ed memory & remove the device files.
This will lead to a memleak & cause stability issues.

NAK, failures to create sysfs nodes are not critical.

Christian.


OK, got it.
By the way, should I modify this patch to just handle this in the error
branch, or is that also unnecessary?

What you can do is to drop the "return ret" if anything with the sysfs
nodes goes wrong and instead print the error code.

Emmm, for this part I am not sure: my modification first prints the error,
second releases the not-yet-freed memory,
and last returns the error, making everything clear to the system.
I think it's the same as what you mentioned; is there something that I
misunderstood?

Yes, maybe an example makes it more clear what to do here. Currently we
print an error and return when something with the sysfs files goes wrong:

if (ret) {
     DRM_ERROR("Failed to create device file mem_info_vram_total\n");
     return ret;
}

But what we should do instead is just to print an error and continue and
in the end return success status:

if (ret)
     DRM_ERROR("Failed to create device file mem_info_vram_total (%d)\n", ret);

...
return 0;

Regards,
Christian.


Emmm, I am still confused about two points:
1. Does that mean there is no failure case in this function?


Well the kzalloc can still fail.


2. There is no need to free the kzalloc'ed space (no possibility of a memory leak)?


Correct, yes.

Regards,
Christian.



Regards,
Bernard


It's really annoying that loading, unloading and loading the driver
again sometimes fails because we have a bug in the sysfs files cleanup.

We certainly should fix those bugs as well, but they are just not
critical for correct driver functionality.

Regards,
Christian.

Regards,
Bernard


Signed-off-by: Bernard Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 24 
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 82a3299e53c0..4c5fb153e6b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -175,30 +175,44 @@ static int amdgpu_vram_mgr_init(struct 
ttm_mem_type_manager *man,
ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_total);
if (ret) {
DRM_ERROR("Failed to create device file mem_info_vram_total\n");
-   return ret;
+   goto VRAM_TOTAL_FAIL;
}
ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_total);
if (ret) {
DRM_ERROR("Failed to create device file 
mem_info_vis_vram_total\n");
-   return ret;
+   goto VIS_VRAM_TOTA_FAIL;
}
ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_used);
if (ret) {
DRM_ERROR("Failed to create device file mem_info_vram_used\n");
-   return ret;
+   goto VRAM_USED_FAIL;
}
ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_used);
if (ret) {
DRM_ERROR("Failed to create device file 
mem_info_vis_vram_used\n");
-   return ret;
+   goto VIS_VRAM_USED_FAIL;
}
ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_vendor);
if (ret) {
DRM_ERROR("Failed to create device file 
mem_info_vram_vendor\n");
-   return ret;
+   goto VRAM_VENDOR_FAIL;
}
 
 	return 0;

+
+VRAM_VENDOR_FAIL:
+   device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used);
+VIS_VRAM_USED_FAIL:
+   device_remove_file(adev->dev, &dev_attr_mem_info_vram_used)

Re: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Christian König

BUG_ON(in_interrupt());
That won't work like this. The KIQ is also used in interrupt context in 
the driver, that's why we used spin_lock_irqsave().


And I would either say that we should use the trick with the NOP to 
reserve space on the ring buffer or call amdgpu_device_wb_get() for each 
read. amdgpu_device_wb_get() also uses find_first_zero_bit() and should 
work equally well.


You also don't need to worry too much about overflowing the wb area.
Since we run in an atomic context we can have at most the number of CPUs
in the system + interrupt context here.


Regards,
Christian.

On 22.04.20 at 09:11, Tao, Yintian wrote:

Add Felix and Shaoyun

-Original Message-
From: Yintian Tao 
Sent: 22 April 2020 12:42
To: Koenig, Christian ; Liu, Monk 
Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian 
Subject: [PATCH] drm/amdgpu: refine kiq access register

According to the current kiq access register method, there will be a race
condition when using KIQ to read registers if multiple clients want to read
at the same time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
   gets the REG-1 value

And if multiple clients frequently write registers through KIQ, that may
raise the KIQ ring buffer overwritten problem.

Therefore, allocate a fixed number of wb slots for rreg use and limit the
submit number, which depends on the kiq ring_size, in order to prevent the
overwritten problem.

Signed-off-by: Yintian Tao 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   7 +-
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  12 +-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  12 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 129 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |   6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |   6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  13 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|   8 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |   8 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  34 +++--
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  12 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  12 +-
  12 files changed, 211 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e1d4cfe7a9f..4530e0de4257 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
amdgpu_cs_parser *p,
  /*
   * Writeback
   */
-#define AMDGPU_MAX_WB 128  /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256  /* Reserve at most 256 WB slots for 
amdgpu-owned rings. */
  
  struct amdgpu_wb {

struct amdgpu_bo*wb_obj;
@@ -1028,6 +1028,11 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device 
*adev);
  
  int emu_soc_asic_init(struct amdgpu_device *adev);
  
+int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read);
+void amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq);
+
+void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs);
+void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs);
  /*
   * Registers read & write functions.
   */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..034c9f416499 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -309,6 +309,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
uint32_t doorbell_off)
  {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
+   struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
@@ -324,13 +325,19 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void 
*mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
 mec, pipe, queue_id);
  
-	spin_lock(&adev->gfx.kiq.ring_lock);

+   r = amdgpu_gfx_kiq_lock(kiq, false);
+   if (r) {
+   pr_err("failed to lock kiq\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
goto out_unlock;
}
  
+	amdgpu_gfx_kiq_consume(kiq, NULL);

amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
amdgpu_ring_write(kiq_ring,
  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
@@ -350,8 +357,9 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_

RE: [PATCH] drm/amdgpu: refine kiq access register

2020-04-22 Thread Tao, Yintian
Add Felix and Shaoyun

-Original Message-
From: Yintian Tao  
Sent: 22 April 2020 12:42
To: Koenig, Christian ; Liu, Monk 
Cc: amd-gfx@lists.freedesktop.org; Tao, Yintian 
Subject: [PATCH] drm/amdgpu: refine kiq access register

According to the current kiq access register method, there will be a race
condition when using KIQ to read registers if multiple clients want to read
at the same time, just like the example below:
1. client-A starts to read REG-0 through KIQ
2. client-A polls the seqno-0
3. client-B starts to read REG-1 through KIQ
4. client-B polls the seqno-1
5. the kiq completes these two read operations
6. client-A reads the register at the wb buffer and
   gets the REG-1 value

And if multiple clients frequently write registers through KIQ, that may
raise the KIQ ring buffer overwritten problem.

Therefore, allocate a fixed number of wb slots for rreg use and limit the
submit number, which depends on the kiq ring_size, in order to prevent the
overwritten problem.

Signed-off-by: Yintian Tao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   7 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  12 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  12 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   | 129 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  13 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|   8 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |   8 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  34 +++--
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  12 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  12 +-
 12 files changed, 211 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e1d4cfe7a9f..4530e0de4257 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -526,7 +526,7 @@ static inline void amdgpu_set_ib_value(struct 
amdgpu_cs_parser *p,
 /*
  * Writeback
  */
-#define AMDGPU_MAX_WB 128  /* Reserve at most 128 WB slots for 
amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256  /* Reserve at most 256 WB slots for 
amdgpu-owned rings. */
 
 struct amdgpu_wb {
struct amdgpu_bo*wb_obj;
@@ -1028,6 +1028,11 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device 
*adev);
 
 int emu_soc_asic_init(struct amdgpu_device *adev);
 
+int amdgpu_gfx_kiq_lock(struct amdgpu_kiq *kiq, bool read);
+void amdgpu_gfx_kiq_unlock(struct amdgpu_kiq *kiq);
+
+void amdgpu_gfx_kiq_consume(struct amdgpu_kiq *kiq, uint32_t *offs); 
+void amdgpu_gfx_kiq_restore(struct amdgpu_kiq *kiq, uint32_t *offs);
 /*
  * Registers read & write functions.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..034c9f416499 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -309,6 +309,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
uint32_t doorbell_off)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
+   struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
@@ -324,13 +325,19 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void 
*mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
 mec, pipe, queue_id);
 
-   spin_lock(&adev->gfx.kiq.ring_lock);
+   r = amdgpu_gfx_kiq_lock(kiq, false);
+   if (r) {
+   pr_err("failed to lock kiq\n");
+   goto out_unlock;
+   }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
goto out_unlock;
}
 
+   amdgpu_gfx_kiq_consume(kiq, NULL);
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
amdgpu_ring_write(kiq_ring,
  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
@@ -350,8 +357,9 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
amdgpu_ring_commit(kiq_ring);
 
+   amdgpu_gfx_kiq_restore(kiq, NULL);
 out_unlock:
-   spin_unlock(&adev->gfx.kiq.ring_lock);
+   amdgpu_gfx_kiq_unlock(&adev->gfx.kiq);
release_queue(kgd);
 
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2ac5e7..f243d9990ced 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -307,6 +307,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
uint32_t doorbell_off)
 {