[PATCH] Revert "drm/amdgpu: Add missing locking for MES API calls"

2024-06-14 Thread Mukul Joshi
This reverts commit aa68f57443c5aed125079ae66fef0e9fd7008b33.

This is causing a BUG message during suspend.

[   61.603542] BUG: sleeping function called from invalid context at 
kernel/locking/mutex.c:283
[   61.603550] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2028, 
name: kworker/u64:14
[   61.603553] preempt_count: 1, expected: 0
[   61.603555] RCU nest depth: 0, expected: 0
[   61.603557] Preemption disabled at:
[   61.603559] [] amdgpu_gfx_disable_kgq+0x61/0x160 [amdgpu]
[   61.603789] CPU: 9 PID: 2028 Comm: kworker/u64:14 Tainted: GW
  6.8.0+ #7
[   61.603795] Workqueue: events_unbound async_run_entry_fn
[   61.603801] Call Trace:
[   61.603803]  
[   61.603806]  dump_stack_lvl+0x37/0x50
[   61.603811]  ? amdgpu_gfx_disable_kgq+0x61/0x160 [amdgpu]
[   61.604007]  dump_stack+0x10/0x20
[   61.604010]  __might_resched+0x16f/0x1d0
[   61.604016]  __might_sleep+0x43/0x70
[   61.604020]  mutex_lock+0x1f/0x60
[   61.604024]  amdgpu_mes_unmap_legacy_queue+0x6d/0x100 [amdgpu]
[   61.604226]  gfx11_kiq_unmap_queues+0x3dc/0x430 [amdgpu]
[   61.604422]  ? srso_alias_return_thunk+0x5/0xfbef5
[   61.604429]  amdgpu_gfx_disable_kgq+0x122/0x160 [amdgpu]
[   61.604621]  gfx_v11_0_hw_fini+0xda/0x100 [amdgpu]
[   61.604814]  gfx_v11_0_suspend+0xe/0x20 [amdgpu]
[   61.605008]  amdgpu_device_ip_suspend_phase2+0x135/0x1d0 [amdgpu]
[   61.605175]  amdgpu_device_suspend+0xec/0x180 [amdgpu]

Signed-off-by: Mukul Joshi 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 1a65ced60e68..48a5b0713fed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -801,9 +801,7 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
queue_input.wptr_addr = ring->wptr_gpu_addr;
 
-   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->map_legacy_queue(>mes, _input);
-   amdgpu_mes_unlock(>mes);
if (r)
DRM_ERROR("failed to map legacy queue\n");
 
@@ -826,9 +824,7 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device 
*adev,
queue_input.trail_fence_addr = gpu_addr;
queue_input.trail_fence_data = seq;
 
-   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->unmap_legacy_queue(>mes, _input);
-   amdgpu_mes_unlock(>mes);
if (r)
DRM_ERROR("failed to unmap legacy queue\n");
 
@@ -849,13 +845,11 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, 
uint32_t reg)
goto error;
}
 
-   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->misc_op(>mes, _input);
if (r)
DRM_ERROR("failed to read reg (0x%x)\n", reg);
else
val = *(adev->mes.read_val_ptr);
-   amdgpu_mes_unlock(>mes);
 
 error:
return val;
@@ -877,9 +871,7 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev,
goto error;
}
 
-   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->misc_op(>mes, _input);
-   amdgpu_mes_unlock(>mes);
if (r)
DRM_ERROR("failed to write reg (0x%x)\n", reg);
 
@@ -906,9 +898,7 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device 
*adev,
goto error;
}
 
-   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->misc_op(>mes, _input);
-   amdgpu_mes_unlock(>mes);
if (r)
DRM_ERROR("failed to reg_write_reg_wait\n");
 
@@ -933,9 +923,7 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, 
uint32_t reg,
goto error;
}
 
-   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->misc_op(>mes, _input);
-   amdgpu_mes_unlock(>mes);
if (r)
DRM_ERROR("failed to reg_write_reg_wait\n");
 
-- 
2.35.1



[PATCH] drm/ttm: Add cgroup memory accounting for GTT memory

2024-06-06 Thread Mukul Joshi
Make sure we do not overflow the memory limits set for a cgroup when doing
GTT memory allocations.

Suggested-by: Philip Yang 
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/ttm/ttm_pool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 6e1fd6985ffc..59e1accdef08 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -91,7 +91,7 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool 
*pool, gfp_t gfp_flags,
 */
if (order)
gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN |
-   __GFP_KSWAPD_RECLAIM;
+   __GFP_KSWAPD_RECLAIM | __GFP_ACCOUNT;
 
if (!pool->use_dma_alloc) {
p = alloc_pages_node(pool->nid, gfp_flags, order);
-- 
2.35.1



[PATCH] drm/amdgpu: Add missing locking for MES API calls

2024-06-06 Thread Mukul Joshi
Add missing locking at a few places when calling MES APIs to ensure
exclusive access to the MES queue.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 62edf6328566..df6c067b1dc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -801,7 +801,9 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
queue_input.wptr_addr = ring->wptr_gpu_addr;
 
+   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->map_legacy_queue(>mes, _input);
+   amdgpu_mes_unlock(>mes);
if (r)
DRM_ERROR("failed to map legacy queue\n");
 
@@ -824,7 +826,9 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device 
*adev,
queue_input.trail_fence_addr = gpu_addr;
queue_input.trail_fence_data = seq;
 
+   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->unmap_legacy_queue(>mes, _input);
+   amdgpu_mes_unlock(>mes);
if (r)
DRM_ERROR("failed to unmap legacy queue\n");
 
@@ -845,11 +849,13 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, 
uint32_t reg)
goto error;
}
 
+   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->misc_op(>mes, _input);
if (r)
DRM_ERROR("failed to read reg (0x%x)\n", reg);
else
val = *(adev->mes.read_val_ptr);
+   amdgpu_mes_unlock(>mes);
 
 error:
return val;
@@ -871,7 +877,9 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev,
goto error;
}
 
+   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->misc_op(>mes, _input);
+   amdgpu_mes_unlock(>mes);
if (r)
DRM_ERROR("failed to write reg (0x%x)\n", reg);
 
@@ -898,7 +906,9 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device 
*adev,
goto error;
}
 
+   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->misc_op(>mes, _input);
+   amdgpu_mes_unlock(>mes);
if (r)
DRM_ERROR("failed to reg_write_reg_wait\n");
 
@@ -923,7 +933,9 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, 
uint32_t reg,
goto error;
}
 
+   amdgpu_mes_lock(>mes);
r = adev->mes.funcs->misc_op(>mes, _input);
+   amdgpu_mes_unlock(>mes);
if (r)
DRM_ERROR("failed to reg_write_reg_wait\n");
 
-- 
2.35.1



[PATCH] drm/amdkfd: Fix CU Masking for GFX 9.4.3

2024-05-09 Thread Mukul Joshi
We are incorrectly passing the first XCC's MQD when
updating CU masks for other XCCs in the partition. Fix
this by passing the MQD for the XCC currently being
updated with CU mask to update_cu_mask function.

Fixes: fc6efed2c728 ("drm/amdkfd: Update CU masking for GFX 9.4.3")
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 542191656aeb..399fa2106631 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -715,7 +715,7 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void 
*mqd,
m = get_mqd(mqd + size * xcc);
update_mqd(mm, m, q, minfo);
 
-   update_cu_mask(mm, mqd, minfo, xcc);
+   update_cu_mask(mm, m, minfo, xcc);
 
if (q->format == KFD_QUEUE_FORMAT_AQL) {
switch (xcc) {
-- 
2.35.1



[PATCH] drm/amdgpu: Fix VRAM memory accounting

2024-04-23 Thread Mukul Joshi
Subtract the VRAM pinned memory when checking for available memory
in the amdgpu_amdkfd_reserve_mem_limit function, since that memory is
not available for use.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 101a2836480d..f672205243e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -220,7 +220,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device 
*adev,
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
 kfd_mem_limit.max_ttm_mem_limit) ||
(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
-vram_size - reserved_for_pt)) {
+vram_size - reserved_for_pt - 
atomic64_read(>vram_pin_size))) {
ret = -ENOMEM;
goto release;
}
-- 
2.35.1



[PATCH] drm/amdkfd: Add VRAM accounting for SVM migration

2024-04-19 Thread Mukul Joshi
Do VRAM accounting when doing migrations to VRAM, to make sure
there is enough available VRAM and that migrating to VRAM doesn't evict
other possible non-unified memory BOs. If migrating to VRAM fails, the
driver can fall back to using system memory seamlessly.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 16 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  2 +-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index bdc01ca9609a..a6bfc00c0310 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -509,10 +509,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
start = start_mgr << PAGE_SHIFT;
end = (last_mgr + 1) << PAGE_SHIFT;
 
+   r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
+   prange->npages * PAGE_SIZE,
+   KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+   node->xcp ? node->xcp->id : 0);
+   if (r) {
+   dev_dbg(node->adev->dev, "failed to allocate VRAM, size exceeds 
VRAM limit\n", r);
+   return -ENOSPC;
+   }
+
r = svm_range_vram_node_new(node, prange, true);
if (r) {
dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
-   return r;
+   goto out;
}
ttm_res_offset = (start_mgr - prange->start + prange->offset) << 
PAGE_SHIFT;
 
@@ -545,6 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
svm_range_vram_node_free(prange);
}
 
+out:
+   amdgpu_amdkfd_unreserve_mem_limit(node->adev,
+   prange->npages * PAGE_SIZE,
+   KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+   node->xcp ? node->xcp->id : 0);
return r < 0 ? r : 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f7d75b432cc6..bfab16b43fec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -3426,7 +3426,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct 
svm_range *prange,
mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
 
-   return r;
+   return 0;
 }
 
 int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
-- 
2.35.1



[PATCH] drm/amdgpu: Fix leak when GPU memory allocation fails

2024-04-18 Thread Mukul Joshi
Free the sync object if the memory allocation fails for any
reason.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 0ae9fd844623..bcf4a9e82075 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1854,6 +1854,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 err_bo_create:
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
 err_reserve_limit:
+   amdgpu_sync_free(&(*mem)->sync);
mutex_destroy(&(*mem)->lock);
if (gobj)
drm_gem_object_put(gobj);
-- 
2.35.1



[PATCH] drm/amdkfd: Cleanup workqueue during module unload

2024-03-20 Thread Mukul Joshi
Destroy the high priority workqueue that handles interrupts
during KFD node cleanup.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index dd3c43c1ad70..9b6b6e882593 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -104,6 +104,8 @@ void kfd_interrupt_exit(struct kfd_node *node)
 */
flush_workqueue(node->ih_wq);
 
+   destroy_workqueue(node->ih_wq);
+
kfifo_free(>ih_fifo);
 }
 
-- 
2.35.1



[PATCH] drm/amdkfd: Check cgroup when returning DMABuf info

2024-03-15 Thread Mukul Joshi
Check cgroup permissions when returning DMA-buf info and,
based on the cgroup check, return the id of the GPU that has
access to the BO.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index dfa8c69532d4..f9631f4b1a02 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1523,7 +1523,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
 
/* Find a KFD GPU device that supports the get_dmabuf_info query */
for (i = 0; kfd_topology_enum_kfd_devices(i, ) == 0; i++)
-   if (dev)
+   if (dev && !kfd_devcgroup_check_permission(dev))
break;
if (!dev)
return -EINVAL;
@@ -1545,7 +1545,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
if (xcp_id >= 0)
args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;
else
-   args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id;
+   args->gpu_id = dev->id;
args->flags = flags;
 
/* Copy metadata buffer to user mode */
-- 
2.35.1



[PATCH 1/2] drm/amdkfd: Rename read_doorbell_id in MQD functions

2024-03-14 Thread Mukul Joshi
Rename the read_doorbell_id function to a more meaningful name
that conveys what it is used for. No functional change.

Suggested-by: Jay Cornwall 
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  | 4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  | 4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   | 4 ++--
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f4d395e38683..1ce398ab0b3d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1997,7 +1997,7 @@ static int unmap_queues_cpsch(struct device_queue_manager 
*dqm,
 * check those fields
 */
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
-   if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
+   if 
(mqd_mgr->check_preemption_failed(dqm->packet_mgr.priv_queue->queue->mqd)) {
dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue 
preemption time out\n");
while (halt_if_hws_hang)
schedule();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index e5cc697a3ca8..ba3eebb2ca6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -119,7 +119,7 @@ struct mqd_manager {
 #if defined(CONFIG_DEBUG_FS)
int (*debugfs_show_mqd)(struct seq_file *m, void *data);
 #endif
-   uint32_t (*read_doorbell_id)(void *mqd);
+   uint32_t (*check_preemption_failed)(void *mqd);
uint64_t (*mqd_stride)(struct mqd_manager *mm,
struct queue_properties *p);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 1a4a69943c71..8f9f56f7a8b0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -206,7 +206,7 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
 }
 
-static uint32_t read_doorbell_id(void *mqd)
+static uint32_t check_preemption_failed(void *mqd)
 {
struct cik_mqd *m = (struct cik_mqd *)mqd;
 
@@ -423,7 +423,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE 
type,
 #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
-   mqd->read_doorbell_id = read_doorbell_id;
+   mqd->check_preemption_failed = check_preemption_failed;
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 22cbfa1bdadd..d4cf7d845928 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -224,7 +224,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
 }
 
-static uint32_t read_doorbell_id(void *mqd)
+static uint32_t check_preemption_failed(void *mqd)
 {
struct v10_compute_mqd *m = (struct v10_compute_mqd *)mqd;
 
@@ -488,7 +488,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE 
type,
 #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
-   mqd->read_doorbell_id = read_doorbell_id;
+   mqd->check_preemption_failed = check_preemption_failed;
pr_debug("%s@%i\n", __func__, __LINE__);
break;
case KFD_MQD_TYPE_DIQ:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 826bc4f6c8a7..2b9f57c267eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -278,7 +278,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
 }
 
-static uint32_t read_doorbell_id(void *mqd)
+static uint32_t check_preemption_failed(void *mqd)
 {
struct v11_compute_mqd *m = (struct v11_compute_mqd *)mqd;
 
@@ -517,7 +517,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE 
type,
 #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
-   mqd->read_doorbell_id = read_doorbell_id;
+   mqd->check_preemption_failed = check_preemption_

[PATCH 2/2] drm/amdkfd: Check preemption status on all XCDs

2024-03-14 Thread Mukul Joshi
This patch adds the following functionality:
- Check the queue preemption status on all XCDs in a partition
  for GFX 9.4.3.
- Update the queue preemption debug message to print the queue
  doorbell id for which preemption failed.
- Change the signature of check preemption failed function to
  return a bool instead of uint32_t and pass the MQD manager
  as an argument.

Suggested-by: Jay Cornwall 
Signed-off-by: Mukul Joshi 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 18 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  |  4 ++-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  |  4 +--
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  4 +--
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  4 +--
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 25 ---
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |  4 +--
 8 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 1ce398ab0b3d..151fabf84040 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1997,8 +1997,7 @@ static int unmap_queues_cpsch(struct device_queue_manager 
*dqm,
 * check those fields
 */
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
-   if 
(mqd_mgr->check_preemption_failed(dqm->packet_mgr.priv_queue->queue->mqd)) {
-   dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue 
preemption time out\n");
+   if (mqd_mgr->check_preemption_failed(mqd_mgr, 
dqm->packet_mgr.priv_queue->queue->mqd)) {
while (halt_if_hws_hang)
schedule();
return -ETIME;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 050a6936ff84..cbec8c87c984 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -290,3 +290,21 @@ uint64_t kfd_mqd_stride(struct mqd_manager *mm,
 {
return mm->mqd_size;
 }
+
+bool kfd_check_hiq_mqd_doorbell_id(struct kfd_node *node, uint32_t doorbell_id,
+  uint32_t inst)
+{
+   if (doorbell_id) {
+   struct device *dev = node->adev->dev;
+
+   if (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3))
+   dev_err(dev, "XCC %d: Queue preemption failed for queue 
with doorbell_id: %x\n",
+   inst, doorbell_id);
+   else
+   dev_err(dev, "Queue preemption failed for queue with 
doorbell_id: %x\n",
+   doorbell_id);
+   return true;
+   }
+
+   return false;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index ba3eebb2ca6d..17cc1f25c8d0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -119,7 +119,7 @@ struct mqd_manager {
 #if defined(CONFIG_DEBUG_FS)
int (*debugfs_show_mqd)(struct seq_file *m, void *data);
 #endif
-   uint32_t (*check_preemption_failed)(void *mqd);
+   bool (*check_preemption_failed)(struct mqd_manager *mm, void *mqd);
uint64_t (*mqd_stride)(struct mqd_manager *mm,
struct queue_properties *p);
 
@@ -198,4 +198,6 @@ void kfd_get_hiq_xcc_mqd(struct kfd_node *dev,
 uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev);
 uint64_t kfd_mqd_stride(struct mqd_manager *mm,
struct queue_properties *q);
+bool kfd_check_hiq_mqd_doorbell_id(struct kfd_node *node, uint32_t doorbell_id,
+  uint32_t inst);
 #endif /* KFD_MQD_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 8f9f56f7a8b0..05f3ac2eaef9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -206,11 +206,11 @@ static void __update_mqd(struct mqd_manager *mm, void 
*mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
 }
 
-static uint32_t check_preemption_failed(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
 {
struct cik_mqd *m = (struct cik_mqd *)mqd;
 
-   return m->queue_doorbell_id0;
+   return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
 }
 
 static void update_mqd(struct mqd_manager *mm, void *mqd,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index d4cf7d845928..2eff37aaf827 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm

[PATCH] drm/amdgpu: Handle duplicate BOs during process restore

2024-03-08 Thread Mukul Joshi
In certain situations, some apps can import a BO multiple times
(through IPC for example). To restore such processes successfully,
we need to tell drm to ignore duplicate BOs.
While at it, also add additional logging to prevent silent failures
when process restore fails.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index bf8e6653341f..65d808d8b5da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2869,14 +2869,16 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, 
struct dma_fence __rcu *
 
mutex_lock(_info->lock);
 
-   drm_exec_init(, 0);
+   drm_exec_init(, DRM_EXEC_IGNORE_DUPLICATES);
drm_exec_until_all_locked() {
list_for_each_entry(peer_vm, _info->vm_list_head,
vm_list_node) {
ret = amdgpu_vm_lock_pd(peer_vm, , 2);
drm_exec_retry_on_contention();
-   if (unlikely(ret))
+   if (unlikely(ret)) {
+   pr_err("Locking VM PD failed, ret: %d\n", ret);
goto ttm_reserve_fail;
+   }
}
 
/* Reserve all BOs and page tables/directory. Add all BOs from
@@ -2889,8 +2891,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, 
struct dma_fence __rcu *
gobj = >bo->tbo.base;
ret = drm_exec_prepare_obj(, gobj, 1);
drm_exec_retry_on_contention();
-   if (unlikely(ret))
+   if (unlikely(ret)) {
+   pr_err("drm_exec_prepare_obj failed, ret: 
%d\n", ret);
goto ttm_reserve_fail;
+   }
}
}
 
@@ -2950,8 +2954,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, 
struct dma_fence __rcu *
 * validations above would invalidate DMABuf imports again.
 */
ret = process_validate_vms(process_info, );
-   if (ret)
+   if (ret) {
+   pr_err("Validating VMs failed, ret: %d\n", ret);
goto validate_map_fail;
+   }
 
/* Update mappings not managed by KFD */
list_for_each_entry(peer_vm, _info->vm_list_head,
-- 
2.35.1



[PATCH] drm/amdkfd: Use correct drm device for cgroup permission check

2024-01-26 Thread Mukul Joshi
On GFX 9.4.3, for a given KFD node, fetch the correct drm device from
the XCP manager when checking for cgroup permissions.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 17fbedbf3651..677281c0793e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1488,10 +1488,15 @@ void kfd_dec_compute_active(struct kfd_node *dev);
 
 /* Cgroup Support */
 /* Check with device cgroup if @kfd device is accessible */
-static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd)
+static inline int kfd_devcgroup_check_permission(struct kfd_node *node)
 {
 #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
-   struct drm_device *ddev = adev_to_drm(kfd->adev);
+   struct drm_device *ddev;
+
+   if (node->xcp)
+   ddev = node->xcp->ddev;
+   else
+   ddev = adev_to_drm(node->adev);
 
return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
  ddev->render->index,
-- 
2.35.1



[PATCH] drm/amdgpu: Fix module unload hang with RAS enabled

2024-01-23 Thread Mukul Joshi
The driver unload hangs because the page retirement kthread cannot
be stopped: it is sleeping while waiting for a page retirement event
to occur. Add kthread_should_stop() to the event condition to wake up
the kthread when kthread_stop() is called during driver unload.

Fixes: 45c3d468793d ("drm/amdgpu: Prepare for asynchronous processing of umc 
page retirement")
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a32e7eb31354..80816c4ec1f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2670,8 +2670,12 @@ static int amdgpu_ras_page_retirement_thread(void *param)
while (!kthread_should_stop()) {
 
wait_event_interruptible(con->page_retirement_wq,
+   kthread_should_stop() ||
atomic_read(>page_retirement_req_cnt));
 
+   if (kthread_should_stop())
+   break;
+
dev_info(adev->dev, "Start processing page retirement. 
request:%d\n",
atomic_read(>page_retirement_req_cnt));
 
-- 
2.35.1



[PATCH] drm/amdkfd: Use common function for IP version check

2023-11-22 Thread Mukul Joshi
KFD_GC_VERSION was recently updated to use a new function
for IP version checks. As a result, use KFD_GC_VERSION as
the common function for all IP version checks in KFD.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index a40f8cfc6aa5..45366b4ca976 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1127,7 +1127,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct 
amdgpu_device *adev,
struct kfd_dev *dev = adev->kfd.dev;
uint32_t i;
 
-   if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
+   if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3))
return dev->nodes[0];
 
for (i = 0; i < dev->num_nodes; i++)
-- 
2.35.1



[PATCHv2 2/2] drm/amdkfd: Update cache info for GFX 9.4.3

2023-10-27 Thread Mukul Joshi
Update cache info reporting based on compute and
memory partitioning modes.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Separate into a separate patch.
- Simplify the if condition to reduce indentation and make it
  logically more clear.

 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 4e530791507e..dc7c8312e8c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1602,10 +1602,13 @@ static int fill_in_l2_l3_pcache(struct 
kfd_cache_properties **props_ext,
unsigned int cu_sibling_map_mask;
int first_active_cu;
int i, j, k, xcc, start, end;
+   int num_xcc = NUM_XCC(knode->xcc_mask);
struct kfd_cache_properties *pcache = NULL;
+   enum amdgpu_memory_partition mode;
+   struct amdgpu_device *adev = knode->adev;
 
start = ffs(knode->xcc_mask) - 1;
-   end = start + NUM_XCC(knode->xcc_mask);
+   end = start + num_xcc;
cu_sibling_map_mask = cu_info->bitmap[start][0][0];
cu_sibling_map_mask &=
((1 << pcache_info[cache_type].num_cu_shared) - 1);
@@ -1624,7 +1627,18 @@ static int fill_in_l2_l3_pcache(struct 
kfd_cache_properties **props_ext,
pcache->processor_id_low = cu_processor_id
+ (first_active_cu - 1);
pcache->cache_level = pcache_info[cache_type].cache_level;
-   pcache->cache_size = pcache_info[cache_type].cache_size;
+
+   if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3))
+   mode = 
adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+   else
+   mode = UNKNOWN_MEMORY_PARTITION_MODE;
+
+   if (pcache->cache_level == 2)
+   pcache->cache_size = pcache_info[cache_type].cache_size 
* num_xcc;
+   else if (mode)
+   pcache->cache_size = pcache_info[cache_type].cache_size 
/ mode;
+   else
+   pcache->cache_size = pcache_info[cache_type].cache_size;
 
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
pcache->cache_type |= HSA_CACHE_TYPE_DATA;
-- 
2.35.1



[PATCHv2 1/2] drm/amdkfd: Populate cache info for GFX 9.4.3

2023-10-27 Thread Mukul Joshi
GFX 9.4.3 uses a new version of the GC info table which
contains the cache info. This patch adds a new function
to populate the cache info from IP discovery for GFX 9.4.3.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Separate out the original patch into 2 patches.

 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 66 ++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 0e792a8496d6..cd8e459201f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1404,6 +1404,66 @@ static int 
kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
return i;
 }
 
+static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
+  struct kfd_gpu_cache_info 
*pcache_info)
+{
+   struct amdgpu_device *adev = kdev->adev;
+   int i = 0;
+
+   /* TCP L1 Cache per CU */
+   if (adev->gfx.config.gc_tcp_size_per_cu) {
+   pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
+   pcache_info[i].cache_level = 1;
+   pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+   CRAT_CACHE_FLAGS_DATA_CACHE |
+   CRAT_CACHE_FLAGS_SIMD_CACHE);
+   pcache_info[i].num_cu_shared = 1;
+   i++;
+   }
+   /* Scalar L1 Instruction Cache per SQC */
+   if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
+   pcache_info[i].cache_size =
+   adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+   pcache_info[i].cache_level = 1;
+   pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+   CRAT_CACHE_FLAGS_INST_CACHE |
+   CRAT_CACHE_FLAGS_SIMD_CACHE);
+   pcache_info[i].num_cu_shared = 
adev->gfx.config.gc_num_cu_per_sqc;
+   i++;
+   }
+   /* Scalar L1 Data Cache per SQC */
+   if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+   pcache_info[i].cache_size = 
adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+   pcache_info[i].cache_level = 1;
+   pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+   CRAT_CACHE_FLAGS_DATA_CACHE |
+   CRAT_CACHE_FLAGS_SIMD_CACHE);
+   pcache_info[i].num_cu_shared = 
adev->gfx.config.gc_num_cu_per_sqc;
+   i++;
+   }
+   /* L2 Data Cache per GPU (Total Tex Cache) */
+   if (adev->gfx.config.gc_tcc_size) {
+   pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
+   pcache_info[i].cache_level = 2;
+   pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+   CRAT_CACHE_FLAGS_DATA_CACHE |
+   CRAT_CACHE_FLAGS_SIMD_CACHE);
+   pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+   i++;
+   }
+   /* L3 Data Cache per GPU */
+   if (adev->gmc.mall_size) {
+   pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+   pcache_info[i].cache_level = 3;
+   pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+   CRAT_CACHE_FLAGS_DATA_CACHE |
+   CRAT_CACHE_FLAGS_SIMD_CACHE);
+   pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+   i++;
+   }
+   return i;
+}
+
 int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info 
**pcache_info)
 {
int num_of_cache_types = 0;
@@ -1461,10 +1521,14 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, 
struct kfd_gpu_cache_info **pc
num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
break;
case IP_VERSION(9, 4, 2):
-   case IP_VERSION(9, 4, 3):
*pcache_info = aldebaran_cache_info;
num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
break;
+   case IP_VERSION(9, 4, 3):
+   num_of_cache_types =
+   
kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
+   
*pcache_info);
+   break;
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
*pcache_info = raven_cache_info;
-- 
2.35.1



[PATCH] drm/amdkfd: Update cache reporting for GFX 9.4.3

2023-10-26 Thread Mukul Joshi
GFX 9.4.3 uses a new version of the GC info table in IP
discovery. This patch adds a new function to parse and
fill the cache information based on the new table. Also,
update cache reporting based on compute and memory
partitioning modes.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 66 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 23 +++-
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 0e792a8496d6..cd8e459201f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1404,6 +1404,66 @@ static int 
kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
return i;
 }
 
+static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
+  struct kfd_gpu_cache_info 
*pcache_info)
+{
+   struct amdgpu_device *adev = kdev->adev;
+   int i = 0;
+
+   /* TCP L1 Cache per CU */
+   if (adev->gfx.config.gc_tcp_size_per_cu) {
+   pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
+   pcache_info[i].cache_level = 1;
+   pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+   CRAT_CACHE_FLAGS_DATA_CACHE |
+   CRAT_CACHE_FLAGS_SIMD_CACHE);
+   pcache_info[i].num_cu_shared = 1;
+   i++;
+   }
+   /* Scalar L1 Instruction Cache per SQC */
+   if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
+   pcache_info[i].cache_size =
+   adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+   pcache_info[i].cache_level = 1;
+   pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+   CRAT_CACHE_FLAGS_INST_CACHE |
+   CRAT_CACHE_FLAGS_SIMD_CACHE);
+   pcache_info[i].num_cu_shared = 
adev->gfx.config.gc_num_cu_per_sqc;
+   i++;
+   }
+   /* Scalar L1 Data Cache per SQC */
+   if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+   pcache_info[i].cache_size = 
adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+   pcache_info[i].cache_level = 1;
+   pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+   CRAT_CACHE_FLAGS_DATA_CACHE |
+   CRAT_CACHE_FLAGS_SIMD_CACHE);
+   pcache_info[i].num_cu_shared = 
adev->gfx.config.gc_num_cu_per_sqc;
+   i++;
+   }
+   /* L2 Data Cache per GPU (Total Tex Cache) */
+   if (adev->gfx.config.gc_tcc_size) {
+   pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
+   pcache_info[i].cache_level = 2;
+   pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+   CRAT_CACHE_FLAGS_DATA_CACHE |
+   CRAT_CACHE_FLAGS_SIMD_CACHE);
+   pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+   i++;
+   }
+   /* L3 Data Cache per GPU */
+   if (adev->gmc.mall_size) {
+   pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+   pcache_info[i].cache_level = 3;
+   pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+   CRAT_CACHE_FLAGS_DATA_CACHE |
+   CRAT_CACHE_FLAGS_SIMD_CACHE);
+   pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+   i++;
+   }
+   return i;
+}
+
 int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info 
**pcache_info)
 {
int num_of_cache_types = 0;
@@ -1461,10 +1521,14 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, 
struct kfd_gpu_cache_info **pc
num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
break;
case IP_VERSION(9, 4, 2):
-   case IP_VERSION(9, 4, 3):
*pcache_info = aldebaran_cache_info;
num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
break;
+   case IP_VERSION(9, 4, 3):
+   num_of_cache_types =
+   
kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
+   
*pcache_info);
+   break;
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
*pcache_info = raven_cache_info;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 4e530791507e..1a79

[PATCHv2] drm/amdgpu: Fix typo in IP discovery parsing

2023-10-26 Thread Mukul Joshi
Fix a typo in parsing of the GC info table header when
reading the IP discovery table.

Fixes: ecb70926eb86 ("drm/amdgpu: add type conversion for gc info")
Signed-off-by: Mukul Joshi 
---
v1->v2:
- Add the Fixes tag.

 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 8d3681172cea..81476cd8461c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1488,7 +1488,7 @@ static int amdgpu_discovery_get_gfx_info(struct 
amdgpu_device *adev)
adev->gfx.config.num_sc_per_sh = 
le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
adev->gfx.config.num_packer_per_sc = 
le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
-   if (le16_to_cpu(gc_info->v2.header.version_minor == 1)) {
+   if (le16_to_cpu(gc_info->v2.header.version_minor) == 1) {
adev->gfx.config.gc_num_tcp_per_sa = 
le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
adev->gfx.config.gc_tcp_size_per_cu = 
le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
adev->gfx.config.gc_num_sdp_interface = 
le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
-- 
2.35.1



[PATCH] drm/amdgpu: Fix typo in IP discovery parsing

2023-10-26 Thread Mukul Joshi
Fix a typo in parsing of the GC info table header when
reading the IP discovery table.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 8d3681172cea..81476cd8461c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1488,7 +1488,7 @@ static int amdgpu_discovery_get_gfx_info(struct 
amdgpu_device *adev)
adev->gfx.config.num_sc_per_sh = 
le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
adev->gfx.config.num_packer_per_sc = 
le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
-   if (le16_to_cpu(gc_info->v2.header.version_minor == 1)) {
+   if (le16_to_cpu(gc_info->v2.header.version_minor) == 1) {
adev->gfx.config.gc_num_tcp_per_sa = 
le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
adev->gfx.config.gc_tcp_size_per_cu = 
le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
adev->gfx.config.gc_num_sdp_interface = 
le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
-- 
2.35.1



[PATCH 4/4] drm/amdgpu: Rename KGD_MAX_QUEUES to AMDGPU_MAX_QUEUES

2023-09-06 Thread Mukul Joshi
Rename KGD_MAX_QUEUES to AMDGPU_MAX_QUEUES to conform with
the naming convention followed in amdgpu_gfx.h. No functional
change.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   | 6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 4 ++--
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 25d5fda5b243..26ff5f8d9795 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -164,7 +164,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 */
bitmap_complement(gpu_resources.cp_queue_bitmap,
  adev->gfx.mec_bitmap[0].queue_bitmap,
- KGD_MAX_QUEUES);
+ AMDGPU_MAX_QUEUES);
 
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
 * nbits is not compile time constant
@@ -172,7 +172,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
last_valid_bit = 1 /* only first MEC can have compute queues */
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
-   for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+   for (i = last_valid_bit; i < AMDGPU_MAX_QUEUES; ++i)
clear_bit(i, gpu_resources.cp_queue_bitmap);
 
amdgpu_doorbell_get_kfd_info(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 3c45a188b701..04b8c7dacd30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -1037,7 +1037,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device 
*adev, int pasid,
int pasid_tmp;
int max_queue_cnt;
int vmid_wave_cnt = 0;
-   DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
+   DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);
 
lock_spi_csq_mutexes(adev);
soc15_grbm_select(adev, 1, 0, 0, 0, inst);
@@ -1047,7 +1047,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device 
*adev, int pasid,
 * to get number of waves in flight
 */
bitmap_complement(cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap,
- KGD_MAX_QUEUES);
+ AMDGPU_MAX_QUEUES);
max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
sh_cnt = adev->gfx.config.max_sh_per_se;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 0ca95c4d4bfb..42ac6d1bf9ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -43,10 +43,10 @@
 #define AMDGPU_GFX_LBPW_DISABLED_MODE  0x0008L
 
 #define AMDGPU_MAX_GC_INSTANCES8
-#define KGD_MAX_QUEUES 128
+#define AMDGPU_MAX_QUEUES  128
 
-#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
-#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+#define AMDGPU_MAX_GFX_QUEUES AMDGPU_MAX_QUEUES
+#define AMDGPU_MAX_COMPUTE_QUEUES AMDGPU_MAX_QUEUES
 
 enum amdgpu_gfx_pipe_priority {
AMDGPU_GFX_PIPE_PRIO_NORMAL = AMDGPU_RING_PRIO_1,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 4170e3d32630..6d07a5dd2648 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -92,7 +92,7 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, 
int mec, int pipe)
 unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
 {
return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
-   KGD_MAX_QUEUES);
+   AMDGPU_MAX_QUEUES);
 }
 
 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
@@ -1576,7 +1576,7 @@ static int set_sched_resources(struct 
device_queue_manager *dqm)
res.vmid_mask = dqm->dev->compute_vmid_bitmap;
 
res.queue_mask = 0;
-   for (i = 0; i < KGD_MAX_QUEUES; ++i) {
+   for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
/ dqm->dev->kfd->shared_resources.num_pipe_per_mec;
 
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h 
b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 3

[PATCHv2 1/4] drm/amdgpu: Store CU info from all XCCs for GFX v9.4.3

2023-09-06 Thread Mukul Joshi
Currently, we store CU info only for a single XCC assuming
that it is the same for all XCCs. However, that may not be
true. As a result, store CU info for all XCCs. This info is
later used for CU masking.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Incorporate Felix's review comments.

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  4 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c   | 76 +--
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c |  3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  |  8 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 11 ++-
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  6 +-
 14 files changed, 60 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index cdf6087706aa..25d5fda5b243 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -478,7 +478,7 @@ void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, 
struct kfd_cu_info *c
cu_info->cu_active_number = acu_info.number;
cu_info->cu_ao_mask = acu_info.ao_cu_mask;
memcpy(_info->cu_bitmap[0], _info.bitmap[0],
-  sizeof(acu_info.bitmap));
+  sizeof(cu_info->cu_bitmap));
cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 395c1768b9fc..0ca95c4d4bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -43,6 +43,7 @@
 #define AMDGPU_GFX_LBPW_DISABLED_MODE  0x0008L
 
 #define AMDGPU_MAX_GC_INSTANCES8
+#define KGD_MAX_QUEUES 128
 
 #define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
 #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@@ -257,7 +258,7 @@ struct amdgpu_cu_info {
uint32_t number;
uint32_t ao_cu_mask;
uint32_t ao_cu_bitmap[4][4];
-   uint32_t bitmap[4][4];
+   uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
 };
 
 struct amdgpu_gfx_ras {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 3a48bec10aea..d462b36adf4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -850,7 +850,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
memcpy(_info->cu_ao_bitmap[0], 
>gfx.cu_info.ao_cu_bitmap[0],
   sizeof(adev->gfx.cu_info.ao_cu_bitmap));
memcpy(_info->cu_bitmap[0], >gfx.cu_info.bitmap[0],
-  sizeof(adev->gfx.cu_info.bitmap));
+  sizeof(dev_info->cu_bitmap));
dev_info->vram_type = adev->gmc.vram_type;
dev_info->vram_bit_width = adev->gmc.vram_width;
dev_info->vce_harvest_config = adev->vce.harvest_config;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 6ccde07ed63e..62329a822022 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -9442,7 +9442,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device 
*adev,
gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
-   cu_info->bitmap[i][j] = bitmap;
+   cu_info->bitmap[0][i][j] = bitmap;
 
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 337ed771605f..39c434ca0dad 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6392,7 +6392,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device 
*adev,
 *SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
 *SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
 */
-   cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
+   

[PATCHv2 3/4] drm/amdkfd: Update CU masking for GFX 9.4.3

2023-09-06 Thread Mukul Joshi
The CU mask passed from user-space will change based on
different spatial partitioning mode. As a result, update
CU masking code for GFX9.4.3 to work for all partitioning
modes.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Incorporate Felix's review comments.

 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 28 ---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 46 ---
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |  2 +-
 7 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 763966236658..447829c22295 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -97,14 +97,16 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
 
 void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
-   uint32_t *se_mask)
+   uint32_t *se_mask, uint32_t inst)
 {
struct kfd_cu_info cu_info;
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
-   int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1;
+   int i, se, sh, cu, cu_bitmap_sh_mul, cu_inc = wgp_mode_req ? 2 : 1;
uint32_t cu_active_per_node;
+   int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask);
+   int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1;
 
amdgpu_amdkfd_get_cu_info(mm->dev->adev, _info);
 
@@ -143,7 +145,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
for (se = 0; se < cu_info.num_shader_engines; se++)
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
cu_per_sh[se][sh] = hweight32(
-   cu_info.cu_bitmap[0][se % 4][sh + (se / 4) * 
cu_bitmap_sh_mul]);
+   cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 
4) *
+   cu_bitmap_sh_mul]);
 
/* Symmetrically map cu_mask to all SEs & SHs:
 * se_mask programs up to 2 SH in the upper and lower 16 bits.
@@ -166,20 +169,33 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 * cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1)
 * ...
 *
+* For GFX 9.4.3, the following code only looks at a
+* subset of the cu_mask corresponding to the inst parameter.
+* If we have n XCCs under one GPU node
+* cu_mask[0] bit0 -> XCC0 se_mask[0] bit0 (XCC0,SE0,SH0,CU0)
+* cu_mask[0] bit1 -> XCC1 se_mask[0] bit0 (XCC1,SE0,SH0,CU0)
+* ..
+* cu_mask[0] bitn -> XCCn se_mask[0] bit0 (XCCn,SE0,SH0,CU0)
+* cu_mask[0] bit n+1 -> XCC0 se_mask[1] bit0 (XCC0,SE1,SH0,CU0)
+*
+* For example, if there are 6 XCCs under 1 KFD node, this code
+* running for each inst, will look at the bits as:
+* inst, inst + 6, inst + 12...
+*
 * First ensure all CUs are disabled, then enable user specified CUs.
 */
for (i = 0; i < cu_info.num_shader_engines; i++)
se_mask[i] = 0;
 
-   i = 0;
-   for (cu = 0; cu < 16; cu += inc) {
+   i = inst;
+   for (cu = 0; cu < 16; cu += cu_inc) {
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
for (se = 0; se < cu_info.num_shader_engines; se++) {
if (cu_per_sh[se][sh] > cu) {
if (cu_mask[i / 32] & (en_mask << (i % 
32)))
se_mask[se] |= en_mask << (cu + 
sh * 16);
i += inc;
-   if (i == cu_mask_count)
+   if (i >= cu_mask_count)
return;
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 23158db7da03..57bf5e513f4d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -138,7 +138,7 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
 
 void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
-   uint32_t *se_mask);
+   uint32_t *se_mask, uint32_t inst);
 
 int kfd_hiq_load_mqd_kiq(struct mq

[PATCHv2 2/4] drm/amdkfd: Update cache info reporting for GFX v9.4.3

2023-09-06 Thread Mukul Joshi
Update cache info reporting in sysfs to report the correct
number of CUs and associated cache information based on
different spatial partitioning modes.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Revert the change in kfd_crat.c
- Add a comment to not change value of CRAT_SIBLINGMAP_SIZE.

 drivers/gpu/drm/amd/amdkfd/kfd_crat.h |  4 ++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 82 +--
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |  2 +-
 3 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index 387a8ef49385..74c2d7a0d628 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -79,6 +79,10 @@ struct crat_header {
 #define CRAT_SUBTYPE_IOLINK_AFFINITY   5
 #define CRAT_SUBTYPE_MAX   6
 
+/*
+ * Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
+ * as it breaks the ABI.
+ */
 #define CRAT_SIBLINGMAP_SIZE   32
 
 /*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c54795682dfb..b98cc7930e4c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1596,14 +1596,17 @@ static int fill_in_l1_pcache(struct 
kfd_cache_properties **props_ext,
 static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
struct kfd_gpu_cache_info *pcache_info,
struct kfd_cu_info *cu_info,
-   int cache_type, unsigned int cu_processor_id)
+   int cache_type, unsigned int cu_processor_id,
+   struct kfd_node *knode)
 {
unsigned int cu_sibling_map_mask;
int first_active_cu;
-   int i, j, k;
+   int i, j, k, xcc, start, end;
struct kfd_cache_properties *pcache = NULL;
 
-   cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0];
+   start = ffs(knode->xcc_mask) - 1;
+   end = start + NUM_XCC(knode->xcc_mask);
+   cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
cu_sibling_map_mask &=
((1 << pcache_info[cache_type].num_cu_shared) - 1);
first_active_cu = ffs(cu_sibling_map_mask);
@@ -1638,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct 
kfd_cache_properties **props_ext,
cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 
1);
k = 0;
 
-   for (i = 0; i < cu_info->num_shader_engines; i++) {
-   for (j = 0; j < cu_info->num_shader_arrays_per_engine; 
j++) {
-   pcache->sibling_map[k] = 
(uint8_t)(cu_sibling_map_mask & 0xFF);
-   pcache->sibling_map[k+1] = 
(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
-   pcache->sibling_map[k+2] = 
(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
-   pcache->sibling_map[k+3] = 
(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
-   k += 4;
-
-   cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 
4][j + i / 4];
-   cu_sibling_map_mask &= ((1 << 
pcache_info[cache_type].num_cu_shared) - 1);
+   for (xcc = start; xcc < end; xcc++) {
+   for (i = 0; i < cu_info->num_shader_engines; i++) {
+   for (j = 0; j < 
cu_info->num_shader_arrays_per_engine; j++) {
+   pcache->sibling_map[k] = 
(uint8_t)(cu_sibling_map_mask & 0xFF);
+   pcache->sibling_map[k+1] = 
(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+   pcache->sibling_map[k+2] = 
(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+   pcache->sibling_map[k+3] = 
(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+   k += 4;
+
+   cu_sibling_map_mask = 
cu_info->cu_bitmap[start][i % 4][j + i / 4];
+   cu_sibling_map_mask &= ((1 << 
pcache_info[cache_type].num_cu_shared) - 1);
+   }
}
}
pcache->sibling_map_size = k;
@@ -1665,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct 
kfd_cache_properties **props_ext,
 static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, 
struct kfd_node *kdev)
 {
struct kfd_gpu_cache_info *pcache_info = NULL;
-   int i, j, k;
+   int i, j, k, xcc, start, end;
int ct = 0;
unsigned int cu_processor_id;
int re

[PATCHv3] drm/amdkfd: Fix unaligned 64-bit doorbell warning

2023-09-06 Thread Mukul Joshi
This patch fixes the following unaligned 64-bit doorbell
warning seen when submitting packets on HIQ on GFX v9.4.3
by making the HIQ doorbell 64-bit aligned.
The warning is seen when GPU is loaded in any mode other
than SPX mode.

[  +0.000301] [ cut here ]
[  +0.03] Unaligned 64-bit doorbell
[  +0.30] WARNING: /amdkfd/kfd_doorbell.c:339 
write_kernel_doorbell64+0x72/0x80
[  +0.03] RIP: 0010:write_kernel_doorbell64+0x72/0x80
[  +0.04] RSP: 0018:c90004287730 EFLAGS: 00010246
[  +0.05] RAX:  RBX:  RCX: 
[  +0.03] RDX: 0001 RSI: 82837c71 RDI: 
[  +0.03] RBP: c90004287748 R08: 0003 R09: 0001
[  +0.02] R10: 001a R11: 88a034008198 R12: c900013bd004
[  +0.03] R13: 0008 R14: c900042877b0 R15: 007f
[  +0.03] FS:  7fa8c7b62000() GS:889f8840() 
knlGS:
[  +0.04] CS:  0010 DS:  ES:  CR0: 80050033
[  +0.03] CR2: 56111c45aaf0 CR3: 0001414f2002 CR4: 00770ee0
[  +0.03] PKRU: 5554
[  +0.02] Call Trace:
[  +0.04]  
[  +0.06]  kq_submit_packet+0x45/0x50 [amdgpu]
[  +0.000524]  pm_send_set_resources+0x7f/0xc0 [amdgpu]
[  +0.000500]  set_sched_resources+0xe4/0x160 [amdgpu]
[  +0.000503]  start_cpsch+0x1c5/0x2a0 [amdgpu]
[  +0.000497]  kgd2kfd_device_init.cold+0x816/0xb42 [amdgpu]
[  +0.000743]  amdgpu_amdkfd_device_init+0x15f/0x1f0 [amdgpu]
[  +0.000602]  amdgpu_device_init.cold+0x1813/0x2176 [amdgpu]
[  +0.000684]  ? pci_bus_read_config_word+0x4a/0x80
[  +0.12]  ? do_pci_enable_device+0xdc/0x110
[  +0.08]  amdgpu_driver_load_kms+0x1a/0x110 [amdgpu]
[  +0.000545]  amdgpu_pci_probe+0x197/0x400 [amdgpu]

Fixes: cfeaeb3c0ce7 ("drm/amdgpu: use doorbell mgr for kfd kernel doorbells")
Signed-off-by: Mukul Joshi 
---
v1->v2:
- Update the logic to make it work with both 32-bit and
  64-bit doorbells.
- Add the Fixes tag
v2->v3:
- Revert to the original change to align it with whats done in
  amdgpu_doorbell_index_on_bar.

 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index c2e0b79dcc6d..7b38537c7c99 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -162,6 +162,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
return NULL;
 
*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, 
inx);
+   inx *= 2;
 
pr_debug("Get kernel queue doorbell\n"
" doorbell offset   == 0x%08X\n"
@@ -176,6 +177,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 
__iomem *db_addr)
unsigned int inx;
 
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
+   inx /= 2;
 
mutex_lock(>doorbell_mutex);
__clear_bit(inx, kfd->doorbell_bitmap);
-- 
2.35.1



[PATCHv2] drm/amdkfd: Fix unaligned 64-bit doorbell warning

2023-08-30 Thread Mukul Joshi
This patch fixes the following unaligned 64-bit doorbell
warning seen when submitting packets on HIQ on GFX v9.4.3
by making the HIQ doorbell 64-bit aligned.
The warning is seen when GPU is loaded in any mode other
than SPX mode.

[  +0.000301] [ cut here ]
[  +0.03] Unaligned 64-bit doorbell
[  +0.30] WARNING: /amdkfd/kfd_doorbell.c:339 
write_kernel_doorbell64+0x72/0x80 [amdgpu]
[  +0.03] RIP: 0010:write_kernel_doorbell64+0x72/0x80 [amdgpu]
[  +0.04] RSP: 0018:c90004287730 EFLAGS: 00010246
[  +0.05] RAX:  RBX:  RCX: 
[  +0.03] RDX: 0001 RSI: 82837c71 RDI: 
[  +0.03] RBP: c90004287748 R08: 0003 R09: 0001
[  +0.02] R10: 001a R11: 88a034008198 R12: c900013bd004
[  +0.03] R13: 0008 R14: c900042877b0 R15: 007f
[  +0.03] FS:  7fa8c7b62000() GS:889f8840() 
knlGS:
[  +0.04] CS:  0010 DS:  ES:  CR0: 80050033
[  +0.03] CR2: 56111c45aaf0 CR3: 0001414f2002 CR4: 00770ee0
[  +0.03] PKRU: 5554
[  +0.02] Call Trace:
[  +0.04]  
[  +0.06]  kq_submit_packet+0x45/0x50 [amdgpu]
[  +0.000524]  pm_send_set_resources+0x7f/0xc0 [amdgpu]
[  +0.000500]  set_sched_resources+0xe4/0x160 [amdgpu]
[  +0.000503]  start_cpsch+0x1c5/0x2a0 [amdgpu]
[  +0.000497]  kgd2kfd_device_init.cold+0x816/0xb42 [amdgpu]
[  +0.000743]  amdgpu_amdkfd_device_init+0x15f/0x1f0 [amdgpu]
[  +0.000602]  amdgpu_device_init.cold+0x1813/0x2176 [amdgpu]
[  +0.000684]  ? pci_bus_read_config_word+0x4a/0x80
[  +0.12]  ? do_pci_enable_device+0xdc/0x110
[  +0.08]  amdgpu_driver_load_kms+0x1a/0x110 [amdgpu]
[  +0.000545]  amdgpu_pci_probe+0x197/0x400 [amdgpu]

Fixes: cfeaeb3c0ce7 ("drm/amdgpu: use doorbell mgr for kfd kernel doorbells")
Signed-off-by: Mukul Joshi 
---
v1->v2: 
- Update the logic to make it work with both 32-bit and
  64-bit doorbells.
- Add the Fixes tag.

 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index c2e0b79dcc6d..e0d44f4af18e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -162,6 +162,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
return NULL;
 
*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, 
inx);
+   inx *= kfd->device_info.doorbell_size / sizeof(u32);
 
pr_debug("Get kernel queue doorbell\n"
" doorbell offset   == 0x%08X\n"
@@ -175,7 +176,8 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 
__iomem *db_addr)
 {
unsigned int inx;
 
-   inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
+   inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
+   * sizeof(u32) / kfd->device_info.doorbell_size;
 
mutex_lock(>doorbell_mutex);
__clear_bit(inx, kfd->doorbell_bitmap);
-- 
2.35.1



[PATCH] drm/amdkfd: Fix unaligned 64-bit doorbell warning

2023-08-29 Thread Mukul Joshi
This patch fixes the following unaligned 64-bit doorbell
warning seen when submitting packets on HIQ on GFX v9.4.3
by making the HIQ doorbell 64-bit aligned.
The warning is seen when GPU is loaded in any mode other
than SPX mode.

[  +0.000301] [ cut here ]
[  +0.03] Unaligned 64-bit doorbell
[  +0.30] WARNING: /amdkfd/kfd_doorbell.c:339 
write_kernel_doorbell64+0x72/0x80 [amdgpu]
[  +0.03] RIP: 0010:write_kernel_doorbell64+0x72/0x80 [amdgpu]
[  +0.04] RSP: 0018:c90004287730 EFLAGS: 00010246
[  +0.05] RAX:  RBX:  RCX: 
[  +0.03] RDX: 0001 RSI: 82837c71 RDI: 
[  +0.03] RBP: c90004287748 R08: 0003 R09: 0001
[  +0.02] R10: 001a R11: 88a034008198 R12: c900013bd004
[  +0.03] R13: 0008 R14: c900042877b0 R15: 007f
[  +0.03] FS:  7fa8c7b62000() GS:889f8840() 
knlGS:
[  +0.04] CS:  0010 DS:  ES:  CR0: 80050033
[  +0.03] CR2: 56111c45aaf0 CR3: 0001414f2002 CR4: 00770ee0
[  +0.03] PKRU: 5554
[  +0.02] Call Trace:
[  +0.04]  
[  +0.06]  kq_submit_packet+0x45/0x50 [amdgpu]
[  +0.000524]  pm_send_set_resources+0x7f/0xc0 [amdgpu]
[  +0.000500]  set_sched_resources+0xe4/0x160 [amdgpu]
[  +0.000503]  start_cpsch+0x1c5/0x2a0 [amdgpu]
[  +0.000497]  kgd2kfd_device_init.cold+0x816/0xb42 [amdgpu]
[  +0.000743]  amdgpu_amdkfd_device_init+0x15f/0x1f0 [amdgpu]
[  +0.000602]  amdgpu_device_init.cold+0x1813/0x2176 [amdgpu]
[  +0.000684]  ? pci_bus_read_config_word+0x4a/0x80
[  +0.12]  ? do_pci_enable_device+0xdc/0x110
[  +0.08]  amdgpu_driver_load_kms+0x1a/0x110 [amdgpu]
[  +0.000545]  amdgpu_pci_probe+0x197/0x400 [amdgpu]

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index c2e0b79dcc6d..b1c2772c3a8d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -168,7 +168,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
" doorbell index== 0x%x\n",
*doorbell_off, inx);
 
-   return kfd->doorbell_kernel_ptr + inx;
+   return kfd->doorbell_kernel_ptr + inx * 2;
 }
 
 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
@@ -176,6 +176,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 
__iomem *db_addr)
unsigned int inx;
 
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
+   inx /= 2;
 
mutex_lock(>doorbell_mutex);
__clear_bit(inx, kfd->doorbell_bitmap);
-- 
2.35.1



[PATCH] drm/amdkfd: Fix reg offset for setting CWSR grace period

2023-08-29 Thread Mukul Joshi
This patch fixes the case where the code currently passes
absolute register address and not the reg offset, which HWS
expects, when sending the PM4 packet to set/update CWSR grace
period. Additionally, cleanup the signature of
build_grace_period_packet_info function as it no longer needs
the inst parameter.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h| 3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c| 3 +--
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 3 +--
 7 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index f1f2c24de081..69810b3f1c63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -980,8 +980,7 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct 
amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
-   uint32_t *reg_data,
-   uint32_t inst)
+   uint32_t *reg_data)
 {
*reg_data = wait_times;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
index ecaead24e8c9..67bcaa3d4226 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -55,5 +55,4 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct 
amdgpu_device *adev,
   uint32_t wait_times,
   uint32_t grace_period,
   uint32_t *reg_offset,
-  uint32_t *reg_data,
-  uint32_t inst);
+  uint32_t *reg_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index fa5ee96f8845..3c45a188b701 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -1103,8 +1103,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct 
amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
-   uint32_t *reg_data,
-   uint32_t inst)
+   uint32_t *reg_data)
 {
*reg_data = wait_times;
 
@@ -1120,8 +1119,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct 
amdgpu_device *adev,
SCH_WAVE,
grace_period);
 
-   *reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
-   mmCP_IQ_WAIT_TIME2);
+   *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
 }
 
 void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index 936e501908ce..ce424615f59b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -100,5 +100,4 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct 
amdgpu_device *adev,
   uint32_t wait_times,
   uint32_t grace_period,
   uint32_t *reg_offset,
-  uint32_t *reg_data,
-  uint32_t inst);
+  uint32_t *reg_data);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b166f30f083e..8a6cb41444a4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1677,8 +1677,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->dev->kfd2kgd->build_grace_period_packet_info(
dqm->dev->adev, dqm->wait_times,
grace_period, _offset,
-   >wait_times,
-   ffs(dqm->dev->xcc_mask) - 1);
+   

[PATCH] drm/amdkfd: Update CWSR grace period for GFX9.4.3

2023-07-10 Thread Mukul Joshi
For GFX9.4.3, setup a reduced default CWSR grace period equal to
1000 cycles instead of 64000 cycles.

Signed-off-by: Mukul Joshi 
Reviewed-by: Felix Kuehling 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 22 ++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 7b1eea493377..28963726bc97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -1109,7 +1109,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct 
amdgpu_device *adev,
*reg_data = wait_times;
 
/*
-* The CP cannont handle a 0 grace period input and will result in
+* The CP cannot handle a 0 grace period input and will result in
 * an infinite grace period being set so set to 1 to prevent this.
 */
if (grace_period == 0)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index de83eccdd9de..31cac1fd0d58 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1622,7 +1622,7 @@ static int initialize_cpsch(struct device_queue_manager 
*dqm)
if (dqm->dev->kfd2kgd->get_iq_wait_times)
dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
>wait_times,
-   0);
+   ffs(dqm->dev->xcc_mask) - 1);
return 0;
 }
 
@@ -1664,6 +1664,26 @@ static int start_cpsch(struct device_queue_manager *dqm)
 
if (!dqm->dev->kfd->shared_resources.enable_mes)
execute_queues_cpsch(dqm, 
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
+
+   /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
+   if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
+   (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
+   uint32_t reg_offset = 0;
+   uint32_t grace_period = 1;
+
+   retval = pm_update_grace_period(>packet_mgr,
+   grace_period);
+   if (retval)
+   pr_err("Setting grace timeout failed\n");
+   else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
+   /* Update dqm->wait_times maintained in software */
+   dqm->dev->kfd2kgd->build_grace_period_packet_info(
+   dqm->dev->adev, dqm->wait_times,
+   grace_period, _offset,
+   >wait_times,
+   ffs(dqm->dev->xcc_mask) - 1);
+   }
+
dqm_unlock(dqm);
 
return 0;
-- 
2.35.1



[PATCHv2] drm/amdkfd: Use KIQ to unmap HIQ

2023-06-29 Thread Mukul Joshi
Currently, we unmap HIQ by directly writing to HQD
registers. This doesn't work for GFX9.4.3. Instead,
use KIQ to unmap HIQ, similar to how we use KIQ to
map HIQ. Using KIQ to unmap HIQ works for all GFX
series post GFXv9.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Use kiq_unmap_queues function instead of duplicating
  code (Felix).

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 36 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 ++
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 22 +++-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  | 22 +++-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 36 +++
 5 files changed, 109 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index b4fcad0e62f7..0040c63e2356 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -830,3 +830,39 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device 
*adev, int xcp_id)
return adev->gmc.real_vram_size;
}
 }
+
+int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
+   u32 inst)
+{
+   struct amdgpu_kiq *kiq = >gfx.kiq[inst];
+   struct amdgpu_ring *kiq_ring = >ring;
+   struct amdgpu_ring_funcs ring_funcs;
+   struct amdgpu_ring ring;
+   int r = 0;
+
+   if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+   return -EINVAL;
+
+   memset(, 0x0, sizeof(struct amdgpu_ring));
+   memset(_funcs, 0x0, sizeof(struct amdgpu_ring_funcs));
+
+   ring_funcs.type = AMDGPU_RING_TYPE_COMPUTE;
+   ring.doorbell_index = doorbell_off;
+   ring.funcs = _funcs;
+
+   spin_lock(>ring_lock);
+
+   if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+   spin_unlock(>ring_lock);
+   return -ENOMEM;
+   }
+
+   kiq->pmf->kiq_unmap_queues(kiq_ring, , RESET_QUEUES, 0, 0);
+
+   if (kiq_ring->sched.ready && !adev->job_hang)
+   r = amdgpu_ring_test_helper(kiq_ring);
+
+   spin_unlock(>ring_lock);
+
+   return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2d0406bff84e..b34418e3e006 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -252,6 +252,8 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct 
amdgpu_device *dst,
 int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool 
is_min);
 int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
uint32_t *payload);
+int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
+   u32 inst);
 
 /* Read user wptr from a specified user address space with page fault
  * disabled. The memory must be pinned and mapped to the hardware when
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 94c0fc2e57b7..83699392c808 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -318,6 +318,26 @@ static void init_mqd_hiq(struct mqd_manager *mm, void 
**mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
 }
 
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id, uint32_t queue_id)
+{
+   int err;
+   struct v10_compute_mqd *m;
+   u32 doorbell_off;
+
+   m = get_mqd(mqd);
+
+   doorbell_off = m->cp_hqd_pq_doorbell_control >>
+   CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+   err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+   if (err)
+   pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+   return err;
+}
+
 static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -460,7 +480,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE 
type,
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->update_mqd = update_mqd;
-   mqd->destroy_mqd = kfd_destroy_mqd_cp;
+   mqd->destroy_mqd = destroy_hiq_mqd;
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
mqd->mqd_stride = kfd_mqd_stride;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 31fec5e70d13..2319467d2d95 100644

[PATCH 2/2] drm/amdgpu: Correctly setup TMR region size for GFX9.4.3

2023-06-22 Thread Mukul Joshi
A faulty check was causing TMR region size to be setup incorrectly
for GFX9.4.3. Remove the check and setup TMR region size as 280MB
for GFX9.4.3.

Fixes: b6780d70db5e ("drm/amdgpu: bypass bios dependent operations")
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7c6dd3de1867..fa5721b3139c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1718,7 +1718,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device 
*adev)
reserve_size =
amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
 
-   if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
reserve_size = max(reserve_size, (uint32_t)280 << 20);
else if (!reserve_size)
reserve_size = DISCOVERY_TMR_OFFSET;
-- 
2.35.1



[PATCH 1/2] drm/amdkfd: Update interrupt handling for GFX 9.4.3

2023-06-22 Thread Mukul Joshi
For GFX 9.4.3, interrupt handling needs to be updated for:
- Interrupt cookie will have a NodeId field. Each KFD
  node needs to check the NodeId before processing the
  interrupt.
- For CPX mode, there are additional checks of client ID
  needed to process the interrupt.
- Add NodeId to the process drain interrupt.

Signed-off-by: Mukul Joshi 
Reviewed-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 43 ++-
 .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   | 29 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  9 
 drivers/gpu/drm/amd/amdkfd/soc15_int.h|  1 +
 5 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 226d2dd7fa49..0b3dc754e06b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -138,9 +138,12 @@ static void 
kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(9, 4, 0): /* VEGA20 */
case IP_VERSION(9, 4, 1): /* ARCTURUS */
case IP_VERSION(9, 4, 2): /* ALDEBARAN */
-   case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
kfd->device_info.event_interrupt_class = 
_interrupt_class_v9;
break;
+   case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
+   kfd->device_info.event_interrupt_class =
+   _interrupt_class_v9_4_3;
+   break;
case IP_VERSION(10, 3, 1): /* VANGOGH */
case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
case IP_VERSION(10, 3, 6): /* GC 10.3.6 */
@@ -599,6 +602,41 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, 
unsigned int num_nodes)
}
 }
 
+static void kfd_setup_interrupt_bitmap(struct kfd_node *node,
+  unsigned int kfd_node_idx)
+{
+   struct amdgpu_device *adev = node->adev;
+   uint32_t xcc_mask = node->xcc_mask;
+   uint32_t xcc, mapped_xcc;
+   /*
+* Interrupt bitmap is setup for processing interrupts from
+* different XCDs and AIDs.
+* Interrupt bitmap is defined as follows:
+* 1. Bits 0-15 - correspond to the NodeId field.
+*Each bit corresponds to NodeId number. For example, if
+*a KFD node has interrupt bitmap set to 0x7, then this
+*KFD node will process interrupts with NodeId = 0, 1 and 2
+*in the IH cookie.
+* 2. Bits 16-31 - unused.
+*
+* Please note that the kfd_node_idx argument passed to this
+* function is not related to NodeId field received in the
+* IH cookie.
+*
+* In CPX mode, a KFD node will process an interrupt if:
+* - the Node Id matches the corresponding bit set in
+*   Bits 0-15.
+* - AND VMID reported in the interrupt lies within the
+*   VMID range of the node.
+*/
+   for_each_inst(xcc, xcc_mask) {
+   mapped_xcc = GET_INST(GC, xcc);
+   node->interrupt_bitmap |= (mapped_xcc % 2 ? 5 : 3) << (4 * 
(mapped_xcc / 2));
+   }
+   dev_info(kfd_device, "Node: %d, interrupt_bitmap: %x\n", kfd_node_idx,
+   node->interrupt_bitmap);
+}
+
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 const struct kgd2kfd_shared_resources *gpu_resources)
 {
@@ -798,6 +836,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
amdgpu_amdkfd_get_local_mem_info(kfd->adev,
>local_mem_info, node->xcp);
 
+   if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3))
+   kfd_setup_interrupt_bitmap(node, i);
+
/* Initialize the KFD node */
if (kfd_init_node(node)) {
dev_err(kfd_device, "Error initializing KFD node\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index d5c9f30552e3..f0731a6a5306 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -446,7 +446,36 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
}
 }
 
+static bool event_interrupt_isr_v9_4_3(struct kfd_node *node,
+   const uint32_t *ih_ring_entry,
+   uint32_t *patched_ihre,
+   bool *patched_flag)
+{
+   uint16_t node_id, vmid;
+
+   /*
+* For GFX 9.4.3, process the interrupt if:
+* - NodeID field in IH entry matches the corresponding bit
+*   set in interrupt_bitmap Bits 0-15.
+*   OR
+* - If partition mode is CPX and interrupt came from
+*   Node_id 0,4,8,12, then check if the Bit (16 + cl

[PATCHv4] drm/amdgpu: Update invalid PTE flag setting

2023-06-19 Thread Mukul Joshi
Update the invalid PTE flag setting with TF enabled.
This is to ensure, in addition to transitioning the
retry fault to a no-retry fault, it also causes the
wavefront to enter the trap handler. With the current
setting, the fault only transitions to a no-retry fault.
Additionally, have 2 sets of invalid PTE settings, one for
TF enabled, the other for TF disabled. The setting with
TF disabled, doesn't work with TF enabled.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Update handling according to Christian's feedback.

v2->v3:
- Remove ASIC specific callback (Felix).

v3->v4:
- Add noretry flag to amdgpu->gmc. This allows to set
  ASIC specific flags.

 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h   |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 31 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c|  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  1 +
 9 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 56d73fade568..fdc25cd559b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -331,6 +331,8 @@ struct amdgpu_gmc {
u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[16];
u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[16];
u64 MC_VM_MX_L1_TLB_CNTL;
+
+   u64 noretry_flags;
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index eff73c428b12..8c7861a4d75d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2604,7 +2604,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
/* Intentionally setting invalid PTE flag
 * combination to force a no-retry-fault
 */
-   flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT;
+   flags = AMDGPU_VM_NORETRY_FLAGS;
value = 0;
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9c85d494f2a2..b81fcb962d8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -84,7 +84,13 @@ struct amdgpu_mem_stats;
 /* PDE Block Fragment Size for VEGA10 */
 #define AMDGPU_PDE_BFS(a)  ((uint64_t)a << 59)
 
+/* Flag combination to set no-retry with TF disabled */
+#define AMDGPU_VM_NORETRY_FLAGS(AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE 
| \
+   AMDGPU_PTE_TF)
 
+/* Flag combination to set no-retry with TF enabled */
+#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \
+  AMDGPU_PTE_PRT)
 /* For GFX9 */
 #define AMDGPU_PTE_MTYPE_VG10(a)   ((uint64_t)(a) << 57)
 #define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index dea1a64be44d..24ddf6a0512a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -778,6 +778,27 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params 
*params,
1, 0, flags);
 }
 
+/**
+ * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags
+ *
+ * @adev - amdgpu_device pointer
+ * @flags: pointer to PTE flags
+ *
+ * Update PTE no-retry flags when TF is enabled.
+ */
+static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev,
+   uint64_t *flags)
+{
+   /*
+* Update no-retry flags with the corresponding TF
+* no-retry combination.
+*/
+   if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) {
+   *flags &= ~AMDGPU_VM_NORETRY_FLAGS;
+   *flags |= adev->gmc.noretry_flags;
+   }
+}
+
 /*
  * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
  *
@@ -804,6 +825,16 @@ static void amdgpu_vm_pte_update_flags(struct 
amdgpu_vm_update_params *params,
flags |= AMDGPU_PTE_EXECUTABLE;
}
 
+   /*
+* Update no-retry flags to use the no-retry flag combination
+* with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination
+* does not work when TF is enabled. So, replace them with
+* AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for
+*

[PATCHv2] drm/amdkfd: Enable GWS on GFX9.4.3

2023-06-16 Thread Mukul Joshi
Enable GWS capable queue creation for forward
progress guarantee on GFX 9.4.3.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Update the condition for setting pqn->q->gws
  for GFX 9.4.3.
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  1 +
 .../amd/amdkfd/kfd_process_queue_manager.c| 35 ---
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9d4abfd8b55e..226d2dd7fa49 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -518,6 +518,7 @@ static int kfd_gws_init(struct kfd_node *node)
&& kfd->mec2_fw_version >= 0x30)   ||
(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2)
&& kfd->mec2_fw_version >= 0x28) ||
+   (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) ||
(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
&& kfd->mec2_fw_version >= 0x6b
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 9ad1a2186a24..ba9d69054119 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -123,16 +123,24 @@ int pqm_set_gws(struct process_queue_manager *pqm, 
unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
 
-   if (gws)
-   ret = 
amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
-   gws, );
-   else
-   ret = 
amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
-   pqn->q->gws);
-   if (unlikely(ret))
-   return ret;
+   if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) {
+   if (gws)
+   ret = 
amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
+   gws, );
+   else
+   ret = 
amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
+   pqn->q->gws);
+   if (unlikely(ret))
+   return ret;
+   pqn->q->gws = mem;
+   } else {
+   /*
+* Intentionally set GWS to a non-NULL value
+* for GFX 9.4.3.
+*/
+   pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
+   }
 
-   pqn->q->gws = mem;
pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
 
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
@@ -164,7 +172,8 @@ void pqm_uninit(struct process_queue_manager *pqm)
struct process_queue_node *pqn, *next;
 
list_for_each_entry_safe(pqn, next, >queues, process_queue_list) {
-   if (pqn->q && pqn->q->gws)
+   if (pqn->q && pqn->q->gws &&
+   KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3))

amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
pqn->q->gws);
kfd_procfs_del_queue(pqn->q);
@@ -446,8 +455,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, 
unsigned int qid)
}
 
if (pqn->q->gws) {
-   
amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
-   pqn->q->gws);
+   if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 
3))
+   amdgpu_amdkfd_remove_gws_from_process(
+   pqm->process->kgd_process_info,
+   pqn->q->gws);
pdd->qpd.num_gws = 0;
}
 
-- 
2.35.1



[PATCH] drm/amdkfd: Use KIQ to unmap HIQ

2023-06-16 Thread Mukul Joshi
Currently, we unmap HIQ by directly writing to HQD
registers. This doesn't work for GFX9.4.3. Instead,
use KIQ to unmap HIQ, similar to how we use KIQ to
map HIQ. Using KIQ to unmap HIQ works for all GFX
series post GFXv9.

Signed-off-by: Mukul Joshi 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  1 +
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 47 ++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|  3 ++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  1 +
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c| 47 ++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 48 +++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  3 ++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  |  8 
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  |  4 ++
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  7 ++-
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  3 ++
 13 files changed, 170 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 5b4b7f8b92a5..b82435e17ed0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -372,6 +372,7 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
.hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump,
.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
.hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied,
+   .hiq_hqd_destroy = kgd_gfx_v9_hiq_hqd_destroy,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 8ad7a7779e14..a919fb8e09a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -510,6 +510,52 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device 
*adev, void *mqd)
return false;
 }
 
+int kgd_gfx_v10_hiq_hqd_destroy(struct amdgpu_device *adev, void *mqd,
+   uint32_t pipe_id, uint32_t queue_id,
+   uint32_t inst)
+{
+   struct amdgpu_ring *kiq_ring = >gfx.kiq[0].ring;
+   struct v10_compute_mqd *m = get_mqd(mqd);
+   uint32_t mec, pipe;
+   uint32_t doorbell_off;
+   int r;
+
+   doorbell_off = m->cp_hqd_pq_doorbell_control >>
+   CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+   acquire_queue(adev, pipe_id, queue_id);
+
+   mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+   pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+   spin_lock(>gfx.kiq[0].ring_lock);
+   r = amdgpu_ring_alloc(kiq_ring, 6);
+   if (r) {
+   pr_err("Failed to alloc KIQ (%d).\n", r);
+   goto out_unlock;
+   }
+
+   amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+   amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(0) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+   amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(doorbell_off));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
+   amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+   spin_unlock(>gfx.kiq[0].ring_lock);
+   release_queue(adev);
+
+   return r;
+}
+
 static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
@@ -1034,6 +1080,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.hqd_sdma_dump = kgd_hqd_sdma_dump,
.hqd_is_occupied = kgd_hqd_is_occupied,
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+   .hiq_hqd_destroy = kgd_gfx_v10_hiq_hqd_destroy,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
.wave_control_execute = kgd_wave_control_execute,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
index e6b70196071a..00b4514ebdd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -53,3 +53,6 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct 
amdgpu_device *adev,
  

[PATCH] drm/amdkfd: Enable GWS on GFX9.4.3

2023-06-16 Thread Mukul Joshi
Enable GWS capable queue creation for forward
progress guarantee on GFX 9.4.3.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  1 +
 .../amd/amdkfd/kfd_process_queue_manager.c| 31 ---
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9d4abfd8b55e..226d2dd7fa49 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -518,6 +518,7 @@ static int kfd_gws_init(struct kfd_node *node)
&& kfd->mec2_fw_version >= 0x30)   ||
(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2)
&& kfd->mec2_fw_version >= 0x28) ||
+   (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) ||
(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
&& kfd->mec2_fw_version >= 0x6b
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 9ad1a2186a24..9a091d8f9aaf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -123,16 +123,20 @@ int pqm_set_gws(struct process_queue_manager *pqm, 
unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
 
-   if (gws)
-   ret = 
amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
-   gws, );
-   else
-   ret = 
amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
-   pqn->q->gws);
-   if (unlikely(ret))
-   return ret;
+   if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) {
+   if (gws)
+   ret = 
amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
+   gws, );
+   else
+   ret = 
amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
+   pqn->q->gws);
+   if (unlikely(ret))
+   return ret;
+   pqn->q->gws = mem;
+   } else {
+   pqn->q->gws = ERR_PTR(-ENOMEM);
+   }
 
-   pqn->q->gws = mem;
pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
 
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
@@ -164,7 +168,8 @@ void pqm_uninit(struct process_queue_manager *pqm)
struct process_queue_node *pqn, *next;
 
list_for_each_entry_safe(pqn, next, >queues, process_queue_list) {
-   if (pqn->q && pqn->q->gws)
+   if (pqn->q && pqn->q->gws &&
+   KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3))

amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
pqn->q->gws);
kfd_procfs_del_queue(pqn->q);
@@ -446,8 +451,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, 
unsigned int qid)
}
 
if (pqn->q->gws) {
-   
amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
-   pqn->q->gws);
+   if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 
3))
+   amdgpu_amdkfd_remove_gws_from_process(
+   pqm->process->kgd_process_info,
+   pqn->q->gws);
pdd->qpd.num_gws = 0;
}
 
-- 
2.35.1



[PATCHv3] drm/amdgpu: Update invalid PTE flag setting

2023-06-13 Thread Mukul Joshi
Update the invalid PTE flag setting with TF enabled.
This is to ensure, in addition to transitioning the
retry fault to a no-retry fault, it also causes the
wavefront to enter the trap handler. With the current
setting, the fault only transitions to a no-retry fault.
Additionally, have 2 sets of invalid PTE settings, one for
TF enabled, the other for TF disabled. The setting with
TF disabled, doesn't work with TF enabled.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Update handling according to Christian's feedback.

v2->v3:
- Remove ASIC specific callback (Felix).

 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 21 +
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1cb14ea18cd9..ff9db7e5c086 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2583,7 +2583,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
/* Intentionally setting invalid PTE flag
 * combination to force a no-retry-fault
 */
-   flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT;
+   flags = AMDGPU_VM_NORETRY_FLAGS;
value = 0;
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9c85d494f2a2..b81fcb962d8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -84,7 +84,13 @@ struct amdgpu_mem_stats;
 /* PDE Block Fragment Size for VEGA10 */
 #define AMDGPU_PDE_BFS(a)  ((uint64_t)a << 59)
 
+/* Flag combination to set no-retry with TF disabled */
+#define AMDGPU_VM_NORETRY_FLAGS(AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE 
| \
+   AMDGPU_PTE_TF)
 
+/* Flag combination to set no-retry with TF enabled */
+#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \
+  AMDGPU_PTE_PRT)
 /* For GFX9 */
 #define AMDGPU_PTE_MTYPE_VG10(a)   ((uint64_t)(a) << 57)
 #define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index dea1a64be44d..45b26cad59cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -778,6 +778,24 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params 
*params,
1, 0, flags);
 }
 
+/**
+ * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags
+ *
+ * @adev - amdgpu_device pointer
+ * @flags: pointer to PTE flags
+ *
+ * Update PTE no-retry flags when TF is enabled.
+ */
+static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev,
+   uint64_t *flags)
+{
+   /* Update no retry flags when TF is enabled */
+   if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) {
+   *flags &= ~AMDGPU_VM_NORETRY_FLAGS;
+   *flags |= AMDGPU_VM_NORETRY_FLAGS_TF;
+   }
+}
+
 /*
  * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
  *
@@ -804,6 +822,9 @@ static void amdgpu_vm_pte_update_flags(struct 
amdgpu_vm_update_params *params,
flags |= AMDGPU_PTE_EXECUTABLE;
}
 
+   if (adev->gmc.translate_further && level == AMDGPU_VM_PTB)
+   amdgpu_vm_pte_update_noretry_flags(adev, );
+
/* APUs mapping system memory may need different MTYPEs on different
 * NUMA nodes. Only do this for contiguous ranges that can be assumed
 * to be on the same NUMA node.
-- 
2.35.1



[PATCH] drm/amdkfd: Remove DUMMY_VRAM_SIZE

2023-06-12 Thread Mukul Joshi
Remove DUMMY_VRAM_SIZE as it is not needed and can result
in reporting incorrect memory size.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 3dcd8f8bc98e..49f40d9f16e8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -30,9 +30,6 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 
-/* Fixme: Fake 32GB for 1PNPS1 mode bringup */
-#define DUMMY_VRAM_SIZE 31138512896
-
 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
  * GPU processor ID are expressed with Bit[31]=1.
  * The base is set to 0x8000_ + 0x1000 to avoid collision with GPU IDs
@@ -1056,8 +1053,6 @@ static int kfd_parse_subtype_mem(struct 
crat_subtype_memory *mem,
 
props->heap_type = heap_type;
props->flags = flags;
-   if (size_in_bytes == 0)
-   size_in_bytes = DUMMY_VRAM_SIZE; /* Fixme: TBD 
*/
props->size_in_bytes = size_in_bytes;
props->width = width;
 
-- 
2.35.1



[PATCHv2] drm/amdgpu: Update invalid PTE flag setting

2023-06-12 Thread Mukul Joshi
Update the invalid PTE flag setting with TF enabled.
This is to ensure, in addition to transitioning the
retry fault to a no-retry fault, it also causes the
wavefront to enter the trap handler. With the current
setting, the fault only transitions to a no-retry fault.
Additionally, have 2 sets of invalid PTE settings, one for
TF enabled, the other for TF disabled. The setting with
TF disabled, doesn't work with TF enabled.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Update handling according to Christian's feedback.

 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h   |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |  3 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 11 +++
 5 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 6794edd1d2d2..e5c6b075fbbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -152,6 +152,10 @@ struct amdgpu_gmc_funcs {
void (*override_vm_pte_flags)(struct amdgpu_device *dev,
  struct amdgpu_vm *vm,
  uint64_t addr, uint64_t *flags);
+   /* update no-retry flags */
+   void (*update_vm_pte_noretry_flags)(struct amdgpu_device *dev,
+   uint64_t *flags);
+
/* get the amount of memory used by the vbios for pre-OS console */
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
 
@@ -343,6 +347,9 @@ struct amdgpu_gmc {
 #define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags)\
(adev)->gmc.gmc_funcs->override_vm_pte_flags\
((adev), (vm), (addr), (pte_flags))
+#define amdgpu_gmc_update_vm_pte_noretry_flags(adev, pte_flags)
\
+   ((adev)->gmc.gmc_funcs->update_vm_pte_noretry_flags \
+   ((adev), (pte_flags)))
 #define amdgpu_gmc_get_vbios_fb_size(adev) 
(adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1cb14ea18cd9..ff9db7e5c086 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2583,7 +2583,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
/* Intentionally setting invalid PTE flag
 * combination to force a no-retry-fault
 */
-   flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT;
+   flags = AMDGPU_VM_NORETRY_FLAGS;
value = 0;
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9c85d494f2a2..b81fcb962d8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -84,7 +84,13 @@ struct amdgpu_mem_stats;
 /* PDE Block Fragment Size for VEGA10 */
 #define AMDGPU_PDE_BFS(a)  ((uint64_t)a << 59)
 
+/* Flag combination to set no-retry with TF disabled */
+#define AMDGPU_VM_NORETRY_FLAGS(AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE 
| \
+   AMDGPU_PTE_TF)
 
+/* Flag combination to set no-retry with TF enabled */
+#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \
+  AMDGPU_PTE_PRT)
 /* For GFX9 */
 #define AMDGPU_PTE_MTYPE_VG10(a)   ((uint64_t)(a) << 57)
 #define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index dea1a64be44d..39f1650f6d00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -804,6 +804,9 @@ static void amdgpu_vm_pte_update_flags(struct 
amdgpu_vm_update_params *params,
flags |= AMDGPU_PTE_EXECUTABLE;
}
 
+   if (adev->gmc.translate_further && level == AMDGPU_VM_PTB)
+   amdgpu_gmc_update_vm_pte_noretry_flags(adev, );
+
/* APUs mapping system memory may need different MTYPEs on different
 * NUMA nodes. Only do this for contiguous ranges that can be assumed
 * to be on the same NUMA node.
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3ed286b72cae..aea8e80c3419 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1307,6 +1307,16 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device 
*adev,
 mapping, flags);
 }
 
+static void gmc_v9_0_update_vm_

[PATCH] drm/amdkfd: Fix reserved SDMA queues handling

2023-06-07 Thread Mukul Joshi
This patch fixes a regression caused by a bad merge where
the handling of reserved SDMA queues was accidentally removed.
With the fix, the reserved SDMA queues are again correctly
marked as unavailable for allocation.

Fixes: c27842c84a848 ("drm/amdkfd: Update SDMA queue management for GFX9.4.3")
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 13 ++---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c   | 10 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  2 +-
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9fc9d32cb579..9d4abfd8b55e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -106,20 +106,19 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev 
*kfd)
kfd->device_info.num_sdma_queues_per_engine = 8;
}
 
+   bitmap_zero(kfd->device_info.reserved_sdma_queues_bitmap, 
KFD_MAX_SDMA_QUEUES);
+
switch (sdma_version) {
case IP_VERSION(6, 0, 0):
+   case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
case IP_VERSION(6, 0, 3):
/* Reserve 1 for paging and 1 for gfx */
kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
/* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; 
BIT(2)=engine-0 queue-1; ... */
-   kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL;
-   break;
-   case IP_VERSION(6, 0, 1):
-   /* Reserve 1 for paging and 1 for gfx */
-   kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
-   /* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... */
-   kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL;
+   bitmap_set(kfd->device_info.reserved_sdma_queues_bitmap, 0,
+  kfd->adev->sdma.num_instances *
+  
kfd->device_info.num_reserved_sdma_queues_per_engine);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0c1be91a87c6..498ad7d4e7d9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -123,11 +123,6 @@ unsigned int get_num_xgmi_sdma_queues(struct 
device_queue_manager *dqm)
dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
 }
 
-static inline uint64_t get_reserved_sdma_queues_bitmap(struct 
device_queue_manager *dqm)
-{
-   return dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap;
-}
-
 static void init_sdma_bitmaps(struct device_queue_manager *dqm)
 {
bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
@@ -135,6 +130,11 @@ static void init_sdma_bitmaps(struct device_queue_manager 
*dqm)
 
bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));
+
+   /* Mask out the reserved queues */
+   bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap,
+ dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap,
+ KFD_MAX_SDMA_QUEUES);
 }
 
 void program_sh_mem_settings(struct device_queue_manager *dqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 023b17e0116b..7364a5d77c6e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -239,7 +239,7 @@ struct kfd_device_info {
uint32_t no_atomic_fw_version;
unsigned int num_sdma_queues_per_engine;
unsigned int num_reserved_sdma_queues_per_engine;
-   uint64_t reserved_sdma_queues_bitmap;
+   DECLARE_BITMAP(reserved_sdma_queues_bitmap, KFD_MAX_SDMA_QUEUES);
 };
 
 unsigned int kfd_get_num_sdma_engines(struct kfd_node *kdev);
-- 
2.35.1



[PATCH] drm/amdgpu: Rename DRM schedulers in amdgpu TTM

2023-06-07 Thread Mukul Joshi
Rename mman.entity to mman.high_pr to make the distinction
clearer that this is a high priority scheduler. Similarly,
rename the recently added mman.delayed to mman.low_pr to
make it clear it is a low priority scheduler.
No functional change in this patch.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 18 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  8 
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  2 +-
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8884c043cf76..8a4ed69a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -228,7 +228,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object 
*bo,
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
 
-   r = amdgpu_job_alloc_with_ib(adev, >mman.entity,
+   r = amdgpu_job_alloc_with_ib(adev, >mman.high_pr,
 AMDGPU_FENCE_OWNER_UNDEFINED,
 num_dw * 4 + num_bytes,
 AMDGPU_IB_POOL_DELAYED, );
@@ -1460,7 +1460,7 @@ static int amdgpu_ttm_access_memory_sdma(struct 
ttm_buffer_object *bo,
memcpy(adev->mman.sdma_access_ptr, buf, len);
 
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
-   r = amdgpu_job_alloc_with_ib(adev, >mman.entity,
+   r = amdgpu_job_alloc_with_ib(adev, >mman.high_pr,
 AMDGPU_FENCE_OWNER_UNDEFINED,
 num_dw * 4, AMDGPU_IB_POOL_DELAYED,
 );
@@ -2036,7 +2036,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
 
ring = adev->mman.buffer_funcs_ring;
sched = >sched;
-   r = drm_sched_entity_init(>mman.entity,
+   r = drm_sched_entity_init(>mman.high_pr,
  DRM_SCHED_PRIORITY_KERNEL, ,
  1, NULL);
if (r) {
@@ -2045,7 +2045,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
return;
}
 
-   r = drm_sched_entity_init(>mman.delayed,
+   r = drm_sched_entity_init(>mman.low_pr,
  DRM_SCHED_PRIORITY_NORMAL, ,
  1, NULL);
if (r) {
@@ -2054,8 +2054,8 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
goto error_free_entity;
}
} else {
-   drm_sched_entity_destroy(>mman.entity);
-   drm_sched_entity_destroy(>mman.delayed);
+   drm_sched_entity_destroy(>mman.high_pr);
+   drm_sched_entity_destroy(>mman.low_pr);
dma_fence_put(man->move);
man->move = NULL;
}
@@ -2071,7 +2071,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
return;
 
 error_free_entity:
-   drm_sched_entity_destroy(>mman.entity);
+   drm_sched_entity_destroy(>mman.high_pr);
 }
 
 static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
@@ -2086,8 +2086,8 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device 
*adev,
AMDGPU_IB_POOL_DIRECT :
AMDGPU_IB_POOL_DELAYED;
int r;
-   struct drm_sched_entity *entity = delayed ? >mman.delayed :
-   >mman.entity;
+   struct drm_sched_entity *entity = delayed ? >mman.low_pr :
+   >mman.high_pr;
r = amdgpu_job_alloc_with_ib(adev, entity,
 AMDGPU_FENCE_OWNER_UNDEFINED,
 num_dw * 4, pool, job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index e82b1edee7a4..6d0d66e40db9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -59,10 +59,10 @@ struct amdgpu_mman {
boolbuffer_funcs_enabled;
 
struct mutexgtt_window_lock;
-   /* Scheduler entity for buffer moves */
-   struct drm_sched_entity entity;
-   /* Scheduler entity for VRAM clearing */
-   struct drm_sched_entity delayed;
+   /* High priority scheduler entity for buffer moves */
+   struct drm_sched_entity high_pr;
+   /* Low priority scheduler e

[PATCH] drm/amdkfd: Set event interrupt class for GFX 9.4.3

2023-05-23 Thread Mukul Joshi
Fix the warning during driver load because the event
interrupt class is not set for GFX9.4.3.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index e92b93b2c14c..862a50f7b490 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -139,6 +139,7 @@ static void 
kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(9, 4, 0): /* VEGA20 */
case IP_VERSION(9, 4, 1): /* ARCTURUS */
case IP_VERSION(9, 4, 2): /* ALDEBARAN */
+   case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
case IP_VERSION(10, 3, 1): /* VANGOGH */
case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
case IP_VERSION(10, 3, 6): /* GC 10.3.6 */
-- 
2.35.1



[PATCH] drm/amdgpu: Add a low priority scheduler for VRAM clearing

2023-05-17 Thread Mukul Joshi
Add a low priority DRM scheduler for VRAM clearing instead of using
the existing high priority scheduler. Use the high priority scheduler
for migrations and evictions.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  4 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 37 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h|  5 ++-
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 9924c8de57ab..46f249912b67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -627,7 +627,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, );
+   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
if (unlikely(r))
goto fail_unreserve;
 
@@ -1354,7 +1354,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, );
+   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);
if (!WARN_ON(r)) {
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 34724b771ace..bbdad0dc1b07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -384,7 +384,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, _fence);
+   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, _fence,
+   false);
if (r) {
goto error;
} else if (wipe_fence) {
@@ -2040,8 +2041,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
  r);
return;
}
+
+   r = drm_sched_entity_init(>mman.delayed,
+ DRM_SCHED_PRIORITY_NORMAL, ,
+ 1, NULL);
+   if (r) {
+   DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
+ r);
+   goto error_free_entity;
+   }
} else {
drm_sched_entity_destroy(>mman.entity);
+   drm_sched_entity_destroy(>mman.delayed);
dma_fence_put(man->move);
man->move = NULL;
}
@@ -2053,6 +2064,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
size = adev->gmc.visible_vram_size;
man->size = size;
adev->mman.buffer_funcs_enabled = enable;
+
+   return;
+
+error_free_entity:
+   drm_sched_entity_destroy(>mman.entity);
 }
 
 static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
@@ -2060,14 +2076,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device 
*adev,
  unsigned int num_dw,
  struct dma_resv *resv,
  bool vm_needs_flush,
- struct amdgpu_job **job)
+ struct amdgpu_job **job,
+ bool delayed)
 {
enum amdgpu_ib_pool_type pool = direct_submit ?
AMDGPU_IB_POOL_DIRECT :
AMDGPU_IB_POOL_DELAYED;
int r;
-
-   r = amdgpu_job_alloc_with_ib(adev, >mman.entity,
+   struct drm_sched_entity *entity = delayed ? >mman.delayed :
+   >mman.entity;
+   r = amdgpu_job_alloc_with_ib(adev, entity,
 AMDGPU_FENCE_OWNER_UNDEFINED,
 num_dw * 4, pool, job);
if (r)
@@ -2108,7 +2126,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t 
src_offset,
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
-  resv, vm_needs_flush, );
+  resv, vm_needs_flush, , false);
if (r)
return r;
 
@@ -2144,7 +2162,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring

[PATCHv2 2/3] drm/amdgpu: Set GTT size equal to TTM mem limit

2023-04-26 Thread Mukul Joshi
Use the helper function in TTM to get TTM mem limit and
set GTT size to be equal to TTM mem limit.

Signed-off-by: Mukul Joshi 
Reviewed-by: Christian König 
---
v1->v2:
- Remove AMDGPU_DEFAULT_GTT_SIZE_MB as well as it is
  unused.

 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 25 ++---
 2 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d107d95e2e6f..5ded5a90dc68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -245,7 +245,6 @@ extern int amdgpu_sg_display;
 
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
-#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
 #define AMDGPU_MAX_USEC_TIMEOUT10  /* 100 ms */
 #define AMDGPU_FENCE_JIFFIES_TIMEOUT   (HZ / 2)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ce34b73d05bc..ac220c779fc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1807,26 +1807,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
 
-   /* Compute GTT size, either based on 1/2 the size of RAM size
-* or whatever the user passed on module init */
-   if (amdgpu_gtt_size == -1) {
-   struct sysinfo si;
-
-   si_meminfo();
-   /* Certain GL unit tests for large textures can cause problems
-* with the OOM killer since there is no way to link this memory
-* to a process.  This was originally mitigated (but not 
necessarily
-* eliminated) by limiting the GTT size.  The problem is this 
limit
-* is often too low for many modern games so just make the 
limit 1/2
-* of system memory which aligns with TTM. The OOM accounting 
needs
-* to be addressed, but we shouldn't prevent common 3D 
applications
-* from being usable just to potentially mitigate that corner 
case.
-*/
-   gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
-  (u64)si.totalram * si.mem_unit / 2);
-   } else {
+   /* Compute GTT size, either based on TTM limit
+* or whatever the user passed on module init.
+*/
+   if (amdgpu_gtt_size == -1)
+   gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+   else
gtt_size = (uint64_t)amdgpu_gtt_size << 20;
-   }
 
/* Initialize GTT memory pool */
r = amdgpu_gtt_mgr_init(adev, gtt_size);
-- 
2.35.1



[PATCH 3/3] drm/amdkfd: Update KFD TTM mem limit

2023-04-25 Thread Mukul Joshi
Use the helper function in TTM to get TTM memory
limit and set KFD's internal mem limit. This ensures
that KFD's TTM mem limit and actual TTM mem limit are
exactly same.

Signed-off-by: Mukul Joshi 
Reviewed-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index fed8bb9a721f..a46285841d17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -53,7 +53,6 @@ int amdgpu_amdkfd_init(void)
amdgpu_amdkfd_total_mem_size *= si.mem_unit;
 
ret = kgd2kfd_init();
-   amdgpu_amdkfd_gpuvm_init_mem_limits();
kfd_initialized = !ret;
 
return ret;
@@ -143,6 +142,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
int i;
int last_valid_bit;
 
+   amdgpu_amdkfd_gpuvm_init_mem_limits();
+
if (adev->kfd.dev) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 862e94fbf53c..1002c7834386 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -35,6 +35,7 @@
 #include 
 #include "amdgpu_xgmi.h"
 #include "kfd_smi_events.h"
+#include 
 
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
@@ -109,13 +110,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
struct sysinfo si;
uint64_t mem;
 
+   if (kfd_mem_limit.max_system_mem_limit)
+   return;
+
si_meminfo();
mem = si.freeram - si.freehigh;
mem *= si.mem_unit;
 
spin_lock_init(_mem_limit.mem_limit_lock);
kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
-   kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
+   kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
(kfd_mem_limit.max_ttm_mem_limit >> 20));
-- 
2.35.1



[PATCH 2/3] drm/amdgpu: Set GTT size equal to TTM mem limit

2023-04-25 Thread Mukul Joshi
Use the helper function in TTM to get TTM mem limit and
set GTT size to be equal to TTM mem limit.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 25 ++---
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ce34b73d05bc..ac220c779fc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1807,26 +1807,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
 
-   /* Compute GTT size, either based on 1/2 the size of RAM size
-* or whatever the user passed on module init */
-   if (amdgpu_gtt_size == -1) {
-   struct sysinfo si;
-
-   si_meminfo();
-   /* Certain GL unit tests for large textures can cause problems
-* with the OOM killer since there is no way to link this memory
-* to a process.  This was originally mitigated (but not 
necessarily
-* eliminated) by limiting the GTT size.  The problem is this 
limit
-* is often too low for many modern games so just make the 
limit 1/2
-* of system memory which aligns with TTM. The OOM accounting 
needs
-* to be addressed, but we shouldn't prevent common 3D 
applications
-* from being usable just to potentially mitigate that corner 
case.
-*/
-   gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
-  (u64)si.totalram * si.mem_unit / 2);
-   } else {
+   /* Compute GTT size, either based on TTM limit
+* or whatever the user passed on module init.
+*/
+   if (amdgpu_gtt_size == -1)
+   gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+   else
gtt_size = (uint64_t)amdgpu_gtt_size << 20;
-   }
 
/* Initialize GTT memory pool */
r = amdgpu_gtt_mgr_init(adev, gtt_size);
-- 
2.35.1



[PATCH 1/3] drm/ttm: Helper function to get TTM mem limit

2023-04-25 Thread Mukul Joshi
Add a helper function to get TTM memory limit. This is
needed by KFD to set its own internal memory limits.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/ttm/ttm_tt.c | 6 ++
 include/drm/ttm/ttm_tt.h | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index d505603930a7..1f765dd7792c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -449,3 +449,9 @@ ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt,
return _tt->base;
 }
 EXPORT_SYMBOL(ttm_kmap_iter_tt_init);
+
+unsigned long ttm_tt_pages_limit(void)
+{
+   return ttm_pages_limit;
+}
+EXPORT_SYMBOL(ttm_tt_pages_limit);
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index b7d3f3843f1e..d54b2dc05d71 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -222,7 +222,7 @@ void ttm_tt_mgr_init(unsigned long num_pages, unsigned long 
num_dma32_pages);
 
 struct ttm_kmap_iter *ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt,
struct ttm_tt *tt);
-
+unsigned long ttm_tt_pages_limit(void);
 #if IS_ENABLED(CONFIG_AGP)
 #include 
 
-- 
2.35.1



[PATCH] drm/amdgpu: Update invalid PTE flag setting

2023-04-04 Thread Mukul Joshi
Update the invalid PTE flag setting to ensure, in addition
to transitioning the retry fault to a no-retry fault, it
also causes the wavefront to enter the trap handler. With the
current setting, it only transitions to a no-retry fault.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index af6f26a97fc5..5df4f7bb241f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2488,7 +2488,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
/* Intentionally setting invalid PTE flag
 * combination to force a no-retry-fault
 */
-   flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT;
+   flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | AMDGPU_PTE_PRT;
value = 0;
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
-- 
2.35.1



[PATCHv2] drm/amdgpu: Enable IH retry CAM on GFX9

2023-01-19 Thread Mukul Joshi
This patch enables the IH retry CAM on GFX9 series cards. This
retry filter is used to prevent sending lots of retry interrupts
in a short span of time and overflowing the IH ring buffer. This
will also help reduce CPU interrupt workload.

Signed-off-by: Mukul Joshi 
---
v1:
- Reviewed by Felix.

v1->v2:
- Update the CAM enable register offset for Aldebaran.
- Add new register defines for Aldebaran in vega20_ih.c.
- Drain IH0 ring also along with soft ring.
- Setup CAM doorbell register before enabling CAM.

 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h   |  2 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 51 +++--
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c| 55 +--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 10 +++-
 .../asic_reg/oss/osssys_4_2_0_offset.h|  6 ++
 .../asic_reg/oss/osssys_4_2_0_sh_mask.h   | 11 
 7 files changed, 88 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index e9f2c11ea416..be243adf3e65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -98,6 +98,8 @@ struct amdgpu_irq {
struct irq_domain   *domain; /* GPU irq controller domain */
unsignedvirq[AMDGPU_MAX_IRQ_SRC_ID];
uint32_tsrbm_soft_reset;
+   u32 retry_cam_doorbell_index;
+   boolretry_cam_enabled;
 };
 
 void amdgpu_irq_disable_all(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index d65c6cea3445..4847117d67a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -553,32 +553,49 @@ static int gmc_v9_0_process_interrupt(struct 
amdgpu_device *adev,
const char *mmhub_cid;
const char *hub_name;
u64 addr;
+   uint32_t cam_index = 0;
+   int ret;
 
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
 
if (retry_fault) {
-   /* Returning 1 here also prevents sending the IV to the KFD */
+   if (adev->irq.retry_cam_enabled) {
+   /* Delegate it to a different ring if the hardware 
hasn't
+* already done it.
+*/
+   if (entry->ih == >irq.ih) {
+   amdgpu_irq_delegate(adev, entry, 8);
+   return 1;
+   }
+
+   cam_index = entry->src_data[2] & 0x3ff;
 
-   /* Process it onyl if it's the first fault for this address */
-   if (entry->ih != >irq.ih_soft &&
-   amdgpu_gmc_filter_faults(adev, entry->ih, addr, 
entry->pasid,
+   ret = amdgpu_vm_handle_fault(adev, entry->pasid, addr, 
write_fault);
+   WDOORBELL32(adev->irq.retry_cam_doorbell_index, 
cam_index);
+   if (ret)
+   return 1;
+   } else {
+   /* Process it onyl if it's the first fault for this 
address */
+   if (entry->ih != >irq.ih_soft &&
+   amdgpu_gmc_filter_faults(adev, entry->ih, addr, 
entry->pasid,
 entry->timestamp))
-   return 1;
+   return 1;
 
-   /* Delegate it to a different ring if the hardware hasn't
-* already done it.
-*/
-   if (entry->ih == >irq.ih) {
-   amdgpu_irq_delegate(adev, entry, 8);
-   return 1;
-   }
+   /* Delegate it to a different ring if the hardware 
hasn't
+* already done it.
+*/
+   if (entry->ih == >irq.ih) {
+   amdgpu_irq_delegate(adev, entry, 8);
+   return 1;
+   }
 
-   /* Try to handle the recoverable page faults by filling page
-* tables
-*/
-   if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, 
write_fault))
-   return 1;
+   /* Try to handle the recoverable page faults by filling 
page
+* tables
+*/
+   if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, 
write_fault))
+   return 1;
+   }
}
 
if (!printk_ratelimit())
diff --git a/drivers/gpu/drm/amd/amdgpu/nbi

[PATCH] drm/amdkfd: Fix kernel warning during topology setup

2022-12-20 Thread Mukul Joshi
This patch fixes the following kernel warning seen during
driver load by correctly initializing the p2plink attr before
creating the sysfs file:

[  +0.002865] [ cut here ]
[  +0.002327] kobject: '(null)' (56260cfb): is not initialized, yet 
kobject_put() is being called.
[  +0.004780] WARNING: CPU: 32 PID: 1006 at lib/kobject.c:718 
kobject_put+0xaa/0x1c0
[  +0.001361] Call Trace:
[  +0.001234]  
[  +0.001067]  kfd_remove_sysfs_node_entry+0x24a/0x2d0 [amdgpu]
[  +0.003147]  kfd_topology_update_sysfs+0x3d/0x750 [amdgpu]
[  +0.002890]  kfd_topology_add_device+0xbd7/0xc70 [amdgpu]
[  +0.002844]  ? lock_release+0x13c/0x2e0
[  +0.001936]  ? smu_cmn_send_smc_msg_with_param+0x1e8/0x2d0 [amdgpu]
[  +0.003313]  ? amdgpu_dpm_get_mclk+0x54/0x60 [amdgpu]
[  +0.002703]  kgd2kfd_device_init.cold+0x39f/0x4ed [amdgpu]
[  +0.002930]  amdgpu_amdkfd_device_init+0x13d/0x1f0 [amdgpu]
[  +0.002944]  amdgpu_device_init.cold+0x1464/0x17b4 [amdgpu]
[  +0.002970]  ? pci_bus_read_config_word+0x43/0x80
[  +0.002380]  amdgpu_driver_load_kms+0x15/0x100 [amdgpu]
[  +0.002744]  amdgpu_pci_probe+0x147/0x370 [amdgpu]
[  +0.002522]  local_pci_probe+0x40/0x80
[  +0.001896]  work_for_cpu_fn+0x10/0x20
[  +0.001892]  process_one_work+0x26e/0x5a0
[  +0.002029]  worker_thread+0x1fd/0x3e0
[  +0.001890]  ? process_one_work+0x5a0/0x5a0
[  +0.002115]  kthread+0xea/0x110
[  +0.001618]  ? kthread_complete_and_exit+0x20/0x20
[  +0.002422]  ret_from_fork+0x1f/0x30
[  +0.001808]  
[  +0.001103] irq event stamp: 59837
[  +0.001718] hardirqs last  enabled at (59849): [] 
__up_console_sem+0x52/0x60
[  +0.004414] hardirqs last disabled at (59860): [] 
__up_console_sem+0x37/0x60
[  +0.004414] softirqs last  enabled at (59654): [] 
irq_exit_rcu+0xd7/0x130
[  +0.004205] softirqs last disabled at (59649): [] 
irq_exit_rcu+0xd7/0x130
[  +0.004203] ---[ end trace  ]---

Fixes: 0f28cca87e9a ("drm/amdkfd: Extend KFD device topology to surface 
peer-to-peer links")
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index bceb1a5b2518..3fdaba56be6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -801,7 +801,7 @@ static int kfd_build_sysfs_node_entry(struct 
kfd_topology_device *dev,
 
p2plink->attr.name = "properties";
p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
-   sysfs_attr_init(>attr);
+   sysfs_attr_init(>attr);
ret = sysfs_create_file(p2plink->kobj, >attr);
if (ret < 0)
return ret;
-- 
2.35.1



[PATCH 2/2] drm/amdgpu: Rework retry fault removal

2022-12-12 Thread Mukul Joshi
Rework retry fault removal from the software filter by
storing an expired timestamp for a fault that is being removed.
When a new fault comes, and it matches an entry in the sw filter,
it will be added as a new fault only when its timestamp is greater
than the timestamp expiry of the fault in the sw filter.
This helps in avoiding stale faults being added back into the
filter and preventing legitimate faults from being handled.

Suggested-by: Felix Kuehling 
Signed-off-by: Mukul Joshi 
Reviewed-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 36 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  1 +
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 02a4c93673ce..fb811bc7214c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -387,8 +387,21 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
while (fault->timestamp >= stamp) {
uint64_t tmp;
 
-   if (atomic64_read(>key) == key)
-   return true;
+   if (atomic64_read(>key) == key) {
+   /*
+* if we get a fault which is already present in
+* the fault_ring and the timestamp of
+* the fault is after the expired timestamp,
+* then this is a new fault that needs to be added
+* into the fault ring.
+*/
+   if (fault->timestamp_expiry != 0 &&
+   amdgpu_ih_ts_after(fault->timestamp_expiry,
+  timestamp))
+   break;
+   else
+   return true;
+   }
 
tmp = fault->timestamp;
fault = >fault_ring[fault->next];
@@ -424,15 +437,32 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device 
*adev, uint64_t addr,
 {
struct amdgpu_gmc *gmc = >gmc;
uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
+   struct amdgpu_ih_ring *ih;
struct amdgpu_gmc_fault *fault;
+   uint32_t last_wptr;
+   uint64_t last_ts;
uint32_t hash;
uint64_t tmp;
 
+   ih = adev->irq.retry_cam_enabled ? >irq.ih_soft : >irq.ih1;
+   /* Get the WPTR of the last entry in IH ring */
+   last_wptr = amdgpu_ih_get_wptr(adev, ih);
+   /* Order wptr with ring data. */
+   rmb();
+   /* Get the timetamp of the last entry in IH ring */
+   last_ts = amdgpu_ih_decode_iv_ts(adev, ih, last_wptr, -1);
+
hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
fault = >fault_ring[gmc->fault_hash[hash].idx];
do {
-   if (atomic64_cmpxchg(>key, key, 0) == key)
+   if (atomic64_read(>key) == key) {
+   /*
+* Update the timestamp when this fault
+* expired.
+*/
+   fault->timestamp_expiry = last_ts;
break;
+   }
 
tmp = fault->timestamp;
fault = >fault_ring[fault->next];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 0305b660cd17..9441e530f1dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -70,6 +70,7 @@ struct amdgpu_gmc_fault {
uint64_ttimestamp:48;
uint64_tnext:AMDGPU_GMC_FAULT_RING_ORDER;
atomic64_t  key;
+   uint64_ttimestamp_expiry:48;
 };
 
 /*
-- 
2.35.1



[PATCH 1/2] drm/amdgpu: Enable IH retry CAM on GFX9

2022-12-12 Thread Mukul Joshi
This patch enables the IH retry CAM on GFX9 series cards. This
retry filter is used to prevent sending lots of retry interrupts
in a short span of time and overflowing the IH ring buffer. This
will also help reduce CPU interrupt workload.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h   |  2 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 51 ---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c| 46 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  4 +-
 .../asic_reg/oss/osssys_4_2_0_offset.h|  6 +++
 .../asic_reg/oss/osssys_4_2_0_sh_mask.h   | 13 +
 7 files changed, 75 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index e9f2c11ea416..be243adf3e65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -98,6 +98,8 @@ struct amdgpu_irq {
struct irq_domain   *domain; /* GPU irq controller domain */
unsignedvirq[AMDGPU_MAX_IRQ_SRC_ID];
uint32_tsrbm_soft_reset;
+   u32 retry_cam_doorbell_index;
+   boolretry_cam_enabled;
 };
 
 void amdgpu_irq_disable_all(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 08d6cf79fb15..e5ab84f3e072 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -537,32 +537,49 @@ static int gmc_v9_0_process_interrupt(struct 
amdgpu_device *adev,
const char *mmhub_cid;
const char *hub_name;
u64 addr;
+   uint32_t cam_index = 0;
+   int ret;
 
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
 
if (retry_fault) {
-   /* Returning 1 here also prevents sending the IV to the KFD */
+   if (adev->irq.retry_cam_enabled) {
+   /* Delegate it to a different ring if the hardware 
hasn't
+* already done it.
+*/
+   if (entry->ih == >irq.ih) {
+   amdgpu_irq_delegate(adev, entry, 8);
+   return 1;
+   }
+
+   cam_index = entry->src_data[2] & 0x3ff;
 
-   /* Process it onyl if it's the first fault for this address */
-   if (entry->ih != >irq.ih_soft &&
-   amdgpu_gmc_filter_faults(adev, entry->ih, addr, 
entry->pasid,
+   ret = amdgpu_vm_handle_fault(adev, entry->pasid, addr, 
write_fault);
+   WDOORBELL32(adev->irq.retry_cam_doorbell_index, 
cam_index);
+   if (ret)
+   return 1;
+   } else {
+   /* Process it onyl if it's the first fault for this 
address */
+   if (entry->ih != >irq.ih_soft &&
+   amdgpu_gmc_filter_faults(adev, entry->ih, addr, 
entry->pasid,
 entry->timestamp))
-   return 1;
+   return 1;
 
-   /* Delegate it to a different ring if the hardware hasn't
-* already done it.
-*/
-   if (entry->ih == >irq.ih) {
-   amdgpu_irq_delegate(adev, entry, 8);
-   return 1;
-   }
+   /* Delegate it to a different ring if the hardware 
hasn't
+* already done it.
+*/
+   if (entry->ih == >irq.ih) {
+   amdgpu_irq_delegate(adev, entry, 8);
+   return 1;
+   }
 
-   /* Try to handle the recoverable page faults by filling page
-* tables
-*/
-   if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, 
write_fault))
-   return 1;
+   /* Try to handle the recoverable page faults by filling 
page
+* tables
+*/
+   if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, 
write_fault))
+   return 1;
+   }
}
 
if (!printk_ratelimit())
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 19455a725939..685abf57ffdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -238,7 +238,7 @@ static void nbio_v7_4_ih_doorbell_range(str

[PATCH] drm/amdgpu: Update PTE flags with TF enabled

2022-09-13 Thread Mukul Joshi
This patch updates the PTE flags when translate further (TF) is
enabled:
- With translate_further enabled, invalid PTEs can be 0. Reading
  consecutive invalid PTEs as 0 is considered a fault. To prevent
  this, ensure invalid PTEs have at least 1 bit set.
- The current invalid PTE flags settings to translate a retry fault
  into a no-retry fault, doesn't work with TF enabled. As a result,
  update invalid PTE flags settings which works for both TF enabled
  and disabled case.

Fixes: 2abf2573b1c69 ("drm/amdgpu: Enable translate_further to extend UTCL2 
reach")
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 7 +--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 59cac347baa3..690fd4f639f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2484,8 +2484,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
/* Intentionally setting invalid PTE flag
 * combination to force a no-retry-fault
 */
-   flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
-   AMDGPU_PTE_TF;
+   flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT;
value = 0;
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 4603653916f5..67ca16a8027c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1103,10 +1103,13 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device 
*adev, int level,
*flags |= AMDGPU_PDE_BFS(0x9);
 
} else if (level == AMDGPU_VM_PDB0) {
-   if (*flags & AMDGPU_PDE_PTE)
+   if (*flags & AMDGPU_PDE_PTE) {
*flags &= ~AMDGPU_PDE_PTE;
-   else
+   if (!(*flags & AMDGPU_PTE_VALID))
+   *addr |= 1 << PAGE_SHIFT;
+   } else {
*flags |= AMDGPU_PTE_TF;
+   }
}
 }
 
-- 
2.35.1



[PATCH] drm/amdgpu: Fix page table setup on Arcturus

2022-08-22 Thread Mukul Joshi
When translate_further is enabled, page table depth needs to
be updated. This was missing on Arcturus MMHUB init. This was
causing address translations to fail for SDMA user-mode queues.

Fixes: 2abf2573b1c69 ("drm/amdgpu: Enable translate_further to extend UTCL2 
reach")
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 6e0145b2b408..445cb06b9d26 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -295,9 +295,17 @@ static void mmhub_v9_4_disable_identity_aperture(struct 
amdgpu_device *adev,
 static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
 {
struct amdgpu_vmhub *hub = >vmhub[AMDGPU_MMHUB_0];
+   unsigned int num_level, block_size;
uint32_t tmp;
int i;
 
+   num_level = adev->vm_manager.num_level;
+   block_size = adev->vm_manager.block_size;
+   if (adev->gmc.translate_further)
+   num_level -= 1;
+   else
+   block_size -= 9;
+
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i);
@@ -305,7 +313,7 @@ static void mmhub_v9_4_setup_vmid_config(struct 
amdgpu_device *adev, int hubid)
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_DEPTH,
-   adev->vm_manager.num_level);
+   num_level);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
@@ -323,7 +331,7 @@ static void mmhub_v9_4_setup_vmid_config(struct 
amdgpu_device *adev, int hubid)
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
-   adev->vm_manager.block_size - 9);
+   block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-- 
2.35.1



[PATCHv2] drm/amdgpu: Fix interrupt handling on ih_soft ring

2022-08-15 Thread Mukul Joshi
There are no backing hardware registers for ih_soft ring.
As a result, don't try to access hardware registers for read
and write pointers when processing interrupts on the IH soft
ring.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 7 ++-
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 7 ++-
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 7 ++-
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c 
b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 4b5396d3e60f..eec13cb5bf75 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -409,9 +409,11 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
 
-   if (ih == >irq.ih) {
+   if (ih == >irq.ih || ih == >irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
 * to register-based code with overflow checking below.
+* ih_soft ring doesn't have any backing hardware registers,
+* update wptr and return.
 */
wptr = le32_to_cpu(*ih->wptr_cpu);
 
@@ -483,6 +485,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev,
 {
struct amdgpu_ih_regs *ih_regs;
 
+   if (ih == >irq.ih_soft)
+   return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c 
b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index cdd599a08125..03b7066471f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -334,9 +334,11 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
 
-   if (ih == >irq.ih) {
+   if (ih == >irq.ih || ih == >irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
 * to register-based code with overflow checking below.
+* ih_soft ring doesn't have any backing hardware registers,
+* update wptr and return.
 */
wptr = le32_to_cpu(*ih->wptr_cpu);
 
@@ -409,6 +411,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev,
 {
struct amdgpu_ih_regs *ih_regs;
 
+   if (ih == >irq.ih_soft)
+   return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 3b4eb8285943..2022ffbb8dba 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -385,9 +385,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
 
-   if (ih == >irq.ih) {
+   if (ih == >irq.ih || ih == >irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
 * to register-based code with overflow checking below.
+* ih_soft ring doesn't have any backing hardware registers,
+* update wptr and return.
 */
wptr = le32_to_cpu(*ih->wptr_cpu);
 
@@ -461,6 +463,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev,
 {
struct amdgpu_ih_regs *ih_regs;
 
+   if (ih == >irq.ih_soft)
+   return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
-- 
2.35.1



[PATCH] drm/amdgpu: Fix interrupt handling on ih_soft ring

2022-08-12 Thread Mukul Joshi
There are no backing hardware registers for ih_soft ring.
As a result, don't try to access hardware registers for read
and write pointers when processing interrupts on the IH soft
ring.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 3b4eb8285943..2022ffbb8dba 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -385,9 +385,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
 
-   if (ih == >irq.ih) {
+   if (ih == >irq.ih || ih == >irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
 * to register-based code with overflow checking below.
+* ih_soft ring doesn't have any backing hardware registers,
+* update wptr and return.
 */
wptr = le32_to_cpu(*ih->wptr_cpu);
 
@@ -461,6 +463,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev,
 {
struct amdgpu_ih_regs *ih_regs;
 
+   if (ih == >irq.ih_soft)
+   return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
-- 
2.35.1



[PATCH 2/2] drm/amdkfd: Fix circular lock dependency warning

2022-04-22 Thread Mukul Joshi
   lock(>mmap_lock#2);
[  169.092922]lock(>i_mutex_dir_key#6);
[  169.100975]lock(>mmap_lock#2);
[  169.108320]   lock(_lock);
[  169.112957]
 *** DEADLOCK ***

This commit fixes the deadlock warning by ensuring pm.mutex is not
held while holding the topology lock. For this, kfd_local_mem_info
is moved into the KFD dev struct and filled during device init.
This cached value can then be used instead of querying the value
again and again.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 7 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 3 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 7 ++-
 5 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 459f59e3d0ed..95fa7a9718bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -944,8 +944,6 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct 
kfd_process *p,
 
 bool kfd_dev_is_large_bar(struct kfd_dev *dev)
 {
-   struct kfd_local_mem_info mem_info;
-
if (debug_largebar) {
pr_debug("Simulate large-bar allocation on non large-bar 
machine\n");
return true;
@@ -954,9 +952,8 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
if (dev->use_iommu_v2)
return false;
 
-   amdgpu_amdkfd_get_local_mem_info(dev->adev, _info);
-   if (mem_info.local_mem_size_private == 0 &&
-   mem_info.local_mem_size_public > 0)
+   if (dev->local_mem_info.local_mem_size_private == 0 &&
+   dev->local_mem_info.local_mem_size_public > 0)
return true;
return false;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index afc8a7fcdad8..af1c4e054a23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -2152,7 +2152,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 * report the total FB size (public+private) as a single
 * private heap.
 */
-   amdgpu_amdkfd_get_local_mem_info(kdev->adev, _mem_info);
+   memcpy(_mem_info, >local_mem_info,
+   sizeof(struct kfd_local_mem_info));
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 62aa6c9d5123..c96d521447fc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -575,6 +575,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
if (kfd_resume(kfd))
goto kfd_resume_error;
 
+   amdgpu_amdkfd_get_local_mem_info(kfd->adev, >local_mem_info);
+
if (kfd_topology_add_device(kfd)) {
dev_err(kfd_device, "Error adding device to topology\n");
goto kfd_topology_add_device_error;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 33e7ffd8e3b5..49430c714544 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -344,6 +344,8 @@ struct kfd_dev {
 
/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
struct dev_pagemap pgmap;
+
+   struct kfd_local_mem_info local_mem_info;
 };
 
 enum kfd_mempool {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 4283afd60fa5..05089f1de4e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1112,15 +1112,12 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
uint32_t buf[7];
uint64_t local_mem_size;
int i;
-   struct kfd_local_mem_info local_mem_info;
 
if (!gpu)
return 0;
 
-   amdgpu_amdkfd_get_local_mem_info(gpu->adev, _mem_info);
-
-   local_mem_size = local_mem_info.local_mem_size_private +
-   local_mem_info.local_mem_size_public;
+   local_mem_size = gpu->local_mem_info.local_mem_size_private +
+   gpu->local_mem_info.local_mem_size_public;
 
buf[0] = gpu->pdev->devfn;
buf[1] = gpu->pdev->subsystem_vendor |
-- 
2.35.1



[PATCH 1/2] drm/amdkfd: Fix updating IO links during device removal

2022-04-22 Thread Mukul Joshi
The logic to update the IO links when a KFD device
is removed was not correct: it would miss updating
the proximity domain values for nodes where both the
node_from and node_to values were greater than the
proximity domain value of the KFD device being removed
from topology.

Fixes: 9be62cbcc62f ("drm/amdkfd: Cleanup IO links during KFD device removal")
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 8b7710b4d3ed..4283afd60fa5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1534,13 +1534,13 @@ static void kfd_topology_update_io_links(int 
proximity_domain)
list_del(>list);
dev->io_link_count--;
dev->node_props.io_links_count--;
-   } else if (iolink->node_from > proximity_domain) {
-   iolink->node_from--;
-   } else if (iolink->node_to > proximity_domain) {
-   iolink->node_to--;
+   } else {
+   if (iolink->node_from > proximity_domain)
+   iolink->node_from--;
+   if (iolink->node_to > proximity_domain)
+   iolink->node_to--;
}
}
-
}
 }
 
-- 
2.35.1



[PATCHv2] drm/amdkfd: Cleanup IO links during KFD device removal

2022-04-11 Thread Mukul Joshi
Currently, the IO-links to the device being removed from topology,
are not cleared. As a result, there would be dangling links left in
the KFD topology. This patch aims to fix the following:
1. Cleanup all IO links to the device being removed.
2. Ensure that node numbering in sysfs and nodes proximity domain
   values are consistent after the device is removed:
   a. Adding a device and removing a GPU device are made mutually
  exclusive.
   b. The global proximity domain counter is no longer required to be
  an atomic counter. A normal 32-bit counter can be used instead.
3. Update generation_count to let user-mode know that topology has
   changed due to device removal.

CC: Shuotao Xu 
Signed-off-by: Mukul Joshi 
Reviewed-by: Shuotao Xu 
---
v1->v2:
- Remove comments from inside kfd_topology_update_io_links()
  and add them as kernel-doc comments.

 drivers/gpu/drm/amd/amdkfd/kfd_crat.c |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 83 ---
 3 files changed, 78 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 1eaabd2cb41b..afc8a7fcdad8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1056,7 +1056,7 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
 * table, add corresponded reversed direction link now.
 */
if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
-   to_dev = kfd_topology_device_by_proximity_domain(id_to);
+   to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
if (!to_dev)
return -ENODEV;
/* same everything but the other direction */
@@ -2225,7 +2225,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 */
if (kdev->hive_id) {
for (nid = 0; nid < proximity_domain; ++nid) {
-   peer_dev = kfd_topology_device_by_proximity_domain(nid);
+   peer_dev = 
kfd_topology_device_by_proximity_domain_no_lock(nid);
if (!peer_dev->gpu)
continue;
if (peer_dev->gpu->hive_id != kdev->hive_id)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index e1b7e6afa920..8a43def1f638 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1016,6 +1016,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu);
 int kfd_topology_remove_device(struct kfd_dev *gpu);
 struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
uint32_t proximity_domain);
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
+   uint32_t proximity_domain);
 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 3bdcae239bc0..98a51847cd8c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -46,22 +46,32 @@ static struct list_head topology_device_list;
 static struct kfd_system_properties sys_props;
 
 static DECLARE_RWSEM(topology_lock);
-static atomic_t topology_crat_proximity_domain;
+static uint32_t topology_crat_proximity_domain;
 
-struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
uint32_t proximity_domain)
 {
struct kfd_topology_device *top_dev;
struct kfd_topology_device *device = NULL;
 
-   down_read(_lock);
-
list_for_each_entry(top_dev, _device_list, list)
if (top_dev->proximity_domain == proximity_domain) {
device = top_dev;
break;
}
 
+   return device;
+}
+
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+   uint32_t proximity_domain)
+{
+   struct kfd_topology_device *device = NULL;
+
+   down_read(_lock);
+
+   device = kfd_topology_device_by_proximity_domain_no_lock(
+   proximity_domain);
up_read(_lock);
 
return device;
@@ -1060,7 +1070,7 @@ int kfd_topology_init(void)
	down_write(&topology_lock);
	kfd_topology_update_device_list(&temp_topology_device_list,
					&topology_device_list);
-   atomic_set(_crat_pr

[PATCH] drm/amdkfd: Cleanup IO links during KFD device removal

2022-04-07 Thread Mukul Joshi
Currently, the IO-links to the device being removed from topology,
are not cleared. As a result, there would be dangling links left in
the KFD topology. This patch aims to fix the following:
1. Cleanup all IO links to the device being removed.
2. Ensure that node numbering in sysfs and nodes proximity domain
   values are consistent after the device is removed:
   a. Adding a device and removing a GPU device are made mutually
  exclusive.
   b. The global proximity domain counter is no longer required to be
  an atomic counter. A normal 32-bit counter can be used instead.
3. Update generation_count to let user-mode know that topology has
   changed due to device removal.

CC: Shuotao Xu 
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 79 ---
 3 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 1eaabd2cb41b..afc8a7fcdad8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1056,7 +1056,7 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
 * table, add corresponded reversed direction link now.
 */
if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
-   to_dev = kfd_topology_device_by_proximity_domain(id_to);
+   to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
if (!to_dev)
return -ENODEV;
/* same everything but the other direction */
@@ -2225,7 +2225,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 */
if (kdev->hive_id) {
for (nid = 0; nid < proximity_domain; ++nid) {
-   peer_dev = kfd_topology_device_by_proximity_domain(nid);
+   peer_dev = 
kfd_topology_device_by_proximity_domain_no_lock(nid);
if (!peer_dev->gpu)
continue;
if (peer_dev->gpu->hive_id != kdev->hive_id)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index e1b7e6afa920..8a43def1f638 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1016,6 +1016,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu);
 int kfd_topology_remove_device(struct kfd_dev *gpu);
 struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
uint32_t proximity_domain);
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
+   uint32_t proximity_domain);
 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 3bdcae239bc0..874a273b81f7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -46,27 +46,38 @@ static struct list_head topology_device_list;
 static struct kfd_system_properties sys_props;
 
 static DECLARE_RWSEM(topology_lock);
-static atomic_t topology_crat_proximity_domain;
+static uint32_t topology_crat_proximity_domain;
 
-struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
uint32_t proximity_domain)
 {
struct kfd_topology_device *top_dev;
struct kfd_topology_device *device = NULL;
 
-   down_read(&topology_lock);
-
	list_for_each_entry(top_dev, &topology_device_list, list)
if (top_dev->proximity_domain == proximity_domain) {
device = top_dev;
break;
}
 
+   return device;
+}
+
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+   uint32_t proximity_domain)
+{
+   struct kfd_topology_device *device = NULL;
+
+   down_read(&topology_lock);
+
+   device = kfd_topology_device_by_proximity_domain_no_lock(
+   proximity_domain);
	up_read(&topology_lock);
 
return device;
 }
 
+
 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id)
 {
struct kfd_topology_device *top_dev = NULL;
@@ -1060,7 +1071,7 @@ int kfd_topology_init(void)
	down_write(&topology_lock);
	kfd_topology_update_device_list(&temp_topology_device_list,
					&topology_device_list);
-   atomic_set(_crat_proxi

[PATCHv2 3/3] drm/amdkfd: Consolidate MQD manager functions

2022-02-07 Thread Mukul Joshi
A few MQD manager functions are duplicated for all versions of
MQD manager. Remove this duplication by moving the common
functions into kfd_mqd_manager.c file.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Add "kfd_" prefix to functions moved to kfd_mqd_manager.c.
- Also, suffix "_cp" to function names shared by CP, HIQ and DIQ.

 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 63 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  | 27 ++
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  | 76 +++
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 85 +++--
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 92 +++
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   | 75 +++
 6 files changed, 136 insertions(+), 282 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index e2825ad4d699..dd99f23e24f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -173,3 +173,66 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
}
}
 }
+
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+uint32_t pipe_id, uint32_t queue_id,
+struct queue_properties *p, struct mm_struct *mms)
+{
+   return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
+ queue_id, p->doorbell_off);
+}
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id,uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
+   pipe_id, queue_id);
+}
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+   if (mqd_mem_obj->gtt_mem) {
+   amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem);
+   kfree(mqd_mem_obj);
+   } else {
+   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+   }
+}
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+uint64_t queue_address, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
+   pipe_id, queue_id);
+}
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
+   (uint32_t __user *)p->write_ptr,
+   mms);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+enum kfd_preempt_type type,
+unsigned int timeout, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+}
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 23486a23df84..21851110f9eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -136,4 +136,31 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);
 
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+   uint32_t pipe_id, uint32_t queue_id,
+   struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id,uint32_t queue_id);
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+   struct kfd_mem_obj *mqd_mem_obj);
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+uint64_t queue_address, uint32_t pipe_id,
+uint32_t queue_id);
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   uint32_t pipe_id, uint32_t queue_id,
+   struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_typ

[PATCHv2 2/3] drm/amdkfd: Remove unused old debugger implementation

2022-02-07 Thread Mukul Joshi
Cleanup the kfd code by removing the unused old debugger
implementation.
Only a small piece of resetting wavefronts is kept and
is moved to kfd_device_queue_manager.c

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Rename AMDKFD_IOC_DBG_* to AMDKFD_IOC_DBG_*_DEPRECATED.
- Cleanup address_watch_disable(), address_watch_execute(),
  and address_watch_get_offset() from amdgpu_amdkfd_gfx_* files.

 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |   3 -
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |   3 -
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  24 -
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  25 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  96 --
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  24 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  24 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  10 -
 drivers/gpu/drm/amd/amdkfd/Makefile   |   2 -
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 290 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   | 845 --
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h   | 230 -
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c   | 158 
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h   | 293 --
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   2 -
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  59 ++
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  35 +
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  12 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   5 -
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  19 -
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |   9 -
 include/uapi/linux/kfd_ioctl.h|   8 +-
 22 files changed, 106 insertions(+), 2070 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 46cd4ee6bafb..c8935d718207 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -37,10 +37,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
-   .address_watch_disable = kgd_gfx_v9_address_watch_disable,
-   .address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
-   .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index abe93b3ff765..4191af5a3f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -289,10 +289,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
-   .address_watch_disable = kgd_gfx_v9_address_watch_disable,
-   .address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
-   .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 7b7f4b2764c1..9378fc79e9ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -671,20 +671,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct 
amdgpu_device *adev,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
-static int kgd_address_watch_disable(struct amdgpu_device *adev)
-{
-   return 0;
-}
-
-static int kgd_address_watch_execute(struct amdgpu_device *adev,
-   unsigned int watch_point_id,
-   uint32_t cntl_val,
-   uint32_t addr_hi,
-   uint32_t addr_lo)
-{
-   return 0;
-}
-
 static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd)
@@ -709,13 +695,6 @@ static int kgd_wave_control_execute

[PATCHv2 1/3] drm/amdkfd: Fix TLB flushing in KFD SVM with no HWS

2022-02-07 Thread Mukul Joshi
With no HWS, TLB flushing will not work in SVM code.
Fix this by calling kfd_flush_tlb() which works for both
HWS and no HWS case.

Signed-off-by: Mukul Joshi 
Reviewed-by: Philip Yang 
---
v1->v2:
- Don't pass adev to svm_range_map_to_gpu().
 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++--
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 41f03d165bad..058f85b432b0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1229,18 +1229,17 @@ svm_range_unmap_from_gpus(struct svm_range *prange, 
unsigned long start,
if (r)
break;
}
-   amdgpu_amdkfd_flush_gpu_tlb_pasid(pdd->dev->adev,
-   p->pasid, TLB_FLUSH_HEAVYWEIGHT);
+   kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
}
 
return r;
 }
 
 static int
-svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-struct svm_range *prange, unsigned long offset,
-unsigned long npages, bool readonly, dma_addr_t *dma_addr,
-struct amdgpu_device *bo_adev, struct dma_fence **fence)
+svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
+unsigned long offset, unsigned long npages, bool readonly,
+dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
+struct dma_fence **fence)
 {
bool table_freed = false;
uint64_t pte_flags;
@@ -1248,6 +1247,8 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
int last_domain;
int r = 0;
int64_t i, j;
+   struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+   struct amdgpu_device *adev = pdd->dev->adev;
 
last_start = prange->start + offset;
 
@@ -1305,12 +1306,8 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
if (fence)
*fence = dma_fence_get(vm->last_update);
 
-   if (table_freed) {
-   struct kfd_process *p;
-
-   p = container_of(prange->svms, struct kfd_process, svms);
-   amdgpu_amdkfd_flush_gpu_tlb_pasid(adev, p->pasid, 
TLB_FLUSH_LEGACY);
-   }
+   if (table_freed)
+   kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
 out:
return r;
 }
@@ -1351,8 +1348,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned 
long offset,
continue;
}
 
-   r = svm_range_map_to_gpu(pdd->dev->adev, 
drm_priv_to_vm(pdd->drm_priv),
-prange, offset, npages, readonly,
+   r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
 prange->dma_addr[gpuidx],
				 bo_adev, wait ? &fence : NULL);
if (r)
-- 
2.33.1



[PATCH 2/3] drm/amdkfd: Remove unused old debugger implementation

2022-02-04 Thread Mukul Joshi
Cleanup the kfd code by removing the unused old debugger
implementation.
Only a small piece of resetting wavefronts is kept and
is moved to kfd_device_queue_manager.c

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/Makefile   |   2 -
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 282 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   | 845 --
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h   | 230 -
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c   | 158 
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h   | 293 --
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   2 -
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  59 ++
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  35 +
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  12 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   5 -
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  19 -
 12 files changed, 98 insertions(+), 1844 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index c4f3aff11072..19cfbf9577b4 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -51,8 +51,6 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_events.o \
$(AMDKFD_PATH)/cik_event_interrupt.o \
$(AMDKFD_PATH)/kfd_int_process_v9.o \
-   $(AMDKFD_PATH)/kfd_dbgdev.o \
-   $(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 64e3b4e3a712..cfe12525165f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -39,7 +39,6 @@
 #include 
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
-#include "kfd_dbgmgr.h"
 #include "kfd_svm.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
@@ -580,299 +579,26 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
 static int kfd_ioctl_dbg_register(struct file *filep,
struct kfd_process *p, void *data)
 {
-   struct kfd_ioctl_dbg_register_args *args = data;
-   struct kfd_dev *dev;
-   struct kfd_dbgmgr *dbgmgr_ptr;
-   struct kfd_process_device *pdd;
-   bool create_ok;
-   long status = 0;
-
-   mutex_lock(&p->mutex);
-   pdd = kfd_process_device_data_by_id(p, args->gpu_id);
-   if (!pdd) {
-   status = -EINVAL;
-   goto err_pdd;
-   }
-   dev = pdd->dev;
-
-   if (dev->adev->asic_type == CHIP_CARRIZO) {
-   pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
-   status = -EINVAL;
-   goto err_chip_unsupp;
-   }
-
-   mutex_lock(kfd_get_dbgmgr_mutex());
-
-   /*
-* make sure that we have pdd, if this the first queue created for
-* this process
-*/
-   pdd = kfd_bind_process_to_device(dev, p);
-   if (IS_ERR(pdd)) {
-   status = PTR_ERR(pdd);
-   goto out;
-   }
-
-   if (!dev->dbgmgr) {
-   /* In case of a legal call, we have no dbgmgr yet */
-   create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
-   if (create_ok) {
-   status = kfd_dbgmgr_register(dbgmgr_ptr, p);
-   if (status != 0)
-   kfd_dbgmgr_destroy(dbgmgr_ptr);
-   else
-   dev->dbgmgr = dbgmgr_ptr;
-   }
-   } else {
-   pr_debug("debugger already registered\n");
-   status = -EINVAL;
-   }
-
-out:
-   mutex_unlock(kfd_get_dbgmgr_mutex());
-err_pdd:
-err_chip_unsupp:
-   mutex_unlock(&p->mutex);
-
-   return status;
+   return -EPERM;
 }
 
 static int kfd_ioctl_dbg_unregister(struct file *filep,
struct kfd_process *p, void *data)
 {
-   struct kfd_ioctl_dbg_unregister_args *args = data;
-   struct kfd_process_device *pdd;
-   struct kfd_dev *dev;
-   long status;
-
-   mutex_lock(&p->mutex);
-   pdd = kfd_process_device_data_by_id(p, args->gpu_id);
-   mutex_unlock(&p->mutex);
-   if (!pdd || !pdd->dev->dbgmgr)
-   return -EINVAL;
-
-   dev = pdd->dev;
-
-   if (dev->adev->asic_type == CHIP_CARRIZO) {
-   pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
-   return -EINVAL;
-   }
-
-   mutex_lock(kfd_get_dbgmgr

[PATCH 3/3] drm/amdkfd: Consolidate MQD manager functions

2022-02-04 Thread Mukul Joshi
A few MQD manager functions are duplicated for all versions of
MQD manager. Remove this duplication by moving the common
functions into kfd_mqd_manager.c file.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 63 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  | 27 
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  | 54 ---
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 61 -
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 68 ---
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   | 53 ---
 6 files changed, 90 insertions(+), 236 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index e2825ad4d699..f4a6af98db2d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -173,3 +173,66 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
}
}
 }
+
+int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+uint32_t pipe_id, uint32_t queue_id,
+struct queue_properties *p, struct mm_struct *mms)
+{
+   return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
+ queue_id, p->doorbell_off);
+}
+
+int destroy_mqd(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id,uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
+   pipe_id, queue_id);
+}
+
+void free_mqd(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+   if (mqd_mem_obj->gtt_mem) {
+   amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem);
+   kfree(mqd_mem_obj);
+   } else {
+   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+   }
+}
+
+bool is_occupied(struct mqd_manager *mm, void *mqd,
+uint64_t queue_address, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
+   pipe_id, queue_id);
+}
+
+int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
+   (uint32_t __user *)p->write_ptr,
+   mms);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+enum kfd_preempt_type type,
+unsigned int timeout, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+}
+
+bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 23486a23df84..76f20637b938 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -136,4 +136,31 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);
 
+int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+   uint32_t pipe_id, uint32_t queue_id,
+   struct queue_properties *p, struct mm_struct *mms);
+
+int destroy_mqd(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id,uint32_t queue_id);
+
+void free_mqd(struct mqd_manager *mm, void *mqd,
+   struct kfd_mem_obj *mqd_mem_obj);
+
+bool is_occupied(struct mqd_manager *mm, void *mqd,
+uint64_t queue_address, uint32_t pipe_id,
+uint32_t queue_id);
+
+int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   uint32_t pipe_id, uint32_t queue_id,
+   struct queue_properties *p, struct mm_struct *mms);
+
+int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type,unsigned int timeout,
+   uint32_t pipe_id, uint32_t queue_id);
+
+bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+   uint64_t queue_address, uint32_t pipe_id,
+   uint32_t

[PATCH 1/3] drm/amdkfd: Fix TLB flushing in KFD SVM with no HWS

2022-02-04 Thread Mukul Joshi
With no HWS, TLB flushing will not work in SVM code.
Fix this by calling kfd_flush_tlb() which works for both
HWS and no HWS case.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 16 ++--
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 41f03d165bad..b1315c97b952 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1229,15 +1229,14 @@ svm_range_unmap_from_gpus(struct svm_range *prange, 
unsigned long start,
if (r)
break;
}
-   amdgpu_amdkfd_flush_gpu_tlb_pasid(pdd->dev->adev,
-   p->pasid, TLB_FLUSH_HEAVYWEIGHT);
+   kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
}
 
return r;
 }
 
 static int
-svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+svm_range_map_to_gpu(struct amdgpu_device *adev, struct kfd_process_device 
*pdd,
 struct svm_range *prange, unsigned long offset,
 unsigned long npages, bool readonly, dma_addr_t *dma_addr,
 struct amdgpu_device *bo_adev, struct dma_fence **fence)
@@ -1248,6 +1247,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
int last_domain;
int r = 0;
int64_t i, j;
+   struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
 
last_start = prange->start + offset;
 
@@ -1305,12 +1305,8 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
if (fence)
*fence = dma_fence_get(vm->last_update);
 
-   if (table_freed) {
-   struct kfd_process *p;
-
-   p = container_of(prange->svms, struct kfd_process, svms);
-   amdgpu_amdkfd_flush_gpu_tlb_pasid(adev, p->pasid, 
TLB_FLUSH_LEGACY);
-   }
+   if (table_freed)
+   kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
 out:
return r;
 }
@@ -1351,7 +1347,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned 
long offset,
continue;
}
 
-   r = svm_range_map_to_gpu(pdd->dev->adev, 
drm_priv_to_vm(pdd->drm_priv),
+   r = svm_range_map_to_gpu(pdd->dev->adev, pdd,
 prange, offset, npages, readonly,
 prange->dma_addr[gpuidx],
				 bo_adev, wait ? &fence : NULL);
-- 
2.33.1



[PATCH 2/2] drm/amdgpu: Fix RAS page retirement with mode2 reset on Aldebaran

2021-10-11 Thread Mukul Joshi
During mode2 reset, the GPU is temporarily removed from the
mgpu_info list. As a result, page retirement fails because it
cannot find the GPU in the GPU list.
To fix this, create our own list of GPUs that support MCE notifier
based page retirement and use that list to check if the UMC error
occurred on a GPU that supports MCE notifier based page retirement.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e8875351967e..e8d88c77eb46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -112,7 +112,12 @@ static bool amdgpu_ras_check_bad_page_unlock(struct 
amdgpu_ras *con,
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
 #ifdef CONFIG_X86_MCE_AMD
-static void amdgpu_register_bad_pages_mca_notifier(void);
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
+struct mce_notifier_adev_list {
+   struct amdgpu_device *devs[MAX_GPU_INSTANCE];
+   int num_gpu;
+};
+static struct mce_notifier_adev_list mce_adev_list;
 #endif
 
 void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
@@ -2108,7 +2113,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 #ifdef CONFIG_X86_MCE_AMD
if ((adev->asic_type == CHIP_ALDEBARAN) &&
(adev->gmc.xgmi.connected_to_cpu))
-   amdgpu_register_bad_pages_mca_notifier();
+   amdgpu_register_bad_pages_mca_notifier(adev);
 #endif
return 0;
 
@@ -2605,24 +2610,18 @@ void amdgpu_release_ras_context(struct amdgpu_device 
*adev)
 #ifdef CONFIG_X86_MCE_AMD
 static struct amdgpu_device *find_adev(uint32_t node_id)
 {
-   struct amdgpu_gpu_instance *gpu_instance;
int i;
struct amdgpu_device *adev = NULL;
 
-   mutex_lock(&mgpu_info.mutex);
-
-   for (i = 0; i < mgpu_info.num_gpu; i++) {
-   gpu_instance = &(mgpu_info.gpu_ins[i]);
-   adev = gpu_instance->adev;
+   for (i = 0; i < mce_adev_list.num_gpu; i++) {
+   adev = mce_adev_list.devs[i];
 
-   if (adev->gmc.xgmi.connected_to_cpu &&
+   if (adev && adev->gmc.xgmi.connected_to_cpu &&
adev->gmc.xgmi.physical_node_id == node_id)
break;
adev = NULL;
}
 
-   mutex_unlock(&mgpu_info.mutex);
-
return adev;
 }
 
@@ -2718,8 +2717,9 @@ static struct notifier_block amdgpu_bad_page_nb = {
.priority   = MCE_PRIO_UC,
 };
 
-static void amdgpu_register_bad_pages_mca_notifier(void)
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
 {
+   mce_adev_list.devs[mce_adev_list.num_gpu++] = adev;
/*
 * Register the x86 notifier only once
 * with MCE subsystem.
-- 
2.33.0



[PATCH 1/2] drm/amdgpu: Enable RAS error injection after mode2 reset on Aldebaran

2021-10-11 Thread Mukul Joshi
Add the missing call to re-enable RAS error injections on the Aldebaran
mode2 reset code path.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/aldebaran.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 148f6c3343ab..bcfdb63b1d42 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -307,6 +307,8 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device 
*adev)
adev->ip_blocks[i].status.late_initialized = true;
}
 
+   amdgpu_ras_set_error_query_ready(adev, true);
+
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 
-- 
2.33.0



[PATCHv4 2/2] drm/amdgpu: Register MCE notifier for Aldebaran RAS

2021-09-23 Thread Mukul Joshi
On Aldebaran, GPU driver will handle bad page retirement
for GPU memory even though UMC is host managed. As a result,
register a bad page retirement handler on the mce notifier
chain to retire bad pages on Aldebaran.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Use smca_get_bank_type() to determine MCA bank.
- Envelope the changes under #ifdef CONFIG_X86_MCE_AMD.
- Use MCE_PRIORITY_UC instead of MCE_PRIO_ACCEL as we are
  only handling uncorrectable errors.
- Use macros to determine UMC instance and channel instance
  where the uncorrectable error occured.

v2->v3:
- Move the check for correctable error before find_adev().
- Fix a NULL pointer dereference if find_adev() returns NULL.

v3->v4:
- Update the commit log to specify page retirement for GPU
  memory only.
- Fix the mask passed to XEC macro.

 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 141 
 1 file changed, 141 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e1c34eef76b7..02841a0efbb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -35,7 +35,11 @@
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include "atom.h"
+#ifdef CONFIG_X86_MCE_AMD
+#include 
 
+static bool notifier_registered;
+#endif
 static const char *RAS_FS_NAME = "ras";
 
 const char *ras_error_string[] = {
@@ -107,6 +111,9 @@ static bool amdgpu_ras_check_bad_page_unlock(struct 
amdgpu_ras *con,
uint64_t addr);
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
+#ifdef CONFIG_X86_MCE_AMD
+static void amdgpu_register_bad_pages_mca_notifier(void);
+#endif
 
 void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
 {
@@ -2089,6 +2096,11 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
		adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, 
con->eeprom_control.ras_num_recs);
}
 
+#ifdef CONFIG_X86_MCE_AMD
+   if ((adev->asic_type == CHIP_ALDEBARAN) &&
+   (adev->gmc.xgmi.connected_to_cpu))
+   amdgpu_register_bad_pages_mca_notifier();
+#endif
return 0;
 
 free:
@@ -2552,3 +2564,132 @@ void amdgpu_release_ras_context(struct amdgpu_device 
*adev)
kfree(con);
}
 }
+
+#ifdef CONFIG_X86_MCE_AMD
+static struct amdgpu_device *find_adev(uint32_t node_id)
+{
+   struct amdgpu_gpu_instance *gpu_instance;
+   int i;
+   struct amdgpu_device *adev = NULL;
+
+   mutex_lock(&mgpu_info.mutex);
+
+   for (i = 0; i < mgpu_info.num_gpu; i++) {
+   gpu_instance = &(mgpu_info.gpu_ins[i]);
+   adev = gpu_instance->adev;
+
+   if (adev->gmc.xgmi.connected_to_cpu &&
+   adev->gmc.xgmi.physical_node_id == node_id)
+   break;
+   adev = NULL;
+   }
+
+   mutex_unlock(&mgpu_info.mutex);
+
+   return adev;
+}
+
+#define GET_MCA_IPID_GPUID(m)  (((m) >> 44) & 0xF)
+#define GET_UMC_INST(m)(((m) >> 21) & 0x7)
+#define GET_CHAN_INDEX(m)  ((((m) >> 12) & 0x3) | (((m) >> 18) & 0x4))
+#define GPU_ID_OFFSET  8
+
+static int amdgpu_bad_page_notifier(struct notifier_block *nb,
+   unsigned long val, void *data)
+{
+   struct mce *m = (struct mce *)data;
+   struct amdgpu_device *adev = NULL;
+   uint32_t gpu_id = 0;
+   uint32_t umc_inst = 0;
+   uint32_t ch_inst, channel_index = 0;
+   struct ras_err_data err_data = {0, 0, 0, NULL};
+   struct eeprom_table_record err_rec;
+   uint64_t retired_page;
+
+   /*
+* If the error was generated in UMC_V2, which belongs to GPU UMCs,
+* and error occurred in DramECC (Extended error code = 0) then only
+* process the error, else bail out.
+*/
+   if (!m || !((smca_get_bank_type(m->bank) == SMCA_UMC_V2) &&
+   (XEC(m->status, 0x3f) == 0x0)))
+   return NOTIFY_DONE;
+
+   /*
+* If it is correctable error, return.
+*/
+   if (mce_is_correctable(m))
+   return NOTIFY_OK;
+
+   /*
+* GPU Id is offset by GPU_ID_OFFSET in MCA_IPID_UMC register.
+*/
+   gpu_id = GET_MCA_IPID_GPUID(m->ipid) - GPU_ID_OFFSET;
+
+   adev = find_adev(gpu_id);
+   if (!adev) {
+   DRM_WARN("%s: Unable to find adev for gpu_id: %d\n", __func__,
+   gpu_id);
+   return NOTIFY_DONE;
+   }
+
+   /*
+* If it is uncorrectable error, then find out UMC instance and
+* channel index.
+*/
+   umc_inst = GET_UMC_INST(m->ipid);
+   ch_in

[PATCHv3 2/2] drm/amdgpu: Register MCE notifier for Aldebaran RAS

2021-09-22 Thread Mukul Joshi
On Aldebaran, GPU driver will handle bad page retirement
even though UMC is host managed. As a result, register a
bad page retirement handler on the mce notifier chain to
retire bad pages on Aldebaran.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Use smca_get_bank_type() to determine MCA bank.
- Envelope the changes under #ifdef CONFIG_X86_MCE_AMD.
- Use MCE_PRIORITY_UC instead of MCE_PRIO_ACCEL as we are
  only handling uncorrectable errors.
- Use macros to determine UMC instance and channel instance
  where the uncorrectable error occurred.

v2->v3:
- Move the check for correctable error before find_adev().
- Fix a NULL pointer dereference if find_adev() returns NULL.

 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 141 
 1 file changed, 141 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 912ea1f9fd04..c1e806762e41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -35,7 +35,11 @@
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include "atom.h"
+#ifdef CONFIG_X86_MCE_AMD
+#include <asm/mce.h>
 
+static bool notifier_registered;
+#endif
 static const char *RAS_FS_NAME = "ras";
 
 const char *ras_error_string[] = {
@@ -107,6 +111,9 @@ static bool amdgpu_ras_check_bad_page_unlock(struct 
amdgpu_ras *con,
uint64_t addr);
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
+#ifdef CONFIG_X86_MCE_AMD
+static void amdgpu_register_bad_pages_mca_notifier(void);
+#endif
 
 void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
 {
@@ -2089,6 +2096,11 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, 
con->eeprom_control.ras_num_recs);
}
 
+#ifdef CONFIG_X86_MCE_AMD
+   if ((adev->asic_type == CHIP_ALDEBARAN) &&
+   (adev->gmc.xgmi.connected_to_cpu))
+   amdgpu_register_bad_pages_mca_notifier();
+#endif
return 0;
 
 free:
@@ -2583,3 +2595,132 @@ void amdgpu_release_ras_context(struct amdgpu_device 
*adev)
kfree(con);
}
 }
+
+#ifdef CONFIG_X86_MCE_AMD
+static struct amdgpu_device *find_adev(uint32_t node_id)
+{
+   struct amdgpu_gpu_instance *gpu_instance;
+   int i;
+   struct amdgpu_device *adev = NULL;
+
+   mutex_lock(&mgpu_info.mutex);
+
+   for (i = 0; i < mgpu_info.num_gpu; i++) {
+   gpu_instance = &(mgpu_info.gpu_ins[i]);
+   adev = gpu_instance->adev;
+
+   if (adev->gmc.xgmi.connected_to_cpu &&
+   adev->gmc.xgmi.physical_node_id == node_id)
+   break;
+   adev = NULL;
+   }
+
+   mutex_unlock(&mgpu_info.mutex);
+
+   return adev;
+}
+
+#define GET_MCA_IPID_GPUID(m)  (((m) >> 44) & 0xF)
+#define GET_UMC_INST(m)(((m) >> 21) & 0x7)
+#define GET_CHAN_INDEX(m)  ((((m) >> 12) & 0x3) | (((m) >> 18) & 0x4))
+#define GPU_ID_OFFSET  8
+
+static int amdgpu_bad_page_notifier(struct notifier_block *nb,
+   unsigned long val, void *data)
+{
+   struct mce *m = (struct mce *)data;
+   struct amdgpu_device *adev = NULL;
+   uint32_t gpu_id = 0;
+   uint32_t umc_inst = 0;
+   uint32_t ch_inst, channel_index = 0;
+   struct ras_err_data err_data = {0, 0, 0, NULL};
+   struct eeprom_table_record err_rec;
+   uint64_t retired_page;
+
+   /*
+* If the error was generated in UMC_V2, which belongs to GPU UMCs,
+* and error occurred in DramECC (Extended error code = 0) then only
+* process the error, else bail out.
+*/
+   if (!m || !((smca_get_bank_type(m->bank) == SMCA_UMC_V2) &&
+   (XEC(m->status, 0x1f) == 0x0)))
+   return NOTIFY_DONE;
+
+   /*
+* If it is correctable error, return.
+*/
+   if (mce_is_correctable(m))
+   return NOTIFY_OK;
+
+   /*
+* GPU Id is offset by GPU_ID_OFFSET in MCA_IPID_UMC register.
+*/
+   gpu_id = GET_MCA_IPID_GPUID(m->ipid) - GPU_ID_OFFSET;
+
+   adev = find_adev(gpu_id);
+   if (!adev) {
+   DRM_WARN("%s: Unable to find adev for gpu_id: %d\n", __func__,
+   gpu_id);
+   return NOTIFY_DONE;
+   }
+
+   /*
+* If it is uncorrectable error, then find out UMC instance and
+* channel index.
+*/
+   umc_inst = GET_UMC_INST(m->ipid);
+   ch_inst = GET_CHAN_INDEX(m->ipid);
+
+   dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, 
chan_idx: %d"

[PATCHv2 2/2] drm/amdgpu: Register MCE notifier for Aldebaran RAS

2021-09-12 Thread Mukul Joshi
On Aldebaran, GPU driver will handle bad page retirement
even though UMC is host managed. As a result, register a
bad page retirement handler on the mce notifier chain to
retire bad pages on Aldebaran.

v1->v2:
- Use smca_get_bank_type() to determine MCA bank.
- Envelope the changes under #ifdef CONFIG_X86_MCE_AMD.
- Use MCE_PRIORITY_UC instead of MCE_PRIO_ACCEL as we are
  only handling uncorrectable errors.
- Use macros to determine UMC instance and channel instance
  where the uncorrectable error occurred.
- Update the headline.

Signed-off-by: Mukul Joshi 
Link: https://lore.kernel.org/amd-gfx/20210512013058.6827-1-mukul.jo...@amd.com/
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 142 
 1 file changed, 142 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b5332db4d287..35cfcc71ff94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -35,7 +35,11 @@
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include "atom.h"
+#ifdef CONFIG_X86_MCE_AMD
+#include <asm/mce.h>
 
+static bool notifier_registered;
+#endif
 static const char *RAS_FS_NAME = "ras";
 
 const char *ras_error_string[] = {
@@ -86,6 +90,9 @@ static bool amdgpu_ras_check_bad_page_unlock(struct 
amdgpu_ras *con,
uint64_t addr);
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
+#ifdef CONFIG_X86_MCE_AMD
+static void amdgpu_register_bad_pages_mca_notifier(void);
+#endif
 
 void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
 {
@@ -2018,6 +2025,11 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, 
con->eeprom_control.ras_num_recs);
}
 
+#ifdef CONFIG_X86_MCE_AMD
+   if ((adev->asic_type == CHIP_ALDEBARAN) &&
+   (adev->gmc.xgmi.connected_to_cpu))
+   amdgpu_register_bad_pages_mca_notifier();
+#endif
return 0;
 
 free:
@@ -2511,3 +2523,133 @@ void amdgpu_release_ras_context(struct amdgpu_device 
*adev)
kfree(con);
}
 }
+
+#ifdef CONFIG_X86_MCE_AMD
+static struct amdgpu_device *find_adev(uint32_t node_id)
+{
+   struct amdgpu_gpu_instance *gpu_instance;
+   int i;
+   struct amdgpu_device *adev = NULL;
+
+   mutex_lock(&mgpu_info.mutex);
+
+   for (i = 0; i < mgpu_info.num_gpu; i++) {
+   gpu_instance = &(mgpu_info.gpu_ins[i]);
+   adev = gpu_instance->adev;
+
+   if (adev->gmc.xgmi.connected_to_cpu &&
+   adev->gmc.xgmi.physical_node_id == node_id)
+   break;
+   adev = NULL;
+   }
+
+   mutex_unlock(&mgpu_info.mutex);
+
+   return adev;
+}
+
+#define GET_MCA_IPID_GPUID(m)  (((m) >> 44) & 0xF)
+#define GET_UMC_INST(m)(((m) >> 21) & 0x7)
+#define GET_CHAN_INDEX(m)  ((((m) >> 12) & 0x3) | (((m) >> 18) & 0x4))
+#define GPU_ID_OFFSET  8
+
+static int amdgpu_bad_page_notifier(struct notifier_block *nb,
+   unsigned long val, void *data)
+{
+   struct mce *m = (struct mce *)data;
+   struct amdgpu_device *adev = NULL;
+   uint32_t gpu_id = 0;
+   uint32_t umc_inst = 0;
+   uint32_t ch_inst, channel_index = 0;
+   struct ras_err_data err_data = {0, 0, 0, NULL};
+   struct eeprom_table_record err_rec;
+   uint64_t retired_page;
+
+   /*
+* If the error was generated in UMC_V2, which belongs to GPU UMCs,
+* and error occurred in DramECC (Extended error code = 0) then only
+* process the error, else bail out.
+*/
+   if (!m || !((smca_get_bank_type(m->bank) == SMCA_UMC_V2) &&
+   (XEC(m->status, 0x1f) == 0x0)))
+   return NOTIFY_DONE;
+
+   /*
+* GPU Id is offset by GPU_ID_OFFSET in MCA_IPID_UMC register.
+*/
+   gpu_id = GET_MCA_IPID_GPUID(m->ipid) - GPU_ID_OFFSET;
+
+   adev = find_adev(gpu_id);
+   if (!adev) {
+   dev_warn(adev->dev, "%s: Unable to find adev for gpu_id: %d\n",
+__func__, gpu_id);
+   return NOTIFY_DONE;
+   }
+
+   /*
+* If it is correctable error, return.
+*/
+   if (mce_is_correctable(m)) {
+   return NOTIFY_OK;
+   }
+
+   /*
+* If it is uncorrectable error, then find out UMC instance and
+* channel index.
+*/
+   umc_inst = GET_UMC_INST(m->ipid);
+   ch_inst = GET_CHAN_INDEX(m->ipid);
+
+   dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, 
chan_idx: %d",
+

[PATCHv2 1/2] x86/MCE/AMD: Export smca_get_bank_type symbol

2021-09-12 Thread Mukul Joshi
Export smca_get_bank_type for use in the AMD GPU
driver to determine MCA bank while handling correctable
and uncorrectable errors in GPU UMC.

v1->v2:
- Drop the function is_smca_umc_v2().
- Drop the patch to introduce a new MCE priority (MCE_PRIO_ACCEL)
  for GPU/accelerator cards.

Signed-off-by: Mukul Joshi 
---
 arch/x86/include/asm/mce.h| 2 +-
 arch/x86/kernel/cpu/mce/amd.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index fc3d36f1f9d0..d90d3ccb583a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -358,7 +358,7 @@ extern int mce_threshold_remove_device(unsigned int cpu);
 
 void mce_amd_feature_init(struct cpuinfo_x86 *c);
 int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
-
+enum smca_bank_types smca_get_bank_type(unsigned int bank);
 #else
 
 static inline int mce_threshold_create_device(unsigned int cpu)
{ return 0; };
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 67a337672ee4..c9272c53026e 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -123,7 +123,7 @@ const char *smca_get_long_name(enum smca_bank_types t)
 }
 EXPORT_SYMBOL_GPL(smca_get_long_name);
 
-static enum smca_bank_types smca_get_bank_type(unsigned int bank)
+enum smca_bank_types smca_get_bank_type(unsigned int bank)
 {
struct smca_bank *b;
 
@@ -136,6 +136,7 @@ static enum smca_bank_types smca_get_bank_type(unsigned int 
bank)
 
return b->hwid->bank_type;
 }
+EXPORT_SYMBOL_GPL(smca_get_bank_type);
 
 static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype } */
-- 
2.17.1



[PATCH] drm/amdkfd: CWSR with sw scheduler on Aldebaran and Arcturus

2021-08-20 Thread Mukul Joshi
Program trap handler settings to enable CWSR with software scheduler
on Aldebaran and Arcturus.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c  | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h| 2 ++
 4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index a5434b713856..46cd4ee6bafb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -44,4 +44,5 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
+   .program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 6409d6b1b2df..5a7f680bcb3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -305,5 +305,6 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base =
kgd_gfx_v9_set_vm_context_page_table_base,
-   .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy
+   .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+   .program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 154244916727..bcc1cbeb8799 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -882,7 +882,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int 
pasid,
adev->gfx.cu_info.max_waves_per_simd;
 }
 
-static void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
+void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
 uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index e64deba8900f..c63591106879 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -65,3 +65,5 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev 
*kgd,
uint32_t vmid, uint64_t page_table_base);
 void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
int *pasid_wave_cnt, int *max_waves_per_cu);
+void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
+   uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
-- 
2.17.1



[PATCH] drm/amdkfd: CWSR with software scheduler

2021-08-09 Thread Mukul Joshi
This patch adds support to program trap handler settings
when loading driver with software scheduler (sched_policy=2).

Signed-off-by: Mukul Joshi 
Suggested-by: Jay Cornwall 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 31 +
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  | 31 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 33 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 20 +--
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  3 ++
 5 files changed, 115 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 491acdf92f73..960acf68150a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -560,6 +560,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
type = RESET_WAVES;
break;
+   case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+   type = SAVE_WAVES;
+   break;
default:
type = DRAIN_PIPE;
break;
@@ -754,6 +757,33 @@ static void set_vm_context_page_table_base(struct kgd_dev 
*kgd, uint32_t vmid,
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
 }
 
+static void program_trap_handler_settings(struct kgd_dev *kgd,
+   uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+{
+   struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+   lock_srbm(kgd, 0, 0, 0, vmid);
+
+   /*
+* Program TBA registers
+*/
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+   lower_32_bits(tba_addr >> 8));
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+   upper_32_bits(tba_addr >> 8) |
+   (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+   /*
+* Program TMA registers
+*/
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+   lower_32_bits(tma_addr >> 8));
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+   upper_32_bits(tma_addr >> 8));
+
+   unlock_srbm(kgd);
+}
+
 const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -774,4 +804,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info =
get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
+   .program_trap_handler_settings = program_trap_handler_settings,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index 1f5620cc3570..dac0d751d5af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -537,6 +537,9 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
type = RESET_WAVES;
break;
+   case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+   type = SAVE_WAVES;
+   break;
default:
type = DRAIN_PIPE;
break;
@@ -658,6 +661,33 @@ static void set_vm_context_page_table_base_v10_3(struct 
kgd_dev *kgd, uint32_t v
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
 }
 
+static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd,
+   uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+{
+   struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+   lock_srbm(kgd, 0, 0, 0, vmid);
+
+   /*
+* Program TBA registers
+*/
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+   lower_32_bits(tba_addr >> 8));
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+   upper_32_bits(tba_addr >> 8) |
+   (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+   /*
+* Program TMA registers
+*/
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+   lower_32_bits(tma_addr >> 8));
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+upper_32_bits(tma_addr >> 8));
+
+   unlock_srbm(kgd);
+}
+
 #if 0
 uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd,
uint32_t trap_debug_wave_launch_mode,
@@ -820,6 +850,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.address_watch_get_offset = address_watch_get_offset_v10_3,
.get_atc_vmid_pasid_mapping_info = NULL,
.set_vm_context_page_table_base = set_

[PATCH] drm/amdgpu: Fix channel_index table layout for Aldebaran

2021-07-29 Thread Mukul Joshi
Fix the channel_index table layout to fetch the correct
channel_index when calculating physical address from
normalized address during page retirement.
Also, fix the number of UMC instances and number of channels
within each UMC instance for Aldebaran.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 16 
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.h |  4 ++--
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 7cf653f9e9a7..097230b5e946 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1171,8 +1171,8 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
break;
case CHIP_ALDEBARAN:
adev->umc.max_ras_err_cnt_per_query = 
UMC_V6_7_TOTAL_CHANNEL_NUM;
-   adev->umc.channel_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
-   adev->umc.umc_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
+   adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
+   adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
if (!adev->gmc.xgmi.connected_to_cpu)
adev->umc.ras_funcs = &umc_v6_7_ras_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c 
b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 7da12110425c..bb30336b1e8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -30,17 +30,17 @@
 
 const uint32_t

umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]
 = {
-   {28, 12, 6, 22},{19, 3, 9, 25},
-   {20, 4, 30, 14},{11, 27, 1, 17},
-   {24, 8, 2, 18}, {15, 31, 5, 21},
-   {16, 0, 26, 10},{7, 23, 29, 13}
+   {28, 20, 24, 16, 12, 4, 8, 0},
+   {6, 30, 2, 26, 22, 14, 18, 10},
+   {19, 11, 15, 7, 3, 27, 31, 23},
+   {9, 1, 5, 29, 25, 17, 21, 13}
 };
 const uint32_t

umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]
 = {
-   {19, 3, 9, 25}, {28, 12, 6, 22},
-   {11, 27, 1, 17},{20, 4, 30, 14},
-   {15, 31, 5, 21},{24, 8, 2, 18},
-   {7, 23, 29, 13},{16, 0, 26, 10}
+   {19, 11, 15, 7, 3, 27, 31, 23},
+   {9, 1, 5, 29, 25, 17, 21, 13},
+   {28, 20, 24, 16, 12, 4, 8, 0},
+   {6, 30, 2, 26, 22, 14, 18, 10},
 };
 
 static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h 
b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
index 81b8f1844091..57f2557e7aca 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
@@ -36,9 +36,9 @@
 #define UMC_V6_7_INST_DIST 0x4
 
 /* number of umc channel instance with memory map register access */
-#define UMC_V6_7_CHANNEL_INSTANCE_NUM  4
+#define UMC_V6_7_UMC_INSTANCE_NUM  4
 /* number of umc instance with memory map register access */
-#define UMC_V6_7_UMC_INSTANCE_NUM  8
+#define UMC_V6_7_CHANNEL_INSTANCE_NUM  8
 /* total channel instances in one umc block */
 #define UMC_V6_7_TOTAL_CHANNEL_NUM (UMC_V6_7_CHANNEL_INSTANCE_NUM * 
UMC_V6_7_UMC_INSTANCE_NUM)
 /* UMC regiser per channel offset */
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Conditionally reset SDMA RAS error counts

2021-06-29 Thread Mukul Joshi
Reset SDMA RAS error counts during init only if persistent
EDC harvesting is not supported.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index f6881d99609b..8931000dcd41 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1896,8 +1896,11 @@ static int sdma_v4_0_late_init(void *handle)
 
sdma_v4_0_setup_ulv(adev);
 
-   if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count)
-   adev->sdma.funcs->reset_ras_error_count(adev);
+   if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
+   if (adev->sdma.funcs &&
+   adev->sdma.funcs->reset_ras_error_count)
+   adev->sdma.funcs->reset_ras_error_count(adev);
+   }
 
if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
return adev->sdma.funcs->ras_late_init(adev, _info);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Correctly clear GCEA error status

2021-05-25 Thread Mukul Joshi
While clearing GCEA error status, do not clear the bits
set by RAS TA.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 87ec96a18a5d..c0352dcc89be 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -1676,13 +1676,14 @@ static void gfx_v9_4_2_reset_ea_err_status(struct 
amdgpu_device *adev)
uint32_t i, j;
uint32_t value;
 
-   value = REG_SET_FIELD(0, GCEA_ERR_STATUS, CLEAR_ERROR_STATUS, 0x1);
-
	mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
 j++) {
gfx_v9_4_2_select_se_sh(adev, i, 0, j);
+   value = RREG32(SOC15_REG_ENTRY_OFFSET(
+   gfx_v9_4_2_ea_err_status_regs));
+   value = REG_SET_FIELD(value, GCEA_ERR_STATUS, 
CLEAR_ERROR_STATUS, 0x1);

WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), value);
}
}
@@ -1734,6 +1735,7 @@ static void gfx_v9_4_2_query_ea_err_status(struct 
amdgpu_device *adev)
gfx_v9_4_2_select_se_sh(adev, i, 0, j);
reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
gfx_v9_4_2_ea_err_status_regs));
+
if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, 
SDP_RDRSP_STATUS) ||
REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, 
SDP_WRRSP_STATUS) ||
REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, 
SDP_RDRSP_DATAPARITY_ERROR)) {
@@ -1741,7 +1743,9 @@ static void gfx_v9_4_2_query_ea_err_status(struct 
amdgpu_device *adev)
j, reg_value);
}
/* clear after read */
-   
WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
+   reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 0x1);
+   
WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), reg_value);
}
}
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Query correct register for DF hashing on Aldebaran

2021-05-18 Thread Mukul Joshi
For Aldebaran, driver needs to query DramMegaBaseAddress to
check if DF hashing is enabled.

Signed-off-by: Mukul Joshi 
Acked-by: Alex Deucher 
Reviewed-by: Harish Kasiviswanathan 
---
 drivers/gpu/drm/amd/amdgpu/df_v3_6.c| 9 +
 drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h | 3 +++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c 
b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 36ba229576d8..14514a145c17 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -277,13 +277,14 @@ static u32 df_v3_6_get_fb_channel_number(struct 
amdgpu_device *adev)
 {
u32 tmp;
 
-   tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
-   if (adev->asic_type == CHIP_ALDEBARAN)
+   if (adev->asic_type == CHIP_ALDEBARAN) {
+   tmp = RREG32_SOC15(DF, 0, mmDF_GCM_AON0_DramMegaBaseAddress0);
tmp &=
ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
-   else
+   } else {
+   tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
-
+   }
tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
 
return tmp;
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h 
b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
index bb2c9c7a18df..bd37aa6b6560 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
@@ -33,6 +33,9 @@
 #define mmDF_CS_UMC_AON0_DramBaseAddress0  
0x0044
 #define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX 
0
 
+#define mmDF_GCM_AON0_DramMegaBaseAddress0 
0x0064
+#define mmDF_GCM_AON0_DramMegaBaseAddress0_BASE_IDX
0
+
 #define smnPerfMonCtlLo0   0x01d440UL
 #define smnPerfMonCtlHi0   0x01d444UL
 #define smnPerfMonCtlLo1   0x01d450UL
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Register bad page handler for Aldebaran

2021-05-11 Thread Mukul Joshi
On Aldebaran, GPU driver will handle bad page retirement
even though UMC is host managed. As a result, register a
bad page retirement handler on the mce notifier chain to
retire bad pages on Aldebaran.

Signed-off-by: Mukul Joshi 
Reviewed-by: John Clements 
Acked-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 154 
 1 file changed, 154 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b1c57a5b6e89..02263f509b36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -34,7 +34,9 @@
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include "atom.h"
+#include <asm/mce.h>
 
+static bool notifier_registered;
 static const char *RAS_FS_NAME = "ras";
 
 const char *ras_error_string[] = {
@@ -73,6 +75,11 @@ const char *ras_block_string[] = {
 /* typical ECC bad page rate(1 bad page per 100MB VRAM) */
 #define RAS_BAD_PAGE_RATE  (100 * 1024 * 1024ULL)
 
+#define GET_MCA_IPID_GPUID(m)  (((m) >> 44) & 0xF)
+#define GET_UMC_INST_NIBBLE(m) (((m) >> 20) & 0xF)
+#define GET_CHAN_INDEX_NIBBLE(m)   (((m) >> 12) & 0xF)
+#define GPU_ID_OFFSET  8
+
 enum amdgpu_ras_retire_page_reservation {
AMDGPU_RAS_RETIRE_PAGE_RESERVED,
AMDGPU_RAS_RETIRE_PAGE_PENDING,
@@ -85,6 +92,7 @@ static bool amdgpu_ras_check_bad_page_unlock(struct 
amdgpu_ras *con,
uint64_t addr);
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
+static void amdgpu_register_bad_pages_mca_notifier(void);
 
 void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
 {
@@ -1978,6 +1986,10 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
goto free;
}
 
+   if ((adev->asic_type == CHIP_ALDEBARAN) &&
+   (adev->gmc.xgmi.connected_to_cpu))
+   amdgpu_register_bad_pages_mca_notifier();
+
return 0;
 
 free:
@@ -2427,3 +2439,145 @@ void amdgpu_release_ras_context(struct amdgpu_device 
*adev)
kfree(con);
}
 }
+
+static struct amdgpu_device *find_adev(uint32_t node_id)
+{
+   struct amdgpu_gpu_instance *gpu_instance;
+   int i;
+   struct amdgpu_device *adev = NULL;
+
+   mutex_lock(&mgpu_info.mutex);
+
+   for (i = 0; i < mgpu_info.num_gpu; i++) {
+   gpu_instance = &(mgpu_info.gpu_ins[i]);
+   adev = gpu_instance->adev;
+
+   if (adev->gmc.xgmi.connected_to_cpu &&
+   adev->gmc.xgmi.physical_node_id == node_id)
+   break;
+   adev = NULL;
+   }
+
+   mutex_unlock(&mgpu_info.mutex);
+
+   return adev;
+}
+
+static void find_umc_inst_chan_index(struct mce *m, uint32_t *umc_inst,
+uint32_t *chan_index)
+{
+   uint32_t val1 = 0;
+   uint32_t val2 = 0;
+   uint32_t rem = 0;
+
+   /*
+* Bit 20-23 provides the UMC instance nibble.
+* Bit 12-15 provides the channel index nibble.
+*/
+   val1 = GET_UMC_INST_NIBBLE(m->ipid);
+   val2 = GET_CHAN_INDEX_NIBBLE(m->ipid);
+
+   *umc_inst = val1/2;
+   rem = val1%2;
+
+   *chan_index = (4*rem) + val2;
+}
+
+static int amdgpu_bad_page_notifier(struct notifier_block *nb,
+   unsigned long val, void *data)
+{
+   struct mce *m = (struct mce *)data;
+   struct amdgpu_device *adev = NULL;
+   uint32_t gpu_id = 0;
+   uint32_t umc_inst = 0;
+   uint32_t chan_index = 0;
+   struct ras_err_data err_data = {0, 0, 0, NULL};
+   struct eeprom_table_record err_rec;
+   uint64_t retired_page;
+
+   /*
+* If the error was generated in UMC_V2, which belongs to GPU UMCs,
+* and error occurred in DramECC (Extended error code = 0) then only
+* process the error, else bail out.
+*/
+   if (!m || !(is_smca_umc_v2(m->bank) && (XEC(m->status, 0x1f) == 0x0)))
+   return NOTIFY_DONE;
+
+   gpu_id = GET_MCA_IPID_GPUID(m->ipid);
+
+   /*
+* GPU Id is offset by GPU_ID_OFFSET in MCA_IPID_UMC register.
+*/
+   gpu_id -= GPU_ID_OFFSET;
+
+   adev = find_adev(gpu_id);
+   if (!adev) {
+   dev_warn(adev->dev, "%s: Unable to find adev for gpu_id: %d\n",
+__func__, gpu_id);
+   return NOTIFY_DONE;
+   }
+
+   /*
+* If it is correctable error, then print a message and return.
+*/
+   if (mce_is_correctable(m)) {
+   dev_info(adev->dev, "%s: UMC Correctable error detected.",
+   __func_

[PATCH] drm/amdgpu: Enable TCP channel hashing for Aldebaran

2021-05-06 Thread Mukul Joshi
Enable TCP channel hashing to match DF hash settings for Aldebaran.

Signed-off-by: Mukul Joshi 
Signed-off-by: Oak Zeng 
Reviewed-by: Joseph Greathouse 
---
 drivers/gpu/drm/amd/amdgpu/df_v3_6.c| 17 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  3 ++-
 .../amd/include/asic_reg/df/df_3_6_sh_mask.h|  1 +
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c 
b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 0d8459d63bac..36ba229576d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -219,11 +219,11 @@ static void df_v3_6_query_hashes(struct amdgpu_device 
*adev)
adev->df.hash_status.hash_2m = false;
adev->df.hash_status.hash_1g = false;
 
-   if (adev->asic_type != CHIP_ARCTURUS)
-   return;
-
-   /* encoding for hash-enabled on Arcturus */
-   if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
+   /* encoding for hash-enabled on Arcturus and Aldebaran */
+   if ((adev->asic_type == CHIP_ARCTURUS &&
+adev->df.funcs->get_fb_channel_number(adev) == 0xe) ||
+(adev->asic_type == CHIP_ALDEBARAN &&
+ adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) {
tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
DF_CS_UMC_AON0_DfGlobalCtrl,
@@ -278,7 +278,12 @@ static u32 df_v3_6_get_fb_channel_number(struct 
amdgpu_device *adev)
u32 tmp;
 
tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
-   tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+   if (adev->asic_type == CHIP_ALDEBARAN)
+   tmp &=
+   ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+   else
+   tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+
tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
 
return tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 16a3b279a9ef..22608c45f07c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3937,7 +3937,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device 
*adev)
 {
u32 tmp;
 
-   if (adev->asic_type != CHIP_ARCTURUS)
+   if (adev->asic_type != CHIP_ARCTURUS &&
+   adev->asic_type != CHIP_ALDEBARAN)
return;
 
tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h 
b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
index 7afa87c7ff54..f804e13b002e 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
@@ -50,6 +50,7 @@
 #define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal_MASK   
0x0001L
 #define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK   
0x0002L
 #define DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK 
0x003CL
+#define ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK   
0x007CL
 #define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel_MASK 
0x0E00L
 #define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr_MASK 
0xF000L
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2] drm/amdgpu: Enable SDMA utilization for Arcturus

2020-09-11 Thread Mukul Joshi
SDMA utilization calculations are enabled/disabled by
writing to the SDMAx_PUB_DUMMY_REG2 register. Currently,
this is enabled only for Arcturus.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 856c50386c86..edea8743f26e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1063,6 +1063,15 @@ static void sdma_v4_0_ctx_switch_enable(struct 
amdgpu_device *adev, bool enable)
WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum);
}
WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl);
+
+   /*
+* Enable SDMA utilization. Its only supported on
+* Arcturus for the moment and firmware version 14
+* and above.
+*/
+   if (adev->asic_type == CHIP_ARCTURUS &&
+   adev->sdma.instance[i].fw_version >= 14)
+   WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
}
 
 }
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Enable SDMA utilization for Arcturus

2020-09-11 Thread Mukul Joshi
SDMA utilization calculations are enabled/disabled by
writing to the SDMAx_PUB_DUMMY_REG2 register. Currently,
this is enabled only for Arcturus.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 856c50386c86..c764c27ba86d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1063,6 +1063,16 @@ static void sdma_v4_0_ctx_switch_enable(struct 
amdgpu_device *adev, bool enable)
WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum);
}
WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl);
+
+   /*
+* Enable SDMA utilization. Its only supported on
+* Arcturus for the moment and firmware version 14
+* and above.
+*/
+   if ((adev->asic_type == CHIP_ARCTURUS) &&
+   (adev->sdma.instance[i].fw_version > 13)) {
+   WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
+   }
}
 
 }
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: Move process doorbell allocation into kfd device

2020-09-01 Thread Mukul Joshi
Move doorbell allocation for a process into kfd device and
allocate doorbell space in each PDD during process creation.
Currently, KFD manages its own doorbell space but for some
devices, amdgpu would allocate the complete doorbell
space instead of leaving a chunk of doorbell space for KFD to
manage. In a system with mix of such devices, KFD would need
to request process doorbell space based on the type of device,
either from amdgpu or from its own doorbell space.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 30 +--
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  3 ++
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 37 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 17 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 21 ++-
 6 files changed, 64 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b7b16adb0615..b23caa78328b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1290,18 +1290,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
return -EINVAL;
}
 
-   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
-   if (args->size != kfd_doorbell_process_slice(dev))
-   return -EINVAL;
-   offset = kfd_get_process_doorbells(dev, p);
-   } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
-   if (args->size != PAGE_SIZE)
-   return -EINVAL;
-   offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
-   if (!offset)
-   return -ENOMEM;
-   }
-
mutex_lock(>mutex);
 
pdd = kfd_bind_process_to_device(dev, p);
@@ -1310,6 +1298,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
goto err_unlock;
}
 
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+   if (args->size != kfd_doorbell_process_slice(dev)) {
+   err = -EINVAL;
+   goto err_unlock;
+   }
+   offset = kfd_get_process_doorbells(dev, pdd);
+   } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+   if (args->size != PAGE_SIZE) {
+   err = -EINVAL;
+   goto err_unlock;
+   }
+   offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+   if (!offset) {
+   err = -ENOMEM;
+   goto err_unlock;
+   }
+   }
+
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
dev->kgd, args->va_addr, args->size,
pdd->vm, (struct kgd_mem **) , ,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0e71a0543f98..a857282f3d09 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
 
atomic_set(>sram_ecc_flag, 0);
 
+   ida_init(>doorbell_ida);
+
return kfd;
 }
 
@@ -798,6 +800,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
kfd_doorbell_fini(kfd);
+   ida_destroy(>doorbell_ida);
kfd_gtt_sa_fini(kfd);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
if (kfd->gws)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 560adc57a050..b9d1359c6fe0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device 
*qpd, struct queue *q)
}
 
q->properties.doorbell_off =
-   kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
+   kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
  q->doorbell_id);
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 8e0c00b9555e..5946bfb6b75c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -31,9 +31,6 @@
  * kernel queues using the first doorbell page reserved for the kernel.
  */
 
-static DEFINE_IDA(doorbell_ida);
-static unsigned int max_doorbell_slices;
-
 /*
  * Each device exposes a doorbell aperture, a PCI MMIO aperture that
  * receives 32-bit writes that are passed to queues as wptr values.
@@ -84,9 +81,9 @@ int kfd_do

[PATCH] include/uapi/linux: Fix indentation in kfd_smi_event enum

2020-08-28 Thread Mukul Joshi
Replace spaces with Tabs to fix indentation in kfd_smi_event
enum.

Signed-off-by: Mukul Joshi 
---
 include/uapi/linux/kfd_ioctl.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 8b7368bfbd84..695b606da4b1 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -450,9 +450,9 @@ struct kfd_ioctl_import_dmabuf_args {
  * KFD SMI(System Management Interface) events
  */
 enum kfd_smi_event {
-KFD_SMI_EVENT_NONE = 0, /* not used */
-KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
-KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
+   KFD_SMI_EVENT_NONE = 0, /* not used */
+   KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
+   KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
KFD_SMI_EVENT_GPU_PRE_RESET = 3,
KFD_SMI_EVENT_GPU_POST_RESET = 4,
 };
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v3] drm/amdkfd: Add GPU reset SMI event

2020-08-28 Thread Mukul Joshi
Add support for reporting GPU reset events through SMI. KFD
would report both pre and post GPU reset events.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c |  5 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 35 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  1 +
 include/uapi/linux/kfd_ioctl.h  |  2 ++
 5 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index e1cd6599529f..f5e1b3aaa10c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -812,6 +812,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
 
+   kfd_smi_event_update_gpu_reset(kfd, false);
+
kfd->dqm->ops.pre_reset(kfd->dqm);
 
kgd2kfd_suspend(kfd, false);
@@ -833,6 +835,7 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
 
+
ret = kfd_resume(kfd);
if (ret)
return ret;
@@ -840,6 +843,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
 
atomic_set(>sram_ecc_flag, 0);
 
+   kfd_smi_event_update_gpu_reset(kfd, true);
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 18bc711f97ae..7e8767934748 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -312,6 +312,8 @@ struct kfd_dev {
/* Clients watching SMI events */
struct list_head smi_clients;
spinlock_t smi_lock;
+
+   uint32_t reset_seq_num;
 };
 
 enum kfd_mempool {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 4d4b6e3ab697..001cacb09467 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -174,6 +174,37 @@ static void add_event_to_kfifo(struct kfd_dev *dev, 
unsigned int smi_event,
rcu_read_unlock();
 }
 
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
+{
+   /*
+* GpuReset msg = Reset seq number (incremented for
+* every reset message sent before GPU reset).
+* 1 byte event + 1 byte space + 8 bytes seq num +
+* 1 byte \n + 1 byte \0 = 12
+*/
+   char fifo_in[12];
+   int len;
+   unsigned int event;
+
+   if (list_empty(>smi_clients)) {
+   return;
+   }
+
+   memset(fifo_in, 0x0, sizeof(fifo_in));
+
+   if (post_reset) {
+   event = KFD_SMI_EVENT_GPU_POST_RESET;
+   } else {
+   event = KFD_SMI_EVENT_GPU_PRE_RESET;
+   ++(dev->reset_seq_num);
+   }
+
+   len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
+   dev->reset_seq_num);
+
+   add_event_to_kfifo(dev, event, fifo_in, len);
+}
+
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 uint32_t throttle_bitmask)
 {
@@ -191,7 +222,7 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_dev 
*dev,
if (list_empty(>smi_clients))
return;
 
-   len = snprintf(fifo_in, 29, "%x %x:%llx\n",
+   len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n",
   KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
   atomic64_read(>smu.throttle_int_counter));
 
@@ -218,7 +249,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, 
uint16_t pasid)
if (!task_info.pid)
return;
 
-   len = snprintf(fifo_in, 29, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
+   len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", 
KFD_SMI_EVENT_VMFAULT,
task_info.pid, task_info.task_name);
 
add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index 15537b2cccb5..b9b0438202e2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 uint32_t throttle_bitmask);
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
 
 #endif
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index cb1f963a84e0..8b7368bfbd84 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -453,6 +453,8 @@ enum kfd_smi_event {
 KFD_SM

[PATCH v2] drm/amdkfd: Add GPU reset SMI event

2020-08-26 Thread Mukul Joshi
Add support for reporting GPU reset events through SMI. KFD
would report both pre and post GPU reset events.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c |  4 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 30 +
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  1 +
 include/uapi/linux/kfd_ioctl.h  |  2 ++
 5 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index e1cd6599529f..aad1ecfa1239 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -812,6 +812,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
 
+   kfd_smi_event_update_gpu_reset(kfd, false);
+
kfd->dqm->ops.pre_reset(kfd->dqm);
 
kgd2kfd_suspend(kfd, false);
@@ -833,6 +835,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
 
+   kfd_smi_event_update_gpu_reset(kfd, true);
+
ret = kfd_resume(kfd);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 18bc711f97ae..b1a2979e086f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -312,6 +312,8 @@ struct kfd_dev {
/* Clients watching SMI events */
struct list_head smi_clients;
spinlock_t smi_lock;
+
+   uint64_t reset_seq_num;
 };
 
 enum kfd_mempool {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 4d4b6e3ab697..4f0590bcb1a3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -174,6 +174,36 @@ static void add_event_to_kfifo(struct kfd_dev *dev, 
unsigned int smi_event,
rcu_read_unlock();
 }
 
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
+{
+   /*
+* GpuReset msg = Reset seq number (incremented for
+* every reset message sent before GPU reset).
+* 1 byte event + 1 byte space + 16 bytes seq num +
+* 1 byte \n + 1 byte \0 = 20
+*/
+   char fifo_in[20];
+   int len;
+   unsigned int event;
+
+   if (list_empty(>smi_clients)) {
+   return;
+   }
+
+   memset(fifo_in, 0x0, sizeof(fifo_in));
+
+   if (post_reset) {
+   event = KFD_SMI_EVENT_GPU_POST_RESET;
+   } else {
+   event = KFD_SMI_EVENT_GPU_PRE_RESET;
+   ++(dev->reset_seq_num);
+   }
+
+   len = snprintf(fifo_in, 20, "%x %llx\n", event, dev->reset_seq_num);
+
+   add_event_to_kfifo(dev, event, fifo_in, len);
+}
+
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 uint32_t throttle_bitmask)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index 15537b2cccb5..b9b0438202e2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 uint32_t throttle_bitmask);
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
 
 #endif
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index cb1f963a84e0..8b7368bfbd84 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -453,6 +453,8 @@ enum kfd_smi_event {
 KFD_SMI_EVENT_NONE = 0, /* not used */
 KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
 KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
+   KFD_SMI_EVENT_GPU_PRE_RESET = 3,
+   KFD_SMI_EVENT_GPU_POST_RESET = 4,
 };
 
 #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: Add GPU reset SMI event

2020-08-25 Thread Mukul Joshi
Add support for reporting GPU reset events through SMI. KFD
would report both pre and post GPU reset events.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c |  4 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 30 +
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  1 +
 include/uapi/linux/kfd_ioctl.h  |  2 ++
 5 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index e1cd6599529f..aad1ecfa1239 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -812,6 +812,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
 
+   kfd_smi_event_update_gpu_reset(kfd, false);
+
kfd->dqm->ops.pre_reset(kfd->dqm);
 
kgd2kfd_suspend(kfd, false);
@@ -833,6 +835,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
 
+   kfd_smi_event_update_gpu_reset(kfd, true);
+
ret = kfd_resume(kfd);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 18bc711f97ae..b1a2979e086f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -312,6 +312,8 @@ struct kfd_dev {
/* Clients watching SMI events */
struct list_head smi_clients;
spinlock_t smi_lock;
+
+   uint64_t reset_seq_num;
 };
 
 enum kfd_mempool {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 4d4b6e3ab697..448abfdde230 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -174,6 +174,36 @@ static void add_event_to_kfifo(struct kfd_dev *dev, 
unsigned int smi_event,
rcu_read_unlock();
 }
 
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
+{
+   /*
+* GpuReset msg = Reset seq number (incremented for
+* every reset message sent before GPU reset).
+* 1 byte event + 1 byte space + 16 bytes seq num +
+* 1 byte \n + 1 byte \0 = 20
+*/
+   char fifo_in[20];
+   int len;
+   unsigned int event;
+
+   if (list_empty(>smi_clients)) {
+   return;
+   }
+
+   memset(fifo_in, 0x0, sizeof(fifo_in));
+
+   if (post_reset) {
+   event = KFD_SMI_EVENT_GPU_POST_RESET;
+   } else {
+   event = KFD_SMI_EVENT_GPU_PRE_RESET;
+   ++(dev->reset_seq_num);
+   }
+
+   len = snprintf(fifo_in, 4, "%x %llx\n", event, dev->reset_seq_num);
+
+   add_event_to_kfifo(dev, event, fifo_in, len);
+}
+
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 uint32_t throttle_bitmask)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index 15537b2cccb5..b9b0438202e2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 uint32_t throttle_bitmask);
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
 
 #endif
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index cb1f963a84e0..8b7368bfbd84 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -453,6 +453,8 @@ enum kfd_smi_event {
 KFD_SMI_EVENT_NONE = 0, /* not used */
 KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
 KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
+   KFD_SMI_EVENT_GPU_PRE_RESET = 3,
+   KFD_SMI_EVENT_GPU_POST_RESET = 4,
 };
 
 #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v3] drm/amdkfd: sparse: Fix warning in reading SDMA counters

2020-08-18 Thread Mukul Joshi
Add __user annotation to fix related sparse warning while reading
SDMA counters from userland.
Also, rework the read SDMA counters function by removing redundant
checks.

Reported-by: kernel test robot 
Signed-off-by: Mukul Joshi 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 28 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  8 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  6 ++--
 3 files changed, 12 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e0e60b0d0669..560adc57a050 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -153,30 +153,6 @@ static void decrement_queue_count(struct 
device_queue_manager *dqm,
dqm->active_cp_queue_count--;
 }
 
-int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val)
-{
-   int ret;
-   uint64_t tmp = 0;
-
-   if (!val)
-   return -EINVAL;
-   /*
-* SDMA activity counter is stored at queue's RPTR + 0x8 location.
-*/
-   if (!access_ok((const void __user *)(q_rptr +
-   sizeof(uint64_t)), sizeof(uint64_t))) {
-   pr_err("Can't access sdma queue activity counter\n");
-   return -EFAULT;
-   }
-
-   ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t)));
-   if (!ret) {
-   *val = tmp;
-   }
-
-   return ret;
-}
-
 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 {
struct kfd_dev *dev = qpd->dqm->dev;
@@ -552,7 +528,7 @@ static int destroy_queue_nocpsch(struct 
device_queue_manager *dqm,
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
-   retval = 
read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
+   retval = read_sdma_queue_counter((uint64_t __user 
*)q->properties.read_ptr,
_val);
if (retval)
pr_err("Failed to read SDMA queue counter for queue: 
%d\n",
@@ -1473,7 +1449,7 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
-   retval = 
read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
+   retval = read_sdma_queue_counter((uint64_t __user 
*)q->properties.read_ptr,
_val);
if (retval)
pr_err("Failed to read SDMA queue counter for queue: 
%d\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 49d8e324c636..16262e5d93f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -251,5 +251,11 @@ static inline void dqm_unlock(struct device_queue_manager 
*dqm)
mutex_unlock(>lock_hidden);
 }
 
-int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val);
+static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t 
*val)
+{
+/*
+ * SDMA activity counter is stored at queue's RPTR + 0x8 location.
+ */
+   return get_user(*val, q_rptr + 1);
+}
 #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 4480f905814c..ff7686250ae0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -87,7 +87,7 @@ struct kfd_sdma_activity_handler_workarea {
 };
 
 struct temp_sdma_queue_list {
-   uint64_t rptr;
+   uint64_t __user *rptr;
uint64_t sdma_val;
unsigned int queue_id;
struct list_head list;
@@ -159,7 +159,7 @@ static void kfd_sdma_activity_worker(struct work_struct 
*work)
}
 
INIT_LIST_HEAD(_q->list);
-   sdma_q->rptr = (uint64_t)q->properties.read_ptr;
+   sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
sdma_q->queue_id = q->properties.queue_id;
list_add_tail(_q->list, _q_list.list);
}
@@ -218,7 +218,7 @@ static void kfd_sdma_activity_worker(struct work_struct 
*work)
continue;
 
list_for_each_entry_safe(sdma_q, next, _q_list.list, list) 
{
-   if (((uint64_t)q->properties.read_ptr == sdma_q->rptr) 
&&
+   if (((uint64_t __user *)q->proper

[PATCH v2] drm/amdkfd: sparse: Fix warning in reading SDMA counters

2020-08-17 Thread Mukul Joshi
Add __user annotation to fix related sparse warning while reading
SDMA counters from userland.

Reported-by: kernel test robot 
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e0e60b0d0669..e2894967c372 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -157,19 +157,16 @@ int read_sdma_queue_counter(uint64_t q_rptr, uint64_t 
*val)
 {
int ret;
uint64_t tmp = 0;
+   uint64_t __user *sdma_usage_cntr;
 
if (!val)
return -EINVAL;
/*
 * SDMA activity counter is stored at queue's RPTR + 0x8 location.
 */
-   if (!access_ok((const void __user *)(q_rptr +
-   sizeof(uint64_t)), sizeof(uint64_t))) {
-   pr_err("Can't access sdma queue activity counter\n");
-   return -EFAULT;
-   }
+   sdma_usage_cntr = (uint64_t __user *)q_rptr + 1;
 
-   ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t)));
+   ret = get_user(tmp, sdma_usage_cntr);
if (!ret) {
*val = tmp;
}
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: sparse: Fix warning in reading SDMA counters

2020-08-17 Thread Mukul Joshi
Add __user annotation to fix related sparse warning while reading
SDMA counters from userland.

Reported-by: kernel test robot 
Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e0e60b0d0669..a6a4bbf99d9b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -157,19 +157,21 @@ int read_sdma_queue_counter(uint64_t q_rptr, uint64_t 
*val)
 {
int ret;
uint64_t tmp = 0;
+   uint64_t __user *sdma_rptr;
 
if (!val)
return -EINVAL;
/*
 * SDMA activity counter is stored at queue's RPTR + 0x8 location.
 */
-   if (!access_ok((const void __user *)(q_rptr +
-   sizeof(uint64_t)), sizeof(uint64_t))) {
+   sdma_rptr = (uint64_t *)(q_rptr + sizeof(uint64_t));
+
+   if (!access_ok((const void __user *)sdma_rptr, sizeof(uint64_t))) {
pr_err("Can't access sdma queue activity counter\n");
return -EFAULT;
}
 
-   ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t)));
+   ret = get_user(tmp, sdma_rptr);
if (!ret) {
*val = tmp;
}
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: Initialize SDMA activity counter to 0

2020-08-17 Thread Mukul Joshi
To prevent reporting erroneous SDMA usage, initialize SDMA
activity counter to 0 before using.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 013c2b018edc..4480f905814c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -270,6 +270,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct 
attribute *attr,
kfd_sdma_activity_worker);
 
sdma_activity_work_handler.pdd = pdd;
+   sdma_activity_work_handler.sdma_activity_counter = 0;
 
schedule_work(_activity_work_handler.sdma_activity_work);
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: Add GPU reset SMI event

2020-07-27 Thread Mukul Joshi
Add support for reporting GPU reset events through SMI.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 18 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  1 +
 include/uapi/linux/kfd_ioctl.h  |  1 +
 4 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index d5e790f046b4..d788aa24ef3f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -811,6 +811,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
 
+   kfd_smi_event_update_gpu_reset(kfd);
+
kfd->dqm->ops.pre_reset(kfd->dqm);
 
kgd2kfd_suspend(kfd, false);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 4d4b6e3ab697..4de57923d9f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -174,6 +174,24 @@ static void add_event_to_kfifo(struct kfd_dev *dev, 
unsigned int smi_event,
rcu_read_unlock();
 }
 
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev)
+{
+   /*
+* GpuReset msg = empty
+* 1 byte event + 1 byte space + 1 byte \n + 1 byte \0 = 4
+*/
+   char fifo_in[4];
+   int len;
+
+   if (list_empty(>smi_clients)) {
+   return;
+   }
+
+   len = snprintf(fifo_in, 4, "%x \n", KFD_SMI_EVENT_GPU_RESET);
+
+   add_event_to_kfifo(dev, KFD_SMI_EVENT_GPU_RESET, fifo_in, len);
+}
+
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 uint32_t throttle_bitmask)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index 15537b2cccb5..ffdb822d120b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 uint32_t throttle_bitmask);
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev);
 
 #endif
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index cb1f963a84e0..128b6235b540 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -453,6 +453,7 @@ enum kfd_smi_event {
 KFD_SMI_EVENT_NONE = 0, /* not used */
 KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
 KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
+   KFD_SMI_EVENT_GPU_RESET = 3,
 };
 
 #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: Replace bitmask with event idx in SMI event msg

2020-07-26 Thread Mukul Joshi
Event bitmask is a 64-bit mask with only 1 bit set. Sending this
event bitmask in KFD SMI event message is both wasteful of memory
and potentially limiting to only 64 events. Instead send event
index in SMI event message.

Signed-off-by: Mukul Joshi 
Suggested-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 24 +++--
 include/uapi/linux/kfd_ioctl.h  | 10 ++---
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 86c2c3e97944..4d4b6e3ab697 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -149,7 +149,7 @@ static int kfd_smi_ev_release(struct inode *inode, struct 
file *filep)
return 0;
 }
 
-static void add_event_to_kfifo(struct kfd_dev *dev, unsigned long long 
smi_event,
+static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
  char *event_msg, int len)
 {
struct kfd_smi_client *client;
@@ -157,14 +157,15 @@ static void add_event_to_kfifo(struct kfd_dev *dev, 
unsigned long long smi_event
rcu_read_lock();
 
list_for_each_entry_rcu(client, >smi_clients, list) {
-   if (!(READ_ONCE(client->events) & smi_event))
+   if (!(READ_ONCE(client->events) &
+   KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
continue;
spin_lock(>lock);
if (kfifo_avail(>fifo) >= len) {
kfifo_in(>fifo, event_msg, len);
wake_up_all(>wait_queue);
} else {
-   pr_debug("smi_event(EventID: %llu): no space left\n",
+   pr_debug("smi_event(EventID: %u): no space left\n",
smi_event);
}
spin_unlock(>lock);
@@ -180,21 +181,21 @@ void kfd_smi_event_update_thermal_throttling(struct 
kfd_dev *dev,
/*
 * ThermalThrottle msg = throttle_bitmask(8):
 *   thermal_interrupt_count(16):
-* 16 bytes event + 1 byte space + 8 byte throttle_bitmask +
+* 1 byte event + 1 byte space + 8 byte throttle_bitmask +
 * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
-* 1 byte \0 = 44
+* 1 byte \0 = 29
 */
-   char fifo_in[44];
+   char fifo_in[29];
int len;
 
if (list_empty(>smi_clients))
return;
 
-   len = snprintf(fifo_in, 44, "%x %x:%llx\n",
+   len = snprintf(fifo_in, 29, "%x %x:%llx\n",
   KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
   atomic64_read(>smu.throttle_int_counter));
 
-   add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
+   add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
 }
 
 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
@@ -202,9 +203,10 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, 
uint16_t pasid)
struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
struct amdgpu_task_info task_info;
/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
-   /* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43
+   /* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
+* 1 byte \0 = 29
 */
-   char fifo_in[43];
+   char fifo_in[29];
int len;
 
if (list_empty(>smi_clients))
@@ -216,7 +218,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, 
uint16_t pasid)
if (!task_info.pid)
return;
 
-   len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
+   len = snprintf(fifo_in, 29, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
task_info.pid, task_info.task_name);
 
add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index df6c7a43aadc..796f836ba773 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -449,9 +449,13 @@ struct kfd_ioctl_import_dmabuf_args {
 /*
  * KFD SMI(System Management Interface) events
  */
-/* Event type (defined by bitmask) */
-#define KFD_SMI_EVENT_VMFAULT  0x0001
-#define KFD_SMI_EVENT_THERMAL_THROTTLE 0x0002
+enum kfd_smi_event {
+KFD_SMI_EVENT_NONE = 0, /* not used */
+KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
+KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
+};
+
+#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << (i - 1))
 
 struct kfd_ioctl_smi_events_args {
__u32 gpuid;/* to KFD */
-- 

[PATCH v3] drm/amdkfd: Add thermal throttling SMI event

2020-07-23 Thread Mukul Joshi
Add support for reporting thermal throttling events through SMI.
Also, add a counter to count the number of throttling interrupts
observed and report the count in the SMI event message.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  7 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   | 67 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h   |  2 +
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c|  1 +
 drivers/gpu/drm/amd/powerplay/arcturus_ppt.c  |  1 +
 .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h|  1 +
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c |  5 ++
 include/uapi/linux/kfd_ioctl.h|  3 +-
 10 files changed, 74 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 1b865fed74ca..19e4658756d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -755,4 +755,8 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void 
*ih_ring_entry)
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
 {
 }
+
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+{
+}
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 3f2b695cf19e..e8b0258aae24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -269,5 +269,6 @@ int kgd2kfd_resume_mm(struct mm_struct *mm);
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
   struct dma_fence *fence);
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t 
throttle_bitmask);
 
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 4bfedaab183f..d5e790f046b4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -29,6 +29,7 @@
 #include "cwsr_trap_handler.h"
 #include "kfd_iommu.h"
 #include "amdgpu_amdkfd.h"
+#include "kfd_smi_events.h"
 
 #define MQD_SIZE_ALIGNED 768
 
@@ -1245,6 +1246,12 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
WARN_ONCE(count < 0, "Compute profile ref. count error");
 }
 
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+{
+   if (kfd)
+   kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 /* This function will send a package to HIQ to hang the HWS
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 7b348bf9df21..86c2c3e97944 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include "amdgpu.h"
 #include "amdgpu_vm.h"
 #include "kfd_priv.h"
 #include "kfd_smi_events.h"
@@ -148,6 +149,54 @@ static int kfd_smi_ev_release(struct inode *inode, struct 
file *filep)
return 0;
 }
 
+static void add_event_to_kfifo(struct kfd_dev *dev, unsigned long long 
smi_event,
+ char *event_msg, int len)
+{
+   struct kfd_smi_client *client;
+
+   rcu_read_lock();
+
+   list_for_each_entry_rcu(client, >smi_clients, list) {
+   if (!(READ_ONCE(client->events) & smi_event))
+   continue;
+   spin_lock(>lock);
+   if (kfifo_avail(>fifo) >= len) {
+   kfifo_in(>fifo, event_msg, len);
+   wake_up_all(>wait_queue);
+   } else {
+   pr_debug("smi_event(EventID: %llu): no space left\n",
+   smi_event);
+   }
+   spin_unlock(>lock);
+   }
+
+   rcu_read_unlock();
+}
+
+void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
+uint32_t throttle_bitmask)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
+   /*
+* ThermalThrottle msg = throttle_bitmask(8):
+*   thermal_interrupt_count(16):
+* 16 bytes event + 1 byte space + 8 byte throttle_bitmask +
+* 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
+* 1 byte \0 = 44
+*/
+   char fifo_in[44];
+   int len;
+
+   if (list_empty(>smi_clients))
+   return;
+
+   len = snprintf(fifo_in, 44, "%x %x:%llx\n",
+  KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
+ 

[PATCH v2] drm/amdkfd: Add thermal throttling SMI event

2020-07-22 Thread Mukul Joshi
Add support for reporting thermal throttling events through SMI.
Also, add a counter to count the number of throttling interrupts
observed and report the count in the SMI event message.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  7 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   | 68 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h   |  2 +
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c|  1 +
 drivers/gpu/drm/amd/powerplay/arcturus_ppt.c  |  1 +
 .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h|  1 +
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c |  5 ++
 include/uapi/linux/kfd_ioctl.h|  3 +-
 10 files changed, 75 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 1b865fed74ca..19e4658756d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -755,4 +755,8 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void 
*ih_ring_entry)
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
 {
 }
+
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+{
+}
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 3f2b695cf19e..e8b0258aae24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -269,5 +269,6 @@ int kgd2kfd_resume_mm(struct mm_struct *mm);
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
   struct dma_fence *fence);
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t 
throttle_bitmask);
 
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 4bfedaab183f..d5e790f046b4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -29,6 +29,7 @@
 #include "cwsr_trap_handler.h"
 #include "kfd_iommu.h"
 #include "amdgpu_amdkfd.h"
+#include "kfd_smi_events.h"
 
 #define MQD_SIZE_ALIGNED 768
 
@@ -1245,6 +1246,12 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
WARN_ONCE(count < 0, "Compute profile ref. count error");
 }
 
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+{
+   if (kfd)
+   kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 /* This function will send a package to HIQ to hang the HWS
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 7b348bf9df21..00c90b47155b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include "amdgpu.h"
 #include "amdgpu_vm.h"
 #include "kfd_priv.h"
 #include "kfd_smi_events.h"
@@ -148,6 +149,55 @@ static int kfd_smi_ev_release(struct inode *inode, struct 
file *filep)
return 0;
 }
 
+static void add_event_to_kfifo(struct kfd_dev *dev, unsigned long long 
smi_event,
+ char *event_msg, int len)
+{
+   struct kfd_smi_client *client;
+
+   rcu_read_lock();
+
+   list_for_each_entry_rcu(client, >smi_clients, list) {
+   if (!(READ_ONCE(client->events) & smi_event))
+   continue;
+   spin_lock(>lock);
+   if (kfifo_avail(>fifo) >= len) {
+   kfifo_in(>fifo, event_msg, len);
+   wake_up_all(>wait_queue);
+   } else {
+   pr_debug("smi_event(EventID: %llu): no space left\n",
+   smi_event);
+   }
+   spin_unlock(>lock);
+   }
+
+   rcu_read_unlock();
+}
+
+void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
+uint32_t throttle_bitmask)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
+   /*
+* ThermalThrottle msg = gpu_id(4):throttle_bitmask(4):
+*   thermal_interrupt_count(8):
+* 16 bytes event + 1 byte space + 4 bytes gpu_id + 1 byte : +
+* 4 byte throttle_bitmask + 1 byte : +
+* 8 byte thermal_interupt_counter + 1 byte \n = 36
+*/
+   char fifo_in[36];
+   int len;
+
+   if (list_empty(>smi_clients))
+   return;
+
+   len = snprintf(fifo_in, 36, "%x %x:%x:%llx\n",
+  KFD_SMI_EVENT_THERMAL_THROTTLE,
+

[PATCH] drm/amdkfd: Add thermal throttling SMI event

2020-07-21 Thread Mukul Joshi
Add support for reporting thermal throttling events through SMI.
Also, add a counter to count the number of throttling interrupts
observed and report the count in the SMI event message.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  7 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   | 70 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h   |  2 +
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c|  1 +
 drivers/gpu/drm/amd/powerplay/arcturus_ppt.c  |  1 +
 .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h|  1 +
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c |  6 ++
 include/uapi/linux/kfd_ioctl.h|  1 +
 10 files changed, 77 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 1b865fed74ca..19e4658756d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -755,4 +755,8 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void 
*ih_ring_entry)
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
 {
 }
+
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+{
+}
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 3f2b695cf19e..e8b0258aae24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -269,5 +269,6 @@ int kgd2kfd_resume_mm(struct mm_struct *mm);
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
   struct dma_fence *fence);
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t 
throttle_bitmask);
 
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 4bfedaab183f..d5e790f046b4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -29,6 +29,7 @@
 #include "cwsr_trap_handler.h"
 #include "kfd_iommu.h"
 #include "amdgpu_amdkfd.h"
+#include "kfd_smi_events.h"
 
 #define MQD_SIZE_ALIGNED 768
 
@@ -1245,6 +1246,12 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
WARN_ONCE(count < 0, "Compute profile ref. count error");
 }
 
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+{
+   if (kfd)
+   kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 /* This function will send a package to HIQ to hang the HWS
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 7b348bf9df21..247538bccba2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include "amdgpu.h"
 #include "amdgpu_vm.h"
 #include "kfd_priv.h"
 #include "kfd_smi_events.h"
@@ -148,6 +149,56 @@ static int kfd_smi_ev_release(struct inode *inode, struct 
file *filep)
return 0;
 }
 
+static int add_event_to_kfifo(struct kfd_dev *dev, long long smi_event,
+ char *event_msg, int len)
+{
+   struct kfd_smi_client *client;
+   int ret = 0;
+
+   rcu_read_lock();
+
+   list_for_each_entry_rcu(client, >smi_clients, list) {
+   if (!(READ_ONCE(client->events) & smi_event))
+   continue;
+   spin_lock(>lock);
+   if (kfifo_avail(>fifo) >= len) {
+   kfifo_in(>fifo, event_msg, len);
+   wake_up_all(>wait_queue);
+   }
+   else
+   ret = -ENOSPC;
+   spin_unlock(>lock);
+   }
+
+   rcu_read_unlock();
+
+   return ret;
+}
+
+void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
+uint32_t throttle_bitmask)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
+   /*
+* ThermalThrottle msg = gpu_id(4):thermal_interrupt_count(4):
+*   throttle_bitmask(4)
+* 16 bytes event + 1 byte space + 4 bytes gpu_id + 1 byte : +
+* 4 byte thermal_interupt_counter + 1 byte : +
+* 4 byte throttle_bitmask + 1 byte \n = 32
+*/
+   char fifo_in[32];
+   int len;
+
+   if (list_empty(>smi_clients))
+   return;
+
+   len = snprintf(fifo_in, 32, "%x %d:%d:%x\n", KFD_SMI_EVENT_THERMAL,
+   dev->id, READ_ONCE(adev->smu.throttle_int_counter), 
throttle_bitmask);
+
+  

[PATCH v2] drm/amdkfd: Fix circular locking dependency warning

2020-06-24 Thread Mukul Joshi
Lockdep is spewing circular locking dependency warning when
reading SDMA usage stats.

  150.887733] ==
[  150.893903] WARNING: possible circular locking dependency detected
[  150.905917] --
[  150.912129] kfdtest/4081 is trying to acquire lock:
[  150.917002] 8f7f3762e118 (>mmap_sem#2){}, at:
__might_fault+0x3e/0x90
[  150.924490]
   but task is already holding lock:
[  150.930320] 8f7f49d229e8 (>lock_hidden){+.+.}, at:
destroy_queue_cpsch+0x29/0x210 [amdgpu]
[  150.939432]
   which lock already depends on the new lock.
[  150.947603]
   the existing dependency chain (in reverse order) is:
[  150.955074]
   -> #3 (>lock_hidden){+.+.}:
[  150.960822]__mutex_lock+0xa1/0x9f0
[  150.964996]evict_process_queues_cpsch+0x22/0x120 [amdgpu]
[  150.971155]kfd_process_evict_queues+0x3b/0xc0 [amdgpu]
[  150.977054]kgd2kfd_quiesce_mm+0x25/0x60 [amdgpu]
[  150.982442]amdgpu_amdkfd_evict_userptr+0x35/0x70 [amdgpu]
[  150.988615]amdgpu_mn_invalidate_hsa+0x41/0x60 [amdgpu]
[  150.994448]__mmu_notifier_invalidate_range_start+0xa4/0x240
[  151.000714]copy_page_range+0xd70/0xd80
[  151.005159]dup_mm+0x3ca/0x550
[  151.008816]copy_process+0x1bdc/0x1c70
[  151.013183]_do_fork+0x76/0x6c0
[  151.016929]__x64_sys_clone+0x8c/0xb0
[  151.021201]do_syscall_64+0x4a/0x1d0
[  151.025404]entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  151.030977]
   -> #2 (>notifier_lock){+.+.}:
[  151.036993]__mutex_lock+0xa1/0x9f0
[  151.041168]amdgpu_mn_invalidate_hsa+0x30/0x60 [amdgpu]
[  151.047019]__mmu_notifier_invalidate_range_start+0xa4/0x240
[  151.053277]copy_page_range+0xd70/0xd80
[  151.057722]dup_mm+0x3ca/0x550
[  151.061388]copy_process+0x1bdc/0x1c70
[  151.065748]_do_fork+0x76/0x6c0
[  151.069499]__x64_sys_clone+0x8c/0xb0
[  151.073765]do_syscall_64+0x4a/0x1d0
[  151.077952]entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  151.083523]
   -> #1 (mmu_notifier_invalidate_range_start){+.+.}:
[  151.090833]change_protection+0x802/0xab0
[  151.095448]mprotect_fixup+0x187/0x2d0
[  151.099801]setup_arg_pages+0x124/0x250
[  151.104251]load_elf_binary+0x3a4/0x1464
[  151.108781]search_binary_handler+0x6c/0x210
[  151.113656]__do_execve_file.isra.40+0x7f7/0xa50
[  151.118875]do_execve+0x21/0x30
[  151.122632]call_usermodehelper_exec_async+0x17e/0x190
[  151.128393]ret_from_fork+0x24/0x30
[  151.132489]
   -> #0 (>mmap_sem#2){}:
[  151.138064]__lock_acquire+0x11a1/0x1490
[  151.142597]lock_acquire+0x90/0x180
[  151.146694]__might_fault+0x68/0x90
[  151.150879]read_sdma_queue_counter+0x5f/0xb0 [amdgpu]
[  151.156693]update_sdma_queue_past_activity_stats+0x3b/0x90 [amdgpu]
[  151.163725]destroy_queue_cpsch+0x1ae/0x210 [amdgpu]
[  151.169373]pqm_destroy_queue+0xf0/0x250 [amdgpu]
[  151.174762]kfd_ioctl_destroy_queue+0x32/0x70 [amdgpu]
[  151.180577]kfd_ioctl+0x223/0x400 [amdgpu]
[  151.185284]ksys_ioctl+0x8f/0xb0
[  151.189118]__x64_sys_ioctl+0x16/0x20
[  151.193389]do_syscall_64+0x4a/0x1d0
[  151.197569]entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  151.203141]
   other info that might help us debug this:

[  151.211140] Chain exists of:
 >mmap_sem#2 --> >notifier_lock --> >lock_hidden

  151.222535]  Possible unsafe locking scenario:

[  151.228447]CPU0CPU1
[  151.232971]
[  151.237502]   lock(>lock_hidden);
[  151.241254]lock(>notifier_lock);
[  151.247774]lock(>lock_hidden);
[  151.254038]   lock(>mmap_sem#2);
[  151.257610]

This commit fixes the warning by ensuring get_user() is not called
while reading SDMA stats with dqm_lock held as get_user() could cause a
page fault which leads to the circular locking scenario.

Signed-off-by: Mukul Joshi 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  75 +
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 145 +++---
 3 files changed, 162 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 21eb0998c4ae..57e579876037 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -153,24 +153,23 @@ static void decrement_queue_count(struct 
device_queue_manager *dqm,
 

[PATCH] drm/amdkfd: Fix circular locking dependency warning

2020-06-23 Thread Mukul Joshi
Lockdep is spewing circular locking dependency warning when
reading SDMA usage stats.

  150.887733] ==
[  150.893903] WARNING: possible circular locking dependency detected
[  150.905917] --
[  150.912129] kfdtest/4081 is trying to acquire lock:
[  150.917002] 8f7f3762e118 (>mmap_sem#2){}, at:
__might_fault+0x3e/0x90
[  150.924490]
   but task is already holding lock:
[  150.930320] 8f7f49d229e8 (>lock_hidden){+.+.}, at:
destroy_queue_cpsch+0x29/0x210 [amdgpu]
[  150.939432]
   which lock already depends on the new lock.
[  150.947603]
   the existing dependency chain (in reverse order) is:
[  150.955074]
   -> #3 (>lock_hidden){+.+.}:
[  150.960822]__mutex_lock+0xa1/0x9f0
[  150.964996]evict_process_queues_cpsch+0x22/0x120 [amdgpu]
[  150.971155]kfd_process_evict_queues+0x3b/0xc0 [amdgpu]
[  150.977054]kgd2kfd_quiesce_mm+0x25/0x60 [amdgpu]
[  150.982442]amdgpu_amdkfd_evict_userptr+0x35/0x70 [amdgpu]
[  150.988615]amdgpu_mn_invalidate_hsa+0x41/0x60 [amdgpu]
[  150.994448]__mmu_notifier_invalidate_range_start+0xa4/0x240
[  151.000714]copy_page_range+0xd70/0xd80
[  151.005159]dup_mm+0x3ca/0x550
[  151.008816]copy_process+0x1bdc/0x1c70
[  151.013183]_do_fork+0x76/0x6c0
[  151.016929]__x64_sys_clone+0x8c/0xb0
[  151.021201]do_syscall_64+0x4a/0x1d0
[  151.025404]entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  151.030977]
   -> #2 (>notifier_lock){+.+.}:
[  151.036993]__mutex_lock+0xa1/0x9f0
[  151.041168]amdgpu_mn_invalidate_hsa+0x30/0x60 [amdgpu]
[  151.047019]__mmu_notifier_invalidate_range_start+0xa4/0x240
[  151.053277]copy_page_range+0xd70/0xd80
[  151.057722]dup_mm+0x3ca/0x550
[  151.061388]copy_process+0x1bdc/0x1c70
[  151.065748]_do_fork+0x76/0x6c0
[  151.069499]__x64_sys_clone+0x8c/0xb0
[  151.073765]do_syscall_64+0x4a/0x1d0
[  151.077952]entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  151.083523]
   -> #1 (mmu_notifier_invalidate_range_start){+.+.}:
[  151.090833]change_protection+0x802/0xab0
[  151.095448]mprotect_fixup+0x187/0x2d0
[  151.099801]setup_arg_pages+0x124/0x250
[  151.104251]load_elf_binary+0x3a4/0x1464
[  151.108781]search_binary_handler+0x6c/0x210
[  151.113656]__do_execve_file.isra.40+0x7f7/0xa50
[  151.118875]do_execve+0x21/0x30
[  151.122632]call_usermodehelper_exec_async+0x17e/0x190
[  151.128393]ret_from_fork+0x24/0x30
[  151.132489]
   -> #0 (>mmap_sem#2){}:
[  151.138064]__lock_acquire+0x11a1/0x1490
[  151.142597]lock_acquire+0x90/0x180
[  151.146694]__might_fault+0x68/0x90
[  151.150879]read_sdma_queue_counter+0x5f/0xb0 [amdgpu]
[  151.156693]update_sdma_queue_past_activity_stats+0x3b/0x90 [amdgpu]
[  151.163725]destroy_queue_cpsch+0x1ae/0x210 [amdgpu]
[  151.169373]pqm_destroy_queue+0xf0/0x250 [amdgpu]
[  151.174762]kfd_ioctl_destroy_queue+0x32/0x70 [amdgpu]
[  151.180577]kfd_ioctl+0x223/0x400 [amdgpu]
[  151.185284]ksys_ioctl+0x8f/0xb0
[  151.189118]__x64_sys_ioctl+0x16/0x20
[  151.193389]do_syscall_64+0x4a/0x1d0
[  151.197569]entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  151.203141]
   other info that might help us debug this:

[  151.211140] Chain exists of:
 >mmap_sem#2 --> >notifier_lock --> >lock_hidden

  151.222535]  Possible unsafe locking scenario:

[  151.228447]CPU0CPU1
[  151.232971]
[  151.237502]   lock(>lock_hidden);
[  151.241254]lock(>notifier_lock);
[  151.247774]lock(>lock_hidden);
[  151.254038]   lock(>mmap_sem#2);
[  151.257610]

This commit fixes the warning by ensuring get_user() is not called
while reading SDMA stats with dqm_lock held as get_user() could cause a
page fault which leads to the circular locking scenario.

Signed-off-by: Mukul Joshi 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  36 +++---
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 120 +++---
 3 files changed, 124 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 21eb0998c4ae..204612de3dbc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -153,24 +153,23 @@ static void decrement_queue_count(struct 
device_queue_manager *dqm,
 

[PATCH v3] drm/amdkfd: Track SDMA utilization per process

2020-05-26 Thread Mukul Joshi
Track SDMA usage on a per process basis and report it through sysfs.
The value in the sysfs file indicates the amount of time SDMA has
been in-use by this process since the creation of the process.
This value is in microsecond granularity.

v2:
- Remove unnecessary checks for pdd in kfd_procfs_show().
- Make counter part of the kfd_sdma_activity_handler_workarea
  structure.

v3:
- Remove READ_ONCE/WRITE_ONCE while updating activity
  counter.
- Add update of past activity counter under dqm_lock.

Signed-off-by: Mukul Joshi 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  57 
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  16 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 137 --
 4 files changed, 198 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e9c4867abeff..6293017bd5bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -153,6 +153,52 @@ void decrement_queue_count(struct device_queue_manager 
*dqm,
dqm->active_cp_queue_count--;
 }
 
+int read_sdma_queue_counter(struct queue *q, uint64_t *val)
+{
+   int ret;
+   uint64_t tmp = 0;
+
+   if (!q || !val)
+   return -EINVAL;
+   /*
+* SDMA activity counter is stored at queue's RPTR + 0x8 location.
+*/
+   if (!access_ok((const void __user *)((uint64_t)q->properties.read_ptr +
+   sizeof(uint64_t)), sizeof(uint64_t))) {
+   pr_err("Can't access sdma queue activity counter\n");
+   return -EFAULT;
+   }
+
+   ret = get_user(tmp, (uint64_t *)((uint64_t)(q->properties.read_ptr) +
+   sizeof(uint64_t)));
+   if (!ret) {
+   *val = tmp;
+   }
+
+   return ret;
+}
+
+static int update_sdma_queue_past_activity_stats(struct kfd_process_device 
*pdd,
+struct queue *q)
+{
+   int ret;
+   uint64_t val = 0;
+
+   if (!pdd)
+   return -ENODEV;
+
+   ret = read_sdma_queue_counter(q, );
+   if (ret) {
+   pr_err("Failed to read SDMA queue counter for queue: %d\n",
+   q->properties.queue_id);
+   return ret;
+   }
+
+   pdd->sdma_past_activity_counter += val;
+
+   return ret;
+}
+
 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 {
struct kfd_dev *dev = qpd->dqm->dev;
@@ -487,6 +533,12 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
if (retval == -ETIME)
qpd->reset_wavefronts = true;
 
+   /* Get the SDMA queue stats */
+if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
+}
+
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
 
list_del(>list);
@@ -1468,6 +1520,11 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
}
}
 
+   /* Get the SDMA queue stats */
+   if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+   (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+   update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
+   }
/*
 * Unconditionally decrement this counter, regardless of the queue's
 * type
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 4afa015c69b1..894bcf877f9e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -251,4 +251,6 @@ static inline void dqm_unlock(struct device_queue_manager 
*dqm)
mutex_unlock(>lock_hidden);
 }
 
+int read_sdma_queue_counter(struct queue *q, uint64_t *val);
+
 #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f70f789c3cb3..fae139b77c0a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -633,7 +633,14 @@ enum kfd_pdd_bound {
PDD_BOUND_SUSPENDED,
 };
 
-#define MAX_VRAM_FILENAME_LEN 11
+#define MAX_SYSFS_FILENAME_LEN 11
+
+/*
+ * SDMA counter runs at 100MHz frequency.
+ * We display SDMA activity in microsecond granularity in sysfs.
+ * As a result, the divisor is 100.
+ */
+#define SDMA_ACTIVITY_DIVISOR  100
 
 /* Data that is per-process-per device. */
 struct kfd_process_device {
@@ -681,7 +688,12 @@ struct kfd_process_device {
/* VRAM usage */
  

[PATCH v2] drm/amdkfd: Track SDMA utilization per process

2020-05-22 Thread Mukul Joshi
Track SDMA usage on a per process basis and report it through sysfs.
The value in the sysfs file indicates the amount of time SDMA has
been in-use by this process since the creation of the process.
This value is in microsecond granularity.

v2:
- Remove unnecessary checks for pdd is kfd_procfs_show().
- Make counter part of the kfd_sdma_activity_handler_workarea 
  structure.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  57 
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  16 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 130 --
 4 files changed, 191 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e9c4867abeff..49f72d0f7be7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -153,6 +153,52 @@ void decrement_queue_count(struct device_queue_manager 
*dqm,
dqm->active_cp_queue_count--;
 }
 
+int read_sdma_queue_counter(struct queue *q, uint64_t *val)
+{
+   int ret;
+   uint64_t tmp = 0;
+
+   if (!q || !val)
+   return -EINVAL;
+   /*
+* SDMA activity counter is stored at queue's RPTR + 0x8 location.
+*/
+   if (!access_ok((const void __user *)((uint64_t)q->properties.read_ptr +
+   sizeof(uint64_t)), sizeof(uint64_t))) {
+   pr_err("Can't access sdma queue activity counter\n");
+   return -EFAULT;
+   }
+
+   ret = get_user(tmp, (uint64_t *)((uint64_t)(q->properties.read_ptr) +
+   sizeof(uint64_t)));
+   if (!ret) {
+   *val = tmp;
+   }
+
+   return ret;
+}
+
+static int update_sdma_queue_past_activity_stats(struct kfd_process_device 
*pdd,
+struct queue *q)
+{
+   int ret;
+   uint64_t val = 0;
+
+   if (!pdd)
+   return -ENODEV;
+
+   ret = read_sdma_queue_counter(q, &val);
+   if (ret) {
+   pr_err("Failed to read SDMA queue counter for queue: %d\n",
+   q->properties.queue_id);
+   return ret;
+   }
+
+   WRITE_ONCE(pdd->sdma_past_activity_counter, 
pdd->sdma_past_activity_counter + val);
+
+   return ret;
+}
+
 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 {
struct kfd_dev *dev = qpd->dqm->dev;
@@ -487,6 +533,12 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
if (retval == -ETIME)
qpd->reset_wavefronts = true;
 
+   /* Get the SDMA queue stats */
+if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
+}
+
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
 
	list_del(&q->list);
@@ -1468,6 +1520,11 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
}
}
 
+   /* Get the SDMA queue stats */
+   if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+   (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+   update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
+   }
/*
 * Unconditionally decrement this counter, regardless of the queue's
 * type
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 4afa015c69b1..894bcf877f9e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -251,4 +251,6 @@ static inline void dqm_unlock(struct device_queue_manager 
*dqm)
	mutex_unlock(&dqm->lock_hidden);
 }
 
+int read_sdma_queue_counter(struct queue *q, uint64_t *val);
+
 #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f70f789c3cb3..fae139b77c0a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -633,7 +633,14 @@ enum kfd_pdd_bound {
PDD_BOUND_SUSPENDED,
 };
 
-#define MAX_VRAM_FILENAME_LEN 11
+#define MAX_SYSFS_FILENAME_LEN 11
+
+/*
+ * SDMA counter runs at 100MHz frequency.
+ * We display SDMA activity in microsecond granularity in sysfs.
+ * As a result, the divisor is 100.
+ */
+#define SDMA_ACTIVITY_DIVISOR  100
 
 /* Data that is per-process-per device. */
 struct kfd_process_device {
@@ -681,7 +688,12 @@ struct kfd_process_device {
/* VRAM usage */
uint64_t vram_usage;
struct attribute attr_vram;
-  

  1   2   >