[PATCH] drm/amdkfd: use unmap all queues for poison consumption
Replace reset queue for specific PASID with unmap all queues, reset queue could break CP scheduler. Signed-off-by: Tao Zhou --- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index e8bc28009c22..dca0b5fac1db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -109,8 +109,7 @@ static void event_interrupt_poison_consumption(struct kfd_dev *dev, switch (source_id) { case SOC15_INTSRC_SQ_INTERRUPT_MSG: - if (dev->dqm->ops.reset_queues) - ret = dev->dqm->ops.reset_queues(dev->dqm, pasid); + ret = kfd_process_vm_fault(dev->dqm, pasid); break; case SOC15_INTSRC_SDMA_ECC: default: -- 2.17.1
[PATCH] drm/amdgpu: fix list add issue in vram reserve
The parameter order in the list_add_tail is incorrect, it causes the reuse of ras reserved page. Signed-off-by: Tao Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 7a2b487db57c..6c99ef700cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -281,7 +281,7 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, rsv->mm_node.size = size >> PAGE_SHIFT; spin_lock(&mgr->lock); - list_add_tail(&mgr->reservations_pending, &rsv->node); + list_add_tail(&rsv->node, &mgr->reservations_pending); amdgpu_vram_mgr_do_reserve(&mgr->manager); spin_unlock(&mgr->lock); -- 2.17.1
RE: [PATCH V2 1/2] drm/amdgpu: Fixed the defect of soft lock caused by infinite loop
[AMD Official Use Only] The series is: Reviewed-by: Tao Zhou -Original Message- > From: Chai, Thomas > Sent: Sunday, January 30, 2022 3:12 PM > To: amd-gfx@lists.freedesktop.org > Cc: Chai, Thomas ; Zhang, Hawking > ; Zhou1, Tao ; Clements, > John ; Chai, Thomas > Subject: [PATCH V2 1/2] drm/amdgpu: Fixed the defect of soft lock caused by > infinite loop > > 1. The infinite loop case only occurs on multiple cards support >ras functions. > 2. The explanation of root cause refer to commit 76641cbbf196 >("drm/amdgpu: Add judgement to avoid infinite loop"). > 3. Create new node to manage each unique ras instance to guarantee >each device .ras_list is completely independent. > 4. Fixes: commit 7a6b8ab3231b51 ("drm/amdgpu: Unify ras block >interface for each ras block"). > 5. The soft locked logs are as follows: > [ 262.165690] CPU: 93 PID: 758 Comm: kworker/93:1 Tainted: G OE > 5.13.0-27-generic #29~20.04.1-Ubuntu > [ 262.165695] Hardware name: Supermicro AS -4124GS-TNR/H12DSG-O-CPU, > BIOS T20200717143848 07/17/2020 [ 262.165698] Workqueue: events > amdgpu_ras_do_recovery [amdgpu] [ 262.165980] RIP: > 0010:amdgpu_ras_get_ras_block+0x86/0xd0 [amdgpu] [ 262.166239] Code: 68 > d8 4c 8d 71 d8 48 39 c3 74 54 49 8b 45 38 48 85 c0 74 32 44 89 fa 44 89 e6 4c > 89 > ef e8 82 e4 9b dc 85 c0 74 3c 49 8b 46 28 <49> 8d 56 28 4d 89 f5 48 83 e8 28 > 48 > 39 d3 74 25 49 89 c6 49 8b 45 [ 262.166243] RSP: 0018:ac908fa87d80 > EFLAGS: 0202 [ 262.166247] RAX: c1394248 RBX: 91e4ab8d6e20 > RCX: c1394248 [ 262.166249] RDX: 91e4aa356e20 RSI: > 000e RDI: 91e4ab8c [ 262.166252] RBP: > ac908fa87da8 R08: 0007 R09: 0001 > [ 262.166254] R10: 91e4930b64ec R11: R12: > 000e [ 262.166256] R13: 91e4aa356df8 R14: c1394320 > R15: 0003 [ 262.166258] FS: () > GS:92238fb4() knlGS: [ 262.166261] CS: 0010 > DS: ES: CR0: 80050033 [ 262.166264] CR2: > 0001004865d0 CR3: 00406d796000 CR4: 00350ee0 > [ 262.166267] Call Trace: > [ 262.166272] amdgpu_ras_do_recovery+0x130/0x290 [amdgpu] > [ 262.166529] ? 
psi_task_switch+0xd2/0x250 [ 262.166537] ? > __switch_to+0x11d/0x460 [ 262.166542] ? __switch_to_asm+0x36/0x70 > [ 262.166549] process_one_work+0x220/0x3c0 [ 262.166556] > worker_thread+0x4d/0x3f0 [ 262.166560] ? process_one_work+0x3c0/0x3c0 > [ 262.166563] kthread+0x12b/0x150 [ 262.166568] ? > set_kthread_struct+0x40/0x40 [ 262.166571] ret_from_fork+0x22/0x30 > > Signed-off-by: yipechai > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 ++-- > - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 -- > 2 files changed, 33 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 9d7c778c1a2d..9b94c9c4960c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -75,6 +75,13 @@ const char *ras_mca_block_string[] = { > "mca_iohc", > }; > > +struct amdgpu_ras_block_list { > + /* ras block link */ > + struct list_head node; > + > + struct amdgpu_ras_block_object *ras_obj; }; > + > const char *get_ras_block_str(struct ras_common_if *ras_block) { > if (!ras_block) > @@ -880,7 +887,8 @@ static struct amdgpu_ras_block_object > *amdgpu_ras_get_ras_block(struct amdgpu_de > enum amdgpu_ras_block block, > uint32_t sub_block_index) { > int loop_cnt = 0; > - struct amdgpu_ras_block_object *obj, *tmp; > + struct amdgpu_ras_block_list *node, *tmp; > + struct amdgpu_ras_block_object *obj; > > if (block >= AMDGPU_RAS_BLOCK__LAST) > return NULL; > @@ -888,7 +896,13 @@ static struct amdgpu_ras_block_object > *amdgpu_ras_get_ras_block(struct amdgpu_de > if (!amdgpu_ras_is_supported(adev, block)) > return NULL; > > - list_for_each_entry_safe(obj, tmp, >ras_list, node) { > + list_for_each_entry_safe(node, tmp, >ras_list, node) { > + if (!node->ras_obj) { > + dev_warn(adev->dev, "Warning: abnormal ras list > node.\n"); > + continue; > + } > + > + obj = node->ras_obj; > if (obj->ras_block_match) { > if (obj->ras_block_match(obj, block, sub_block_index) > == 0) > 
return obj; > @@ -2527,6 +2541,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) > > int amdgpu_ras_fini(struct amdgpu_device *adev) { > + struct amdgpu_ras_block_list *ras_node, *tmp; > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); > > if (!adev->ras_enabled || !con) > @@ -2545,6 +2560,12 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) > amdgpu_ras_set_context(adev,
[PATCH V2 2/2] Revert "drm/amdgpu: Add judgement to avoid infinite loop"
The commit 8583c8983f1b ("drm/amdgpu: Fixed the defect of soft lock caused by infinite loop") had fixed this defect. Revert workaround commit 76641cbbf196 ("drm/amdgpu: Add judgement to avoid infinite loop"). Signed-off-by: yipechai --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9b94c9c4960c..5558df3b21f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -886,7 +886,6 @@ static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object *block_ static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint32_t sub_block_index) { - int loop_cnt = 0; struct amdgpu_ras_block_list *node, *tmp; struct amdgpu_ras_block_object *obj; @@ -910,9 +909,6 @@ static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_de if (amdgpu_ras_block_match_default(obj, block) == 0) return obj; } - - if (++loop_cnt >= AMDGPU_RAS_BLOCK__LAST) - break; } return NULL; -- 2.25.1
[PATCH V2 1/2] drm/amdgpu: Fixed the defect of soft lock caused by infinite loop
1. The infinite loop case only occurs on multiple cards support ras functions. 2. The explanation of root cause refer to commit 76641cbbf196 ("drm/amdgpu: Add judgement to avoid infinite loop"). 3. Create new node to manage each unique ras instance to guarantee each device .ras_list is completely independent. 4. Fixes: commit 7a6b8ab3231b51 ("drm/amdgpu: Unify ras block interface for each ras block"). 5. The soft locked logs are as follows: [ 262.165690] CPU: 93 PID: 758 Comm: kworker/93:1 Tainted: G OE 5.13.0-27-generic #29~20.04.1-Ubuntu [ 262.165695] Hardware name: Supermicro AS -4124GS-TNR/H12DSG-O-CPU, BIOS T20200717143848 07/17/2020 [ 262.165698] Workqueue: events amdgpu_ras_do_recovery [amdgpu] [ 262.165980] RIP: 0010:amdgpu_ras_get_ras_block+0x86/0xd0 [amdgpu] [ 262.166239] Code: 68 d8 4c 8d 71 d8 48 39 c3 74 54 49 8b 45 38 48 85 c0 74 32 44 89 fa 44 89 e6 4c 89 ef e8 82 e4 9b dc 85 c0 74 3c 49 8b 46 28 <49> 8d 56 28 4d 89 f5 48 83 e8 28 48 39 d3 74 25 49 89 c6 49 8b 45 [ 262.166243] RSP: 0018:ac908fa87d80 EFLAGS: 0202 [ 262.166247] RAX: c1394248 RBX: 91e4ab8d6e20 RCX: c1394248 [ 262.166249] RDX: 91e4aa356e20 RSI: 000e RDI: 91e4ab8c [ 262.166252] RBP: ac908fa87da8 R08: 0007 R09: 0001 [ 262.166254] R10: 91e4930b64ec R11: R12: 000e [ 262.166256] R13: 91e4aa356df8 R14: c1394320 R15: 0003 [ 262.166258] FS: () GS:92238fb4() knlGS: [ 262.166261] CS: 0010 DS: ES: CR0: 80050033 [ 262.166264] CR2: 0001004865d0 CR3: 00406d796000 CR4: 00350ee0 [ 262.166267] Call Trace: [ 262.166272] amdgpu_ras_do_recovery+0x130/0x290 [amdgpu] [ 262.166529] ? psi_task_switch+0xd2/0x250 [ 262.166537] ? __switch_to+0x11d/0x460 [ 262.166542] ? __switch_to_asm+0x36/0x70 [ 262.166549] process_one_work+0x220/0x3c0 [ 262.166556] worker_thread+0x4d/0x3f0 [ 262.166560] ? process_one_work+0x3c0/0x3c0 [ 262.166563] kthread+0x12b/0x150 [ 262.166568] ? 
set_kthread_struct+0x40/0x40 [ 262.166571] ret_from_fork+0x22/0x30 Signed-off-by: yipechai --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 -- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9d7c778c1a2d..9b94c9c4960c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -75,6 +75,13 @@ const char *ras_mca_block_string[] = { "mca_iohc", }; +struct amdgpu_ras_block_list { + /* ras block link */ + struct list_head node; + + struct amdgpu_ras_block_object *ras_obj; +}; + const char *get_ras_block_str(struct ras_common_if *ras_block) { if (!ras_block) @@ -880,7 +887,8 @@ static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_de enum amdgpu_ras_block block, uint32_t sub_block_index) { int loop_cnt = 0; - struct amdgpu_ras_block_object *obj, *tmp; + struct amdgpu_ras_block_list *node, *tmp; + struct amdgpu_ras_block_object *obj; if (block >= AMDGPU_RAS_BLOCK__LAST) return NULL; @@ -888,7 +896,13 @@ static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_de if (!amdgpu_ras_is_supported(adev, block)) return NULL; - list_for_each_entry_safe(obj, tmp, >ras_list, node) { + list_for_each_entry_safe(node, tmp, >ras_list, node) { + if (!node->ras_obj) { + dev_warn(adev->dev, "Warning: abnormal ras list node.\n"); + continue; + } + + obj = node->ras_obj; if (obj->ras_block_match) { if (obj->ras_block_match(obj, block, sub_block_index) == 0) return obj; @@ -2527,6 +2541,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) int amdgpu_ras_fini(struct amdgpu_device *adev) { + struct amdgpu_ras_block_list *ras_node, *tmp; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); if (!adev->ras_enabled || !con) @@ -2545,6 +2560,12 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) amdgpu_ras_set_context(adev, NULL); kfree(con); + /* Clear ras 
blocks from ras_list and free ras block list node */ + list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) { + list_del(&ras_node->node); + kfree(ras_node); + } + return 0; } @@ -2754,14 +2775,22 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj) { + struct
RE: [PATCH 1/2] drm/amdgpu: Fixed the defect of soft lock caused by infinite loop
OK -Original Message- From: Zhou1, Tao Sent: Sunday, January 30, 2022 11:20 AM To: Chai, Thomas ; amd-gfx@lists.freedesktop.org Cc: Zhang, Hawking ; Clements, John Subject: RE: [PATCH 1/2] drm/amdgpu: Fixed the defect of soft lock caused by infinite loop [AMD Official Use Only] > -Original Message- > From: Chai, Thomas > Sent: Saturday, January 29, 2022 8:34 PM > To: amd-gfx@lists.freedesktop.org > Cc: Chai, Thomas ; Zhang, Hawking > ; Zhou1, Tao ; Clements, > John ; Chai, Thomas > Subject: [PATCH 1/2] drm/amdgpu: Fixed the defect of soft lock caused > by infinite loop > > 1. The infinite loop case only occurs on multiple cards support >ras functions. > 2. The explanation of root cause refer to 76641cbbf196523b5752c6cf68f86. > 3. Create new node to manage each unique ras instance to guarantee >each device .ras_list is completely independent. > 4. Fixes:7a6b8ab3231b511915cb94cac1debabf093. > 5. The soft locked logs are as follows: > [ 262.165690] CPU: 93 PID: 758 Comm: kworker/93:1 Tainted: G OE > 5.13.0-27-generic #29~20.04.1-Ubuntu > [ 262.165695] Hardware name: Supermicro AS -4124GS-TNR/H12DSG-O-CPU, > BIOS T20200717143848 07/17/2020 [ 262.165698] Workqueue: events > amdgpu_ras_do_recovery [amdgpu] [ 262.165980] RIP: > 0010:amdgpu_ras_get_ras_block+0x86/0xd0 [amdgpu] [ 262.166239] Code: > 68 > d8 4c 8d 71 d8 48 39 c3 74 54 49 8b 45 38 48 85 c0 74 32 44 89 fa 44 > 89 e6 4c 89 ef e8 82 e4 9b dc 85 c0 74 3c 49 8b 46 28 <49> 8d 56 28 4d > 89 f5 48 83 e8 28 48 > 39 d3 74 25 49 89 c6 49 8b 45 [ 262.166243] RSP: > 0018:ac908fa87d80 > EFLAGS: 0202 [ 262.166247] RAX: c1394248 RBX: > 91e4ab8d6e20 > RCX: c1394248 [ 262.166249] RDX: 91e4aa356e20 RSI: > 000e RDI: 91e4ab8c [ 262.166252] RBP: > ac908fa87da8 R08: 0007 R09: 0001 [ > 262.166254] R10: 91e4930b64ec R11: R12: > 000e [ 262.166256] R13: 91e4aa356df8 R14: > c1394320 > R15: 0003 [ 262.166258] FS: () > GS:92238fb4() knlGS: [ 262.166261] CS: > 0010 > DS: ES: CR0: 80050033 [ 262.166264] CR2: > 0001004865d0 CR3: 00406d796000 
CR4: 00350ee0 [ > 262.166267] Call Trace: > [ 262.166272] amdgpu_ras_do_recovery+0x130/0x290 [amdgpu] [ > 262.166529] ? psi_task_switch+0xd2/0x250 [ 262.166537] ? > __switch_to+0x11d/0x460 [ 262.166542] ? __switch_to_asm+0x36/0x70 [ > 262.166549] process_one_work+0x220/0x3c0 [ 262.166556] > worker_thread+0x4d/0x3f0 [ 262.166560] ? > process_one_work+0x3c0/0x3c0 [ 262.166563] kthread+0x12b/0x150 [ > 262.166568] ? > set_kthread_struct+0x40/0x40 [ 262.166571] ret_from_fork+0x22/0x30 > > Signed-off-by: yipechai > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 ++-- > - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 -- > 2 files changed, 33 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 9d7c778c1a2d..b0aa67308c31 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -75,6 +75,13 @@ const char *ras_mca_block_string[] = { > "mca_iohc", > }; > > +struct amdgpu_ras_block_list { > + /* ras block link */ > + struct list_head node; > + > + struct amdgpu_ras_block_object *ras_obj; }; > + > const char *get_ras_block_str(struct ras_common_if *ras_block) { > if (!ras_block) > @@ -880,7 +887,8 @@ static struct amdgpu_ras_block_object > *amdgpu_ras_get_ras_block(struct amdgpu_de > enum amdgpu_ras_block block, > uint32_t sub_block_index) { > int loop_cnt = 0; > - struct amdgpu_ras_block_object *obj, *tmp; > + struct amdgpu_ras_block_list *node, *tmp; > + struct amdgpu_ras_block_object *obj; > > if (block >= AMDGPU_RAS_BLOCK__LAST) > return NULL; > @@ -888,7 +896,13 @@ static struct amdgpu_ras_block_object > *amdgpu_ras_get_ras_block(struct amdgpu_de > if (!amdgpu_ras_is_supported(adev, block)) > return NULL; > > - list_for_each_entry_safe(obj, tmp, >ras_list, node) { > + list_for_each_entry_safe(node, tmp, >ras_list, node) { > + if (!node->ras_obj) { > + DRM_ERROR("Warning: abnormal ras list node"); [Tao]: dev_warn is recommended. 
> + continue; > + } > + > + obj = node->ras_obj; > if (obj->ras_block_match) { > if (obj->ras_block_match(obj, block, sub_block_index) > == 0) > return obj; > @@ -2527,6 +2541,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device > *adev) > > int amdgpu_ras_fini(struct amdgpu_device *adev) { > + struct amdgpu_ras_block_list *ras_node, *tmp; > struct amdgpu_ras *con =
RE: [PATCH 1/2] drm/amdgpu: Fixed the defect of soft lock caused by infinite loop
[AMD Official Use Only] > -Original Message- > From: Chai, Thomas > Sent: Saturday, January 29, 2022 8:34 PM > To: amd-gfx@lists.freedesktop.org > Cc: Chai, Thomas ; Zhang, Hawking > ; Zhou1, Tao ; Clements, > John ; Chai, Thomas > Subject: [PATCH 1/2] drm/amdgpu: Fixed the defect of soft lock caused by > infinite loop > > 1. The infinite loop case only occurs on multiple cards support >ras functions. > 2. The explanation of root cause refer to 76641cbbf196523b5752c6cf68f86. > 3. Create new node to manage each unique ras instance to guarantee >each device .ras_list is completely independent. > 4. Fixes:7a6b8ab3231b511915cb94cac1debabf093. > 5. The soft locked logs are as follows: > [ 262.165690] CPU: 93 PID: 758 Comm: kworker/93:1 Tainted: G OE > 5.13.0-27-generic #29~20.04.1-Ubuntu > [ 262.165695] Hardware name: Supermicro AS -4124GS-TNR/H12DSG-O-CPU, > BIOS T20200717143848 07/17/2020 [ 262.165698] Workqueue: events > amdgpu_ras_do_recovery [amdgpu] [ 262.165980] RIP: > 0010:amdgpu_ras_get_ras_block+0x86/0xd0 [amdgpu] [ 262.166239] Code: 68 > d8 4c 8d 71 d8 48 39 c3 74 54 49 8b 45 38 48 85 c0 74 32 44 89 fa 44 89 e6 4c > 89 > ef e8 82 e4 9b dc 85 c0 74 3c 49 8b 46 28 <49> 8d 56 28 4d 89 f5 48 83 e8 28 > 48 > 39 d3 74 25 49 89 c6 49 8b 45 [ 262.166243] RSP: 0018:ac908fa87d80 > EFLAGS: 0202 [ 262.166247] RAX: c1394248 RBX: 91e4ab8d6e20 > RCX: c1394248 [ 262.166249] RDX: 91e4aa356e20 RSI: > 000e RDI: 91e4ab8c [ 262.166252] RBP: > ac908fa87da8 R08: 0007 R09: 0001 > [ 262.166254] R10: 91e4930b64ec R11: R12: > 000e [ 262.166256] R13: 91e4aa356df8 R14: c1394320 > R15: 0003 [ 262.166258] FS: () > GS:92238fb4() knlGS: [ 262.166261] CS: 0010 > DS: ES: CR0: 80050033 [ 262.166264] CR2: > 0001004865d0 CR3: 00406d796000 CR4: 00350ee0 > [ 262.166267] Call Trace: > [ 262.166272] amdgpu_ras_do_recovery+0x130/0x290 [amdgpu] > [ 262.166529] ? psi_task_switch+0xd2/0x250 [ 262.166537] ? > __switch_to+0x11d/0x460 [ 262.166542] ? 
__switch_to_asm+0x36/0x70 > [ 262.166549] process_one_work+0x220/0x3c0 [ 262.166556] > worker_thread+0x4d/0x3f0 [ 262.166560] ? process_one_work+0x3c0/0x3c0 > [ 262.166563] kthread+0x12b/0x150 [ 262.166568] ? > set_kthread_struct+0x40/0x40 [ 262.166571] ret_from_fork+0x22/0x30 > > Signed-off-by: yipechai > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 ++-- > - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 -- > 2 files changed, 33 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 9d7c778c1a2d..b0aa67308c31 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -75,6 +75,13 @@ const char *ras_mca_block_string[] = { > "mca_iohc", > }; > > +struct amdgpu_ras_block_list { > + /* ras block link */ > + struct list_head node; > + > + struct amdgpu_ras_block_object *ras_obj; }; > + > const char *get_ras_block_str(struct ras_common_if *ras_block) { > if (!ras_block) > @@ -880,7 +887,8 @@ static struct amdgpu_ras_block_object > *amdgpu_ras_get_ras_block(struct amdgpu_de > enum amdgpu_ras_block block, > uint32_t sub_block_index) { > int loop_cnt = 0; > - struct amdgpu_ras_block_object *obj, *tmp; > + struct amdgpu_ras_block_list *node, *tmp; > + struct amdgpu_ras_block_object *obj; > > if (block >= AMDGPU_RAS_BLOCK__LAST) > return NULL; > @@ -888,7 +896,13 @@ static struct amdgpu_ras_block_object > *amdgpu_ras_get_ras_block(struct amdgpu_de > if (!amdgpu_ras_is_supported(adev, block)) > return NULL; > > - list_for_each_entry_safe(obj, tmp, >ras_list, node) { > + list_for_each_entry_safe(node, tmp, >ras_list, node) { > + if (!node->ras_obj) { > + DRM_ERROR("Warning: abnormal ras list node"); [Tao]: dev_warn is recommended. 
> + continue; > + } > + > + obj = node->ras_obj; > if (obj->ras_block_match) { > if (obj->ras_block_match(obj, block, sub_block_index) > == 0) > return obj; > @@ -2527,6 +2541,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) > > int amdgpu_ras_fini(struct amdgpu_device *adev) { > + struct amdgpu_ras_block_list *ras_node, *tmp; > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); > > if (!adev->ras_enabled || !con) > @@ -2545,6 +2560,12 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) > amdgpu_ras_set_context(adev, NULL); > kfree(con); > > + /* Clear ras blocks from ras_list and free ras block list node */ > +
RE: [PATCH] drm/amdgpu: Fix uninitialized variable use warning
[Public] Reviewed-by: Guchun Chen Hi @Lijo Lazar, Can you pls submit your patch to drm-next soon? This indeed fixes the regression caused by the rlc indirect reg access related patches. Regards, Guchun -Original Message- From: amd-gfx On Behalf Of Lijo Lazar Sent: Friday, January 28, 2022 2:40 PM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; kernel test robot ; Zhang, Hawking Subject: [PATCH] drm/amdgpu: Fix uninitialized variable use warning Fix uninitialized variable use warning: variable 'reg_access_ctrl' is uninitialized when used here [-Wuninitialized] scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0; Fixes: 51263163eb3f("drm/amdgpu: add helper for rlcg indirect reg access") Reported-by: kernel test robot Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 80c25176c993..c13765218919 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -875,6 +875,7 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v return 0; } + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl; scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0; scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1; scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2; -- 2.25.1
[PATCH 2/2] Revert "drm/amdgpu: Add judgement to avoid infinite loop"
The commit 8583c8983f1b ("drm/amdgpu: Fixed the defect of soft lock caused by infinite loop") had fixed this defect. Revert workaround commit 76641cbbf196 ("drm/amdgpu: Add judgement to avoid infinite loop"). Signed-off-by: yipechai --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index b0aa67308c31..5a43a220e9fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -886,7 +886,6 @@ static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object *block_ static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint32_t sub_block_index) { - int loop_cnt = 0; struct amdgpu_ras_block_list *node, *tmp; struct amdgpu_ras_block_object *obj; @@ -910,9 +909,6 @@ static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_de if (amdgpu_ras_block_match_default(obj, block) == 0) return obj; } - - if (++loop_cnt >= AMDGPU_RAS_BLOCK__LAST) - break; } return NULL; -- 2.25.1
[PATCH 1/2] drm/amdgpu: Fixed the defect of soft lock caused by infinite loop
1. The infinite loop case only occurs on multiple cards support ras functions. 2. The explanation of root cause refer to 76641cbbf196523b5752c6cf68f86. 3. Create new node to manage each unique ras instance to guarantee each device .ras_list is completely independent. 4. Fixes:7a6b8ab3231b511915cb94cac1debabf093. 5. The soft locked logs are as follows: [ 262.165690] CPU: 93 PID: 758 Comm: kworker/93:1 Tainted: G OE 5.13.0-27-generic #29~20.04.1-Ubuntu [ 262.165695] Hardware name: Supermicro AS -4124GS-TNR/H12DSG-O-CPU, BIOS T20200717143848 07/17/2020 [ 262.165698] Workqueue: events amdgpu_ras_do_recovery [amdgpu] [ 262.165980] RIP: 0010:amdgpu_ras_get_ras_block+0x86/0xd0 [amdgpu] [ 262.166239] Code: 68 d8 4c 8d 71 d8 48 39 c3 74 54 49 8b 45 38 48 85 c0 74 32 44 89 fa 44 89 e6 4c 89 ef e8 82 e4 9b dc 85 c0 74 3c 49 8b 46 28 <49> 8d 56 28 4d 89 f5 48 83 e8 28 48 39 d3 74 25 49 89 c6 49 8b 45 [ 262.166243] RSP: 0018:ac908fa87d80 EFLAGS: 0202 [ 262.166247] RAX: c1394248 RBX: 91e4ab8d6e20 RCX: c1394248 [ 262.166249] RDX: 91e4aa356e20 RSI: 000e RDI: 91e4ab8c [ 262.166252] RBP: ac908fa87da8 R08: 0007 R09: 0001 [ 262.166254] R10: 91e4930b64ec R11: R12: 000e [ 262.166256] R13: 91e4aa356df8 R14: c1394320 R15: 0003 [ 262.166258] FS: () GS:92238fb4() knlGS: [ 262.166261] CS: 0010 DS: ES: CR0: 80050033 [ 262.166264] CR2: 0001004865d0 CR3: 00406d796000 CR4: 00350ee0 [ 262.166267] Call Trace: [ 262.166272] amdgpu_ras_do_recovery+0x130/0x290 [amdgpu] [ 262.166529] ? psi_task_switch+0xd2/0x250 [ 262.166537] ? __switch_to+0x11d/0x460 [ 262.166542] ? __switch_to_asm+0x36/0x70 [ 262.166549] process_one_work+0x220/0x3c0 [ 262.166556] worker_thread+0x4d/0x3f0 [ 262.166560] ? process_one_work+0x3c0/0x3c0 [ 262.166563] kthread+0x12b/0x150 [ 262.166568] ? 
set_kthread_struct+0x40/0x40 [ 262.166571] ret_from_fork+0x22/0x30 Signed-off-by: yipechai --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 -- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9d7c778c1a2d..b0aa67308c31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -75,6 +75,13 @@ const char *ras_mca_block_string[] = { "mca_iohc", }; +struct amdgpu_ras_block_list { + /* ras block link */ + struct list_head node; + + struct amdgpu_ras_block_object *ras_obj; +}; + const char *get_ras_block_str(struct ras_common_if *ras_block) { if (!ras_block) @@ -880,7 +887,8 @@ static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_de enum amdgpu_ras_block block, uint32_t sub_block_index) { int loop_cnt = 0; - struct amdgpu_ras_block_object *obj, *tmp; + struct amdgpu_ras_block_list *node, *tmp; + struct amdgpu_ras_block_object *obj; if (block >= AMDGPU_RAS_BLOCK__LAST) return NULL; @@ -888,7 +896,13 @@ static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_de if (!amdgpu_ras_is_supported(adev, block)) return NULL; - list_for_each_entry_safe(obj, tmp, >ras_list, node) { + list_for_each_entry_safe(node, tmp, >ras_list, node) { + if (!node->ras_obj) { + DRM_ERROR("Warning: abnormal ras list node"); + continue; + } + + obj = node->ras_obj; if (obj->ras_block_match) { if (obj->ras_block_match(obj, block, sub_block_index) == 0) return obj; @@ -2527,6 +2541,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) int amdgpu_ras_fini(struct amdgpu_device *adev) { + struct amdgpu_ras_block_list *ras_node, *tmp; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); if (!adev->ras_enabled || !con) @@ -2545,6 +2560,12 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) amdgpu_ras_set_context(adev, NULL); kfree(con); + /* Clear ras blocks from 
ras_list and free ras block list node */ + list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) { + list_del(&ras_node->node); + kfree(ras_node); + } + return 0; } @@ -2754,14 +2775,22 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj) { + struct amdgpu_ras_block_list *ras_node; if (!adev || !ras_block_obj) return -EINVAL;
Re: [PATCH RESEND] drm/amd/display: Force link_rate as LINK_RATE_RBR2 for 2018 15" Apple Retina panels
> On 28-Jan-2022, at 8:33 PM, Harry Wentland wrote: > > I think either leaving the 2017 quirk in its original place or moving it down > works. I don't have a strong preference. I’d better leave it in the original place then > > With the comment style addressed this patch is Sending a v2 now with this issue addressed. > Reviewed-by: Harry Wentland > > Harry
[PATCH 1/2] drm/amd/amdgpu/amdgpu_uvd: Fix forgotten unmap buffer object
after the buffer object is successfully mapped, call amdgpu_bo_kunmap before the function returns. Signed-off-by: zhanglianjie diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 6f8de11a17f1..9cc23b220537 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -834,6 +834,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, handle = msg[2]; if (handle == 0) { + amdgpu_bo_kunmap(bo); DRM_ERROR("Invalid UVD handle!\n"); return -EINVAL; } @@ -892,6 +893,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); } + amdgpu_bo_kunmap(bo); return -EINVAL; } -- 2.20.1
[PATCH -next] drm/amd/display: clean up some inconsistent indenting
Eliminate the following smatch warning: drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c:2246 dp_perform_8b_10b_link_training() warn: inconsistent indenting Reported-by: Abaci Robot Signed-off-by: Yang Li --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index daaec3164875..34ffcd5bb1d7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2243,11 +2243,11 @@ static enum link_training_result dp_perform_8b_10b_link_training( if (status == LINK_TRAINING_SUCCESS) { status = perform_clock_recovery_sequence(link, link_res, lt_settings, DPRX); - if (status == LINK_TRAINING_SUCCESS) { - status = perform_channel_equalization_sequence(link, - link_res, - lt_settings, - DPRX); + if (status == LINK_TRAINING_SUCCESS) { + status = perform_channel_equalization_sequence(link, + link_res, + lt_settings, + DPRX); } } -- 2.20.1.7.g153144c
[PATCH v2] drm/amd/display: Force link_rate as LINK_RATE_RBR2 for 2018 15" Apple Retina panels
From: Aun-Ali Zaidi The eDP link rate reported by the DP_MAX_LINK_RATE dpcd register (0xa) is contradictory to the highest rate supported reported by EDID (0xc = LINK_RATE_RBR2). The effects of this compounded with commit '4a8ca46bae8a ("drm/amd/display: Default max bpc to 16 for eDP")' results in no display modes being found and a dark panel. For now, simply force the maximum supported link rate for the eDP attached 2018 15" Apple Retina panels. Additionally, we must also check the firmware revision since the device ID reported by the DPCD is identical to that of the more capable 16,1, incorrectly quirking it. We also use said firmware check to quirk the refreshed 15,1 models with Vega graphics as they use a slightly newer firmware version. Tested-by: Aun-Ali Zaidi Reviewed-by: Harry Wentland Signed-off-by: Aun-Ali Zaidi Signed-off-by: Aditya Garg --- v2 :- Use C styled comments .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 20 +++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 05e216524..086f7ee2c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -5597,6 +5597,26 @@ static bool retrieve_link_cap(struct dc_link *link) dp_hw_fw_revision.ieee_fw_rev, sizeof(dp_hw_fw_revision.ieee_fw_rev)); + /* Quirk for Apple MBP 2018 15" Retina panels: wrong DP_MAX_LINK_RATE */ + { + uint8_t str_mbp_2018[] = { 101, 68, 21, 103, 98, 97 }; + uint8_t fwrev_mbp_2018[] = { 7, 4 }; + uint8_t fwrev_mbp_2018_vega[] = { 8, 4 }; + + /* We also check for the firmware revision as 16,1 models have an +* identical device id and are incorrectly quirked otherwise. 
+*/ + if ((link->dpcd_caps.sink_dev_id == 0x0010fa) && + !memcmp(link->dpcd_caps.sink_dev_id_str, str_mbp_2018, +sizeof(str_mbp_2018)) && + (!memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018, +sizeof(fwrev_mbp_2018)) || + !memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018_vega, +sizeof(fwrev_mbp_2018_vega)))) { + link->reported_link_cap.link_rate = LINK_RATE_RBR2; + } + } + memset(&link->dpcd_caps.dsc_caps, '\0', sizeof(link->dpcd_caps.dsc_caps)); memset(&link->dpcd_caps.fec_cap, '\0', sizeof(link->dpcd_caps.fec_cap)); -- 2.25.1
[PATCH 2/2] drm/radeon/uvd: Fix forgotten unmap buffer objects
After the buffer object is successfully mapped, call radeon_bo_kunmap before the function returns. Signed-off-by: zhanglianjie diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 377f9cdb5b53..c5482f7793db 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -497,6 +497,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, handle = msg[2]; if (handle == 0) { + radeon_bo_kunmap(bo); DRM_ERROR("Invalid UVD handle!\n"); return -EINVAL; } @@ -559,11 +560,10 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, return 0; default: - DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); - return -EINVAL; } + radeon_bo_kunmap(bo); BUG(); return -EINVAL; } -- 2.20.1
RE: [PATCH] drm/amdgpu: Add judgement to avoid infinite loop
I have a solution for this defect; I am debugging the modifications. -Original Message- From: Zhou1, Tao Sent: Saturday, January 29, 2022 3:54 PM To: Chai, Thomas ; amd-gfx@lists.freedesktop.org Cc: Zhang, Hawking ; Clements, John Subject: RE: [PATCH] drm/amdgpu: Add judgement to avoid infinite loop [AMD Official Use Only] For quick workaround, I agree with the solution. But regarding the root cause, the list is still messed up. Can we make ras_list a global variable across all cards, and add list empty check (or add a flag to indicate the register status of ras block) before list add to avoid redundant registration? Regards, Tao > -Original Message- > From: Chai, Thomas > Sent: Saturday, January 29, 2022 11:53 AM > To: amd-gfx@lists.freedesktop.org > Cc: Chai, Thomas ; Zhang, Hawking > ; Zhou1, Tao ; Clements, > John ; Chai, Thomas > Subject: [PATCH] drm/amdgpu: Add judgement to avoid infinite loop > > 1. The infinite loop causing soft lock occurs on multiple amdgpu cards >supporting ras feature. > 2. This is a workaround patch. It is valid for multiple amdgpu cards of the >same type. > 3. The root cause is that each GPU card device has a separate .ras_list >link header, but the instance and linked list node of each ras block >are unique. When each device is initialized, each ras instance will >repeatedly add link node to the device every time. In this way, only >the .ras_list of the last initialized device is completely correct. >the .ras_list->prev and .ras_list->next of the device initialized >before can still point to the correct ras instance, but the prev >pointer and next pointer of the pointed ras instance both point to >the last initialized device's .ras_list instead of the beginning >.ras_list.
When list_for_each_entry_safe is used to search for >non-existent ras nodes on devices other than the last device, the >last ras instance next pointer cannot always be equal to the >beginning .ras_list, so that the loop cannot be terminated, and the >program enters an infinite loop. > BTW: Since the data and initialization process of each card are the same, > the link list between ras instances will not be destroyed every time > the device is initialized. > 4. The soft locked logs are as follows: > [ 262.165690] CPU: 93 PID: 758 Comm: kworker/93:1 Tainted: G OE > 5.13.0-27-generic #29~20.04.1-Ubuntu > [ 262.165695] Hardware name: Supermicro AS -4124GS-TNR/H12DSG-O-CPU, > BIOS T20200717143848 07/17/2020 [ 262.165698] Workqueue: events > amdgpu_ras_do_recovery [amdgpu] [ 262.165980] RIP: > 0010:amdgpu_ras_get_ras_block+0x86/0xd0 [amdgpu] [ 262.166239] Code: > 68 > d8 4c 8d 71 d8 48 39 c3 74 54 49 8b 45 38 48 85 c0 74 32 44 89 fa 44 > 89 e6 4c 89 ef e8 82 e4 9b dc 85 c0 74 3c 49 8b 46 28 <49> 8d 56 28 4d > 89 f5 48 83 e8 28 48 > 39 d3 74 25 49 89 c6 49 8b 45 [ 262.166243] RSP: > 0018:ac908fa87d80 > EFLAGS: 0202 [ 262.166247] RAX: c1394248 RBX: > 91e4ab8d6e20 > RCX: c1394248 [ 262.166249] RDX: 91e4aa356e20 RSI: > 000e RDI: 91e4ab8c [ 262.166252] RBP: > ac908fa87da8 R08: 0007 R09: 0001 [ > 262.166254] R10: 91e4930b64ec R11: R12: > 000e [ 262.166256] R13: 91e4aa356df8 R14: > c1394320 > R15: 0003 [ 262.166258] FS: () > GS:92238fb4() knlGS: [ 262.166261] CS: > 0010 > DS: ES: CR0: 80050033 [ 262.166264] CR2: > 0001004865d0 CR3: 00406d796000 CR4: 00350ee0 [ > 262.166267] Call Trace: > [ 262.166272] amdgpu_ras_do_recovery+0x130/0x290 [amdgpu] [ > 262.166529] ? psi_task_switch+0xd2/0x250 [ 262.166537] ? > __switch_to+0x11d/0x460 [ 262.166542] ? __switch_to_asm+0x36/0x70 [ > 262.166549] process_one_work+0x220/0x3c0 [ 262.166556] > worker_thread+0x4d/0x3f0 [ 262.166560] ? > process_one_work+0x3c0/0x3c0 [ 262.166563] kthread+0x12b/0x150 [ > 262.166568] ?
> set_kthread_struct+0x40/0x40 [ 262.166571] ret_from_fork+0x22/0x30 > > Signed-off-by: yipechai > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 > 1 file changed, 4 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index d4e07d0acb66..3d533ef0783d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -884,6 +884,7 @@ static int amdgpu_ras_block_match_default(struct > amdgpu_ras_block_object *block_ static struct amdgpu_ras_block_object > *amdgpu_ras_get_ras_block(struct amdgpu_device *adev, > enum amdgpu_ras_block block, > uint32_t sub_block_index) { > + int loop_cnt = 0; > struct amdgpu_ras_block_object *obj, *tmp; > > if (block >= AMDGPU_RAS_BLOCK__LAST) @@ -900,6 +901,9 @@ static > struct