[Public]
kfd_interrupt_exit needs to break to two parts, because knode->ih_fifo is
still needed by flush ih_wq. Change like this:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index e9cfb80bd436..270459826147 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -655,19 +655,33 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd,
unsigned int num_nodes)
{
struct kfd_node *knode;
unsigned int i;
+ unsigned long flags;
/*
* flush_work ensures that there are no outstanding
* work-queue items that will access interrupt_ring. New work items
* can't be created because we stopped interrupt handling above.
*/
+ for (i = 0; i < num_nodes; i++) {
+ knode = kfd->nodes[i];
+ spin_lock_irqsave(&knode->interrupt_lock, flags);
+ knode->interrupts_active = false;
+ spin_unlock_irqrestore(&knode->interrupt_lock, flags);
+ }
+
flush_workqueue(kfd->ih_wq);
destroy_workqueue(kfd->ih_wq);
+ for (i = 0; i < num_nodes; i++) {
+ spin_lock_irqsave(&knode->interrupt_lock, flags);
+ knode->kfd->ih_wq = NULL;
+ spin_unlock_irqrestore(&knode->interrupt_lock, flags);
+ kfifo_free(&knode->ih_fifo);
+ }
+
for (i = 0; i < num_nodes; i++) {
knode = kfd->nodes[i];
device_queue_manager_uninit(knode->dqm);
- kfd_interrupt_exit(knode);
kfd_topology_remove_device(knode);
if (knode->gws)
amdgpu_amdkfd_free_gws(knode->adev, knode->gws);
@@ -1125,7 +1139,8 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void
*ih_ring_entry)
unsigned long flags;
struct kfd_node *node;
- if (!kfd->init_complete)
+ if (!kfd->init_complete || !kfd->ih_wq)
return;
if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 783c2f5a04e4..be10c0a9d391 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -87,21 +87,6 @@ int kfd_interrupt_init(struct kfd_node *node)
return 0;
}
-void kfd_interrupt_exit(struct kfd_node *node)
-{
- /*
- * Stop the interrupt handler from writing to the ring and scheduling
- * workqueue items. The spinlock ensures that any interrupt running
- * after we have unlocked sees interrupts_active = false.
- */
- unsigned long flags;
-
- spin_lock_irqsave(&node->interrupt_lock, flags);
- node->interrupts_active = false;
- spin_unlock_irqrestore(&node->interrupt_lock, flags);
- kfifo_free(&node->ih_fifo);
-}
-
________________________________
From: Lazar, Lijo <[email protected]>
Sent: Friday, September 26, 2025 2:49 PM
To: Zhang, Yifan <[email protected]>; Yang, Philip <[email protected]>;
[email protected] <[email protected]>
Cc: Deucher, Alexander <[email protected]>; Kuehling, Felix
<[email protected]>
Subject: RE: [PATCH v4 1/2] amd/amdkfd: resolve a race in
amdgpu_amdkfd_device_fini_sw
[Public]
The intention is to let kgd2kfd_interrupt thread know that KFD is done with
interrupt handling and exit at the earliest (that is even without going through
kfd node loop). I was thinking of checking ih_wq NULL value, but since that
value is not under lock, it's not necessary that kgd2kfd_interrupt thread sees
the NULL value immediately.
> if (!kfd->init_complete && !kfd->ih_wq)
I think this should be if (!kfd->ih_wq || !kfd->init_complete)
For this - kfd_interrupt_exit(knode) - it's better to keep it before
flush_workqueue. That indicates no more queueing is allowed from any node.
Thanks,
Lijo
-----Original Message-----
From: Zhang, Yifan <[email protected]>
Sent: Thursday, September 25, 2025 3:25 PM
To: Lazar, Lijo <[email protected]>; Yang, Philip <[email protected]>;
[email protected]
Cc: Deucher, Alexander <[email protected]>; Kuehling, Felix
<[email protected]>
Subject: RE: [PATCH v4 1/2] amd/amdkfd: resolve a race in
amdgpu_amdkfd_device_fini_sw
[Public]
Hi Lijo, Do you mean a change like below ? Besides readability, is there
functional improvement ?
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index e9cfb80bd436..86676acd9cbe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -663,6 +663,7 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned
int num_nodes)
*/
flush_workqueue(kfd->ih_wq);
destroy_workqueue(kfd->ih_wq);
+ kfd->ih_wq = NULL;
for (i = 0; i < num_nodes; i++) {
knode = kfd->nodes[i];
@@ -1125,7 +1126,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void
*ih_ring_entry)
unsigned long flags;
struct kfd_node *node;
- if (!kfd->init_complete)
+ if (!kfd->init_complete && !kfd->ih_wq)
return;
if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
Regarding the destroy_workqueue(kfd->ih_wq) and queue_work(node->kfd->ih_wq,
&node->interrupt_work) sync, it looks like another issue, should set
node->interrupts_active = false before destroy_workqueue(kfd->ih_wq) is
called. I think something like below:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index e9cfb80bd436..92d9fa99e954 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -662,6 +662,9 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned
int num_nodes)
* can't be created because we stopped interrupt handling above.
*/
flush_workqueue(kfd->ih_wq);
+ for (i = 0; i < num_nodes; i++) {
+ kfd_interrupt_exit(knode);
+ }
destroy_workqueue(kfd->ih_wq);
for (i = 0; i < num_nodes; i++) {
-----Original Message-----
From: Lazar, Lijo <[email protected]>
Sent: Thursday, September 25, 2025 3:06 PM
To: Lazar, Lijo <[email protected]>; Zhang, Yifan <[email protected]>;
Yang, Philip <[email protected]>; [email protected]
Cc: Deucher, Alexander <[email protected]>; Kuehling, Felix
<[email protected]>
Subject: RE: [PATCH v4 1/2] amd/amdkfd: resolve a race in
amdgpu_amdkfd_device_fini_sw
[Public]
On a second thought, probably this will require some locking. For ex: it's
quite possible that destroy_workqueue(kfd->ih_wq) and
queue_work(node->kfd->ih_wq, &node->interrupt_work) could be happening
back-to-back. Node is not yet deleted.
Thanks,
Lijo
-----Original Message-----
From: amd-gfx <[email protected]> On Behalf Of Lazar, Lijo
Sent: Thursday, September 25, 2025 12:29 PM
To: Zhang, Yifan <[email protected]>; Yang, Philip <[email protected]>;
[email protected]
Cc: Deucher, Alexander <[email protected]>; Kuehling, Felix
<[email protected]>
Subject: RE: [PATCH v4 1/2] amd/amdkfd: resolve a race in
amdgpu_amdkfd_device_fini_sw
[Public]
I meant something like this.
destroy_workqueue(kfd->ih_wq);
kfd->ih_wq = NULL;
Then check for NULL at the beginning of kgd2kfd_interrupt. If there is no IH
workqueue, then there is no interrupt handling capability.
May also be within the loop. Not sure if that is really required; if some work
is already scheduled previously, it should be inside flush_workqueue() of
cleanup_nodes.
for (i = 0; kfd->ih_wq && i < kfd->num_nodes; i++)
Thanks,
Lijo
-----Original Message-----
From: Zhang, Yifan <[email protected]>
Sent: Thursday, September 25, 2025 12:11 PM
To: Lazar, Lijo <[email protected]>; Yang, Philip <[email protected]>;
[email protected]
Cc: Deucher, Alexander <[email protected]>; Kuehling, Felix
<[email protected]>
Subject: RE: [PATCH v4 1/2] amd/amdkfd: resolve a race in
amdgpu_amdkfd_device_fini_sw
[Public]
The interrupts are from KGD, still active after flush ih_wq and kfd_dev is
freed. And after knode is freed, node->interrupts_active is also inaccessible.
The race condition is as below:
Interrupt handling switch
partition process
|
|
flush_workqueue(kfd->ih_wq);
|
destroy_workqueue(kfd->ih_wq);
amdgpu_irq_dispatch |
amdgpu_amdkfd_interrupt |
kgd2kfd_interrupt |
|
kfd_cleanup_nodes
| kfree(knode);
spin_lock_irqsave(&node->interrupt_lock, flags); |
//NULL Pointer
-----Original Message-----
From: Lazar, Lijo <[email protected]>
Sent: Thursday, September 25, 2025 2:19 PM
To: Yang, Philip <[email protected]>; Zhang, Yifan <[email protected]>;
[email protected]
Cc: Deucher, Alexander <[email protected]>; Kuehling, Felix
<[email protected]>
Subject: RE: [PATCH v4 1/2] amd/amdkfd: resolve a race in
amdgpu_amdkfd_device_fini_sw
[Public]
> Race if another thread in b/w kfd_cleanup_nodes
This code is there before cleanup of nodes.
flush_workqueue(kfd->ih_wq);
destroy_workqueue(kfd->ih_wq);
Shouldn't the interrupt handling code check if interrupt handling is enabled
rather than checking individual node NULL pointers?
Thanks,
Lijo
-----Original Message-----
From: Yang, Philip <[email protected]>
Sent: Thursday, September 25, 2025 4:19 AM
To: Zhang, Yifan <[email protected]>; [email protected]
Cc: Deucher, Alexander <[email protected]>; Kuehling, Felix
<[email protected]>; Yang, Philip <[email protected]>; Lazar, Lijo
<[email protected]>
Subject: Re: [PATCH v4 1/2] amd/amdkfd: resolve a race in
amdgpu_amdkfd_device_fini_sw
On 2025-09-24 11:29, Yifan Zhang wrote:
> There is race in amdgpu_amdkfd_device_fini_sw and interrupt.
> if amdgpu_amdkfd_device_fini_sw run in b/w kfd_cleanup_nodes and
> kfree(kfd), and KGD interrupt generated.
>
> kernel panic log:
>
> BUG: kernel NULL pointer dereference, address: 0000000000000098 amdgpu
> 0000:c8:00.0: amdgpu: Requesting 4 partitions through PSP
>
> PGD d78c68067 P4D d78c68067
>
> kfd kfd: amdgpu: Allocated 3969056 bytes on gart
>
> PUD 1465b8067 PMD @
>
> Oops: @002 [#1] SMP NOPTI
>
> kfd kfd: amdgpu: Total number of KFD nodes to be created: 4
> CPU: 115 PID: @ Comm: swapper/115 Kdump: loaded Tainted: G S W OE K
>
> RIP: 0010:_raw_spin_lock_irqsave+0x12/0x40
>
> Code: 89 e@ 41 5c c3 cc cc cc cc 66 66 2e Of 1f 84 00 00 00 00 00 OF
> 1f 40 00 Of 1f 44% 00 00 41 54 9c 41 5c fa 31 cO ba 01 00 00 00 <fO>
> OF b1 17 75 Ba 4c 89 e@ 41 Sc
>
> 89 c6 e8 07 38 5d
>
> RSP: 0018: ffffc90@1a6b0e28 EFLAGS: 00010046
>
> RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000018
> 0000000000000001 RSI: ffff8883bb623e00 RDI: 0000000000000098
> ffff8883bb000000 RO8: ffff888100055020 ROO: ffff888100055020
> 0000000000000000 R11: 0000000000000000 R12: 0900000000000002
> ffff888F2b97da0@ R14: @000000000000098 R15: ffff8883babdfo00
>
> CS: 010 DS: 0000 ES: 0000 CRO: 0000000080050033
>
> CR2: 0000000000000098 CR3: 0000000e7cae2006 CR4: 0000000002770ce0
> 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> 0000000000000000 DR6: 00000000fffeO7FO DR7: 0000000000000400
>
> PKRU: 55555554
>
> Call Trace:
>
> <IRQ>
>
> kgd2kfd_interrupt+@x6b/0x1f@ [amdgpu]
>
> ? amdgpu_fence_process+0xa4/0x150 [amdgpu]
>
> kfd kfd: amdgpu: Node: 0, interrupt_bitmap: 3 YcpxFl Rant tErace
>
> amdgpu_irq_dispatch+0x165/0x210 [amdgpu]
>
> amdgpu_ih_process+0x80/0x100 [amdgpu]
>
> amdgpu: Virtual CRAT table created for GPU
>
> amdgpu_irq_handler+0x1f/@x60 [amdgpu]
>
> __handle_irq_event_percpu+0x3d/0x170
>
> amdgpu: Topology: Add dGPU node [0x74a2:0x1002]
>
> handle_irq_event+0x5a/@xcO
>
> handle_edge_irq+0x93/0x240
>
> kfd kfd: amdgpu: KFD node 1 partition @ size 49148M
>
> asm_call_irq_on_stack+0xf/@x20
>
> </IRQ>
>
> common_interrupt+0xb3/0x130
>
> asm_common_interrupt+0x1le/0x40
>
> 5.10.134-010.a1i5000.a18.x86_64 #1
>
> Signed-off-by: Yifan Zhang <[email protected]>
Reviewed-by: Philip Yang<[email protected]>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 10 +++++++++-
> 1 file changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 349c351e242b..051a00152b08 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -1133,7 +1133,15 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void
> *ih_ring_entry)
> }
>
> for (i = 0; i < kfd->num_nodes; i++) {
> - node = kfd->nodes[i];
> + /* Race if another thread in b/w
> + * kfd_cleanup_nodes and kfree(kfd),
> + * when kfd->nodes[i] = NULL
> + */
> + if (kfd->nodes[i])
> + node = kfd->nodes[i];
> + else
> + return;
> +
> spin_lock_irqsave(&node->interrupt_lock, flags);
>
> if (node->interrupts_active