[AMD Official Use Only - AMD Internal Distribution Only] -----Original Message----- From: amd-gfx <[email protected]> On Behalf Of YiPeng Chai Sent: Thursday, November 13, 2025 16:42 To: [email protected] Cc: Zhang, Hawking <[email protected]>; Zhou1, Tao <[email protected]>; Li, Candice <[email protected]>; Yang, Stanley <[email protected]>; Su, Joe <[email protected]>; Chai, Thomas <[email protected]> Subject: [PATCH 2/3] drm/amdgpu: Add lock to serialize sriov command execution
Add lock to serialize sriov command execution. Signed-off-by: YiPeng Chai <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 ++ drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 17 ++++++++++++----- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f2ce8f506aa8..47a6ce4fdc74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -917,6 +917,7 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev) RATELIMIT_MSG_ON_RELEASE); mutex_init(&adev->virt.ras.ras_telemetry_mutex); + mutex_init(&adev->virt.access_req_mutex); [kevin]: It seems the matching mutex_destroy() call is missing from this change, which can lead to resource leaks. Btw, both of the locks above have this issue. Best Regards, Kevin adev->virt.ras.cper_rptr = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 14d864be5800..8e61cf52c946 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -324,6 +324,8 @@ struct amdgpu_virt { /* Spinlock to protect access to the RLCG register interface */ spinlock_t rlcg_reg_lock; + struct mutex access_req_mutex; + union amd_sriov_ras_caps ras_en_caps; union amd_sriov_ras_caps ras_telemetry_en_caps; struct amdgpu_virt_ras ras; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index cd5b2f07edb8..e7cd07383d56 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -173,13 +173,17 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, enum idh_request req, u32 data1, u32 data2, u32 data3) { - int r, retry = 1; + struct amdgpu_virt *virt = &adev->virt; + int r = 0, retry = 1; enum idh_event 
event = -1; + mutex_lock(&virt->access_req_mutex); send_request: - if (amdgpu_ras_is_rma(adev)) - return -ENODEV; + if (amdgpu_ras_is_rma(adev)) { + r = -ENODEV; + goto out; + } xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3); @@ -217,7 +221,7 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, if (req != IDH_REQ_GPU_INIT_DATA) { dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r); - return r; + goto out; } else /* host doesn't support REQ_GPU_INIT_DATA handshake */ adev->virt.req_init_data_ver = 0; } else { @@ -246,7 +250,10 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, } } - return 0; +out: + mutex_unlock(&virt->access_req_mutex); + + return r; } static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, -- 2.34.1
