[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Hawking Zhang <hawking.zh...@amd.com>

Regards,
Hawking
-----Original Message-----
From: Chai, Thomas <yipeng.c...@amd.com>
Sent: Monday, July 15, 2024 14:04
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <hawking.zh...@amd.com>; Zhou1, Tao <tao.zh...@amd.com>; Li, 
Candice <candice...@amd.com>; Wang, Yang(Kevin) <kevinyang.w...@amd.com>; Yang, 
Stanley <stanley.y...@amd.com>; Chai, Thomas <yipeng.c...@amd.com>
Subject: [PATCH V2] drm/amdgpu: add mutex to protect ras shared memory

Add mutex to protect ras shared memory.

v2:
  Add TA_RAS_COMMAND__TRIGGER_ERROR command call
  status check.

Signed-off-by: YiPeng Chai <yipeng.c...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c    | 123 ++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h    |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c |   2 +
 3 files changed, 86 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index ba2abcf92d9c..e97a5e0ce208 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1591,6 +1591,68 @@ static void psp_ras_ta_check_status(struct psp_context 
*psp)
        }
 }

+static int psp_ras_send_cmd(struct psp_context *psp,
+               enum ras_command cmd_id, void *in, void *out) {
+       struct ta_ras_shared_memory *ras_cmd;
+       uint32_t cmd = cmd_id;
+       int ret = 0;
+
+       if (!in)
+               return -EINVAL;
+
+       mutex_lock(&psp->ras_context.mutex);
+       ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
+       memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+       switch (cmd) {
+       case TA_RAS_COMMAND__ENABLE_FEATURES:
+       case TA_RAS_COMMAND__DISABLE_FEATURES:
+               memcpy(&ras_cmd->ras_in_message,
+                       in, sizeof(ras_cmd->ras_in_message));
+               break;
+       case TA_RAS_COMMAND__TRIGGER_ERROR:
+               memcpy(&ras_cmd->ras_in_message.trigger_error,
+                       in, sizeof(ras_cmd->ras_in_message.trigger_error));
+               break;
+       case TA_RAS_COMMAND__QUERY_ADDRESS:
+               memcpy(&ras_cmd->ras_in_message.address,
+                       in, sizeof(ras_cmd->ras_in_message.address));
+               break;
+       default:
+               dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+               ret = -EINVAL;
+               goto err_out;
+       }
+
+       ras_cmd->cmd_id = cmd;
+       ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+
+       switch (cmd) {
+       case TA_RAS_COMMAND__TRIGGER_ERROR:
+               if (ret || psp->cmd_buf_mem->resp.status)
+                       ret = -EINVAL;
+               else if (out)
+                       memcpy(out, &ras_cmd->ras_status, 
sizeof(ras_cmd->ras_status));
+               break;
+       case TA_RAS_COMMAND__QUERY_ADDRESS:
+               if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+                       ret = -EINVAL;
+               else if (out)
+                       memcpy(out,
+                               &ras_cmd->ras_out_message.address,
+                               sizeof(ras_cmd->ras_out_message.address));
+               break;
+       default:
+               break;
+       }
+
+err_out:
+       mutex_unlock(&psp->ras_context.mutex);
+
+       return ret;
+}
+
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)  {
        struct ta_ras_shared_memory *ras_cmd;
@@ -1632,23 +1694,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t 
ta_cmd_id)  int psp_ras_enable_features(struct psp_context *psp,
                union ta_ras_cmd_input *info, bool enable)  {
-       struct ta_ras_shared_memory *ras_cmd;
+       enum ras_command cmd_id;
        int ret;

-       if (!psp->ras_context.context.initialized)
+       if (!psp->ras_context.context.initialized || !info)
                return -EINVAL;

-       ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
-       memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
-       if (enable)
-               ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
-       else
-               ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
-
-       ras_cmd->ras_in_message = *info;
-
-       ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+       cmd_id = enable ?
+               TA_RAS_COMMAND__ENABLE_FEATURES : 
TA_RAS_COMMAND__DISABLE_FEATURES;
+       ret = psp_ras_send_cmd(psp, cmd_id, info, NULL);
        if (ret)
                return -EINVAL;

@@ -1672,6 +1726,8 @@ int psp_ras_terminate(struct psp_context *psp)

        psp->ras_context.context.initialized = false;

+       mutex_destroy(&psp->ras_context.mutex);
+
        return ret;
 }

@@ -1756,9 +1812,10 @@ int psp_ras_initialize(struct psp_context *psp)

        ret = psp_ta_load(psp, &psp->ras_context.context);

-       if (!ret && !ras_cmd->ras_status)
+       if (!ret && !ras_cmd->ras_status) {
                psp->ras_context.context.initialized = true;
-       else {
+               mutex_init(&psp->ras_context.mutex);
+       } else {
                if (ras_cmd->ras_status)
                        dev_warn(adev->dev, "RAS Init Status: 0x%X\n", 
ras_cmd->ras_status);

@@ -1772,12 +1829,12 @@ int psp_ras_initialize(struct psp_context *psp)  int 
psp_ras_trigger_error(struct psp_context *psp,
                          struct ta_ras_trigger_error_input *info, uint32_t 
instance_mask)  {
-       struct ta_ras_shared_memory *ras_cmd;
        struct amdgpu_device *adev = psp->adev;
        int ret;
        uint32_t dev_mask;
+       uint32_t ras_status = 0;

-       if (!psp->ras_context.context.initialized)
+       if (!psp->ras_context.context.initialized || !info)
                return -EINVAL;

        switch (info->block_id) {
@@ -1801,13 +1858,8 @@ int psp_ras_trigger_error(struct psp_context *psp,
        dev_mask &= AMDGPU_RAS_INST_MASK;
        info->sub_block_index |= dev_mask;

-       ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
-       memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
-       ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR;
-       ras_cmd->ras_in_message.trigger_error = *info;
-
-       ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+       ret = psp_ras_send_cmd(psp,
+                       TA_RAS_COMMAND__TRIGGER_ERROR, info, &ras_status);
        if (ret)
                return -EINVAL;

@@ -1817,9 +1869,9 @@ int psp_ras_trigger_error(struct psp_context *psp,
        if (amdgpu_ras_intr_triggered())
                return 0;

-       if (ras_cmd->ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED)
+       if (ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED)
                return -EACCES;
-       else if (ras_cmd->ras_status)
+       else if (ras_status)
                return -EINVAL;

        return 0;
@@ -1829,25 +1881,16 @@ int psp_ras_query_address(struct psp_context *psp,
                          struct ta_ras_query_address_input *addr_in,
                          struct ta_ras_query_address_output *addr_out)  {
-       struct ta_ras_shared_memory *ras_cmd;
        int ret;

-       if (!psp->ras_context.context.initialized)
-               return -EINVAL;
-
-       ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
-       memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
-       ras_cmd->cmd_id = TA_RAS_COMMAND__QUERY_ADDRESS;
-       ras_cmd->ras_in_message.address = *addr_in;
-
-       ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
-       if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+       if (!psp->ras_context.context.initialized ||
+               !addr_in || !addr_out)
                return -EINVAL;

-       *addr_out = ras_cmd->ras_out_message.address;
+       ret = psp_ras_send_cmd(psp,
+                       TA_RAS_COMMAND__QUERY_ADDRESS, addr_in, addr_out);

-       return 0;
+       return ret;
 }
 // ras end

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 3635303e6548..74a96516c913 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -200,6 +200,7 @@ struct psp_xgmi_context {  struct psp_ras_context {
        struct ta_context               context;
        struct amdgpu_ras               *ras;
+       struct mutex                    mutex;
 };

 #define MEM_TRAIN_SYSTEM_SIGNATURE             0x54534942
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
index 8e8afbd237bc..0c856005df6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -348,6 +348,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, 
const char *buf, size

        context->session_id = ta_id;

+       mutex_lock(&psp->ras_context.mutex);
        ret = prep_ta_mem_context(&context->mem_context, shared_buf, 
shared_buf_len);
        if (ret)
                goto err_free_shared_buf;
@@ -366,6 +367,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, 
const char *buf, size
                ret = -EFAULT;

 err_free_shared_buf:
+       mutex_unlock(&psp->ras_context.mutex);
        kfree(shared_buf);

        return ret;
--
2.34.1

Reply via email to