RE: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

2024-07-12 Thread Zhang, Hawking
[AMD Official Use Only - AMD Internal Distribution Only]

BTW, can we consider leveraging psp->mutext to protect both shared memory and 
command submission?

Regards,
Hawking

-Original Message-
From: Chai, Thomas 
Sent: Friday, June 28, 2024 18:01
To: Zhang, Hawking ; amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley 
Subject: RE: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

[AMD Official Use Only - AMD Internal Distribution Only]

OK


-
Best Regards,
Thomas

-Original Message-
From: Zhang, Hawking 
Sent: Friday, June 28, 2024 5:42 PM
To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley ; 
Chai, Thomas 
Subject: RE: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

[AMD Official Use Only - AMD Internal Distribution Only]

+   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);

+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   if (out) {
+   uint32_t *ras_status = (uint32_t *)out;
+
+   *ras_status = ras_cmd->ras_status;
+   }
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;

Shall we make the following check for both TRIGGER_ERROR and QUERY_ADDRESS 
command?

+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;

Regards,
Hawking
-Original Message-
From: amd-gfx  On Behalf Of YiPeng Chai
Sent: Wednesday, June 26, 2024 10:20
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; Li, 
Candice ; Wang, Yang(Kevin) ; Yang, 
Stanley ; Chai, Thomas 
Subject: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

Add mutex to protect ras shared memory.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 124 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c |   2 +
 3 files changed, 87 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index fec968d8340f..323b3c470c5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1573,6 +1573,69 @@ static void psp_ras_ta_check_status(struct psp_context 
*psp)
}
 }

+static int psp_ras_send_cmd(struct psp_context *psp,
+   enum ras_command cmd_id, void *in, void *out) {
+   struct ta_ras_shared_memory *ras_cmd;
+   uint32_t cmd = cmd_id;
+   int ret = 0;
+
+   if (!in)
+   return -EINVAL;
+
+   mutex_lock(>ras_context.mutex);
+   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
+   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__ENABLE_FEATURES:
+   case TA_RAS_COMMAND__DISABLE_FEATURES:
+   memcpy(_cmd->ras_in_message,
+   in, sizeof(ras_cmd->ras_in_message));
+   break;
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   memcpy(_cmd->ras_in_message.trigger_error,
+   in, sizeof(ras_cmd->ras_in_message.trigger_error));
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   memcpy(_cmd->ras_in_message.address,
+   in, sizeof(ras_cmd->ras_in_message.address));
+   break;
+   default:
+   dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+   ret = -EINVAL;
+   goto err_out;
+   }
+
+   ras_cmd->cmd_id = cmd;
+   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   if (out) {
+   uint32_t *ras_status = (uint32_t *)out;
+
+   *ras_status = ras_cmd->ras_status;
+   }
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;
+   else if (out)
+   memcpy(out,
+   _cmd->ras_out_message.address,
+   sizeof(ras_cmd->ras_out_message.address));
+   break;
+   default:
+   break;
+   }
+
+err_out:
+   mutex_unlock(>ras_context.mutex);
+
+   return ret;
+}
+
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)  {
struct ta_ras_shared_memory *ras_cmd; @@ -1614,23 +1677,15 @@ int 
psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)  int 
psp_ras_enable_features(struct 

RE: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

2024-06-28 Thread Chai, Thomas
[AMD Official Use Only - AMD Internal Distribution Only]

OK


-
Best Regards,
Thomas

-Original Message-
From: Zhang, Hawking 
Sent: Friday, June 28, 2024 5:42 PM
To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley ; 
Chai, Thomas 
Subject: RE: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

[AMD Official Use Only - AMD Internal Distribution Only]

+   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);

+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   if (out) {
+   uint32_t *ras_status = (uint32_t *)out;
+
+   *ras_status = ras_cmd->ras_status;
+   }
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;

Shall we make the following check for both TRIGGER_ERROR and QUERY_ADDRESS 
command?

+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;

Regards,
Hawking
-Original Message-
From: amd-gfx  On Behalf Of YiPeng Chai
Sent: Wednesday, June 26, 2024 10:20
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; Li, 
Candice ; Wang, Yang(Kevin) ; Yang, 
Stanley ; Chai, Thomas 
Subject: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

Add mutex to protect ras shared memory.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 124 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c |   2 +
 3 files changed, 87 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index fec968d8340f..323b3c470c5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1573,6 +1573,69 @@ static void psp_ras_ta_check_status(struct psp_context 
*psp)
}
 }

+static int psp_ras_send_cmd(struct psp_context *psp,
+   enum ras_command cmd_id, void *in, void *out) {
+   struct ta_ras_shared_memory *ras_cmd;
+   uint32_t cmd = cmd_id;
+   int ret = 0;
+
+   if (!in)
+   return -EINVAL;
+
+   mutex_lock(>ras_context.mutex);
+   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
+   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__ENABLE_FEATURES:
+   case TA_RAS_COMMAND__DISABLE_FEATURES:
+   memcpy(_cmd->ras_in_message,
+   in, sizeof(ras_cmd->ras_in_message));
+   break;
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   memcpy(_cmd->ras_in_message.trigger_error,
+   in, sizeof(ras_cmd->ras_in_message.trigger_error));
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   memcpy(_cmd->ras_in_message.address,
+   in, sizeof(ras_cmd->ras_in_message.address));
+   break;
+   default:
+   dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+   ret = -EINVAL;
+   goto err_out;
+   }
+
+   ras_cmd->cmd_id = cmd;
+   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   if (out) {
+   uint32_t *ras_status = (uint32_t *)out;
+
+   *ras_status = ras_cmd->ras_status;
+   }
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;
+   else if (out)
+   memcpy(out,
+   _cmd->ras_out_message.address,
+   sizeof(ras_cmd->ras_out_message.address));
+   break;
+   default:
+   break;
+   }
+
+err_out:
+   mutex_unlock(>ras_context.mutex);
+
+   return ret;
+}
+
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)  {
struct ta_ras_shared_memory *ras_cmd; @@ -1614,23 +1677,15 @@ int 
psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)  int 
psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)  {
-   struct ta_ras_shared_memory *ras_cmd;
+   enum ras_command cmd_id;
int ret;

-   if (!psp->ras_context.context.initialized)
+   if (!psp->ras_context.context.initialized || !info)
return -EINVAL;

-   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
-   memset(ras

RE: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

2024-06-28 Thread Zhang, Hawking
[AMD Official Use Only - AMD Internal Distribution Only]

+   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);

+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   if (out) {
+   uint32_t *ras_status = (uint32_t *)out;
+
+   *ras_status = ras_cmd->ras_status;
+   }
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;

Shall we make the following check for both TRIGGER_ERROR and QUERY_ADDRESS 
command?

+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;

Regards,
Hawking
-Original Message-
From: amd-gfx  On Behalf Of YiPeng Chai
Sent: Wednesday, June 26, 2024 10:20
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; Li, 
Candice ; Wang, Yang(Kevin) ; Yang, 
Stanley ; Chai, Thomas 
Subject: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

Add mutex to protect ras shared memory.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 124 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c |   2 +
 3 files changed, 87 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index fec968d8340f..323b3c470c5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1573,6 +1573,69 @@ static void psp_ras_ta_check_status(struct psp_context 
*psp)
}
 }

+static int psp_ras_send_cmd(struct psp_context *psp,
+   enum ras_command cmd_id, void *in, void *out) {
+   struct ta_ras_shared_memory *ras_cmd;
+   uint32_t cmd = cmd_id;
+   int ret = 0;
+
+   if (!in)
+   return -EINVAL;
+
+   mutex_lock(>ras_context.mutex);
+   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
+   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__ENABLE_FEATURES:
+   case TA_RAS_COMMAND__DISABLE_FEATURES:
+   memcpy(_cmd->ras_in_message,
+   in, sizeof(ras_cmd->ras_in_message));
+   break;
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   memcpy(_cmd->ras_in_message.trigger_error,
+   in, sizeof(ras_cmd->ras_in_message.trigger_error));
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   memcpy(_cmd->ras_in_message.address,
+   in, sizeof(ras_cmd->ras_in_message.address));
+   break;
+   default:
+   dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+   ret = -EINVAL;
+   goto err_out;
+   }
+
+   ras_cmd->cmd_id = cmd;
+   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   if (out) {
+   uint32_t *ras_status = (uint32_t *)out;
+
+   *ras_status = ras_cmd->ras_status;
+   }
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;
+   else if (out)
+   memcpy(out,
+   _cmd->ras_out_message.address,
+   sizeof(ras_cmd->ras_out_message.address));
+   break;
+   default:
+   break;
+   }
+
+err_out:
+   mutex_unlock(>ras_context.mutex);
+
+   return ret;
+}
+
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)  {
struct ta_ras_shared_memory *ras_cmd;
@@ -1614,23 +1677,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t 
ta_cmd_id)  int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)  {
-   struct ta_ras_shared_memory *ras_cmd;
+   enum ras_command cmd_id;
int ret;

-   if (!psp->ras_context.context.initialized)
+   if (!psp->ras_context.context.initialized || !info)
return -EINVAL;

-   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
-   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
-   if (enable)
-   ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
-   else
-   ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
-
-   ras_cmd->ras_in_message = *info;
-
-   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+   cmd_id = enable ?
+   TA_RAS_COMMAND__ENABLE_FEATUR

[PATCH] drm/amdgpu: add mutex to protect ras shared memory

2024-06-25 Thread YiPeng Chai
Add mutex to protect ras shared memory.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 124 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c |   2 +
 3 files changed, 87 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index fec968d8340f..323b3c470c5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1573,6 +1573,69 @@ static void psp_ras_ta_check_status(struct psp_context 
*psp)
}
 }
 
+static int psp_ras_send_cmd(struct psp_context *psp,
+   enum ras_command cmd_id, void *in, void *out)
+{
+   struct ta_ras_shared_memory *ras_cmd;
+   uint32_t cmd = cmd_id;
+   int ret = 0;
+
+   if (!in)
+   return -EINVAL;
+
+   mutex_lock(>ras_context.mutex);
+   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
+   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__ENABLE_FEATURES:
+   case TA_RAS_COMMAND__DISABLE_FEATURES:
+   memcpy(_cmd->ras_in_message,
+   in, sizeof(ras_cmd->ras_in_message));
+   break;
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   memcpy(_cmd->ras_in_message.trigger_error,
+   in, sizeof(ras_cmd->ras_in_message.trigger_error));
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   memcpy(_cmd->ras_in_message.address,
+   in, sizeof(ras_cmd->ras_in_message.address));
+   break;
+   default:
+   dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+   ret = -EINVAL;
+   goto err_out;
+   }
+
+   ras_cmd->cmd_id = cmd;
+   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   if (out) {
+   uint32_t *ras_status = (uint32_t *)out;
+
+   *ras_status = ras_cmd->ras_status;
+   }
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;
+   else if (out)
+   memcpy(out,
+   _cmd->ras_out_message.address,
+   sizeof(ras_cmd->ras_out_message.address));
+   break;
+   default:
+   break;
+   }
+
+err_out:
+   mutex_unlock(>ras_context.mutex);
+
+   return ret;
+}
+
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
 {
struct ta_ras_shared_memory *ras_cmd;
@@ -1614,23 +1677,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t 
ta_cmd_id)
 int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)
 {
-   struct ta_ras_shared_memory *ras_cmd;
+   enum ras_command cmd_id;
int ret;
 
-   if (!psp->ras_context.context.initialized)
+   if (!psp->ras_context.context.initialized || !info)
return -EINVAL;
 
-   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
-   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
-   if (enable)
-   ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
-   else
-   ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
-
-   ras_cmd->ras_in_message = *info;
-
-   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+   cmd_id = enable ?
+   TA_RAS_COMMAND__ENABLE_FEATURES : 
TA_RAS_COMMAND__DISABLE_FEATURES;
+   ret = psp_ras_send_cmd(psp, cmd_id, info, NULL);
if (ret)
return -EINVAL;
 
@@ -1654,6 +1709,8 @@ int psp_ras_terminate(struct psp_context *psp)
 
psp->ras_context.context.initialized = false;
 
+   mutex_destroy(>ras_context.mutex);
+
return ret;
 }
 
@@ -1738,9 +1795,10 @@ int psp_ras_initialize(struct psp_context *psp)
 
ret = psp_ta_load(psp, >ras_context.context);
 
-   if (!ret && !ras_cmd->ras_status)
+   if (!ret && !ras_cmd->ras_status) {
psp->ras_context.context.initialized = true;
-   else {
+   mutex_init(>ras_context.mutex);
+   } else {
if (ras_cmd->ras_status)
dev_warn(adev->dev, "RAS Init Status: 0x%X\n", 
ras_cmd->ras_status);
 
@@ -1754,12 +1812,12 @@ int psp_ras_initialize(struct psp_context *psp)
 int psp_ras_trigger_error(struct psp_context *psp,
  struct ta_ras_trigger_error_input *info, uint32_t 
instance_mask)
 {
-   struct ta_ras_shared_memory *ras_cmd;
struct amdgpu_device *adev = psp->adev;

Re: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

2024-04-29 Thread Lazar, Lijo



On 4/28/2024 12:38 PM, YiPeng Chai wrote:
> Add mutex to protect ras shared memory.
> 
> Signed-off-by: YiPeng Chai 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 121 ++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h|   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c |   2 +
>  3 files changed, 84 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index 5583e2d1b12f..fa4fea00f6b4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -1564,6 +1564,66 @@ static void psp_ras_ta_check_status(struct psp_context 
> *psp)
>   }
>  }
>  
> +static int psp_ras_send_cmd(struct psp_context *psp,
> + enum ras_command cmd_id, void *in, void *out)
> +{
> + struct ta_ras_shared_memory *ras_cmd;
> + uint32_t cmd = cmd_id;
> + int ret = 0;
> +
> + mutex_lock(>ras_context.mutex);
> + ras_cmd = (struct ta_ras_shared_memory 
> *)psp->ras_context.context.mem_context.shared_buf;
> + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
> +
> + switch (cmd) {
> + case TA_RAS_COMMAND__ENABLE_FEATURES:
> + case TA_RAS_COMMAND__DISABLE_FEATURES:
> + memcpy(_cmd->ras_in_message,
> + in, sizeof(ras_cmd->ras_in_message));
> + break;
> + case TA_RAS_COMMAND__TRIGGER_ERROR:
> + memcpy(_cmd->ras_in_message.trigger_error,
> + in, sizeof(ras_cmd->ras_in_message.trigger_error));
> + break;
> + case TA_RAS_COMMAND__QUERY_ADDRESS:
> + memcpy(_cmd->ras_in_message.address,
> + in, sizeof(ras_cmd->ras_in_message.address));
> + break;
> + default:
> + dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
> + ret = -EINVAL;
> + goto err_out;
> + }
> +
> + ras_cmd->cmd_id = cmd;
> + ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
> +
> + switch (cmd) {
> + case TA_RAS_COMMAND__TRIGGER_ERROR:
> + if (out) {

As NULL check for 'out' is done before copying, better to do the same
for 'in' also.
> + uint32_t *ras_status = (uint32_t *)out;
> +
> + *ras_status = ras_cmd->ras_status;
> + }
> + break;
> + case TA_RAS_COMMAND__QUERY_ADDRESS:
> + if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
> + ret = -EINVAL;
> + else if (out)
> + memcpy(out,
> + _cmd->ras_out_message.address,
> + sizeof(ras_cmd->ras_out_message.address));
> + break;
> + default:
> + break;
> + }
> +
> +err_out:
> + mutex_unlock(>ras_context.mutex);
> +
> + return ret;
> +}
> +
>  int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
>  {
>   struct ta_ras_shared_memory *ras_cmd;
> @@ -1605,23 +1665,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t 
> ta_cmd_id)
>  int psp_ras_enable_features(struct psp_context *psp,
>   union ta_ras_cmd_input *info, bool enable)
>  {
> - struct ta_ras_shared_memory *ras_cmd;
> + enum ras_command cmd_id;
>   int ret;
>  
> - if (!psp->ras_context.context.initialized)
> + if (!psp->ras_context.context.initialized || !info)
>   return -EINVAL;
>  
> - ras_cmd = (struct ta_ras_shared_memory 
> *)psp->ras_context.context.mem_context.shared_buf;
> - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
> -
> - if (enable)
> - ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
> - else
> - ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
> -
> - ras_cmd->ras_in_message = *info;
> -
> - ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
> + cmd_id = enable ?
> + TA_RAS_COMMAND__ENABLE_FEATURES : 
> TA_RAS_COMMAND__DISABLE_FEATURES;
> + ret = psp_ras_send_cmd(psp, cmd_id, info, NULL);
>   if (ret)
>   return -EINVAL;
>  
> @@ -1645,6 +1697,8 @@ int psp_ras_terminate(struct psp_context *psp)
>  
>   psp->ras_context.context.initialized = false;
>  
> + mutex_destroy(>ras_context.mutex);
> +
>   return ret;
>  }
>  
> @@ -1729,9 +1783,10 @@ int psp_ras_initialize(struct psp_context *psp)
>  
>   ret = psp_ta_load(psp, >ras_context.context);
>  
> - if (!ret && !ras_cmd->ras_status)
> + if (!ret && !ras_cmd->ras_status) {
>   psp->ras_context.context.initialized = true;
> - else {
> + mutex_init(>ras_context.mutex);
> + } else {
>   if (ras_cmd->ras_status)
>   dev_warn(adev->dev, "RAS Init Status: 0x%X\n", 
> ras_cmd->ras_status);
>  
> @@ -1745,12 +1800,12 @@ int psp_ras_initialize(struct psp_context *psp)
>  int psp_ras_trigger_error(struct psp_context *psp,
> 

RE: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

2024-04-28 Thread Chai, Thomas
[AMD Official Use Only - General]

OK


-
Best Regards,
Thomas

-Original Message-
From: Wang, Yang(Kevin) 
Sent: Sunday, April 28, 2024 3:48 PM
To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; Li, 
Candice ; Yang, Stanley 
Subject: RE: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

[AMD Official Use Only - General]

-Original Message-
From: Chai, Thomas 
Sent: Sunday, April 28, 2024 3:08 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; Li, 
Candice ; Wang, Yang(Kevin) ; Yang, 
Stanley ; Chai, Thomas 
Subject: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

Add mutex to protect ras shared memory.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 121 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c |   2 +
 3 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 5583e2d1b12f..fa4fea00f6b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1564,6 +1564,66 @@ static void psp_ras_ta_check_status(struct psp_context 
*psp)
}
 }

+static int psp_ras_send_cmd(struct psp_context *psp,
+   enum ras_command cmd_id, void *in, void *out) {
+   struct ta_ras_shared_memory *ras_cmd;
+   uint32_t cmd = cmd_id;
+   int ret = 0;
+
+   mutex_lock(>ras_context.mutex);
+   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
+   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__ENABLE_FEATURES:
+   case TA_RAS_COMMAND__DISABLE_FEATURES:
+   memcpy(_cmd->ras_in_message,
+   in, sizeof(ras_cmd->ras_in_message));
+   break;
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   memcpy(_cmd->ras_in_message.trigger_error,
+   in, sizeof(ras_cmd->ras_in_message.trigger_error));
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   memcpy(_cmd->ras_in_message.address,
+   in, sizeof(ras_cmd->ras_in_message.address));
+   break;
+   default:
+   dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+   ret = -EINVAL;
+   goto err_out;
+   }
+
+   ras_cmd->cmd_id = cmd;
+   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   if (out) {
+   uint32_t *ras_status = (uint32_t *)out;
[Kevin]:
It's better to check 'ret' value first before use this 'out' data.

Best Regards,
Kevin
+
+   *ras_status = ras_cmd->ras_status;
+   }
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;
+   else if (out)
+   memcpy(out,
+   _cmd->ras_out_message.address,
+   sizeof(ras_cmd->ras_out_message.address));
+   break;
+   default:
+   break;
+   }
+
+err_out:
+   mutex_unlock(>ras_context.mutex);
+
+   return ret;
+}
+
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)  {
struct ta_ras_shared_memory *ras_cmd; @@ -1605,23 +1665,15 @@ int 
psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)  int 
psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)  {
-   struct ta_ras_shared_memory *ras_cmd;
+   enum ras_command cmd_id;
int ret;

-   if (!psp->ras_context.context.initialized)
+   if (!psp->ras_context.context.initialized || !info)
return -EINVAL;

-   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
-   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
-   if (enable)
-   ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
-   else
-   ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
-
-   ras_cmd->ras_in_message = *info;
-
-   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+   cmd_id = enable ?
+   TA_RAS_COMMAND__ENABLE_FEATURES : 
TA_RAS_COMMAND__DISABLE_FEATURES;
+   ret = psp_ras_send_cmd(psp, cmd_id, info, NULL);
if (ret)
return -EINVAL;

@@ -1645,6 +1697,8 @@ int psp_ras_terminate(struct psp_context *psp)

psp->ras_context.context.initialized = false;

+   mutex_destroy(>ras_context.mutex);
+
return ret;
 }

@@ 

RE: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

2024-04-28 Thread Wang, Yang(Kevin)
[AMD Official Use Only - General]

-Original Message-
From: Chai, Thomas 
Sent: Sunday, April 28, 2024 3:08 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; Li, 
Candice ; Wang, Yang(Kevin) ; Yang, 
Stanley ; Chai, Thomas 
Subject: [PATCH] drm/amdgpu: add mutex to protect ras shared memory

Add mutex to protect ras shared memory.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 121 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c |   2 +
 3 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 5583e2d1b12f..fa4fea00f6b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1564,6 +1564,66 @@ static void psp_ras_ta_check_status(struct psp_context 
*psp)
}
 }

+static int psp_ras_send_cmd(struct psp_context *psp,
+   enum ras_command cmd_id, void *in, void *out) {
+   struct ta_ras_shared_memory *ras_cmd;
+   uint32_t cmd = cmd_id;
+   int ret = 0;
+
+   mutex_lock(>ras_context.mutex);
+   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
+   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__ENABLE_FEATURES:
+   case TA_RAS_COMMAND__DISABLE_FEATURES:
+   memcpy(_cmd->ras_in_message,
+   in, sizeof(ras_cmd->ras_in_message));
+   break;
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   memcpy(_cmd->ras_in_message.trigger_error,
+   in, sizeof(ras_cmd->ras_in_message.trigger_error));
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   memcpy(_cmd->ras_in_message.address,
+   in, sizeof(ras_cmd->ras_in_message.address));
+   break;
+   default:
+   dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+   ret = -EINVAL;
+   goto err_out;
+   }
+
+   ras_cmd->cmd_id = cmd;
+   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   if (out) {
+   uint32_t *ras_status = (uint32_t *)out;
[Kevin]:
It's better to check 'ret' value first before use this 'out' data.

Best Regards,
Kevin
+
+   *ras_status = ras_cmd->ras_status;
+   }
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;
+   else if (out)
+   memcpy(out,
+   _cmd->ras_out_message.address,
+   sizeof(ras_cmd->ras_out_message.address));
+   break;
+   default:
+   break;
+   }
+
+err_out:
+   mutex_unlock(>ras_context.mutex);
+
+   return ret;
+}
+
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)  {
struct ta_ras_shared_memory *ras_cmd;
@@ -1605,23 +1665,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t 
ta_cmd_id)  int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)  {
-   struct ta_ras_shared_memory *ras_cmd;
+   enum ras_command cmd_id;
int ret;

-   if (!psp->ras_context.context.initialized)
+   if (!psp->ras_context.context.initialized || !info)
return -EINVAL;

-   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
-   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
-   if (enable)
-   ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
-   else
-   ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
-
-   ras_cmd->ras_in_message = *info;
-
-   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+   cmd_id = enable ?
+   TA_RAS_COMMAND__ENABLE_FEATURES : 
TA_RAS_COMMAND__DISABLE_FEATURES;
+   ret = psp_ras_send_cmd(psp, cmd_id, info, NULL);
if (ret)
return -EINVAL;

@@ -1645,6 +1697,8 @@ int psp_ras_terminate(struct psp_context *psp)

psp->ras_context.context.initialized = false;

+   mutex_destroy(>ras_context.mutex);
+
return ret;
 }

@@ -1729,9 +1783,10 @@ int psp_ras_initialize(struct psp_context *psp)

ret = psp_ta_load(psp, >ras_context.context);

-   if (!ret && !ras_cmd->ras_status)
+   if (!ret && !ras_cmd->ras_status) {
psp->ras_context.context.initialized = true;
-   else {
+   mutex_init(>ras_co

[PATCH] drm/amdgpu: add mutex to protect ras shared memory

2024-04-28 Thread YiPeng Chai
Add mutex to protect ras shared memory.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 121 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c |   2 +
 3 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 5583e2d1b12f..fa4fea00f6b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1564,6 +1564,66 @@ static void psp_ras_ta_check_status(struct psp_context 
*psp)
}
 }
 
+static int psp_ras_send_cmd(struct psp_context *psp,
+   enum ras_command cmd_id, void *in, void *out)
+{
+   struct ta_ras_shared_memory *ras_cmd;
+   uint32_t cmd = cmd_id;
+   int ret = 0;
+
+   mutex_lock(>ras_context.mutex);
+   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
+   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__ENABLE_FEATURES:
+   case TA_RAS_COMMAND__DISABLE_FEATURES:
+   memcpy(_cmd->ras_in_message,
+   in, sizeof(ras_cmd->ras_in_message));
+   break;
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   memcpy(_cmd->ras_in_message.trigger_error,
+   in, sizeof(ras_cmd->ras_in_message.trigger_error));
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   memcpy(_cmd->ras_in_message.address,
+   in, sizeof(ras_cmd->ras_in_message.address));
+   break;
+   default:
+   dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+   ret = -EINVAL;
+   goto err_out;
+   }
+
+   ras_cmd->cmd_id = cmd;
+   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+
+   switch (cmd) {
+   case TA_RAS_COMMAND__TRIGGER_ERROR:
+   if (out) {
+   uint32_t *ras_status = (uint32_t *)out;
+
+   *ras_status = ras_cmd->ras_status;
+   }
+   break;
+   case TA_RAS_COMMAND__QUERY_ADDRESS:
+   if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+   ret = -EINVAL;
+   else if (out)
+   memcpy(out,
+   _cmd->ras_out_message.address,
+   sizeof(ras_cmd->ras_out_message.address));
+   break;
+   default:
+   break;
+   }
+
+err_out:
+   mutex_unlock(>ras_context.mutex);
+
+   return ret;
+}
+
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
 {
struct ta_ras_shared_memory *ras_cmd;
@@ -1605,23 +1665,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t 
ta_cmd_id)
 int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)
 {
-   struct ta_ras_shared_memory *ras_cmd;
+   enum ras_command cmd_id;
int ret;
 
-   if (!psp->ras_context.context.initialized)
+   if (!psp->ras_context.context.initialized || !info)
return -EINVAL;
 
-   ras_cmd = (struct ta_ras_shared_memory 
*)psp->ras_context.context.mem_context.shared_buf;
-   memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
-   if (enable)
-   ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
-   else
-   ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
-
-   ras_cmd->ras_in_message = *info;
-
-   ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+   cmd_id = enable ?
+   TA_RAS_COMMAND__ENABLE_FEATURES : 
TA_RAS_COMMAND__DISABLE_FEATURES;
+   ret = psp_ras_send_cmd(psp, cmd_id, info, NULL);
if (ret)
return -EINVAL;
 
@@ -1645,6 +1697,8 @@ int psp_ras_terminate(struct psp_context *psp)
 
psp->ras_context.context.initialized = false;
 
+   mutex_destroy(>ras_context.mutex);
+
return ret;
 }
 
@@ -1729,9 +1783,10 @@ int psp_ras_initialize(struct psp_context *psp)
 
ret = psp_ta_load(psp, >ras_context.context);
 
-   if (!ret && !ras_cmd->ras_status)
+   if (!ret && !ras_cmd->ras_status) {
psp->ras_context.context.initialized = true;
-   else {
+   mutex_init(>ras_context.mutex);
+   } else {
if (ras_cmd->ras_status)
dev_warn(adev->dev, "RAS Init Status: 0x%X\n", 
ras_cmd->ras_status);
 
@@ -1745,12 +1800,12 @@ int psp_ras_initialize(struct psp_context *psp)
 int psp_ras_trigger_error(struct psp_context *psp,
  struct ta_ras_trigger_error_input *info, uint32_t 
instance_mask)
 {
-   struct ta_ras_shared_memory *ras_cmd;
struct amdgpu_device *adev = psp->adev;
int ret;
uint32_t dev_mask;
+