RE: [PATCH v5 7/9] drm/amd/pm: Add vangogh throttler translation

2021-06-08 Thread Lazar, Lijo
[Public]

Didn't realize 16-bits will max out so fast. For THM_GFX - 
"SMU_THROTTLER_TEMP_GPU_BIT" looks appropriate. SOC domain will need a new one. 
As temp throttling reasons are more, you may shift the "OTHERS" by 8-bits if 
required. 

Thanks,
Lijo

-Original Message-
From: Sider, Graham  
Sent: Monday, June 7, 2021 8:23 PM
To: Lazar, Lijo ; amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish ; Sakhnovitch, 
Elena (Elen) 
Subject: RE: [PATCH v5 7/9] drm/amd/pm: Add vangogh throttler translation

Great, thanks for all the feedback Lijo. Out of the new bit definitions in 
amdgpu_smu.h are there any that currently exist that are more applicable for 
these mappings? *_THM_GFX and *_THM_SOC only exist in VanGogh and Renoir. With 
the expansion of the MEM and LIQUID bits there is not enough room in the 
temperature field to add two new definitions.

Best,
Graham

-Original Message-
From: Lazar, Lijo  
Sent: Monday, June 7, 2021 10:35 AM
To: Sider, Graham ; amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish ; Sakhnovitch, 
Elena (Elen) 
Subject: Re: [PATCH v5 7/9] drm/amd/pm: Add vangogh throttler translation



On 6/7/2021 7:14 PM, Graham Sider wrote:
> Perform dependent to independent throttle status translation for 
> vangogh.
> 
> Signed-off-by: Graham Sider 
> ---
>   .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 38 ++-
>   1 file changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> index 77f532a49e37..589304367929 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> @@ -190,6 +190,20 @@ static struct cmn2asic_mapping 
> vangogh_workload_map[PP_SMC_POWER_PROFILE_COUNT]
>   WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM,   
> WORKLOAD_PPLIB_CUSTOM_BIT),
>   };
>   
> +static const uint8_t vangogh_throttler_map[] = {
> + [THROTTLER_STATUS_BIT_SPL]  = (SMU_THROTTLER_SPL_BIT),
> + [THROTTLER_STATUS_BIT_FPPT] = (SMU_THROTTLER_FPPT_BIT),
> + [THROTTLER_STATUS_BIT_SPPT] = (SMU_THROTTLER_SPPT_BIT),
> + [THROTTLER_STATUS_BIT_SPPT_APU] = (SMU_THROTTLER_SPPT_APU_BIT),
> + [THROTTLER_STATUS_BIT_THM_CORE] = (SMU_THROTTLER_TEMP_CORE_BIT),
> + [THROTTLER_STATUS_BIT_THM_GFX]  = (SMU_THROTTLER_TEMP_VR_GFX_BIT),
> + [THROTTLER_STATUS_BIT_THM_SOC]  = (SMU_THROTTLER_TEMP_VR_SOC_BIT),

Above two mappings don't look correct. They essentially mean throttling due to 
GFX/SOC domain temperatures in APU exceeding their limits, not the VR 
temperatures. Except those mappings, rest of the patch series looks good to me.

Thanks,
Lijo

> + [THROTTLER_STATUS_BIT_TDC_VDD]  = (SMU_THROTTLER_TDC_VDD_BIT),
> + [THROTTLER_STATUS_BIT_TDC_SOC]  = (SMU_THROTTLER_TDC_SOC_BIT),
> + [THROTTLER_STATUS_BIT_TDC_GFX]  = (SMU_THROTTLER_TDC_GFX_BIT),
> + [THROTTLER_STATUS_BIT_TDC_CVIP] = (SMU_THROTTLER_TDC_CVIP_BIT),
> +};
> +
>   static int vangogh_tables_init(struct smu_context *smu)
>   {
>   struct smu_table_context *smu_table = >smu_table; @@ -226,7 
> +240,7 @@ static int vangogh_tables_init(struct smu_context *smu)
>   goto err0_out;
>   smu_table->metrics_time = 0;
>   
> - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_1);
> + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2);
>   smu_table->gpu_metrics_table = 
> kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
>   if (!smu_table->gpu_metrics_table)
>   goto err1_out;
> @@ -1632,8 +1646,8 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct 
> smu_context *smu,
> void **table)
>   {
>   struct smu_table_context *smu_table = >smu_table;
> - struct gpu_metrics_v2_1 *gpu_metrics =
> - (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table;
> + struct gpu_metrics_v2_2 *gpu_metrics =
> + (struct gpu_metrics_v2_2 *)smu_table->gpu_metrics_table;
>   SmuMetrics_legacy_t metrics;
>   int ret = 0;
>   
> @@ -1641,7 +1655,7 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct 
> smu_context *smu,
>   if (ret)
>   return ret;
>   
> - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1);
> + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 2);
>   
>   gpu_metrics->temperature_gfx = metrics.GfxTemperature;
>   gpu_metrics->temperature_soc = metrics.SocTemperature; @@ -1674,20 
> +1688,23 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context 
> *smu,
>   gpu_metrics->current_l3clk[0] = metrics.L3Frequency[0];
>   
>

RE: [PATCH v5 7/9] drm/amd/pm: Add vangogh throttler translation

2021-06-07 Thread Sider, Graham
Great, thanks for all the feedback Lijo. Out of the new bit definitions in 
amdgpu_smu.h are there any that currently exist that are more applicable for 
these mappings? *_THM_GFX and *_THM_SOC only exist in VanGogh and Renoir. With 
the expansion of the MEM and LIQUID bits there is not enough room in the 
temperature field to add two new definitions.

Best,
Graham

-Original Message-
From: Lazar, Lijo  
Sent: Monday, June 7, 2021 10:35 AM
To: Sider, Graham ; amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish ; Sakhnovitch, 
Elena (Elen) 
Subject: Re: [PATCH v5 7/9] drm/amd/pm: Add vangogh throttler translation



On 6/7/2021 7:14 PM, Graham Sider wrote:
> Perform dependent to independent throttle status translation for 
> vangogh.
> 
> Signed-off-by: Graham Sider 
> ---
>   .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 38 ++-
>   1 file changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> index 77f532a49e37..589304367929 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> @@ -190,6 +190,20 @@ static struct cmn2asic_mapping 
> vangogh_workload_map[PP_SMC_POWER_PROFILE_COUNT]
>   WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM,   
> WORKLOAD_PPLIB_CUSTOM_BIT),
>   };
>   
> +static const uint8_t vangogh_throttler_map[] = {
> + [THROTTLER_STATUS_BIT_SPL]  = (SMU_THROTTLER_SPL_BIT),
> + [THROTTLER_STATUS_BIT_FPPT] = (SMU_THROTTLER_FPPT_BIT),
> + [THROTTLER_STATUS_BIT_SPPT] = (SMU_THROTTLER_SPPT_BIT),
> + [THROTTLER_STATUS_BIT_SPPT_APU] = (SMU_THROTTLER_SPPT_APU_BIT),
> + [THROTTLER_STATUS_BIT_THM_CORE] = (SMU_THROTTLER_TEMP_CORE_BIT),
> + [THROTTLER_STATUS_BIT_THM_GFX]  = (SMU_THROTTLER_TEMP_VR_GFX_BIT),
> + [THROTTLER_STATUS_BIT_THM_SOC]  = (SMU_THROTTLER_TEMP_VR_SOC_BIT),

Above two mappings don't look correct. They essentially mean throttling due to 
GFX/SOC domain temperatures in APU exceeding their limits, not the VR 
temperatures. Except those mappings, rest of the patch series looks good to me.

Thanks,
Lijo

> + [THROTTLER_STATUS_BIT_TDC_VDD]  = (SMU_THROTTLER_TDC_VDD_BIT),
> + [THROTTLER_STATUS_BIT_TDC_SOC]  = (SMU_THROTTLER_TDC_SOC_BIT),
> + [THROTTLER_STATUS_BIT_TDC_GFX]  = (SMU_THROTTLER_TDC_GFX_BIT),
> + [THROTTLER_STATUS_BIT_TDC_CVIP] = (SMU_THROTTLER_TDC_CVIP_BIT),
> +};
> +
>   static int vangogh_tables_init(struct smu_context *smu)
>   {
>   struct smu_table_context *smu_table = >smu_table; @@ -226,7 
> +240,7 @@ static int vangogh_tables_init(struct smu_context *smu)
>   goto err0_out;
>   smu_table->metrics_time = 0;
>   
> - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_1);
> + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2);
>   smu_table->gpu_metrics_table = 
> kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
>   if (!smu_table->gpu_metrics_table)
>   goto err1_out;
> @@ -1632,8 +1646,8 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct 
> smu_context *smu,
> void **table)
>   {
>   struct smu_table_context *smu_table = >smu_table;
> - struct gpu_metrics_v2_1 *gpu_metrics =
> - (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table;
> + struct gpu_metrics_v2_2 *gpu_metrics =
> + (struct gpu_metrics_v2_2 *)smu_table->gpu_metrics_table;
>   SmuMetrics_legacy_t metrics;
>   int ret = 0;
>   
> @@ -1641,7 +1655,7 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct 
> smu_context *smu,
>   if (ret)
>   return ret;
>   
> - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1);
> + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 2);
>   
>   gpu_metrics->temperature_gfx = metrics.GfxTemperature;
>   gpu_metrics->temperature_soc = metrics.SocTemperature; @@ -1674,20 
> +1688,23 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context 
> *smu,
>   gpu_metrics->current_l3clk[0] = metrics.L3Frequency[0];
>   
>   gpu_metrics->throttle_status = metrics.ThrottlerStatus;
> + gpu_metrics->indep_throttle_status =
> + 
> smu_cmn_get_indep_throttler_status(metrics.ThrottlerStatus,
> +
> vangogh_throttler_map);
>   
>   gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
>   
>   *table = (void *)gpu_metrics;
>   
> - return sizeof(struct gpu_metrics_v2_1);
> + return sizeof(struct gpu_metric

Re: [PATCH v5 7/9] drm/amd/pm: Add vangogh throttler translation

2021-06-07 Thread Lazar, Lijo




On 6/7/2021 7:14 PM, Graham Sider wrote:

Perform dependent to independent throttle status translation
for vangogh.

Signed-off-by: Graham Sider 
---
  .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 38 ++-
  1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 77f532a49e37..589304367929 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -190,6 +190,20 @@ static struct cmn2asic_mapping 
vangogh_workload_map[PP_SMC_POWER_PROFILE_COUNT]
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM,   
WORKLOAD_PPLIB_CUSTOM_BIT),
  };
  
+static const uint8_t vangogh_throttler_map[] = {

+   [THROTTLER_STATUS_BIT_SPL]  = (SMU_THROTTLER_SPL_BIT),
+   [THROTTLER_STATUS_BIT_FPPT] = (SMU_THROTTLER_FPPT_BIT),
+   [THROTTLER_STATUS_BIT_SPPT] = (SMU_THROTTLER_SPPT_BIT),
+   [THROTTLER_STATUS_BIT_SPPT_APU] = (SMU_THROTTLER_SPPT_APU_BIT),
+   [THROTTLER_STATUS_BIT_THM_CORE] = (SMU_THROTTLER_TEMP_CORE_BIT),
+   [THROTTLER_STATUS_BIT_THM_GFX]  = (SMU_THROTTLER_TEMP_VR_GFX_BIT),
+   [THROTTLER_STATUS_BIT_THM_SOC]  = (SMU_THROTTLER_TEMP_VR_SOC_BIT),


Above two mappings don't look correct. They essentially mean throttling 
due to GFX/SOC domain temperatures in APU exceeding their limits, not 
the VR temperatures. Except those mappings, rest of the patch series 
looks good to me.


Thanks,
Lijo


+   [THROTTLER_STATUS_BIT_TDC_VDD]  = (SMU_THROTTLER_TDC_VDD_BIT),
+   [THROTTLER_STATUS_BIT_TDC_SOC]  = (SMU_THROTTLER_TDC_SOC_BIT),
+   [THROTTLER_STATUS_BIT_TDC_GFX]  = (SMU_THROTTLER_TDC_GFX_BIT),
+   [THROTTLER_STATUS_BIT_TDC_CVIP] = (SMU_THROTTLER_TDC_CVIP_BIT),
+};
+
  static int vangogh_tables_init(struct smu_context *smu)
  {
struct smu_table_context *smu_table = >smu_table;
@@ -226,7 +240,7 @@ static int vangogh_tables_init(struct smu_context *smu)
goto err0_out;
smu_table->metrics_time = 0;
  
-	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_1);

+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2);
smu_table->gpu_metrics_table = 
kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
if (!smu_table->gpu_metrics_table)
goto err1_out;
@@ -1632,8 +1646,8 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct 
smu_context *smu,
  void **table)
  {
struct smu_table_context *smu_table = >smu_table;
-   struct gpu_metrics_v2_1 *gpu_metrics =
-   (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v2_2 *gpu_metrics =
+   (struct gpu_metrics_v2_2 *)smu_table->gpu_metrics_table;
SmuMetrics_legacy_t metrics;
int ret = 0;
  
@@ -1641,7 +1655,7 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context *smu,

if (ret)
return ret;
  
-	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1);

+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 2);
  
  	gpu_metrics->temperature_gfx = metrics.GfxTemperature;

gpu_metrics->temperature_soc = metrics.SocTemperature;
@@ -1674,20 +1688,23 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct 
smu_context *smu,
gpu_metrics->current_l3clk[0] = metrics.L3Frequency[0];
  
  	gpu_metrics->throttle_status = metrics.ThrottlerStatus;

+   gpu_metrics->indep_throttle_status =
+   
smu_cmn_get_indep_throttler_status(metrics.ThrottlerStatus,
+  
vangogh_throttler_map);
  
  	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
  
  	*table = (void *)gpu_metrics;
  
-	return sizeof(struct gpu_metrics_v2_1);

+   return sizeof(struct gpu_metrics_v2_2);
  }
  
  static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu,

  void **table)
  {
struct smu_table_context *smu_table = >smu_table;
-   struct gpu_metrics_v2_1 *gpu_metrics =
-   (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v2_2 *gpu_metrics =
+   (struct gpu_metrics_v2_2 *)smu_table->gpu_metrics_table;
SmuMetrics_t metrics;
int ret = 0;
  
@@ -1695,7 +1712,7 @@ static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu,

if (ret)
return ret;
  
-	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1);

+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 2);
  
  	gpu_metrics->temperature_gfx = metrics.Current.GfxTemperature;

gpu_metrics->temperature_soc = metrics.Current.SocTemperature;
@@ -1735,12 +1752,15 @@ static ssize_t vangogh_get_gpu_metrics(struct 
smu_context *smu,
gpu_metrics->current_l3clk[0] = metrics.Current.L3Frequency[0];