RE: [PATCH] drm/amdgpu: Add get_gfx_off_status interface

2022-07-20 Thread Chen, Guchun
*@smu: amdgpu_device pointer

I guess there is a typo here; smu is not an amdgpu_device pointer.

Regards,
Guchun

-----Original Message-----
From: amd-gfx  On Behalf Of 
shikai@amd.com
Sent: Thursday, July 21, 2022 2:20 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Liang, Prike 
; Quan, Evan ; Guo, Shikai 

Subject: [PATCH] drm/amdgpu: Add get_gfx_off_status interface

From: Shikai Guo 

add get_gfx_off_status interface to yellow_carp_ppt_funcs structure.

Signed-off-by: Shikai Guo 
---
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c  | 30 +++
 1 file changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index 70cbc46341a3..cac48121d72b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -31,6 +31,7 @@
 #include "smu_v13_0_1_ppsmc.h"
 #include "smu_v13_0_1_pmfw.h"
 #include "smu_cmn.h"
+#include "asic_reg/smuio/smuio_13_0_2_offset.h"
 
 /*
  * DO NOT use these for err/warn/info/debug messages.
@@ -42,6 +43,9 @@
 #undef pr_info
 #undef pr_debug
 
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK    0x0006L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT  0x1
+
 #define FEATURE_MASK(feature) (1ULL << feature)
 #define SMC_DPM_FEATURE ( \
FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \
@@ -587,6 +591,31 @@ static ssize_t yellow_carp_get_gpu_metrics(struct smu_context *smu,
return sizeof(struct gpu_metrics_v2_1);
 }
 
+/**
+ * yellow_carp_get_gfxoff_status - get gfxoff status
+ *
+ * @smu: amdgpu_device pointer
+ *
+ * This function will be used to get gfxoff status
+ *
+ * Returns 0=GFXOFF(default).
+ * Returns 1=Transition out of GFX State.
+ * Returns 2=Not in GFXOFF.
+ * Returns 3=Transition into GFXOFF.
+ */
+static uint32_t yellow_carp_get_gfxoff_status(struct smu_context *smu) 
+{
+   uint32_t reg;
+   uint32_t gfxOff_Status = 0;
+   struct amdgpu_device *adev = smu->adev;
+
+   reg = RREG32_SOC15(SMUIO, 0, regSMUIO_GFX_MISC_CNTL);
+   gfxOff_Status = (reg & SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK)
+   >> SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT;
+
+   return gfxOff_Status;
+}
+
 static int yellow_carp_set_default_dpm_tables(struct smu_context *smu)
 {
struct smu_table_context *smu_table = &smu->smu_table;
@@ -1186,6 +1215,7 @@ static const struct pptable_funcs yellow_carp_ppt_funcs = {
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_driver_table_location = smu_v13_0_set_driver_table_location,
.gfx_off_control = smu_v13_0_gfx_off_control,
+   .get_gfx_off_status = yellow_carp_get_gfxoff_status,
.post_init = yellow_carp_post_smu_init,
.mode2_reset = yellow_carp_mode2_reset,
.get_dpm_ultimate_freq = yellow_carp_get_dpm_ultimate_freq,
--
2.25.1



RE: [PATCH] drm/amdgpu: Add get_gfx_off_status interface

2022-07-20 Thread Quan, Evan
[AMD Official Use Only - General]

Reviewed-by: Evan Quan 

> -----Original Message-----
> From: Guo, Shikai 
> Sent: Thursday, July 21, 2022 2:20 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Quan, Evan ; Deucher, Alexander
> ; Liang, Prike ; Guo,
> Shikai ; Guo, Shikai 
> Subject: [PATCH] drm/amdgpu: Add get_gfx_off_status interface
> 
> From: Shikai Guo 
> 
> add get_gfx_off_status interface to yellow_carp_ppt_funcs structure.
> 
> Signed-off-by: Shikai Guo 
> ---
>  .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c  | 30
> +++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
> index 70cbc46341a3..cac48121d72b 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
> @@ -31,6 +31,7 @@
>  #include "smu_v13_0_1_ppsmc.h"
>  #include "smu_v13_0_1_pmfw.h"
>  #include "smu_cmn.h"
> +#include "asic_reg/smuio/smuio_13_0_2_offset.h"
> 
>  /*
>   * DO NOT use these for err/warn/info/debug messages.
> @@ -42,6 +43,9 @@
>  #undef pr_info
>  #undef pr_debug
> 
> +#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK
>   0x0006L
> +#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT
>   0x1
> +
>  #define FEATURE_MASK(feature) (1ULL << feature)
>  #define SMC_DPM_FEATURE ( \
>   FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \
> @@ -587,6 +591,31 @@ static ssize_t yellow_carp_get_gpu_metrics(struct
> smu_context *smu,
>   return sizeof(struct gpu_metrics_v2_1);
>  }
> 
> +/**
> + * yellow_carp_get_gfxoff_status - get gfxoff status
> + *
> + * @smu: amdgpu_device pointer
> + *
> + * This function will be used to get gfxoff status
> + *
> + * Returns 0=GFXOFF(default).
> + * Returns 1=Transition out of GFX State.
> + * Returns 2=Not in GFXOFF.
> + * Returns 3=Transition into GFXOFF.
> + */
> +static uint32_t yellow_carp_get_gfxoff_status(struct smu_context *smu)
> +{
> + uint32_t reg;
> + uint32_t gfxOff_Status = 0;
> + struct amdgpu_device *adev = smu->adev;
> +
> + reg = RREG32_SOC15(SMUIO, 0, regSMUIO_GFX_MISC_CNTL);
> + gfxOff_Status = (reg &
> SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK)
> + >>
> SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT;
> +
> + return gfxOff_Status;
> +}
> +
>  static int yellow_carp_set_default_dpm_tables(struct smu_context *smu)
>  {
>   struct smu_table_context *smu_table = &smu->smu_table;
> @@ -1186,6 +1215,7 @@ static const struct pptable_funcs
> yellow_carp_ppt_funcs = {
>   .get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
>   .set_driver_table_location = smu_v13_0_set_driver_table_location,
>   .gfx_off_control = smu_v13_0_gfx_off_control,
> + .get_gfx_off_status = yellow_carp_get_gfxoff_status,
>   .post_init = yellow_carp_post_smu_init,
>   .mode2_reset = yellow_carp_mode2_reset,
>   .get_dpm_ultimate_freq = yellow_carp_get_dpm_ultimate_freq,
> --
> 2.25.1


[PATCH] drm/amdgpu: Add get_gfx_off_status interface

2022-07-20 Thread shikai.guo
From: Shikai Guo 

add get_gfx_off_status interface to yellow_carp_ppt_funcs structure.

Signed-off-by: Shikai Guo 
---
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c  | 30 +++
 1 file changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index 70cbc46341a3..cac48121d72b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -31,6 +31,7 @@
 #include "smu_v13_0_1_ppsmc.h"
 #include "smu_v13_0_1_pmfw.h"
 #include "smu_cmn.h"
+#include "asic_reg/smuio/smuio_13_0_2_offset.h"
 
 /*
  * DO NOT use these for err/warn/info/debug messages.
@@ -42,6 +43,9 @@
 #undef pr_info
 #undef pr_debug
 
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK    0x0006L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT  0x1
+
 #define FEATURE_MASK(feature) (1ULL << feature)
 #define SMC_DPM_FEATURE ( \
FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \
@@ -587,6 +591,31 @@ static ssize_t yellow_carp_get_gpu_metrics(struct 
smu_context *smu,
return sizeof(struct gpu_metrics_v2_1);
 }
 
+/**
+ * yellow_carp_get_gfxoff_status - get gfxoff status
+ *
+ * @smu: amdgpu_device pointer
+ *
+ * This function will be used to get gfxoff status
+ *
+ * Returns 0=GFXOFF(default).
+ * Returns 1=Transition out of GFX State.
+ * Returns 2=Not in GFXOFF.
+ * Returns 3=Transition into GFXOFF.
+ */
+static uint32_t yellow_carp_get_gfxoff_status(struct smu_context *smu)
+{
+   uint32_t reg;
+   uint32_t gfxOff_Status = 0;
+   struct amdgpu_device *adev = smu->adev;
+
+   reg = RREG32_SOC15(SMUIO, 0, regSMUIO_GFX_MISC_CNTL);
+   gfxOff_Status = (reg & SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK)
+   >> SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT;
+
+   return gfxOff_Status;
+}
+
 static int yellow_carp_set_default_dpm_tables(struct smu_context *smu)
 {
struct smu_table_context *smu_table = &smu->smu_table;
@@ -1186,6 +1215,7 @@ static const struct pptable_funcs yellow_carp_ppt_funcs = 
{
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_driver_table_location = smu_v13_0_set_driver_table_location,
.gfx_off_control = smu_v13_0_gfx_off_control,
+   .get_gfx_off_status = yellow_carp_get_gfxoff_status,
.post_init = yellow_carp_post_smu_init,
.mode2_reset = yellow_carp_mode2_reset,
.get_dpm_ultimate_freq = yellow_carp_get_dpm_ultimate_freq,
-- 
2.25.1



[PATCH 5/5] drm/amd/display: reduce stack for dml32_CalculatePrefetchSchedule

2022-07-20 Thread Alex Deucher
Move stack variables to dummy structure.

Signed-off-by: Alex Deucher 
Cc: Stephen Rothwell 
---
 .../dc/dml/dcn32/display_mode_vba_32.c|   5 +-
 .../dc/dml/dcn32/display_mode_vba_util_32.c   | 394 --
 .../dc/dml/dcn32/display_mode_vba_util_32.h   |   1 +
 .../drm/amd/display/dc/dml/display_mode_vba.h |  38 ++
 4 files changed, 227 insertions(+), 211 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index f7d108123b07..db3e43499a26 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -757,7 +757,9 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman

v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY
 = v->BytePerPixelY[k];

v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC
 = v->BytePerPixelC[k];

v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP
 = mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
-   v->ErrorResult[k] = 
dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+   v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(
+   
&v->dummy_vars.dml32_CalculatePrefetchSchedule,
+   
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,

&v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe,
 v->DSCDelay[k],
mode_lib->vba.DPPCLKDelaySubtotal + 
mode_lib->vba.DPPCLKDelayCNVCFormater,
mode_lib->vba.DPPCLKDelaySCL,
@@ -3195,6 +3197,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
 

mode_lib->vba.NoTimeForPrefetch[i][j][k] =
dml32_CalculatePrefetchSchedule(
+   
&v->dummy_vars.dml32_CalculatePrefetchSchedule,

v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor,

&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe,

mode_lib->vba.DSCDelayPerState[i][k],
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index 40b4c88ff2e7..4b010b1b8aed 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -3342,6 +3342,7 @@ double dml32_CalculateExtraLatency(
 } // CalculateExtraLatency
 
 bool dml32_CalculatePrefetchSchedule(
+   struct dml32_CalculatePrefetchSchedule *st_vars,
double HostVMInefficiencyFactor,
DmlPipe *myPipe,
unsigned int DSCDelay,
@@ -3405,45 +3406,18 @@ bool dml32_CalculatePrefetchSchedule(
double   *VReadyOffsetPix)
 {
bool MyError = false;
-   unsigned int DPPCycles, DISPCLKCycles;
-   double DSTTotalPixelsAfterScaler;
-   double LineTime;
-   double dst_y_prefetch_equ;
-   double prefetch_bw_oto;
-   double Tvm_oto;
-   double Tr0_oto;
-   double Tvm_oto_lines;
-   double Tr0_oto_lines;
-   double dst_y_prefetch_oto;
-   double TimeForFetchingMetaPTE = 0;
-   double TimeForFetchingRowInVBlank = 0;
-   double LinesToRequestPrefetchPixelData = 0;
-   unsigned int HostVMDynamicLevelsTrips;
-   double  trip_to_mem;
-   double  Tvm_trips;
-   double  Tr0_trips;
-   double  Tvm_trips_rounded;
-   double  Tr0_trips_rounded;
-   double  Lsw_oto;
-   double  Tpre_rounded;
-   double  prefetch_bw_equ;
-   double  Tvm_equ;
-   double  Tr0_equ;
-   double  Tdmbf;
-   double  Tdmec;
-   double  Tdmsks;
-   double  prefetch_sw_bytes;
-   double  bytes_pp;
-   double  dep_bytes;
-   unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
-   double  min_Lsw;
-   double  Tsw_est1 = 0;
-   double  Tsw_est3 = 0;
+
+   st_vars->TimeForFetchingMetaPTE = 0;
+   st_vars->TimeForFetchingRowInVBlank = 0;
+   st_vars->LinesToRequestPre

[PATCH 4/5] drm/amd/display: reduce stack for dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport

2022-07-20 Thread Alex Deucher
Move stack variables to dummy structure.

Signed-off-by: Alex Deucher 
Cc: Stephen Rothwell 
---
 .../dc/dml/dcn32/display_mode_vba_32.c|   2 +
 .../dc/dml/dcn32/display_mode_vba_util_32.c   | 187 --
 .../dc/dml/dcn32/display_mode_vba_util_32.h   |   1 +
 .../drm/amd/display/dc/dml/display_mode_vba.h |  34 
 4 files changed, 120 insertions(+), 104 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 0ecc9e4c52a6..f7d108123b07 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -1165,6 +1165,7 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman

v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency
 = mode_lib->vba.SMNLatency;
 
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+   
&v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,
mode_lib->vba.USRRetrainingRequiredFinal,
mode_lib->vba.UsesMALLForPStateChange,

mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
@@ -3493,6 +3494,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
 
{

dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+   
&v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,

mode_lib->vba.USRRetrainingRequiredFinal,

mode_lib->vba.UsesMALLForPStateChange,

mode_lib->vba.PrefetchModePerState[i][j],
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index 9ebd3207ce42..40b4c88ff2e7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -4185,6 +4185,7 @@ void dml32_CalculateFlipSchedule(
 } // CalculateFlipSchedule
 
 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+   struct 
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
bool USRRetrainingRequiredFinal,
enum dm_use_mall_for_pstate_change_mode 
UseMALLForPStateChange[],
unsigned int PrefetchMode,
@@ -4246,37 +4247,15 @@ void 
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
double ActiveDRAMClockChangeLatencyMargin[])
 {
unsigned int i, j, k;
-   unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
-   unsigned int DRAMClockChangeSupportNumber = 0;
-   unsigned int LastSurfaceWithoutMargin;
-   unsigned int DRAMClockChangeMethod = 0;
-   bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
-   double MinActiveFCLKChangeMargin = 0.;
-   double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
-   double ActiveClockChangeLatencyHidingY;
-   double ActiveClockChangeLatencyHidingC;
-   double ActiveClockChangeLatencyHiding;
-double EffectiveDETBufferSizeY;
-   double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
-   double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
-   double TotalPixelBW = 0.0;
-   bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
-   double EffectiveLBLatencyHidingY;
-   double EffectiveLBLatencyHidingC;
-   double LinesInDETY[DC__NUM_DPP__MAX];
-   double LinesInDETC[DC__NUM_DPP__MAX];
-   unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
-   unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
-   double FullDETBufferingTimeY;
-   double FullDETBufferingTimeC;
-   double WritebackDRAMClockChangeLatencyMargin;
-   double WritebackFCLKChangeLatencyMargin;
-   double WritebackLatencyHiding;
-   bool SameTimingForFCLKChange;
-
-   unsigned int TotalActiveWriteback = 0;
-   unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
-   unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
+
+   st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0;
+   st_vars->DRAMClockChangeSupportNumber = 0;
+   st_vars->DRAMClockChangeMethod = 0;
+   st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
+   st_vars->MinActiveFCLKChangeMargin = 0.;
+   st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
+   st_vars->TotalPixelBW = 0.0;
+   st_vars->Tot

[PATCH 1/5] drm/amd/display: reduce stack size in dcn32 dml (v2)

2022-07-20 Thread Alex Deucher
Move additional dummy structures off the stack and into
the dummy vars structure.

Fixes the following:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In 
function 
'DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation':
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1659:1:
 error: the frame size of 2144 bytes is larger than 2048 bytes 
[-Werror=frame-larger-than=]
 1659 | }
  | ^
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In 
function 'dml32_ModeSupportAndSystemConfigurationFull':
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:3799:1:
 error: the frame size of 2464 bytes is larger than 2048 bytes 
[-Werror=frame-larger-than=]
 3799 | } // ModeSupportAndSystemConfigurationFull
  | ^

v2: move more stuff to dummy structure, fix init order (Alex)

Signed-off-by: Alex Deucher 
Cc: Stephen Rothwell 
---
 .../dc/dml/dcn32/display_mode_vba_32.c| 411 --
 .../drm/amd/display/dc/dml/display_mode_vba.h |  36 ++
 2 files changed, 217 insertions(+), 230 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index e9204c711cb9..9c2003fbe8fa 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -65,6 +65,12 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
int iteration;
double MaxTotalRDBandwidth;
unsigned int NextPrefetchMode;
+   double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
+   bool DestinationLineTimesForPrefetchLessThan2 = false;
+   bool VRatioPrefetchMoreThanMax = false;
+   double TWait;
+   double TotalWRBandwidth = 0;
+   double WRBandwidth = 0;
 
 #ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: --- START ---\n", __func__);
@@ -710,11 +716,6 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
NextPrefetchMode = 
mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb];
 
do {
-   double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
-   bool DestinationLineTimesForPrefetchLessThan2 = false;
-   bool VRatioPrefetchMoreThanMax = false;
-   double dummy_unit_vector[DC__NUM_DPP__MAX];
-
MaxTotalRDBandwidth = 0;
 #ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, 
mode_lib->vba.VStartupLines);
@@ -723,41 +724,39 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
/* NOTE PerfetchMode variable is invalid in DAL as per 
the input received.
 * Hence the direction is to use PrefetchModePerState.
 */
-   double TWait = dml32_CalculateTWait(
-   
mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
-   
mode_lib->vba.UsesMALLForPStateChange[k],
-   
mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
-   mode_lib->vba.DRRDisplay[k],
-   mode_lib->vba.DRAMClockChangeLatency,
-   mode_lib->vba.FCLKChangeLatency, 
v->UrgentLatency,
-   mode_lib->vba.SREnterPlusExitTime);
-
-   DmlPipe myPipe;
-
-   myPipe.Dppclk = mode_lib->vba.DPPCLK[k];
-   myPipe.Dispclk = mode_lib->vba.DISPCLK;
-   myPipe.PixelClock = mode_lib->vba.PixelClock[k];
-   myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep;
-   myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k];
-   myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
-   myPipe.SourceRotation = mode_lib->vba.SourceRotation[k];
-   myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
-   myPipe.BlockHeight256BytesY = 
v->BlockHeight256BytesY[k];
-   myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
-   myPipe.BlockHeight256BytesC = 
v->BlockHeight256BytesC[k];
-   myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
-   myPipe.NumberOfCursors = 
mode_lib->vba.NumberOfCursors[k];
-   myPipe.VBlank = mode_lib->vba.VTotal[k] - 
mode_lib->vba.VActive[k];
-   myPipe.HTotal = mode_lib->vba.HTotal[k];
-   myPipe.HActive = mode_lib->vba.HActive[k];
-   myPipe.DCCEnable = mode_lib->vba.DCCEnable

[PATCH 3/5] drm/amd/display: reduce stack for dml32_CalculateVMRowAndSwath

2022-07-20 Thread Alex Deucher
Move stack variables to dummy structure.

Signed-off-by: Alex Deucher 
Cc: Stephen Rothwell 
---
 .../dc/dml/dcn32/display_mode_vba_32.c|   2 +
 .../dc/dml/dcn32/display_mode_vba_util_32.c   | 110 --
 .../dc/dml/dcn32/display_mode_vba_util_32.h   |   1 +
 .../drm/amd/display/dc/dml/display_mode_vba.h |  19 +++
 4 files changed, 70 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index c0e537731c1f..0ecc9e4c52a6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -461,6 +461,7 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
{
 
dml32_CalculateVMRowAndSwath(
+   &v->dummy_vars.dml32_CalculateVMRowAndSwath,
mode_lib->vba.NumberOfActiveSurfaces,

v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters,
v->SurfaceSizeInMALL,
@@ -2676,6 +2677,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
 
{
dml32_CalculateVMRowAndSwath(
+   
&v->dummy_vars.dml32_CalculateVMRowAndSwath,

mode_lib->vba.NumberOfActiveSurfaces,

v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters,
mode_lib->vba.SurfaceSizeInMALL,
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index da5befd7fdec..9ebd3207ce42 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -1867,6 +1867,7 @@ void dml32_CalculateSurfaceSizeInMall(
 } // CalculateSurfaceSizeInMall
 
 void dml32_CalculateVMRowAndSwath(
+   struct dml32_CalculateVMRowAndSwath *st_vars,
unsigned int NumberOfActiveSurfaces,
DmlPipe myPipe[],
unsigned int SurfaceSizeInMALL[],
@@ -1932,21 +1933,6 @@ void dml32_CalculateVMRowAndSwath(
unsigned int BIGK_FRAGMENT_SIZE[])
 {
unsigned int k;
-   unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
-   unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
-   unsigned int PDEAndMetaPTEBytesFrameY;
-   unsigned int PDEAndMetaPTEBytesFrameC;
-   unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
-   unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
-   unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
-   unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
-   unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
-   unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
-   unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
-   unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
-   unsigned int 
dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
-   unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
-   bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
 
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if (HostVMEnable == true) {
@@ -1968,15 +1954,15 @@ void dml32_CalculateVMRowAndSwath(
myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
if ((myPipe[k].SourcePixelFormat == dm_420_10 || 
myPipe[k].SourcePixelFormat == dm_420_12) &&
!IsVertical(myPipe[k].SourceRotation)) {
-   PTEBufferSizeInRequestsForLuma[k] =
+   st_vars->PTEBufferSizeInRequestsForLuma[k] =
(PTEBufferSizeInRequestsLuma + 
PTEBufferSizeInRequestsChroma) / 2;
-   PTEBufferSizeInRequestsForChroma[k] = 
PTEBufferSizeInRequestsForLuma[k];
+   st_vars->PTEBufferSizeInRequestsForChroma[k] = 
st_vars->PTEBufferSizeInRequestsForLuma[k];
} else {
-   PTEBufferSizeInRequestsForLuma[k] = 
PTEBufferSizeInRequestsLuma;
-   PTEBufferSizeInRequestsForChroma[k] = 
PTEBufferSizeInRequestsChroma;
+   st_vars->PTEBufferSizeInRequestsForLuma[k] = 
PTEBufferSizeInRequestsLuma;
+   st_vars->PTEBufferSizeInRequestsForChroma[k] = 
PTEBufferSizeInRequestsChroma

[PATCH 2/5] drm/amd/display: reduce stack for dml32_CalculateSwathAndDETConfiguration

2022-07-20 Thread Alex Deucher
Move stack variables to dummy structure.

Signed-off-by: Alex Deucher 
Cc: Stephen Rothwell 
---
 .../dc/dml/dcn32/display_mode_vba_32.c|   3 +
 .../dc/dml/dcn32/display_mode_vba_util_32.c   | 121 +-
 .../dc/dml/dcn32/display_mode_vba_util_32.h   |   2 +
 .../drm/amd/display/dc/dml/display_mode_vba.h |  15 +++
 4 files changed, 77 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 9c2003fbe8fa..c0e537731c1f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -221,6 +221,7 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
// VBA_DELTA
// Calculate DET size, swath height
dml32_CalculateSwathAndDETConfiguration(
+   
&v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -1878,6 +1879,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
}
 
dml32_CalculateSwathAndDETConfiguration(
+   &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -2474,6 +2476,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
}
 
dml32_CalculateSwathAndDETConfiguration(
+   
&v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,

mode_lib->vba.ConfigReturnBufferSizeInKByte,
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index c8a3f367d622..da5befd7fdec 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -391,6 +391,7 @@ void dml32_CalculateBytePerPixelAndBlockSizes(
 } // CalculateBytePerPixelAndBlockSizes
 
 void dml32_CalculateSwathAndDETConfiguration(
+   struct dml32_CalculateSwathAndDETConfiguration *st_vars,
unsigned int DETSizeOverride[],
enum dm_use_mall_for_pstate_change_mode 
UseMALLForPStateChange[],
unsigned int ConfigReturnBufferSizeInKByte,
@@ -455,18 +456,10 @@ void dml32_CalculateSwathAndDETConfiguration(
bool ViewportSizeSupportPerSurface[],
bool *ViewportSizeSupport)
 {
-   unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
-   unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
-   unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
-   unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
-   unsigned int RoundedUpSwathSizeBytesY;
-   unsigned int RoundedUpSwathSizeBytesC;
-   double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
-   double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
unsigned int k;
-   unsigned int TotalActiveDPP = 0;
-   bool NoChromaSurfaces = true;
-   unsigned int DETBufferSizeInKByteForSwathCalculation;
+
+   st_vars->TotalActiveDPP = 0;
+   st_vars->NoChromaSurfaces = true;
 
 #ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
@@ -501,43 +494,43 @@ void dml32_CalculateSwathAndDETConfiguration(
DPPPerSurface,
 
/* Output */
-   SwathWidthdoubleDPP,
-   SwathWidthdoubleDPPChroma,
+   st_vars->SwathWidthdoubleDPP,
+   st_vars->SwathWidthdoubleDPPChroma,
SwathWidth,
SwathWidthChroma,
-   MaximumSwathHeightY,
-   MaximumSwathHeightC,
+   st_vars->MaximumSwathHeightY,
+   st_vars->MaximumSwathHeightC,
swath_width_luma_ub,
swath_width_chroma_ub);
 
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
-   RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * 
BytePerPixDETY[k] * MaximumSwathHeightY[k];
-   RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * 
BytePerPixDETC[k] * MaximumSwathHeightC[k];
+   st_

[PATCH] mm/gup.c: Fix formatting in check_and_migrate_movable_pages()

2022-07-20 Thread Alistair Popple
Commit b05a79d4377f ("mm/gup: migrate device coherent pages when pinning
instead of failing") added a badly formatted if statement. Fix it.

Signed-off-by: Alistair Popple 
Reported-by: David Hildenbrand 
---

Apologies Andrew for missing this. Hopefully this fixes things.

 mm/gup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 364b274a10c2..c6d060dee9e0 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1980,8 +1980,8 @@ static long check_and_migrate_movable_pages(unsigned long 
nr_pages,
folio_nr_pages(folio));
}
 
-   if (!list_empty(&movable_page_list) || isolation_error_count
-   || coherent_pages)
+   if (!list_empty(&movable_page_list) || isolation_error_count ||
+   coherent_pages)
goto unpin_pages;
 
/*
-- 
2.35.1



[PATCH] mm/gup.c: Fix formatting in check_and_migrate_movable_pages()

2022-07-20 Thread Alistair Popple
Commit b05a79d4377f ("mm/gup: migrate device coherent pages when pinning
instead of failing") added a badly formatted if statement. Fix it.

Signed-off-by: Alistair Popple 
Reported-by: David Hildenbrand 
---

Apologies Andrew for missing this. Hopefully this fixes things.

 mm/gup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 364b274a10c2..c6d060dee9e0 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1980,8 +1980,8 @@ static long check_and_migrate_movable_pages(unsigned long 
nr_pages,
folio_nr_pages(folio));
}
 
-   if (!list_empty(&movable_page_list) || isolation_error_count
-   || coherent_pages)
+   if (!list_empty(&movable_page_list) || isolation_error_count ||
+   coherent_pages)
goto unpin_pages;
 
/*
-- 
2.35.1



[PATCH] drm/amd/pm: update driver if header for smu_13_0_7

2022-07-20 Thread Kenneth Feng
Update the driver interface (driver_if) header for smu_13_0_7.

Signed-off-by: Kenneth Feng 
---
 .../inc/pmfw_if/smu13_driver_if_v13_0_7.h | 24 ---
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  2 +-
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index 132da684e379..25c08f963f49 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -25,10 +25,10 @@
 
 // *** IMPORTANT ***
 // PMFW TEAM: Always increment the interface version on any change to this file
-#define SMU13_DRIVER_IF_VERSION  0x2A
+#define SMU13_DRIVER_IF_VERSION  0x2C
 
 //Increment this version if SkuTable_t or BoardTable_t change
-#define PPTABLE_VERSION 0x1E
+#define PPTABLE_VERSION 0x20
 
 #define NUM_GFXCLK_DPM_LEVELS16
 #define NUM_SOCCLK_DPM_LEVELS8
@@ -152,6 +152,7 @@ typedef enum {
 #define DEBUG_OVERRIDE_DISABLE_DFLL0x0200
 #define DEBUG_OVERRIDE_ENABLE_RLC_VF_BRINGUP_MODE  0x0400
 #define DEBUG_OVERRIDE_DFLL_MASTER_MODE0x0800
+#define DEBUG_OVERRIDE_ENABLE_PROFILING_MODE   0x1000
 
 // VR Mapping Bit Defines
 #define VR_MAPPING_VR_SELECT_MASK  0x01
@@ -1014,8 +1015,8 @@ typedef struct {
   uint16_tVmin_Hot_Eol[PMFW_VOLT_PLANE_COUNT];   //In mV(Q2) 
End-of-life Vset to be used at hot.
   uint16_tVmin_Cold_Eol[PMFW_VOLT_PLANE_COUNT];  //In mV(Q2) 
End-of-life Vset to be used at cold.
   uint16_tVmin_Aging_Offset[PMFW_VOLT_PLANE_COUNT];  //In mV(Q2) 
Worst-case aging margin
-  uint16_tVmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT];   //In mV(Q2) 
Platform offset apply to T0 Hot
-  uint16_tVmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT];  //In mV(Q2) 
Platform offset apply to T0 Cold
+  uint16_tSpare_Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT];   //In 
mV(Q2) Platform offset apply to T0 Hot
+  uint16_tSpare_Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT];  //In 
mV(Q2) Platform offset apply to T0 Cold
 
   //This is a fixed/minimum VMIN aging degradation offset which is applied at 
T0. This reflects the minimum amount of aging already accounted for.
   uint16_tVcBtcFixedVminAgingOffset[PMFW_VOLT_PLANE_COUNT];
@@ -1081,11 +1082,15 @@ typedef struct {
 
   uint16_tGfxclkFreqGfxUlv; // in MHz
   uint8_t GfxIdlePadding2[2];
-
-  uint32_tGfxoffSpare[16];
+  uint32_tGfxOffEntryHysteresis; //For RLC to count after it enters 
CGCG, and before triggers GFXOFF entry
+  uint32_tGfxoffSpare[15];
 
   // GFX GPO
-  uint32_tGfxGpoSpare[16];
+  float   DfllBtcMasterScalerM;
+  int32_t DfllBtcMasterScalerB;
+  float   DfllBtcSlaveScalerM;
+  int32_t DfllBtcSlaveScalerB;
+  uint32_tGfxGpoSpare[12];
 
   // GFX DCS
 
@@ -1326,8 +1331,11 @@ typedef struct {
   uint32_tPostVoltageSetBacoDelay; // in microseconds. Amount of time FW 
will wait after power good is established or PSI0 command is issued
   uint32_tBacoEntryDelay; // in milliseconds. Amount of time FW will wait 
to trigger BACO entry after receiving entry notification from OS
 
+  uint8_t FuseWritePowerMuxPresent;
+  uint8_t FuseWritePadding[3];
+
   // SECTION: Board Reserved
-  uint32_t BoardSpare[64];
+  uint32_t BoardSpare[63];
 
   // SECTION: Structure Padding
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 038a8956de5b..3e5838346f02 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -31,7 +31,7 @@
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2A
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2A
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
 
 #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500  //500ms
 
-- 
2.25.1



Re: [PATCH] drm/amdgpu: Refactor code to handle non coherent and uncached

2022-07-20 Thread Bhardwaj, Rajneesh



On 7/20/2022 7:18 PM, Felix Kuehling wrote:


On 2022-07-18 18:52, Rajneesh Bhardwaj wrote:

This simplifies existing coherence handling for Arcturus and Aldebaran
to account for !coherent && uncached scenarios.

Cc: Joseph Greathouse 
Cc: Alex Deucher 
Signed-off-by: Rajneesh Bhardwaj 
---
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 53 +--
  1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index d1657de5f875..0fdfd79f69ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -471,45 +471,44 @@ static uint64_t get_pte_flags(struct 
amdgpu_device *adev, struct kgd_mem *mem)

    switch (adev->asic_type) {
  case CHIP_ARCTURUS:
-    if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-    if (bo_adev == adev)
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
-    else
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
-    } else {
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
-    }
-    break;
  case CHIP_ALDEBARAN:
-    if (coherent && uncached) {
-    if (adev->gmc.xgmi.connected_to_cpu ||
-    !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
-    snoop = true;
-    mapping_flags |= AMDGPU_VM_MTYPE_UC;
-    } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+    if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
  if (bo_adev == adev) {
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
-    if (adev->gmc.xgmi.connected_to_cpu)
+    if (uncached)
+    mapping_flags |= AMDGPU_VM_MTYPE_UC;
+    else if (coherent)
+    mapping_flags |= AMDGPU_VM_MTYPE_CC;
+    else
+    mapping_flags |= AMDGPU_VM_MTYPE_RW;
+    if (adev->asic_type == CHIP_ALDEBARAN &&
+    adev->gmc.xgmi.connected_to_cpu)
  snoop = true;
  } else {
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+    if (uncached || coherent)
+    mapping_flags |= AMDGPU_VM_MTYPE_UC;
+    else
+    mapping_flags |= AMDGPU_VM_MTYPE_NC;
  if (amdgpu_xgmi_same_hive(adev, bo_adev))
  snoop = true;
  }
  } else {
+    if (uncached || coherent)
+    mapping_flags |= AMDGPU_VM_MTYPE_UC;
+    else
+    mapping_flags |= AMDGPU_VM_MTYPE_NC;
  snoop = true;
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
  }
  break;
  default:
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+    if (uncached || coherent)
+    mapping_flags |= AMDGPU_VM_MTYPE_UC;
+    else
+    mapping_flags |= AMDGPU_VM_MTYPE_NC;
+
+    if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
+    snoop = true;
+
+


With the two extra blank lines removed, this patch is

Reviewed-by: Felix Kuehling 

Please check whether a similar cleanup can be made in 
svm_range_get_pte_flags, or maybe even, whether common code can be 
factored out of those two functions.



Thanks Felix for the review. Do you want me to send V2 with two lines 
removed or just apply to amd-staging-drm-next after deleting those two 
lines?



I will check svm_range_get_pte_flags and see if I can cleanup the code 
there and get back to you.





Regards,
  Felix



  }
    pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);


Re: [PATCH] drm/amdgpu: Refactor code to handle non coherent and uncached

2022-07-20 Thread Felix Kuehling



On 2022-07-18 18:52, Rajneesh Bhardwaj wrote:

This simplifies existing coherence handling for Arcturus and Aldebaran
to account for !coherent && uncached scenarios.

Cc: Joseph Greathouse 
Cc: Alex Deucher 
Signed-off-by: Rajneesh Bhardwaj 
---
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 53 +--
  1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d1657de5f875..0fdfd79f69ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -471,45 +471,44 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, 
struct kgd_mem *mem)
  
  	switch (adev->asic_type) {

case CHIP_ARCTURUS:
-   if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-   if (bo_adev == adev)
-   mapping_flags |= coherent ?
-   AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
-   else
-   mapping_flags |= coherent ?
-   AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
-   } else {
-   mapping_flags |= coherent ?
-   AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
-   }
-   break;
case CHIP_ALDEBARAN:
-   if (coherent && uncached) {
-   if (adev->gmc.xgmi.connected_to_cpu ||
-   !(mem->alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
-   snoop = true;
-   mapping_flags |= AMDGPU_VM_MTYPE_UC;
-   } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
if (bo_adev == adev) {
-   mapping_flags |= coherent ?
-   AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
-   if (adev->gmc.xgmi.connected_to_cpu)
+   if (uncached)
+   mapping_flags |= AMDGPU_VM_MTYPE_UC;
+   else if (coherent)
+   mapping_flags |= AMDGPU_VM_MTYPE_CC;
+   else
+   mapping_flags |= AMDGPU_VM_MTYPE_RW;
+   if (adev->asic_type == CHIP_ALDEBARAN &&
+   adev->gmc.xgmi.connected_to_cpu)
snoop = true;
} else {
-   mapping_flags |= coherent ?
-   AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+   if (uncached || coherent)
+   mapping_flags |= AMDGPU_VM_MTYPE_UC;
+   else
+   mapping_flags |= AMDGPU_VM_MTYPE_NC;
if (amdgpu_xgmi_same_hive(adev, bo_adev))
snoop = true;
}
} else {
+   if (uncached || coherent)
+   mapping_flags |= AMDGPU_VM_MTYPE_UC;
+   else
+   mapping_flags |= AMDGPU_VM_MTYPE_NC;
snoop = true;
-   mapping_flags |= coherent ?
-   AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
default:
-   mapping_flags |= coherent ?
-   AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+   if (uncached || coherent)
+   mapping_flags |= AMDGPU_VM_MTYPE_UC;
+   else
+   mapping_flags |= AMDGPU_VM_MTYPE_NC;
+
+   if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
+   snoop = true;
+
+


With the two extra blank lines removed, this patch is

Reviewed-by: Felix Kuehling 

Please check whether a similar cleanup can be made in 
svm_range_get_pte_flags, or maybe even, whether common code can be 
factored out of those two functions.


Regards,
  Felix



}
  
  	pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);


Re: [PATCH 4/4] Documentation/gpu/amdgpu/amdgpu_dm: add DM docs for pixel blend mode

2022-07-20 Thread Melissa Wen
On 07/17, Tales Lelo da Aparecida wrote:
> On 16/07/2022 19:25, Melissa Wen wrote:
> > AMD GPU display manager (DM) maps DRM pixel blend modes (None,
> > Pre-multiplied, Coverage) to MPC hw blocks through blend configuration
> > options. Describe relevant elements and how to set and test them to get
> > the expected DRM blend mode on DCN hw.
> > 
> > Signed-off-by: Melissa Wen 
> > ---
> >   .../gpu/amdgpu/display/display-manager.rst| 98 +++
> >   Documentation/gpu/drm-kms.rst |  2 +
> >   2 files changed, 100 insertions(+)
> > 
> > diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst 
> > b/Documentation/gpu/amdgpu/display/display-manager.rst
> > index 8960a5f1fa66..7a495ed1f69e 100644
> > --- a/Documentation/gpu/amdgpu/display/display-manager.rst
> > +++ b/Documentation/gpu/amdgpu/display/display-manager.rst
> > @@ -84,3 +84,101 @@ families below.
> >   **DCN 3.0 family color caps and mapping**
> >   .. kernel-figure:: dcn3_cm_drm_current.svg
> > +
> > +Blend Mode Properties
> > +=====================
> > +
> > +Pixel blend mode is a DRM plane composition property of 
> > :c:type:`drm_plane` used to
> > +describe how pixels from a foreground plane (fg) are composited with the
> > +background plane (bg). Here, we present main concepts of DRM blend mode to 
> > help
> > +to understand how this property is mapped to AMD DC interface. See more 
> > about
> > +this DRM property and the alpha blending equations in :ref:`DRM Plane
> > +Composition Properties <plane_composition_properties>`.
> > +
> > +Basically, a blend mode sets the alpha blending equation for plane
> > +composition that fits the mode in which the alpha channel affects the 
> > state of
> > +pixel color values and, therefore, the resulted pixel color. For
> > +example, consider the following elements of the alpha blending equation:
> > +
> > +- *fg.rgb*: Each of the RGB component values from the foreground's pixel.
> > +- *fg.alpha*: Alpha component value from the foreground's pixel.
> > +- *bg.rgb*: Each of the RGB component values from the background.
> > +- *plane_alpha*: Plane alpha value set by the **plane "alpha" property**, 
> > see
> > +  more in `DRM Plane Composition Properties 
> > `.
> 
> You forgot to use :ref: in here.
> 
> > +
> > +in the basic alpha blending equation::
> > +
> > +   out.rgb = alpha * fg.rgb + (1 - alpha) * bg.rgb
> > +
> > +the alpha channel value of each pixel in a plane is ignored and only the 
> > plane
> > +alpha affects the resulted pixel color values.
> > +
> > +DRM has three blend modes to define the blend formula in the plane 
> > composition:
> > +
> > +* **None**: Blend formula that ignores the pixel alpha.
> > +
> > +* **Pre-multiplied**: Blend formula that assumes the pixel color values in 
> > a
> > +  plane were already pre-multiplied by their own alpha channel before storage.
> > +
> > +* **Coverage**: Blend formula that assumes the pixel color values were not
> > +  pre-multiplied with the alpha channel values.
> > +
> > +and pre-multiplied is the default pixel blend mode, that means, when no 
> > blend
> > +mode property is created or defined, DRM considers the plane's pixels have
> > +pre-multiplied color values. On IGT GPU tools, the kms_plane_alpha_blend 
> > test
> > +provides a set of subtests to verify plane alpha and blend mode properties.
> > +
> > +The DRM blend mode and its elements are then mapped by AMDGPU display 
> > manager
> > +(DM) to program the blending configuration of the Multiple Pipe/Plane 
> > Combined
> > +(MPC), as follows:
> > +
> > +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
> > +   :doc: mpc-overview
> > +
> > +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
> > +   :functions: mpcc_blnd_cfg
> > +
> > +Therefore, the blending configuration for a single MPCC instance on the MPC
> > +tree is defined by :c:type:`mpcc_blnd_cfg`, where
> > +:c:type:`pre_multiplied_alpha` is the alpha pre-multiplied mode flag used 
> > to
> > +set :c:type:`MPCC_ALPHA_MULTIPLIED_MODE`. It controls whether alpha is
> > +multiplied (true/false), being only true for DRM pre-multiplied blend mode.
> > +:c:type:`mpcc_alpha_blend_mode` defines the alpha blend mode regarding 
> > pixel
> > +alpha and plane alpha values. It sets one of the three modes for
> > +:c:type:`MPCC_ALPHA_BLND_MODE`, as described below.
> > +
> > +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
> > +   :functions: mpcc_alpha_blend_mode
> > +
> > +DM then maps the elements of `enum mpcc_alpha_blend_mode` to those in the 
> > DRM
> > +blend formula, as follows:
> > +
> > +* *MPC pixel alpha* matches *DRM fg.alpha* as the alpha component value
> > +  from the plane's pixel
> > +* *MPC global alpha* matches *DRM plane_alpha* when the pixel alpha should
> > +  be ignored and, therefore, pixel values are not pre-multiplied
> > +* *MPC global gain* assumes *MPC global alpha* value when both *DRM
> > +  fg.alpha* and *DRM plane_alpha* participate in the blend equation
> > +
>

Re: [PATCH 1/4] Documentation/amdgpu_dm: Add DM color correction documentation

2022-07-20 Thread Melissa Wen
On 07/17, Tales Lelo da Aparecida wrote:
> On 16/07/2022 19:25, Melissa Wen wrote:
> > AMDGPU DM maps DRM color management properties (degamma, ctm and gamma)
> > to DC color correction entities. Part of this mapping is already
> > documented as code comments and can be converted as kernel docs.
> > 
> > v2:
> > - rebase to amd-staging-drm-next
> > 
> > Reviewed-by: Harry Wentland 
> > Signed-off-by: Melissa Wen 
> > ---
> >   .../gpu/amdgpu/display/display-manager.rst|   9 ++
> >   .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 121 +-
> >   2 files changed, 98 insertions(+), 32 deletions(-)
> > 
> > diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst 
> > b/Documentation/gpu/amdgpu/display/display-manager.rst
> > index 7ce31f89d9a0..b1b0f11aed83 100644
> > --- a/Documentation/gpu/amdgpu/display/display-manager.rst
> > +++ b/Documentation/gpu/amdgpu/display/display-manager.rst
> > @@ -40,3 +40,12 @@ Atomic Implementation
> >   .. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >  :functions: amdgpu_dm_atomic_check amdgpu_dm_atomic_commit_tail
> > +
> > +Color Management Properties
> > +===========================
> > +
> > +.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> > +   :doc: overview
> > +
> > +.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> > +   :internal:
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> > index a71177305bcd..93c813089bff 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> > @@ -29,7 +29,9 @@
> >   #include "modules/color/color_gamma.h"
> >   #include "basics/conversion.h"
> > -/*
> > +/**
> > + * DOC: overview
> > + *
> >* The DC interface to HW gives us the following color management blocks
> >* per pipe (surface):
> >*
> > @@ -71,8 +73,8 @@
> >   #define MAX_DRM_LUT_VALUE 0xFFFF
> > -/*
> > - * Initialize the color module.
> > +/**
> > + * amdgpu_dm_init_color_mod - Initialize the color module.
> >*
> >* We're not using the full color module, only certain components.
> >* Only call setup functions for components that we need.
> > @@ -82,7 +84,14 @@ void amdgpu_dm_init_color_mod(void)
> > setup_x_points_distribution();
> >   }
> > -/* Extracts the DRM lut and lut size from a blob. */
> > +/**
> > + * __extract_blob_lut - Extracts the DRM lut and lut size from a blob.
> > + * @blob: DRM color mgmt property blob
> > + * @size: lut size
> > + *
> > + * Returns:
> > + * DRM LUT or NULL
> > + */
> >   static const struct drm_color_lut *
> >   __extract_blob_lut(const struct drm_property_blob *blob, uint32_t *size)
> >   {
> > @@ -90,13 +99,18 @@ __extract_blob_lut(const struct drm_property_blob 
> > *blob, uint32_t *size)
> > return blob ? (struct drm_color_lut *)blob->data : NULL;
> >   }
> 
> I don't think everyone would approve using actual kernel-doc for these
> static functions, but I can appreciate them being formatted as such.
> Consider replacing /** with /*.

IMHO, although they are static, they provide info to understand the AMD
DM programming of DRM color correction properties. I see the value for
external contributors, but I'm not sure about kernel-doc rules about it.

> 
> > -/*
> > - * Return true if the given lut is a linear mapping of values, i.e. it acts
> > - * like a bypass LUT.
> > +/**
> > + * __is_lut_linear - check if the given lut is a linear mapping of values
> > + * @lut: given lut to check values
> > + * @size: lut size
> >*
> >* It is considered linear if the lut represents:
> > - * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in
> > - *   [0, MAX_COLOR_LUT_ENTRIES)
> > + * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in [0,
> > + * MAX_COLOR_LUT_ENTRIES)
> > + *
> > + * Returns:
> > + * True if the given lut is a linear mapping of values, i.e. it acts like a
> > + * bypass LUT. Otherwise, false.
> >*/
> >   static bool __is_lut_linear(const struct drm_color_lut *lut, uint32_t 
> > size)
> >   {
> > @@ -119,9 +133,13 @@ static bool __is_lut_linear(const struct drm_color_lut 
> > *lut, uint32_t size)
> > return true;
> >   }
> > -/*
> > - * Convert the drm_color_lut to dc_gamma. The conversion depends on the 
> > size
> > - * of the lut - whether or not it's legacy.
> > +/**
> > + * __drm_lut_to_dc_gamma - convert the drm_color_lut to dc_gamma.
> > + * @lut: DRM lookup table for color conversion
> > + * @gamma: DC gamma to set entries
> > + * @is_legacy: legacy or atomic gamma
> > + *
> > + * The conversion depends on the size of the lut - whether or not it's 
> > legacy.
> >*/
> >   static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut,
> >   struct dc_gamma *gamma, bool is_legacy)
> > @@ -154,8 +172,11 @@ static void __drm

Re: [PATCH 3/4] drm/amd/display: add doc entries for MPC blending configuration

2022-07-20 Thread Melissa Wen
On 07/17, Tales Lelo da Aparecida wrote:
> On 16/07/2022 19:25, Melissa Wen wrote:
> > Describe structs and enums used to set blend mode properties to MPC
> > blocks. Some pieces of information are already available as code
> > comments, and were just formatted. Others were collected and summarised
> > from discusssions on AMD issue tracker[1][2].
> 
> Typo in the commit message: discusssions -> discussions
> 
> > 
> > [1] https://gitlab.freedesktop.org/drm/amd/-/issues/1734
> > [2] https://gitlab.freedesktop.org/drm/amd/-/issues/1769
> > 
> > Signed-off-by: Melissa Wen 
> > ---
> >   drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 91 +
> >   1 file changed, 77 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h 
> > b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
> > index 5097037e3962..cf28b841c42d 100644
> > --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
> > +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
> > @@ -22,6 +22,16 @@
> >*
> >*/
> > +/**
> > + * DOC: mpc-overview
> > + *
> > + * Multiple Pipe/Plane Combined (MPC) is a component in the hardware 
> > pipeline
> > + * that performs blending of multiple planes, using global and per-pixel 
> > alpha.
> > + * It also performs post-blending color correction operations according to 
> > the
> > + * hardware capabilities, such as color transformation matrix and gamma 1D 
> > and
> > + * 3D LUT.
> > + */
> > +
> >   #ifndef __DC_MPCC_H__
> >   #define __DC_MPCC_H__
> > @@ -48,14 +58,39 @@ enum mpcc_blend_mode {
> > MPCC_BLEND_MODE_TOP_BOT_BLENDING
> >   };
> > +/**
> > + * enum mpcc_alpha_blend_mode - define the alpha blend mode regarding pixel
> > + * alpha and plane alpha values
> > + */
> >   enum mpcc_alpha_blend_mode {
> > +   /**
> > +* @MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA: per pixel alpha using DPP
> > +* alpha value
> > +*/
> > MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA,
> > +   /**
> > +* @MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN: per
> > +* pixel alpha using DPP alpha value multiplied by a global gain (plane
> > +* alpha)
> > +*/
> > MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN,
> > +   /**
> > +* @MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA: global alpha value, ignores
> > +* pixel alpha and consider only plane alpha
> > +*/
> > MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA
> >   };
> > -/*
> > - * MPCC blending configuration
> > +/**
> > + * struct mpcc_blnd_cfg - MPCC blending configuration
> > + *
> > + * @black_color: background color
> > + * @alpha_mode: alpha blend mode (MPCC_ALPHA_BLND_MODE)
> > + * @pre_multiplied_alpha: whether pixel color values were pre-multiplied 
> > by the
> > + * alpha channel (MPCC_ALPHA_MULTIPLIED_MODE)
> > + * @global_gain: used when blend mode considers both pixel alpha and plane
> > + * alpha value and assumes the global alpha value.
> > + * @global_alpha: plane alpha value
> 
> There's quite a few members missing definition. After reading the 4th patch
> may I conclude that they weren't relevant for what's being described about
> alpha blending?

Hi Tales,

although they aren't changed for DRM blend modes programming, it would
be nice if someone can describe them and also avoid those warnings. I
wasn't able to identify how they behave for MPC programming (hope
someone from AMD can help on documenting them).

> 
> ./drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h:109: warning: Function
> parameter or member 'overlap_only' not described in 'mpcc_blnd_cfg'
> ./drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h:109: warning: Function
> parameter or member 'bottom_gain_mode' not described in 'mpcc_blnd_cfg'
> ./drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h:109: warning: Function
> parameter or member 'background_color_bpc' not described in 'mpcc_blnd_cfg'
> ./drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h:109: warning: Function
> parameter or member 'top_gain' not described in 'mpcc_blnd_cfg'
> ./drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h:109: warning: Function
> parameter or member 'bottom_inside_gain' not described in 'mpcc_blnd_cfg'
> ./drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h:109: warning: Function
> parameter or member 'bottom_outside_gain' not described in 'mpcc_blnd_cfg'
> 
> >*/
> >   struct mpcc_blnd_cfg {
> > struct tg_color black_color;/* background color */
> > @@ -107,8 +142,15 @@ struct mpc_dwb_flow_control {
> > int flow_ctrl_cnt1;
> >   };
> > -/*
> > - * MPCC connection and blending configuration for a single MPCC instance.
> > +/**
> > + * struct mpcc - MPCC connection and blending configuration for a single 
> > MPCC instance.
> 
> Might be worth writing the definition of the abbreviation, if not here, in
> the glossary... I couldn't find what the last "C" stands for, my guess would
> be "context". hehehe
> 
> > + * @mpcc_id: MPCC physical instance
> > + * @dpp_id: DPP input to this MPCC
> > + * @mpcc_bot: pointer to bottom layer MPCC. NULL when not c

[pull] amdgpu drm-fixes-5.19

2022-07-20 Thread Alex Deucher
Hi Dave, Daniel,

A couple more fixes for 5.19 this week.  These are in addition to the
PR I sent late last week:
https://lists.freedesktop.org/archives/amd-gfx/2022-July/081597.html

The following changes since commit 2d4bd81fea1ad6ebba543bd6da3ef5179d130e6a:

  drm/amd/display: Fix new dmub notification enabling in DM (2022-07-15 
10:04:59 -0400)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-5.19-2022-07-20

for you to fetch changes up to 90af0ca047f3049c4b46e902f432ad6ef1e2ded6:

  drm/amdgpu: Protect the amdgpu_bo_list list with a mutex v2 (2022-07-20 
16:23:34 -0400)


amd-drm-fixes-5.19-2022-07-20:

amdgpu:
- Drop redundant buffer cleanup that can lead to a segfault
- Add a bo_list mutex to avoid possible list corruption in CS


Luben Tuikov (1):
  drm/amdgpu: Protect the amdgpu_bo_list list with a mutex v2

xinhui pan (1):
  drm/amdgpu: Remove one duplicated ef removal

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  6 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h  |  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 16 +---
 4 files changed, 19 insertions(+), 10 deletions(-)


[PATCH 5/5] drm/amd/display: move FPU code from dcn301 clk mgr to DML folder

2022-07-20 Thread Melissa Wen
The -mno-gnu-attribute option in dcn301 clk mgr makefile hides a soft vs
hard fp error for powerpc. After removing this flag, we can see some FPU
code remains there:

gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses
hard float,
drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn301/vg_clk_mgr.o
uses soft float

Therefore, remove the -mno-gnu-attribute flag for dcn301/powerpc and
move FPU-associated code to DML folder.

Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/dc/clk_mgr/Makefile   |  6 --
 .../display/dc/clk_mgr/dcn301/vg_clk_mgr.c| 86 ++-
 .../display/dc/clk_mgr/dcn301/vg_clk_mgr.h|  3 +
 .../amd/display/dc/dml/dcn301/dcn301_fpu.c| 74 
 4 files changed, 84 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
index 15b660a951a5..271d8e573181 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -123,12 +123,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
 ###
 CLK_MGR_DCN301 = vg_clk_mgr.o dcn301_smu.o
 
-# prevent build errors regarding soft-float vs hard-float FP ABI tags
-# this code is currently unused on ppc64, as it applies to VanGogh APUs only
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call 
cc-option,-mno-gnu-attribute)
-endif
-
 AMD_DAL_CLK_MGR_DCN301 = $(addprefix 
$(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
index f310b0d25a07..65f224af03c0 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
@@ -32,6 +32,10 @@
 // For dcn20_update_clocks_update_dpp_dto
 #include "dcn20/dcn20_clk_mgr.h"
 
+// For DML FPU code
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dml/dcn301/dcn301_fpu.h"
+
 #include "vg_clk_mgr.h"
 #include "dcn301_smu.h"
 #include "reg_helper.h"
@@ -526,81 +530,6 @@ static struct clk_bw_params vg_bw_params = {
 
 };
 
-static struct wm_table ddr4_wm_table = {
-   .entries = {
-   {
-   .wm_inst = WM_A,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.72,
-   .sr_exit_time_us = 6.09,
-   .sr_enter_plus_exit_time_us = 7.14,
-   .valid = true,
-   },
-   {
-   .wm_inst = WM_B,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.72,
-   .sr_exit_time_us = 10.12,
-   .sr_enter_plus_exit_time_us = 11.48,
-   .valid = true,
-   },
-   {
-   .wm_inst = WM_C,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.72,
-   .sr_exit_time_us = 10.12,
-   .sr_enter_plus_exit_time_us = 11.48,
-   .valid = true,
-   },
-   {
-   .wm_inst = WM_D,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.72,
-   .sr_exit_time_us = 10.12,
-   .sr_enter_plus_exit_time_us = 11.48,
-   .valid = true,
-   },
-   }
-};
-
-static struct wm_table lpddr5_wm_table = {
-   .entries = {
-   {
-   .wm_inst = WM_A,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.65333,
-   .sr_exit_time_us = 13.5,
-   .sr_enter_plus_exit_time_us = 16.5,
-   .valid = true,
-   },
-   {
-   .wm_inst = WM_B,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.65333,
-   .sr_exit_time_us = 13.5,
-   .sr_enter_plus_exit_time_us = 16.5,
-   .valid = true,
-   },
-   {
-   .wm_inst = WM_C,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.65333,
-   .sr_exit_time_us = 13.5,
-   .sr_enter_plus_exit_time_us = 16.5,
-   .valid = true,
-   },
-   {
-   .wm_inst = WM_D,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11

[PATCH 3/5] drm/amd/display: move FPU code on dcn21 clk_mgr

2022-07-20 Thread Melissa Wen
The -mno-gnu-attribute option in dcn21 clk mgr makefile hides a soft vs
hard fp error for powerpc. After removing this flag, we can see some FPU
code remains there:

/gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses
hard float,
drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn21/rn_clk_mgr.o uses
soft float

Therefore, remove the -mno-gnu-attribute flag for dcn21/powerpc and move
FPU-associated code to DML folder.

Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/dc/clk_mgr/Makefile   |   6 -
 .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 234 +
 .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h |   7 +
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 235 ++
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.h  |   2 +
 5 files changed, 248 insertions(+), 236 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
index a48453612d10..66dc02c426e9 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -107,12 +107,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN201)
 ###
 CLK_MGR_DCN21 = rn_clk_mgr.o rn_clk_mgr_vbios_smu.o
 
-# prevent build errors regarding soft-float vs hard-float FP ABI tags
-# this code is currently unused on ppc64, as it applies to Renoir APUs only
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call 
cc-option,-mno-gnu-attribute)
-endif
-
 AMD_DAL_CLK_MGR_DCN21 = $(addprefix 
$(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index cf1b5f354ae9..0202dc682682 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -26,10 +26,9 @@
 #include "dccg.h"
 #include "clk_mgr_internal.h"
 
-
 #include "dcn20/dcn20_clk_mgr.h"
 #include "rn_clk_mgr.h"
-
+#include "dml/dcn20/dcn20_fpu.h"
 
 #include "dce100/dce_clk_mgr.h"
 #include "rn_clk_mgr_vbios_smu.h"
@@ -45,7 +44,6 @@
 
 /* Constants */
 
-#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 
Training Counter Requirement doc */
 #define SMU_VER_55_51_0 0x373300 /* SMU Version that is able to set DISPCLK 
below 100MHz */
 
 /* Macros */
@@ -613,228 +611,6 @@ static struct clk_bw_params rn_bw_params = {
 
 };
 
-static struct wm_table ddr4_wm_table_gs = {
-   .entries = {
-   {
-   .wm_inst = WM_A,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.72,
-   .sr_exit_time_us = 7.09,
-   .sr_enter_plus_exit_time_us = 8.14,
-   .valid = true,
-   },
-   {
-   .wm_inst = WM_B,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.72,
-   .sr_exit_time_us = 10.12,
-   .sr_enter_plus_exit_time_us = 11.48,
-   .valid = true,
-   },
-   {
-   .wm_inst = WM_C,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.72,
-   .sr_exit_time_us = 10.12,
-   .sr_enter_plus_exit_time_us = 11.48,
-   .valid = true,
-   },
-   {
-   .wm_inst = WM_D,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.72,
-   .sr_exit_time_us = 10.12,
-   .sr_enter_plus_exit_time_us = 11.48,
-   .valid = true,
-   },
-   }
-};
-
-static struct wm_table lpddr4_wm_table_gs = {
-   .entries = {
-   {
-   .wm_inst = WM_A,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.65333,
-   .sr_exit_time_us = 5.32,
-   .sr_enter_plus_exit_time_us = 6.38,
-   .valid = true,
-   },
-   {
-   .wm_inst = WM_B,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.65333,
-   .sr_exit_time_us = 9.82,
-   .sr_enter_plus_exit_time_us = 11.196,
-   .valid = true,
-   },
-   {
-   .wm_inst = WM_C,
-   .wm_type = WM_TYPE_PSTATE_CHG,
-   .pstate_latency_us = 11.65333,
-   .sr_exi

[PATCH 4/5] drm/amd/display: move FPU code from dcn30 clk mgr to DML folder

2022-07-20 Thread Melissa Wen
The -mno-gnu-attribute option in clk mgr makefile for dcn30 hides a soft
vs hard fp error for powerpc. After removing this flag, we can see some
FPU code remains there:

gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses
hard float,
drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.o
uses soft float

Therefore, remove the -mno-gnu-attribute flag for dcn30/powerpc and move
FPU-associated code to DML folder.

Signed-off-by: Melissa Wen 
---
 .../gpu/drm/amd/display/dc/clk_mgr/Makefile   |  6 --
 .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c  | 63 ++-
 .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.c  | 63 ++-
 .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.h  |  1 +
 4 files changed, 68 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
index 66dc02c426e9..15b660a951a5 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -115,12 +115,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
 ###
 CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o
 
-# prevent build errors regarding soft-float vs hard-float FP ABI tags
-# this code is currently unused on ppc64, as it applies to VanGogh APUs only
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call 
cc-option,-mno-gnu-attribute)
-endif
-
 AMD_DAL_CLK_MGR_DCN30 = $(addprefix 
$(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
index 914708cefc79..3ce0ee0d012f 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
@@ -29,6 +29,7 @@
 #include "dcn20/dcn20_clk_mgr.h"
 #include "dce100/dce_clk_mgr.h"
 #include "dcn30/dcn30_clk_mgr.h"
+#include "dml/dcn30/dcn30_fpu.h"
 #include "reg_helper.h"
 #include "core_types.h"
 #include "dm_helpers.h"
@@ -97,65 +98,11 @@ static void dcn3_init_single_clock(struct clk_mgr_internal 
*clk_mgr, uint32_t cl
}
 }
 
-static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal 
*clk_mgr)
+static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
 {
-   /* defaults */
-   double pstate_latency_us = 
clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us;
-   double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us;
-   double sr_enter_plus_exit_time_us = 
clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
-   uint16_t min_uclk_mhz = 
clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz;
-
-   /* Set A - Normal - default values*/
-   clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us 
= pstate_latency_us;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = 
sr_exit_time_us;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us
 = sr_enter_plus_exit_time_us;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = 
WATERMARKS_CLOCK_RANGE;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 
0;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 
0x;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = 
min_uclk_mhz;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 
0x;
-
-   /* Set B - Performance - higher minimum clocks */
-// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true;
-// 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us 
= pstate_latency_us;
-// 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = 
sr_exit_time_us;
-// 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us
 = sr_enter_plus_exit_time_us;
-// 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = 
WATERMARKS_CLOCK_RANGE;
-// 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = 
TUNED VALUE;
-// 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 
0x;
-// 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = 
TUNED VALUE;
-// 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 
0x;
-
-   /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value 
*/
-   clk_mgr->base.bw_params->wm_tab

[PATCH 2/5] drm/amd/display: remove useless FPU protection wrapper from dcn31_resource file

2022-07-20 Thread Melissa Wen
Many lines of code in dcn31_resource_construct are wrapped by the DC_FP
macros to protect FPU operations; however, there are no FPU operations in
this region. Therefore, just remove the wrapper for clarity.

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 929b712cbada..6d25fcf865bf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -1863,8 +1863,6 @@ static bool dcn31_resource_construct(
struct dc_context *ctx = dc->ctx;
struct irq_service_init_data init_data;
 
-   DC_FP_START();
-
ctx->dc_bios->regs = &bios_regs;
 
pool->base.res_cap = &res_cap_dcn31;
@@ -2175,13 +2173,9 @@ static bool dcn31_resource_construct(
 
dc->dcn_ip->max_num_dpp = dcn3_1_ip.max_num_dpp;
 
-   DC_FP_END();
-
return true;
 
 create_fail:
-
-   DC_FP_END();
dcn31_resource_destruct(pool);
 
return false;
-- 
2.35.1



[PATCH 1/5] drm/amd/display: fix soft-fp vs hard-fp on DCN 3.1 family for powerpc

2022-07-20 Thread Melissa Wen
Move the remaining FPU code that caused compilation errors for powerpc to
the DML folder. This patch depends on [1] to prevent the errors below:

/gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard 
float, drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o uses 
soft float
/gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: failed to merge 
target specific data of file 
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o
/gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard 
float, drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o uses 
soft float
/gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: failed to merge 
target specific data of file 
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o
/gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard 
float, drivers/gpu/drm/amd/amdgpu/../display/dc/dcn316/dcn316_resource.o uses 
soft float
/gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: failed to merge 
target specific data of file 
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn316/dcn316_resource.o

[1] https://lore.kernel.org/amd-gfx/20220716195144.342960-1-m...@igalia.com/

Reported-by: Guenter Roeck 
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c |  5 +++--
 .../gpu/drm/amd/display/dc/dcn315/dcn315_resource.c   |  5 +++--
 .../gpu/drm/amd/display/dc/dcn316/dcn316_resource.c   |  5 +++--
 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 11 +++
 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h  |  3 +++
 5 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 178d40c0d70a..929b712cbada 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -1663,11 +1663,12 @@ int dcn31_populate_dml_pipes_from_context(
pipes[pipe_cnt].pipe.src.immediate_flip = true;
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
pipes[pipe_cnt].pipe.src.gpuvm = true;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.dcc_rate = 3;
pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+   DC_FP_START();
+   dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+   DC_FP_END();
 
if (dc->debug.dml_hostvm_override == DML_HOSTVM_NO_OVERRIDE)
pipes[pipe_cnt].pipe.src.hostvm = 
dc->res_pool->hubbub->riommu_active;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
index df2abd8fe2eb..1a5f5977f962 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
@@ -1658,11 +1658,12 @@ static int dcn315_populate_dml_pipes_from_context(
 
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
pipes[pipe_cnt].pipe.src.gpuvm = true;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.dcc_rate = 3;
pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+   DC_FP_START();
+   dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+   DC_FP_END();
 
if (pipes[pipe_cnt].dout.dsc_enable) {
switch (timing->display_color_depth) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
index 070fe10a004e..53dea466348f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
@@ -1661,11 +1661,12 @@ static int dcn316_populate_dml_pipes_from_context(
 
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
pipes[pipe_cnt].pipe.src.gpuvm = true;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.dcc_rate = 3;
pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+   DC_FP_START();
+   dcn31_zero_pipe

[PATCH 0/5] drm/amd/display: FPU cleanup in clk_mgr files for powerpc

2022-07-20 Thread Melissa Wen
An initial report from Guenter[1] shows a soft-fp vs hard-fp error
from DCN31 clk mgr for powerpc. I was not able to reproduce it by
cross-compiling with gcc-powerpc-linux-gnu and gcc-11.3, but thanks to
Maíra's tips, I can reproduce the issue using make.cross, as follows:

- wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross 
-O ~/bin/make.cross
- chmod +x ~/bin/make.cross
- mkdir build_dir
- COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 ~/make.cross O=build_dir 
ARCH=powerpc SHELL=/bin/bash

with a config file generated by allmodconfig

So, the first patch fixes the issue reported by Guenter. The second is
just a cleanup in the dcn31_resource file to remove a useless DC_FP_ wrapper.
Finally, in the last three patches, I'm removing the -mno-gnu-attribute
option, which was just hiding FPU-associated code in the clk mgr files of
dcn21/30/301, and moving that code to the DML folder. This series doesn't
cover the recent drivers dcn32/314.

Thanks Guenter, Maíra, Siqueira and Alex for all inputs on this
debugging process. Let me know your thoughts on this approach.

Melissa

[1] https://lore.kernel.org/amd-gfx/20220618232737.2036722-1-li...@roeck-us.net/

Melissa Wen (5):
  drm/amd/display: fix soft-fp vs hard-fp on DCN 3.1 family for powerpc
  drm/amd/display: remove useless FPU protection wrapper from
dcn31_resource file
  drm/amd/display: move FPU code on dcn21 clk_mgr
  drm/amd/display: move FPU code from dcn30 clk mgr to DML folder
  drm/amd/display: move FPU code from dcn301 clk mgr to DML folder

 .../gpu/drm/amd/display/dc/clk_mgr/Makefile   |  18 --
 .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 234 +
 .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h |   7 +
 .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c  |  63 +
 .../display/dc/clk_mgr/dcn301/vg_clk_mgr.c|  86 +--
 .../display/dc/clk_mgr/dcn301/vg_clk_mgr.h|   3 +
 .../drm/amd/display/dc/dcn31/dcn31_resource.c |  11 +-
 .../amd/display/dc/dcn315/dcn315_resource.c   |   5 +-
 .../amd/display/dc/dcn316/dcn316_resource.c   |   5 +-
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 235 ++
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.h  |   2 +
 .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.c  |  63 -
 .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.h  |   1 +
 .../amd/display/dc/dml/dcn301/dcn301_fpu.c|  74 ++
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  |  11 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h  |   3 +
 16 files changed, 423 insertions(+), 398 deletions(-)

-- 
2.35.1



[PATCH 4/4] drm/amd/display: Rewrite CalculateWriteBackDISPCLK function

2022-07-20 Thread Maíra Canal
Based on dml30_CalculateWriteBackDISPCLK, this patch separates the
DISPCLK calculations into three variables, making no functional changes, in
order to make the code more readable and to better express that three values
are being compared by dml_max.

Signed-off-by: Maíra Canal 
Reviewed-by: André Almeida 
---
 .../drm/amd/display/dc/dml/display_mode_vba.c | 29 ---
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index c5a0a3649e9a..53a6705b8320 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -1113,20 +1113,27 @@ double CalculateWriteBackDISPCLK(
unsigned int HTotal,
unsigned int WritebackChromaLineBufferWidth)
 {
-   double CalculateWriteBackDISPCLK = 1.01 * PixelClock * dml_max(
-   dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio,
-   dml_max((WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 
1) * dml_ceil(WritebackDestinationWidth / 4.0, 1)
+   double DISPCLK_H, DISPCLK_V, DISPCLK_HB, CalculateWriteBackDISPCLK;
+
+   DISPCLK_H = dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio;
+   DISPCLK_V = (WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1) * 
dml_ceil(WritebackDestinationWidth / 4.0, 1)
+ dml_ceil(WritebackDestinationWidth / 4.0, 1)) / 
(double) HTotal + dml_ceil(1.0 / WritebackVRatio, 1)
-   * (dml_ceil(WritebackLumaVTaps / 4.0, 1) + 4.0) / 
(double) HTotal,
-   dml_ceil(1.0 / WritebackVRatio, 1) * 
WritebackDestinationWidth / (double) HTotal));
+   * (dml_ceil(WritebackLumaVTaps / 4.0, 1) + 4.0) / 
(double) HTotal;
+   DISPCLK_HB = dml_ceil(1.0 / WritebackVRatio, 1) * 
WritebackDestinationWidth / (double) HTotal;
+
+   CalculateWriteBackDISPCLK = 1.01 * PixelClock * dml_max3(DISPCLK_H, 
DISPCLK_V, DISPCLK_HB);
+
if (WritebackPixelFormat != dm_444_32) {
-   CalculateWriteBackDISPCLK = dml_max(CalculateWriteBackDISPCLK, 
1.01 * PixelClock * dml_max(
-   dml_ceil(WritebackChromaHTaps / 2.0, 1) / (2 * 
WritebackHRatio),
-   dml_max((WritebackChromaVTaps * dml_ceil(1 / (2 * 
WritebackVRatio), 1) * dml_ceil(WritebackDestinationWidth / 2.0 / 2.0, 1)
-   + dml_ceil(WritebackDestinationWidth / 2.0 / 
WritebackChromaLineBufferWidth, 1)) / HTotal
-   + dml_ceil(1 / (2 * WritebackVRatio), 1) * 
(dml_ceil(WritebackChromaVTaps / 4.0, 1) + 4) / HTotal,
-   dml_ceil(1.0 / (2 * WritebackVRatio), 1) * 
WritebackDestinationWidth / 2.0 / HTotal)));
+   DISPCLK_H = dml_ceil(WritebackChromaHTaps / 2.0, 1) / (2 * 
WritebackHRatio);
+   DISPCLK_V = (WritebackChromaVTaps * dml_ceil(1 / (2 * 
WritebackVRatio), 1) *
+   dml_ceil(WritebackDestinationWidth / 4.0, 1) +
+   dml_ceil(WritebackDestinationWidth / 2.0 / 
WritebackChromaLineBufferWidth, 1)) / HTotal +
+   dml_ceil(1 / (2 * WritebackVRatio), 1) 
*(dml_ceil(WritebackChromaVTaps / 4.0, 1) + 4) / HTotal;
+   DISPCLK_HB = dml_ceil(1.0 / (2 * WritebackVRatio), 1) * 
WritebackDestinationWidth / 2.0 / HTotal;
+   CalculateWriteBackDISPCLK = dml_max(CalculateWriteBackDISPCLK,
+   1.01 * PixelClock * dml_max3(DISPCLK_H, 
DISPCLK_V, DISPCLK_HB));
}
+
return CalculateWriteBackDISPCLK;
 }
 
-- 
2.36.1



[PATCH 3/4] drm/amd/display: Remove parameters from rq_dlg_get_dlg_reg

2022-07-20 Thread Maíra Canal
Across all DCNs (except DCN32, which has a separate
rq_dlg_get_dlg_reg), the parameters const bool vm_en, const bool
ignore_viewport_pos, and const bool immediate_flip_support are not used
in the function. Therefore, the rq_dlg_get_dlg_reg signature is changed
by deleting those parameters.

Signed-off-by: Maíra Canal 
---
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  |  3 +--
 .../dc/dml/dcn20/display_rq_dlg_calc_20.c |  5 +
 .../dc/dml/dcn20/display_rq_dlg_calc_20.h |  5 +
 .../dc/dml/dcn20/display_rq_dlg_calc_20v2.c   |  5 +
 .../dc/dml/dcn20/display_rq_dlg_calc_20v2.h   |  5 +
 .../dc/dml/dcn21/display_rq_dlg_calc_21.c |  5 +
 .../dc/dml/dcn21/display_rq_dlg_calc_21.h |  5 +
 .../dc/dml/dcn30/display_rq_dlg_calc_30.c | 18 +++---
 .../dc/dml/dcn30/display_rq_dlg_calc_30.h |  5 +
 .../dc/dml/dcn31/display_rq_dlg_calc_31.c | 19 +++
 .../dc/dml/dcn31/display_rq_dlg_calc_31.h |  5 +
 .../dc/dml/dcn314/display_rq_dlg_calc_314.c   | 15 ++-
 .../dc/dml/dcn314/display_rq_dlg_calc_314.h   |  5 +
 .../drm/amd/display/dc/dml/display_mode_lib.h |  5 +
 .../gpu/drm/amd/display/dc/dml/dml_wrapper.c  |  3 +--
 15 files changed, 20 insertions(+), 88 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index dc60b835e938..d9cfb29a2651 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -857,8 +857,7 @@ void dcn20_calculate_dlg_params(
pipe_cnt,
pipe_idx,
cstate_en,
-   
context->bw_ctx.bw.dcn.clk.p_state_change_support,
-   false, false, true);
+   
context->bw_ctx.bw.dcn.clk.p_state_change_support);
 

context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml,
&context->res_ctx.pipe_ctx[i].rq_regs,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
index 548cdef8a8ad..d0a4c69b47c8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
@@ -1553,10 +1553,7 @@ void dml20_rq_dlg_get_dlg_reg(struct display_mode_lib 
*mode_lib,
const unsigned int num_pipes,
const unsigned int pipe_idx,
const bool cstate_en,
-   const bool pstate_en,
-   const bool vm_en,
-   const bool ignore_viewport_pos,
-   const bool immediate_flip_support)
+   const bool pstate_en)
 {
display_rq_params_st rq_param = {0};
display_dlg_sys_params_st dlg_sys_param = {0};
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h
index 8b23867e97c1..36c3692e53b8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h
@@ -65,9 +65,6 @@ void dml20_rq_dlg_get_dlg_reg(
const unsigned int num_pipes,
const unsigned int pipe_idx,
const bool cstate_en,
-   const bool pstate_en,
-   const bool vm_en,
-   const bool ignore_viewport_pos,
-   const bool immediate_flip_support);
+   const bool pstate_en);
 
 #endif
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
index 0fc9f3e3ffae..17df9d31c11f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
@@ -1554,10 +1554,7 @@ void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib 
*mode_lib,
const unsigned int num_pipes,
const unsigned int pipe_idx,
const bool cstate_en,
-   const bool pstate_en,
-   const bool vm_en,
-   const bool ignore_viewport_pos,
-   const bool immediate_flip_support)
+   const bool pstate_en)
 {
display_rq_params_st rq_param = {0};
display_dlg_sys_params_st dlg_sys_param = {0};
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
index 2b4e46ea1c3d..f524f1ccfe41 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
@@ -65,9 +65,6 @@ void dml20v2_rq_dlg_get_dlg_reg(
const 

[PATCH 2/4] drm/amd/display: Remove duplicated CalculateWriteBackDISPCLK

2022-07-20 Thread Maíra Canal
The functions dml30_CalculateWriteBackDISPCLK and
dml31_CalculateWriteBackDISPCLK are identical. Therefore, to avoid
code duplication, dml31_CalculateWriteBackDISPCLK is removed and
replaced by dml30_CalculateWriteBackDISPCLK.

Signed-off-by: Maíra Canal 
---
 .../dc/dml/dcn31/display_mode_vba_31.c| 24 ++-
 .../dc/dml/dcn31/display_mode_vba_31.h| 11 -
 2 files changed, 2 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 3fab19134480..3bc529f0b0fc 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -2085,7 +2085,7 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
if (v->WritebackEnable[k]) {
v->WritebackDISPCLK = dml_max(
v->WritebackDISPCLK,
-   dml31_CalculateWriteBackDISPCLK(
+   dml30_CalculateWriteBackDISPCLK(

v->WritebackPixelFormat[k],
v->PixelClock[k],
v->WritebackHRatio[k],
@@ -3470,26 +3470,6 @@ static double CalculateTWait(unsigned int PrefetchMode, 
double DRAMClockChangeLa
}
 }
 
-double dml31_CalculateWriteBackDISPCLK(
-   enum source_format_class WritebackPixelFormat,
-   double PixelClock,
-   double WritebackHRatio,
-   double WritebackVRatio,
-   unsigned int WritebackHTaps,
-   unsigned int WritebackVTaps,
-   long WritebackSourceWidth,
-   long WritebackDestinationWidth,
-   unsigned int HTotal,
-   unsigned int WritebackLineBufferSize)
-{
-   double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
-
-   DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / 
WritebackHRatio;
-   DISPCLK_V = PixelClock * (WritebackVTaps * 
dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
-   DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * 
WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
-   return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
-}
-
 static double CalculateWriteBackDelay(
enum source_format_class WritebackPixelFormat,
double WritebackHRatio,
@@ -4055,7 +4035,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
if (v->WritebackEnable[k] == true) {
v->WritebackRequiredDISPCLK = dml_max(
v->WritebackRequiredDISPCLK,
-   dml31_CalculateWriteBackDISPCLK(
+   dml30_CalculateWriteBackDISPCLK(

v->WritebackPixelFormat[k],
v->PixelClock[k],
v->WritebackHRatio[k],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.h
index 90be612f26b2..654362adcaa9 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.h
@@ -28,16 +28,5 @@
 
 void dml31_recalculate(struct display_mode_lib *mode_lib);
 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib 
*mode_lib);
-double dml31_CalculateWriteBackDISPCLK(
-   enum source_format_class WritebackPixelFormat,
-   double PixelClock,
-   double WritebackHRatio,
-   double WritebackVRatio,
-   unsigned int WritebackHTaps,
-   unsigned int WritebackVTaps,
-   long   WritebackSourceWidth,
-   long   WritebackDestinationWidth,
-   unsigned int HTotal,
-   unsigned int WritebackLineBufferSize);
 
 #endif /* __DML31_DISPLAY_MODE_VBA_H__ */
-- 
2.36.1



[PATCH 1/4] drm/amd/display: Drop dm_sw_gfx7_2d_thin_l_vp and dm_sw_gfx7_2d_thin_gl

2022-07-20 Thread Maíra Canal
As the enum values dm_sw_gfx7_2d_thin_gl and dm_sw_gfx7_2d_thin_l_vp are
not used in the codebase, this commit drops those entries from enum
dm_swizzle_mode.

Signed-off-by: Maíra Canal 
---
 .../dc/dml/dcn20/display_mode_vba_20.c| 26 +-
 .../dc/dml/dcn20/display_mode_vba_20v2.c  | 26 +-
 .../dc/dml/dcn21/display_mode_vba_21.c| 27 +--
 .../amd/display/dc/dml/display_mode_enums.h   |  2 --
 .../display/dc/dml/dml_wrapper_translation.c  |  9 ---
 5 files changed, 19 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
index d3b5b6fedf04..4e4cb0927057 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
@@ -938,7 +938,7 @@ static unsigned int CalculateVMAndRowBytes(
*MetaRowByte = 0;
}
 
-   if (SurfaceTiling == dm_sw_linear || SurfaceTiling == 
dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
+   if (SurfaceTiling == dm_sw_linear) {
MacroTileSizeBytes = 256;
MacroTileHeight = BlockHeight256Bytes;
} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == 
dm_sw_4kb_s_x
@@ -3347,26 +3347,12 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l

== dm_420_8
|| 
mode_lib->vba.SourcePixelFormat[k]

== dm_420_10))
-   || (((mode_lib->vba.SurfaceTiling[k] == 
dm_sw_gfx7_2d_thin_gl
-   || 
mode_lib->vba.SurfaceTiling[k]
-   == 
dm_sw_gfx7_2d_thin_l_vp)
-   && 
!((mode_lib->vba.SourcePixelFormat[k]
-   == dm_444_64
+   || (mode_lib->vba.DCCEnable[k] == true
+   && 
(mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
|| 
mode_lib->vba.SourcePixelFormat[k]
-   
== dm_444_32)
-   && 
mode_lib->vba.SourceScan[k]
-   
== dm_horz
-   && 
mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp
-   
== true
-   && 
mode_lib->vba.DCCEnable[k]
-   
== false))
-   || (mode_lib->vba.DCCEnable[k] 
== true
-   && 
(mode_lib->vba.SurfaceTiling[k]
-   
== dm_sw_linear
-   
|| mode_lib->vba.SourcePixelFormat[k]
-   
== dm_420_8
-   
|| mode_lib->vba.SourcePixelFormat[k]
-   
== dm_420_10 {
+   
== dm_420_8
+   || 
mode_lib->vba.SourcePixelFormat[k]
+   
== dm_420_10))) {
mode_lib->vba.SourceFormatPixelAndScanSupport = false;
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index 63bbdf8b8678..eaa0cdb599ba 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -998,7 +998,7 @@ static unsigned int CalculateVMAndRowBytes(
*MetaRowByte = 0;
}
 
-   if (SurfaceTiling == dm_sw_linear || SurfaceTiling == 
dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
+   if (SurfaceTiling == dm_sw_linear) {
MacroTileSizeBytes = 256;
MacroTileHeight = BlockHeight

Re: [PATCH 09/31] drm/amd/display: Create a file dedicated for CRTC

2022-07-20 Thread André Almeida
Hi Siqueira :)

Às 15:16 de 15/07/22, Rodrigo Siqueira escreveu:
> [Why]
> The amdgpu_dm file contains most of the code that works as an interface
> between DRM API and DC. As a result, this file becomes very large since
> it comprises multiple abstractions such as CRTC manipulation.
> 
> [How]
> This commit extracts the CRTC code to its specific file named
> amdgpu_dm_crtc. This change does not change anything inside the
> functions; the only exception is converting some static functions to a
> global function.
> 
> Reviewed-by: Harry Wentland 
> Acked-by: Alan Liu 
> Signed-off-by: Rodrigo Siqueira 
> ---

[...]

> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
> new file mode 100644
> index ..a9413acfe4dc
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
> @@ -0,0 +1,463 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: AMD
> + *
> + */
> +#include 
> +#include 
> +
> +#include "dc.h"
> +#include "amdgpu.h"
> +#include "amdgpu_dm_psr.h"
> +#include "amdgpu_dm_crtc.h"
> +#include "amdgpu_dm_plane.h"
> +#include "amdgpu_dm_trace.h"
> +#include "amdgpu_dm_debugfs.h"
> +

It's a good idea to keep includes ordered like

+#include 
+#include 
+
+#include "amdgpu.h"
+#include "amdgpu_dm_crtc.h"
+#include "amdgpu_dm_debugfs.h"
+#include "amdgpu_dm_plane.h"
+#include "amdgpu_dm_psr.h"
+#include "amdgpu_dm_trace.h"
+#include "dc.h"

because it's easier to check for duplicates and prevents the need for
fixes like this in the future:
https://gitlab.freedesktop.org/agd5f/linux/-/commit/b7be3ae759160aa3355ebeb0583f67fb9bda4dae




Re: [PATCH] drm/amdgpu: Fix the incomplete product number

2022-07-20 Thread André Almeida
Às 05:26 de 20/07/22, Roy Sun escreveu:
> The comments say that the product number is a 16-digit HEX string so the
> buffer needs to be at least 17 characters to hold the NUL terminator.
> 
> Signed-off-by: Roy Sun 
> ---

Reviewed-by: André Almeida 

...but I would appreciate a more detailed commit message with the points
I raised in the review


Re: [PATCH v2 14/29] drm/radeon: Register ACPI video backlight when skipping radeon backlight registration

2022-07-20 Thread Alex Deucher
On Tue, Jul 12, 2022 at 3:40 PM Hans de Goede  wrote:
>
> Typically the acpi_video driver will initialize before radeon, which
> used to cause /sys/class/backlight/acpi_video0 to get registered and then
> radeon would register its own radeon_bl# device later. After which
> the drivers/acpi/video_detect.c code unregistered the acpi_video0 device
> to avoid there being 2 backlight devices.
>
> This means that userspace used to briefly see 2 devices and the
> disappearing of acpi_video0 after a brief time confuses the systemd
> backlight level save/restore code, see e.g.:
> https://bbs.archlinux.org/viewtopic.php?id=269920
>
> To fix this the ACPI video code has been modified to make backlight class
> device registration a separate step, relying on the drm/kms driver to
> ask for the acpi_video backlight registration after it is done setting up
> its native backlight device.
>
> Add a call to the new acpi_video_register_backlight() when radeon skips
> registering its own backlight device because of e.g. the firmware_flags
> or the acpi_video_get_backlight_type() return value. This ensures that
> if the acpi_video backlight device should be used, it will be available
> before the radeon drm_device gets registered with userspace.
>
> Signed-off-by: Hans de Goede 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/radeon/radeon_encoders.c | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c 
> b/drivers/gpu/drm/radeon/radeon_encoders.c
> index 46549d5179ee..c1cbebb51be1 100644
> --- a/drivers/gpu/drm/radeon/radeon_encoders.c
> +++ b/drivers/gpu/drm/radeon/radeon_encoders.c
> @@ -30,6 +30,8 @@
>  #include 
>  #include 
>
> +#include 
> +
>  #include "radeon.h"
>  #include "radeon_atombios.h"
>  #include "radeon_legacy_encoders.h"
> @@ -167,7 +169,7 @@ static void radeon_encoder_add_backlight(struct 
> radeon_encoder *radeon_encoder,
> return;
>
> if (radeon_backlight == 0) {
> -   return;
> +   use_bl = false;
> } else if (radeon_backlight == 1) {
> use_bl = true;
> } else if (radeon_backlight == -1) {
> @@ -193,6 +195,13 @@ static void radeon_encoder_add_backlight(struct 
> radeon_encoder *radeon_encoder,
> else
> radeon_legacy_backlight_init(radeon_encoder, 
> connector);
> }
> +
> +   /*
> +* If there is no native backlight device (which may happen even when
> +* use_bl==true) try registering an ACPI video backlight device 
> instead.
> +*/
> +   if (!rdev->mode_info.bl_encoder)
> +   acpi_video_register_backlight();
>  }
>
>  void
> --
> 2.36.0
>


Re: [PATCH v2 13/29] drm/amdgpu: Register ACPI video backlight when skipping amdgpu backlight registration

2022-07-20 Thread Alex Deucher
On Tue, Jul 12, 2022 at 3:40 PM Hans de Goede  wrote:
>
> Typically the acpi_video driver will initialize before amdgpu, which
> used to cause /sys/class/backlight/acpi_video0 to get registered and then
> amdgpu would register its own amdgpu_bl# device later. After which
> the drivers/acpi/video_detect.c code unregistered the acpi_video0 device
> to avoid there being 2 backlight devices.
>
> This means that userspace used to briefly see 2 devices and the
> disappearing of acpi_video0 after a brief time confuses the systemd
> backlight level save/restore code, see e.g.:
> https://bbs.archlinux.org/viewtopic.php?id=269920
>
> To fix this the ACPI video code has been modified to make backlight class
> device registration a separate step, relying on the drm/kms driver to
> ask for the acpi_video backlight registration after it is done setting up
> its native backlight device.
>
> Add a call to the new acpi_video_register_backlight() when amdgpu skips
> registering its own backlight device because of either the firmware_flags
> or the acpi_video_get_backlight_type() return value. This ensures that
> if the acpi_video backlight device should be used, it will be available
> before the amdgpu drm_device gets registered with userspace.
>
> Signed-off-by: Hans de Goede 
> ---
>  drivers/gpu/drm/amd/amdgpu/atombios_encoders.c| 9 +++--
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++
>  2 files changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c 
> b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> index abf209e36fca..45cd9268b426 100644
> --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> @@ -184,11 +184,11 @@ void amdgpu_atombios_encoder_init_backlight(struct 
> amdgpu_encoder *amdgpu_encode
> return;
>
> if (!(adev->mode_info.firmware_flags & 
> ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
> -   return;
> +   goto register_acpi_backlight;
>
> if (!acpi_video_backlight_use_native()) {
> DRM_INFO("Skipping amdgpu atom DIG backlight registration\n");
> -   return;
> +   goto register_acpi_backlight;
> }
>
> pdata = kmalloc(sizeof(struct amdgpu_backlight_privdata), GFP_KERNEL);
> @@ -225,6 +225,11 @@ void amdgpu_atombios_encoder_init_backlight(struct 
> amdgpu_encoder *amdgpu_encode
>  error:
> kfree(pdata);
> return;
> +
> +register_acpi_backlight:
> +   /* Try registering an ACPI video backlight device instead. */
> +   acpi_video_register_backlight();
> +   return;

Can drop the return here.  Either way,
Acked-by: Alex Deucher 

>  }
>
>  void
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 3b03a95e59a8..a667e66a9842 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -4054,6 +4054,8 @@ amdgpu_dm_register_backlight_device(struct 
> amdgpu_display_manager *dm)
>
> if (!acpi_video_backlight_use_native()) {
> DRM_INFO("Skipping amdgpu DM backlight registration\n");
> +   /* Try registering an ACPI video backlight device instead. */
> +   acpi_video_register_backlight();
> return;
> }
>
> --
> 2.36.0
>


Re: [PATCH v2 09/29] ACPI: video: Make backlight class device registration a separate step

2022-07-20 Thread Alex Deucher
On Tue, Jul 12, 2022 at 3:40 PM Hans de Goede  wrote:
>
> On x86/ACPI boards the acpi_video driver will usually initializing before

initializing -> initialize

> the kms driver (except i915). This causes /sys/class/backlight/acpi_video0
> to show up and then the kms driver registers its own native backlight
> device after which the drivers/acpi/video_detect.c code unregisters
> the acpi_video0 device (when acpi_video_get_backlight_type()==native).
>
> This means that userspace briefly sees 2 devices and the disappearing of
> acpi_video0 after a brief time confuses the systemd backlight level
> save/restore code, see e.g.:
> https://bbs.archlinux.org/viewtopic.php?id=269920
>
> To fix this make backlight class device registration a separate step
> done by a new acpi_video_register_backlight() function. The intent is for
> this to be called by the drm/kms driver *after* it is done setting up its
> own native backlight device. So that acpi_video_get_backlight_type() knows
> if a native backlight will be available or not at acpi_video backlight
> registration time, avoiding the add + remove dance.
>
> Note the new acpi_video_register_backlight() function is also called from
> a delayed work to ensure that the acpi_video backlight devices do get
> registered if necessary even if there is no drm/kms driver or when it is
> disabled.
>
> Signed-off-by: Hans de Goede 
> ---
>  drivers/acpi/acpi_video.c | 45 ---
>  include/acpi/video.h  |  2 ++
>  2 files changed, 44 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
> index 6944794797a5..c4c3a9e7ce69 100644
> --- a/drivers/acpi/acpi_video.c
> +++ b/drivers/acpi/acpi_video.c
> @@ -31,6 +31,12 @@
>  #define ACPI_VIDEO_BUS_NAME"Video Bus"
>  #define ACPI_VIDEO_DEVICE_NAME "Video Device"
>
> +/*
> + * Display probing is known to take up to 5 seconds, so delay the fallback
> + * backlight registration by 5 seconds + 3 seconds for some extra margin.
> + */
> +#define ACPI_VIDEO_REGISTER_BACKLIGHT_DELAY(8 * HZ)
> +
>  #define MAX_NAME_LEN   20
>
>  MODULE_AUTHOR("Bruno Ducrot");
> @@ -81,6 +87,9 @@ static LIST_HEAD(video_bus_head);
>  static int acpi_video_bus_add(struct acpi_device *device);
>  static int acpi_video_bus_remove(struct acpi_device *device);
>  static void acpi_video_bus_notify(struct acpi_device *device, u32 event);
> +static void acpi_video_bus_register_backlight_work(struct work_struct 
> *ignored);
> +static DECLARE_DELAYED_WORK(video_bus_register_backlight_work,
> +   acpi_video_bus_register_backlight_work);
>  void acpi_video_detect_exit(void);
>
>  /*
> @@ -1865,8 +1874,6 @@ static int acpi_video_bus_register_backlight(struct 
> acpi_video_bus *video)
> if (video->backlight_registered)
> return 0;
>
> -   acpi_video_run_bcl_for_osi(video);
> -
> if (acpi_video_get_backlight_type() != acpi_backlight_video)
> return 0;
>
> @@ -2092,7 +2099,11 @@ static int acpi_video_bus_add(struct acpi_device 
> *device)
> list_add_tail(&video->entry, &video_bus_head);
> mutex_unlock(&video_list_lock);
>
> -   acpi_video_bus_register_backlight(video);
> +   /*
> +* The userspace visible backlight_device gets registered separately
> +* from acpi_video_register_backlight().
> +*/
> +   acpi_video_run_bcl_for_osi(video);
> acpi_video_bus_add_notify_handler(video);
>
> return 0;
> @@ -2131,6 +2142,11 @@ static int acpi_video_bus_remove(struct acpi_device 
> *device)
> return 0;
>  }
>
> +static void acpi_video_bus_register_backlight_work(struct work_struct 
> *ignored)
> +{
> +   acpi_video_register_backlight();
> +}
> +
>  static int __init is_i740(struct pci_dev *dev)
>  {
> if (dev->device == 0x00D1)
> @@ -2241,6 +2257,17 @@ int acpi_video_register(void)
>  */
> register_count = 1;
>
> +   /*
> +* acpi_video_bus_add() skips registering the userspace visible
> +* backlight_device. The intend is for this to be registered by the
> +* drm/kms driver calling acpi_video_register_backlight() *after* it 
> is
> +* done setting up its own native backlight device. The delayed work
> +* ensures that acpi_video_register_backlight() always gets called
> +* eventually, in case there is no drm/kms driver or it is disabled.
> +*/
> +   schedule_delayed_work(&video_bus_register_backlight_work,
> + ACPI_VIDEO_REGISTER_BACKLIGHT_DELAY);
> +
>  leave:
> mutex_unlock(®ister_count_mutex);
> return ret;
> @@ -2251,6 +2278,7 @@ void acpi_video_unregister(void)
>  {
> mutex_lock(®ister_count_mutex);
> if (register_count) {
> +   cancel_delayed_work_sync(&video_bus_register_backlight_work);
> acpi_bus_unregister_driver(&acpi_video_bus);
>

Re: [PATCH v2 03/29] drm/amdgpu: Don't register backlight when another backlight should be used

2022-07-20 Thread Alex Deucher
On Wed, Jul 20, 2022 at 12:44 PM Alex Deucher  wrote:
>
> On Tue, Jul 12, 2022 at 3:39 PM Hans de Goede  wrote:
> >
> > Before this commit when we want userspace to use the acpi_video backlight
> > device we register both the GPU's native backlight device and acpi_video's
> > firmware acpi_video# backlight device. This relies on userspace preferring
> > firmware type backlight devices over native ones.
> >
> > Registering 2 backlight devices for a single display really is
> > undesirable, don't register the GPU's native backlight device when
> > another backlight device should be used.
> >
> > Changes in v2:
> > - To avoid linker errors when amdgpu is builtin and video_detect.c is in
> >   a module, select ACPI_VIDEO and its deps if ACPI && X86 are enabled.
> >   When these are not set, ACPI_VIDEO is disabled, ensuring the stubs
> >   from acpi/video.h will be used.
> >
> > Signed-off-by: Hans de Goede 
>
> Acked-by: Alex Deucher 

Actually, can you use dev_info for the messages below rather than
DRM_INFO?  That makes it easier to tell which GPU is affected in a
multi-GPU system.  With that changed,
Acked-by: Alex Deucher 

>
> > ---
> >  drivers/gpu/drm/Kconfig   | 6 ++
> >  drivers/gpu/drm/amd/amdgpu/atombios_encoders.c| 7 +++
> >  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++
> >  3 files changed, 20 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> > index aaa7ad1f0614..d65119860760 100644
> > --- a/drivers/gpu/drm/Kconfig
> > +++ b/drivers/gpu/drm/Kconfig
> > @@ -258,6 +258,12 @@ config DRM_AMDGPU
> > select HWMON
> > select BACKLIGHT_CLASS_DEVICE
> > select INTERVAL_TREE
> > +   # amdgpu depends on ACPI_VIDEO when X86 and ACPI are both enabled
> > +   # for select to work, ACPI_VIDEO's dependencies must also be 
> > selected
> > +   select INPUT if ACPI && X86
> > +   select X86_PLATFORM_DEVICES if ACPI && X86
> > +   select ACPI_WMI if ACPI && X86
> > +   select ACPI_VIDEO if ACPI && X86
> > help
> >   Choose this option if you have a recent AMD Radeon graphics card.
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c 
> > b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> > index fa7421afb9a6..abf209e36fca 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> > @@ -26,6 +26,8 @@
> >
> >  #include 
> >
> > +#include 
> > +
> >  #include 
> >  #include 
> >  #include "amdgpu.h"
> > @@ -184,6 +186,11 @@ void amdgpu_atombios_encoder_init_backlight(struct 
> > amdgpu_encoder *amdgpu_encode
> > if (!(adev->mode_info.firmware_flags & 
> > ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
> > return;
> >
> > +   if (!acpi_video_backlight_use_native()) {
> > +   DRM_INFO("Skipping amdgpu atom DIG backlight 
> > registration\n");
> > +   return;
> > +   }
> > +
> > pdata = kmalloc(sizeof(struct amdgpu_backlight_privdata), 
> > GFP_KERNEL);
> > if (!pdata) {
> > DRM_ERROR("Memory allocation failed\n");
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index 5eb111d35793..3b03a95e59a8 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -86,6 +86,8 @@
> >  #include 
> >  #include 
> >
> > +#include 
> > +
> >  #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
> >
> >  #include "dcn/dcn_1_0_offset.h"
> > @@ -4050,6 +4052,11 @@ amdgpu_dm_register_backlight_device(struct 
> > amdgpu_display_manager *dm)
> > amdgpu_dm_update_backlight_caps(dm, dm->num_of_edps);
> > dm->brightness[dm->num_of_edps] = AMDGPU_MAX_BL_LEVEL;
> >
> > +   if (!acpi_video_backlight_use_native()) {
> > +   DRM_INFO("Skipping amdgpu DM backlight registration\n");
> > +   return;
> > +   }
> > +
> > props.max_brightness = AMDGPU_MAX_BL_LEVEL;
> > props.brightness = AMDGPU_MAX_BL_LEVEL;
> > props.type = BACKLIGHT_RAW;
> > --
> > 2.36.0
> >


Re: [PATCH v2 04/29] drm/radeon: Don't register backlight when another backlight should be used

2022-07-20 Thread Alex Deucher
On Tue, Jul 12, 2022 at 3:39 PM Hans de Goede  wrote:
>
> Before this commit when we want userspace to use the acpi_video backlight
> device we register both the GPU's native backlight device and acpi_video's
> firmware acpi_video# backlight device. This relies on userspace preferring
> firmware type backlight devices over native ones.
>
> Registering 2 backlight devices for a single display really is
> undesirable, don't register the GPU's native backlight device when
> another backlight device should be used.
>
> Changes in v2:
> - To avoid linker errors when radeon is builtin and video_detect.c is in
>   a module, select ACPI_VIDEO and its deps if ACPI && X86 are enabled.
>   When these are not set, ACPI_VIDEO is disabled, ensuring the stubs
>   from acpi/video.h will be used.
>
> Signed-off-by: Hans de Goede 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/Kconfig | 6 ++
>  drivers/gpu/drm/radeon/atombios_encoders.c  | 7 +++
>  drivers/gpu/drm/radeon/radeon_legacy_encoders.c | 7 +++
>  3 files changed, 20 insertions(+)
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index d65119860760..a07b76e06f84 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -234,6 +234,12 @@ config DRM_RADEON
> select HWMON
> select BACKLIGHT_CLASS_DEVICE
> select INTERVAL_TREE
> +   # radeon depends on ACPI_VIDEO when X86 and ACPI are both enabled
> +   # for select to work, ACPI_VIDEO's dependencies must also be selected
> +   select INPUT if ACPI && X86
> +   select X86_PLATFORM_DEVICES if ACPI && X86
> +   select ACPI_WMI if ACPI && X86
> +   select ACPI_VIDEO if ACPI && X86
> help
>   Choose this option if you have an ATI Radeon graphics card.  There
>   are both PCI and AGP versions.  You don't need to choose this to
> diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c 
> b/drivers/gpu/drm/radeon/atombios_encoders.c
> index c93040e60d04..958920230d6f 100644
> --- a/drivers/gpu/drm/radeon/atombios_encoders.c
> +++ b/drivers/gpu/drm/radeon/atombios_encoders.c
> @@ -32,6 +32,8 @@
>  #include 
>  #include 
>
> +#include 
> +
>  #include "atom.h"
>  #include "radeon_atombios.h"
>  #include "radeon.h"
> @@ -209,6 +211,11 @@ void radeon_atom_backlight_init(struct radeon_encoder 
> *radeon_encoder,
> if (!(rdev->mode_info.firmware_flags & 
> ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
> return;
>
> +   if (!acpi_video_backlight_use_native()) {
> +   DRM_INFO("Skipping radeon atom DIG backlight registration\n");
> +   return;
> +   }
> +
> pdata = kmalloc(sizeof(struct radeon_backlight_privdata), GFP_KERNEL);
> if (!pdata) {
> DRM_ERROR("Memory allocation failed\n");
> diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c 
> b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
> index 1a66fb969ee7..d24cedf20c47 100644
> --- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
> +++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
> @@ -33,6 +33,8 @@
>  #include 
>  #include 
>
> +#include 
> +
>  #include "radeon.h"
>  #include "radeon_asic.h"
>  #include "radeon_legacy_encoders.h"
> @@ -387,6 +389,11 @@ void radeon_legacy_backlight_init(struct radeon_encoder 
> *radeon_encoder,
> return;
>  #endif
>
> +   if (!acpi_video_backlight_use_native()) {
> +   DRM_INFO("Skipping radeon legacy LVDS backlight 
> registration\n");
> +   return;
> +   }
> +
> pdata = kmalloc(sizeof(struct radeon_backlight_privdata), GFP_KERNEL);
> if (!pdata) {
> DRM_ERROR("Memory allocation failed\n");
> --
> 2.36.0
>


Re: [PATCH v2 03/29] drm/amdgpu: Don't register backlight when another backlight should be used

2022-07-20 Thread Alex Deucher
On Tue, Jul 12, 2022 at 3:39 PM Hans de Goede  wrote:
>
> Before this commit when we want userspace to use the acpi_video backlight
> device we register both the GPU's native backlight device and acpi_video's
> firmware acpi_video# backlight device. This relies on userspace preferring
> firmware type backlight devices over native ones.
>
> Registering 2 backlight devices for a single display really is
> undesirable, don't register the GPU's native backlight device when
> another backlight device should be used.
>
> Changes in v2:
> - To avoid linker errors when amdgpu is builtin and video_detect.c is in
>   a module, select ACPI_VIDEO and its deps if ACPI && X86 are enabled.
>   When these are not set, ACPI_VIDEO is disabled, ensuring the stubs
>   from acpi/video.h will be used.
>
> Signed-off-by: Hans de Goede 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/Kconfig   | 6 ++
>  drivers/gpu/drm/amd/amdgpu/atombios_encoders.c| 7 +++
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++
>  3 files changed, 20 insertions(+)
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index aaa7ad1f0614..d65119860760 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -258,6 +258,12 @@ config DRM_AMDGPU
> select HWMON
> select BACKLIGHT_CLASS_DEVICE
> select INTERVAL_TREE
> +   # amdgpu depends on ACPI_VIDEO when X86 and ACPI are both enabled
> +   # for select to work, ACPI_VIDEO's dependencies must also be selected
> +   select INPUT if ACPI && X86
> +   select X86_PLATFORM_DEVICES if ACPI && X86
> +   select ACPI_WMI if ACPI && X86
> +   select ACPI_VIDEO if ACPI && X86
> help
>   Choose this option if you have a recent AMD Radeon graphics card.
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c 
> b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> index fa7421afb9a6..abf209e36fca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> @@ -26,6 +26,8 @@
>
>  #include 
>
> +#include 
> +
>  #include 
>  #include 
>  #include "amdgpu.h"
> @@ -184,6 +186,11 @@ void amdgpu_atombios_encoder_init_backlight(struct 
> amdgpu_encoder *amdgpu_encode
> if (!(adev->mode_info.firmware_flags & 
> ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
> return;
>
> +   if (!acpi_video_backlight_use_native()) {
> +   DRM_INFO("Skipping amdgpu atom DIG backlight registration\n");
> +   return;
> +   }
> +
> pdata = kmalloc(sizeof(struct amdgpu_backlight_privdata), GFP_KERNEL);
> if (!pdata) {
> DRM_ERROR("Memory allocation failed\n");
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 5eb111d35793..3b03a95e59a8 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -86,6 +86,8 @@
>  #include 
>  #include 
>
> +#include 
> +
>  #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
>
>  #include "dcn/dcn_1_0_offset.h"
> @@ -4050,6 +4052,11 @@ amdgpu_dm_register_backlight_device(struct 
> amdgpu_display_manager *dm)
> amdgpu_dm_update_backlight_caps(dm, dm->num_of_edps);
> dm->brightness[dm->num_of_edps] = AMDGPU_MAX_BL_LEVEL;
>
> +   if (!acpi_video_backlight_use_native()) {
> +   DRM_INFO("Skipping amdgpu DM backlight registration\n");
> +   return;
> +   }
> +
> props.max_brightness = AMDGPU_MAX_BL_LEVEL;
> props.brightness = AMDGPU_MAX_BL_LEVEL;
> props.type = BACKLIGHT_RAW;
> --
> 2.36.0
>


[PATCH] drm/amd/display: reduce stack size in dcn32 dml (v2)

2022-07-20 Thread Alex Deucher
Move additional dummy structures off the stack and into
the dummy vars structure.

Fixes the following:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In 
function 
'DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation':
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1659:1:
 error: the frame size of 2144 bytes is larger than 2048 bytes 
[-Werror=frame-larger-than=]
 1659 | }
  | ^
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In 
function 'dml32_ModeSupportAndSystemConfigurationFull':
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:3799:1:
 error: the frame size of 2464 bytes is larger than 2048 bytes 
[-Werror=frame-larger-than=]
 3799 | } // ModeSupportAndSystemConfigurationFull
  | ^

v2: move more stuff to dummy structure, fix init order (Alex)

Signed-off-by: Alex Deucher 
Cc: Stephen Rothwell 
---
 .../dc/dml/dcn32/display_mode_vba_32.c| 406 --
 .../drm/amd/display/dc/dml/display_mode_vba.h |  35 ++
 2 files changed, 214 insertions(+), 227 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 349e36ae9333..91450a973920 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -67,6 +67,12 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
int iteration;
double MaxTotalRDBandwidth;
unsigned int NextPrefetchMode;
+   double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
+   bool DestinationLineTimesForPrefetchLessThan2 = false;
+   bool VRatioPrefetchMoreThanMax = false;
+   double TWait;
+   double TotalWRBandwidth = 0;
+   double WRBandwidth = 0;
 
 #ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: --- START ---\n", __func__);
@@ -702,11 +708,6 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
NextPrefetchMode = 
mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb];
 
do {
-   double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
-   bool DestinationLineTimesForPrefetchLessThan2 = false;
-   bool VRatioPrefetchMoreThanMax = false;
-   double dummy_unit_vector[DC__NUM_DPP__MAX];
-
MaxTotalRDBandwidth = 0;
 #ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, 
mode_lib->vba.VStartupLines);
@@ -715,41 +716,39 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
/* NOTE PerfetchMode variable is invalid in DAL as per 
the input received.
 * Hence the direction is to use PrefetchModePerState.
 */
-   double TWait = dml32_CalculateTWait(
-   
mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
-   
mode_lib->vba.UsesMALLForPStateChange[k],
-   
mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
-   mode_lib->vba.DRRDisplay[k],
-   mode_lib->vba.DRAMClockChangeLatency,
-   mode_lib->vba.FCLKChangeLatency, 
v->UrgentLatency,
-   mode_lib->vba.SREnterPlusExitTime);
-
-   DmlPipe myPipe;
-
-   myPipe.Dppclk = mode_lib->vba.DPPCLK[k];
-   myPipe.Dispclk = mode_lib->vba.DISPCLK;
-   myPipe.PixelClock = mode_lib->vba.PixelClock[k];
-   myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep;
-   myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k];
-   myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
-   myPipe.SourceRotation = mode_lib->vba.SourceRotation[k];
-   myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
-   myPipe.BlockHeight256BytesY = 
v->BlockHeight256BytesY[k];
-   myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
-   myPipe.BlockHeight256BytesC = 
v->BlockHeight256BytesC[k];
-   myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
-   myPipe.NumberOfCursors = 
mode_lib->vba.NumberOfCursors[k];
-   myPipe.VBlank = mode_lib->vba.VTotal[k] - 
mode_lib->vba.VActive[k];
-   myPipe.HTotal = mode_lib->vba.HTotal[k];
-   myPipe.HActive = mode_lib->vba.HActive[k];
-   myPipe.DCCEnable = mode_lib->vba.DCCEnable

[PATCH v4 40/41] nouveau-dbg: fixup lost prdbgs

2022-07-20 Thread Jim Cromie
Undo the 1-line change that reduced count of prdbgs from 632 to 119.

ie: s/NV_SUBDEV_DBG_##l/NV_DBG_##l/

So here's what happened: new symbol is 15 (or 10), and fails this macro
test, so gets compiled out, and the dev_dbg is excluded.

if (CONFIG_NOUVEAU_DEBUG >= (l) && _subdev->debug >= (l))   \
dev_dbg(_subdev->device->dev, "%s: "f, _subdev->name, ##a); \

I could hack this, by hardcoding in (l + #base), but base is pretty
distant to just toss into the macro.  At least, the base-ref should be
a macro(&classmap) properly exposing it.

OTOH, the whole CONFIG_NOUVEAU_DEBUG check could be reworked; given
that trace is minimum recommended, there's not that many callsites
elided (SPAM only iirc) at compile-time, and dyndbg means keeping them
has "zero" run-cost (and 56 bytes per).  So this config item doesn't do
much when DRM_USE_DYNAMIC_DEBUG=y.

So this is a useful place to stop and take another look around, try to
guess which trail to take..

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h 
b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
index bf9c69f4fc3e..d5f6ca05d5fa 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
@@ -66,7 +66,7 @@ void nvkm_subdev_intr(struct nvkm_subdev *);
if (CONFIG_NOUVEAU_DEBUG >= (l) && _subdev->debug >= (l))   \
dev_dbg(_subdev->device->dev, "%s: "f, _subdev->name, ##a); \
 } while(0)
-#define nvkm_drmdbg_(s,l,f,a...) nvkm_drmdbg__((s), NV_SUBDEV_DBG_##l, dbg, f, 
##a)
+#define nvkm_drmdbg_(s,l,f,a...) nvkm_drmdbg__((s), NV_DBG_##l, dbg, f, ##a)
 #define nvkm_debug(s,f,a...) nvkm_drmdbg_((s), DEBUG, f, ##a)
 #define nvkm_trace(s,f,a...) nvkm_drmdbg_((s), TRACE, f, ##a)
 #define nvkm_spam(s,f,a...)  nvkm_drmdbg_((s),  SPAM, f, ##a)
-- 
2.36.1



[PATCH v4 31/41] dyndbg: add _DPRINTK_FLAGS_ENABLED

2022-07-20 Thread Jim Cromie
Distinguish the condition: _DPRINTK_FLAGS_ENABLED from the bit:
_DPRINTK_FLAGS_PRINT, and re-define former in terms of latter, in
preparation to add a 2nd bit: _DPRINTK_FLAGS_TRACE

Update JUMP_LABEL code block to check _DPRINTK_FLAGS_ENABLED symbol.
Also add a 'K' to get new symbol _DPRINTK_FLAGS_PRINTK, in order to
break any stale uses.

CC: vincent.whitchu...@axis.com
Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 10 ++
 lib/dynamic_debug.c   |  8 
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index b50bdd5c8184..38cfdd5c0bdc 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -32,7 +32,7 @@ struct _ddebug {
 * writes commands to /dynamic_debug/control
 */
 #define _DPRINTK_FLAGS_NONE0
-#define _DPRINTK_FLAGS_PRINT   (1<<0) /* printk() a message using the format */
+#define _DPRINTK_FLAGS_PRINTK  (1 << 0) /* printk() a message using the format 
*/
 #define _DPRINTK_FLAGS_INCL_MODNAME(1<<1)
 #define _DPRINTK_FLAGS_INCL_FUNCNAME   (1<<2)
 #define _DPRINTK_FLAGS_INCL_LINENO (1<<3)
@@ -42,8 +42,10 @@ struct _ddebug {
(_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\
 _DPRINTK_FLAGS_INCL_LINENO  | _DPRINTK_FLAGS_INCL_TID)
 
+#define _DPRINTK_FLAGS_ENABLED _DPRINTK_FLAGS_PRINTK
+
 #if defined DEBUG
-#define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT
+#define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINTK
 #else
 #define _DPRINTK_FLAGS_DEFAULT 0
 #endif
@@ -191,10 +193,10 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
 
 #ifdef DEBUG
 #define DYNAMIC_DEBUG_BRANCH(descriptor) \
-   likely(descriptor.flags & _DPRINTK_FLAGS_PRINT)
+   likely(descriptor.flags & _DPRINTK_FLAGS_ENABLED)
 #else
 #define DYNAMIC_DEBUG_BRANCH(descriptor) \
-   unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT)
+   unlikely(descriptor.flags & _DPRINTK_FLAGS_ENABLED)
 #endif
 
 #endif /* CONFIG_JUMP_LABEL */
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index dd27dc514aa3..2a46c642373a 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -89,7 +89,7 @@ static inline const char *trim_prefix(const char *path)
 }
 
 static struct { unsigned flag:8; char opt_char; } opt_array[] = {
-   { _DPRINTK_FLAGS_PRINT, 'p' },
+   { _DPRINTK_FLAGS_PRINTK, 'p' },
{ _DPRINTK_FLAGS_INCL_MODNAME, 'm' },
{ _DPRINTK_FLAGS_INCL_FUNCNAME, 'f' },
{ _DPRINTK_FLAGS_INCL_LINENO, 'l' },
@@ -246,10 +246,10 @@ static int ddebug_change(const struct ddebug_query *query,
if (newflags == dp->flags)
continue;
 #ifdef CONFIG_JUMP_LABEL
-   if (dp->flags & _DPRINTK_FLAGS_PRINT) {
-   if (!(newflags & _DPRINTK_FLAGS_PRINT))
+   if (dp->flags & _DPRINTK_FLAGS_ENABLED) {
+   if (!(newflags & _DPRINTK_FLAGS_ENABLED))

static_branch_disable(&dp->key.dd_key_true);
-   } else if (newflags & _DPRINTK_FLAGS_PRINT) {
+   } else if (newflags & _DPRINTK_FLAGS_ENABLED) {
static_branch_enable(&dp->key.dd_key_true);
}
 #endif
-- 
2.36.1



[PATCH v4 30/41] tracing/events: Add __vstring() and __assign_vstr() helper macros

2022-07-20 Thread Jim Cromie
From: "Steven Rostedt (Google)" 

Steve's patch, carried til upstream.

There's several places that open code the following logic:

  TP_STRUCT__entry(__dynamic_array(char, msg, MSG_MAX)),
  TP_fast_assign(vsnprintf(__get_str(msg), MSG_MAX, vaf->fmt, *vaf->va);)

To load a string created by variable array va_list.

The main issue with this approach is the "MSG_MAX" usage in the
__dynamic_array() portion. That actually just reserves the MSG_MAX in the
event, and even wastes space because there's dynamic meta data also saved
in the event to denote the offset and size of the dynamic array. It would
have been better to just use a static __array() field.

Instead, create __vstring() and __assign_vstr() that work like __string
and __assign_str() but instead of taking a destination string to copy,
take a format string and a va_list pointer and fill in the values.

It uses the helper:

 #define __trace_event_vstr_len(fmt, va)\
 ({ \
va_list __ap;   \
int __ret;  \
\
va_copy(__ap, *(va));   \
__ret = vsnprintf(NULL, 0, fmt, __ap) + 1;  \
va_end(__ap);   \
\
min(__ret, TRACE_EVENT_STR_MAX);\
 })

To figure out the length to store the string. It may be slightly slower as
it needs to run the vsnprintf() twice, but it now saves space on the ring
buffer.

Link: https://lkml.kernel.org/r/20220705224749.053570...@goodmis.org

Cc: Dennis Dalessandro 
Cc: Ingo Molnar 
Cc: Andrew Morton 
Cc: Jason Gunthorpe 
Cc: Leon Romanovsky 
Cc: Kalle Valo 
Cc: "David S. Miller" 
Cc: Eric Dumazet 
Cc: Jakub Kicinski 
Cc: Paolo Abeni 
Cc: Arend van Spriel 
Cc: Franky Lin 
Cc: Hante Meuleman 
Cc: Gregory Greenman 
Cc: Peter Chen 
Cc: Greg Kroah-Hartman 
Cc: Mathias Nyman 
Cc: Chunfeng Yun 
Cc: Bin Liu 
Cc: Marek Lindner 
Cc: Simon Wunderlich 
Cc: Antonio Quartulli 
Cc: Sven Eckelmann 
Cc: Johannes Berg 
Cc: Jim Cromie 
Signed-off-by: Steven Rostedt (Google) 
---
 include/linux/trace_events.h | 18 ++
 include/trace/stages/stage1_struct_define.h  |  3 +++
 include/trace/stages/stage2_data_offsets.h   |  3 +++
 include/trace/stages/stage4_event_fields.h   |  3 +++
 include/trace/stages/stage5_get_offsets.h|  4 
 include/trace/stages/stage6_event_callback.h |  7 +++
 6 files changed, 38 insertions(+)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index e6e95a9f07a5..b18759a673c6 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -916,6 +916,24 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, 
u16 type,
 
 #endif
 
+#define TRACE_EVENT_STR_MAX512
+
+/*
+ * gcc warns that you can not use a va_list in an inlined
+ * function. But lets me make it into a macro :-/
+ */
+#define __trace_event_vstr_len(fmt, va)\
+({ \
+   va_list __ap;   \
+   int __ret;  \
+   \
+   va_copy(__ap, *(va));   \
+   __ret = vsnprintf(NULL, 0, fmt, __ap) + 1;  \
+   va_end(__ap);   \
+   \
+   min(__ret, TRACE_EVENT_STR_MAX);\
+})
+
 #endif /* _LINUX_TRACE_EVENT_H */
 
 /*
diff --git a/include/trace/stages/stage1_struct_define.h 
b/include/trace/stages/stage1_struct_define.h
index a16783419687..1b7bab60434c 100644
--- a/include/trace/stages/stage1_struct_define.h
+++ b/include/trace/stages/stage1_struct_define.h
@@ -26,6 +26,9 @@
 #undef __string_len
 #define __string_len(item, src, len) __dynamic_array(char, item, -1)
 
+#undef __vstring
+#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1)
+
 #undef __bitmask
 #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
 
diff --git a/include/trace/stages/stage2_data_offsets.h 
b/include/trace/stages/stage2_data_offsets.h
index 42fd1e8813ec..1b7a8f764fdd 100644
--- a/include/trace/stages/stage2_data_offsets.h
+++ b/include/trace/stages/stage2_data_offsets.h
@@ -32,6 +32,9 @@
 #undef __string_len
 #define __string_len(item, src, len) __dynamic_array(char, item, -1)
 
+#undef __vstring
+#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1)
+
 #undef __bitmask
 #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
 
diff --git a/include/trace/stages/stage4_event_fields.h 
b/include/trace/stages/stage4_event_fields.h
index e80cdc397a43..c3790ec7a453 100644
--- a/include/trace/stages/stage4_event_fields.h
+++ b/include/trace/stages/stage4_event_fields.h
@@ -38,6 +38,9 @@
 #undef __string_len
 #define __string_len(item, src, len) __dyna

[PATCH v4 36/41] dyndbg/drm: POC add tracebits sysfs-knob

2022-07-20 Thread Jim Cromie
clone DRM.debug interface to DRM.tracebits: ie declare __drm_trace,
map its bits to drm-debug-categories, except this interface enables
messages to tracefs, not to syslog.

1- we reuse the drm_debug_classes class-map added previously.
   this reflects the single source of both syslog/trace events
   and is why structs classmap and bitmap-param are separate.

2- add a 2nd struct ddebug_classes_bitmap_param
   refs 1, reusing it.
   flags = "T", to enable trace-events on this callsite.

3- module_param_cb([2]) - does the sysfs part

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/drm_print.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index 9fb0b8e40dca..47a41d96beea 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -45,6 +45,9 @@
 unsigned long __drm_debug;
 EXPORT_SYMBOL(__drm_debug);
 
+unsigned long __drm_trace;
+EXPORT_SYMBOL(__drm_trace);
+
 MODULE_PARM_DESC(debug, "Enable debug output, where each bit enables a debug 
category.\n"
 "\t\tBit 0 (0x01)  will enable CORE messages (drm core code)\n"
 "\t\tBit 1 (0x02)  will enable DRIVER messages (drm controller code)\n"
@@ -77,6 +80,13 @@ static struct ddebug_classes_bitmap_param drm_debug_bitmap = 
{
.map = &drm_debug_classes,
 };
 module_param_cb(debug, &param_ops_dyndbg_classes, &drm_debug_bitmap, 0600);
+
+static struct ddebug_classes_bitmap_param drm_trace_bitmap = {
+   .bits = &__drm_trace,
+   .flags = "T",
+   .map = &drm_debug_classes,
+};
+module_param_cb(tracecats, &param_ops_dyndbg_classes, &drm_trace_bitmap, 0600);
 #endif
 
 void __drm_puts_coredump(struct drm_printer *p, const char *str)
-- 
2.36.1



[PATCH v4 41/41] nouveau-dyndbg: wip subdev refine, breaks on use

2022-07-20 Thread Jim Cromie
Change nvkm_subdev.debug to a ulong, so dyndbg can maybe use it.

Move macro decl from nv-drm.c to subdev.c, and add a struct
ddebug_classes_bitmap_param and a module_param_cb() that creates the
sysfs-knob.

Finally, in nvkm_subdev_ctor(), *attempt* to set dyndbg's pointer to
the debug address, so that dyndbg can observe the underlying debug
value, and make enable/disable decisions based upon it.

But I'm not getting the ctor called, so the ptr is NULL when ref'd.

Signed-off-by: Jim Cromie 
---
 .../drm/nouveau/include/nvkm/core/subdev.h|  2 +-
 drivers/gpu/drm/nouveau/nouveau_drm.c |  7 --
 drivers/gpu/drm/nouveau/nvkm/core/subdev.c| 23 +++
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h 
b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
index d5f6ca05d5fa..05807403fdd6 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
@@ -19,7 +19,7 @@ struct nvkm_subdev {
enum nvkm_subdev_type type;
int inst;
char name[16];
-   u32 debug;
+   unsigned long debug;
struct list_head head;
 
void **pself;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c 
b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 85b63b527877..d45c71ffc09e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -90,13 +90,6 @@ DECLARE_DYNDBG_CLASSMAP(nv_cli_debug_verbose, 
DD_CLASS_TYPE_VERBOSE, 10,
"NV_CLI_DBG_TRACE",
"NV_CLI_DBG_SPAM");
 
-DECLARE_DYNDBG_CLASSMAP(nv_subdev_debug_verbose, DD_CLASS_TYPE_VERBOSE, 15,
-   "NV_SUBDEV_DBG_OFF",
-   "NV_SUBDEV_DBG_INFO",
-   "NV_SUBDEV_DBG_DEBUG",
-   "NV_SUBDEV_DBG_TRACE",
-   "NV_SUBDEV_DBG_SPAM");
-
 MODULE_PARM_DESC(config, "option string to pass to driver core");
 static char *nouveau_config;
 module_param_named(config, nouveau_config, charp, 0400);
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c 
b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
index a74b7acb6832..227871c3a749 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
@@ -26,6 +26,27 @@
 #include 
 #include 
 
+#include 
+#include 
+
+#define DEBUG
+
+DECLARE_DYNDBG_CLASSMAP(nv_subdev_debug_verbose, DD_CLASS_TYPE_VERBOSE, 15,
+   "NV_SUBDEV_DBG_OFF",
+   "NV_SUBDEV_DBG_INFO",
+   "NV_SUBDEV_DBG_DEBUG",
+   "NV_SUBDEV_DBG_TRACE",
+   "NV_SUBDEV_DBG_SPAM");
+
+static struct ddebug_classes_bitmap_param nv_subdev_verbose = {
+   .bits = NULL, // wants &_subdev->debug
+   .flags = "p",
+   .map = &nv_subdev_debug_verbose,
+};
+module_param_cb(debug_subdev, &param_ops_dyndbg_classes, &nv_subdev_verbose, 
0600);
+
+
+
 const char *
 nvkm_subdev_type[NVKM_SUBDEV_NR] = {
 #define NVKM_LAYOUT_ONCE(type,data,ptr,...) [type] = #ptr,
@@ -180,6 +201,8 @@ nvkm_subdev_ctor(const struct nvkm_subdev_func *func, 
struct nvkm_device *device
else
strscpy(subdev->name, nvkm_subdev_type[type], 
sizeof(subdev->name));
subdev->debug = nvkm_dbgopt(device->dbgopt, subdev->name);
+   nv_subdev_verbose.bits = &subdev->debug;
+   pr_debug("updated bitmap: %px\n", &nv_subdev_verbose.bits);
list_add_tail(&subdev->head, &device->subdev);
 }
 
-- 
2.36.1



[PATCH v4 28/41] drm_print: add _ddebug descriptor to drm_*dbg prototypes

2022-07-20 Thread Jim Cromie
upgrade the callchain to drm_dbg() and drm_dev_dbg(); add a struct
_ddebug ptr parameter to them, and supply that additional param by
replacing the '_no_desc' flavor of dyndbg Factory macro currently used
with the flavor that supplies the descriptor.

NOTES:

The descriptor gives these fns access to the decorator flags, but does
none of the dynamic-prefixing done by __dynamic_emit_prefix().

DRM already has conventions for logging/messaging; just tossing
optional decorations on top may not help.  Instead, existing flags (or
new ones) can be used to make current conventions optional.

For CONFIG_DRM_USE_DYNAMIC_DEBUG=N, just pass null.

Note: desc->class_id is redundant with category parameter, but its
availability is dependent on desc.

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/drm_print.c |  8 +---
 include/drm/drm_print.h | 23 ---
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index e0de79a22255..92f3f45e410c 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -278,8 +279,8 @@ void drm_dev_printk(const struct device *dev, const char 
*level,
 }
 EXPORT_SYMBOL(drm_dev_printk);
 
-void __drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
-  const char *format, ...)
+void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev,
+  enum drm_debug_category category, const char *format, ...)
 {
struct va_format vaf;
va_list args;
@@ -287,6 +288,7 @@ void __drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
if (!__drm_debug_enabled(category))
return;
 
+   /* we know we are printing for either syslog, tracefs, or both */
va_start(args, format);
vaf.fmt = format;
vaf.va = &args;
@@ -302,7 +304,7 @@ void __drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
 }
 EXPORT_SYMBOL(__drm_dev_dbg);
 
-void ___drm_dbg(enum drm_debug_category category, const char *format, ...)
+void ___drm_dbg(struct _ddebug *desc, enum drm_debug_category category, const 
char *format, ...)
 {
struct va_format vaf;
va_list args;
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 7631b5fb669e..46f14cfb401e 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -363,9 +363,10 @@ static inline bool drm_debug_enabled(enum 
drm_debug_category category)
 __printf(3, 4)
 void drm_dev_printk(const struct device *dev, const char *level,
const char *format, ...);
-__printf(3, 4)
-void __drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
-const char *format, ...);
+struct _ddebug;
+__printf(4, 5)
+void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev,
+  enum drm_debug_category category, const char *format, ...);
 
 /**
  * DRM_DEV_ERROR() - Error output.
@@ -415,11 +416,11 @@ void __drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
 
 #if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG)
 #define drm_dev_dbg(dev, cat, fmt, ...)\
-   __drm_dev_dbg(dev, cat, fmt, ##__VA_ARGS__)
+   __drm_dev_dbg(NULL, dev, cat, fmt, ##__VA_ARGS__)
 #else
 #define drm_dev_dbg(dev, cat, fmt, ...)\
-   _dynamic_func_call_no_desc(fmt, __drm_dev_dbg,  \
-  dev, cat, fmt, ##__VA_ARGS__)
+   _dynamic_func_call_cls(cat, fmt, __drm_dev_dbg, \
+  dev, cat, fmt, ##__VA_ARGS__)
 #endif
 
 /**
@@ -523,17 +524,17 @@ void __drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
  * Prefer drm_device based logging over device or prink based logging.
  */
 
-__printf(2, 3)
-void ___drm_dbg(enum drm_debug_category category, const char *format, ...);
+__printf(3, 4)
+void ___drm_dbg(struct _ddebug *desc, enum drm_debug_category category, const 
char *format, ...);
 __printf(1, 2)
 void __drm_err(const char *format, ...);
 
 #if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG)
-#define __drm_dbg(fmt, ...)___drm_dbg(fmt, ##__VA_ARGS__)
+#define __drm_dbg(fmt, ...)___drm_dbg(NULL, fmt, ##__VA_ARGS__)
 #else
 #define __drm_dbg(cat, fmt, ...)   \
-   _dynamic_func_call_no_desc(fmt, ___drm_dbg, \
-  cat, fmt, ##__VA_ARGS__)
+   _dynamic_func_call_cls(cat, fmt, ___drm_dbg,\
+  cat, fmt, ##__VA_ARGS__)
 #endif
 
 /* Macros to make printk easier */
-- 
2.36.1



[PATCH v4 34/41] dyndbg: add 2 trace-events: drm_{,dev}debug

2022-07-20 Thread Jim Cromie
Add include/trace/events/drm.h, with 2 new events: drm_debug() &
drm_devdbg(), and call them from drm_dbg() & drm_dev_dbg().  This is
easy, cuz the callers already build the vaf that the callee wants.

This allows the 3-5k drm.debug/on-dyndbg callsites to independently
(re-)direct messages to tracefs, not just syslog.  ISTM this is good;
debug traffic can be sent (just) to the tool where it can be best
used.  Over time, I'd expect to see less traffic to syslog.

NOTE: The message formats for the 2 events are both sub-optimal.
(both have event-name too)

drm_devdbg: TP_printk("cat:%d, %s %s", __entry->drm_debug_category,

  "cat:%d" should be "%s", but the classnames aren't really in-scope
  here.  Maybe the events-decl-header should be under drm somewhere,
  so that class-names are available.

  It would also be nice to replace the event-name with the classname,
  as the names are highly client centric.

drm_dbg: TP_printk("%s", __get_str(msg))

  same as above.

NB:

The existing category param in this callchain is partially redundant;
when descriptor is available, it has the callsite's class_id.  If
later, CONFIG_DRM_USE_DYNAMIC_DEBUG:=y (hardwired), then category can
be dropped here, since the descriptor will always be available.

Also, if combined with header-move (or maybe its expanding inclusion
by lib/dynamic_debug), we could add the optional flags prefix, by
exposing dynamic_emit_prefix.  And perhaps this could be done only in
TP_printk, to save work while writing to the ring-buffer.

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/drm_print.c | 25 -
 include/trace/events/drm.h  | 54 +
 2 files changed, 72 insertions(+), 7 deletions(-)
 create mode 100644 include/trace/events/drm.h

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index 92f3f45e410c..9fb0b8e40dca 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -35,6 +35,9 @@
 #include 
 #include 
 
+#define CREATE_TRACE_POINTS
+#include 
+
 /*
  * __drm_debug: Enable debug output.
  * Bitmask of DRM_UT_x. See include/drm/drm_print.h for details.
@@ -293,13 +296,19 @@ void __drm_dev_dbg(struct _ddebug *desc, const struct 
device *dev,
vaf.fmt = format;
vaf.va = &args;
 
-   if (dev)
-   dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV",
-  __builtin_return_address(0), &vaf);
-   else
-   printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
-  __builtin_return_address(0), &vaf);
-
+   if (dev) {
+   if (desc->flags & _DPRINTK_FLAGS_PRINTK)
+   dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV",
+  __builtin_return_address(0), &vaf);
+   if (desc->flags & _DPRINTK_FLAGS_TRACE)
+   trace_drm_devdbg(dev, category, &vaf);
+   } else {
+   if (desc->flags & _DPRINTK_FLAGS_PRINTK)
+   printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
+  __builtin_return_address(0), &vaf);
+   if (desc->flags & _DPRINTK_FLAGS_TRACE)
+   trace_drm_debug(category, &vaf);
+   }
va_end(args);
 }
 EXPORT_SYMBOL(__drm_dev_dbg);
@@ -319,6 +328,8 @@ void ___drm_dbg(struct _ddebug *desc, enum 
drm_debug_category category, const ch
printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
   __builtin_return_address(0), &vaf);
 
+   trace_drm_debug(category, &vaf);
+
va_end(args);
 }
 EXPORT_SYMBOL(___drm_dbg);
diff --git a/include/trace/events/drm.h b/include/trace/events/drm.h
new file mode 100644
index ..589fa1e1f2c2
--- /dev/null
+++ b/include/trace/events/drm.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM drm
+
+#if !defined(_TRACE_DRM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DRM_H
+
+#include 
+
+/* drm_debug() was called, pass its args */
+TRACE_EVENT(drm_debug,
+   TP_PROTO(int drm_debug_category, struct va_format *vaf),
+
+   TP_ARGS(drm_debug_category, vaf),
+
+   TP_STRUCT__entry(
+   __field(int, drm_debug_category)
+   __vstring(msg, vaf->fmt, vaf->va)
+   ),
+
+   TP_fast_assign(
+   __entry->drm_debug_category = drm_debug_category;
+   __assign_vstr(msg, vaf->fmt, vaf->va);
+   ),
+
+   TP_printk("%s", __get_str(msg))
+);
+
+/* drm_devdbg() was called, pass its args, preserving order */
+TRACE_EVENT(drm_devdbg,
+   TP_PROTO(const struct device *dev, int drm_debug_category, struct 
va_format *vaf),
+
+   TP_ARGS(dev, drm_debug_category, vaf),
+
+   TP_STRUCT__entry(
+   __field(const struct device*, dev)
+   __field(int, drm_debug_category)
+   __vstring(msg, vaf->fmt, vaf->va)
+

[PATCH v4 38/41] nouveau-dyndbg: alter DEBUG, TRACE, SPAM levels to use dyndbg

2022-07-20 Thread Jim Cromie
clone the nvkm_printk,_,__ macro ladder into nvkm_drmdbg,_,__.
And alter debug, trace, spam macros to use the renamed ladder.

This *sets-up* to remove the _subdev->debug >= (l) condition from the
__ macro, once the bitmap-param is wired up correctly (pointing at the
right state-bit-vector), and figured into dyndbg's jump-label
enablement.

Also, with DYNDBG=y, sites will be off, until enabled by >control, or
by #define DEBUG at compile time.

Starting with this as a model:

static struct ddebug_classes_bitmap_param drm_trace_bitmap = {
.bits = &__drm_trace,
.flags = "T",
.map = &drm_trace_classes,
};
module_param_cb(tracecats, &param_ops_dyndbg_classes, &drm_trace_bitmap,..

We basically need to clone that, but ref a different .bits:
.bits = &_subdev->debug,
while respecting the _subdev's lifecycle.
hints welcomed.

no functional changes. (other than dyndbg's default-off)

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h 
b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
index 065d07ccea87..b9c2afab321f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
@@ -59,9 +59,17 @@ void nvkm_subdev_intr(struct nvkm_subdev *);
 #define nvkm_error(s,f,a...) nvkm_printk((s), ERROR,err, f, ##a)
 #define nvkm_warn(s,f,a...)  nvkm_printk((s),  WARN, notice, f, ##a)
 #define nvkm_info(s,f,a...)  nvkm_printk((s),  INFO,   info, f, ##a)
-#define nvkm_debug(s,f,a...) nvkm_printk((s), DEBUG,dbg, f, ##a)
-#define nvkm_trace(s,f,a...) nvkm_printk((s), TRACE,dbg, f, ##a)
-#define nvkm_spam(s,f,a...)  nvkm_printk((s),  SPAM,dbg, f, ##a)
+
+#define nvkm_drmdbg__(s,l,p,f,a...) do {   \
+   const struct nvkm_subdev *_subdev = (s);\
+   if (CONFIG_NOUVEAU_DEBUG >= (l) && _subdev->debug >= (l))   \
+   dev_dbg(_subdev->device->dev, "%s: "f, _subdev->name, ##a); \
+} while(0)
+#define nvkm_drmdbg_(s,l,f,a...) nvkm_drmdbg__((s), NV_DBG_##l, dbg, f, ##a)
+#define nvkm_debug(s,f,a...) nvkm_drmdbg_((s), DEBUG, f, ##a)
+#define nvkm_trace(s,f,a...) nvkm_drmdbg_((s), TRACE, f, ##a)
+#define nvkm_spam(s,f,a...)  nvkm_drmdbg_((s),  SPAM, f, ##a)
 
 #define nvkm_error_ratelimited(s,f,a...) nvkm_printk((s), ERROR, 
err_ratelimited, f, ##a)
+
 #endif
-- 
2.36.1



[PATCH v4 25/41] drm-print: include dyndbg header indirectly

2022-07-20 Thread Jim Cromie
lkp robot told me:

  >> drivers/gpu/drm/drm_ioc32.c:989:2:
  error: call to undeclared function '_dynamic_func_call_cls';
  ISO C99 and later do not support implicit function declarations
  [-Wimplicit-function-declaration]

   DRM_DEBUG("comm=\"%s\", pid=%d, dev=0x%lx, auth=%d, %s\n",

Since that macro is defined in drm_print.h, include <linux/dynamic_debug.h>
there too, so that any uses have the definitions of all the macros in
the callchain.

This is done as a separate patch mostly to see how lkp sorts it;
I'll probably squash it with HEAD~1

Signed-off-by: Jim Cromie 
---
 include/drm/drm_print.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 3aa5e9ea26f4..dfdd81c3287c 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
-- 
2.36.1



[PATCH v4 39/41] nouveau-dbg: add 2 verbose-classmaps for CLI, SUBDEV

2022-07-20 Thread Jim Cromie
nouveau has additional debug variables to consider:

drivers/gpu/drm/nouveau/include/nvkm/core/device.h
131:if (_device->debug >= (l)) \

drivers/gpu/drm/nouveau/include/nvkm/core/client.h
39: if (_client->debug >= NV_DBG_##l)  \

drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
54: if (CONFIG_NOUVEAU_DEBUG >= (l) && _subdev->debug >= (l))  \

This is another baby-step, that seems not to break, so let's get a
snapshot.

what's done here:

In nouveau_drm.c, declare class-names: NV_CLI_DBG_* NV_SUBDEV_DBG_* by
calling DECLARE_DYNDBG_CLASSMAP(verbose-type) 2x more, right after the
drm DECLARE, for cli and subdev.  Adjust base to 10,15, to avoid 0-10
allocated subsystem-wide for drm.debug.  This shares the 0..30
classid-space.

In nvkm/core/debug.h, add enums to match the names, with initial
values to match the bases.

In nvkm/core/subdev.h, alter (recently added) nvkm_drmdbg_() to use
NV_SUBDEV_DBG_* instead of NV_DBG_*.  Note: this is undone next,
because base != CONFIG_NOUVEAU_DEBUG.

NOTES:

class-name-space is flat and wide, so super-generic names like INFO
should be prefixed; who could predict what a generic "V1" does across
all modules.

A sub-system prefix, such as "DRM_UT_" could be considered a working
shorthand for module "foo,bar,buzz" (which wouldn't work), but it adds
a layer of author's judgment in the classifications.

In both classmaps, I've left FATAL..WARN out, they're not really
optional the way INFO..SPAM are; even if they were defaulted to on,
dyndbg shouldn't be able to turn them off.

bash-5.1# modprobe nouveau
[  966.107833] dyndbg:   3 debug prints in module wmi
[  966.342188] dyndbg: class[0]: module:nouveau base:15 len:5 ty:1
[  966.342873] dyndbg:  15: 0 NV_SUBDEV_DBG_OFF
[  966.343352] dyndbg:  16: 1 NV_SUBDEV_DBG_INFO
[  966.343912] dyndbg:  17: 2 NV_SUBDEV_DBG_DEBUG
[  966.33] dyndbg:  18: 3 NV_SUBDEV_DBG_TRACE
[  966.344938] dyndbg:  19: 4 NV_SUBDEV_DBG_SPAM
[  966.345402] dyndbg: class[1]: module:nouveau base:10 len:5 ty:1
[  966.346011] dyndbg:  10: 0 NV_CLI_DBG_OFF
[  966.346477] dyndbg:  11: 1 NV_CLI_DBG_INFO
[  966.346989] dyndbg:  12: 2 NV_CLI_DBG_DEBUG
[  966.347442] dyndbg:  13: 3 NV_CLI_DBG_TRACE
[  966.347875] dyndbg:  14: 4 NV_CLI_DBG_SPAM
[  966.348284] dyndbg: class[2]: module:nouveau base:0 len:10 ty:0
[  966.34] dyndbg:  0: 0 DRM_UT_CORE
[  966.349310] dyndbg:  1: 1 DRM_UT_DRIVER
[  966.349694] dyndbg:  2: 2 DRM_UT_KMS
[  966.350083] dyndbg:  3: 3 DRM_UT_PRIME
[  966.350482] dyndbg:  4: 4 DRM_UT_ATOMIC
[  966.351016] dyndbg:  5: 5 DRM_UT_VBL
[  966.351475] dyndbg:  6: 6 DRM_UT_STATE
[  966.351899] dyndbg:  7: 7 DRM_UT_LEASE
[  966.352309] dyndbg:  8: 8 DRM_UT_DP
[  966.352678] dyndbg:  9: 9 DRM_UT_DRMRES
[  966.353104] dyndbg: module:nouveau attached 3 classes
[  966.353759] dyndbg: 119 debug prints in module nouveau

NOTE: it was 632 with previous commit, switching NV_DEBUG to use
NV_SUBDEV_DBG_DEBUG instead of NV_DBG_DEBUG is the cause.

Signed-off-by: Jim Cromie 
---
 .../gpu/drm/nouveau/include/nvkm/core/debug.h| 16 
 .../gpu/drm/nouveau/include/nvkm/core/subdev.h   |  3 ++-
 drivers/gpu/drm/nouveau/nouveau_drm.c| 14 ++
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/debug.h 
b/drivers/gpu/drm/nouveau/include/nvkm/core/debug.h
index b4a9c7d991ca..6a155a23a3d1 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/debug.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/debug.h
@@ -9,4 +9,20 @@
 #define NV_DBG_TRACE5
 #define NV_DBG_PARANOIA 6
 #define NV_DBG_SPAM 7
+
+enum nv_cli_dbg_verbose {
+   NV_CLI_DBG_OFF = 10,
+   NV_CLI_DBG_INFO,
+   NV_CLI_DBG_DEBUG,
+   NV_CLI_DBG_TRACE,
+   NV_CLI_DBG_SPAM
+};
+enum nv_subdev_dbg_verbose {
+   NV_SUBDEV_DBG_OFF = 15,
+   NV_SUBDEV_DBG_INFO,
+   NV_SUBDEV_DBG_DEBUG,
+   NV_SUBDEV_DBG_TRACE,
+   NV_SUBDEV_DBG_SPAM
+};
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h 
b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
index b9c2afab321f..bf9c69f4fc3e 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
@@ -2,6 +2,7 @@
 #ifndef __NVKM_SUBDEV_H__
 #define __NVKM_SUBDEV_H__
 #include 
+#include 
 
 enum nvkm_subdev_type {
 #define NVKM_LAYOUT_ONCE(t,s,p,...) t,
@@ -65,7 +66,7 @@ void nvkm_subdev_intr(struct nvkm_subdev *);
if (CONFIG_NOUVEAU_DEBUG >= (l) && _subdev->debug >= (l))   \
dev_dbg(_subdev->device->dev, "%s: "f, _subdev->name, ##a); \
 } while(0)
-#define nvkm_drmdbg_(s,l,f,a...) nvkm_drmdbg__((s), NV_DBG_##l, dbg, f, ##a)
+#define nvkm_drmdbg_(s,l,f,a...) nvkm_drmdbg__((s), NV_SUBDEV_DBG_##l, dbg, f, 
##a)
 #define nvkm_debug(s,f,a...) nvkm_drmdbg_((s), DEBUG, f, ##a)
 #define nvkm_trace(s,f,a...) nvkm_drmdbg_((s), TRACE, f, ##a)
 #def

[PATCH v4 14/41] dyndbg: add ddebug_attach_module_classes

2022-07-20 Thread Jim Cromie
Add ddebug_attach_module_classes(), call it from ddebug_add_module().
It scans the classes/section it's given, finds records where the
module-name matches the module being added, and adds them to the
module's maps list.  No locking here, since the record
isn't yet linked into the ddebug_tables list.

It is called indirectly from 2 sources:

 - from load_module(), where it scans the module's __dyndbg_classes
   section, which contains DYNAMIC_DEBUG_CLASSES definitions from just
   the module.

 - from dynamic_debug_init(), where all DYNAMIC_DEBUG_CLASSES
   definitions of each builtin module have been packed together.
   This is why ddebug_attach_module_classes() checks module-name.

RFC:

It's (highly) likely that builtin classes will be ordered by module
name (just like prdbg descriptors are in the __dyndbg section).  So
the list can be replaced by a vector (ptr + length), which will work
for loaded modules too.  This would imitate what's currently done for
the _ddebug descriptors.

That said, converting to vector,len is close to pointless; a small
minority of modules will ever define a class-map, and almost all of
them will have only 1 or 2 class-maps, so there's only a couple dozen
pointers to save.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 34 +-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index b6d80ba25bf5..e29730686cfb 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -45,7 +45,7 @@ extern struct ddebug_class_map __start___dyndbg_classes[];
 extern struct ddebug_class_map __stop___dyndbg_classes[];
 
 struct ddebug_table {
-   struct list_head link;
+   struct list_head link, maps;
const char *mod_name;
unsigned int num_ddebugs;
struct _ddebug *ddebugs;
@@ -921,6 +921,32 @@ static const struct proc_ops proc_fops = {
.proc_write = ddebug_proc_write
 };
 
+static void ddebug_attach_module_classes(struct ddebug_table *dt,
+struct ddebug_class_map *classes,
+int num_classes)
+{
+   struct ddebug_class_map *cm;
+   int i, j, ct = 0;
+
+   for (cm = classes, i = 0; i < num_classes; i++, cm++) {
+
+   if (!strcmp(cm->mod_name, dt->mod_name)) {
+
+   v2pr_info("class[%d]: module:%s base:%d len:%d 
ty:%d\n", i,
+ cm->mod_name, cm->base, cm->length, 
cm->map_type);
+
+   for (j = 0; j < cm->length; j++)
+   v3pr_info(" %d: %d %s\n", j + cm->base, j,
+ cm->class_names[j]);
+
+   list_add(&cm->link, &dt->maps);
+   ct++;
+   }
+   }
+   if (ct)
+   vpr_info("module:%s attached %d classes\n", dt->mod_name, ct);
+}
+
 /*
  * Allocate a new ddebug_table for the given module
  * and add it to the global list.
@@ -946,6 +972,12 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int 
num_debugs,
dt->num_ddebugs = num_debugs;
dt->ddebugs = tab;
 
+   INIT_LIST_HEAD(&dt->link);
+   INIT_LIST_HEAD(&dt->maps);
+
+   if (classes && num_classes)
+   ddebug_attach_module_classes(dt, classes, num_classes);
+
mutex_lock(&ddebug_lock);
list_add_tail(&dt->link, &ddebug_tables);
mutex_unlock(&ddebug_lock);
-- 
2.36.1



[PATCH v4 27/41] drm_print: prefer bare printk KERN_DEBUG on generic fn

2022-07-20 Thread Jim Cromie
drm_print.c calls pr_debug() just once, from __drm_printfn_debug(),
which is a generic/service fn.  The callsite is compile-time enabled
by DEBUG in both DYNAMIC_DEBUG=y/n builds.

For dyndbg builds, reverting this callsite back to bare printk is
correcting a few anti-features:

1- callsite is generic, serves multiple drm users.
   it is soft-wired on currently by #define DEBUG
   a user could accidentally disable it: #> echo -p > /proc/dynamic_debug/control

2- optional "decorations" by dyndbg are unhelpful/misleading here,
   they describe only the generic site, not end users

IOW, 1,2 are unhelpful at best, and possibly confusing.

reverting yields a nominal data and text shrink:

   text    data     bss    dec     hex filename
 462583   36604   54592 553779   87333 /kernel/drivers/gpu/drm/drm.ko
 462515   36532   54592 553639   872a7 -dirty/kernel/drivers/gpu/drm/drm.ko

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/drm_print.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index effb95b3c2bf..e0de79a22255 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -23,8 +23,6 @@
  * Rob Clark 
  */
 
-#define DEBUG /* for pr_debug() */
-
 #include 
 
 #include 
@@ -185,7 +183,8 @@ EXPORT_SYMBOL(__drm_printfn_info);
 
 void __drm_printfn_debug(struct drm_printer *p, struct va_format *vaf)
 {
-   pr_debug("%s %pV", p->prefix, vaf);
+   /* pr_debug callsite decorations are unhelpful here */
+   printk(KERN_DEBUG "%s %pV", p->prefix, vaf);
 }
 EXPORT_SYMBOL(__drm_printfn_debug);
 
-- 
2.36.1



[PATCH v4 33/41] dyndbg: add write-events-to-tracefs code

2022-07-20 Thread Jim Cromie
1st, internals:

adds: ddebug_trace()
 uses trace_console() temporarily to issue printk:console event
 uses internal-ish __ftrace_trace_stack code:
  4-context buffer stack, barriers per Steve Rostedt

call it from new mid-layer funcs:
  ddebug_printk() - ddebug_trace or vprintk (to syslog)
  ddebug_dev_printk() - ddebug_trace or dev_printk_emit

These handle both _DPRINTK_FLAGS_PRINTK and _DPRINTK_FLAGS_TRACE
cases, allowing to vsnprintf the message once and use it for both,
skipping past the KERN_DEBUG prefix for tracing.

Finally, adjust the top-layer: __dynamic_{pr_debug,{,net,ib}dev_dbg},
replacing printk/dev_printk_emit with ddebug_printk/ddebug_dev_printk.

Interface additions:
  new 'T' flag decl in opt_array. existing code handles it like others.
  document the new flag.

To enable drm.debug ATOMIC messages to tracefs:

  :#> echo class DRM_UT_ATOMIC +T > /proc/dynamic_debug/control

NB:

This patch,~1,~2 are basically direct copies of:
  
https://lore.kernel.org/lkml/20200825153338.17061-1-vincent.whitchu...@axis.com/

with a few differences:

- s/dynamic_/ddebug_/ on Vincent's additions, tighter naming.
- __printf attrs on the _printk funcs.
- reuses trace_console() event, not adding a new "printk:dyndbg" event.
  later patches differentiate to new events.

- a flags arg remains unchanged, adapt later to *descriptor.

CC: vincent.whitchu...@axis.com
Signed-off-by: Jim Cromie 
---
 .../admin-guide/dynamic-debug-howto.rst   |   5 +-
 lib/dynamic_debug.c   | 156 +++---
 2 files changed, 133 insertions(+), 28 deletions(-)

diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst 
b/Documentation/admin-guide/dynamic-debug-howto.rst
index faa22f77847a..45b6e5697c89 100644
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@@ -209,8 +209,9 @@ of the characters::
 
 The flags are::
 
-  p    enables the pr_debug() callsite.
-  _    enables no flags.
+  p    callsite prints to syslog
+  T    callsite issues a dyndbg:* trace-event
+  _    enables no flags
 
   Decorator flags add to the message-prefix, in order:
   tInclude thread ID, or 
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 2a46c642373a..66f12b9127c7 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -90,6 +91,7 @@ static inline const char *trim_prefix(const char *path)
 
 static struct { unsigned flag:8; char opt_char; } opt_array[] = {
{ _DPRINTK_FLAGS_PRINTK, 'p' },
+   { _DPRINTK_FLAGS_TRACE, 'T' },
{ _DPRINTK_FLAGS_INCL_MODNAME, 'm' },
{ _DPRINTK_FLAGS_INCL_FUNCNAME, 'f' },
{ _DPRINTK_FLAGS_INCL_LINENO, 'l' },
@@ -835,6 +837,98 @@ static inline char *dynamic_emit_prefix(struct _ddebug 
*desc, char *buf)
return buf;
 }
 
+/*
+ * This code is heavily based on __ftrace_trace_stack().
+ *
+ * Allow 4 levels of nesting: normal, softirq, irq, NMI.
+ */
+#define DYNAMIC_TRACE_NESTING  4
+
+struct ddebug_trace_buf {
+   char buf[256];
+};
+
+struct ddebug_trace_bufs {
+   struct ddebug_trace_buf bufs[DYNAMIC_TRACE_NESTING];
+};
+
+static DEFINE_PER_CPU(struct ddebug_trace_bufs, ddebug_trace_bufs);
+static DEFINE_PER_CPU(int, ddebug_trace_reserve);
+
+static void ddebug_trace(const char *fmt, va_list args)
+{
+   struct ddebug_trace_buf *buf;
+   int bufidx;
+   int len;
+
+   preempt_disable_notrace();
+
+   bufidx = __this_cpu_inc_return(ddebug_trace_reserve) - 1;
+
+   if (WARN_ON_ONCE(bufidx > DYNAMIC_TRACE_NESTING))
+   goto out;
+
+   /* For the same reasons as in __ftrace_trace_stack(). */
+   barrier();
+
+   buf = this_cpu_ptr(ddebug_trace_bufs.bufs) + bufidx;
+
+   len = vscnprintf(buf->buf, sizeof(buf->buf), fmt, args);
+   trace_console(buf->buf, len);
+
+out:
+   /* As above. */
+   barrier();
+   __this_cpu_dec(ddebug_trace_reserve);
+   preempt_enable_notrace();
+}
+
+__printf(2, 3)
+static void ddebug_printk(unsigned int flags, const char *fmt, ...)
+{
+   if (flags & _DPRINTK_FLAGS_TRACE) {
+   va_list args;
+
+   va_start(args, fmt);
+   /*
+* All callers include the KERN_DEBUG prefix to keep the
+* vprintk case simple; strip it out for tracing.
+*/
+   ddebug_trace(fmt + strlen(KERN_DEBUG), args);
+   va_end(args);
+   }
+
+   if (flags & _DPRINTK_FLAGS_PRINTK) {
+   va_list args;
+
+   va_start(args, fmt);
+   vprintk(fmt, args);
+   va_end(args);
+   }
+}
+
+__printf(3, 4)
+static void ddebug_dev_printk(unsigned int flags, const struct device *dev,
+ const char *fmt, ...)
+{
+
+   if (flags & _DPRINTK_FLAGS_TRACE) {
+   va_list args;
+
+   va_start(args, fmt);
+

[PATCH v4 37/41] nouveau: adapt NV_DEBUG, NV_ATOMIC to use DRM.debug

2022-07-20 Thread Jim Cromie
These 2 macros used drm_debug_enabled() on DRM_UT_{DRIVER,ATOMIC}
respectively, replace those with drm_dbg_##cat invocations.

this results in new class'd prdbg callsites:

:#> grep nouveau /proc/dynamic_debug/control | grep class | wc
    116    1130   15584
:#> grep nouveau /proc/dynamic_debug/control | grep class | grep DRIVER | wc
     74     704    9709
:#> grep nouveau /proc/dynamic_debug/control | grep class | grep ATOMIC | wc
     31     307    4237
:#> grep nouveau /proc/dynamic_debug/control | grep class | grep KMS | wc
     11     119    1638

the KMS entries are due to existing uses of drm_dbg_kms().

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/nouveau/nouveau_drv.h | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h 
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index b2a970aa9bf4..f266cd6b0405 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -39,6 +39,7 @@
  */
 
 #include 
+#include 
 
 #include 
 #include 
@@ -264,13 +265,16 @@ void nouveau_drm_device_remove(struct drm_device *dev);
 #define NV_WARN(drm,f,a...) NV_PRINTK(warn, &(drm)->client, f, ##a)
 #define NV_INFO(drm,f,a...) NV_PRINTK(info, &(drm)->client, f, ##a)
 
-#define NV_DEBUG(drm,f,a...) do {  
\
-   if (drm_debug_enabled(DRM_UT_DRIVER))  \
-   NV_PRINTK(info, &(drm)->client, f, ##a);   \
+#define NV_DRMDBG(cat,c,f,a...) do {   \
+   struct nouveau_cli *_cli = (c); \
+   drm_dbg_##cat(_cli->drm->dev, "%s: "f, _cli->name, ##a); \
 } while(0)
-#define NV_ATOMIC(drm,f,a...) do { 
\
-   if (drm_debug_enabled(DRM_UT_ATOMIC))  \
-   NV_PRINTK(info, &(drm)->client, f, ##a);   \
+
+#define NV_DEBUG(drm,f,a...) do {  \
+   NV_DRMDBG(driver, &(drm)->client, f, ##a);  \
+} while(0)
+#define NV_ATOMIC(drm,f,a...) do { \
+   NV_DRMDBG(atomic, &(drm)->client, f, ##a);  \
 } while(0)
 
 #define NV_PRINTK_ONCE(l,c,f,a...) NV_PRINTK(l##_once,c,f, ##a)
-- 
2.36.1



[PATCH v4 17/41] dyndbg: test DECLARE_DYNDBG_CLASSMAP, sysfs nodes

2022-07-20 Thread Jim Cromie
Demonstrate use of DECLARE_DYNDBG_CLASSMAP macro, and expose them as
sysfs-nodes for testing.

For each of the 4 class-map-types:

  - declare a class-map of that type,
  - declare the enum corresponding to those class-names
  - share _base across 0..30 range
  - add a __pr_debug_cls() call for each class-name
  - declare 2 sysnodes for each class-map
for 'p' flag, and future 'T' flag

These declarations create the following sysfs parameter interface:

  :> pwd
  /sys/module/test_dynamic_debug/parameters
  :> ls
  T_disjoint  T_levels  T_symbolic  T_verbosity  do_prints
  p_disjoint  p_levels  p_symbolic  p_verbosity

NOTES:

The local wrapper macro is an api candidate, but there are already too
many parameters.  OTOH, maybe related enum should be in there too,
since it has _base inter-dependencies.

The T_* params control the (future) T flag on the same class'd
pr_debug callsites as their p* counterparts.  Using them will fail,
until the dyndbg-trace patches are added in.

:#> echo 1 > T_disjoint
[   28.792489] dyndbg: disjoint: 0x1 > test_dynamic_debug.T_D2
[   28.793848] dyndbg: query 0: "class D2_CORE +T" mod:*
[   28.795086] dyndbg: split into words: "class" "D2_CORE" "+T"
[   28.796467] dyndbg: op='+'
[   28.797148] dyndbg: unknown flag 'T'
[   28.798021] dyndbg: flags parse failed
[   28.798947] dyndbg: processed 1 queries, with 0 matches, 1 errs
[   28.800378] dyndbg: bit_0: -22 matches on class: D2_CORE -> 0x1
[   28.801959] dyndbg: test_dynamic_debug.T_D2: updated 0x0 -> 0x1
[   28.803974] dyndbg: total matches: -22

Signed-off-by: Jim Cromie 
---
 lib/test_dynamic_debug.c | 125 ++-
 1 file changed, 110 insertions(+), 15 deletions(-)

diff --git a/lib/test_dynamic_debug.c b/lib/test_dynamic_debug.c
index ba3882ca3e48..eac85e4e996a 100644
--- a/lib/test_dynamic_debug.c
+++ b/lib/test_dynamic_debug.c
@@ -10,57 +10,152 @@
 
 #include 
 
-static void do_prints(void); /* device under test */
-
-/* run tests by reading or writing sysfs node */
+/* run tests by reading or writing sysfs node: do_prints */
 
+static void do_prints(void); /* device under test */
 static int param_set_do_prints(const char *instr, const struct kernel_param 
*kp)
 {
do_prints();
return 0;
 }
-
 static int param_get_do_prints(char *buffer, const struct kernel_param *kp)
 {
do_prints();
return scnprintf(buffer, PAGE_SIZE, "did do_prints\n");
 }
-
 static const struct kernel_param_ops param_ops_do_prints = {
.set = param_set_do_prints,
.get = param_get_do_prints,
 };
-
 module_param_cb(do_prints, ¶m_ops_do_prints, NULL, 0600);
 
-static void do_alpha(void)
+/*
+ * Using the CLASSMAP api:
+ * - classmaps must have corresponding enum
+ * - enum symbols must match/correlate with class-name strings in the map.
+ * - base must equal enum's 1st value
+ * - multiple maps must set their base to share the 0-30 class_id space !!
+ *   (build-bug-on tips welcome)
+ * Additionally, here:
+ * - tie together sysname, mapname, bitsname, flagsname
+ */
+#define DD_SYS_WRAP(_model, _flags)\
+   static unsigned long bits_##_model; \
+   static struct ddebug_classes_bitmap_param _flags##_model = {\
+   .bits = &bits_##_model, \
+   .flags = #_flags,   \
+   .map = &map_##_model,   \
+   };  \
+   module_param_cb(_flags##_##_model, ¶m_ops_dyndbg_classes, 
&_flags##_model, 0600)
+
+/* numeric input, independent bits */
+enum cat_disjoint {
+   D2_CORE = 0,
+   D2_DRIVER,
+   D2_KMS,
+   D2_PRIME,
+   D2_ATOMIC,
+   D2_VBL,
+   D2_STATE,
+   D2_LEASE,
+   D2_DP,
+   D2_DRMRES };
+DECLARE_DYNDBG_CLASSMAP(map_disjoint, DD_CLASS_TYPE_DISJOINT, 0,
+   "D2_CORE",
+   "D2_DRIVER",
+   "D2_KMS",
+   "D2_PRIME",
+   "D2_ATOMIC",
+   "D2_VBL",
+   "D2_STATE",
+   "D2_LEASE",
+   "D2_DP",
+   "D2_DRMRES");
+DD_SYS_WRAP(disjoint, p);
+DD_SYS_WRAP(disjoint, T);
+
+/* symbolic input, independent bits */
+enum cat_symbolic { LOW = 11, MID, HI };
+DECLARE_DYNDBG_CLASSMAP(map_symbolic, DD_CLASS_TYPE_SYMBOLIC, 10,
+   "LOW", "MID", "HI");
+DD_SYS_WRAP(symbolic, p);
+DD_SYS_WRAP(symbolic, T);
+
+/* numeric verbosity, V2 > V1 related */
+enum cat_verbosity { V0 = 14, V1, V2, V3, V4, V5, V6, V7 };
+DECLARE_DYNDBG_CLASSMAP(map_verbosity, DD_CLASS_TYPE_VERBOSE, 14,
+  "V0", "V1", "V2", "V3", "V4", "V5", "V6", "V7");
+DD_SYS_WRAP(verbosity, p);
+DD_SYS_WRAP(verbosity, T);
+
+/* symbolic verbosity */
+enum cat_levels { L0 = 22, L1, L2, L3

[PATCH v4 32/41] dyndbg: add _DPRINTK_FLAGS_TRACE

2022-07-20 Thread Jim Cromie
add new flag, and OR it into _DPRINTK_FLAGS_ENABLED definition

CC: vincent.whitchu...@axis.com
Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 38cfdd5c0bdc..0752e6c21c6e 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -42,7 +42,9 @@ struct _ddebug {
(_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\
 _DPRINTK_FLAGS_INCL_LINENO  | _DPRINTK_FLAGS_INCL_TID)
 
-#define _DPRINTK_FLAGS_ENABLED _DPRINTK_FLAGS_PRINTK
+#define _DPRINTK_FLAGS_TRACE   (1 << 5)
+#define _DPRINTK_FLAGS_ENABLED (_DPRINTK_FLAGS_PRINTK | \
+_DPRINTK_FLAGS_TRACE)
 
 #if defined DEBUG
 #define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINTK
-- 
2.36.1



[PATCH v4 26/41] drm_print: refine drm_debug_enabled for jump-label

2022-07-20 Thread Jim Cromie
In order to use dynamic-debug's jump-label optimization in drm-debug,
it's clarifying to refine drm_debug_enabled into 3 uses:

1.   drm_debug_enabled - legacy, public
2. __drm_debug_enabled - optimized for dyndbg jump-label enablement.
3.  _drm_debug_enabled - pr_debug instrumented, observable

1. The legacy version always checks the bits.

2. is privileged, for use by __drm_dbg(), __drm_dev_dbg(), which do an
early return unless the category is enabled.  For dyndbg builds, debug
callsites are selectively "pre-enabled", so __drm_debug_enabled()
short-circuits to true there.  Remaining callers of 1 may be able to
use 2, case by case.

3. is 1st wrapped in a macro, with a pr_debug, which reports each
usage in /proc/dynamic_debug/control, making it observable in the
logs.  The macro lets the pr_debug see the real caller, not an inline
function.

When plugged into 1, 3 identified ~10 remaining callers of the
function, leading to the follow-on cleanup patch, and would allow
activating the pr_debugs, estimating the callrate, and the potential
savings by using the wrapper macro.  It is unused ATM, but it fills
out the picture.

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/drm_print.c |  4 ++--
 include/drm/drm_print.h | 28 
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index 21b416af0be2..effb95b3c2bf 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -285,7 +285,7 @@ void __drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
struct va_format vaf;
va_list args;
 
-   if (!drm_debug_enabled(category))
+   if (!__drm_debug_enabled(category))
return;
 
va_start(args, format);
@@ -308,7 +308,7 @@ void ___drm_dbg(enum drm_debug_category category, const 
char *format, ...)
struct va_format vaf;
va_list args;
 
-   if (!drm_debug_enabled(category))
+   if (!__drm_debug_enabled(category))
return;
 
va_start(args, format);
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index dfdd81c3287c..7631b5fb669e 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -321,11 +321,39 @@ enum drm_debug_category {
DRM_UT_DRMRES
 };
 
+/*
+ * 3 name flavors of drm_debug_enabled:
+ *   drm_debug_enabled - public/legacy, always checks bits
+ *  _drm_debug_enabled - instrumented to observe call-rates, est overheads.
+ * __drm_debug_enabled - privileged - knows jump-label state, can short-circuit
+ */
 static inline bool drm_debug_enabled(enum drm_debug_category category)
 {
return unlikely(__drm_debug & BIT(category));
 }
 
+/*
+ * Wrap fn in macro, so that the pr_debug sees the actual caller, not
+ * the inline fn.  Using this name creates a callsite entry / control
+ * point in /proc/dynamic_debug/control.
+ */
+#define _drm_debug_enabled(category)   \
+   ({  \
+   pr_debug("todo: maybe avoid via dyndbg\n"); \
+   drm_debug_enabled(category);\
+   })
+
+#if defined(CONFIG_DRM_USE_DYNAMIC_DEBUG)
+/*
+ * dyndbg is wrapping the drm.debug API, so as to avoid the runtime
+ * bit-test overheads of drm_debug_enabled() in those api calls.
+ * In this case, executed callsites are known enabled, so true.
+ */
+#define __drm_debug_enabled(category)  true
+#else
+#define __drm_debug_enabled(category)  drm_debug_enabled(category)
+#endif
+
 /*
  * struct device based logging
  *
-- 
2.36.1



[PATCH v4 23/41] drm_print: wrap drm_*_dbg in dyndbg descriptor factory macro

2022-07-20 Thread Jim Cromie
For CONFIG_DRM_USE_DYNAMIC_DEBUG=y, wrap __drm_dbg() & __drm_dev_dbg()
in one of dyndbg's Factory macros: _dynamic_func_call_no_desc().

This adds the callsite descriptor into the code, and an entry for each
into /proc/dynamic_debug/control.

  #> echo class DRM_UT_ATOMIC +p > /proc/dynamic_debug/control

CONFIG_DRM_USE_DYNAMIC_DEBUG=y/n is configurable because of the .data
footprint cost of per-callsite control; 56 bytes/site * ~2k for i915,
~4k callsites for amdgpu.  This is large enough that a kernel builder
might not want it.

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/Kconfig  | 12 
 drivers/gpu/drm/Makefile |  2 ++
 include/drm/drm_print.h  | 12 
 3 files changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index e88c497fa010..bb1fa20a8eb2 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -50,6 +50,18 @@ config DRM_DEBUG_MM
 
  If in doubt, say "N".
 
+config DRM_USE_DYNAMIC_DEBUG
+   bool "use dynamic debug to implement drm.debug"
+   default y
+   depends on DRM
+   depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE
+   depends on JUMP_LABEL
+   help
+ Use dynamic-debug to avoid drm_debug_enabled() runtime overheads.
+ Due to callsite counts in DRM drivers (~4k in amdgpu) and 56
+ bytes per callsite, the .data costs can be substantial, and
+ are therefore configurable.
+
 config DRM_DEBUG_SELFTEST
tristate "kselftests for DRM"
depends on DRM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 15fe3163f822..272de137d207 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -3,6 +3,8 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
+CFLAGS-$(CONFIG_DRM_USE_DYNAMIC_DEBUG) += -DDYNAMIC_DEBUG_MODULE
+
 drm-y   := drm_aperture.o drm_auth.o drm_cache.o \
drm_file.o drm_gem.o drm_ioctl.o \
drm_drv.o \
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index c429c258c957..2d2cef76b5c1 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -384,8 +384,14 @@ void __drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
}   \
 })
 
+#if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG)
 #define drm_dev_dbg(dev, cat, fmt, ...)\
__drm_dev_dbg(dev, cat, fmt, ##__VA_ARGS__)
+#else
+#define drm_dev_dbg(dev, cat, fmt, ...)\
+   _dynamic_func_call_no_desc(fmt, __drm_dev_dbg,  \
+  dev, cat, fmt, ##__VA_ARGS__)
+#endif
 
 /**
  * DRM_DEV_DEBUG() - Debug output for generic drm code
@@ -492,7 +498,13 @@ void ___drm_dbg(enum drm_debug_category category, const 
char *format, ...);
 __printf(1, 2)
 void __drm_err(const char *format, ...);
 
+#if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG)
 #define __drm_dbg(fmt, ...)___drm_dbg(fmt, ##__VA_ARGS__)
+#else
+#define __drm_dbg(cat, fmt, ...)   \
+   _dynamic_func_call_no_desc(fmt, ___drm_dbg, \
+  cat, fmt, ##__VA_ARGS__)
+#endif
 
 /* Macros to make printk easier */
 
-- 
2.36.1



[PATCH v4 18/41] doc-dyndbg: describe "class CLASS_NAME" query support

2022-07-20 Thread Jim Cromie
Add an explanation of the new "class CLASS_NAME" syntax and meaning,
noting that the module determines if CLASS_NAME applies to it.

Signed-off-by: Jim Cromie 
---
 Documentation/admin-guide/dynamic-debug-howto.rst | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst 
b/Documentation/admin-guide/dynamic-debug-howto.rst
index a89cfa083155..d8954ab05c7b 100644
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@@ -35,6 +35,7 @@ Dynamic debug has even more useful features:
- line number (including ranges of line numbers)
- module name
- format string
+   - class name (as known/declared by each module)
 
  * Provides a debugfs control file: ``/dynamic_debug/control``
which can be read to display the complete list of known debug
@@ -142,6 +143,7 @@ against.  Possible keywords are:::
 'file' string |
 'module' string |
 'format' string |
+'class' string |
 'line' line-range
 
   line-range ::= lineno |
@@ -203,6 +205,15 @@ format
format "nfsd: SETATTR"  // a neater way to match a format with 
whitespace
format 'nfsd: SETATTR'  // yet another way to match a format with 
whitespace
 
+class
+The given class_name is validated against each module, which may
+have declared a list of known class_names.  If the class_name is
+found for a module, callsite & class matching and adjustment
+proceeds.  Examples::
+
+   class DRM_UT_KMS    # a DRM.debug category
+   class JUNK          # silent non-match
+
 line
 The given line number or range of line numbers is compared
 against the line number of each ``pr_debug()`` callsite.  A single
-- 
2.36.1



[PATCH v4 21/41] drm: POC drm on dyndbg - use in core, 2 helpers, 3 drivers.

2022-07-20 Thread Jim Cromie
Use DECLARE_DYNDBG_CLASSMAP across DRM:

 - in .c files, since macro defines/initializes a record

 - in drivers, $mod_{drv,drm,param}.c
   ie where param setup is done (since a class-bitmap is a param)

 - in drm/drm_print.c, since that's where it
   adds the class-bitmap, and replaces module_param_named.

 - in drm_*_helper modules:
   dp/drm_dp - 1st item in makefile target
   drivers/gpu/drm/drm_crtc_helper.c - random pick iirc.

Since these modules all use identical class-maps (ie: names and
.class_id's) they all will respond together to class FOO changes:

  :#> echo class DRM_UT_KMS +p > /proc/dynamic_debug/control

ttm and video have 2,8 callsites respectively; I haven't looked.

NOTES:

DRM's enum drm_debug_category values need to sync with the index of
their respective class-names here.  Then .class_id == category, and
dyndbg's class FOO mechanisms will enable drm_dbg(DRM_UT_KMS, ...).

Though DRM needs consistent categories across all modules, that's not
generally needed; modules X and Y could define FOO differently (ie
different corresponding .class_id); changes are made according to each
module's private class-map.

No callsites are actually selected here, since none are class'd yet.

change __drm_debug from int to long, so BIT() is usable on it.

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 14 +
 drivers/gpu/drm/display/drm_dp_helper.c | 13 
 drivers/gpu/drm/drm_crtc_helper.c   | 13 
 drivers/gpu/drm/drm_print.c | 27 +++--
 drivers/gpu/drm/i915/i915_params.c  | 12 +++
 drivers/gpu/drm/nouveau/nouveau_drm.c   | 13 
 include/drm/drm_print.h |  3 ++-
 7 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8890300766a5..ba96d33137b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -38,6 +38,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
@@ -183,6 +185,18 @@ int amdgpu_vcnfw_log;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
+DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT, 0,
+   "DRM_UT_CORE",
+   "DRM_UT_DRIVER",
+   "DRM_UT_KMS",
+   "DRM_UT_PRIME",
+   "DRM_UT_ATOMIC",
+   "DRM_UT_VBL",
+   "DRM_UT_STATE",
+   "DRM_UT_LEASE",
+   "DRM_UT_DP",
+   "DRM_UT_DRMRES");
+
 struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
.delayed_reset_work = __DELAYED_WORK_INITIALIZER(
diff --git a/drivers/gpu/drm/display/drm_dp_helper.c 
b/drivers/gpu/drm/display/drm_dp_helper.c
index e7c22c2ca90c..eb7aef22e7fd 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -38,6 +39,18 @@
 
 #include "drm_dp_helper_internal.h"
 
+DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT, 0,
+   "DRM_UT_CORE",
+   "DRM_UT_DRIVER",
+   "DRM_UT_KMS",
+   "DRM_UT_PRIME",
+   "DRM_UT_ATOMIC",
+   "DRM_UT_VBL",
+   "DRM_UT_STATE",
+   "DRM_UT_LEASE",
+   "DRM_UT_DP",
+   "DRM_UT_DRMRES");
+
 struct dp_aux_backlight {
struct backlight_device *base;
struct drm_dp_aux *aux;
diff --git a/drivers/gpu/drm/drm_crtc_helper.c 
b/drivers/gpu/drm/drm_crtc_helper.c
index b632825654a9..80f2cf807dae 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -50,6 +51,18 @@
 
 #include "drm_crtc_helper_internal.h"
 
+DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT, 0,
+   "DRM_UT_CORE",
+   "DRM_UT_DRIVER",
+   "DRM_UT_KMS",
+   "DRM_UT_PRIME",
+   "DRM_UT_ATOMIC",
+   "DRM_UT_VBL",
+   "DRM_UT_STATE",
+   "DRM_UT_LEASE",
+   "DRM_UT_DP",
+   "DRM_UT_DRMRES");
+
 /**
  * DOC: overview
  *
diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index f783d4963d4b..44be95fac164 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -40,7 +40,7 @@
  * __drm_debug: Enable debug output.
  * Bitmask of DRM_UT_x. See include/drm/drm_print.h for details.
  */
-unsigned int __drm_debug;
+unsigned lo

[PATCH v4 24/41] drm-print: add drm_dbg_driver to improve namespace symmetry

2022-07-20 Thread Jim Cromie
drm_print defines all of these:
drm_dbg_{core,kms,prime,atomic,vbl,lease,dp,drmres}

but not drm_dbg_driver itself, since it was the original drm_dbg.

To improve namespace symmetry, rename the drm_dbg definition to
drm_dbg_driver, and redefine the grandfathered name as the symmetric one.

This will help with nouveau, which uses its own stack of macros to
construct calls to dev_info, dev_dbg, etc, for which adaptation means
drm_dbg_##driver constructs.

Signed-off-by: Jim Cromie 
---
 include/drm/drm_print.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 2d2cef76b5c1..3aa5e9ea26f4 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -467,7 +467,7 @@ void __drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
 
 #define drm_dbg_core(drm, fmt, ...)\
drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_CORE, fmt, ##__VA_ARGS__)
-#define drm_dbg(drm, fmt, ...) \
+#define drm_dbg_driver(drm, fmt, ...)  
\
drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DRIVER, fmt, 
##__VA_ARGS__)
 #define drm_dbg_kms(drm, fmt, ...) \
drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_KMS, fmt, ##__VA_ARGS__)
@@ -486,6 +486,7 @@ void __drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
 #define drm_dbg_drmres(drm, fmt, ...)  \
drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DRMRES, fmt, 
##__VA_ARGS__)
 
+#define drm_dbg(drm, fmt, ...) drm_dbg_driver(drm, fmt, ##__VA_ARGS__)
 
 /*
  * printk based logging
-- 
2.36.1



[PATCH v4 35/41] dyndbg: add 2 more trace-events: pr_debug, dev_dbg

2022-07-20 Thread Jim Cromie
ddebug_trace() currently issues a single printk:console event.
Replace that, adding include/trace/events/dyndbg.h, which defines 2
new events:

- dyndbg:prdbg  - from trace_prdbg()  - if !dev
- dyndbg:devdbg - from trace_devdbg() - if !!dev

This links the legacy pr_debug API to tracefs, via dyndbg, allowing
pr_debug()s etc to add just a little more user-context to the
trace-logs, and then at the user's option, less syslog.

The 2 new trace_*() calls accept their caller's args respectively,
keeping the available info w/o alteration; we can't improve on
full disclosure.  The args:

 1- struct _ddebug *descriptor, giving tracefs all of dyndbg's info.
this replaces flags, which is in desc
 2- struct device *dev, used by trace_devdbg(), if !!dev

The trace_*() calls need the descriptor arg, the prototypes of the
callchain above them are extended to provide it.

dev_dbg(desc, dev...), if dev is true, issues a trace_devdbg(),
otherwise trace_prdbg().  This way we don't consume buffer space
storing nulls.  Otherwise the events are equivalent.

Signed-off-by: Jim Cromie 
---
 include/trace/events/dyndbg.h | 74 +++
 lib/dynamic_debug.c   | 73 +-
 2 files changed, 111 insertions(+), 36 deletions(-)
 create mode 100644 include/trace/events/dyndbg.h

diff --git a/include/trace/events/dyndbg.h b/include/trace/events/dyndbg.h
new file mode 100644
index ..e19fcb56566c
--- /dev/null
+++ b/include/trace/events/dyndbg.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM dyndbg
+
+#if !defined(_TRACE_DYNDBG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DYNDBG_H
+
+#include 
+
+/* capture pr_debug() callsite descriptor and message */
+TRACE_EVENT(prdbg,
+   TP_PROTO(const struct _ddebug *desc, const char *text, size_t len),
+
+   TP_ARGS(desc, text, len),
+
+   TP_STRUCT__entry(
+   __field(const struct _ddebug *, desc)
+   __dynamic_array(char, msg, len + 1)
+   ),
+
+   TP_fast_assign(
+   __entry->desc = desc;
+   /*
+* Each trace entry is printed in a new line.
+* If the msg finishes with '\n', cut it off
+* to avoid blank lines in the trace.
+*/
+   if (len > 0 && (text[len - 1] == '\n'))
+   len -= 1;
+
+   memcpy(__get_str(msg), text, len);
+   __get_str(msg)[len] = 0;
+   ),
+
+   TP_printk("%s.%s %s", __entry->desc->modname,
+ __entry->desc->function, __get_str(msg))
+);
+
+/* capture dev_dbg() callsite descriptor, device, and message */
+TRACE_EVENT(devdbg,
+   TP_PROTO(const struct _ddebug *desc, const struct device *dev,
+const char *text, size_t len),
+
+   TP_ARGS(desc, dev, text, len),
+
+   TP_STRUCT__entry(
+   __field(const struct _ddebug *, desc)
+   __field(const struct device *, dev)
+   __dynamic_array(char, msg, len + 1)
+   ),
+
+   TP_fast_assign(
+   __entry->desc = desc;
+   __entry->dev = (struct device *) dev;
+   /*
+* Each trace entry is printed in a new line.
+* If the msg finishes with '\n', cut it off
+* to avoid blank lines in the trace.
+*/
+   if (len > 0 && (text[len - 1] == '\n'))
+   len -= 1;
+
+   memcpy(__get_str(msg), text, len);
+   __get_str(msg)[len] = 0;
+   ),
+
+   TP_printk("%s.%s %s", __entry->desc->modname,
+ __entry->desc->function, __get_str(msg))
+);
+
+#endif /* _TRACE_DYNDBG_H */
+
+/* This part must be outside protection */
+#include 
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 66f12b9127c7..e000d037cb2e 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -36,7 +36,9 @@
 #include 
 #include 
 #include 
-#include 
+
+#define CREATE_TRACE_POINTS
+#include 
 
 #include 
 
@@ -855,7 +857,9 @@ struct ddebug_trace_bufs {
 static DEFINE_PER_CPU(struct ddebug_trace_bufs, ddebug_trace_bufs);
 static DEFINE_PER_CPU(int, ddebug_trace_reserve);
 
-static void ddebug_trace(const char *fmt, va_list args)
+__printf(3, 0)
+static void ddebug_trace(struct _ddebug *desc, const struct device *dev,
+const char *fmt, va_list args)
 {
struct ddebug_trace_buf *buf;
int bufidx;
@@ -874,7 +878,11 @@ static void ddebug_trace(const char *fmt, va_list args)
buf = this_cpu_ptr(ddebug_trace_bufs.bufs) + bufidx;
 
len = vscnprintf(buf->buf, sizeof(buf->buf), fmt, args);
-   trace_console(buf->buf, len);
+
+   if (!d

[PATCH v4 29/41] nouveau: change nvkm_debug/trace to use dev_dbg POC

2022-07-20 Thread Jim Cromie
These 2 macros formerly used dev_info, and they still check
subdev->debug to gate the printing.  So dyndbg control is redundant
ATM (and possibly confusing, since it's off by default).

prdbg count is up from 3, or from 65 (with VMM_DEBUG here)

[7.765379] dyndbg: 516 debug prints in module nouveau

It's possible to control error, warn, info callsites too, but they're
usually on, and the .data overheads on ~450 more callsites (56 bytes
each) would just be wasted.

$ for l in fatal error warn info debug trace spam; do
  echo $l; ack nvkm_$l drivers/gpu |wc; done
fatal
  3  19 335
error
2891956   30651
warn
 84 5138860
info
 14  881502
debug
3872339   40844
trace
 31 2193368
spam
  1   7 123

bash-5.1# echo $(( 516-65-387-31-1 ))
32

That's approximate; it does not account for #defines and doc/comment mentions.

NOTE: this patch changes the log-level of the macro-issued messages
from KERN_INFO to KERN_DEBUG.  Adding a .kern_lvl field to struct
_ddebug could fix that.

RFC: dyndbg & subdev->debug

Separate class-maps for each subdev are possible; except for the
coordinated use of _base, each is independent, including choice of
DISJOINT or LEVELS, as long as class-names don't conflict.
So there's some flexibility.

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h 
b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
index 96113c8bee8c..065d07ccea87 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
@@ -59,8 +59,8 @@ void nvkm_subdev_intr(struct nvkm_subdev *);
 #define nvkm_error(s,f,a...) nvkm_printk((s), ERROR,err, f, ##a)
 #define nvkm_warn(s,f,a...)  nvkm_printk((s),  WARN, notice, f, ##a)
 #define nvkm_info(s,f,a...)  nvkm_printk((s),  INFO,   info, f, ##a)
-#define nvkm_debug(s,f,a...) nvkm_printk((s), DEBUG,   info, f, ##a)
-#define nvkm_trace(s,f,a...) nvkm_printk((s), TRACE,   info, f, ##a)
+#define nvkm_debug(s,f,a...) nvkm_printk((s), DEBUG,dbg, f, ##a)
+#define nvkm_trace(s,f,a...) nvkm_printk((s), TRACE,dbg, f, ##a)
 #define nvkm_spam(s,f,a...)  nvkm_printk((s),  SPAM,dbg, f, ##a)
 
 #define nvkm_error_ratelimited(s,f,a...) nvkm_printk((s), ERROR, 
err_ratelimited, f, ##a)
-- 
2.36.1



[PATCH v4 22/41] drm_print: interpose drm_*dbg with forwarding macros

2022-07-20 Thread Jim Cromie
change drm_dev_dbg & drm_dbg to macros, which forward to the renamed
functions (with __ prefix added).

Those functions sit below the categorized layer of macros implementing
the DRM debug.category API, and implement most of it.  These are good
places to insert dynamic-debug jump-label mechanics, which will allow
DRM to avoid the runtime cost of drm_debug_enabled().

no functional changes.

memory cost baseline: (unchanged)
bash-5.1# drms_load
[9.220389] dyndbg:   1 debug prints in module drm
[9.224426] ACPI: bus type drm_connector registered
[9.302192] dyndbg:   2 debug prints in module ttm
[9.305033] dyndbg:   8 debug prints in module video
[9.627563] dyndbg: 127 debug prints in module i915
[9.721505] AMD-Vi: AMD IOMMUv2 functionality not available on this system - 
This is not a bug.
[   10.091345] dyndbg: 2196 debug prints in module amdgpu
[   10.106589] [drm] amdgpu kernel modesetting enabled.
[   10.107270] amdgpu: CRAT table not found
[   10.107926] amdgpu: Virtual CRAT table created for CPU
[   10.108398] amdgpu: Topology: Add CPU node
[   10.168507] dyndbg:   3 debug prints in module wmi
[   10.329587] dyndbg:   3 debug prints in module nouveau

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/drm_print.c | 10 +-
 include/drm/drm_print.h |  9 +++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index 44be95fac164..21b416af0be2 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -279,8 +279,8 @@ void drm_dev_printk(const struct device *dev, const char 
*level,
 }
 EXPORT_SYMBOL(drm_dev_printk);
 
-void drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
-const char *format, ...)
+void __drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
+  const char *format, ...)
 {
struct va_format vaf;
va_list args;
@@ -301,9 +301,9 @@ void drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
 
va_end(args);
 }
-EXPORT_SYMBOL(drm_dev_dbg);
+EXPORT_SYMBOL(__drm_dev_dbg);
 
-void __drm_dbg(enum drm_debug_category category, const char *format, ...)
+void ___drm_dbg(enum drm_debug_category category, const char *format, ...)
 {
struct va_format vaf;
va_list args;
@@ -320,7 +320,7 @@ void __drm_dbg(enum drm_debug_category category, const char 
*format, ...)
 
va_end(args);
 }
-EXPORT_SYMBOL(__drm_dbg);
+EXPORT_SYMBOL(___drm_dbg);
 
 void __drm_err(const char *format, ...)
 {
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 668273e36c2c..c429c258c957 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -335,7 +335,7 @@ __printf(3, 4)
 void drm_dev_printk(const struct device *dev, const char *level,
const char *format, ...);
 __printf(3, 4)
-void drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
+void __drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
 const char *format, ...);
 
 /**
@@ -384,6 +384,9 @@ void drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
}   \
 })
 
+#define drm_dev_dbg(dev, cat, fmt, ...)\
+   __drm_dev_dbg(dev, cat, fmt, ##__VA_ARGS__)
+
 /**
  * DRM_DEV_DEBUG() - Debug output for generic drm code
  *
@@ -485,10 +488,12 @@ void drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
  */
 
 __printf(2, 3)
-void __drm_dbg(enum drm_debug_category category, const char *format, ...);
+void ___drm_dbg(enum drm_debug_category category, const char *format, ...);
 __printf(1, 2)
 void __drm_err(const char *format, ...);
 
+#define __drm_dbg(fmt, ...)___drm_dbg(fmt, ##__VA_ARGS__)
+
 /* Macros to make printk easier */
 
 #define _DRM_PRINTK(once, level, fmt, ...) \
-- 
2.36.1



[PATCH v4 15/41] dyndbg: validate class FOO by checking with module

2022-07-20 Thread Jim Cromie
Add module-to-class validation:

  #> echo class DRM_UT_KMS +p > /proc/dynamic_debug/control

If a query has "class FOO", then ddebug_find_valid_class(), called
from ddebug_change(), requires that FOO is known to module X,
otherwise the query is skipped entirely for X.  This protects each
module's class-space, other than the default:31.

The authors' choice of FOO is highly selective, giving isolation
and/or coordinated sharing of FOOs.  For example, only DRM modules
should know and respond to DRM_UT_KMS.

So this, combined with module's opt-in declaration of known classes,
effectively privatizes the .class_id space for each module (or
coordinated set of modules).

Notes:

For all "class FOO" queries, ddebug_find_valid_class() is called; it
returns the map matching the query, and sets valid_class (via an
*outvar).

If no "class FOO" is supplied, valid_class = _DPRINTK_CLASS_DFLT.  This
ensures that legacy queries do not trample on new class'd callsites,
as they get added.

Also add a new column to control-file output, displaying non-default
class-name (when found) or the "unknown _id:", if it has not been
(correctly) declared with one of the declarator macros.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 76 -
 1 file changed, 68 insertions(+), 8 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index e29730686cfb..4c27bbe5187e 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -56,6 +56,7 @@ struct ddebug_query {
const char *module;
const char *function;
const char *format;
+   const char *class_string;
unsigned int first_lineno, last_lineno;
 };
 
@@ -136,15 +137,33 @@ static void vpr_info_dq(const struct ddebug_query *query, 
const char *msg)
fmtlen--;
}
 
-   v3pr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" 
lineno=%u-%u\n",
-msg,
-query->function ?: "",
-query->filename ?: "",
-query->module ?: "",
-fmtlen, query->format ?: "",
-query->first_lineno, query->last_lineno);
+   v3pr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" 
lineno=%u-%u class=%s\n",
+ msg,
+ query->function ?: "",
+ query->filename ?: "",
+ query->module ?: "",
+ fmtlen, query->format ?: "",
+ query->first_lineno, query->last_lineno, query->class_string);
 }
 
+static struct ddebug_class_map *ddebug_find_valid_class(struct ddebug_table 
const *dt,
+ const char 
*class_string, int *class_id)
+{
+   struct ddebug_class_map *map;
+   int idx;
+
+   list_for_each_entry(map, &dt->maps, link) {
+   idx = match_string(map->class_names, map->length, class_string);
+   if (idx >= 0) {
+   *class_id = idx + map->base;
+   return map;
+   }
+   }
+   *class_id = -ENOENT;
+   return NULL;
+}
+
+#define __outvar /* filled by callee */
 /*
  * Search the tables for _ddebug's which match the given `query' and
  * apply the `flags' and `mask' to them.  Returns number of matching
@@ -159,6 +178,8 @@ static int ddebug_change(const struct ddebug_query *query,
unsigned int newflags;
unsigned int nfound = 0;
struct flagsbuf fbuf, nbuf;
+   struct ddebug_class_map *map = NULL;
+   int __outvar valid_class;
 
/* search for matching ddebugs */
mutex_lock(&ddebug_lock);
@@ -169,9 +190,22 @@ static int ddebug_change(const struct ddebug_query *query,
!match_wildcard(query->module, dt->mod_name))
continue;
 
+   if (query->class_string) {
+   map = ddebug_find_valid_class(dt, query->class_string, 
&valid_class);
+   if (!map)
+   continue;
+   } else {
+   /* constrain query, do not touch class'd callsites */
+   valid_class = _DPRINTK_CLASS_DFLT;
+   }
+
for (i = 0; i < dt->num_ddebugs; i++) {
struct _ddebug *dp = &dt->ddebugs[i];
 
+   /* match site against query-class */
+   if (dp->class_id != valid_class)
+   continue;
+
/* match against the source filename */
if (query->filename &&
!match_wildcard(query->filename, dp->filename) &&
@@ -420,6 +454,8 @@ static int ddebug_parse_query(char *words[], int nwords,
} else if (!strcmp(keyword, "line")) {
if (parse_linerange(query, arg))
return -EINVAL;
+   } else if (!strcmp(keyword, "class")) {

[PATCH v4 19/41] doc-dyndbg: edit dynamic-debug-howto for brevity, audience

2022-07-20 Thread Jim Cromie
Rework/modernize docs:

 - use /proc/dynamic_debug/control in examples
   it's *always* there (when dyndbg is config'd), even when <debugfs> is not.
   drop <debugfs> talk, it's a distraction here.

 - alias ddcmd='echo $* > /proc/dynamic_debug/control'
   declutter, hide boilerplate, focus on args

 - move Viewing before Controlling. read before write.
   control file as Catalog.

 - focus on use by a system administrator
   add an alias to make examples more readable
   drop grep-101 lessons, admins know this.

 - use init/main.c as 1st example, thread it thru doc where useful.
   everybody's kernel boots, runs these.

 - add *prdbg* api section
   to the bottom of the file, it's for developers more than admins.
   move list of api functions there.

 - simplify - drop extra words, phrases, sentences.

 - add "decorator" flags line to unify "prefix", trim fmlt descriptions

CC: linux-...@vger.kernel.org
Signed-off-by: Jim Cromie 

---
fixup-doc: trailing colons for block headers, trim fedora numbers. Bagas
---
 .../admin-guide/dynamic-debug-howto.rst   | 235 +-
 1 file changed, 117 insertions(+), 118 deletions(-)

diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst 
b/Documentation/admin-guide/dynamic-debug-howto.rst
index d8954ab05c7b..faa22f77847a 100644
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@@ -5,30 +5,19 @@ Dynamic debug
 Introduction
 
 
-This document describes how to use the dynamic debug (dyndbg) feature.
+Dynamic debug allows you to dynamically enable/disable kernel
+debug-print code to obtain additional kernel information.
 
-Dynamic debug is designed to allow you to dynamically enable/disable
-kernel code to obtain additional kernel information.  Currently, if
-``CONFIG_DYNAMIC_DEBUG`` is set, then all ``pr_debug()``/``dev_dbg()`` and
-``print_hex_dump_debug()``/``print_hex_dump_bytes()`` calls can be dynamically
-enabled per-callsite.
+If ``/proc/dynamic_debug/control`` exists, your kernel has dynamic
+debug.  You'll need root access (sudo su) to use this.
 
-If you do not want to enable dynamic debug globally (i.e. in some embedded
-system), you may set ``CONFIG_DYNAMIC_DEBUG_CORE`` as basic support of dynamic
-debug and add ``ccflags := -DDYNAMIC_DEBUG_MODULE`` into the Makefile of any
-modules which you'd like to dynamically debug later.
+Dynamic debug provides:
 
-If ``CONFIG_DYNAMIC_DEBUG`` is not set, ``print_hex_dump_debug()`` is just
-shortcut for ``print_hex_dump(KERN_DEBUG)``.
+ * a Catalog of all *prdbgs* in your kernel.
+   ``cat /proc/dynamic_debug/control`` to see them.
 
-For ``print_hex_dump_debug()``/``print_hex_dump_bytes()``, format string is
-its ``prefix_str`` argument, if it is constant string; or ``hexdump``
-in case ``prefix_str`` is built dynamically.
-
-Dynamic debug has even more useful features:
-
- * Simple query language allows turning on and off debugging
-   statements by matching any combination of 0 or 1 of:
+ * a Simple query/command language to alter *prdbgs* by selecting on
+   any combination of 0 or 1 of:
 
- source filename
- function name
@@ -37,107 +26,88 @@ Dynamic debug has even more useful features:
- format string
- class name (as known/declared by each module)
 
- * Provides a debugfs control file: ``/dynamic_debug/control``
-   which can be read to display the complete list of known debug
-   statements, to help guide you
-
-Controlling dynamic debug Behaviour
-===
-
-The behaviour of ``pr_debug()``/``dev_dbg()`` are controlled via writing to a
-control file in the 'debugfs' filesystem. Thus, you must first mount
-the debugfs filesystem, in order to make use of this feature.
-Subsequently, we refer to the control file as:
-``/dynamic_debug/control``. For example, if you want to enable
-printing from source file ``svcsock.c``, line 1603 you simply do::
-
-  nullarbor:~ # echo 'file svcsock.c line 1603 +p' >
-   /dynamic_debug/control
-
-If you make a mistake with the syntax, the write will fail thus::
-
-  nullarbor:~ # echo 'file svcsock.c wtf 1 +p' >
-   /dynamic_debug/control
-  -bash: echo: write error: Invalid argument
-
-Note, for systems without 'debugfs' enabled, the control file can be
-found in ``/proc/dynamic_debug/control``.
-
 Viewing Dynamic Debug Behaviour
 ===
 
-You can view the currently configured behaviour of all the debug
-statements via::
+You can view the currently configured behaviour in the *prdbg* catalog::
 
-  nullarbor:~ # cat /dynamic_debug/control
+  :#> head -n7 /proc/dynamic_debug/control
   # filename:lineno [module]function flags format
-  net/sunrpc/svc_rdma.c:323 [svcxprt_rdma]svc_rdma_cleanup =_ "SVCRDMA Module 
Removed, deregister RPC RDMA transport\012"
-  net/sunrpc/svc_rdma.c:341 [svcxprt_rdma]svc_rdma_init =_ "\011max_inline 
  : %d\012"
-  net/sunrpc/svc_rdma.c:340 [svcxprt_rdma

[PATCH v4 12/41] dyndbg: add DECLARE_DYNDBG_CLASSMAP

2022-07-20 Thread Jim Cromie
DECLARE_DYNDBG_CLASSMAP lets modules declare a set of classnames, this
opt-in authorizes dyndbg to allow enabling of prdbgs by their class:

   :#> echo class DRM_UT_KMS +p > /proc/dynamic_debug/control

This is just the setup; following commits deliver.

The macro declares and initializes a static struct ddebug_class_map:

 - maps approved class-names to class_ids used in module,
   by array order. forex: DRM_UT_*
 - class-name vals allow validation of "class FOO" queries
   using macro is opt-in
 - enum class_map_type - determines interface, behavior

Each module has its own .class_id space, and only known class-names
will be authorized for a manipulation.  Only DRM stuff should know this:

  :#> echo class DRM_UT_CORE +p > control   # across all modules

pr_debugs (with default class_id) are still controllable as before.

DECLARE_DYNDBG_CLASSMAP(_var, _maptype, _base, classes...) is::

 _var: name of the static struct var. user passes to module_param_cb()
   if they want a sysfs node. (I've only done it this way).

 _maptype: this is hard-coded to DD_CLASS_TYPE_DISJOINT for now.

 _base: usually 0, it allows splitting 31 classes into subranges, so
that multiple classes / sysfs-nodes can share the module's
class-id space.

 classes: list of class_name strings, these are mapped to class-ids
  starting at _base.  This class-names list must have a
  corresponding ENUM, with SYMBOLS that match the literals,
  and 1st enum val = _base.

enum class_map_type has 4 values, on 2 factors::

 - classes are disjoint/independent vs relative/xcontrol interface
doesn't enforce the LEVELS relationship, so you could confusingly have
V3 enabled, but V1 disabled.  OTOH, the control iface already allows
infinite variety in the underlying callsites, despite the veneer of
uniformity suggested by the bitmap overlay, and LEVELS over that.

2. All dyndbg >control reduces to a query/command, includes +/-, which
is at-root a kernel patching operation with +/- semantics.  And the
symbolic sys-node handling exposes it to the user:

Consider whether these are/should-be 'exactly' the same:

   # force both 2 "half-duplex" relations
   echo +V3,-V4 > /sys/module/test_dynamic_debug/p_VX

   # should these both do the same ?
   echo +V3 > /sys/module/test_dynamic_debug/p_VX
   echo -V4 > /sys/module/test_dynamic_debug/p_VX

IOW, half relations are suggested by the +/-, and enum control of
individual behaviors leaves some room for this, especially wrt
handling [+-]SYMBOLIC inputs from the user.

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 55 +++
 1 file changed, 55 insertions(+)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 0f7ad6cecf05..84e97cd0e8c4 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -56,7 +56,62 @@ struct _ddebug {
 #endif
 } __attribute__((aligned(8)));
 
+enum class_map_type {
+   DD_CLASS_TYPE_DISJOINT,
+   /**
+* DD_CLASS_TYPE_DISJOINT: classes are independent, one per bit.
+* expecting hex input. basis for others.
+*/
+   DD_CLASS_TYPE_VERBOSE,
+   /**
+* DD_CLASS_TYPE_VERBOSE: input is numeric level, 0-N.
+* 0 should be silent, use printk to break that.
+* (x

[PATCH v4 20/41] drm_print: condense enum drm_debug_category

2022-07-20 Thread Jim Cromie
enum drm_debug_category has 10 categories, but is initialized with
bitmasks which require 10 bits of underlying storage.  By using
natural enumeration, and moving the BIT(cat) into drm_debug_enabled(),
the enum fits in 4 bits, allowing the category to be represented
directly in pr_debug callsites, via the ddebug.class_id field.

While this slightly pessimizes the bit-test in drm_debug_enabled(),
using dyndbg with JUMP_LABEL will avoid the function entirely.

NOTE: this change forecloses the possibility of doing:

  drm_dbg(DRM_UT_CORE|DRM_UT_KMS, "weird 2-cat experiment")

but that's already strongly implied by the use of the enum itself; it's
not a normal enum if it can be 2 values simultaneously.

Signed-off-by: Jim Cromie 
---
 include/drm/drm_print.h | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 22fabdeed297..b3b470440e46 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -279,49 +279,49 @@ enum drm_debug_category {
 * @DRM_UT_CORE: Used in the generic drm code: drm_ioctl.c, drm_mm.c,
 * drm_memory.c, ...
 */
-   DRM_UT_CORE = 0x01,
+   DRM_UT_CORE,
/**
 * @DRM_UT_DRIVER: Used in the vendor specific part of the driver: i915,
 * radeon, ... macro.
 */
-   DRM_UT_DRIVER   = 0x02,
+   DRM_UT_DRIVER,
/**
 * @DRM_UT_KMS: Used in the modesetting code.
 */
-   DRM_UT_KMS  = 0x04,
+   DRM_UT_KMS,
/**
 * @DRM_UT_PRIME: Used in the prime code.
 */
-   DRM_UT_PRIME= 0x08,
+   DRM_UT_PRIME,
/**
 * @DRM_UT_ATOMIC: Used in the atomic code.
 */
-   DRM_UT_ATOMIC   = 0x10,
+   DRM_UT_ATOMIC,
/**
 * @DRM_UT_VBL: Used for verbose debug message in the vblank code.
 */
-   DRM_UT_VBL  = 0x20,
+   DRM_UT_VBL,
/**
 * @DRM_UT_STATE: Used for verbose atomic state debugging.
 */
-   DRM_UT_STATE= 0x40,
+   DRM_UT_STATE,
/**
 * @DRM_UT_LEASE: Used in the lease code.
 */
-   DRM_UT_LEASE= 0x80,
+   DRM_UT_LEASE,
/**
 * @DRM_UT_DP: Used in the DP code.
 */
-   DRM_UT_DP   = 0x100,
+   DRM_UT_DP,
/**
 * @DRM_UT_DRMRES: Used in the drm managed resources code.
 */
-   DRM_UT_DRMRES   = 0x200,
+   DRM_UT_DRMRES
 };
 
 static inline bool drm_debug_enabled(enum drm_debug_category category)
 {
-   return unlikely(__drm_debug & category);
+   return unlikely(__drm_debug & BIT(category));
 }
 
 /*
-- 
2.36.1



[PATCH v4 09/41] dyndbg: drop EXPORTed dynamic_debug_exec_queries

2022-07-20 Thread Jim Cromie
This exported fn is unused, and will not be needed. Let's dump it.

The export was added to let drm control pr_debugs, as part of using
them to avoid drm_debug_enabled overheads.  But it's better to just
implement the drm.debug bitmap interface, then it's available for
everyone.

Fixes: a2d375eda771 ("dyndbg: refine export, rename to 
dynamic_debug_exec_queries()")
Fixes: 4c0d77828d4f ("dyndbg: export ddebug_exec_queries")
Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h |  9 -
 lib/dynamic_debug.c   | 29 -
 2 files changed, 38 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index f30b01aa9fa4..8d9eec5f6d8b 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -55,9 +55,6 @@ struct _ddebug {
 
 #if defined(CONFIG_DYNAMIC_DEBUG_CORE)
 
-/* exported for module authors to exercise >control */
-int dynamic_debug_exec_queries(const char *query, const char *modname);
-
 int ddebug_add_module(struct _ddebug *tab, unsigned int n,
const char *modname);
 extern int ddebug_remove_module(const char *mod_name);
@@ -221,12 +218,6 @@ static inline int ddebug_dyndbg_module_param_cb(char 
*param, char *val,
rowsize, groupsize, buf, len, ascii);   \
} while (0)
 
-static inline int dynamic_debug_exec_queries(const char *query, const char 
*modname)
-{
-   pr_warn("kernel not built with CONFIG_DYNAMIC_DEBUG_CORE\n");
-   return 0;
-}
-
 #endif /* !CONFIG_DYNAMIC_DEBUG_CORE */
 
 #endif
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 5a849716220a..e96dc216463b 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -558,35 +558,6 @@ static int ddebug_exec_queries(char *query, const char 
*modname)
return nfound;
 }
 
-/**
- * dynamic_debug_exec_queries - select and change dynamic-debug prints
- * @query: query-string described in admin-guide/dynamic-debug-howto
- * @modname: string containing module name, usually &module.mod_name
- *
- * This uses the >/proc/dynamic_debug/control reader, allowing module
- * authors to modify their dynamic-debug callsites. The modname is
- * canonically struct module.mod_name, but can also be null or a
- * module-wildcard, for example: "drm*".
- */
-int dynamic_debug_exec_queries(const char *query, const char *modname)
-{
-   int rc;
-   char *qry; /* writable copy of query */
-
-   if (!query) {
-   pr_err("non-null query/command string expected\n");
-   return -EINVAL;
-   }
-   qry = kstrndup(query, PAGE_SIZE, GFP_KERNEL);
-   if (!qry)
-   return -ENOMEM;
-
-   rc = ddebug_exec_queries(qry, modname);
-   kfree(qry);
-   return rc;
-}
-EXPORT_SYMBOL_GPL(dynamic_debug_exec_queries);
-
 #define PREFIX_SIZE 64
 
 static int remaining(int wrote)
-- 
2.36.1



[PATCH v4 16/41] dyndbg: add drm.debug style bitmap support

2022-07-20 Thread Jim Cromie
Add kernel_param_ops and callbacks to apply a class-map to a
sysfs-node, which then can control classes defined in that class-map.
This supports uses like:

  echo 0x3 > /sys/module/drm/parameters/debug

IE add these:

 - int param_set_dyndbg_classes()
 - int param_get_dyndbg_classes()
 - struct kernel_param_ops param_ops_dyndbg_classes

Following the model of kernel/params.c STANDARD_PARAM_DEFS, these are
non-static and exported.  This might be unnecessary here.

get/set use an augmented kernel_param; the arg refs a new struct
ddebug_classes_bitmap_param, initialized by DYNAMIC_DEBUG_CLASSBITS
macro, which contains:

BITS: a pointer to the user module's ulong holding the bits/state.  By
ref'g the client's bit-state _var, we coordinate with existing code
(such as drm_debug_enabled) which uses the _var, so it works
unchanged, even as the foundation is switched out underneath it.
Using a ulong allows use of BIT() etc.

FLAGS: dyndbg.flags toggled by changes to bitmap. Usually just "p".

MAP: a pointer to struct ddebug_class_map, which maps those
class-names to .class_ids 0..N that the module is using.  This
class-map is declared & initialized by DECLARE_DYNDBG_CLASSMAP.

map-type: add support here for DD_CLASS_TYPE_DISJOINT, DD_CLASS_TYPE_VERBOSE.

These 2 class-types both expect an integer; _DISJOINT treats input
like a bit-vector (ala drm.debug), and sets each bit accordingly.

_VERBOSE treats input like a bit-pos:N, then sets bits(0..N)=1, and
bits(N+1..max)=0.  This applies (bit bitmap transform that set-param does
on VERBOSE inputs, this gives the read-what-was-written property.

_VERBOSE is overlay on _DISJOINT:

verbose-maps still need class-names, even though they're not usable at
the sysfs interface (unlike with _SYMBOLIC/_LEVELS).

 - It must have a "V0" name,
   something below "V1" to turn "V1" off.
   __pr_debug_cls(V0,..) is printk, don't do that.

 - "class names" is required at the >control interface.
 - relative levels are not enforced at >control

IOW this is possible, and maybe confusing:

  echo class V3 +p > control
  echo class V1 -p > control

IMO that's ok, relative verbosity is an interface property.

Signed-off-by: Jim Cromie 
---
. drop kp->mod->name as unneeded (build-dependent) 
---
 include/linux/dynamic_debug.h |  18 
 lib/dynamic_debug.c   | 193 ++
 2 files changed, 211 insertions(+)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index f57076e02767..b50bdd5c8184 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -113,6 +113,12 @@ struct ddebug_class_map {
 #define NUM_TYPE_ARGS(eltype, ...) \
(sizeof((eltype[]) {__VA_ARGS__}) / sizeof(eltype))
 
+struct ddebug_classes_bitmap_param {
+   unsigned long *bits;
+   char flags[8];
+   const struct ddebug_class_map *map;
+};
+
 #if defined(CONFIG_DYNAMIC_DEBUG_CORE)
 
 int ddebug_add_module(struct _ddebug *tab, unsigned int num_debugs,
@@ -274,6 +280,10 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
   KERN_DEBUG, prefix_str, prefix_type, \
   rowsize, groupsize, buf, len, ascii)
 
+struct kernel_param;
+int param_set_dyndbg_classes(const char *instr, const struct kernel_param *kp);
+int param_get_dyndbg_classes(char *buffer, const struct kernel_param *kp);
+
 /* for test only, generally expect drm.debug style macro wrappers */
 #define __pr_debug_cls(cls, fmt, ...) do { \
BUILD_BUG_ON_MSG(!__builtin_constant_p(cls),\
@@ -322,6 +332,14 @@ static inline int ddebug_dyndbg_module_param_cb(char 
*param, char *val,
rowsize, groupsize, buf, len, ascii);   \
} while (0)
 
+struct kernel_param;
+static inline int param_set_dyndbg_classes(const char *instr, const struct 
kernel_param *kp)
+{ return 0; }
+static inline int param_get_dyndbg_classes(char *buffer, const struct 
kernel_param *kp)
+{ return 0; }
+
 #endif /* !CONFIG_DYNAMIC_DEBUG_CORE */
 
+extern const struct kernel_param_ops param_ops_dyndbg_classes;
+
 #endif
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 4c27bbe5187e..dd27dc514aa3 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -596,6 +596,199 @@ static int ddebug_exec_queries(char *query, const char 
*modname)
return nfound;
 }
 
+static int ddebug_apply_class_bitmap(const struct ddebug_classes_bitmap_param 
*dcp,
+unsigned long inbits)
+{
+#define QUERY_SIZE 128
+   char query[QUERY_SIZE];
+   const struct ddebug_class_map *map = dcp->map;
+   int matches = 0;
+   int bi, ct;
+
+   v2pr_info("in: 0x%lx on: 0x%lx\n", inbits, *dcp->bits);
+
+   for (bi = 0; bi < map->length; bi++) {
+   if (test_bit(bi, &inbits) == test_bit(bi, dcp->bits))
+   continue;
+
+   snprintf(query, QUERY_SIZE, "class %s %c%

[PATCH v4 13/41] kernel/module: add __dyndbg_classes section

2022-07-20 Thread Jim Cromie
Like existing sections, particularly __dyndbg, this new one is an
array/address and its length.  In a close imitation of __dyndbg
handling, these are defined, then passed around, as follows:

vmlinux.lds.h:

KEEP the new section, which also silences orphan section warning on
loadable modules.  Add (__start_/__stop_)__dyndbg_classes linker
symbols for the c externs (below).

kernel/module/internal.h:
- add new fields for classes,length to struct load_info,

kernel/module.c:
- fill new fields in find_module_sections(), using section_objs()
- extend callchain prototypes
  to pass classes, length
  load_module(): pass new info to dynamic_debug_setup()
  dynamic_debug_setup(): new params, pass through to ddebug_add_module()

dynamic_debug.c:
- add externs to the linker symbols.

ddebug_add_module(): add params for classes, length.
- It currently builds a debug_table, and *will* find and attach classes.

dynamic_debug_init(): compute num_classes from linker symbols, and add
new _start, num_classes params to ddebug_add_module() calls.

Signed-off-by: Jim Cromie 
---
 include/asm-generic/vmlinux.lds.h |  3 +++
 include/linux/dynamic_debug.h |  9 ++---
 kernel/module/internal.h  |  2 ++
 kernel/module/main.c  | 10 +++---
 lib/dynamic_debug.c   | 22 --
 5 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/include/asm-generic/vmlinux.lds.h 
b/include/asm-generic/vmlinux.lds.h
index 7515a465ec03..9b8bd5504ad9 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -345,6 +345,9 @@
*(__tracepoints)\
/* implement dynamic printk debug */\
. = ALIGN(8);   \
+   __start___dyndbg_classes = .;   \
+   KEEP(*(__dyndbg_classes))   \
+   __stop___dyndbg_classes = .;\
__start___dyndbg = .;   \
KEEP(*(__dyndbg))   \
__stop___dyndbg = .;\
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 84e97cd0e8c4..f57076e02767 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -115,8 +115,10 @@ struct ddebug_class_map {
 
 #if defined(CONFIG_DYNAMIC_DEBUG_CORE)
 
-int ddebug_add_module(struct _ddebug *tab, unsigned int n,
-   const char *modname);
+int ddebug_add_module(struct _ddebug *tab, unsigned int num_debugs,
+ struct ddebug_class_map *classes, unsigned int 
num_classes,
+ const char *modname);
+
 extern int ddebug_remove_module(const char *mod_name);
 extern __printf(2, 3)
 void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...);
@@ -285,7 +287,8 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
 #include 
 #include 
 
-static inline int ddebug_add_module(struct _ddebug *tab, unsigned int n,
+static inline int ddebug_add_module(struct _ddebug *tab, unsigned int 
num_debugs,
+   struct ddebug_class_map *classes, unsigned 
int num_classes,
const char *modname)
 {
return 0;
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index bc5507ab8450..02601dfe452b 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -60,7 +60,9 @@ struct load_info {
char *secstrings, *strtab;
unsigned long symoffs, stroffs, init_typeoffs, core_typeoffs;
struct _ddebug *debug;
+   struct ddebug_class_map *debug_classes;
unsigned int num_debug;
+   unsigned int num_debug_classes;
bool sig_ok;
 #ifdef CONFIG_KALLSYMS
unsigned long mod_kallsyms_init_off;
diff --git a/kernel/module/main.c b/kernel/module/main.c
index fed58d30725d..0f8e888908df 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1593,11 +1593,12 @@ static void free_modinfo(struct module *mod)
}
 }
 
-static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug, 
unsigned int num)
+static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug, 
unsigned int num,
+   struct ddebug_class_map *classes, unsigned int 
num_classes)
 {
if (!debug)
return;
-   ddebug_add_module(debug, num, mod->name);
+   ddebug_add_module(debug, num, classes, num_classes, mod->name);
 }
 
 static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug)
@@ -2093,6 +2094,8 @@ static int find_module_sections(struct module *mod, 
struct load_info *info)
if (section_addr(info, "__obsparm"))
pr_warn("%s: Ignoring obsolete parameters\n", mod->name);
 
+   info->d

[PATCH v4 10/41] dyndbg: add class_id to pr_debug callsites

2022-07-20 Thread Jim Cromie
DRM issues ~10 exclusive categories of debug messages; to represent
this directly in dyndbg, add a new field: struct _ddebug.class_id:5.

This gives us 32 classes, which is a practical usability limit
with a bitmap interface:

  #> echo 0x012345678 > /sys/module/drm/parameters/debug

All existing callsites are initialized with _DPRINTK_CLASS_DFLT, which
is 2^5-1.  This reserves 0-30 for use in new categorized/class'd
pr_debugs, which fits perfectly with natural enums (ints: 0..N).

Then extend the init macro: DEFINE_DYNAMIC_DEBUG_METADATA() with
_CLS(cls, ...), and redef old name using extended name.

And extend the factory macro callchain with _cls() versions to provide
the callsite.class_id, with old-names passing _DPRINTK_CLASS_DFLT.

This sets us up to create class'd prdebug callsites (class'd callsites
are those with .class_id != _DPRINTK_CLASS_DFLT).

No behavior change.

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 71 +++
 1 file changed, 55 insertions(+), 16 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 8d9eec5f6d8b..d1429812be2e 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -6,6 +6,8 @@
 #include 
 #endif
 
+#include 
+
 /*
  * An instance of this structure is created in a special
  * ELF section at every dynamic debug callsite.  At runtime,
@@ -21,6 +23,9 @@ struct _ddebug {
const char *filename;
const char *format;
unsigned int lineno:18;
+#define CLS_BITS 5
+   unsigned int class_id:CLS_BITS;
+#define _DPRINTK_CLASS_DFLT((1 << CLS_BITS) - 1)
/*
 * The flags field controls the behaviour at the callsite.
 * The bits here are changed dynamically when the user
@@ -84,7 +89,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
 const struct ib_device *ibdev,
 const char *fmt, ...);
 
-#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)   \
+#define DEFINE_DYNAMIC_DEBUG_METADATA_CLS(name, cls, fmt)  \
static struct _ddebug  __aligned(8) \
__section("__dyndbg") name = {  \
.modname = KBUILD_MODNAME,  \
@@ -93,8 +98,14 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
.format = (fmt),\
.lineno = __LINE__, \
.flags = _DPRINTK_FLAGS_DEFAULT,\
+   .class_id = cls,\
_DPRINTK_KEY_INIT   \
-   }
+   };  \
+   BUILD_BUG_ON_MSG(cls > _DPRINTK_CLASS_DFLT, \
+"classid value overflow")
+
+#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)   \
+   DEFINE_DYNAMIC_DEBUG_METADATA_CLS(name, _DPRINTK_CLASS_DFLT, fmt)
 
 #ifdef CONFIG_JUMP_LABEL
 
@@ -125,17 +136,34 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
 
 #endif /* CONFIG_JUMP_LABEL */
 
-#define __dynamic_func_call(id, fmt, func, ...) do {   \
-   DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \
-   if (DYNAMIC_DEBUG_BRANCH(id))   \
-   func(&id, ##__VA_ARGS__);   \
-} while (0)
-
-#define __dynamic_func_call_no_desc(id, fmt, func, ...) do {   \
-   DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \
+/*
+ * Factory macros: ($prefix)dynamic_func_call($suffix)
+ *
+ * Lower layer (with __ prefix) gets the callsite metadata, and wraps
+ * the func inside a debug-branch/static-key construct.  Upper layer
+ * (with _ prefix) does the UNIQUE_ID once, so that lower can ref the
+ * name/label multiple times, and tie the elements together.
+ * Multiple flavors:
+ * (|_cls):adds in _DPRINT_CLASS_DFLT as needed
+ * (|_no_desc):former gets callsite descriptor as 1st arg (for prdbgs)
+ */
+#define __dynamic_func_call_cls(id, cls, fmt, func, ...) do {  \
+   DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt);\
if (DYNAMIC_DEBUG_BRANCH(id))   \
-   func(__VA_ARGS__);  \
+   func(&id, ##__VA_ARGS__);   \
 } while (0)
+#define __dynamic_func_call(id, fmt, func, ...)
\
+   __dynamic_func_call_cls(id, _DPRINTK_CLASS_DFLT, fmt,   \
+   func, ##__VA_ARGS__)
+
+#define __dynamic_func_call_cls_no_desc(id, cls, fmt, func, ...) do {  \
+   DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt);\
+   if (DYNAMIC_DEBUG_BRANCH(id))   \
+   func(__VA_ARGS__);  \
+} while (0)
+#define __dynamic_func_call_no_desc(id, fmt, func, ...)  

[PATCH v4 11/41] dyndbg: add __pr_debug_cls for testing

2022-07-20 Thread Jim Cromie
For selftest purposes, add __pr_debug_cls(class, fmt, ...)

I didn't think we'd need to define this, since DRM effectively has it
already in drm_dbg, drm_devdbg.  But test_dynamic_debug needs it in
order to demonstrate all the moving parts.

Note the __ prefix; it's not intended for general use, at least until a
need emerges.  ISTM the drm.debug model (macro wrappers inserting enum
const 1st arg) is the baseline approach.

NB: it does require a builtin-constant class, no __pr_debug_cls(i"",...)

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index d1429812be2e..0f7ad6cecf05 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -217,6 +217,13 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
   KERN_DEBUG, prefix_str, prefix_type, \
   rowsize, groupsize, buf, len, ascii)
 
+/* for test only, generally expect drm.debug style macro wrappers */
+#define __pr_debug_cls(cls, fmt, ...) do { \
+   BUILD_BUG_ON_MSG(!__builtin_constant_p(cls),\
+"expecting constant class int/enum");  \
+   dynamic_pr_debug_cls(cls, fmt, ##__VA_ARGS__);  \
+   } while (0)
+
 #else /* !CONFIG_DYNAMIC_DEBUG_CORE */
 
 #include 
-- 
2.36.1



[PATCH v4 05/41] dyndbg: reverse module.callsite walk in cat control

2022-07-20 Thread Jim Cromie
Walk the module's vector of callsites backwards; ie N..0.  This
"corrects" the backwards appearance of a module's prdbg vector when
walked 0..N.  I think this is due to linker mechanics, which I'm
inclined to treat as immutable, and the order is fixable in display.

No functional changes.

Combined with previous commit, which reversed tables-list, we get:

  :#> head -n7 /proc/dynamic_debug/control
  # filename:lineno [module]function flags format
  init/main.c:1179 [main]initcall_blacklist =_ "blacklisting initcall %s\012"
  init/main.c:1218 [main]initcall_blacklisted =_ "initcall %s blacklisted\012"
  init/main.c:1424 [main]run_init_process =_ "  with arguments:\012"
  init/main.c:1426 [main]run_init_process =_ "%s\012"
  init/main.c:1427 [main]run_init_process =_ "  with environment:\012"
  init/main.c:1429 [main]run_init_process =_ "%s\012"

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 7fb99492c16f..8ff11977b8bd 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -59,7 +59,7 @@ struct ddebug_query {
 
 struct ddebug_iter {
struct ddebug_table *table;
-   unsigned int idx;
+   int idx;
 };
 
 struct flag_settings {
@@ -805,13 +805,12 @@ static struct _ddebug *ddebug_iter_first(struct 
ddebug_iter *iter)
 {
if (list_empty(&ddebug_tables)) {
iter->table = NULL;
-   iter->idx = 0;
return NULL;
}
iter->table = list_entry(ddebug_tables.next,
 struct ddebug_table, link);
-   iter->idx = 0;
-   return &iter->table->ddebugs[iter->idx];
+   iter->idx = iter->table->num_ddebugs;
+   return &iter->table->ddebugs[--iter->idx];
 }
 
 /*
@@ -824,15 +823,16 @@ static struct _ddebug *ddebug_iter_next(struct 
ddebug_iter *iter)
 {
if (iter->table == NULL)
return NULL;
-   if (++iter->idx == iter->table->num_ddebugs) {
+   if (--iter->idx < 0) {
/* iterate to next table */
-   iter->idx = 0;
if (list_is_last(&iter->table->link, &ddebug_tables)) {
iter->table = NULL;
return NULL;
}
iter->table = list_entry(iter->table->link.next,
 struct ddebug_table, link);
+   iter->idx = iter->table->num_ddebugs;
+   --iter->idx;
}
return &iter->table->ddebugs[iter->idx];
 }
-- 
2.36.1



[PATCH v4 04/41] dyndbg: reverse module walk in cat control

2022-07-20 Thread Jim Cromie
/proc/dynamic_debug/control walks the prdbg catalog in "reverse",
fix this by adding new ddebug_tables to tail of list.

This puts init/main.c entries 1st, which looks intentional.

no functional changes.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 8faf584f2f4b..7fb99492c16f 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -970,7 +970,7 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
dt->ddebugs = tab;
 
mutex_lock(&ddebug_lock);
-   list_add(&dt->link, &ddebug_tables);
+   list_add_tail(&dt->link, &ddebug_tables);
mutex_unlock(&ddebug_lock);
 
vpr_info("%3u debug prints in module %s\n", n, dt->mod_name);
-- 
2.36.1



[PATCH v4 08/41] dyndbg: add test_dynamic_debug module

2022-07-20 Thread Jim Cromie
Provide a simple module to allow testing DYNAMIC_DEBUG behavior.  It
calls do_prints() from module-init, and with a sysfs-node.

  dmesg -C
  dmesg -w &
  modprobe test_dynamic_debug dyndbg=+p
  echo 1 > /sys/module/dynamic_debug/parameters/verbose

  cat /sys/module/test_dynamic_debug/parameters/do_prints
  echo module test_dynamic_debug +mftl > /proc/dynamic_debug/control
  echo junk > /sys/module/test_dynamic_debug/parameters/do_prints

Signed-off-by: Jim Cromie 
---
 MAINTAINERS  |  2 ++
 lib/Kconfig.debug| 10 ++
 lib/Makefile |  1 +
 lib/test_dynamic_debug.c | 70 
 4 files changed, 83 insertions(+)
 create mode 100644 lib/test_dynamic_debug.c

diff --git a/MAINTAINERS b/MAINTAINERS
index f679152bdbad..663307268285 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7094,6 +7094,8 @@ M:Jason Baron 
 S: Maintained
 F: include/linux/dynamic_debug.h
 F: lib/dynamic_debug.c
+M: Jim Cromie 
+F: lib/test_dynamic_debug.c
 
 DYNAMIC INTERRUPT MODERATION
 M: Tal Gilboa 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2e24db4bff19..ca5978e1d18a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2529,6 +2529,16 @@ config TEST_STATIC_KEYS
 
  If unsure, say N.
 
+config TEST_DYNAMIC_DEBUG
+   tristate "Test DYNAMIC_DEBUG"
+   depends on DYNAMIC_DEBUG
+   help
+ This module registers a tracer callback to count enabled
+ pr_debugs in a 'do_debugging' function, then alters their
+ enablements, calls the function, and compares counts.
+
+ If unsure, say N.
+
 config TEST_KMOD
tristate "kmod stress tester"
depends on m
diff --git a/lib/Makefile b/lib/Makefile
index f99bf61f8bbc..9c316df868de 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_TEST_SORT) += test_sort.o
 obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
 obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
 obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
+obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o
 obj-$(CONFIG_TEST_PRINTF) += test_printf.o
 obj-$(CONFIG_TEST_SCANF) += test_scanf.o
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
diff --git a/lib/test_dynamic_debug.c b/lib/test_dynamic_debug.c
new file mode 100644
index ..ba3882ca3e48
--- /dev/null
+++ b/lib/test_dynamic_debug.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Kernel module for testing dynamic_debug
+ *
+ * Authors:
+ *  Jim Cromie 
+ */
+
+#define pr_fmt(fmt) "test_dd: " fmt
+
+#include 
+
+static void do_prints(void); /* device under test */
+
+/* run tests by reading or writing sysfs node */
+
+static int param_set_do_prints(const char *instr, const struct kernel_param 
*kp)
+{
+   do_prints();
+   return 0;
+}
+
+static int param_get_do_prints(char *buffer, const struct kernel_param *kp)
+{
+   do_prints();
+   return scnprintf(buffer, PAGE_SIZE, "did do_prints\n");
+}
+
+static const struct kernel_param_ops param_ops_do_prints = {
+   .set = param_set_do_prints,
+   .get = param_get_do_prints,
+};
+
+module_param_cb(do_prints, &param_ops_do_prints, NULL, 0600);
+
+static void do_alpha(void)
+{
+   pr_debug("do alpha\n");
+}
+static void do_beta(void)
+{
+   pr_debug("do beta\n");
+}
+
+static void do_prints(void)
+{
+   do_alpha();
+   do_beta();
+}
+
+static int __init test_dynamic_debug_init(void)
+{
+   pr_debug("init start\n");
+
+   do_prints();
+
+   pr_debug("init done\n");
+   return 0;
+}
+
+static void __exit test_dynamic_debug_exit(void)
+{
+   pr_debug("exiting\n");
+}
+
+module_init(test_dynamic_debug_init);
+module_exit(test_dynamic_debug_exit);
+
+MODULE_AUTHOR("Jim Cromie ");
+MODULE_LICENSE("GPL");
-- 
2.36.1



[PATCH v4 07/41] dyndbg: let query-modname override actual module name

2022-07-20 Thread Jim Cromie
dyndbg's control-parser: ddebug_parse_query(), requires that search
terms: module, func, file, lineno, are used only once in a query; a
thing cannot be named both foo and bar.

The cited commit added an overriding module modname, taken from the
module loader, which is authoritative.  So it set query.module 1st,
which disallowed its use in the query-string.

But now, it's useful to allow a module-load to enable classes across a
whole (or part of) a subsystem at once.

  # enable (dynamic-debug in) drm only
  modprobe drm dyndbg="class DRM_UT_CORE +p"

  # get drm_helper too
  modprobe drm dyndbg="class DRM_UT_CORE module drm* +p"

  # get everything that knows DRM_UT_CORE
  modprobe drm dyndbg="class DRM_UT_CORE module * +p"

  # also for boot-args:
  drm.dyndbg="class DRM_UT_CORE module * +p"

So convert the override into a default, by filling it only when/after
the query-string omitted the module.

NB: the query class FOO handling is forthcoming.

Fixes: 8e59b5cfb9a6 dynamic_debug: add modname arg to exec_query callchain
Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index e5cbe603000c..5a849716220a 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -385,10 +385,6 @@ static int ddebug_parse_query(char *words[], int nwords,
return -EINVAL;
}
 
-   if (modname)
-   /* support $modname.dyndbg= */
-   query->module = modname;
-
for (i = 0; i < nwords; i += 2) {
char *keyword = words[i];
char *arg = words[i+1];
@@ -429,6 +425,13 @@ static int ddebug_parse_query(char *words[], int nwords,
if (rc)
return rc;
}
+   if (!query->module && modname)
+   /*
+* support $modname.dyndbg=, when
+* not given in the query itself
+*/
+   query->module = modname;
+
vpr_info_dq(query, "parsed");
return 0;
 }
-- 
2.36.1



[PATCH v4 06/41] dyndbg: use ESCAPE_SPACE for cat control

2022-07-20 Thread Jim Cromie
`cat control` currently does octal escape, so '\n' becomes "\012".
Change this to display as "\n" instead, which reads much cleaner.

   :#> head -n7 /proc/dynamic_debug/control
   # filename:lineno [module]function flags format
   init/main.c:1179 [main]initcall_blacklist =_ "blacklisting initcall %s\n"
   init/main.c:1218 [main]initcall_blacklisted =_ "initcall %s blacklisted\n"
   init/main.c:1424 [main]run_init_process =_ "  with arguments:\n"
   init/main.c:1426 [main]run_init_process =_ "%s\n"
   init/main.c:1427 [main]run_init_process =_ "  with environment:\n"
   init/main.c:1429 [main]run_init_process =_ "%s\n"

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 8ff11977b8bd..e5cbe603000c 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -900,7 +900,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
   trim_prefix(dp->filename), dp->lineno,
   iter->table->mod_name, dp->function,
   ddebug_describe_flags(dp->flags, &flags));
-   seq_escape(m, dp->format, "\t\r\n\"");
+   seq_escape_str(m, dp->format, ESCAPE_SPACE, "\t\r\n\"");
seq_puts(m, "\"\n");
 
return 0;
-- 
2.36.1



[PATCH v4 03/41] dyndbg: show both old and new in change-info

2022-07-20 Thread Jim Cromie
print "old => new" flag values to the info("change") message.

no functional change.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index a56c1286ffa4..8faf584f2f4b 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -156,7 +156,7 @@ static int ddebug_change(const struct ddebug_query *query,
struct ddebug_table *dt;
unsigned int newflags;
unsigned int nfound = 0;
-   struct flagsbuf fbuf;
+   struct flagsbuf fbuf, nbuf;
 
/* search for matching ddebugs */
mutex_lock(&ddebug_lock);
@@ -217,11 +217,12 @@ static int ddebug_change(const struct ddebug_query *query,
static_branch_enable(&dp->key.dd_key_true);
}
 #endif
+   v4pr_info("changed %s:%d [%s]%s %s => %s\n",
+ trim_prefix(dp->filename), dp->lineno,
+ dt->mod_name, dp->function,
+ ddebug_describe_flags(dp->flags, &fbuf),
+ ddebug_describe_flags(newflags, &nbuf));
dp->flags = newflags;
-   v4pr_info("changed %s:%d [%s]%s =%s\n",
-trim_prefix(dp->filename), dp->lineno,
-dt->mod_name, dp->function,
-ddebug_describe_flags(dp->flags, &fbuf));
}
}
mutex_unlock(&ddebug_lock);
-- 
2.36.1



[PATCH v4 02/41] dyndbg: fix module.dyndbg handling

2022-07-20 Thread Jim Cromie
For CONFIG_DYNAMIC_DEBUG=N, the ddebug_dyndbg_module_param_cb()
stub-fn is too permissive:

bash-5.1# modprobe drm JUNKdyndbg
bash-5.1# modprobe drm dyndbgJUNK
[   42.933220] dyndbg param is supported only in CONFIG_DYNAMIC_DEBUG builds
[   42.937484] ACPI: bus type drm_connector registered

This caused no ill effects, because unknown parameters are either
ignored by default (with an "unknown parameter" warning, see below),
or ignored because dyndbg allows its no-effect use on non-dyndbg builds.

But since the code has an explicit feedback message, it should be
issued accurately.  Fix with strcmp for exact param-name match.

Reported-by: Rasmus Villemoes 
Fixes: b48420c1d301 dynamic_debug: make dynamic-debug work for module 
initialization
Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index dce631e678dd..f30b01aa9fa4 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -201,7 +201,7 @@ static inline int ddebug_remove_module(const char *mod)
 static inline int ddebug_dyndbg_module_param_cb(char *param, char *val,
const char *modname)
 {
-   if (strstr(param, "dyndbg")) {
+   if (!strcmp(param, "dyndbg")) {
/* avoid pr_warn(), which wants pr_fmt() fully defined */
printk(KERN_WARNING "dyndbg param is supported only in "
"CONFIG_DYNAMIC_DEBUG builds\n");
-- 
2.36.1



[PATCH v4 01/41] dyndbg: fix static_branch manipulation

2022-07-20 Thread Jim Cromie
In https://lore.kernel.org/lkml/20211209150910.ga23...@axis.com/

Vincent's patch commented on, and worked around, a bug toggling
static_branch's, when a 2nd PRINTK-ish flag was added.  The bug
results in a premature static_branch_disable when the 1st of 2 flags
was disabled.

The cited commit computed newflags, but then in the JUMP_LABEL block,
failed to use that result, instead using just one of the terms in it.
Using newflags instead made the code work properly.

This is Vincent's test-case, reduced.  It needs the 2nd flag to
demonstrate the bug, but it's explanatory here.

pt_test() {
echo 5 > /sys/module/dynamic_debug/verbose

site="module tcp" # just one callsite
echo " $site =_ " > /proc/dynamic_debug/control # clear it

# A B ~A ~B
for flg in +T +p "-T #broke here" -p; do
echo " $site $flg " > /proc/dynamic_debug/control
done;

# A B ~B ~A
for flg in +T +p "-p #broke here" -T; do
echo " $site $flg " > /proc/dynamic_debug/control
done
}
pt_test

Fixes: 84da83a6ffc0 dyndbg: combine flags & mask into a struct, simplify with it
CC: vincent.whitchu...@axis.com
Signed-off-by: Jim Cromie 
Acked-by: Jason Baron 
---
.drop @stable, no exposed bug.
.add jbaron ack
---
 lib/dynamic_debug.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index dd7f56af9aed..a56c1286ffa4 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -211,10 +211,11 @@ static int ddebug_change(const struct ddebug_query *query,
continue;
 #ifdef CONFIG_JUMP_LABEL
if (dp->flags & _DPRINTK_FLAGS_PRINT) {
-   if (!(modifiers->flags & _DPRINTK_FLAGS_PRINT))
+   if (!(newflags & _DPRINTK_FLAGS_PRINT))

static_branch_disable(&dp->key.dd_key_true);
-   } else if (modifiers->flags & _DPRINTK_FLAGS_PRINT)
+   } else if (newflags & _DPRINTK_FLAGS_PRINT) {
static_branch_enable(&dp->key.dd_key_true);
+   }
 #endif
dp->flags = newflags;
v4pr_info("changed %s:%d [%s]%s =%s\n",
-- 
2.36.1



[PATCH v4 00/41] DYNDBG: opt-in class'd debug for modules, use in drm.

2022-07-20 Thread Jim Cromie
Oof, v3 had 2 copies renumbered and intermingled. Resending w/o the crud.
v4 missed dri-devel & patchwork, sending there now. with doc tweak per Bagas.

It's also at https://github.com/jimc/linux.git, in the dyn-drm-trc branch.


Hi Jason, Greg, DRM-folk,

This adds 'typed' "class FOO" support to dynamic-debug, where 'typed'
means either DISJOINT (like drm debug categories), or VERBOSE (like
nouveau debug-levels).  Use it in DRM modules: core, helpers, and in
drivers i915, amdgpu, nouveau.

If a module is using class'd prdbgs (pr_debug_class, dev_dbg_class, or
adapted drm_dbg_) or similar in its code, it can "opt in" to
allow dyndbg to manipulate those class'd prdebugs, by declaring in a
c-file:

 DECLARE_DYNDBG_CLASSMAP(drm_debug_classes,
DD_CLASS_TYPE_DISJOINT, 0,
"DRM_UT_CORE",
"DRM_UT_DRIVER",
"DRM_UT_KMS",
"DRM_UT_PRIME",
"DRM_UT_ATOMIC",
"DRM_UT_VBL",
"DRM_UT_STATE",
"DRM_UT_LEASE",
"DRM_UT_DP",
"DRM_UT_DRMRES");
// how-to stringify __va_args inside the macro ?

By doing this, a module tells dyndbg that it:

   - is using class-ids [0..N] in prdbg callsites
 0..N are the numeric values of DRM_UT_CORE..DRM_UT_DRMRES
   - wants to refer to them by class-names [0..N]
   - is mapping those names to those class-ids
   - expects users to enable them via >control or >parameter/knob

Then, a user can enable the prdbgs by their class:

   :#> echo class DRM_UT_KMS +p > /proc/dynamic_debug/control

And with another 3-line bitmap param decl/init, wrapping the
drm_debug_classes var in a module-param-cb:

   :#> echo 0x1 > /sys/module/drm/parameters/debug

and optionally:

   :#> echo +DRM_UT_CORE,-DRM_UT_KMS \
> /sys/module/drm/parameters/debug_cats

DYNAMIC_DEBUG gets:

a new .class_id:5 field in struct _ddebug (the callsite record), big
enough to represent drm_debug_category (after squeezing); it defaults
to 31 for all existing prdbgs.  class_id can also represent any
reasonable number of verbose levels (30 is impractical, istm).

classmaps (as declared by macro above) are in their own linker
section, and are loaded by kernel/module, and handled by add_module,
which attaches classmaps to their module's ddebug table.
 
ddebug_change() handles a class FOO query by validating that FOO is
known by each module in the loop.  The query is skipped unless the
module knows FOO, so no changes are possible w/o a good classname.

Without class FOO in a query/command, only ids=31 can be changed by
that query.  This protects all class'd prdbgs from changes by old,
class-less user queries.

With this support, the module opt-in approach means that:

   - modules declare classnames they like, meaningful names, DRM_UT_*
 these are numbered [0..N]
   - modules call pr_debug_class(N, "fmt..",...)
 or drm_dbg(CAT, "fmt..",...) - same form.
   - class-id space, while limited:0-30, is private to each module
   - "class FOO" is only way to enable a class'd prdbg
   - unrelated modules use 0..N separately, for different purposes.
   - modules "share" classnames by separate decls (uses of macro)
 all drm modules reuse the above declaration.
 then they respond together to a >control

4 CLASS_TYPES are defined; they split behavior on 2 factors:

   1. independent bits vs related:(X sysknob ambiguity
   as was case when both were accepted on same knob
 - narrower interfaces
   uint is uint
 - can defer SYMBOLIC handling, but keep the enums.
   it has no users ...
 - can later add 2 more ENUMS allowing both inputs
   in separate VERBOSE & DISJOINT choices
   then authors choice if they want to accept mixed input
 - can enumerate "weird" relations if needed
   DISJOINT|VERBOSE should cover everything I can foresee
   but there's room for DD_CLASS_TYPE_STOCHASTIC (over the garage)

NB: DISJOINT v RELATED cover the space; there is no semi-related.  The
relation could differ from (xhttps://lore.kernel.org/lkml/20220516225640.3102269-1-jim.cro...@gmail.com/

summary of diffs:

 - rebased on 5.19-rc6 to pick up kernel/module changes
 - tracefs bits now use __vstring, __vstr_assign, from S.Rostedt
 
 - 4 class-map-types - as outlined above
   now supports VERBOSE semantics, WIP nouveau integration.
   v2 became the DISJOINT use case
   Lots of API-ish surface area here *RFC*

 - class-maps now in section("__dyndbg_classes")
   class FOO queries are available at earlyboot / module-load
   drop (un)?register_classes()

 - test-dynamic-debug module
   tests the 4 CLASS-TYPES
   good place to bikeshed / paintshop the API

 - nouveau - start poking - WIP
   NV_PRINT -> dev_dbg (creates 632 prdbgs, but not class'd)
   VERBOSE classes declared to see how they "fit", unused yet.

Summary:

 - plenty of new stuff here.
 - plenty of new API surface area.
 -

Re: [PATCH] drm/amd/display: reduce stack size in dcn32 dml

2022-07-20 Thread Maíra Canal



On 7/19/22 18:14, Alex Deucher wrote:
> Move additional dummy structures off the stack and into
> the dummy vars structure.
> 
> Fixes the following:
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In 
> function 
> 'DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation':
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1659:1:
>  error: the frame size of 2144 bytes is larger than 2048 bytes 
> [-Werror=frame-larger-than=]
>  1659 | }
>   | ^
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In 
> function 'dml32_ModeSupportAndSystemConfigurationFull':
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:3799:1:
>  error: the frame size of 2464 bytes is larger than 2048 bytes 
> [-Werror=frame-larger-than=]
>  3799 | } // ModeSupportAndSystemConfigurationFull
>   | ^
> 
> Signed-off-by: Alex Deucher 
> Cc: Stephen Rothwell 
> ---

Reviewed-by: Maíra Canal 

I believe dcn20 could also receive the same treatment, as I'm still
getting a similar warning on display_mode_vba_20.c:

drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/display_mode_vba_20.c:1085:13:
warning: stack frame size (1356) exceeds limit (1024) in
'dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation'
[-Wframe-larger-than]
static void
dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
^
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/display_mode_vba_20.c:3286:6:
warning: stack frame size (1484) exceeds limit (1024) in
'dml20_ModeSupportAndSystemConfigurationFull' [-Wframe-larger-than]
void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib
*mode_lib)
 ^
2 warnings generated.

To reproduce it on clang-14, you can run:

make -skj"$(nproc)" LLVM=1 LLVM_IAS=1 i386_defconfig
scripts/config -e DRM_AMDGPU
make -skj"$(nproc)" LLVM=1 LLVM_IAS=1 olddefconfig
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/display_mode_vba_20.o

I believe it is also reproducible on GCC.

Best regards,
- Maíra Canal

>  .../dc/dml/dcn32/display_mode_vba_32.c| 214 --
>  .../drm/amd/display/dc/dml/display_mode_vba.h |   3 +
>  2 files changed, 100 insertions(+), 117 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
> b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
> index 349e36ae9333..441311cb9a86 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
> @@ -67,6 +67,18 @@ static void 
> DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
>   int iteration;
>   double MaxTotalRDBandwidth;
>   unsigned int NextPrefetchMode;
> + double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
> + bool DestinationLineTimesForPrefetchLessThan2 = false;
> + bool VRatioPrefetchMoreThanMax = false;
> + double dummy_unit_vector[DC__NUM_DPP__MAX];
> + double TWait;
> + double dummy_single[2];
> + bool dummy_boolean[1];
> + enum clock_change_support dummy_dramchange_support;
> + enum dm_fclock_change_support dummy_fclkchange_support;
> + bool dummy_USRRetrainingSupport;
> + double TotalWRBandwidth = 0;
> + double WRBandwidth = 0;
>  
>  #ifdef __DML_VBA_DEBUG__
>   dml_print("DML::%s: --- START ---\n", __func__);
> @@ -702,11 +714,6 @@ static void 
> DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
>   NextPrefetchMode = 
> mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb];
>  
>   do {
> - double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
> - bool DestinationLineTimesForPrefetchLessThan2 = false;
> - bool VRatioPrefetchMoreThanMax = false;
> - double dummy_unit_vector[DC__NUM_DPP__MAX];
> -
>   MaxTotalRDBandwidth = 0;
>  #ifdef __DML_VBA_DEBUG__
>   dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, 
> mode_lib->vba.VStartupLines);
> @@ -715,41 +722,39 @@ static void 
> DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
>   /* NOTE PerfetchMode variable is invalid in DAL as per 
> the input received.
>* Hence the direction is to use PrefetchModePerState.
>*/
> - double TWait = dml32_CalculateTWait(
> - 
> mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
> - 
> mode_lib->vba.UsesMALLForPStateChange[k],
> - 
> mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
> - mode_lib->vba.DRRDisplay[k],
> - mode_lib->vba.DRAMClockC

Re: [PATCH] drm/amdgpu: Fix the incomplete product number

2022-07-20 Thread Christian König

Am 20.07.22 um 16:26 schrieb André Almeida:

Às 05:26 de 20/07/22, Roy Sun escreveu:

The comments say that the product number is a 16-digit HEX string so the
buffer needs to be at least 17 characters to hold the NUL terminator.


Which comment?


In internal documentation of the eeprom layout I think.




Signed-off-by: Roy Sun 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9f729a648005..187e3dae3965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1019,7 +1019,7 @@ struct amdgpu_device {
boolpsp_sysfs_en;
  
  	/* Chip product information */

-   charproduct_number[16];
+   charproduct_number[20];

If 17 is enough, why set it to 20?


That's the next multiple of 4 and so avoids alignment issues.


Christian.




charproduct_name[AMDGPU_PRODUCT_NAME_LEN];
charserial[20];
  




Re: [PATCH] drm/amdgpu: Fix the incomplete product number

2022-07-20 Thread André Almeida
Às 05:26 de 20/07/22, Roy Sun escreveu:
> The comments say that the product number is a 16-digit HEX string so the
> buffer needs to be at least 17 characters to hold the NUL terminator.
> 

Which comment?

> Signed-off-by: Roy Sun 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9f729a648005..187e3dae3965 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1019,7 +1019,7 @@ struct amdgpu_device {
>   boolpsp_sysfs_en;
>  
>   /* Chip product information */
> - charproduct_number[16];
> + charproduct_number[20];

If 17 is enough, why set it to 20?

>   charproduct_name[AMDGPU_PRODUCT_NAME_LEN];
>   charserial[20];
>  


Re: [PATCH v1 4/6] dma-buf: Acquire wait-wound context on attachment

2022-07-20 Thread Dmitry Osipenko
On 7/20/22 11:29, Christian König wrote:
> Am 19.07.22 um 22:05 schrieb Dmitry Osipenko:
>> On 7/15/22 09:59, Dmitry Osipenko wrote:
>>> On 7/15/22 09:50, Christian König wrote:
 Am 15.07.22 um 02:52 schrieb Dmitry Osipenko:
> Intel i915 GPU driver uses wait-wound mutex to lock multiple GEMs
> on the
> attachment to the i915 dma-buf. In order to let all drivers utilize
> shared
> wait-wound context during attachment in a general way, make dma-buf
> core to
> acquire the ww context internally for the attachment operation and
> update
> i915 driver to use the importer's ww context instead of the
> internal one.
>
>   From now on all dma-buf exporters shall use the importer's ww
> context
> for
> the attachment operation.
>
> Signed-off-by: Dmitry Osipenko 
> ---
>    drivers/dma-buf/dma-buf.c |  8 +-
>    drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  2 +-
>    .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  2 +-
>    drivers/gpu/drm/i915/gem/i915_gem_object.h    |  6 ++---
>    drivers/gpu/drm/i915/i915_gem_evict.c |  2 +-
>    drivers/gpu/drm/i915/i915_gem_ww.c    | 26
> +++
>    drivers/gpu/drm/i915/i915_gem_ww.h    | 15 +--
>    7 files changed, 47 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 0ee588276534..37545ecb845a 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -807,6 +807,8 @@ static struct sg_table * __map_dma_buf(struct
> dma_buf_attachment *attach,
>     * Optionally this calls &dma_buf_ops.attach to allow
> device-specific attach
>     * functionality.
>     *
> + * Exporters shall use ww_ctx acquired by this function.
> + *
>     * Returns:
>     *
>     * A pointer to newly created &dma_buf_attachment on success, or a
> negative
> @@ -822,6 +824,7 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf
> *dmabuf, struct device *dev,
>    void *importer_priv)
>    {
>    struct dma_buf_attachment *attach;
> +    struct ww_acquire_ctx ww_ctx;
>    int ret;
>      if (WARN_ON(!dmabuf || !dev))
> @@ -841,7 +844,8 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf
> *dmabuf, struct device *dev,
>    attach->importer_ops = importer_ops;
>    attach->importer_priv = importer_priv;
>    -    dma_resv_lock(dmabuf->resv, NULL);
> +    ww_acquire_init(&ww_ctx, &reservation_ww_class);
> +    dma_resv_lock(dmabuf->resv, &ww_ctx);
 That won't work like this. The core property of a WW context is that
 you
 need to unwind all the locks and re-acquire them with the contended one
 first.

 When you statically lock the imported one here you can't do that any
 more.
>>> You're right. I felt that something is missing here, but couldn't
>>> notice. I'll think more about this and enable
>>> CONFIG_DEBUG_WW_MUTEX_SLOWPATH. Thank you!
>>>
>> Christian, do you think we could make an exception for the attach()
>> callback and make the exporter responsible for taking the resv lock? It
>> will be inconsistent with the rest of the callbacks, where importer
>> takes the lock, but it will be the simplest and least invasive solution.
>> It's very messy to do a cross-driver ww locking, I don't think it's the
>> right approach.
> 
> So to summarize the following calls will require that the caller hold
> the resv lock:
> 1. dma_buf_pin()/dma_buf_unpin()
> 2. dma_buf_map_attachment()/dma_buf_unmap_attachment()
> 3. dma_buf_vmap()/dma_buf_vunmap()
> 4. dma_buf_move_notify()
> 
> The following calls require that the caller does not hold the resv lock:
> 1. dma_buf_attach()/dma_buf_dynamic_attach()/dma_buf_detach()
> 2. dma_buf_export()/dma_buf_fd()
> 3. dma_buf_get()/dma_buf_put()
> 4. dma_buf_begin_cpu_access()/dma_buf_end_cpu_access()
> 
> If that's correct then that would work for me as well, but we should
> probably document this.

Looks good, thank you. I'll try this variant.

> Or let me ask the other way around: What calls exactly do you need to
> change to solve your original issue? That was vmap/vunmap, wasn't it? If
> yes then let's concentrate on those for the moment.

Originally, Daniel Vetter asked to sort out the dma-buf lockings across
all drivers, so we could replace custom locks in DRM-SHMEM with the resv
lock, otherwise there were no guarantees that we won't have deadlocks in
the dma-buf code paths.

The vmap/vunmap is one of the paths that needs to be sorted out, there
is no particular issue with it, just need to specify the convention. The
mmaping was the other questionable path and we concluded that it's
better to prohibit dma-buf mappings for DRM entirely. Lastly, there is
i915 attach() that uses the ww locking.

-- 
Best regards,
Dmitry


Re: [PATCH 1/2] drm/amdgpu: drop non-necessary call trace dump

2022-07-20 Thread Christian König

Am 20.07.22 um 11:06 schrieb Evan Quan:

This extra call trace dump comes out in every gpu reset.
And it gives people the wrong impression that something
went wrong, although actually nothing did.

Signed-off-by: Evan Quan 
Change-Id: I884af405b6b3cd52b9024408a21fd39811a01f4d


Acked-by: Christian König 

And please guys don't add calls to dump_stack() in the future. That's 
really not supposed to be here.


Thanks,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
  1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b79ee4ffb879..1b1a70a6da18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4667,7 +4667,6 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device 
*adev)
int i;
  
  	lockdep_assert_held(&adev->reset_domain->sem);

-   dump_stack();
  
  	for (i = 0; i < adev->num_regs; i++) {

adev->reset_dump_reg_value[i] = 
RREG32(adev->reset_dump_reg_list[i]);




Re: [PATCH] drm/amdgpu: Fix the incomplete product number

2022-07-20 Thread Christian König

Am 20.07.22 um 10:26 schrieb Roy Sun:

The comments say that the product number is a 16-digit HEX string so the
buffer needs to be at least 17 characters to hold the NUL terminator.

Signed-off-by: Roy Sun 


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9f729a648005..187e3dae3965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1019,7 +1019,7 @@ struct amdgpu_device {
boolpsp_sysfs_en;
  
  	/* Chip product information */

-   charproduct_number[16];
+   charproduct_number[20];
charproduct_name[AMDGPU_PRODUCT_NAME_LEN];
charserial[20];
  




[PATCH 2/2] drm/amdgpu: move mes self test after drm sched re-started

2022-07-20 Thread Evan Quan
From: Jack Xiao 

The mes self test relies on vm mapping, so move it after
drm sched is re-started so that vm mapping can work
during gpu reset.

Signed-off-by: Jack Xiao 
Acked-and-tested-by: Evan Quan 
Change-Id: Ib202c04d86191ca47da90d27c2a8cf9e7c8e6732
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 3 ++-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1b1a70a6da18..8fa3cf8e3a46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5298,6 +5298,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
drm_sched_start(&ring->sched, 
!tmp_adev->asic_reset_res);
}
 
+   if (adev->enable_mes)
+   amdgpu_mes_self_test(tmp_adev);
+
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && 
!job_signaled) {
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 0082e2e1e0b4..067d10073a56 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -1233,7 +1233,8 @@ static int mes_v10_0_late_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   amdgpu_mes_self_test(adev);
+   if (!amdgpu_in_reset(adev))
+   amdgpu_mes_self_test(adev);
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 777f9268d92d..120ea294abef 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -1280,7 +1280,8 @@ static int mes_v11_0_late_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   amdgpu_mes_self_test(adev);
+   if (!amdgpu_in_reset(adev))
+   amdgpu_mes_self_test(adev);
 
return 0;
 }
-- 
2.29.0



[PATCH 1/2] drm/amdgpu: drop non-necessary call trace dump

2022-07-20 Thread Evan Quan
This extra call trace dump comes out in every gpu reset.
And it gives people the wrong impression that something
went wrong, although actually nothing did.

Signed-off-by: Evan Quan 
Change-Id: I884af405b6b3cd52b9024408a21fd39811a01f4d
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b79ee4ffb879..1b1a70a6da18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4667,7 +4667,6 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device 
*adev)
int i;
 
lockdep_assert_held(&adev->reset_domain->sem);
-   dump_stack();
 
for (i = 0; i < adev->num_regs; i++) {
adev->reset_dump_reg_value[i] = 
RREG32(adev->reset_dump_reg_list[i]);
-- 
2.29.0



Re: [PATCH 4/4] drm/ttm: Switch to using the new intersect callback

2022-07-20 Thread Christian König




Am 20.07.22 um 09:36 schrieb Arunpravin Paneer Selvam:

Use new intersect callback instead of having a generic
placement range verification.

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 45 +++--
  drivers/gpu/drm/ttm/ttm_bo.c|  9 +++--
  drivers/gpu/drm/ttm/ttm_resource.c  |  5 +--
  3 files changed, 20 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 170935c294f5..7d25a10395c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1328,11 +1328,12 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device 
*adev, struct ttm_tt *ttm,
  static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
const struct ttm_place *place)
  {
-   unsigned long num_pages = bo->resource->num_pages;
struct dma_resv_iter resv_cursor;
-   struct amdgpu_res_cursor cursor;
struct dma_fence *f;
  
+	if (!amdgpu_bo_is_amdgpu_bo(bo))

+   return ttm_bo_eviction_valuable(bo, place);
+
/* Swapout? */
if (bo->resource->mem_type == TTM_PL_SYSTEM)
return true;
@@ -1351,40 +1352,20 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct 
ttm_buffer_object *bo,
return false;
}
  
-	switch (bo->resource->mem_type) {

-   case AMDGPU_PL_PREEMPT:
-   /* Preemptible BOs don't own system resources managed by the
-* driver (pages, VRAM, GART space). They point to resources
-* owned by someone else (e.g. pageable memory in user mode
-* or a DMABuf). They are used in a preemptible context so we
-* can guarantee no deadlocks and good QoS in case of MMU
-* notifiers or DMABuf move notifiers from the resource owner.
-*/
+   /* Preemptible BOs don't own system resources managed by the
+* driver (pages, VRAM, GART space). They point to resources
+* owned by someone else (e.g. pageable memory in user mode
+* or a DMABuf). They are used in a preemptible context so we
+* can guarantee no deadlocks and good QoS in case of MMU
+* notifiers or DMABuf move notifiers from the resource owner.
+*/
+   if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
return false;
-   case TTM_PL_TT:
-   if (amdgpu_bo_is_amdgpu_bo(bo) &&
-   amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
-   return false;
-   return true;
  
-	case TTM_PL_VRAM:

-   /* Check each drm MM node individually */
-   amdgpu_res_first(bo->resource, 0, (u64)num_pages << PAGE_SHIFT,
-&cursor);
-   while (cursor.remaining) {
-   if (place->fpfn < PFN_DOWN(cursor.start + cursor.size)
-   && !(place->lpfn &&
-place->lpfn <= PFN_DOWN(cursor.start)))
-   return true;
-
-   amdgpu_res_next(&cursor, cursor.size);
-   }
+   if (bo->resource->mem_type == TTM_PL_TT &&
+   amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
return false;
  
-	default:

-   break;
-   }
-
return ttm_bo_eviction_valuable(bo, place);
  }
  
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c

index c1bd006a5525..03409409e43e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -518,6 +518,9 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
  bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
  const struct ttm_place *place)
  {
+   struct ttm_resource *res = bo->resource;
+   struct ttm_device *bdev = bo->bdev;
+
dma_resv_assert_held(bo->base.resv);
if (bo->resource->mem_type == TTM_PL_SYSTEM)
return true;
@@ -525,11 +528,7 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
/* Don't evict this BO if it's outside of the
 * requested placement range
 */
-   if (place->fpfn >= (bo->resource->start + bo->resource->num_pages) ||
-   (place->lpfn && place->lpfn <= bo->resource->start))
-   return false;
-
-   return true;
+   return ttm_resource_intersect(bdev, res, place, bo->base.size);
  }
  EXPORT_SYMBOL(ttm_bo_eviction_valuable);
  
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c

index 84c21f92b422..ff3e9058943c 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -285,6 +285,8 @@ static bool ttm_resource_places_compat(struct ttm_resource 
*res,
   

Re: [PATCH 1/4] drm/ttm: add new intersect callback to res mgr

2022-07-20 Thread Christian König

Am 20.07.22 um 09:36 schrieb Arunpravin Paneer Selvam:

- This allows the resource manager to handle intersection
   of placement and resources.

- Add callback function to amdgpu driver module fetching
   start offset from buddy allocator.


Probably better to only add the callback and ttm_resource_intersect() 
wrapper function in this patch and then move the amdgpu and 
ttm_range_manager changes to separate patches.


Apart from that looks good to me.

Regards,
Christian.



Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c  | 19 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 33 
  drivers/gpu/drm/ttm/ttm_range_manager.c  | 17 ++
  drivers/gpu/drm/ttm/ttm_resource.c   | 28 +
  include/drm/ttm/ttm_resource.h   | 20 
  5 files changed, 117 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8c6b2284cf56..727c80134aa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -204,6 +204,24 @@ void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
amdgpu_gart_invalidate_tlb(adev);
  }
  
+/**

+ * amdgpu_gtt_mgr_intersect - test for intersection
+ *
+ * @man: Our manager object
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation
+ *
+ * Simplified intersection test, only interesting if we need GART or not.
+ */
+static bool amdgpu_gtt_mgr_intersect(struct ttm_resource_manager *man,
+struct ttm_resource *res,
+const struct ttm_place *place,
+size_t size)
+{
+   return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+}
+
  /**
   * amdgpu_gtt_mgr_debug - dump VRAM table
   *
@@ -225,6 +243,7 @@ static void amdgpu_gtt_mgr_debug(struct 
ttm_resource_manager *man,
  static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
.alloc = amdgpu_gtt_mgr_new,
.free = amdgpu_gtt_mgr_del,
+   .intersect = amdgpu_gtt_mgr_intersect,
.debug = amdgpu_gtt_mgr_debug
  };
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 28ec5f8ac1c1..ed0d10fe0b88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -720,6 +720,38 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr 
*mgr)
return atomic64_read(&mgr->vis_usage);
  }
  
+/**

+ * amdgpu_vram_mgr_intersect - test each drm buddy block for intersection
+ *
+ * @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
+ *
+ * Test each drm buddy block for intersection for eviction decision.
+ */
+static bool amdgpu_vram_mgr_intersect(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+   struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+   struct list_head *list = &mgr->blocks;
+   struct drm_buddy_block *block;
+   u32 num_pages = PFN_UP(size);
+   u32 start;
+
+   /* Check each drm buddy block individually */
+   list_for_each_entry(block, list, link) {
+   start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+   if (start < place->fpfn ||
+   (place->lpfn && (start + num_pages) > place->lpfn))
+   return false;
+   }
+
+   return true;
+}
+
  /**
   * amdgpu_vram_mgr_debug - dump VRAM table
   *
@@ -753,6 +785,7 @@ static void amdgpu_vram_mgr_debug(struct 
ttm_resource_manager *man,
  static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
.alloc  = amdgpu_vram_mgr_new,
.free   = amdgpu_vram_mgr_del,
+   .intersect = amdgpu_vram_mgr_intersect,
.debug  = amdgpu_vram_mgr_debug
  };
  
diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c b/drivers/gpu/drm/ttm/ttm_range_manager.c

index d91666721dc6..bf5de1978ead 100644
--- a/drivers/gpu/drm/ttm/ttm_range_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_range_manager.c
@@ -113,6 +113,22 @@ static void ttm_range_man_free(struct ttm_resource_manager 
*man,
kfree(node);
  }
  
+static bool ttm_range_man_intersect(struct ttm_resource_manager *man,

+   struct ttm_resource *res,
+   const struct ttm_place *place,
+   size_t size)
+{
+   struct drm_mm_node *node = &to_ttm_range_mgr_node(res)->mm_nodes[0];
+   u32 num_pages = PFN_UP(size);
+
+   /* Don't evict BOs outside of the 

Re: [PATCH v1 4/6] dma-buf: Acquire wait-wound context on attachment

2022-07-20 Thread Christian König

Am 19.07.22 um 22:05 schrieb Dmitry Osipenko:

On 7/15/22 09:59, Dmitry Osipenko wrote:

On 7/15/22 09:50, Christian König wrote:

Am 15.07.22 um 02:52 schrieb Dmitry Osipenko:

Intel i915 GPU driver uses wait-wound mutex to lock multiple GEMs on the
attachment to the i915 dma-buf. In order to let all drivers utilize
shared
wait-wound context during attachment in a general way, make dma-buf
core to
acquire the ww context internally for the attachment operation and update
i915 driver to use the importer's ww context instead of the internal one.

  From now on all dma-buf exporters shall use the importer's ww context
for
the attachment operation.

Signed-off-by: Dmitry Osipenko 
---
   drivers/dma-buf/dma-buf.c |  8 +-
   drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  2 +-
   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  2 +-
   drivers/gpu/drm/i915/gem/i915_gem_object.h    |  6 ++---
   drivers/gpu/drm/i915/i915_gem_evict.c |  2 +-
   drivers/gpu/drm/i915/i915_gem_ww.c    | 26 +++
   drivers/gpu/drm/i915/i915_gem_ww.h    | 15 +--
   7 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 0ee588276534..37545ecb845a 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -807,6 +807,8 @@ static struct sg_table * __map_dma_buf(struct
dma_buf_attachment *attach,
    * Optionally this calls &dma_buf_ops.attach to allow
device-specific attach
    * functionality.
    *
+ * Exporters shall use ww_ctx acquired by this function.
+ *
    * Returns:
    *
    * A pointer to newly created &dma_buf_attachment on success, or a
negative
@@ -822,6 +824,7 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf
*dmabuf, struct device *dev,
   void *importer_priv)
   {
   struct dma_buf_attachment *attach;
+    struct ww_acquire_ctx ww_ctx;
   int ret;
     if (WARN_ON(!dmabuf || !dev))
@@ -841,7 +844,8 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf
*dmabuf, struct device *dev,
   attach->importer_ops = importer_ops;
   attach->importer_priv = importer_priv;
   -    dma_resv_lock(dmabuf->resv, NULL);
+    ww_acquire_init(&ww_ctx, &reservation_ww_class);
+    dma_resv_lock(dmabuf->resv, &ww_ctx);

That won't work like this. The core property of a WW context is that you
need to unwind all the locks and re-acquire them with the contended one
first.

When you statically lock the imported one here you can't do that any more.

You're right. I felt that something is missing here, but couldn't
notice. I'll think more about this and enable
CONFIG_DEBUG_WW_MUTEX_SLOWPATH. Thank you!


Christian, do you think we could make an exception for the attach()
callback and make the exporter responsible for taking the resv lock? It
will be inconsistent with the rest of the callbacks, where importer
takes the lock, but it will be the simplest and least invasive solution.
It's very messy to do a cross-driver ww locking, I don't think it's the
right approach.


So to summarize the following calls will require that the caller hold 
the resv lock:

1. dma_buf_pin()/dma_buf_unpin()
2. dma_buf_map_attachment()/dma_buf_unmap_attachment()
3. dma_buf_vmap()/dma_buf_vunmap()
4. dma_buf_move_notify()

The following calls require that the caller does not hold the resv lock:
1. dma_buf_attach()/dma_buf_dynamic_attach()/dma_buf_detach()
2. dma_buf_export()/dma_buf_fd()
3. dma_buf_get()/dma_buf_put()
4. dma_buf_begin_cpu_access()/dma_buf_end_cpu_access()

If that's correct then that would work for me as well, but we should 
probably document this.


Or let me ask the other way around: What calls exactly do you need to 
change to solve your original issue? That was vmap/vunmap, wasn't it? If 
yes then let's concentrate on those for the moment.


Regards,
Christian.


Re: [PATCH] drm/amdgpu: Fix the incomplete product number

2022-07-20 Thread Sun, Roy
[AMD Official Use Only - General]

Double checked.

BR
Roy

From: Roy Sun 
Sent: Wednesday, July 20, 2022 4:26 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Sun, Roy 
Subject: [PATCH] drm/amdgpu: Fix the incomplete product number

The comments say that the product number is a 16-digit HEX string so the
buffer needs to be at least 17 characters to hold the NUL terminator.

Signed-off-by: Roy Sun 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9f729a648005..187e3dae3965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1019,7 +1019,7 @@ struct amdgpu_device {
 boolpsp_sysfs_en;

 /* Chip product information */
-   charproduct_number[16];
+   charproduct_number[20];
 charproduct_name[AMDGPU_PRODUCT_NAME_LEN];
 charserial[20];

--
2.34.1



[PATCH] drm/amdgpu: Fix the incomplete product number

2022-07-20 Thread Roy Sun
The comments say that the product number is a 16-digit HEX string so the
buffer needs to be at least 17 characters to hold the NUL terminator.

Signed-off-by: Roy Sun 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9f729a648005..187e3dae3965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1019,7 +1019,7 @@ struct amdgpu_device {
        bool                            psp_sysfs_en;
 
        /* Chip product information */
-       char                            product_number[16];
+       char                            product_number[20];
        char                            product_name[AMDGPU_PRODUCT_NAME_LEN];
        char                            serial[20];
 
-- 
2.34.1



[PATCH 3/4] drm/nouveau: Add intersect callback function

2022-07-20 Thread Arunpravin Paneer Selvam
Add a new intersect callback function fetching the
start offset from struct ttm_resource.

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/nouveau/nouveau_mem.c | 15 +++
 drivers/gpu/drm/nouveau/nouveau_mem.h |  3 +++
 drivers/gpu/drm/nouveau/nouveau_ttm.c | 12 
 3 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c 
b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 2e517cdc24c9..b8a773f03c32 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -187,3 +187,18 @@ nouveau_mem_new(struct nouveau_cli *cli, u8 kind, u8 comp,
*res = &mem->base;
return 0;
 }
+
+bool
+nouveau_mem_intersect(struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+   u32 num_pages = PFN_UP(size);
+
+   /* Don't evict BOs outside of the requested placement range */
+   if (place->fpfn >= (res->start + num_pages) ||
+   (place->lpfn && place->lpfn <= res->start))
+   return false;
+
+   return true;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.h 
b/drivers/gpu/drm/nouveau/nouveau_mem.h
index 325551eba5cd..349fefe48e3c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.h
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.h
@@ -25,6 +25,9 @@ int nouveau_mem_new(struct nouveau_cli *, u8 kind, u8 comp,
struct ttm_resource **);
 void nouveau_mem_del(struct ttm_resource_manager *man,
 struct ttm_resource *);
+bool nouveau_mem_intersect(struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size);
 int nouveau_mem_vram(struct ttm_resource *, bool contig, u8 page);
 int nouveau_mem_host(struct ttm_resource *, struct ttm_tt *);
 void nouveau_mem_fini(struct nouveau_mem *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c 
b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 85f1f5a0fe5d..46afd57008dd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -42,6 +42,15 @@ nouveau_manager_del(struct ttm_resource_manager *man,
nouveau_mem_del(man, reg);
 }
 
+static bool
+nouveau_manager_intersect(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+   return nouveau_mem_intersect(res, place, size);
+}
+
 static int
 nouveau_vram_manager_new(struct ttm_resource_manager *man,
 struct ttm_buffer_object *bo,
@@ -73,6 +82,7 @@ nouveau_vram_manager_new(struct ttm_resource_manager *man,
 const struct ttm_resource_manager_func nouveau_vram_manager = {
.alloc = nouveau_vram_manager_new,
.free = nouveau_manager_del,
+   .intersect = nouveau_manager_intersect,
 };
 
 static int
@@ -97,6 +107,7 @@ nouveau_gart_manager_new(struct ttm_resource_manager *man,
 const struct ttm_resource_manager_func nouveau_gart_manager = {
.alloc = nouveau_gart_manager_new,
.free = nouveau_manager_del,
+   .intersect = nouveau_manager_intersect,
 };
 
 static int
@@ -130,6 +141,7 @@ nv04_gart_manager_new(struct ttm_resource_manager *man,
 const struct ttm_resource_manager_func nv04_gart_manager = {
.alloc = nv04_gart_manager_new,
.free = nouveau_manager_del,
+   .intersect = nouveau_manager_intersect,
 };
 
 static int
-- 
2.25.1



[PATCH 4/4] drm/ttm: Switch to using the new intersect callback

2022-07-20 Thread Arunpravin Paneer Selvam
Use new intersect callback instead of having a generic
placement range verification.

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 45 +++--
 drivers/gpu/drm/ttm/ttm_bo.c|  9 +++--
 drivers/gpu/drm/ttm/ttm_resource.c  |  5 +--
 3 files changed, 20 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 170935c294f5..7d25a10395c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1328,11 +1328,12 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device 
*adev, struct ttm_tt *ttm,
 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
const struct ttm_place *place)
 {
-   unsigned long num_pages = bo->resource->num_pages;
struct dma_resv_iter resv_cursor;
-   struct amdgpu_res_cursor cursor;
struct dma_fence *f;
 
+   if (!amdgpu_bo_is_amdgpu_bo(bo))
+   return ttm_bo_eviction_valuable(bo, place);
+
/* Swapout? */
if (bo->resource->mem_type == TTM_PL_SYSTEM)
return true;
@@ -1351,40 +1352,20 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct 
ttm_buffer_object *bo,
return false;
}
 
-   switch (bo->resource->mem_type) {
-   case AMDGPU_PL_PREEMPT:
-   /* Preemptible BOs don't own system resources managed by the
-* driver (pages, VRAM, GART space). They point to resources
-* owned by someone else (e.g. pageable memory in user mode
-* or a DMABuf). They are used in a preemptible context so we
-* can guarantee no deadlocks and good QoS in case of MMU
-* notifiers or DMABuf move notifiers from the resource owner.
-*/
+   /* Preemptible BOs don't own system resources managed by the
+* driver (pages, VRAM, GART space). They point to resources
+* owned by someone else (e.g. pageable memory in user mode
+* or a DMABuf). They are used in a preemptible context so we
+* can guarantee no deadlocks and good QoS in case of MMU
+* notifiers or DMABuf move notifiers from the resource owner.
+*/
+   if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
return false;
-   case TTM_PL_TT:
-   if (amdgpu_bo_is_amdgpu_bo(bo) &&
-   amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
-   return false;
-   return true;
 
-   case TTM_PL_VRAM:
-   /* Check each drm MM node individually */
-   amdgpu_res_first(bo->resource, 0, (u64)num_pages << PAGE_SHIFT,
-&cursor);
-   while (cursor.remaining) {
-   if (place->fpfn < PFN_DOWN(cursor.start + cursor.size)
-   && !(place->lpfn &&
-place->lpfn <= PFN_DOWN(cursor.start)))
-   return true;
-
-   amdgpu_res_next(&cursor, cursor.size);
-   }
+   if (bo->resource->mem_type == TTM_PL_TT &&
+   amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
return false;
 
-   default:
-   break;
-   }
-
return ttm_bo_eviction_valuable(bo, place);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c1bd006a5525..03409409e43e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -518,6 +518,9 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
 bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
  const struct ttm_place *place)
 {
+   struct ttm_resource *res = bo->resource;
+   struct ttm_device *bdev = bo->bdev;
+
dma_resv_assert_held(bo->base.resv);
if (bo->resource->mem_type == TTM_PL_SYSTEM)
return true;
@@ -525,11 +528,7 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
/* Don't evict this BO if it's outside of the
 * requested placement range
 */
-   if (place->fpfn >= (bo->resource->start + bo->resource->num_pages) ||
-   (place->lpfn && place->lpfn <= bo->resource->start))
-   return false;
-
-   return true;
+   return ttm_resource_intersect(bdev, res, place, bo->base.size);
 }
 EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 84c21f92b422..ff3e9058943c 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -285,6 +285,8 @@ static bool ttm_resource_places_compat(struct ttm_resource 
*res,
   const struct ttm_place *places,
  

[PATCH 2/4] drm/i915: Add intersect callback function

2022-07-20 Thread Arunpravin Paneer Selvam
Add a new intersect callback function that fetches the
start offset from the backend drm buddy allocator.

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c 
b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index a5109548abc0..30bb8ade67a9 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -178,6 +178,28 @@ static void i915_ttm_buddy_man_free(struct 
ttm_resource_manager *man,
kfree(bman_res);
 }
 
+static bool i915_ttm_buddy_man_intersect(struct ttm_resource_manager *man,
+struct ttm_resource *res,
+const struct ttm_place *place,
+size_t size)
+{
+   struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
+   struct list_head *list = &bman_res->blocks;
+   struct drm_buddy_block *block;
+   u32 num_pages = PFN_UP(size);
+   u32 start;
+
+   /* Check each drm buddy block individually */
+   list_for_each_entry(block, list, link) {
+   start = drm_buddy_block_offset(block) >> PAGE_SHIFT;
+   if (start < place->fpfn ||
+   (place->lpfn && (start + num_pages) > place->lpfn))
+   return false;
+   }
+
+   return true;
+}
+
 static void i915_ttm_buddy_man_debug(struct ttm_resource_manager *man,
 struct drm_printer *printer)
 {
@@ -205,6 +227,7 @@ static void i915_ttm_buddy_man_debug(struct 
ttm_resource_manager *man,
 static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = {
.alloc = i915_ttm_buddy_man_alloc,
.free = i915_ttm_buddy_man_free,
+   .intersect = i915_ttm_buddy_man_intersect,
.debug = i915_ttm_buddy_man_debug,
 };
 
-- 
2.25.1



[PATCH 1/4] drm/ttm: add new intersect callback to res mgr

2022-07-20 Thread Arunpravin Paneer Selvam
- This allows the resource manager to handle intersection
  of placement and resources.

- Add a callback function to the amdgpu driver module that
  fetches the start offset from the buddy allocator.

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c  | 19 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 33 
 drivers/gpu/drm/ttm/ttm_range_manager.c  | 17 ++
 drivers/gpu/drm/ttm/ttm_resource.c   | 28 +
 include/drm/ttm/ttm_resource.h   | 20 
 5 files changed, 117 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8c6b2284cf56..727c80134aa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -204,6 +204,24 @@ void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
amdgpu_gart_invalidate_tlb(adev);
 }
 
+/**
+ * amdgpu_gtt_mgr_intersect - test for intersection
+ *
+ * @man: Our manager object
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation
+ *
+ * Simplified intersection test, only interesting if we need GART or not.
+ */
+static bool amdgpu_gtt_mgr_intersect(struct ttm_resource_manager *man,
+struct ttm_resource *res,
+const struct ttm_place *place,
+size_t size)
+{
+   return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+}
+
 /**
  * amdgpu_gtt_mgr_debug - dump VRAM table
  *
@@ -225,6 +243,7 @@ static void amdgpu_gtt_mgr_debug(struct 
ttm_resource_manager *man,
 static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
.alloc = amdgpu_gtt_mgr_new,
.free = amdgpu_gtt_mgr_del,
+   .intersect = amdgpu_gtt_mgr_intersect,
.debug = amdgpu_gtt_mgr_debug
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 28ec5f8ac1c1..ed0d10fe0b88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -720,6 +720,38 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr 
*mgr)
return atomic64_read(&mgr->vis_usage);
 }
 
+/**
+ * amdgpu_vram_mgr_intersect - test each drm buddy block for intersection
+ *
+ * @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
+ *
+ * Test each drm buddy block for intersection for eviction decision.
+ */
+static bool amdgpu_vram_mgr_intersect(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+   struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+   struct list_head *list = &mgr->blocks;
+   struct drm_buddy_block *block;
+   u32 num_pages = PFN_UP(size);
+   u32 start;
+
+   /* Check each drm buddy block individually */
+   list_for_each_entry(block, list, link) {
+   start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+   if (start < place->fpfn ||
+   (place->lpfn && (start + num_pages) > place->lpfn))
+   return false;
+   }
+
+   return true;
+}
+
 /**
  * amdgpu_vram_mgr_debug - dump VRAM table
  *
@@ -753,6 +785,7 @@ static void amdgpu_vram_mgr_debug(struct 
ttm_resource_manager *man,
 static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
.alloc  = amdgpu_vram_mgr_new,
.free   = amdgpu_vram_mgr_del,
+   .intersect = amdgpu_vram_mgr_intersect,
.debug  = amdgpu_vram_mgr_debug
 };
 
diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c 
b/drivers/gpu/drm/ttm/ttm_range_manager.c
index d91666721dc6..bf5de1978ead 100644
--- a/drivers/gpu/drm/ttm/ttm_range_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_range_manager.c
@@ -113,6 +113,22 @@ static void ttm_range_man_free(struct ttm_resource_manager 
*man,
kfree(node);
 }
 
+static bool ttm_range_man_intersect(struct ttm_resource_manager *man,
+   struct ttm_resource *res,
+   const struct ttm_place *place,
+   size_t size)
+{
+   struct drm_mm_node *node = &to_ttm_range_mgr_node(res)->mm_nodes[0];
+   u32 num_pages = PFN_UP(size);
+
+   /* Don't evict BOs outside of the requested placement range */
+   if (place->fpfn >= (node->start + num_pages) ||
+   (place->lpfn && place->lpfn <= node->start))
+   return false;
+
+   return true;
+}
+
 static void ttm_range_man_debug(struct ttm_resource_manager *man,
struct drm_printer *printer