date:20230721

Applied.  Thanks!

On Fri, Jul 21, 2023 at 2:10 AM  wrote:
>
> Fix nine occurrences of the checkpatch.pl error:
> ERROR: "foo * bar" should be "foo *bar"
>
> Signed-off-by: Ran Sun 
> ---
>   drivers/gpu/drm/radeon/atom.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/atom.c
> b/drivers/gpu/drm/radeon/atom.c
> index 11a1940bb26d..93acb0e42bd6 100644
> --- a/drivers/gpu/drm/radeon/atom.c
> +++ b/drivers/gpu/drm/radeon/atom.c
> @@ -68,8 +68,8 @@ typedef struct {
>   } atom_exec_context;
>
>   int atom_debug = 0;
> -static int atom_execute_table_locked(struct atom_context *ctx, int
> index, uint32_t * params);
> -int atom_execute_table(struct atom_context *ctx, int index, uint32_t *
> params);
> +static int atom_execute_table_locked(struct atom_context *ctx, int
> index, uint32_t *params);
> +int atom_execute_table(struct atom_context *ctx, int index, uint32_t
> *params);
>
>   static uint32_t atom_arg_mask[8] = {
> 0x, 0x, 0x0000, 0x,

Re: [PATCH] drm/amd/pm: open brace '{' following struct go on the same line

On Thu, Jul 20, 2023 at 11:53 PM  wrote:
>
> ERROR: open brace '{' following struct go on the same line
>

The description doesn't match what the patch is doing.

Alex

> Signed-off-by: Ran Sun 
> ---
>   drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 8 
>   1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> index ddc488251313..0cf564ea1ed8 100644
> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> @@ -429,10 +429,10 @@ int amdgpu_pm_load_smu_firmware(struct
> amdgpu_device *adev, uint32_t *smu_versio
>   int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool
> enable);
>   int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev,
> uint32_t size);
>   int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev,
> uint32_t size);
> -int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,enum
> pp_clock_type type,
> - uint32_t *min,uint32_t *max);
> -int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev,enum
> pp_clock_type type,
> -  uint32_t min,uint32_t max);
> +int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum
> pp_clock_type type,
> + uint32_t *min, uint32_t *max);
> +int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev, enum
> pp_clock_type type,
> +  uint32_t min, uint32_t max);
>   int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev);
>   int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
> smu_event_type event,
>   uint64_t event_arg);

Re: [PATCH] drm/amdgpu: open brace '{' following struct go on the same line

Applied.  Thanks!

Alex

On Thu, Jul 20, 2023 at 11:32 PM  wrote:
>
> ERROR: open brace '{' following struct go on the same line
>
> Signed-off-by: Ran Sun 
> ---
>   drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h | 3 +--
>   1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h
> b/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h
> index 52045ad59bed..eec816f0cbf9 100644
> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h
> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h
> @@ -24,8 +24,7 @@
>   #ifndef __AMDGPU_PM_H__
>   #define __AMDGPU_PM_H__
>
> -struct cg_flag_name
> -{
> +struct cg_flag_name {
> u64 flag;
> const char *name;
>   };

Re: [PATCH] drm/amd/pm: open brace '{' following struct go on the same line

This applied properly.  Applied.  Thanks!

Alex

On Thu, Jul 20, 2023 at 11:27 PM  wrote:
>
> ERROR: open brace '{' following struct go on the same line
>
> Signed-off-by: Ran Sun 
> ---
>   .../gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h  | 21 +++
>   1 file changed, 7 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h
> b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h
> index 1dc7a065a6d4..251ed011b3b0 100644
> --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h
> +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h
> @@ -41,8 +41,7 @@
>   #define SMU_13_0_0_PP_OVERDRIVE_VERSION 0x83// OverDrive 8
> Table Version 0.2
>   #define SMU_13_0_0_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving
> Clock Table Version 1.00
>
> -enum SMU_13_0_0_ODFEATURE_CAP
> -{
> +enum SMU_13_0_0_ODFEATURE_CAP {
>   SMU_13_0_0_ODCAP_GFXCLK_LIMITS = 0,
>   SMU_13_0_0_ODCAP_UCLK_LIMITS,
>   SMU_13_0_0_ODCAP_POWER_LIMIT,
> @@ -62,8 +61,7 @@ enum SMU_13_0_0_ODFEATURE_CAP
>   SMU_13_0_0_ODCAP_COUNT,
>   };
>
> -enum SMU_13_0_0_ODFEATURE_ID
> -{
> +enum SMU_13_0_0_ODFEATURE_ID {
>   SMU_13_0_0_ODFEATURE_GFXCLK_LIMITS   = 1 <<
> SMU_13_0_0_ODCAP_GFXCLK_LIMITS,   //GFXCLK Limit feature
>   SMU_13_0_0_ODFEATURE_UCLK_LIMITS = 1 <<
> SMU_13_0_0_ODCAP_UCLK_LIMITS, //UCLK Limit feature
>   SMU_13_0_0_ODFEATURE_POWER_LIMIT = 1 <<
> SMU_13_0_0_ODCAP_POWER_LIMIT, //Power Limit feature
> @@ -85,8 +83,7 @@ enum SMU_13_0_0_ODFEATURE_ID
>
>   #define SMU_13_0_0_MAX_ODFEATURE 32 //Maximum Number of OD Features
>
> -enum SMU_13_0_0_ODSETTING_ID
> -{
> +enum SMU_13_0_0_ODSETTING_ID {
>   SMU_13_0_0_ODSETTING_GFXCLKFMAX = 0,
>   SMU_13_0_0_ODSETTING_GFXCLKFMIN,
>   SMU_13_0_0_ODSETTING_UCLKFMIN,
> @@ -123,8 +120,7 @@ enum SMU_13_0_0_ODSETTING_ID
>   };
>   #define SMU_13_0_0_MAX_ODSETTING 64 //Maximum Number of ODSettings
>
> -enum SMU_13_0_0_PWRMODE_SETTING
> -{
> +enum SMU_13_0_0_PWRMODE_SETTING {
>   SMU_13_0_0_PMSETTING_POWER_LIMIT_QUIET = 0,
>   SMU_13_0_0_PMSETTING_POWER_LIMIT_BALANCE,
>   SMU_13_0_0_PMSETTING_POWER_LIMIT_TURBO,
> @@ -144,8 +140,7 @@ enum SMU_13_0_0_PWRMODE_SETTING
>   };
>   #define SMU_13_0_0_MAX_PMSETTING 32 //Maximum Number of PowerMode
> Settings
>
> -struct smu_13_0_0_overdrive_table
> -{
> +struct smu_13_0_0_overdrive_table {
>   uint8_t revision; //Revision =
> SMU_13_0_0_PP_OVERDRIVE_VERSION
>   uint8_t reserve[3];   //Zero filled field
> reserved for future use
>   uint32_t feature_count;   //Total number of
> supported features
> @@ -156,8 +151,7 @@ struct smu_13_0_0_overdrive_table
>   int16_t pm_setting[SMU_13_0_0_MAX_PMSETTING]; //Optimized power
> mode feature settings
>   };
>
> -enum SMU_13_0_0_PPCLOCK_ID
> -{
> +enum SMU_13_0_0_PPCLOCK_ID {
>   SMU_13_0_0_PPCLOCK_GFXCLK = 0,
>   SMU_13_0_0_PPCLOCK_SOCCLK,
>   SMU_13_0_0_PPCLOCK_UCLK,
> @@ -175,8 +169,7 @@ enum SMU_13_0_0_PPCLOCK_ID
>   };
>   #define SMU_13_0_0_MAX_PPCLOCK 16 //Maximum Number of PP Clocks
>
> -struct smu_13_0_0_powerplay_table
> -{
> +struct smu_13_0_0_powerplay_table {
>   struct atom_common_table_header header; //For SMU13,
> header.format_revision = 15, header.content_revision = 0
>   uint8_t table_revision; //For SMU13, table_revision
> = 2
>   uint8_t padding;

Re: [PATCH] drm/amd: open brace '{' following struct go on the same line

On Thu, Jul 20, 2023 at 9:31 PM  wrote:
>
> Fix the checkpatch error as open brace '{' following struct should
> go on the same line.
>
> Signed-off-by: Ran Sun 

git am didn't seem to like the patch, but I was able to apply it
cleanly manually with no fuzz.  Not sure what's up, but I've applied
it.

Alex


> ---
>   drivers/gpu/drm/amd/include/yellow_carp_offset.h | 6 ++
>   1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/include/yellow_carp_offset.h
> b/drivers/gpu/drm/amd/include/yellow_carp_offset.h
> index 0fea6a746611..a2c8dca2425e 100644
> --- a/drivers/gpu/drm/amd/include/yellow_carp_offset.h
> +++ b/drivers/gpu/drm/amd/include/yellow_carp_offset.h
> @@ -7,13 +7,11 @@
>   #define MAX_SEGMENT 6
>
>
> -struct IP_BASE_INSTANCE
> -{
> +struct IP_BASE_INSTANCE {
>   unsigned int segment[MAX_SEGMENT];
>   } __maybe_unused;
>
> -struct IP_BASE
> -{
> +struct IP_BASE {
>   struct IP_BASE_INSTANCE instance[MAX_INSTANCE];
>   } __maybe_unused;

Re: [PATCH] drm/amd/display: Unlock on error path in dm_handle_mst_sideband_msg_ready_event()

On Fri, Jul 21, 2023 at 10:57 AM Dan Carpenter  wrote:
>
> This error path needs to unlock the "aconnector->handle_mst_msg_ready"
> mutex before returning.
>
> Fixes: bb4fa525f327 ("drm/amd/display: Add polling method to handle MST reply 
> packet")
> Signed-off-by: Dan Carpenter 

Applied.  Thanks!

Alex

> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> index 1abdec14344e..943959012d04 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> @@ -707,7 +707,7 @@ void dm_handle_mst_sideband_msg_ready_event(
>
> if (retry == 3) {
> DRM_ERROR("Failed to ack MST event.\n");
> -   return;
> +   break;
> }
>
> 
> drm_dp_mst_hpd_irq_send_new_request(&aconnector->mst_mgr);
> --
> 2.39.2
>

Re: [PATCH] drm/radeon: Avoid externs & do not initialize globals to 0 in radeon_drv.c

On Sat, Jul 15, 2023 at 11:26 AM Srinivasan Shanmugam
 wrote:
>
> Fixes the following:
>
>  - WARNING: externs should be avoided in .c files
>  - ERROR: do not initialise globals to 0
>  - WARNING: Missing a blank line after declarations
>
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Srinivasan Shanmugam 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/radeon/radeon_drv.c | 51 -
>  drivers/gpu/drm/radeon/radeon_drv.h | 13 
>  2 files changed, 27 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_drv.c 
> b/drivers/gpu/drm/radeon/radeon_drv.c
> index e4374814f0ef..aa02697e5ea3 100644
> --- a/drivers/gpu/drm/radeon/radeon_drv.c
> +++ b/drivers/gpu/drm/radeon/radeon_drv.c
> @@ -113,59 +113,32 @@
>  #define KMS_DRIVER_MAJOR   2
>  #define KMS_DRIVER_MINOR   50
>  #define KMS_DRIVER_PATCHLEVEL  0
> -int radeon_suspend_kms(struct drm_device *dev, bool suspend,
> -  bool fbcon, bool freeze);
> -int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon);
> -extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int 
> crtc,
> - unsigned int flags, int *vpos, int 
> *hpos,
> - ktime_t *stime, ktime_t *etime,
> - const struct drm_display_mode *mode);
> -extern bool radeon_is_px(struct drm_device *dev);
> -int radeon_mode_dumb_mmap(struct drm_file *filp,
> - struct drm_device *dev,
> - uint32_t handle, uint64_t *offset_p);
> -int radeon_mode_dumb_create(struct drm_file *file_priv,
> -   struct drm_device *dev,
> -   struct drm_mode_create_dumb *args);
> -
> -/* atpx handler */
> -#if defined(CONFIG_VGA_SWITCHEROO)
> -void radeon_register_atpx_handler(void);
> -void radeon_unregister_atpx_handler(void);
> -bool radeon_has_atpx_dgpu_power_cntl(void);
> -bool radeon_is_atpx_hybrid(void);
> -#else
> -static inline void radeon_register_atpx_handler(void) {}
> -static inline void radeon_unregister_atpx_handler(void) {}
> -static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; }
> -static inline bool radeon_is_atpx_hybrid(void) { return false; }
> -#endif
>
>  int radeon_no_wb;
>  int radeon_modeset = -1;
>  int radeon_dynclks = -1;
> -int radeon_r4xx_atom = 0;
> +int radeon_r4xx_atom;
>  int radeon_agpmode = -1;
> -int radeon_vram_limit = 0;
> +int radeon_vram_limit;
>  int radeon_gart_size = -1; /* auto */
> -int radeon_benchmarking = 0;
> -int radeon_testing = 0;
> -int radeon_connector_table = 0;
> +int radeon_benchmarking;
> +int radeon_testing;
> +int radeon_connector_table;
>  int radeon_tv = 1;
>  int radeon_audio = -1;
> -int radeon_disp_priority = 0;
> -int radeon_hw_i2c = 0;
> +int radeon_disp_priority;
> +int radeon_hw_i2c;
>  int radeon_pcie_gen2 = -1;
>  int radeon_msi = -1;
>  int radeon_lockup_timeout = 1;
> -int radeon_fastfb = 0;
> +int radeon_fastfb;
>  int radeon_dpm = -1;
>  int radeon_aspm = -1;
>  int radeon_runtime_pm = -1;
> -int radeon_hard_reset = 0;
> +int radeon_hard_reset;
>  int radeon_vm_size = 8;
>  int radeon_vm_block_size = -1;
> -int radeon_deep_color = 0;
> +int radeon_deep_color;
>  int radeon_use_pflipirq = 2;
>  int radeon_bapm = -1;
>  int radeon_backlight = -1;
> @@ -384,6 +357,7 @@ radeon_pci_shutdown(struct pci_dev *pdev)
>  static int radeon_pmops_suspend(struct device *dev)
>  {
> struct drm_device *drm_dev = dev_get_drvdata(dev);
> +
> return radeon_suspend_kms(drm_dev, true, true, false);
>  }
>
> @@ -404,12 +378,14 @@ static int radeon_pmops_resume(struct device *dev)
>  static int radeon_pmops_freeze(struct device *dev)
>  {
> struct drm_device *drm_dev = dev_get_drvdata(dev);
> +
> return radeon_suspend_kms(drm_dev, false, true, true);
>  }
>
>  static int radeon_pmops_thaw(struct device *dev)
>  {
> struct drm_device *drm_dev = dev_get_drvdata(dev);
> +
> return radeon_resume_kms(drm_dev, false, true);
>  }
>
> @@ -494,6 +470,7 @@ long radeon_drm_ioctl(struct file *filp,
> struct drm_file *file_priv = filp->private_data;
> struct drm_device *dev;
> long ret;
> +
> dev = file_priv->minor->dev;
> ret = pm_runtime_get_sync(dev->dev);
> if (ret < 0) {
> diff --git a/drivers/gpu/drm/radeon/radeon_drv.h 
> b/drivers/gpu/drm/radeon/radeon_drv.h
> index 2ffe0975ee54..34a1c73d3938 100644
> --- a/drivers/gpu/drm/radeon/radeon_drv.h
> +++ b/drivers/gpu/drm/radeon/radeon_drv.h
> @@ -124,4 +124,17 @@ int radeon_driver_open_kms(struct drm_device *dev, 
> struct drm_file *file_priv);
>  void radeon_driver_postclose_kms(struct drm_device *dev,
>  struct drm_file *file_priv);
>
> +/* atpx handler */
> +#if defined(CONFIG_VGA_SWITCHEROO)
> +void radeon_register_atpx_handler(void);
> +void

Re: [PATCH v2] drm/radeon: Prefer dev_* variant over printk

On Thu, Jul 20, 2023 at 6:29 AM Srinivasan Shanmugam
 wrote:
>
> Changed from pr_err/info to dev_* variants so that
> we get better debug info when there are multiple GPUs
> in the system.
>
> 'Fixes: 8e2503972912c ("drm/radeon: Prefer pr_err/_info over printk")'
> Suggested-by: Alex Deucher 
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Srinivasan Shanmugam 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/radeon/radeon_atpx_handler.c | 12 
>  1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c 
> b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
> index 595354e3ce0b..fb4d931fdf18 100644
> --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
> +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
> @@ -94,6 +94,8 @@ static union acpi_object *radeon_atpx_call(acpi_handle 
> handle, int function,
> union acpi_object atpx_arg_elements[2];
> struct acpi_object_list atpx_arg;
> struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
> +   struct acpi_device *adev = container_of(handle, struct acpi_device, 
> handle);
> +   struct device *dev = &adev->dev;
>
> atpx_arg.count = 2;
> atpx_arg.pointer = &atpx_arg_elements[0];
> @@ -115,8 +117,8 @@ static union acpi_object *radeon_atpx_call(acpi_handle 
> handle, int function,
>
> /* Fail only if calling the method fails and ATPX is supported */
> if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
> -   pr_err("failed to evaluate ATPX got %s\n",
> -  acpi_format_exception(status));
> +   dev_err(dev, "failed to evaluate ATPX got %s\n",
> +   acpi_format_exception(status));
> kfree(buffer.pointer);
> return NULL;
> }
> @@ -157,6 +159,8 @@ static void radeon_atpx_parse_functions(struct 
> radeon_atpx_functions *f, u32 mas
>  static int radeon_atpx_validate(struct radeon_atpx *atpx)
>  {
> u32 valid_bits = 0;
> +   struct acpi_device *adev = container_of(atpx->handle, struct 
> acpi_device, handle);
> +   struct device *dev = &adev->dev;
>
> if (atpx->functions.px_params) {
> union acpi_object *info;
> @@ -171,7 +175,7 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx)
>
> size = *(u16 *) info->buffer.pointer;
> if (size < 10) {
> -   pr_err("ATPX buffer is too small: %zu\n", size);
> +   dev_err(dev, "ATPX buffer is too small: %zu\n", size);
> kfree(info);
> return -EINVAL;
> }
> @@ -202,7 +206,7 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx)
>
> atpx->is_hybrid = false;
> if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
> -   pr_info("ATPX Hybrid Graphics\n");
> +   dev_info(dev, "ATPX Hybrid Graphics\n");
> /*
>  * Disable legacy PM methods only when pcie port PM is usable,
>  * otherwise the device might fail to power off or power on.
> --
> 2.25.1
>

Re: [PATCH] drm/amdgpu: Fix do not add new typedefs in amdgpu_fw_attestation.c

On Fri, Jul 21, 2023 at 4:17 AM Srinivasan Shanmugam
 wrote:
>
> Fixes the following to align to coding style:
>
> WARNING: do not add new typedefs
> +typedef struct FW_ATT_DB_HEADER
>
> WARNING: do not add new typedefs
> +typedef struct FW_ATT_RECORD
>
> WARNING: Symbolic permissions 'S_IRUSR' are not preferred. Consider using 
> octal permissions '0400'.
> +   S_IRUSR,
>
> ERROR: "(foo*)" should be "(foo *)"
> WARNING: please, no space before tabs
>
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Srinivasan Shanmugam 

Reviewed-by: Alex Deucher 

> ---
>  .../drm/amd/amdgpu/amdgpu_fw_attestation.c| 38 +--
>  1 file changed, 18 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
> index 2ca3c329de6d..2d4b67175b55 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
> @@ -32,17 +32,15 @@
>  #include "soc15_common.h"
>
>  #define FW_ATTESTATION_DB_COOKIE 0x143b6a37
> -#define FW_ATTESTATION_RECORD_VALID 1
> +#define FW_ATTESTATION_RECORD_VALID 1
>  #define FW_ATTESTATION_MAX_SIZE 4096
>
> -typedef struct FW_ATT_DB_HEADER
> -{
> +struct FW_ATT_DB_HEADER {
> uint32_t AttDbVersion;   /* version of the fwar feature */
> uint32_t AttDbCookie;/* cookie as an extra check for 
> corrupt data */
> -} FW_ATT_DB_HEADER;
> +};
>
> -typedef struct FW_ATT_RECORD
> -{
> +struct FW_ATT_RECORD {
> uint16_t AttFwIdV1;  /* Legacy FW Type field */
> uint16_t AttFwIdV2;  /* V2 FW ID field */
> uint32_t AttFWVersion;   /* FW Version */
> @@ -50,7 +48,7 @@ typedef struct FW_ATT_RECORD
> uint8_t  AttSource;  /* FW source indicator */
> uint8_t  RecordValid;/* Indicates whether the record is a 
> valid entry */
> uint32_t AttFwTaId;  /* Ta ID (only in TA Attestation 
> Table) */
> -} FW_ATT_RECORD;
> +};
>
>  static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
>   char __user *buf,
> @@ -60,15 +58,15 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct 
> file *f,
> struct amdgpu_device *adev = (struct amdgpu_device 
> *)file_inode(f)->i_private;
> uint64_t records_addr = 0;
> uint64_t vram_pos = 0;
> -   FW_ATT_DB_HEADER fw_att_hdr = {0};
> -   FW_ATT_RECORD fw_att_record = {0};
> +   struct FW_ATT_DB_HEADER fw_att_hdr = {0};
> +   struct FW_ATT_RECORD fw_att_record = {0};
>
> -   if (size < sizeof(FW_ATT_RECORD)) {
> +   if (size < sizeof(struct FW_ATT_RECORD)) {
> DRM_WARN("FW attestation input buffer not enough memory");
> return -EINVAL;
> }
>
> -   if ((*pos + sizeof(FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) {
> +   if ((*pos + sizeof(struct FW_ATT_DB_HEADER)) >= 
> FW_ATTESTATION_MAX_SIZE) {
> DRM_WARN("FW attestation out of bounds");
> return 0;
> }
> @@ -83,8 +81,8 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct 
> file *f,
> if (*pos == 0) {
> amdgpu_device_vram_access(adev,
>   vram_pos,
> - (uint32_t*)&fw_att_hdr,
> - sizeof(FW_ATT_DB_HEADER),
> + (uint32_t *)&fw_att_hdr,
> + sizeof(struct FW_ATT_DB_HEADER),
>   false);
>
> if (fw_att_hdr.AttDbCookie != FW_ATTESTATION_DB_COOKIE) {
> @@ -96,20 +94,20 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct 
> file *f,
> }
>
> amdgpu_device_vram_access(adev,
> - vram_pos + sizeof(FW_ATT_DB_HEADER) + *pos,
> - (uint32_t*)&fw_att_record,
> - sizeof(FW_ATT_RECORD),
> + vram_pos + sizeof(struct FW_ATT_DB_HEADER) 
> + *pos,
> + (uint32_t *)&fw_att_record,
> + sizeof(struct FW_ATT_RECORD),
>   false);
>
> if (fw_att_record.RecordValid != FW_ATTESTATION_RECORD_VALID)
> return 0;
>
> -   if (copy_to_user(buf, (void*)&fw_att_record, sizeof(FW_ATT_RECORD)))
> +   if (copy_to_user(buf, (void *)&fw_att_record, sizeof(struct 
> FW_ATT_RECORD)))
> return -EINVAL;
>
> -   *pos += sizeof(FW_ATT_RECORD);
> +   *pos += sizeof(struct FW_ATT_RECORD);
>
> -   return sizeof(FW_ATT_RECORD);
> +   return sizeof(struct FW_ATT_RECORD);
>  }
>
>  static const struct file_operations amdgpu_fw_attestation_debugfs_ops = {

Re: [PATCH] drm/amdgpu: Prefer #if IS_ENABLED over #if defined in amdgpu_drv.c

On Fri, Jul 21, 2023 at 3:49 AM Srinivasan Shanmugam
 wrote:
>
> Adhere to linux coding style
>
> Fixes the following:
>
> WARNING: Prefer IS_ENABLED() to CONFIG_ || CONFIG__MODULE
> +#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
>
> WARNING: Prefer IS_ENABLED() to CONFIG_ || CONFIG__MODULE
> +#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
>
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Srinivasan Shanmugam 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 3d9171eca11c..c315fe390e71 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -584,7 +584,7 @@ module_param_named(timeout_period, 
> amdgpu_watchdog_timer.period, uint, 0644);
>   */
>  #ifdef CONFIG_DRM_AMDGPU_SI
>
> -#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
> +#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
>  int amdgpu_si_support = 0;
>  MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled 
> (default))");
>  #else
> @@ -603,7 +603,7 @@ module_param_named(si_support, amdgpu_si_support, int, 
> 0444);
>   */
>  #ifdef CONFIG_DRM_AMDGPU_CIK
>
> -#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
> +#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
>  int amdgpu_cik_support = 0;
>  MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled 
> (default))");
>  #else
> --
> 2.25.1
>

Re: [PATCH] drm/amdgpu: Return -ENOMEM when there is no memory in 'amdgpu_gfx_mqd_sw_init'

On Fri, Jul 21, 2023 at 5:22 AM Srinivasan Shanmugam
 wrote:
>
> Return -ENOMEM, when there is no sufficient dynamically allocated memory
> to create MQD backup for ring
>
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Srinivasan Shanmugam 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 11 ---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index a33d4bc34cee..6639fde5dd5c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -407,8 +407,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
>
> /* prepare MQD backup */
> kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
> -   if (!kiq->mqd_backup)
> -   dev_warn(adev->dev, "no memory to create MQD 
> backup for ring %s\n", ring->name);
> +   if (!kiq->mqd_backup) {
> +   dev_warn(adev->dev,
> +"no memory to create MQD backup for ring 
> %s\n", ring->name);
> +   return -ENOMEM;
> +   }
> }
>
> if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
> @@ -427,8 +430,10 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
> ring->mqd_size = mqd_size;
> /* prepare MQD backup */
> adev->gfx.me.mqd_backup[i] = 
> kmalloc(mqd_size, GFP_KERNEL);
> -   if (!adev->gfx.me.mqd_backup[i])
> +   if (!adev->gfx.me.mqd_backup[i]) {
> dev_warn(adev->dev, "no memory to 
> create MQD backup for ring %s\n", ring->name);
> +   return -ENOMEM;
> +   }
> }
> }
> }
> --
> 2.25.1
>

Re: [PATCH] drm/amdgpu: Fix style issues in amdgpu_gem.c

On Fri, Jul 21, 2023 at 4:37 AM Srinivasan Shanmugam
 wrote:
>
> Fixes the following to align to linux coding style:
>
> WARNING: braces {} are not necessary for any arm of this statement
> WARNING: Missing a blank line after declarations
> ERROR: space prohibited before that close parenthesis ')'
> WARNING: unnecessary whitespace before a quoted newline
> WARNING: %LX is non-standard C, use %llX
>
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Srinivasan Shanmugam 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 38 -
>  1 file changed, 19 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 4f9de9a0e2ec..1718d7d75eaf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -181,11 +181,10 @@ static int amdgpu_gem_object_open(struct drm_gem_object 
> *obj,
> return r;
>
> bo_va = amdgpu_vm_bo_find(vm, abo);
> -   if (!bo_va) {
> +   if (!bo_va)
> bo_va = amdgpu_vm_bo_add(adev, vm, abo);
> -   } else {
> +   else
> ++bo_va->ref_count;
> -   }
> amdgpu_bo_unreserve(abo);
> return 0;
>  }
> @@ -217,8 +216,8 @@ static void amdgpu_gem_object_close(struct drm_gem_object 
> *obj,
>
> r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
> if (r) {
> -   dev_err(adev->dev, "leaking bo va because "
> -   "we fail to reserve bo (%ld)\n", r);
> +   dev_err(adev->dev, "leaking bo va because we fail to reserve 
> bo (%ld)\n",
> +   r);
> return;
> }
> bo_va = amdgpu_vm_bo_find(vm, bo);
> @@ -238,8 +237,8 @@ static void amdgpu_gem_object_close(struct drm_gem_object 
> *obj,
>
>  out_unlock:
> if (unlikely(r < 0))
> -   dev_err(adev->dev, "failed to clear page "
> -   "tables on GEM object close (%ld)\n", r);
> +   dev_err(adev->dev, "failed to clear page tables on GEM object 
> close (%ld)\n",
> +   r);
> ttm_eu_backoff_reservation(&ticket, &list);
>  }
>
> @@ -463,9 +462,9 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
> struct amdgpu_bo *robj;
>
> gobj = drm_gem_object_lookup(filp, handle);
> -   if (gobj == NULL) {
> +   if (!gobj)
> return -ENOENT;
> -   }
> +
> robj = gem_to_amdgpu_bo(gobj);
> if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
> (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
> @@ -482,6 +481,7 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void 
> *data,
>  {
> union drm_amdgpu_gem_mmap *args = data;
> uint32_t handle = args->in.handle;
> +
> memset(args, 0, sizeof(*args));
> return amdgpu_mode_dumb_mmap(filp, dev, handle, &args->out.addr_ptr);
>  }
> @@ -508,7 +508,7 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns)
>
> timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
> /*  clamp timeout to avoid unsigned-> signed overflow */
> -   if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT )
> +   if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT)
> return MAX_SCHEDULE_TIMEOUT - 1;
>
> return timeout_jiffies;
> @@ -526,9 +526,9 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, 
> void *data,
> long ret;
>
> gobj = drm_gem_object_lookup(filp, handle);
> -   if (gobj == NULL) {
> +   if (!gobj)
> return -ENOENT;
> -   }
> +
> robj = gem_to_amdgpu_bo(gobj);
> ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
> true, timeout);
> @@ -555,7 +555,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, 
> void *data,
> struct amdgpu_bo *robj;
> int r = -1;
>
> -   DRM_DEBUG("%d \n", args->handle);
> +   DRM_DEBUG("%d\n", args->handle);
> gobj = drm_gem_object_lookup(filp, args->handle);
> if (gobj == NULL)
> return -ENOENT;
> @@ -685,7 +685,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>
> if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
> dev_dbg(dev->dev,
> -   "va_address 0x%LX is in reserved area 0x%LX\n",
> +   "va_address 0x%llx is in reserved area 0x%llx\n",
> args->va_address, AMDGPU_VA_RESERVED_SIZE);
> return -EINVAL;
> }
> @@ -693,7 +693,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
> if (args->va_address >= AMDGPU_GMC_HOLE_START &&
> args->va_address < AMDGPU_GMC_HOLE_END) {
> dev_dbg(dev->dev,
> -   "va_address 0x%LX is in VA hole 0x%LX-0x%LX\n",
> +   "va_address 0x%llx is in

[PATCH] drm/amd/display: Unlock on error path in dm_handle_mst_sideband_msg_ready_event()

2023-07-21 Thread Dan Carpenter

This error path needs to unlock the "aconnector->handle_mst_msg_ready"
mutex before returning.

Fixes: bb4fa525f327 ("drm/amd/display: Add polling method to handle MST reply 
packet")
Signed-off-by: Dan Carpenter 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 1abdec14344e..943959012d04 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -707,7 +707,7 @@ void dm_handle_mst_sideband_msg_ready_event(
 
if (retry == 3) {
DRM_ERROR("Failed to ack MST event.\n");
-   return;
+   break;
}
 

drm_dp_mst_hpd_irq_send_new_request(>mst_mgr);
-- 
2.39.2

Re: [PATCH v5 05/11] drm/amdgpu: Use RMW accessors for changing LNKCTL

On Fri, Jul 21, 2023 at 4:18 AM Ilpo Järvinen
 wrote:
>
> On Thu, 20 Jul 2023, Bjorn Helgaas wrote:
>
> > On Mon, Jul 17, 2023 at 03:04:57PM +0300, Ilpo Järvinen wrote:
> > > Don't assume that only the driver would be accessing LNKCTL. ASPM
> > > policy changes can trigger write to LNKCTL outside of driver's control.
> > > And in the case of upstream bridge, the driver does not even own the
> > > device it's changing the registers for.
> > >
> > > Use RMW capability accessors which do proper locking to avoid losing
> > > concurrent updates to the register value.
> > >
> > > Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts")
> > > Fixes: 62a37553414a ("drm/amdgpu: add si implementation v10")
> > > Suggested-by: Lukas Wunner 
> > > Signed-off-by: Ilpo Järvinen 
> > > Cc: sta...@vger.kernel.org
> >
> > Do we have any reports of problems that are fixed by this patch (or by
> > others in the series)?  If not, I'm not sure it really fits the usual
> > stable kernel criteria:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/stable-kernel-rules.rst?id=v6.4
>
> I was on the edge with this. The answer to your direct question is no,
> there are no such reports so it would be okay to leave stable out I think.
> This applies to all patches in this series.
>
> Basically, this series came to be after Lukas noted the potential
> concurrency issues with how LNKCTL is unprotected when reviewing
> (internally) my bandwidth controller series. Then I went to look around
> all LNKCTL usage and realized existing things might already have similar
> issues.
>
> Do you want me to send another version w/o cc stable or you'll take care
> of that?
>
> > > ---
> > >  drivers/gpu/drm/amd/amdgpu/cik.c | 36 +---
> > >  drivers/gpu/drm/amd/amdgpu/si.c  | 36 +---
> > >  2 files changed, 20 insertions(+), 52 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c 
> > > b/drivers/gpu/drm/amd/amdgpu/cik.c
> > > index 5641cf05d856..e63abdf52b6c 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/cik.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
> > > @@ -1574,17 +1574,8 @@ static void cik_pcie_gen3_enable(struct 
> > > amdgpu_device *adev)
> > > u16 bridge_cfg2, gpu_cfg2;
> > > u32 max_lw, current_lw, tmp;
> > >
> > > -   pcie_capability_read_word(root, PCI_EXP_LNKCTL,
> > > - _cfg);
> > > -   pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
> > > - _cfg);
> > > -
> > > -   tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
> > > -   pcie_capability_write_word(root, PCI_EXP_LNKCTL, 
> > > tmp16);
> > > -
> > > -   tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
> > > -   pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
> > > -  tmp16);
> > > +   pcie_capability_set_word(root, PCI_EXP_LNKCTL, 
> > > PCI_EXP_LNKCTL_HAWD);
> > > +   pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, 
> > > PCI_EXP_LNKCTL_HAWD);
> > >
> > > tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
> > > max_lw = (tmp & 
> > > PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
> > > @@ -1637,21 +1628,14 @@ static void cik_pcie_gen3_enable(struct 
> > > amdgpu_device *adev)
> > > msleep(100);
> > >
> > > /* linkctl */
> > > -   pcie_capability_read_word(root, 
> > > PCI_EXP_LNKCTL,
> > > - );
> > > -   tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
> > > -   tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
> > > -   pcie_capability_write_word(root, 
> > > PCI_EXP_LNKCTL,
> > > -  tmp16);
> > > -
> > > -   pcie_capability_read_word(adev->pdev,
> > > - PCI_EXP_LNKCTL,
> > > - );
> > > -   tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
> > > -   tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
> > > -   pcie_capability_write_word(adev->pdev,
> > > -  PCI_EXP_LNKCTL,
> > > -  tmp16);
> > > +   pcie_capability_clear_and_set_word(root, 
> > > PCI_EXP_LNKCTL,
> > > +  
> > > PCI_EXP_LNKCTL_HAWD,
> > > +  bridge_cfg 
> > > &
> > > +

Re: AMDGPU crash - request for assistance triaging / reporting

2023-07-21 Thread Deucher, Alexander

[AMD Official Use Only - General]

Please file a bug here:
https://gitlab.freedesktop.org/drm/amd/-/issues
and we'll take a look.  I believe the Z16 was certified on Ubuntu, so you 
should have a good experience with the latest Ubuntu LTS with the OEM kernel 
package.  One issue we've run into is with the PSR TCON controller on some 
models.  Disabling PSR in the driver can work around that.  A newer kernel also 
fixes the issue.

Thanks!

Alex


From: Matthew Hall 
Sent: Thursday, July 20, 2023 11:43 PM
To: Deucher, Alexander ; Koenig, Christian 
; Pan, Xinhui ; 
amd-gfx@lists.freedesktop.org 
Subject: AMDGPU crash - request for assistance triaging / reporting

Hello,

I see you are listed in the MAINTAINERS for Radeon / AMDGPU.

I would greatly appreciate your advice in the best route to a triage and fix 
for the following:

https://bugzilla.kernel.org/show_bug.cgi?id=217690

It makes it difficult to have a consistently stable Lenovo ThinkPad Z16 which I 
think is one of the best available AMD & Radeon laptops out today, and listed 
as supporting Linux officially.

I have about 30 years of programming experience, though none of it was on GPUs, 
so please let me know what I might be able to do in order to help figure this 
out.

Sincerely,
Matthew Hall

Re: [PATCH 6/7] drm/amdgpu/jpeg: mmsch_v3_0_4 requires doorbell on 32 byte boundary

2023-07-21 Thread Lazar, Lijo





On 7/21/2023 12:49 AM, Samir Dhume wrote:

Signed-off-by: Samir Dhume 
---
  drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 17 ++---
  1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 85ee74fdb7e3..896e2f895884 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -111,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle)
ring = >jpeg.inst[i].ring_dec[j];
ring->use_doorbell = true;
ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
-   ring->doorbell_index =
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   1 + j + 9 * jpeg_inst;
+   if (!amdgpu_sriov_vf(adev)) {
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   1 + j + 9 * jpeg_inst;
+   } else {
+   if (j < 4)
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   4 + j + 32 * jpeg_inst;


Is the requirement to have the index at a 32-byte aligned boundary?

Thanks,
Lijo


+   else
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   8 + j + 32 * jpeg_inst;
+   }
sprintf(ring->name, "jpeg_dec_%d.%d", 
adev->jpeg.inst[i].aid_id, j);
r = amdgpu_ring_init(adev, ring, 512, 
>jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);

Re: [PATCH 28/29] drm/amdkfd: Refactor migrate init to support partition switch

2023-07-21 Thread Philip Yang

On 2023-07-21 04:55, Michel Dänzer
wrote:

On 7/20/23 22:48, Philip Yang wrote:

On 2023-07-20 06:46, Michel Dänzer wrote:

On 7/17/23 15:09, Michel Dänzer wrote:

On 5/10/23 23:23, Alex Deucher wrote:

From: Philip Yang

Rename svm_migrate_init to a better name kgd2kfd_init_zone_device
because it sets up the zone device pgmap for page migration and keep it in
kfd_migrate.c to access static functions svm_migrate_pgmap_ops. Call it
only once in amdgpu_device_ip_init after adev ip blocks are initialized,
but before amdgpu_amdkfd_device_init initialize kfd nodes which enable
SVM support based on pgmap.

svm_range_set_max_pages is called by kgd2kfd_device_init every time after
switching compute partition mode.

Signed-off-by: Philip Yang
Reviewed-by: Felix Kuehling
Signed-off-by: Alex Deucher

I bisected a regression to this commit, which broke HW acceleration on this ThinkPad E595 with Picasso APU.

Actually, it doesn't seem to break HW acceleration completely. GDM eventually comes up with HW acceleration, it takes a long time (~30s or so) to start up though.

Later, the same messages as described in https://gitlab.freedesktop.org/drm/amd/-/issues/2659 appear.

Reverting this commit fixes all of the above symptoms.

I reproduced all of the above symptoms with amd-staging-drm-next commit 75515acf4b60 ("i2c: nvidia-gpu: Add ACPI property to align with device-tree") as well.

For full disclosure, I use these kernel command line arguments:

fbcon=font:10x18 drm_kms_helper.drm_fbdev_overalloc=112 amdgpu.noretry=1 amdgpu.mcbp=1

Thanks for the issue report and full disclosure, but I am not able to reproduce this issue, with both drm-next branch and amd-staging-drm-next branch tip on gitlab. The test system has same device id, running Ubuntu 22.04, latest linux-firmware-20230625.tar.gz, and same BIOS version.

FWIW, your system has PCI revision ID 0xC2, while mine has 0xC1.

Also, I'm currently using linux-firmware 20230515. AFAICT there are no relevant changes in 20230625, but I'm attaching the contents of /sys/kernel/debug/dri/0/amdgpu_firmware_info just in case.

I attached full dmesg log, could you help check if there is other difference, maybe kernel config, gcc version... it is hard to guess what could cause the basic driver gfx ring IB test timeout.

I suspect the IOMMU page faults logged in my dmesg might be relevant:

amdgpu: Topology: Add APU node [0x15d8:0x1002]
amdgpu :05:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0x122201800 flags=0x0070]
amdgpu :05:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0x1125fe380 flags=0x0070]
kfd kfd: amdgpu: added device 1002:15d8

There are no such page faults with the commit reverted.

Other than that and the IB test failure messages, our dmesg outputs are mostly identical indeed.

Yes, I don't have IO_PAGE_FAULT message on my system, thanks for
the finding, I will continue investigating the root cause.

You are right, the error message could cause the gfx ring IB test
timeout failure; this patch does change the order of driver memory
allocation. The IOMMU is in translation mode in the Ubuntu config.
To help confirm whether this is caused by the IOMMU, please add the
following kernel boot option to set the IOMMU to passthrough mode and
check whether it works around the issue:
iommu=pt
Regards,
Philip

[PATCH] drm/amd/display: set stream gamut remap matrix to MPC for DCN3+

2023-07-21 Thread Melissa Wen

dc->caps.color.mpc.gamut_remap says there is a post-blending color block
for gamut remap matrix for DCN3 HW family and newer versions. However,
those drivers still follow the DCN10 programming, which remaps the stream
gamut_remap_matrix to DPP (pre-blending).

To enable pre-blending and post-blending gamut_remap matrix support at
the same time, set stream gamut_remap to MPC and plane gamut_remap to
DPP for DCN families that support both.

It was tested using IGT KMS color tests for DRM CRTC CTM property and it
preserves test results.

Signed-off-by: Melissa Wen 

---

Hi,

Two relevant things to consider for this change. One is that mapping DRM
CRTC CTM to MPC is a more consistent way since CRTC CTM is a
post-blending transformation. Second, programming stream
gamut_remap_matrix on MPC enables us to provide support for both plane
CTM and CRTC CTM color properties. If we don't make this separation, we
would need to reject an atomic commit that tries to set both properties
at the same time and userspace may also get unexpected results.

Thanks in advance for any feedback,

Melissa

 .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 37 +++
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.h|  3 ++
 .../gpu/drm/amd/display/dc/dcn30/dcn30_init.c |  2 +-
 .../drm/amd/display/dc/dcn301/dcn301_init.c   |  2 +-
 .../gpu/drm/amd/display/dc/dcn31/dcn31_init.c |  2 +-
 .../drm/amd/display/dc/dcn314/dcn314_init.c   |  2 +-
 .../gpu/drm/amd/display/dc/dcn32/dcn32_init.c |  2 +-
 7 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
index 4cd4ae07d73d..4fb4e9ec03f1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
@@ -186,6 +186,43 @@ bool dcn30_set_input_transfer_func(struct dc *dc,
return result;
 }
 
+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx)
+{
+   int i = 0;
+   struct dpp_grph_csc_adjustment dpp_adjust;
+   struct mpc_grph_gamut_adjustment mpc_adjust;
+   int mpcc_id = pipe_ctx->plane_res.hubp->inst;
+   struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
+
+   memset(_adjust, 0, sizeof(dpp_adjust));
+   dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+
+   if (pipe_ctx->plane_state &&
+   pipe_ctx->plane_state->gamut_remap_matrix.enable_remap == true) {
+   dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+   for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
+   dpp_adjust.temperature_matrix[i] =
+   
pipe_ctx->plane_state->gamut_remap_matrix.matrix[i];
+   }
+
+   
pipe_ctx->plane_res.dpp->funcs->dpp_set_gamut_remap(pipe_ctx->plane_res.dpp,
+   _adjust);
+
+   memset(_adjust, 0, sizeof(mpc_adjust));
+   mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+
+   if (pipe_ctx->top_pipe == NULL) {
+   if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
+   mpc_adjust.gamut_adjust_type = 
GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+   for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
+   mpc_adjust.temperature_matrix[i] =
+   
pipe_ctx->stream->gamut_remap_matrix.matrix[i];
+   }
+   }
+
+   mpc->funcs->set_gamut_remap(mpc, mpcc_id, _adjust);
+}
+
 bool dcn30_set_output_transfer_func(struct dc *dc,
struct pipe_ctx *pipe_ctx,
const struct dc_stream_state *stream)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
index a24a8e33a3d2..cb34ca932a5f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
@@ -58,6 +58,9 @@ bool dcn30_set_blend_lut(struct pipe_ctx *pipe_ctx,
 bool dcn30_set_input_transfer_func(struct dc *dc,
struct pipe_ctx *pipe_ctx,
const struct dc_plane_state *plane_state);
+
+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx);
+
 bool dcn30_set_output_transfer_func(struct dc *dc,
struct pipe_ctx *pipe_ctx,
const struct dc_stream_state *stream);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
index 3d19acaa12f3..5372eb76fcfc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
@@ -32,7 +32,7 @@
 #include "dcn30_init.h"
 
 static const struct hw_sequencer_funcs dcn30_funcs = {
-   .program_gamut_remap = dcn10_program_gamut_remap,
+   .program_gamut_remap =

[PATCH Review 1/1] drm/amdgpu: Check APU flag to disable RAS

2023-07-21 Thread Stanley . Yang

Only disable RAS by default for aqua vanjaram on APU platform.

Signed-off-by: Stanley.Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 2221460e23e4..00a3863a6017 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2529,7 +2529,8 @@ static void amdgpu_ras_check_supported(struct 
amdgpu_device *adev)
 * Disable ras feature for aqua vanjaram
 * by default on apu platform.
 */
-   if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6))
+   if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6) &&
+   adev->gmc.is_app_apu)
adev->ras_enabled = amdgpu_ras_enable != 1 ? 0 :
adev->ras_hw_enabled & amdgpu_ras_mask;
else
-- 
2.25.1

Re: [PATCH 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3

2023-07-21 Thread Lazar, Lijo





On 7/21/2023 12:49 AM, Samir Dhume wrote:

initialization table handshake with mmsch

Signed-off-by: Samir Dhume 
---
  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 261 +---
  1 file changed, 237 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 411c1d802823..8650e3c6288d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -31,6 +31,7 @@
  #include "soc15d.h"
  #include "soc15_hw_ip.h"
  #include "vcn_v2_0.h"
+#include "mmsch_v4_0_3.h"
  
  #include "vcn/vcn_4_0_3_offset.h"

  #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -44,6 +45,7 @@
  #define VCN_VID_SOC_ADDRESS_2_0   0x1fb00
  #define VCN1_VID_SOC_ADDRESS_3_0  0x48300
  
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);

  static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
  static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
  static int vcn_v4_0_3_set_powergating_state(void *handle,
@@ -130,6 +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle)
amdgpu_vcn_fwlog_init(>vcn.inst[i]);
}
  
+	r = amdgpu_virt_alloc_mm_table(adev);

+   if (r)
+   return r;
+
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
  
@@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)

drm_dev_exit(idx);
}
  
+	amdgpu_virt_free_mm_table(adev);

+
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
@@ -189,33 +197,50 @@ static int vcn_v4_0_3_hw_init(void *handle)
struct amdgpu_ring *ring;
int i, r, vcn_inst;
  
-	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {

-   vcn_inst = GET_INST(VCN, i);
-   ring = >vcn.inst[i].ring_enc[0];
+   if (amdgpu_sriov_vf(adev)) {
+   r = vcn_v4_0_3_start_sriov(adev);
+   if (r)
+   goto done;
+
+   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+   if (adev->vcn.harvest_config & (1 << i))
+   continue;


In vcn v4.0.3 we have adev->vcn.num_vcn_inst as the actual number of vcn 
instances present and not the max possible number. Better to follow the 
same style 'vcn_inst = GET_INST(VCN, i)' for sriov mapping also. Any 
harvest mapping info is expected to be adjusted during initial parsing 
of IP instance discovery table.


  
-		if (ring->use_doorbell) {

-   adev->nbio.funcs->vcn_doorbell_range(
-   adev, ring->use_doorbell,
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   9 * vcn_inst,
-   adev->vcn.inst[i].aid_id);
-
-   WREG32_SOC15(
-   VCN, GET_INST(VCN, ring->me),
-   regVCN_RB1_DB_CTRL,
-   ring->doorbell_index
-   << 
VCN_RB1_DB_CTRL__OFFSET__SHIFT |
-   VCN_RB1_DB_CTRL__EN_MASK);
-
-   /* Read DB_CTRL to flush the write DB_CTRL command. */
-   RREG32_SOC15(
-   VCN, GET_INST(VCN, ring->me),
-   regVCN_RB1_DB_CTRL);
+   ring = >vcn.inst[i].ring_enc[0];
+   ring->wptr = 0;
+   ring->wptr_old = 0;
+   vcn_v4_0_3_unified_ring_set_wptr(ring);
+   ring->sched.ready = true;
}
+   } else {
+   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+   vcn_inst = GET_INST(VCN, i);
+   ring = >vcn.inst[i].ring_enc[0];
+
+   if (ring->use_doorbell) {
+   adev->nbio.funcs->vcn_doorbell_range(
+   adev, ring->use_doorbell,
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   9 * vcn_inst,
+   adev->vcn.inst[i].aid_id);
+
+   WREG32_SOC15(
+   VCN, GET_INST(VCN, ring->me),
+   regVCN_RB1_DB_CTRL,
+   ring->doorbell_index
+   << 
VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+   VCN_RB1_DB_CTRL__EN_MASK);
+
+   /* Read DB_CTRL to flush the write DB_CTRL 
command. */
+   RREG32_SOC15(
+   VCN, GET_INST(VCN, ring->me),
+

Re: [PATCH Review 1/1] drm/amdgpu: Fix out of range

2023-07-21 Thread Lazar, Lijo


cc: Morris/Le

On 7/21/2023 5:36 PM, Stanley.Yang wrote:

The xcc index should refer to xcc_mask; convert xcc_mask
to a count, then calculate the device instance.

Signed-off-by: Stanley.Yang 


Reviewed-by: Lijo Lazar 

Thanks,
Lijo


---
  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 24 +---
  1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 9053435488c5..cd833cd3ebd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1076,19 +1076,21 @@ static void gfx_v9_4_3_xcc_unset_safe_mode(struct 
amdgpu_device *adev,
  static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
  {
int xcc_id;
+   int num_xcc, dev_inst;
struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
  
-	for (xcc_id = 0; xcc_id < AMDGPU_MAX_RLC_INSTANCES; xcc_id++) {

-   if (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)
-   continue;
-   reg_access_ctrl = >gfx.rlc.reg_access_ctrl[GET_INST(GC, 
xcc_id)];
-   reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 
GET_INST(GC, xcc_id), regSCRATCH_REG0);
-   reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 
GET_INST(GC, xcc_id), regSCRATCH_REG1);
-   reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 
GET_INST(GC, xcc_id), regSCRATCH_REG2);
-   reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 
GET_INST(GC, xcc_id), regSCRATCH_REG3);
-   reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regGRBM_GFX_CNTL);
-   reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regGRBM_GFX_INDEX);
-   reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regRLC_SPARE_INT);
+   num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+   for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+   dev_inst = GET_INST(GC, xcc_id);
+
+   reg_access_ctrl = >gfx.rlc.reg_access_ctrl[dev_inst];
+   reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, dev_inst, 
regSCRATCH_REG0);
+   reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, dev_inst, 
regSCRATCH_REG1);
+   reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, dev_inst, 
regSCRATCH_REG2);
+   reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, dev_inst, 
regSCRATCH_REG3);
+   reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, dev_inst, 
regGRBM_GFX_CNTL);
+   reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, dev_inst, 
regGRBM_GFX_INDEX);
+   reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, dev_inst, 
regRLC_SPARE_INT);
}
  }

[PATCH Review 1/1] drm/amdgpu: Fix out of range

2023-07-21 Thread Stanley . Yang

The xcc index should refer to xcc_mask; convert xcc_mask
to a count, then calculate the device instance.

Signed-off-by: Stanley.Yang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 9053435488c5..cd833cd3ebd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1076,19 +1076,21 @@ static void gfx_v9_4_3_xcc_unset_safe_mode(struct 
amdgpu_device *adev,
 static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
 {
int xcc_id;
+   int num_xcc, dev_inst;
struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
 
-   for (xcc_id = 0; xcc_id < AMDGPU_MAX_RLC_INSTANCES; xcc_id++) {
-   if (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)
-   continue;
-   reg_access_ctrl = >gfx.rlc.reg_access_ctrl[GET_INST(GC, 
xcc_id)];
-   reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 
GET_INST(GC, xcc_id), regSCRATCH_REG0);
-   reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 
GET_INST(GC, xcc_id), regSCRATCH_REG1);
-   reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 
GET_INST(GC, xcc_id), regSCRATCH_REG2);
-   reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 
GET_INST(GC, xcc_id), regSCRATCH_REG3);
-   reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regGRBM_GFX_CNTL);
-   reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regGRBM_GFX_INDEX);
-   reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regRLC_SPARE_INT);
+   num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+   for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+   dev_inst = GET_INST(GC, xcc_id);
+
+   reg_access_ctrl = >gfx.rlc.reg_access_ctrl[dev_inst];
+   reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, dev_inst, 
regSCRATCH_REG0);
+   reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, dev_inst, 
regSCRATCH_REG1);
+   reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, dev_inst, 
regSCRATCH_REG2);
+   reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, dev_inst, 
regSCRATCH_REG3);
+   reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, dev_inst, 
regGRBM_GFX_CNTL);
+   reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, dev_inst, 
regGRBM_GFX_INDEX);
+   reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, dev_inst, 
regRLC_SPARE_INT);
}
 }
 
-- 
2.25.1

Re: [PATCH 28/29] drm/amdkfd: Refactor migrate init to support partition switch

2023-07-21 Thread Michel Dänzer

On 7/21/23 10:55, Michel Dänzer wrote:
> On 7/20/23 22:48, Philip Yang wrote:
>> On 2023-07-20 06:46, Michel Dänzer wrote:
>>> On 7/17/23 15:09, Michel Dänzer wrote:
 On 5/10/23 23:23, Alex Deucher wrote:
> From: Philip Yang 
>
> Rename svm_migrate_init to a better name kgd2kfd_init_zone_device
> because it sets up the zone device pgmap for page migration and keep it in
> kfd_migrate.c to access static functions svm_migrate_pgmap_ops. Call it
> only once in amdgpu_device_ip_init after adev ip blocks are initialized,
> but before amdgpu_amdkfd_device_init initialize kfd nodes which enable
> SVM support based on pgmap.
>
> svm_range_set_max_pages is called by kgd2kfd_device_init every time after
> switching compute partition mode.
>
> Signed-off-by: Philip Yang 
> Reviewed-by: Felix Kuehling 
> Signed-off-by: Alex Deucher 
 I bisected a regression to this commit, which broke HW acceleration on 
 this ThinkPad E595 with Picasso APU.
>>> Actually, it doesn't seem to break HW acceleration completely. GDM 
>>> eventually comes up with HW acceleration, it takes a long time (~30s or so) 
>>> to start up though.
>>>
>>> Later, the same messages as described in 
>>> https://gitlab.freedesktop.org/drm/amd/-/issues/2659 appear.
>>>
>>> Reverting this commit fixes all of the above symptoms.
>>>
>>>
>>> I reproduced all of the above symptoms with amd-staging-drm-next commit 
>>> 75515acf4b60 ("i2c: nvidia-gpu: Add ACPI property to align with 
>>> device-tree") as well.
>>>
>>>
>>> For full disclosure, I use these kernel command line arguments:
>>>
>>>  fbcon=font:10x18 drm_kms_helper.drm_fbdev_overalloc=112 amdgpu.noretry=1 
>>> amdgpu.mcbp=1
>>
>> Thanks for the issue report and full disclosure, but I am not able to 
>> reproduce this issue, with both drm-next branch and amd-staging-drm-next 
>> branch tip on gitlab. The test system has same device id, running Ubuntu 
>> 22.04, latest linux-firmware-20230625.tar.gz, and same BIOS version.
> 
> FWIW, your system has PCI revision ID 0xC2, while mine has 0xC1.
> 
> Also, I'm currently using linux-firmware 20230515. AFAICT there are no 
> relevant changes in 20230625, but I'm attaching the contents of 
> /sys/kernel/debug/dri/0/amdgpu_firmware_info just in case.
> 
> 
>> I attached full dmesg log, could you help check if there is other 
>> difference, maybe kernel config, gcc version... it is hard to guess what 
>> could cause the basic driver gfx ring IB test timeout.
> 
> I suspect the IOMMU page faults logged in my dmesg might be relevant:
> 
>  amdgpu: Topology: Add APU node [0x15d8:0x1002]
>  amdgpu :05:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x 
> address=0x122201800 flags=0x0070]
>  amdgpu :05:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x 
> address=0x1125fe380 flags=0x0070]
>  kfd kfd: amdgpu: added device 1002:15d8

Maybe I should mention my IOMMU related kernel build configuration:

CONFIG_IRQ_MSI_IOMMU=y
CONFIG_GART_IOMMU=y
CONFIG_VFIO_IOMMU_TYPE1=m
# CONFIG_VFIO_NOIOMMU is not set
CONFIG_IOMMU_IOVA=y
CONFIG_IOMMU_API=y
CONFIG_IOMMU_SUPPORT=y
CONFIG_IOMMU_IO_PGTABLE=y
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
CONFIG_IOMMU_DMA=y
CONFIG_IOMMU_SVA=y
CONFIG_AMD_IOMMU=y
CONFIG_AMD_IOMMU_V2=y
# CONFIG_IOMMUFD is not set
CONFIG_IOMMU_HELPER=y
# CONFIG_IOMMU_DEBUG is not set


> There are no such page faults with the commit reverted.
> 
> Other than that and the IB test failure messages, our dmesg outputs are 
> mostly identical indeed.
> 
> 

-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer

[PATCH] drm/amdgpu: Return -ENOMEM when there is no memory in 'amdgpu_gfx_mqd_sw_init'

Return -ENOMEM when there is not enough memory to allocate the MQD
backup for a ring

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a33d4bc34cee..6639fde5dd5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -407,8 +407,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 
/* prepare MQD backup */
kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
-   if (!kiq->mqd_backup)
-   dev_warn(adev->dev, "no memory to create MQD 
backup for ring %s\n", ring->name);
+   if (!kiq->mqd_backup) {
+   dev_warn(adev->dev,
+"no memory to create MQD backup for ring 
%s\n", ring->name);
+   return -ENOMEM;
+   }
}
 
if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
@@ -427,8 +430,10 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
ring->mqd_size = mqd_size;
/* prepare MQD backup */
adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, 
GFP_KERNEL);
-   if (!adev->gfx.me.mqd_backup[i])
+   if (!adev->gfx.me.mqd_backup[i]) {
dev_warn(adev->dev, "no memory to 
create MQD backup for ring %s\n", ring->name);
+   return -ENOMEM;
+   }
}
}
}
-- 
2.25.1

Re: [PATCH 28/29] drm/amdkfd: Refactor migrate init to support partition switch

2023-07-21 Thread Michel Dänzer

On 7/20/23 22:48, Philip Yang wrote:
> On 2023-07-20 06:46, Michel Dänzer wrote:
>> On 7/17/23 15:09, Michel Dänzer wrote:
>>> On 5/10/23 23:23, Alex Deucher wrote:
 From: Philip Yang 

 Rename svm_migrate_init to a better name, kgd2kfd_init_zone_device,
 because it sets up the zone device pgmap for page migration, and keep it in
 kfd_migrate.c so it can access the static svm_migrate_pgmap_ops. Call it
 only once in amdgpu_device_ip_init after adev ip blocks are initialized,
 but before amdgpu_amdkfd_device_init initialize kfd nodes which enable
 SVM support based on pgmap.

 svm_range_set_max_pages is called by kgd2kfd_device_init every time after
 switching compute partition mode.

 Signed-off-by: Philip Yang 
 Reviewed-by: Felix Kuehling 
 Signed-off-by: Alex Deucher 
>>> I bisected a regression to this commit, which broke HW acceleration on this 
>>> ThinkPad E595 with Picasso APU.
>> Actually, it doesn't seem to break HW acceleration completely. GDM 
>> eventually comes up with HW acceleration, it takes a long time (~30s or so) 
>> to start up though.
>>
>> Later, the same messages as described in 
>> https://gitlab.freedesktop.org/drm/amd/-/issues/2659 appear.
>>
>> Reverting this commit fixes all of the above symptoms.
>>
>>
>> I reproduced all of the above symptoms with amd-staging-drm-next commit 
>> 75515acf4b60 ("i2c: nvidia-gpu: Add ACPI property to align with 
>> device-tree") as well.
>>
>>
>> For full disclosure, I use these kernel command line arguments:
>>
>>  fbcon=font:10x18 drm_kms_helper.drm_fbdev_overalloc=112 amdgpu.noretry=1 
>> amdgpu.mcbp=1
> 
> Thanks for the issue report and full disclosure, but I am not able to 
> reproduce this issue, with both drm-next branch and amd-staging-drm-next 
> branch tip on gitlab. The test system has same device id, running Ubuntu 
> 22.04, latest linux-firmware-20230625.tar.gz, and same BIOS version.

FWIW, your system has PCI revision ID 0xC2, while mine has 0xC1.

Also, I'm currently using linux-firmware 20230515. AFAICT there are no relevant 
changes in 20230625, but I'm attaching the contents of 
/sys/kernel/debug/dri/0/amdgpu_firmware_info just in case.


> I attached full dmesg log, could you help check if there is other difference, 
> maybe kernel config, gcc version... it is hard to guess what could cause the 
> basic driver gfx ring IB test timeout.

I suspect the IOMMU page faults logged in my dmesg might be relevant:

 amdgpu: Topology: Add APU node [0x15d8:0x1002]
 amdgpu :05:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x 
address=0x122201800 flags=0x0070]
 amdgpu :05:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x 
address=0x1125fe380 flags=0x0070]
 kfd kfd: amdgpu: added device 1002:15d8

There are no such page faults with the commit reverted.

Other than that and the IB test failure messages, our dmesg outputs are mostly 
identical indeed.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer
VCE feature version: 0, firmware version: 0x
UVD feature version: 0, firmware version: 0x
MC feature version: 0, firmware version: 0x
ME feature version: 53, firmware version: 0x00a6
PFP feature version: 53, firmware version: 0x00c2
CE feature version: 53, firmware version: 0x0050
RLC feature version: 1, firmware version: 0x006f
RLC SRLC feature version: 1, firmware version: 0x0001
RLC SRLG feature version: 1, firmware version: 0x0001
RLC SRLS feature version: 1, firmware version: 0x0001
RLCP feature version: 0, firmware version: 0x
RLCV feature version: 0, firmware version: 0x
MEC feature version: 53, firmware version: 0x01d3
MEC2 feature version: 53, firmware version: 0x01d3
IMU feature version: 0, firmware version: 0x
SOS feature version: 0, firmware version: 0x
ASD feature version: 0, firmware version: 0x2190
TA XGMI feature version: 0x, firmware version: 0x
TA RAS feature version: 0x, firmware version: 0x
TA HDCP feature version: 0x, firmware version: 0x172e
TA DTM feature version: 0x, firmware version: 0x1212
TA RAP feature version: 0x, firmware version: 0x
TA SECUREDISPLAY feature version: 0x, firmware version: 0x2705
SMC feature version: 0, program: 0, firmware version: 0x00041e2a (4.30.42)
SDMA0 feature version: 41, firmware version: 0x00a9
VCN feature version: 0, firmware version: 0x0210d004
DMCU feature version: 0, firmware version: 0x0001
DMCUB feature version: 0, firmware version: 0x
TOC feature version: 0, firmware version: 0x
MES_KIQ feature version: 0, firmware version: 0x
MES feature version: 0, firmware version: 0x
VBIOS version: 113-PICASSO-114

[PATCH] drm/amdgpu: Fix style issues in amdgpu_gem.c

Fixes the following to align to Linux coding style:

WARNING: braces {} are not necessary for any arm of this statement
WARNING: Missing a blank line after declarations
ERROR: space prohibited before that close parenthesis ')'
WARNING: unnecessary whitespace before a quoted newline
WARNING: %LX is non-standard C, use %llX

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 38 -
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 4f9de9a0e2ec..1718d7d75eaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -181,11 +181,10 @@ static int amdgpu_gem_object_open(struct drm_gem_object 
*obj,
return r;
 
bo_va = amdgpu_vm_bo_find(vm, abo);
-   if (!bo_va) {
+   if (!bo_va)
bo_va = amdgpu_vm_bo_add(adev, vm, abo);
-   } else {
+   else
++bo_va->ref_count;
-   }
amdgpu_bo_unreserve(abo);
return 0;
 }
@@ -217,8 +216,8 @@ static void amdgpu_gem_object_close(struct drm_gem_object 
*obj,
 
r = ttm_eu_reserve_buffers(, , false, );
if (r) {
-   dev_err(adev->dev, "leaking bo va because "
-   "we fail to reserve bo (%ld)\n", r);
+   dev_err(adev->dev, "leaking bo va because we fail to reserve bo 
(%ld)\n",
+   r);
return;
}
bo_va = amdgpu_vm_bo_find(vm, bo);
@@ -238,8 +237,8 @@ static void amdgpu_gem_object_close(struct drm_gem_object 
*obj,
 
 out_unlock:
if (unlikely(r < 0))
-   dev_err(adev->dev, "failed to clear page "
-   "tables on GEM object close (%ld)\n", r);
+   dev_err(adev->dev, "failed to clear page tables on GEM object 
close (%ld)\n",
+   r);
ttm_eu_backoff_reservation(, );
 }
 
@@ -463,9 +462,9 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
struct amdgpu_bo *robj;
 
gobj = drm_gem_object_lookup(filp, handle);
-   if (gobj == NULL) {
+   if (!gobj)
return -ENOENT;
-   }
+
robj = gem_to_amdgpu_bo(gobj);
if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
(robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
@@ -482,6 +481,7 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void 
*data,
 {
union drm_amdgpu_gem_mmap *args = data;
uint32_t handle = args->in.handle;
+
memset(args, 0, sizeof(*args));
return amdgpu_mode_dumb_mmap(filp, dev, handle, >out.addr_ptr);
 }
@@ -508,7 +508,7 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns)
 
timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
/*  clamp timeout to avoid unsigned-> signed overflow */
-   if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT )
+   if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT)
return MAX_SCHEDULE_TIMEOUT - 1;
 
return timeout_jiffies;
@@ -526,9 +526,9 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void 
*data,
long ret;
 
gobj = drm_gem_object_lookup(filp, handle);
-   if (gobj == NULL) {
+   if (!gobj)
return -ENOENT;
-   }
+
robj = gem_to_amdgpu_bo(gobj);
ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
true, timeout);
@@ -555,7 +555,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void 
*data,
struct amdgpu_bo *robj;
int r = -1;
 
-   DRM_DEBUG("%d \n", args->handle);
+   DRM_DEBUG("%d\n", args->handle);
gobj = drm_gem_object_lookup(filp, args->handle);
if (gobj == NULL)
return -ENOENT;
@@ -685,7 +685,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
dev_dbg(dev->dev,
-   "va_address 0x%LX is in reserved area 0x%LX\n",
+   "va_address 0x%llx is in reserved area 0x%llx\n",
args->va_address, AMDGPU_VA_RESERVED_SIZE);
return -EINVAL;
}
@@ -693,7 +693,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
if (args->va_address >= AMDGPU_GMC_HOLE_START &&
args->va_address < AMDGPU_GMC_HOLE_END) {
dev_dbg(dev->dev,
-   "va_address 0x%LX is in VA hole 0x%LX-0x%LX\n",
+   "va_address 0x%llx is in VA hole 0x%llx-0x%llx\n",
args->va_address, AMDGPU_GMC_HOLE_START,
AMDGPU_GMC_HOLE_END);
return -EINVAL;
@@ -813,9 +813,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
int r;
 
gobj = drm_gem_object_lookup(filp, args->handle);
-   if

[PATCH] drm/amd/pm: open brace '{' following struct go on the same line


ERROR: open brace '{' following struct go on the same line

Signed-off-by: Ran Sun 
---
 .../gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h  | 21 +++
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h 
b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h

index 1dc7a065a6d4..251ed011b3b0 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h
@@ -41,8 +41,7 @@
 #define SMU_13_0_0_PP_OVERDRIVE_VERSION 0x83// OverDrive 8 
Table Version 0.2
 #define SMU_13_0_0_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving 
Clock Table Version 1.00


-enum SMU_13_0_0_ODFEATURE_CAP
-{
+enum SMU_13_0_0_ODFEATURE_CAP {
 SMU_13_0_0_ODCAP_GFXCLK_LIMITS = 0,
 SMU_13_0_0_ODCAP_UCLK_LIMITS,
 SMU_13_0_0_ODCAP_POWER_LIMIT,
@@ -62,8 +61,7 @@ enum SMU_13_0_0_ODFEATURE_CAP
 SMU_13_0_0_ODCAP_COUNT,
 };

-enum SMU_13_0_0_ODFEATURE_ID
-{
+enum SMU_13_0_0_ODFEATURE_ID {
 SMU_13_0_0_ODFEATURE_GFXCLK_LIMITS   = 1 << 
SMU_13_0_0_ODCAP_GFXCLK_LIMITS,   //GFXCLK Limit feature
 SMU_13_0_0_ODFEATURE_UCLK_LIMITS = 1 << 
SMU_13_0_0_ODCAP_UCLK_LIMITS, //UCLK Limit feature
 SMU_13_0_0_ODFEATURE_POWER_LIMIT = 1 << 
SMU_13_0_0_ODCAP_POWER_LIMIT, //Power Limit feature

@@ -85,8 +83,7 @@ enum SMU_13_0_0_ODFEATURE_ID

 #define SMU_13_0_0_MAX_ODFEATURE 32 //Maximum Number of OD Features

-enum SMU_13_0_0_ODSETTING_ID
-{
+enum SMU_13_0_0_ODSETTING_ID {
 SMU_13_0_0_ODSETTING_GFXCLKFMAX = 0,
 SMU_13_0_0_ODSETTING_GFXCLKFMIN,
 SMU_13_0_0_ODSETTING_UCLKFMIN,
@@ -123,8 +120,7 @@ enum SMU_13_0_0_ODSETTING_ID
 };
 #define SMU_13_0_0_MAX_ODSETTING 64 //Maximum Number of ODSettings

-enum SMU_13_0_0_PWRMODE_SETTING
-{
+enum SMU_13_0_0_PWRMODE_SETTING {
 SMU_13_0_0_PMSETTING_POWER_LIMIT_QUIET = 0,
 SMU_13_0_0_PMSETTING_POWER_LIMIT_BALANCE,
 SMU_13_0_0_PMSETTING_POWER_LIMIT_TURBO,
@@ -144,8 +140,7 @@ enum SMU_13_0_0_PWRMODE_SETTING
 };
 #define SMU_13_0_0_MAX_PMSETTING 32 //Maximum Number of PowerMode 
Settings


-struct smu_13_0_0_overdrive_table
-{
+struct smu_13_0_0_overdrive_table {
 uint8_t revision; //Revision = 
SMU_13_0_0_PP_OVERDRIVE_VERSION
 uint8_t reserve[3];   //Zero filled field 
reserved for future use
 uint32_t feature_count;   //Total number of 
supported features

@@ -156,8 +151,7 @@ struct smu_13_0_0_overdrive_table
 int16_t pm_setting[SMU_13_0_0_MAX_PMSETTING]; //Optimized power 
mode feature settings

 };

-enum SMU_13_0_0_PPCLOCK_ID
-{
+enum SMU_13_0_0_PPCLOCK_ID {
 SMU_13_0_0_PPCLOCK_GFXCLK = 0,
 SMU_13_0_0_PPCLOCK_SOCCLK,
 SMU_13_0_0_PPCLOCK_UCLK,
@@ -175,8 +169,7 @@ enum SMU_13_0_0_PPCLOCK_ID
 };
 #define SMU_13_0_0_MAX_PPCLOCK 16 //Maximum Number of PP Clocks

-struct smu_13_0_0_powerplay_table
-{
+struct smu_13_0_0_powerplay_table {
 struct atom_common_table_header header; //For SMU13, 
header.format_revision = 15, header.content_revision = 0
 uint8_t table_revision; //For SMU13, table_revision 
= 2

 uint8_t padding;

[PATCH] drm/radeon: ERROR: "foo * bar" should be "foo *bar"


Fix two occurrences of the checkpatch.pl error:
ERROR: "foo * bar" should be "foo *bar"

Signed-off-by: Ran Sun 
---
 drivers/gpu/drm/radeon/atom.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atom.c 
b/drivers/gpu/drm/radeon/atom.c

index 11a1940bb26d..93acb0e42bd6 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -68,8 +68,8 @@ typedef struct {
 } atom_exec_context;

 int atom_debug = 0;
-static int atom_execute_table_locked(struct atom_context *ctx, int 
index, uint32_t * params);
-int atom_execute_table(struct atom_context *ctx, int index, uint32_t * 
params);
+static int atom_execute_table_locked(struct atom_context *ctx, int 
index, uint32_t *params);
+int atom_execute_table(struct atom_context *ctx, int index, uint32_t 
*params);


 static uint32_t atom_arg_mask[8] = {
0x, 0x, 0x0000, 0x,

Re: radeon.ko/i586: BUG: kernel NULL pointer dereference, address:00000004

2023-07-21 Thread kkabe

rost...@goodmis.org sed in <20230717113623.41878...@gandalf.local.home>

>> On Fri, 14 Jul 2023 14:34:04 +0900
>>  wrote:
>> 
>> > Patch in
>> > https://bugzilla.kernel.org/show_bug.cgi?id=217669#c4
>> > fixed the problem in freedesktop.org kernel 5.18.0-rc2 .
>> > This may explain that in kernel.org tree, the said commit is in 
>> > kernel-5.19.
>> 
>> You mean the patch that adds:
>> 
>>  #if defined(FTRACE_MCOUNT_MAX_OFFSET) && (FTRACE_MCOUNT_MAX_OFFSET)
>> 
>> ?
>> 
>> Nothing should be setting FTRACE_MCOUNT_MAX_OFFSET to anything but non
>> zero. But doing a grep, I now see:
>> 
>> # define FTRACE_MCOUNT_MAX_OFFSET ENDBR_INSN_SIZE
>> 
>> Where it breaks that assumption if ENDBR_INSN_SIZE == 0 :-p
>>  (and that's my code!)
>> 
>> OK, does this fix it? (I haven't tested nor compiled this)
>> 
>> -- Steve
>> 
>> diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
>> index 897cf02c20b1..801f4414da3e 100644
>> --- a/arch/x86/include/asm/ftrace.h
>> +++ b/arch/x86/include/asm/ftrace.h
>> @@ -13,7 +13,7 @@
>>  #ifdef CONFIG_HAVE_FENTRY
>>  # include 
>>  /* Add offset for endbr64 if IBT enabled */
>> -# define FTRACE_MCOUNT_MAX_OFFSET   ENDBR_INSN_SIZE
>> +# define FTRACE_MCOUNT_MAX_OFFSET   (ENDBR_INSN_SIZE + MCOUNT_INSN_SIZE)
>>  #endif
>>  
>>  #ifdef CONFIG_DYNAMIC_FTRACE
>> 

Unfortunately this patch didn't fix freedesktop.org 5.18.0-rc2 tree.
(vblank->worker == NULL check fires otherwise a panic)

Applying this to kernel.org 6.4.3 results in totally different error as in 
https://bugzilla.kernel.org/show_bug.cgi?id=217669#c0
so there may be multiple regressions I'm chasing.

-- 
kabe

[PATCH] drm/radeon: ERROR: "foo * bar" should be "foo *bar"


Fix two occurrences of the checkpatch.pl error:
ERROR: "foo * bar" should be "foo *bar"

Signed-off-by: Ran Sun 
---
 drivers/gpu/drm/radeon/atom.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atom.c 
b/drivers/gpu/drm/radeon/atom.c

index 11a1940bb26d..93acb0e42bd6 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -68,8 +68,8 @@ typedef struct {
 } atom_exec_context;

 int atom_debug = 0;
-static int atom_execute_table_locked(struct atom_context *ctx, int 
index, uint32_t * params);
-int atom_execute_table(struct atom_context *ctx, int index, uint32_t * 
params);
+static int atom_execute_table_locked(struct atom_context *ctx, int 
index, uint32_t *params);
+int atom_execute_table(struct atom_context *ctx, int index, uint32_t 
*params);


 static uint32_t atom_arg_mask[8] = {
0x, 0x, 0x0000, 0x,

[PATCH] drm/amd/pm: open brace '{' following struct go on the same line


ERROR: open brace '{' following struct go on the same line

Signed-off-by: Ran Sun 
---
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h

index ddc488251313..0cf564ea1ed8 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -429,10 +429,10 @@ int amdgpu_pm_load_smu_firmware(struct 
amdgpu_device *adev, uint32_t *smu_versio
 int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool 
enable);
 int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, 
uint32_t size);
 int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, 
uint32_t size);
-int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,enum 
pp_clock_type type,

- uint32_t *min,uint32_t *max);
-int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev,enum 
pp_clock_type type,

-  uint32_t min,uint32_t max);
+int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum 
pp_clock_type type,

+ uint32_t *min, uint32_t *max);
+int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev, enum 
pp_clock_type type,

+  uint32_t min, uint32_t max);
 int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev);
 int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum 
smu_event_type event,

  uint64_t event_arg);

[PATCH] drm/amdgpu: open brace '{' following struct go on the same line


ERROR: open brace '{' following struct go on the same line

Signed-off-by: Ran Sun 
---
 drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h

index 52045ad59bed..eec816f0cbf9 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h
@@ -24,8 +24,7 @@
 #ifndef __AMDGPU_PM_H__
 #define __AMDGPU_PM_H__

-struct cg_flag_name
-{
+struct cg_flag_name {
u64 flag;
const char *name;
 };

Re: [PATCH v5 05/11] drm/amdgpu: Use RMW accessors for changing LNKCTL

2023-07-21 Thread Ilpo Järvinen

On Thu, 20 Jul 2023, Bjorn Helgaas wrote:

> On Mon, Jul 17, 2023 at 03:04:57PM +0300, Ilpo Järvinen wrote:
> > Don't assume that only the driver would be accessing LNKCTL. ASPM
> > policy changes can trigger write to LNKCTL outside of driver's control.
> > And in the case of upstream bridge, the driver does not even own the
> > device it's changing the registers for.
> > 
> > Use RMW capability accessors which do proper locking to avoid losing
> > concurrent updates to the register value.
> > 
> > Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts")
> > Fixes: 62a37553414a ("drm/amdgpu: add si implementation v10")
> > Suggested-by: Lukas Wunner 
> > Signed-off-by: Ilpo Järvinen 
> > Cc: sta...@vger.kernel.org
> 
> Do we have any reports of problems that are fixed by this patch (or by
> others in the series)?  If not, I'm not sure it really fits the usual
> stable kernel criteria:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/stable-kernel-rules.rst?id=v6.4

I was on the edge with this. The answer to your direct question is no, 
there are no such reports so it would be okay to leave stable out I think. 
This applies to all patches in this series.

Basically, this series came to be after Lukas noted the potential 
concurrency issues with how LNKCTL is unprotected when reviewing 
(internally) my bandwidth controller series. Then I went to look around 
all LNKCTL usage and realized existing things might already have similar 
issues.

Do you want me to send another version w/o cc stable or you'll take care 
of that?

> > ---
> >  drivers/gpu/drm/amd/amdgpu/cik.c | 36 +---
> >  drivers/gpu/drm/amd/amdgpu/si.c  | 36 +---
> >  2 files changed, 20 insertions(+), 52 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c 
> > b/drivers/gpu/drm/amd/amdgpu/cik.c
> > index 5641cf05d856..e63abdf52b6c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/cik.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
> > @@ -1574,17 +1574,8 @@ static void cik_pcie_gen3_enable(struct 
> > amdgpu_device *adev)
> > u16 bridge_cfg2, gpu_cfg2;
> > u32 max_lw, current_lw, tmp;
> >  
> > -   pcie_capability_read_word(root, PCI_EXP_LNKCTL,
> > - _cfg);
> > -   pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
> > - _cfg);
> > -
> > -   tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
> > -   pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
> > -
> > -   tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
> > -   pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
> > -  tmp16);
> > +   pcie_capability_set_word(root, PCI_EXP_LNKCTL, 
> > PCI_EXP_LNKCTL_HAWD);
> > +   pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, 
> > PCI_EXP_LNKCTL_HAWD);
> >  
> > tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
> > max_lw = (tmp & 
> > PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
> > @@ -1637,21 +1628,14 @@ static void cik_pcie_gen3_enable(struct 
> > amdgpu_device *adev)
> > msleep(100);
> >  
> > /* linkctl */
> > -   pcie_capability_read_word(root, PCI_EXP_LNKCTL,
> > - );
> > -   tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
> > -   tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
> > -   pcie_capability_write_word(root, PCI_EXP_LNKCTL,
> > -  tmp16);
> > -
> > -   pcie_capability_read_word(adev->pdev,
> > - PCI_EXP_LNKCTL,
> > - );
> > -   tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
> > -   tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
> > -   pcie_capability_write_word(adev->pdev,
> > -  PCI_EXP_LNKCTL,
> > -  tmp16);
> > +   pcie_capability_clear_and_set_word(root, 
> > PCI_EXP_LNKCTL,
> > +  
> > PCI_EXP_LNKCTL_HAWD,
> > +  bridge_cfg &
> > +  
> > PCI_EXP_LNKCTL_HAWD);
> > +   pcie_capability_clear_and_set_word(adev->pdev, 
> > PCI_EXP_LNKCTL,
> > +  
> > PCI_EXP_LNKCTL_HAWD,
> > +

[PATCH] drm/radeon: ERROR: "foo * bar" should be "foo *bar"


Fix two occurrences of the checkpatch.pl error:
ERROR: "foo * bar" should be "foo *bar"

Signed-off-by: Jianghui Xu 
---
 drivers/gpu/drm/radeon/atom.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atom.c 
b/drivers/gpu/drm/radeon/atom.c

index 11a1940bb26d..93acb0e42bd6 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -68,8 +68,8 @@ typedef struct {
 } atom_exec_context;

 int atom_debug = 0;
-static int atom_execute_table_locked(struct atom_context *ctx, int 
index, uint32_t * params);
-int atom_execute_table(struct atom_context *ctx, int index, uint32_t * 
params);
+static int atom_execute_table_locked(struct atom_context *ctx, int 
index, uint32_t *params);
+int atom_execute_table(struct atom_context *ctx, int index, uint32_t 
*params);


 static uint32_t atom_arg_mask[8] = {
0x, 0x, 0x0000, 0x,

[PATCH] drm/amd: open brace '{' following struct go on the same line


Fix the checkpatch error as open brace '{' following struct should
go on the same line.

Signed-off-by: Ran Sun 
---
 drivers/gpu/drm/amd/include/yellow_carp_offset.h | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/yellow_carp_offset.h 
b/drivers/gpu/drm/amd/include/yellow_carp_offset.h

index 0fea6a746611..a2c8dca2425e 100644
--- a/drivers/gpu/drm/amd/include/yellow_carp_offset.h
+++ b/drivers/gpu/drm/amd/include/yellow_carp_offset.h
@@ -7,13 +7,11 @@
 #define MAX_SEGMENT 6


-struct IP_BASE_INSTANCE
-{
+struct IP_BASE_INSTANCE {
 unsigned int segment[MAX_SEGMENT];
 } __maybe_unused;

-struct IP_BASE
-{
+struct IP_BASE {
 struct IP_BASE_INSTANCE instance[MAX_INSTANCE];
 } __maybe_unused;

[PATCH] drm/amdgpu: Fix do not add new typedefs in amdgpu_fw_attestation.c

Fixes the following to align to coding style:

WARNING: do not add new typedefs
+typedef struct FW_ATT_DB_HEADER

WARNING: do not add new typedefs
+typedef struct FW_ATT_RECORD

WARNING: Symbolic permissions 'S_IRUSR' are not preferred. Consider using octal 
permissions '0400'.
+   S_IRUSR,

ERROR: "(foo*)" should be "(foo *)"
WARNING: please, no space before tabs

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
 .../drm/amd/amdgpu/amdgpu_fw_attestation.c| 38 +--
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
index 2ca3c329de6d..2d4b67175b55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
@@ -32,17 +32,15 @@
 #include "soc15_common.h"
 
 #define FW_ATTESTATION_DB_COOKIE0x143b6a37
-#define FW_ATTESTATION_RECORD_VALID1
+#define FW_ATTESTATION_RECORD_VALID1
 #define FW_ATTESTATION_MAX_SIZE4096
 
-typedef struct FW_ATT_DB_HEADER
-{
+struct FW_ATT_DB_HEADER {
uint32_t AttDbVersion;   /* version of the fwar feature */
uint32_t AttDbCookie;/* cookie as an extra check for 
corrupt data */
-} FW_ATT_DB_HEADER;
+};
 
-typedef struct FW_ATT_RECORD
-{
+struct FW_ATT_RECORD {
uint16_t AttFwIdV1;  /* Legacy FW Type field */
uint16_t AttFwIdV2;  /* V2 FW ID field */
uint32_t AttFWVersion;   /* FW Version */
@@ -50,7 +48,7 @@ typedef struct FW_ATT_RECORD
uint8_t  AttSource;  /* FW source indicator */
uint8_t  RecordValid;/* Indicates whether the record is a 
valid entry */
uint32_t AttFwTaId;  /* Ta ID (only in TA Attestation 
Table) */
-} FW_ATT_RECORD;
+};
 
 static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
  char __user *buf,
@@ -60,15 +58,15 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct 
file *f,
struct amdgpu_device *adev = (struct amdgpu_device 
*)file_inode(f)->i_private;
uint64_t records_addr = 0;
uint64_t vram_pos = 0;
-   FW_ATT_DB_HEADER fw_att_hdr = {0};
-   FW_ATT_RECORD fw_att_record = {0};
+   struct FW_ATT_DB_HEADER fw_att_hdr = {0};
+   struct FW_ATT_RECORD fw_att_record = {0};
 
-   if (size < sizeof(FW_ATT_RECORD)) {
+   if (size < sizeof(struct FW_ATT_RECORD)) {
DRM_WARN("FW attestation input buffer not enough memory");
return -EINVAL;
}
 
-   if ((*pos + sizeof(FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) {
+   if ((*pos + sizeof(struct FW_ATT_DB_HEADER)) >= 
FW_ATTESTATION_MAX_SIZE) {
DRM_WARN("FW attestation out of bounds");
return 0;
}
@@ -83,8 +81,8 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file 
*f,
if (*pos == 0) {
amdgpu_device_vram_access(adev,
  vram_pos,
- (uint32_t*)_att_hdr,
- sizeof(FW_ATT_DB_HEADER),
+ (uint32_t *)_att_hdr,
+ sizeof(struct FW_ATT_DB_HEADER),
  false);
 
if (fw_att_hdr.AttDbCookie != FW_ATTESTATION_DB_COOKIE) {
@@ -96,20 +94,20 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct 
file *f,
}
 
amdgpu_device_vram_access(adev,
- vram_pos + sizeof(FW_ATT_DB_HEADER) + *pos,
- (uint32_t*)_att_record,
- sizeof(FW_ATT_RECORD),
+ vram_pos + sizeof(struct FW_ATT_DB_HEADER) + 
*pos,
+ (uint32_t *)_att_record,
+ sizeof(struct FW_ATT_RECORD),
  false);
 
if (fw_att_record.RecordValid != FW_ATTESTATION_RECORD_VALID)
return 0;
 
-   if (copy_to_user(buf, (void*)_att_record, sizeof(FW_ATT_RECORD)))
+   if (copy_to_user(buf, (void *)_att_record, sizeof(struct 
FW_ATT_RECORD)))
return -EINVAL;
 
-   *pos += sizeof(FW_ATT_RECORD);
+   *pos += sizeof(struct FW_ATT_RECORD);
 
-   return sizeof(FW_ATT_RECORD);
+   return sizeof(struct FW_ATT_RECORD);
 }
 
 static const struct file_operations amdgpu_fw_attestation_debugfs_ops = {
@@ -136,7 +134,7 @@ void amdgpu_fw_attestation_debugfs_init(struct 
amdgpu_device *adev)
return;
 
debugfs_create_file("amdgpu_fw_attestation",
-   S_IRUSR,
+   0400,
adev_to_drm(adev)->primary->debugfs_root,

[PATCH] drm/amdgpu: Prefer #if IS_ENABLED over #if defined in amdgpu_drv.c