[PATCH v5 17/45] drm/amd: Load GFX10 microcode during early_init

2023-01-04 Thread Mario Limonciello
Simplifies the code so that GFX10 will get the firmware
name from `amdgpu_ucode_ip_version_decode` and then use this filename
to load microcode as part of the early_init process.

Any failures will cause the driver to fail to probe before the firmware
framebuffer has been removed.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 84 ++
 1 file changed, 18 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index d36dd823a319..585b301856d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3968,9 +3968,9 @@ static void gfx_v10_0_check_gfxoff_flag(struct 
amdgpu_device *adev)
 
 static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 {
-   const char *chip_name;
char fw_name[40];
-   char *wks = "";
+   char ucode_prefix[30];
+   const char *wks = "";
int err;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
uint16_t version_major;
@@ -3978,71 +3978,31 @@ static int gfx_v10_0_init_microcode(struct 
amdgpu_device *adev)
 
DRM_DEBUG("\n");
 
-   switch (adev->ip_versions[GC_HWIP][0]) {
-   case IP_VERSION(10, 1, 10):
-   chip_name = "navi10";
-   break;
-   case IP_VERSION(10, 1, 1):
-   chip_name = "navi14";
-   if (!(adev->pdev->device == 0x7340 &&
- adev->pdev->revision != 0x00))
-   wks = "_wks";
-   break;
-   case IP_VERSION(10, 1, 2):
-   chip_name = "navi12";
-   break;
-   case IP_VERSION(10, 3, 0):
-   chip_name = "sienna_cichlid";
-   break;
-   case IP_VERSION(10, 3, 2):
-   chip_name = "navy_flounder";
-   break;
-   case IP_VERSION(10, 3, 1):
-   chip_name = "vangogh";
-   break;
-   case IP_VERSION(10, 3, 4):
-   chip_name = "dimgrey_cavefish";
-   break;
-   case IP_VERSION(10, 3, 5):
-   chip_name = "beige_goby";
-   break;
-   case IP_VERSION(10, 3, 3):
-   chip_name = "yellow_carp";
-   break;
-   case IP_VERSION(10, 3, 6):
-   chip_name = "gc_10_3_6";
-   break;
-   case IP_VERSION(10, 1, 3):
-   case IP_VERSION(10, 1, 4):
-   chip_name = "cyan_skillfish2";
-   break;
-   case IP_VERSION(10, 3, 7):
-   chip_name = "gc_10_3_7";
-   break;
-   default:
-   BUG();
-   }
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1) &&
+  (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)))
+   wks = "_wks";
+   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, 
wks);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", ucode_prefix, 
wks);
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, 
wks);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", ucode_prefix, 
wks);
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, 
wks);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", ucode_prefix, 
wks);
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
 
if (!amdgpu_sriov_vf(adev)) {
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
ucode_prefix);
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
/* don't check this.  There are apparently firmwares in the 
wild with
 * incorrect size in the header
@@ -4051,7 +4011,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device 
*adev)
goto out;
if (err)
dev_dbg(adev->dev,
-   "g

[PATCH v5 19/45] drm/amd: Load GFX11 microcode during early_init

2023-01-04 Thread Mario Limonciello
If GFX11 microcode is required but not available, the failure is only
detected in sw_init, after the firmware framebuffer has already been
released, and the screen will freeze.

Move the request for GFX11 microcode into the early_init phase
so that if it's not available, driver init will fail.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 22 ++
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index ce018331b093..28efea3b9070 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -532,6 +532,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
}
 
+   if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+   err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
+
/* only one MEC for gfx 11.0.0. */
adev->gfx.mec2_fw = NULL;
 
@@ -682,19 +685,11 @@ static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
 }
 
-static int gfx_v11_0_me_init(struct amdgpu_device *adev)
+static void gfx_v11_0_me_init(struct amdgpu_device *adev)
 {
-   int r;
-
bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
 
amdgpu_gfx_graphics_queue_acquire(adev);
-
-   r = gfx_v11_0_init_microcode(adev);
-   if (r)
-   DRM_ERROR("Failed to load gfx firmware!\n");
-
-   return r;
 }
 
 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
@@ -1307,9 +1302,7 @@ static int gfx_v11_0_sw_init(void *handle)
}
}
 
-   r = gfx_v11_0_me_init(adev);
-   if (r)
-   return r;
+   gfx_v11_0_me_init(adev);
 
r = gfx_v11_0_rlc_init(adev);
if (r) {
@@ -1377,9 +1370,6 @@ static int gfx_v11_0_sw_init(void *handle)
 
/* allocate visible FB for rlc auto-loading fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
-   r = gfx_v11_0_init_toc_microcode(adev);
-   if (r)
-   dev_err(adev->dev, "Failed to load toc firmware!\n");
r = gfx_v11_0_rlc_autoload_buffer_init(adev);
if (r)
return r;
@@ -4648,7 +4638,7 @@ static int gfx_v11_0_early_init(void *handle)
 
gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
 
-   return 0;
+   return gfx_v11_0_init_microcode(adev);
 }
 
 static int gfx_v11_0_ras_late_init(void *handle)
-- 
2.34.1



[PATCH v5 14/45] drm/amd: Use `amdgpu_ucode_*` helpers for GFX9

2023-01-04 Thread Mario Limonciello
The `amdgpu_ucode_request` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

The `amdgpu_ucode_release` helper will provide symmetry on unload.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 82 +++
 1 file changed, 21 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index f202b45c413c..3ae46eab693c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1078,18 +1078,12 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring 
*ring, long timeout)
 
 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
 {
-   release_firmware(adev->gfx.pfp_fw);
-   adev->gfx.pfp_fw = NULL;
-   release_firmware(adev->gfx.me_fw);
-   adev->gfx.me_fw = NULL;
-   release_firmware(adev->gfx.ce_fw);
-   adev->gfx.ce_fw = NULL;
-   release_firmware(adev->gfx.rlc_fw);
-   adev->gfx.rlc_fw = NULL;
-   release_firmware(adev->gfx.mec_fw);
-   adev->gfx.mec_fw = NULL;
-   release_firmware(adev->gfx.mec2_fw);
-   adev->gfx.mec2_fw = NULL;
+   amdgpu_ucode_release(adev->gfx.pfp_fw);
+   amdgpu_ucode_release(adev->gfx.me_fw);
+   amdgpu_ucode_release(adev->gfx.ce_fw);
+   amdgpu_ucode_release(adev->gfx.rlc_fw);
+   amdgpu_ucode_release(adev->gfx.mec_fw);
+   amdgpu_ucode_release(adev->gfx.mec2_fw);
 
kfree(adev->gfx.rlc.register_list_format);
 }
@@ -1257,43 +1251,28 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct 
amdgpu_device *adev,
int err;
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
-   err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+   err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
-   err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.me_fw);
+   err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
-   err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+   err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
 
 out:
if (err) {
-   dev_err(adev->dev,
-   "gfx9: Failed to init firmware \"%s\"\n",
-   fw_name);
-   release_firmware(adev->gfx.pfp_fw);
-   adev->gfx.pfp_fw = NULL;
-   release_firmware(adev->gfx.me_fw);
-   adev->gfx.me_fw = NULL;
-   release_firmware(adev->gfx.ce_fw);
-   adev->gfx.ce_fw = NULL;
+   amdgpu_ucode_release(adev->gfx.pfp_fw);
+   amdgpu_ucode_release(adev->gfx.me_fw);
+   amdgpu_ucode_release(adev->gfx.ce_fw);
}
return err;
 }
@@ -1328,10 +1307,7 @@ static int gfx_v9_0_init_rlc_microcode(struct 
amdgpu_device *adev,
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", 
chip_name);
else
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
chip_name);
-   err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+   err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 
*)adev->gfx.rlc_fw->data;
@@ -1340,13 +1316,9 @@ static int gfx_v9_0_init_rlc_microcode(struct 
amdgpu_device *adev,
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
 out:
-   if (err) {
-   dev_err(adev->dev,
-   "gfx9: Failed to init firmware \"%s\"\n",
-   fw_name);
-   release_firmware(adev->gfx.rlc_fw);
-   adev->gfx.rlc_fw = NULL;
-   

[PATCH v5 24/45] drm/amd/display: Load DMUB microcode during early_init

2023-01-04 Thread Mario Limonciello
If DMUB is required for an ASIC, ensure that the microcode is available
and validates during early_init.

Any failures will cause the driver to fail to probe before the firmware
framebuffer has been removed.

Reviewed-by: Harry Wentland 
Signed-off-by: Mario Limonciello 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 89 ---
 1 file changed, 58 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 4829b5431e4c..c8c5d37c8b3a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1945,7 +1945,6 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
struct dmub_srv_fb_info *fb_info;
struct dmub_srv *dmub_srv;
const struct dmcub_firmware_header_v1_0 *hdr;
-   const char *fw_name_dmub;
enum dmub_asic dmub_asic;
enum dmub_status status;
int r;
@@ -1953,73 +1952,46 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
switch (adev->ip_versions[DCE_HWIP][0]) {
case IP_VERSION(2, 1, 0):
dmub_asic = DMUB_ASIC_DCN21;
-   fw_name_dmub = FIRMWARE_RENOIR_DMUB;
-   if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
-   fw_name_dmub = FIRMWARE_GREEN_SARDINE_DMUB;
break;
case IP_VERSION(3, 0, 0):
-   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) {
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
dmub_asic = DMUB_ASIC_DCN30;
-   fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB;
-   } else {
+   else
dmub_asic = DMUB_ASIC_DCN30;
-   fw_name_dmub = FIRMWARE_NAVY_FLOUNDER_DMUB;
-   }
break;
case IP_VERSION(3, 0, 1):
dmub_asic = DMUB_ASIC_DCN301;
-   fw_name_dmub = FIRMWARE_VANGOGH_DMUB;
break;
case IP_VERSION(3, 0, 2):
dmub_asic = DMUB_ASIC_DCN302;
-   fw_name_dmub = FIRMWARE_DIMGREY_CAVEFISH_DMUB;
break;
case IP_VERSION(3, 0, 3):
dmub_asic = DMUB_ASIC_DCN303;
-   fw_name_dmub = FIRMWARE_BEIGE_GOBY_DMUB;
break;
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
dmub_asic = (adev->external_rev_id == YELLOW_CARP_B0) ? 
DMUB_ASIC_DCN31B : DMUB_ASIC_DCN31;
-   fw_name_dmub = FIRMWARE_YELLOW_CARP_DMUB;
break;
case IP_VERSION(3, 1, 4):
dmub_asic = DMUB_ASIC_DCN314;
-   fw_name_dmub = FIRMWARE_DCN_314_DMUB;
break;
case IP_VERSION(3, 1, 5):
dmub_asic = DMUB_ASIC_DCN315;
-   fw_name_dmub = FIRMWARE_DCN_315_DMUB;
break;
case IP_VERSION(3, 1, 6):
dmub_asic = DMUB_ASIC_DCN316;
-   fw_name_dmub = FIRMWARE_DCN316_DMUB;
break;
case IP_VERSION(3, 2, 0):
dmub_asic = DMUB_ASIC_DCN32;
-   fw_name_dmub = FIRMWARE_DCN_V3_2_0_DMCUB;
break;
case IP_VERSION(3, 2, 1):
dmub_asic = DMUB_ASIC_DCN321;
-   fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
break;
default:
/* ASIC doesn't support DMUB. */
return 0;
}
 
-   r = request_firmware_direct(&adev->dm.dmub_fw, fw_name_dmub, adev->dev);
-   if (r) {
-   DRM_ERROR("DMUB firmware loading failed: %d\n", r);
-   return 0;
-   }
-
-   r = amdgpu_ucode_validate(adev->dm.dmub_fw);
-   if (r) {
-   DRM_ERROR("Couldn't validate DMUB firmware: %d\n", r);
-   return 0;
-   }
-
hdr = (const struct dmcub_firmware_header_v1_0 *)adev->dm.dmub_fw->data;
adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version);
 
@@ -4513,6 +4485,61 @@ DEVICE_ATTR_WO(s3_debug);
 
 #endif
 
+static int dm_init_microcode(struct amdgpu_device *adev)
+{
+   char *fw_name_dmub;
+   int r;
+
+   switch (adev->ip_versions[DCE_HWIP][0]) {
+   case IP_VERSION(2, 1, 0):
+   fw_name_dmub = FIRMWARE_RENOIR_DMUB;
+   if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
+   fw_name_dmub = FIRMWARE_GREEN_SARDINE_DMUB;
+   break;
+   case IP_VERSION(3, 0, 0):
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
+   fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB;
+   else
+   fw_name_dmub = FIRMWARE_NAVY_FLOUNDER_DMUB;
+   break;
+   case IP_VERSION(3, 0, 1):
+   fw_name

[PATCH v5 20/45] drm/amd: Parse both v1 and v2 TA microcode headers using same function

2023-01-04 Thread Mario Limonciello
Several IP versions duplicate code and can't use the common helpers.
Move this code into a single function so that the helpers can be used.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 120 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  |   2 +-
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c  |  60 +---
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  |  74 ++-
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c  |  62 +---
 5 files changed, 107 insertions(+), 211 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 7a2fc920739b..d971e3785eaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3272,41 +3272,75 @@ static int parse_ta_bin_descriptor(struct psp_context 
*psp,
return 0;
 }
 
-int psp_init_ta_microcode(struct psp_context *psp,
- const char *chip_name)
+static int parse_ta_v1_microcode(struct psp_context *psp)
 {
+   const struct ta_firmware_header_v1_0 *ta_hdr;
struct amdgpu_device *adev = psp->adev;
-   char fw_name[PSP_FW_NAME_LEN];
-   const struct ta_firmware_header_v2_0 *ta_hdr;
-   int err = 0;
-   int ta_index = 0;
 
-   if (!chip_name) {
-   dev_err(adev->dev, "invalid chip name for ta microcode\n");
+   ta_hdr = (const struct ta_firmware_header_v1_0 *)
+adev->psp.ta_fw->data;
+
+   if (le16_to_cpu(ta_hdr->header.header_version_major) != 1)
return -EINVAL;
+
+   adev->psp.xgmi_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->xgmi.fw_version);
+   adev->psp.xgmi_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->xgmi.size_bytes);
+   adev->psp.xgmi_context.context.bin_desc.start_addr =
+   (uint8_t *)ta_hdr +
+   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+   adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+   adev->psp.ras_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->ras.fw_version);
+   adev->psp.ras_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->ras.size_bytes);
+   adev->psp.ras_context.context.bin_desc.start_addr =
+   (uint8_t *)adev->psp.xgmi_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->ras.offset_bytes);
+   adev->psp.hdcp_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->hdcp.fw_version);
+   adev->psp.hdcp_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->hdcp.size_bytes);
+   adev->psp.hdcp_context.context.bin_desc.start_addr =
+   (uint8_t *)ta_hdr +
+   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+   adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+   adev->psp.dtm_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->dtm.fw_version);
+   adev->psp.dtm_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->dtm.size_bytes);
+   adev->psp.dtm_context.context.bin_desc.start_addr =
+   (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->dtm.offset_bytes);
+   if (adev->apu_flags & AMD_APU_IS_RENOIR) {
+   adev->psp.securedisplay_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->securedisplay.fw_version);
+   adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+   adev->psp.securedisplay_context.context.bin_desc.start_addr =
+   (uint8_t 
*)adev->psp.hdcp_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
}
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
-   err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
+   return 0;
+}
 
-   err = amdgpu_ucode_validate(adev->psp.ta_fw);
-   if (err)
-   goto out;
+static int parse_ta_v2_microcode(struct psp_context *psp)
+{
+   const struct ta_firmware_header_v2_0 *ta_hdr;
+   struct amdgpu_device *adev = psp->adev;
+   int err = 0;
+   int ta_index = 0;
 
ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data;
 
-   if (le16_to_cpu(ta_hdr->header.header_version_major) != 2) {
-   dev_err(adev->dev, "unsupported TA header version\n");
-   err = -EINVAL;
-   goto out;
-   }
+  

[PATCH v5 18/45] drm/amd: Use `amdgpu_ucode_*` helpers for GFX11

2023-01-04 Thread Mario Limonciello
The `amdgpu_ucode_request` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

The `amdgpu_ucode_release` helper will provide symmetry on unload.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 102 +
 drivers/gpu/drm/amd/amdgpu/imu_v11_0.c |   7 +-
 2 files changed, 37 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a56c6e106d00..ce018331b093 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -431,18 +431,37 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring 
*ring, long timeout)
 
 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
 {
-   release_firmware(adev->gfx.pfp_fw);
-   adev->gfx.pfp_fw = NULL;
-   release_firmware(adev->gfx.me_fw);
-   adev->gfx.me_fw = NULL;
-   release_firmware(adev->gfx.rlc_fw);
-   adev->gfx.rlc_fw = NULL;
-   release_firmware(adev->gfx.mec_fw);
-   adev->gfx.mec_fw = NULL;
+   amdgpu_ucode_release(adev->gfx.pfp_fw);
+   amdgpu_ucode_release(adev->gfx.me_fw);
+   amdgpu_ucode_release(adev->gfx.rlc_fw);
+   amdgpu_ucode_release(adev->gfx.mec_fw);
 
kfree(adev->gfx.rlc.register_list_format);
 }
 
+static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, char 
*ucode_prefix)
+{
+   const struct psp_firmware_header_v1_0 *toc_hdr;
+   int err = 0;
+   char fw_name[40];
+
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
+   err = amdgpu_ucode_load(adev, &adev->psp.toc_fw, fw_name);
+   if (err)
+   goto out;
+
+   toc_hdr = (const struct psp_firmware_header_v1_0 
*)adev->psp.toc_fw->data;
+   adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+   adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+   adev->psp.toc.size_bytes = 
le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+   adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
+   
le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
+   return 0;
+out:
+   amdgpu_ucode_release(adev->psp.toc_fw);
+   return err;
+}
+
 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
 {
char fw_name[40];
@@ -457,10 +476,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
-   err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+   err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
if (err)
goto out;
/* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
@@ -477,10 +493,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
-   err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.me_fw);
+   err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
if (err)
goto out;
if (adev->gfx.rs64_enable) {
@@ -493,10 +506,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
 
if (!amdgpu_sriov_vf(adev)) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
ucode_prefix);
-   err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+   err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 
*)adev->gfx.rlc_fw->data;
@@ -508,10 +518,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
-   err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+   err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
if (err)
goto out;
if (adev->gfx.rs64_enable) {
@@ -530,54 +537,15 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
 
 out:
if (err) {
- 

[PATCH v5 21/45] drm/amd: Avoid BUG() for case of SRIOV missing IP version

2023-01-04 Thread Mario Limonciello
No need to crash the kernel.  AMDGPU will now fail to probe.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index d971e3785eaf..a4a62753bfd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -380,7 +380,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
break;
default:
-   BUG();
+   ret = -EINVAL;
break;
}
return ret;
-- 
2.34.1



[PATCH v5 15/45] drm/amd: Load GFX9 microcode during early_init

2023-01-04 Thread Mario Limonciello
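If GFX9 microcode is required but not available, the failure is only
detected in sw_init, after the firmware framebuffer has already been
released, and the screen will freeze.

Move the request for GFX9 microcode into the early_init phase
so that if it's not available, driver init will fail.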

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 58 +--
 1 file changed, 9 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 3ae46eab693c..f0c948f11213 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1245,7 +1245,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct 
amdgpu_device *adev)
 }
 
 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
 {
char fw_name[30];
int err;
@@ -1278,7 +1278,7 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct 
amdgpu_device *adev,
 }
 
 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+  char *chip_name)
 {
char fw_name[30];
int err;
@@ -1333,7 +1333,7 @@ static bool gfx_v9_0_load_mec2_fw_bin_support(struct 
amdgpu_device *adev)
 }
 
 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
 {
char fw_name[30];
int err;
@@ -1379,58 +1379,24 @@ static int gfx_v9_0_init_cp_compute_microcode(struct 
amdgpu_device *adev,
 
 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 {
-   const char *chip_name;
+   char ucode_prefix[30];
int r;
 
DRM_DEBUG("\n");
-
-   switch (adev->ip_versions[GC_HWIP][0]) {
-   case IP_VERSION(9, 0, 1):
-   chip_name = "vega10";
-   break;
-   case IP_VERSION(9, 2, 1):
-   chip_name = "vega12";
-   break;
-   case IP_VERSION(9, 4, 0):
-   chip_name = "vega20";
-   break;
-   case IP_VERSION(9, 2, 2):
-   case IP_VERSION(9, 1, 0):
-   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
-   chip_name = "raven2";
-   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
-   chip_name = "picasso";
-   else
-   chip_name = "raven";
-   break;
-   case IP_VERSION(9, 4, 1):
-   chip_name = "arcturus";
-   break;
-   case IP_VERSION(9, 3, 0):
-   if (adev->apu_flags & AMD_APU_IS_RENOIR)
-   chip_name = "renoir";
-   else
-   chip_name = "green_sardine";
-   break;
-   case IP_VERSION(9, 4, 2):
-   chip_name = "aldebaran";
-   break;
-   default:
-   BUG();
-   }
+   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
/* No CPG in Arcturus */
if (adev->gfx.num_gfx_rings) {
-   r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
+   r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
if (r)
return r;
}
 
-   r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
+   r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
if (r)
return r;
 
-   r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
+   r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
if (r)
return r;
 
@@ -2118,12 +2084,6 @@ static int gfx_v9_0_sw_init(void *handle)
 
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
 
-   r = gfx_v9_0_init_microcode(adev);
-   if (r) {
-   DRM_ERROR("Failed to load gfx firmware!\n");
-   return r;
-   }
-
if (adev->gfx.rlc.funcs) {
if (adev->gfx.rlc.funcs->init) {
r = adev->gfx.rlc.funcs->init(adev);
@@ -4565,7 +4525,7 @@ static int gfx_v9_0_early_init(void *handle)
/* init rlcg reg access ctrl */
gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
 
-   return 0;
+   return gfx_v9_0_init_microcode(adev);
 }
 
 static int gfx_v9_0_ecc_late_init(void *handle)
-- 
2.34.1



[PATCH v5 13/45] drm/amd: Remove superfluous assignment for `adev->mes.adev`

2023-01-04 Thread Mario Limonciello
`amdgpu_mes_init` already sets `adev->mes.adev`, so there is no need
to also set it in the IP specific versions.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c | 1 -
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index a2c96a86538c..76151dddf0a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -924,7 +924,6 @@ static int mes_v10_1_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int pipe, r;
 
-   adev->mes.adev = adev;
adev->mes.funcs = &mes_v10_1_funcs;
adev->mes.kiq_hw_init = &mes_v10_1_kiq_hw_init;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 4dab62335d0d..824af819f447 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -1013,7 +1013,6 @@ static int mes_v11_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int pipe, r;
 
-   adev->mes.adev = adev;
adev->mes.funcs = &mes_v11_0_funcs;
adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;
-- 
2.34.1



[PATCH v5 12/45] drm/amd: Use `amdgpu_ucode_*` helpers for MES

2023-01-04 Thread Mario Limonciello
The `amdgpu_ucode_request` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

The `amdgpu_ucode_release` helper provides symmetry for releasing firmware.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 10 ++
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c  | 10 +-
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 10 +-
 3 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index dd8f35234507..73ec471a8c2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1438,11 +1438,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device 
*adev, int pipe)
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
ucode_prefix,
pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
-   r = request_firmware(&adev->mes.fw[pipe], fw_name, adev->dev);
-   if (r)
-   goto out;
-
-   r = amdgpu_ucode_validate(adev->mes.fw[pipe]);
+   r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name);
if (r)
goto out;
 
@@ -1482,9 +1478,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, 
int pipe)
}
 
return 0;
-
 out:
-   release_firmware(adev->mes.fw[pipe]);
-   adev->mes.fw[pipe] = NULL;
+   amdgpu_ucode_release(adev->mes.fw[pipe]);
return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 9c5ff8b7c202..a2c96a86538c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -379,13 +379,6 @@ static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
.resume_gang = mes_v10_1_resume_gang,
 };
 
-static void mes_v10_1_free_microcode(struct amdgpu_device *adev,
-enum admgpu_mes_pipe pipe)
-{
-   release_firmware(adev->mes.fw[pipe]);
-   adev->mes.fw[pipe] = NULL;
-}
-
 static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev,
   enum admgpu_mes_pipe pipe)
 {
@@ -979,8 +972,7 @@ static int mes_v10_1_sw_fini(void *handle)
amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
  &adev->mes.eop_gpu_addr[pipe],
  NULL);
-
-   mes_v10_1_free_microcode(adev, pipe);
+   amdgpu_ucode_release(adev->mes.fw[pipe]);
}
 
amdgpu_bo_free_kernel(&adev->gfx.kiq.ring.mqd_obj,
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 3af77a32baac..4dab62335d0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -459,13 +459,6 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
.misc_op = mes_v11_0_misc_op,
 };
 
-static void mes_v11_0_free_microcode(struct amdgpu_device *adev,
-enum admgpu_mes_pipe pipe)
-{
-   release_firmware(adev->mes.fw[pipe]);
-   adev->mes.fw[pipe] = NULL;
-}
-
 static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev,
   enum admgpu_mes_pipe pipe)
 {
@@ -1069,8 +1062,7 @@ static int mes_v11_0_sw_fini(void *handle)
amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
  &adev->mes.eop_gpu_addr[pipe],
  NULL);
-
-   mes_v11_0_free_microcode(adev, pipe);
+   amdgpu_ucode_release(adev->mes.fw[pipe]);
}
 
amdgpu_bo_free_kernel(&adev->gfx.kiq.ring.mqd_obj,
-- 
2.34.1



[PATCH v5 10/45] drm/amd: Load VCN microcode during early_init

2023-01-04 Thread Mario Limonciello
Simplifies the code so that all VCN versions will get the firmware
name from `amdgpu_ucode_ip_version_decode` and then use this filename
to load microcode as part of the early_init process.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 91 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  1 +
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c   |  5 +-
 7 files changed, 50 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index b5692f825589..55bbe4c8ff5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -36,25 +36,25 @@
 #include "soc15d.h"
 
 /* Firmware Names */
-#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
-#define FIRMWARE_PICASSO   "amdgpu/picasso_vcn.bin"
-#define FIRMWARE_RAVEN2"amdgpu/raven2_vcn.bin"
-#define FIRMWARE_ARCTURUS  "amdgpu/arcturus_vcn.bin"
-#define FIRMWARE_RENOIR"amdgpu/renoir_vcn.bin"
-#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
-#define FIRMWARE_NAVI10"amdgpu/navi10_vcn.bin"
-#define FIRMWARE_NAVI14"amdgpu/navi14_vcn.bin"
-#define FIRMWARE_NAVI12"amdgpu/navi12_vcn.bin"
-#define FIRMWARE_SIENNA_CICHLID"amdgpu/sienna_cichlid_vcn.bin"
-#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
-#define FIRMWARE_VANGOGH   "amdgpu/vangogh_vcn.bin"
-#define FIRMWARE_DIMGREY_CAVEFISH  "amdgpu/dimgrey_cavefish_vcn.bin"
-#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
-#define FIRMWARE_BEIGE_GOBY"amdgpu/beige_goby_vcn.bin"
-#define FIRMWARE_YELLOW_CARP   "amdgpu/yellow_carp_vcn.bin"
-#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
-#define FIRMWARE_VCN4_0_0  "amdgpu/vcn_4_0_0.bin"
-#define FIRMWARE_VCN4_0_2  "amdgpu/vcn_4_0_2.bin"
+#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
+#define FIRMWARE_PICASSO   "amdgpu/picasso_vcn.bin"
+#define FIRMWARE_RAVEN2"amdgpu/raven2_vcn.bin"
+#define FIRMWARE_ARCTURUS  "amdgpu/arcturus_vcn.bin"
+#define FIRMWARE_RENOIR"amdgpu/renoir_vcn.bin"
+#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
+#define FIRMWARE_NAVI10"amdgpu/navi10_vcn.bin"
+#define FIRMWARE_NAVI14"amdgpu/navi14_vcn.bin"
+#define FIRMWARE_NAVI12"amdgpu/navi12_vcn.bin"
+#define FIRMWARE_SIENNA_CICHLID"amdgpu/sienna_cichlid_vcn.bin"
+#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
+#define FIRMWARE_VANGOGH   "amdgpu/vangogh_vcn.bin"
+#define FIRMWARE_DIMGREY_CAVEFISH  "amdgpu/dimgrey_cavefish_vcn.bin"
+#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
+#define FIRMWARE_BEIGE_GOBY"amdgpu/beige_goby_vcn.bin"
+#define FIRMWARE_YELLOW_CARP   "amdgpu/yellow_carp_vcn.bin"
+#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
+#define FIRMWARE_VCN4_0_0  "amdgpu/vcn_4_0_0.bin"
+#define FIRMWARE_VCN4_0_2  "amdgpu/vcn_4_0_2.bin"
 #define FIRMWARE_VCN4_0_4  "amdgpu/vcn_4_0_4.bin"
 
 MODULE_FIRMWARE(FIRMWARE_RAVEN);
@@ -80,10 +80,24 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
 
 static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
 
+int amdgpu_vcn_early_init(struct amdgpu_device *adev)
+{
+   char ucode_prefix[30];
+   char fw_name[40];
+   int r;
+
+   amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
+   r = amdgpu_ucode_request(adev, &adev->vcn.fw, fw_name);
+   if (r)
+   amdgpu_ucode_release(adev->vcn.fw);
+
+   return r;
+}
+
 int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 {
unsigned long bo_size;
-   const char *fw_name;
const struct common_firmware_header *hdr;
unsigned char fw_check;
unsigned int fw_shared_size, log_offset;
@@ -99,46 +113,27 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
switch (adev->ip_versions[UVD_HWIP][0]) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
-   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
-   fw_name = FIRMWARE_RAVEN2;
-   else if (adev->apu_flags & AMD_APU_IS_PICASSO)

[PATCH v5 09/45] drm/amd: Use `amdgpu_ucode_*` helpers for VCN

2023-01-04 Thread Mario Limonciello
The `amdgpu_ucode_request` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

The `amdgpu_ucode_release` helper is for symmetry.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 16 +++-
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a23e26b272b4..b5692f825589 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -206,19 +206,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
return -EINVAL;
}
 
-   r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
+   r = amdgpu_ucode_request(adev, &adev->vcn.fw, fw_name);
if (r) {
-   dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
-   fw_name);
-   return r;
-   }
-
-   r = amdgpu_ucode_validate(adev->vcn.fw);
-   if (r) {
-   dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware 
\"%s\"\n",
-   fw_name);
-   release_firmware(adev->vcn.fw);
-   adev->vcn.fw = NULL;
+   amdgpu_ucode_release(adev->vcn.fw);
return r;
}
 
@@ -333,7 +323,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
}
 
-   release_firmware(adev->vcn.fw);
+   amdgpu_ucode_release(adev->vcn.fw);
mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
mutex_destroy(&adev->vcn.vcn_pg_lock);
 
-- 
2.34.1



[PATCH v5 07/45] drm/amd: Convert SDMA to use `amdgpu_ucode_ip_version_decode`

2023-01-04 Thread Mario Limonciello
Simplifies the code so that all SDMA versions will get the firmware
name from `amdgpu_ucode_ip_version_decode`.

Signed-off-by: Mario Limonciello 
---
v4->v5:
 * Use instance number to build names like before
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 12 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  4 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 47 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 30 +
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   | 55 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   | 25 +--
 6 files changed, 17 insertions(+), 156 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 1668360bc699..f8e4a52ab67a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -200,15 +200,21 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device 
*adev,
 }
 
 int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
-  char *fw_name, u32 instance,
-  bool duplicate)
+  u32 instance, bool duplicate)
 {
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
int err = 0, i;
const struct sdma_firmware_header_v2_0 *sdma_hdr;
uint16_t version_major;
-
+   char ucode_prefix[30];
+   char fw_name[40];
+
+   amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+   if (instance == 0)
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", 
ucode_prefix);
+   else
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", 
ucode_prefix, i);
err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, 
fw_name);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 7d99205c2e01..2d16e6d36728 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -124,8 +124,8 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device 
*adev,
 int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
  struct amdgpu_irq_src *source,
  struct amdgpu_iv_entry *entry);
-int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
-char *fw_name, u32 instance, bool duplicate);
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
+  bool duplicate);
 void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
 bool duplicate);
 void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4d780e4430e7..017ae298558e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -575,60 +575,17 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device 
*adev)
 // vega10 real chip need to use PSP to load firmware
 static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
 {
-   const char *chip_name;
-   char fw_name[30];
int ret, i;
 
-   DRM_DEBUG("\n");
-
-   switch (adev->ip_versions[SDMA0_HWIP][0]) {
-   case IP_VERSION(4, 0, 0):
-   chip_name = "vega10";
-   break;
-   case IP_VERSION(4, 0, 1):
-   chip_name = "vega12";
-   break;
-   case IP_VERSION(4, 2, 0):
-   chip_name = "vega20";
-   break;
-   case IP_VERSION(4, 1, 0):
-   case IP_VERSION(4, 1, 1):
-   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
-   chip_name = "raven2";
-   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
-   chip_name = "picasso";
-   else
-   chip_name = "raven";
-   break;
-   case IP_VERSION(4, 2, 2):
-   chip_name = "arcturus";
-   break;
-   case IP_VERSION(4, 1, 2):
-   if (adev->apu_flags & AMD_APU_IS_RENOIR)
-   chip_name = "renoir";
-   else
-   chip_name = "green_sardine";
-   break;
-   case IP_VERSION(4, 4, 0):
-   chip_name = "aldebaran";
-   break;
-   default:
-   BUG();
-   }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
-   if (i == 0)
-   snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_sdma.bin", chip_name);
-   else
-   snprintf(fw_name, sizeof(f

[PATCH v5 06/45] drm/amd: Use `amdgpu_ucode_request` helper for SDMA

2023-01-04 Thread Mario Limonciello
The `amdgpu_ucode_request` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index ea5278f094c0..1668360bc699 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -154,16 +154,11 @@ int amdgpu_sdma_process_ecc_irq(struct amdgpu_device 
*adev,
 
 static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
 {
-   int err = 0;
uint16_t version_major;
const struct common_firmware_header *header = NULL;
const struct sdma_firmware_header_v1_0 *hdr;
const struct sdma_firmware_header_v2_0 *hdr_v2;
 
-   err = amdgpu_ucode_validate(sdma_inst->fw);
-   if (err)
-   return err;
-
header = (const struct common_firmware_header *)
sdma_inst->fw->data;
version_major = le16_to_cpu(header->header_version_major);
@@ -195,7 +190,7 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device 
*adev,
int i;
 
for (i = 0; i < adev->sdma.num_instances; i++) {
-   release_firmware(adev->sdma.instance[i].fw);
+   amdgpu_ucode_release(adev->sdma.instance[i].fw);
if (duplicate)
break;
}
@@ -214,7 +209,7 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
const struct sdma_firmware_header_v2_0 *sdma_hdr;
uint16_t version_major;
 
-   err = request_firmware(&adev->sdma.instance[instance].fw, fw_name, 
adev->dev);
+   err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, 
fw_name);
if (err)
goto out;
 
-- 
2.34.1



[PATCH v5 08/45] drm/amd: Make SDMA firmware load failures less noisy.

2023-01-04 Thread Mario Limonciello
When firmware is missing we get failures at every step.
```
[3.855086] amdgpu :04:00.0: Direct firmware load for 
amdgpu/green_sardine_sdma.bin failed with error -2
[3.855087] [drm:amdgpu_sdma_init_microcode [amdgpu]] *ERROR* SDMA: Failed 
to init firmware "amdgpu/green_sardine_sdma.bin"
[3.855398] [drm:sdma_v4_0_early_init [amdgpu]] *ERROR* Failed to load sdma 
firmware!
```
Realistically we don't need all of these; a user can tell what happened
from the first one, which request_firmware emitted. Drop the others.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index f8e4a52ab67a..35b79eeb5f09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -280,10 +280,8 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
}
 
 out:
-   if (err) {
-   DRM_ERROR("SDMA: Failed to init firmware \"%s\"\n", fw_name);
+   if (err)
amdgpu_sdma_destroy_inst_ctx(adev, duplicate);
-   }
return err;
 }
 
-- 
2.34.1



[PATCH v5 04/45] drm/amd: Convert SMUv13 microcode to use `amdgpu_ucode_ip_version_decode`

2023-01-04 Thread Mario Limonciello
The special case for the one dGPU has been moved into
`amdgpu_ucode_ip_version_decode`, so simplify this code.

Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
Acked-by: Christian König 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 0ac9cac805f9..506a49a4b425 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -88,7 +88,6 @@ static const int link_speed[] = {25, 50, 80, 160};
 int smu_v13_0_init_microcode(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
-   const char *chip_name;
char fw_name[30];
char ucode_prefix[30];
int err = 0;
@@ -100,16 +99,9 @@ int smu_v13_0_init_microcode(struct smu_context *smu)
if (amdgpu_sriov_vf(adev))
return 0;
 
-   switch (adev->ip_versions[MP1_HWIP][0]) {
-   case IP_VERSION(13, 0, 2):
-   chip_name = "aldebaran_smc";
-   break;
-   default:
-   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
-   chip_name = ucode_prefix;
-   }
+   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
-- 
2.34.1



[PATCH v5 03/45] drm/amd: Convert SMUv11 microcode to use `amdgpu_ucode_ip_version_decode`

2023-01-04 Thread Mario Limonciello
Remove the special casing from SMU v11 code. No intended functional
changes.

Signed-off-by: Mario Limonciello 
Acked-by: Christian König 
---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 35 ++-
 1 file changed, 3 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index ad66d57aa102..d4756bd30830 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -93,7 +93,7 @@ static void smu_v11_0_poll_baco_exit(struct smu_context *smu)
 int smu_v11_0_init_microcode(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
-   const char *chip_name;
+   char ucode_prefix[30];
char fw_name[SMU_FW_NAME_LEN];
int err = 0;
const struct smc_firmware_header_v1_0 *hdr;
@@ -105,38 +105,9 @@ int smu_v11_0_init_microcode(struct smu_context *smu)
 (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7
return 0;
 
-   switch (adev->ip_versions[MP1_HWIP][0]) {
-   case IP_VERSION(11, 0, 0):
-   chip_name = "navi10";
-   break;
-   case IP_VERSION(11, 0, 5):
-   chip_name = "navi14";
-   break;
-   case IP_VERSION(11, 0, 9):
-   chip_name = "navi12";
-   break;
-   case IP_VERSION(11, 0, 7):
-   chip_name = "sienna_cichlid";
-   break;
-   case IP_VERSION(11, 0, 11):
-   chip_name = "navy_flounder";
-   break;
-   case IP_VERSION(11, 0, 12):
-   chip_name = "dimgrey_cavefish";
-   break;
-   case IP_VERSION(11, 0, 13):
-   chip_name = "beige_goby";
-   break;
-   case IP_VERSION(11, 0, 2):
-   chip_name = "arcturus";
-   break;
-   default:
-   dev_err(adev->dev, "Unsupported IP version 0x%x\n",
-   adev->ip_versions[MP1_HWIP][0]);
-   return -EINVAL;
-   }
+   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
-- 
2.34.1



[PATCH v5 01/45] drm/amd: Delay removal of the firmware framebuffer

2023-01-04 Thread Mario Limonciello
Removing the firmware framebuffer from the driver means that even
if the driver doesn't support the IP blocks in a GPU it will no
longer be functional after the driver fails to initialize.

This change will ensure that unsupported IP blocks at least cause
the driver to work with the EFI framebuffer.

Cc: sta...@vger.kernel.org
Suggested-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
v4->v5:
 * no changes
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 6 --
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9a1a5c2864a0..cdb681398a99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -37,6 +37,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -89,6 +90,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
 #define AMDGPU_MAX_RETRY_LIMIT 2
 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) 
== -EINVAL)
 
+static const struct drm_driver amdgpu_kms_driver;
+
 const char *amdgpu_asic_name[] = {
"TAHITI",
"PITCAIRN",
@@ -3685,6 +3688,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
 
+   /* Get rid of things like offb */
+   r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
&amdgpu_kms_driver);
+   if (r)
+   return r;
+
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index db7e34eacc35..b9f14ec9edb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,7 +23,6 @@
  */
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -2096,11 +2095,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
}
 #endif
 
-   /* Get rid of things like offb */
-   ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, 
&amdgpu_kms_driver);
-   if (ret)
-   return ret;
-
adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, 
typeof(*adev), ddev);
if (IS_ERR(adev))
return PTR_ERR(adev);
-- 
2.34.1



[PATCH v5 05/45] drm/amd: Add a new helper for loading/validating microcode

2023-01-04 Thread Mario Limonciello
All microcode runs a basic validation after it's been loaded. Each
IP block as part of init will run both.

Introduce a wrapper for request_firmware and amdgpu_ucode_validate.
This wrapper will also remap any error codes from request_firmware
to -ENODEV.  This is so that early_init will fail if firmware couldn't
be loaded instead of the IP block being disabled.

Signed-off-by: Mario Limonciello 
---
v4->v5:
 * Rename symbols for amdgpu_ucode_request/amdgpu_ucode_release
 * Make argument const
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 36 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h |  3 ++
 2 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index eafcddce58d3..dc6af1fffdd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1312,3 +1312,39 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device 
*adev, int block_type,
 
snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev);
 }
+
+/*
+ * amdgpu_ucode_request - Fetch and validate amdgpu microcode
+ *
+ * @adev: amdgpu device
+ * @fw: pointer to load firmware to
+ * @fw_name: firmware to load
+ *
+ * This is a helper that will use request_firmware and amdgpu_ucode_validate
+ * to load and run basic validation on firmware. If the load fails, remap
+ * the error code to -ENODEV, so that early_init functions will fail to load.
+ */
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware 
**fw,
+const char *fw_name)
+{
+   int err = request_firmware(fw, fw_name, adev->dev);
+
+   if (err)
+   return -ENODEV;
+   err = amdgpu_ucode_validate(*fw);
+   if (err)
+   dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
+
+   return err;
+}
+
+/*
+ * amdgpu_ucode_release - Release firmware microcode
+ *
+ * @fw: pointer to firmware to release
+ */
+void amdgpu_ucode_release(const struct firmware *fw)
+{
+   release_firmware(fw);
+   fw = NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 552e06929229..7fd2f04f7f98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -544,6 +544,9 @@ void amdgpu_ucode_print_sdma_hdr(const struct 
common_firmware_header *hdr);
 void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
 int amdgpu_ucode_validate(const struct firmware *fw);
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware 
**fw,
+const char *fw_name);
+void amdgpu_ucode_release(const struct firmware *fw);
 bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
 
-- 
2.34.1



[PATCH v5 02/45] drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"

2023-01-04 Thread Mario Limonciello
This will allow other parts of the driver that currently special-case
firmware file names predating the IP version naming style to just
have a single call to `amdgpu_ucode_ip_version_decode`.

Signed-off-by: Mario Limonciello 
Acked-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 221 ++
 1 file changed, 221 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 5cb62e6249c2..eafcddce58d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1059,12 +1059,233 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
return 0;
 }
 
+static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int 
block_type)
+{
+   if (block_type == MP0_HWIP) {
+   switch (adev->ip_versions[MP0_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   switch (adev->asic_type) {
+   case CHIP_VEGA10:
+   return "vega10";
+   case CHIP_VEGA12:
+   return "vega12";
+   default:
+   return NULL;
+   }
+   break;
+   case IP_VERSION(10, 0, 0):
+   case IP_VERSION(10, 0, 1):
+   if (adev->asic_type == CHIP_RAVEN) {
+   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+   return "raven2";
+   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+   return "picasso";
+   return "raven";
+   }
+   break;
+   case IP_VERSION(11, 0, 0):
+   return "navi10";
+   case IP_VERSION(11, 0, 2):
+   return "vega20";
+   case IP_VERSION(11, 0, 4):
+   return "arcturus";
+   case IP_VERSION(11, 0, 5):
+   return "navi14";
+   case IP_VERSION(11, 0, 7):
+   return "sienna_cichlid";
+   case IP_VERSION(11, 0, 9):
+   return "navi12";
+   case IP_VERSION(11, 0, 11):
+   return "navy_flounder";
+   case IP_VERSION(11, 0, 12):
+   return "dimgrey_cavefish";
+   case IP_VERSION(11, 0, 13):
+   return "beige_goby";
+   case IP_VERSION(11, 5, 0):
+   return "vangogh";
+   case IP_VERSION(12, 0, 1):
+   if (adev->asic_type == CHIP_RENOIR) {
+   if (adev->apu_flags & AMD_APU_IS_RENOIR)
+   return "renoir";
+   return "green_sardine";
+   }
+   break;
+   case IP_VERSION(13, 0, 2):
+   return "aldebaran";
+   case IP_VERSION(13, 0, 1):
+   case IP_VERSION(13, 0, 3):
+   return "yellow_carp";
+   }
+   } else if (block_type == MP1_HWIP) {
+   switch (adev->ip_versions[MP1_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   case IP_VERSION(10, 0, 0):
+   case IP_VERSION(10, 0, 1):
+   case IP_VERSION(11, 0, 2):
+   if (adev->asic_type == CHIP_ARCTURUS)
+   return "arcturus_smc";
+   return NULL;
+   case IP_VERSION(11, 0, 0):
+   return "navi10_smc";
+   case IP_VERSION(11, 0, 5):
+   return "navi14_smc";
+   case IP_VERSION(11, 0, 9):
+   return "navi12_smc";
+   case IP_VERSION(11, 0, 7):
+   return "sienna_cichlid_smc";
+   case IP_VERSION(11, 0, 11):
+   return "navy_flounder_smc";
+   case IP_VERSION(11, 0, 12):
+   return "dimgrey_cavefish_smc";
+   case IP_VERSION(11, 0, 13):
+   return "beige_goby_smc";
+   case IP_VERSION(13, 0, 2):
+   return "aldebaran_smc";
+   }
+   } else if (block_type == SDMA0_HWIP) {
+   switch (adev->ip_versions[SDMA0_HWIP][0]) {
+   case IP_VERSION(4, 0, 0):
+   return "vega10_sdma";
+  

[PATCH v5 00/45] Recover from failure to probe GPU

2023-01-04 Thread Mario Limonciello
One of the first things that KMS drivers do during initialization is
destroy the system firmware framebuffer by means of
`drm_aperture_remove_conflicting_pci_framebuffers`

This means that if for any reason the GPU failed to probe the user
will be stuck with at best a screen frozen at the last thing that
was shown before the KMS driver continued its probe.

The problem is most pronounced when new GPU support is introduced
because users will need to have a recent linux-firmware snapshot
on their system when they boot a kernel with matching support.

However, the problem is further exacerbated in the case of amdgpu because
it has migrated to "IP discovery" where amdgpu will attempt to load
on "ALL" AMD GPUs even if the driver is missing support for IP blocks
contained in that GPU.

IP discovery requires some probing and isn't run until after the
framebuffer has been destroyed.

This means a situation can occur where a user purchases a new GPU not
yet supported by a distribution and when booting the installer it will
"freeze" even if the distribution doesn't have the matching kernel support
for those IP blocks.

The perfect example of this is Ubuntu 22.10 and the new dGPUs just
launched by AMD.  The installation media ships with kernel 5.19 (which
has IP discovery) but the amdgpu support for those IP blocks landed in
kernel 6.0. The matching linux-firmware was released after 22.10's launch.
The screen will freeze without nomodeset. Even if a user manages to install
and then upgrades to kernel 6.0 after install they'll still have the
problem of missing firmware, and the same experience.

This is quite jarring for users, particularly if they don't know
that they have to use "nomodeset" to install.

To help the situation make changes to GPU discovery:
1) Delay releasing the firmware framebuffer until after early_init
completed.  This will help the situation of an older kernel that doesn't
yet support the IP blocks probing a new GPU. IP discovery will have failed.
2) Request loading all PSP, VCN, SDMA, SMU, DMCUB, MES and GC microcode
into memory during early_init. This will help the situation of new enough
kernel for the IP discovery phase to otherwise pass but missing microcode
from linux-firmware.git.

v4->v5:
 * Rename amdgpu_ucode_load to amdgpu_ucode_request
 * Add and utilize amdgpu_ucode_release throughout existing patches
 * Update all amdgpu code to stop using request_firmware and
   release_firmware for microcode
 * Drop export of amdgpu_ucode_validate outside of amdgpu_ucode.c
 * Pick up relevant tags for some patches
v3->v4:
 * Rework to delay framebuffer release until early_init is done
 * Make IP load microcode during early init phase
 * Add SMU and DMCUB checks for early_init loading
 * Add some new helper code for wrapping request_firmware calls (needed for
   early_init to return something besides -ENOENT)
v2->v3:
 * Pick up tags for patches 1-10
 * Rework patch 11 to not validate during discovery
 * Fix bugs with GFX9 due to gfx.num_gfx_rings not being set during
   discovery
 * Fix naming scheme for SDMA on dGPUs
v1->v2:
 * Take the suggestion from v1 thread to delay the framebuffer release
   until IP discovery is done. This patch is CC'd to stable so that older
   stable kernels with IP discovery won't try to probe unknown IP.
 * Drop changes to drm aperture.
 * Fetch SDMA, VCN, MES, GC and PSP microcode during IP discovery.

Mario Limonciello (27):
  drm/amd: Delay removal of the firmware framebuffer
  drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"
  drm/amd: Convert SMUv11 microcode to use
`amdgpu_ucode_ip_version_decode`
  drm/amd: Convert SMUv13 microcode to use
`amdgpu_ucode_ip_version_decode`
  drm/amd: Add a new helper for loading/validating microcode
  drm/amd: Use `amdgpu_ucode_request` helper for SDMA
  drm/amd: Convert SDMA to use `amdgpu_ucode_ip_version_decode`
  drm/amd: Make SDMA firmware load failures less noisy.
  drm/amd: Use `amdgpu_ucode_*` helpers for VCN
  drm/amd: Load VCN microcode during early_init
  drm/amd: Load MES microcode during early_init
  drm/amd: Use `amdgpu_ucode_*` helpers for MES
  drm/amd: Remove superfluous assignment for `adev->mes.adev`
  drm/amd: Use `amdgpu_ucode_*` helpers for GFX9
  drm/amd: Load GFX9 microcode during early_init
  drm/amd: Use `amdgpu_ucode_*` helpers for GFX10
  drm/amd: Load GFX10 microcode during early_init
  drm/amd: Use `amdgpu_ucode_*` helpers for GFX11
  drm/amd: Load GFX11 microcode during early_init
  drm/amd: Parse both v1 and v2 TA microcode headers using same function
  drm/amd: Avoid BUG() for case of SRIOV missing IP version
  drm/amd: Load PSP microcode during early_init
  drm/amd: Use `amdgpu_ucode_*` helpers for PSP
  drm/amd/display: Load DMUB microcode during early_init
  drm/amd: Use `amdgpu_ucode_release` helper for DMUB
  drm/amd: Use `amdgpu_ucode_*` helpers for SMU
  drm/amd

[PATCH v4 25/27] drm/amd: Use `amdgpu_ucode_load` helper for SMU

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 5 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 5 +
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index d4756bd30830..1d693cda5818 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -109,10 +109,7 @@ int smu_v11_0_init_microcode(struct smu_context *smu)
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
-   err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->pm.fw);
+   err = amdgpu_ucode_load(adev, &adev->pm.fw, fw_name);
if (err)
goto out;
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 506a49a4b425..845a7fc83ba8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -103,10 +103,7 @@ int smu_v13_0_init_microcode(struct smu_context *smu)
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
-   err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->pm.fw);
+   err = amdgpu_ucode_load(adev, &adev->pm.fw, fw_name);
if (err)
goto out;
 
-- 
2.34.1



[PATCH v4 27/27] drm/amd: Optimize SRIOV switch/case for PSP microcode load

2023-01-03 Thread Mario Limonciello
Now that IP version decoding is used, a number of case statements
can be combined.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch

 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index f45362dd8228..83e253b5d928 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -132,14 +132,8 @@ static int psp_init_sriov_microcode(struct psp_context 
*psp)
 
switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(9, 0, 0):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, ucode_prefix);
-   break;
-   case IP_VERSION(11, 0, 9):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, ucode_prefix);
-   break;
case IP_VERSION(11, 0, 7):
+   case IP_VERSION(11, 0, 9):
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
ret = psp_init_cap_microcode(psp, ucode_prefix);
break;
-- 
2.34.1



[PATCH v4 26/27] drm/amd: Load SMU microcode during early_init

2023-01-03 Thread Mario Limonciello
This will ensure that the microcode is available before the firmware
framebuffer has been destroyed.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 2fa79f892a92..ec52830dde24 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -623,6 +623,7 @@ static int smu_early_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu;
+   int r;
 
smu = kzalloc(sizeof(struct smu_context), GFP_KERNEL);
if (!smu)
@@ -640,7 +641,10 @@ static int smu_early_init(void *handle)
adev->powerplay.pp_handle = smu;
adev->powerplay.pp_funcs = &swsmu_pm_funcs;
 
-   return smu_set_funcs(adev);
+   r = smu_set_funcs(adev);
+   if (r)
+   return r;
+   return smu_init_microcode(smu);
 }
 
 static int smu_set_default_dpm_table(struct smu_context *smu)
@@ -1067,12 +1071,6 @@ static int smu_sw_init(void *handle)
smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
smu->smu_dpm.requested_dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
 
-   ret = smu_init_microcode(smu);
-   if (ret) {
-   dev_err(adev->dev, "Failed to load smu firmware!\n");
-   return ret;
-   }
-
ret = smu_smc_table_sw_init(smu);
if (ret) {
dev_err(adev->dev, "Failed to sw init smc table!\n");
-- 
2.34.1



[PATCH v4 23/27] drm/amd: Use `amdgpu_ucode_load` helper for PSP

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 42 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  3 +-
 2 files changed, 11 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 3b0644600a1f..f45362dd8228 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2912,11 +2912,7 @@ int psp_init_asd_microcode(struct psp_context *psp, char 
*ucode_prefix)
int err = 0;
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", ucode_prefix);
-   err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-
-   err = amdgpu_ucode_validate(adev->psp.asd_fw);
+   err = amdgpu_ucode_load(adev, &adev->psp.asd_fw, fw_name);
if (err)
goto out;
 
@@ -2928,7 +2924,6 @@ int psp_init_asd_microcode(struct psp_context *psp, char 
*ucode_prefix)

le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
return 0;
 out:
-   dev_err(adev->dev, "fail to initialize asd microcode\n");
release_firmware(adev->psp.asd_fw);
adev->psp.asd_fw = NULL;
return err;
@@ -2942,11 +2937,7 @@ int psp_init_toc_microcode(struct psp_context *psp, char 
*ucode_prefix)
int err = 0;
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
-   err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-
-   err = amdgpu_ucode_validate(adev->psp.toc_fw);
+   err = amdgpu_ucode_load(adev, &adev->psp.toc_fw, fw_name);
if (err)
goto out;
 
@@ -2958,7 +2949,6 @@ int psp_init_toc_microcode(struct psp_context *psp, char 
*ucode_prefix)

le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
return 0;
 out:
-   dev_err(adev->dev, "fail to request/validate toc microcode\n");
release_firmware(adev->psp.toc_fw);
adev->psp.toc_fw = NULL;
return err;
@@ -3105,11 +3095,7 @@ int psp_init_sos_microcode(struct psp_context *psp, char 
*ucode_prefix)
int fw_index = 0;
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", ucode_prefix);
-   err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-
-   err = amdgpu_ucode_validate(adev->psp.sos_fw);
+   err = amdgpu_ucode_load(adev, &adev->psp.sos_fw, fw_name);
if (err)
goto out;
 
@@ -3181,8 +3167,6 @@ int psp_init_sos_microcode(struct psp_context *psp, char 
*ucode_prefix)
 
return 0;
 out:
-   dev_err(adev->dev,
-   "failed to init sos firmware\n");
release_firmware(adev->psp.sos_fw);
adev->psp.sos_fw = NULL;
 
@@ -3340,10 +3324,7 @@ int psp_init_ta_microcode(struct psp_context *psp, char 
*ucode_prefix)
int err;
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", ucode_prefix);
-   err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
-   if (err)
-   return err;
-   err = amdgpu_ucode_validate(adev->psp.ta_fw);
+   err = amdgpu_ucode_load(adev, &adev->psp.ta_fw, fw_name);
if (err)
return err;
 
@@ -3383,17 +3364,14 @@ int psp_init_cap_microcode(struct psp_context *psp, 
char *ucode_prefix)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_cap.bin", ucode_prefix);
-   err = request_firmware(&adev->psp.cap_fw, fw_name, adev->dev);
-   if (err) {
-   dev_warn(adev->dev, "cap microcode does not exist, skip\n");
-   err = 0;
-   goto out;
-   }
-
-   err = amdgpu_ucode_validate(adev->psp.cap_fw);
+   err = amdgpu_ucode_load(adev, &adev->psp.cap_fw, fw_name);
if (err) {
+   if (err == -ENODEV) {
+   dev_warn(adev->dev, "cap microcode does not exist, 
skip\n");
+   err = 0;
+   goto out;
+   }
dev_err(adev->dev, "fail to initialize cap microcode\n");
-   goto out;
}
 
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 47b88233bf94..415d32306b9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -506,8 +506,7 @@ int psp_in

[PATCH v4 24/27] drm/amd/display: Load DMUB microcode during early_init

2023-01-03 Thread Mario Limonciello
If DMUB is required for an ASIC, ensure that the microcode is available
and validates during early_init.

Any failures will cause the driver to fail to probe before the firmware
framebuffer has been removed.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 89 ---
 1 file changed, 58 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 4829b5431e4c..eeccc8af0320 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1945,7 +1945,6 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
struct dmub_srv_fb_info *fb_info;
struct dmub_srv *dmub_srv;
const struct dmcub_firmware_header_v1_0 *hdr;
-   const char *fw_name_dmub;
enum dmub_asic dmub_asic;
enum dmub_status status;
int r;
@@ -1953,73 +1952,46 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
switch (adev->ip_versions[DCE_HWIP][0]) {
case IP_VERSION(2, 1, 0):
dmub_asic = DMUB_ASIC_DCN21;
-   fw_name_dmub = FIRMWARE_RENOIR_DMUB;
-   if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
-   fw_name_dmub = FIRMWARE_GREEN_SARDINE_DMUB;
break;
case IP_VERSION(3, 0, 0):
-   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) {
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
dmub_asic = DMUB_ASIC_DCN30;
-   fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB;
-   } else {
+   else
dmub_asic = DMUB_ASIC_DCN30;
-   fw_name_dmub = FIRMWARE_NAVY_FLOUNDER_DMUB;
-   }
break;
case IP_VERSION(3, 0, 1):
dmub_asic = DMUB_ASIC_DCN301;
-   fw_name_dmub = FIRMWARE_VANGOGH_DMUB;
break;
case IP_VERSION(3, 0, 2):
dmub_asic = DMUB_ASIC_DCN302;
-   fw_name_dmub = FIRMWARE_DIMGREY_CAVEFISH_DMUB;
break;
case IP_VERSION(3, 0, 3):
dmub_asic = DMUB_ASIC_DCN303;
-   fw_name_dmub = FIRMWARE_BEIGE_GOBY_DMUB;
break;
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
dmub_asic = (adev->external_rev_id == YELLOW_CARP_B0) ? 
DMUB_ASIC_DCN31B : DMUB_ASIC_DCN31;
-   fw_name_dmub = FIRMWARE_YELLOW_CARP_DMUB;
break;
case IP_VERSION(3, 1, 4):
dmub_asic = DMUB_ASIC_DCN314;
-   fw_name_dmub = FIRMWARE_DCN_314_DMUB;
break;
case IP_VERSION(3, 1, 5):
dmub_asic = DMUB_ASIC_DCN315;
-   fw_name_dmub = FIRMWARE_DCN_315_DMUB;
break;
case IP_VERSION(3, 1, 6):
dmub_asic = DMUB_ASIC_DCN316;
-   fw_name_dmub = FIRMWARE_DCN316_DMUB;
break;
case IP_VERSION(3, 2, 0):
dmub_asic = DMUB_ASIC_DCN32;
-   fw_name_dmub = FIRMWARE_DCN_V3_2_0_DMCUB;
break;
case IP_VERSION(3, 2, 1):
dmub_asic = DMUB_ASIC_DCN321;
-   fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
break;
default:
/* ASIC doesn't support DMUB. */
return 0;
}
 
-   r = request_firmware_direct(&adev->dm.dmub_fw, fw_name_dmub, adev->dev);
-   if (r) {
-   DRM_ERROR("DMUB firmware loading failed: %d\n", r);
-   return 0;
-   }
-
-   r = amdgpu_ucode_validate(adev->dm.dmub_fw);
-   if (r) {
-   DRM_ERROR("Couldn't validate DMUB firmware: %d\n", r);
-   return 0;
-   }
-
hdr = (const struct dmcub_firmware_header_v1_0 *)adev->dm.dmub_fw->data;
adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version);
 
@@ -4513,6 +4485,61 @@ DEVICE_ATTR_WO(s3_debug);
 
 #endif
 
+static int dm_init_microcode(struct amdgpu_device *adev)
+{
+   char *fw_name_dmub;
+   int r;
+
+   switch (adev->ip_versions[DCE_HWIP][0]) {
+   case IP_VERSION(2, 1, 0):
+   fw_name_dmub = FIRMWARE_RENOIR_DMUB;
+   if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
+   fw_name_dmub = FIRMWARE_GREEN_SARDINE_DMUB;
+   break;
+   case IP_VERSION(3, 0, 0):
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
+   fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB;
+   else
+   fw_name_dmub = FIRMWARE_NAVY_FLOUNDER_DMUB;
+   break;
+   case IP_VERSION(3, 0, 1):
+   fw_name

[PATCH v4 20/27] drm/amd: Parse both v1 and v2 TA microcode headers using same function

2023-01-03 Thread Mario Limonciello
Several IP versions duplicate code and can't use the common helpers.
Move this code into a single function so that the helpers can be used.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 120 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |   9 +-
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c  |  60 +---
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  |  74 ++-
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c  |  62 +---
 5 files changed, 107 insertions(+), 218 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 7a2fc920739b..ac4d675abcb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3272,41 +3272,75 @@ static int parse_ta_bin_descriptor(struct psp_context 
*psp,
return 0;
 }
 
-int psp_init_ta_microcode(struct psp_context *psp,
- const char *chip_name)
+static int parse_ta_v1_microcode(struct psp_context *psp)
 {
+   const struct ta_firmware_header_v1_0 *ta_hdr;
struct amdgpu_device *adev = psp->adev;
-   char fw_name[PSP_FW_NAME_LEN];
-   const struct ta_firmware_header_v2_0 *ta_hdr;
-   int err = 0;
-   int ta_index = 0;
 
-   if (!chip_name) {
-   dev_err(adev->dev, "invalid chip name for ta microcode\n");
+   ta_hdr = (const struct ta_firmware_header_v1_0 *)
+adev->psp.ta_fw->data;
+
+   if (le16_to_cpu(ta_hdr->header.header_version_major) != 1)
return -EINVAL;
+
+   adev->psp.xgmi_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->xgmi.fw_version);
+   adev->psp.xgmi_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->xgmi.size_bytes);
+   adev->psp.xgmi_context.context.bin_desc.start_addr =
+   (uint8_t *)ta_hdr +
+   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+   adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+   adev->psp.ras_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->ras.fw_version);
+   adev->psp.ras_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->ras.size_bytes);
+   adev->psp.ras_context.context.bin_desc.start_addr =
+   (uint8_t *)adev->psp.xgmi_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->ras.offset_bytes);
+   adev->psp.hdcp_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->hdcp.fw_version);
+   adev->psp.hdcp_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->hdcp.size_bytes);
+   adev->psp.hdcp_context.context.bin_desc.start_addr =
+   (uint8_t *)ta_hdr +
+   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+   adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+   adev->psp.dtm_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->dtm.fw_version);
+   adev->psp.dtm_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->dtm.size_bytes);
+   adev->psp.dtm_context.context.bin_desc.start_addr =
+   (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->dtm.offset_bytes);
+   if (adev->apu_flags & AMD_APU_IS_RENOIR) {
+   adev->psp.securedisplay_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->securedisplay.fw_version);
+   adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+   adev->psp.securedisplay_context.context.bin_desc.start_addr =
+   (uint8_t 
*)adev->psp.hdcp_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
}
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
-   err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
+   return 0;
+}
 
-   err = amdgpu_ucode_validate(adev->psp.ta_fw);
-   if (err)
-   goto out;
+static int parse_ta_v2_microcode(struct psp_context *psp)
+{
+   const struct ta_firmware_header_v2_0 *ta_hdr;
+   struct amdgpu_device *adev = psp->adev;
+   int err = 0;
+   int ta_index = 0;
 
ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data;
 
-   if (le16_to_cpu(ta_hdr->header.header_version_major) != 2) {
-   dev_err(adev->dev, "unsupported TA header version\n");
-   err = -EINVAL;
-

[PATCH v4 22/27] drm/amd: Load PSP microcode during early_init

2023-01-03 Thread Mario Limonciello
Simplifies the code so that all PSP versions will get the firmware
name from `amdgpu_ucode_ip_version_decode` and then use this filename
to load microcode as part of the early_init process.

Any failures will cause the driver to fail to probe before the firmware
framebuffer has been removed.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  | 128 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c   |  16 +--
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c   |  55 ++
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c   |  13 +--
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c   |  27 ++---
 drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c |  14 +--
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c|  16 +--
 8 files changed, 79 insertions(+), 193 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index d51fe3431e2b..3b0644600a1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -122,6 +122,44 @@ static void 
psp_check_pmfw_centralized_cstate_management(struct psp_context *psp
}
 }
 
+static int psp_init_sriov_microcode(struct psp_context *psp)
+{
+   struct amdgpu_device *adev = psp->adev;
+   char ucode_prefix[30];
+   int ret = 0;
+
+   amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+
+   switch (adev->ip_versions[MP0_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+   ret = psp_init_cap_microcode(psp, ucode_prefix);
+   break;
+   case IP_VERSION(11, 0, 9):
+   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+   ret = psp_init_cap_microcode(psp, ucode_prefix);
+   break;
+   case IP_VERSION(11, 0, 7):
+   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+   ret = psp_init_cap_microcode(psp, ucode_prefix);
+   break;
+   case IP_VERSION(13, 0, 2):
+   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+   ret = psp_init_cap_microcode(psp, ucode_prefix);
+   ret &= psp_init_ta_microcode(psp, ucode_prefix);
+   break;
+   case IP_VERSION(13, 0, 0):
+   adev->virt.autoload_ucode_id = 0;
+   break;
+   case IP_VERSION(13, 0, 10):
+   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return ret;
+}
+
 static int psp_early_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -192,7 +230,10 @@ static int psp_early_init(void *handle)
 
psp_check_pmfw_centralized_cstate_management(psp);
 
-   return 0;
+   if (amdgpu_sriov_vf(adev))
+   return psp_init_sriov_microcode(psp);
+   else
+   return psp_init_microcode(psp);
 }
 
 void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx)
@@ -350,42 +391,6 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device 
*adev,
return ret;
 }
 
-static int psp_init_sriov_microcode(struct psp_context *psp)
-{
-   struct amdgpu_device *adev = psp->adev;
-   int ret = 0;
-
-   switch (adev->ip_versions[MP0_HWIP][0]) {
-   case IP_VERSION(9, 0, 0):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, "vega10");
-   break;
-   case IP_VERSION(11, 0, 9):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, "navi12");
-   break;
-   case IP_VERSION(11, 0, 7):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, "sienna_cichlid");
-   break;
-   case IP_VERSION(13, 0, 2):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, "aldebaran");
-   ret &= psp_init_ta_microcode(psp, "aldebaran");
-   break;
-   case IP_VERSION(13, 0, 0):
-   adev->virt.autoload_ucode_id = 0;
-   break;
-   case IP_VERSION(13, 0, 10):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
-   break;
-   default:
-   ret = -EINVAL;
-   break;
-   }
-   return ret;
-}
-
 static int psp_sw_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -401,15 +406,6 @@ static int psp_sw_init(void *handle)
ret = -ENOMEM;
}
 
-   if (amdgpu_sriov_vf(adev))
-

[PATCH v4 21/27] drm/amd: Avoid BUG() for case of SRIOV missing IP version

2023-01-03 Thread Mario Limonciello
No need to crash the kernel.  AMDGPU will now fail to probe.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index ac4d675abcb5..d51fe3431e2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -380,7 +380,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
break;
default:
-   BUG();
+   ret = -EINVAL;
break;
}
return ret;
-- 
2.34.1



[PATCH v4 16/27] drm/amd: Use `amdgpu_ucode_load` helper for GFX10

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 35 ++
 1 file changed, 8 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 49d34c7bbf20..5f6b59e23313 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4030,41 +4030,31 @@ static int gfx_v10_0_init_microcode(struct 
amdgpu_device *adev)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, 
wks);
-   err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.pfp_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, 
wks);
-   err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.me_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.me_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, 
wks);
-   err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.ce_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
 
if (!amdgpu_sriov_vf(adev)) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
chip_name);
-   err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
+   err = amdgpu_ucode_load(adev, &adev->gfx.rlc_fw, fw_name);
/* don't check this.  There are apparently firmwares in the 
wild with
 * incorrect size in the header
 */
-   err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+   if (err == -ENODEV)
+   goto out;
if (err)
dev_dbg(adev->dev,
"gfx10: amdgpu_ucode_validate() failed 
\"%s\"\n",
@@ -4078,21 +4068,15 @@ static int gfx_v10_0_init_microcode(struct 
amdgpu_device *adev)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, 
wks);
-   err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.mec_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, 
wks);
-   err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+   err = amdgpu_ucode_load(adev, &adev->gfx.mec2_fw, fw_name);
if (!err) {
-   err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
-   if (err)
-   goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
} else {
@@ -4103,9 +4087,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device 
*adev)
gfx_v10_0_check_fw_write_wait(adev);
 out:
if (err) {
-   dev_err(adev->dev,
-   "gfx10: Failed to init firmware \"%s\"\n",
-   fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
release_firmware(adev->gfx.me_fw);
-- 
2.34.1



[PATCH v4 18/27] drm/amd: Use `amdgpu_ucode_load` helper for GFX11

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 23 ---
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a56c6e106d00..0c77d165caf7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -457,10 +457,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
-   err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.pfp_fw, fw_name);
if (err)
goto out;
/* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
@@ -477,10 +474,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
-   err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.me_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.me_fw, fw_name);
if (err)
goto out;
if (adev->gfx.rs64_enable) {
@@ -493,10 +487,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
 
if (!amdgpu_sriov_vf(adev)) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
ucode_prefix);
-   err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.rlc_fw, fw_name);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 
*)adev->gfx.rlc_fw->data;
@@ -508,10 +499,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
-   err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.mec_fw, fw_name);
if (err)
goto out;
if (adev->gfx.rs64_enable) {
@@ -530,9 +518,6 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
 
 out:
if (err) {
-   dev_err(adev->dev,
-   "gfx11: Failed to init firmware \"%s\"\n",
-   fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
release_firmware(adev->gfx.me_fw);
-- 
2.34.1



[PATCH v4 19/27] drm/amd: Load GFX11 microcode during early_init

2023-01-03 Thread Mario Limonciello
GFX11 microcode is currently requested during sw_init. If it is required
but not available at that point, the firmware framebuffer will have
already been released and the screen will freeze.

Move the request for GFX11 microcode into the early_init phase
so that if it's not available, driver init will fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Move to early_init phase
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 78 ++
 1 file changed, 30 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 0c77d165caf7..5c7bc286618a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -443,6 +443,30 @@ static void gfx_v11_0_free_microcode(struct amdgpu_device 
*adev)
kfree(adev->gfx.rlc.register_list_format);
 }
 
+static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, char 
*ucode_prefix)
+{
+   const struct psp_firmware_header_v1_0 *toc_hdr;
+   int err = 0;
+   char fw_name[40];
+
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
+   err = amdgpu_ucode_load(adev, &adev->psp.toc_fw, fw_name);
+   if (err)
+   goto out;
+
+   toc_hdr = (const struct psp_firmware_header_v1_0 
*)adev->psp.toc_fw->data;
+   adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+   adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+   adev->psp.toc.size_bytes = 
le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+   adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
+   
le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
+   return 0;
+out:
+   release_firmware(adev->psp.toc_fw);
+   adev->psp.toc_fw = NULL;
+   return err;
+}
+
 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
 {
char fw_name[40];
@@ -513,6 +537,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
}
 
+   if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+   err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
+
/* only one MEC for gfx 11.0.0. */
adev->gfx.mec2_fw = NULL;
 
@@ -531,38 +558,6 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
return err;
 }
 
-static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev)
-{
-   const struct psp_firmware_header_v1_0 *toc_hdr;
-   int err = 0;
-   char fw_name[40];
-   char ucode_prefix[30];
-
-   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
-
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
-   err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-
-   err = amdgpu_ucode_validate(adev->psp.toc_fw);
-   if (err)
-   goto out;
-
-   toc_hdr = (const struct psp_firmware_header_v1_0 
*)adev->psp.toc_fw->data;
-   adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
-   adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
-   adev->psp.toc.size_bytes = 
le32_to_cpu(toc_hdr->header.ucode_size_bytes);
-   adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
-   
le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
-   return 0;
-out:
-   dev_err(adev->dev, "Failed to load TOC microcode\n");
-   release_firmware(adev->psp.toc_fw);
-   adev->psp.toc_fw = NULL;
-   return err;
-}
-
 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
 {
u32 count = 0;
@@ -699,19 +694,11 @@ static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
 }
 
-static int gfx_v11_0_me_init(struct amdgpu_device *adev)
+static void gfx_v11_0_me_init(struct amdgpu_device *adev)
 {
-   int r;
-
bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
 
amdgpu_gfx_graphics_queue_acquire(adev);
-
-   r = gfx_v11_0_init_microcode(adev);
-   if (r)
-   DRM_ERROR("Failed to load gfx firmware!\n");
-
-   return r;
 }
 
 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
@@ -1324,9 +1311,7 @@ static int gfx_v11_0_sw_init(void *handle)
}
}
 
-   r = gfx_v11_0_me_init(adev);
-   if (r)
-   return r;
+   gfx_v11_0_me_init(adev);
 
r = gfx_v11_0_rlc_init(adev);
if (r) {
@@ -1394,9 +1379,6 @@ static int gfx_v11_0_sw_init(void *handle)
 
/* allocate visible FB for rlc auto-loading fw */
if (adev->firmware.load_type == AMD

[PATCH v4 17/27] drm/amd: Load GFX10 microcode during early_init

2023-01-03 Thread Mario Limonciello
Simplifies the code so that GFX10 will get the firmware
name from `amdgpu_ucode_ip_version_decode` and then use this filename
to load microcode as part of the early_init process.

Any failures will cause the driver to fail to probe before the firmware
framebuffer has been removed.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Move out of discovery into early_init
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 82 ++
 1 file changed, 17 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 5f6b59e23313..75781722c7e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3974,9 +3974,9 @@ static void gfx_v10_0_check_gfxoff_flag(struct 
amdgpu_device *adev)
 
 static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 {
-   const char *chip_name;
char fw_name[40];
-   char *wks = "";
+   char ucode_prefix[30];
+   const char *wks = "";
int err;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
uint16_t version_major;
@@ -3984,71 +3984,31 @@ static int gfx_v10_0_init_microcode(struct 
amdgpu_device *adev)
 
DRM_DEBUG("\n");
 
-   switch (adev->ip_versions[GC_HWIP][0]) {
-   case IP_VERSION(10, 1, 10):
-   chip_name = "navi10";
-   break;
-   case IP_VERSION(10, 1, 1):
-   chip_name = "navi14";
-   if (!(adev->pdev->device == 0x7340 &&
- adev->pdev->revision != 0x00))
-   wks = "_wks";
-   break;
-   case IP_VERSION(10, 1, 2):
-   chip_name = "navi12";
-   break;
-   case IP_VERSION(10, 3, 0):
-   chip_name = "sienna_cichlid";
-   break;
-   case IP_VERSION(10, 3, 2):
-   chip_name = "navy_flounder";
-   break;
-   case IP_VERSION(10, 3, 1):
-   chip_name = "vangogh";
-   break;
-   case IP_VERSION(10, 3, 4):
-   chip_name = "dimgrey_cavefish";
-   break;
-   case IP_VERSION(10, 3, 5):
-   chip_name = "beige_goby";
-   break;
-   case IP_VERSION(10, 3, 3):
-   chip_name = "yellow_carp";
-   break;
-   case IP_VERSION(10, 3, 6):
-   chip_name = "gc_10_3_6";
-   break;
-   case IP_VERSION(10, 1, 3):
-   case IP_VERSION(10, 1, 4):
-   chip_name = "cyan_skillfish2";
-   break;
-   case IP_VERSION(10, 3, 7):
-   chip_name = "gc_10_3_7";
-   break;
-   default:
-   BUG();
-   }
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1) &&
+  (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)))
+   wks = "_wks";
+   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, 
wks);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", ucode_prefix, 
wks);
err = amdgpu_ucode_load(adev, &adev->gfx.pfp_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, 
wks);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", ucode_prefix, 
wks);
err = amdgpu_ucode_load(adev, &adev->gfx.me_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, 
wks);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", ucode_prefix, 
wks);
err = amdgpu_ucode_load(adev, &adev->gfx.ce_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
 
if (!amdgpu_sriov_vf(adev)) {
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
ucode_prefix);
err = amdgpu_ucode_load(adev, &adev->gfx.rlc_fw, fw_name);
/* don't check this.  There are apparently firmwares in the 
wild with
 * incorrect size in the header
@@ -4067,14 +4027,14 @@ static int gfx_v10_0_init_microcode(struct 
amdgpu_device *adev)
goto out;
}
 
-   snprintf(fw_name, sizeof(fw_name), "am

[PATCH v4 13/27] drm/amd: Remove superfluous assignment for `adev->mes.adev`

2023-01-03 Thread Mario Limonciello
`amdgpu_mes_init` already sets `adev->mes.adev`, so there is no need
to also set it in the IP specific versions.

Signed-off-by: Mario Limonciello 
---
v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c | 1 -
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 9c5ff8b7c202..f58debf2783c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -931,7 +931,6 @@ static int mes_v10_1_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int pipe, r;
 
-   adev->mes.adev = adev;
adev->mes.funcs = &mes_v10_1_funcs;
adev->mes.kiq_hw_init = &mes_v10_1_kiq_hw_init;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 3af77a32baac..c8bdee9a66c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -1020,7 +1020,6 @@ static int mes_v11_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int pipe, r;
 
-   adev->mes.adev = adev;
adev->mes.funcs = &mes_v11_0_funcs;
adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;
-- 
2.34.1



[PATCH v4 11/27] drm/amd: Load MES microcode during early_init

2023-01-03 Thread Mario Limonciello
Add an early_init phase to MES for fetching and validating microcode
from the filesystem.

If MES microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for MES microcode into the early_init phase
so that if it's not available, early_init will fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Introduce new early_init phase for MES
v2->v3:
 * Add a missing newline
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 65 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  1 +
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c  | 97 +
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 88 +-
 4 files changed, 100 insertions(+), 151 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 0c546245793b..dd8f35234507 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -21,6 +21,8 @@
  *
  */
 
+#include 
+
 #include "amdgpu_mes.h"
 #include "amdgpu.h"
 #include "soc15_common.h"
@@ -1423,3 +1425,66 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
kfree(vm);
return 0;
 }
+
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
+{
+   const struct mes_firmware_header_v1_0 *mes_hdr;
+   struct amdgpu_firmware_info *info;
+   char ucode_prefix[30];
+   char fw_name[40];
+   int r;
+
+   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
+   ucode_prefix,
+   pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
+   r = request_firmware(&adev->mes.fw[pipe], fw_name, adev->dev);
+   if (r)
+   goto out;
+
+   r = amdgpu_ucode_validate(adev->mes.fw[pipe]);
+   if (r)
+   goto out;
+
+   mes_hdr = (const struct mes_firmware_header_v1_0 *)
+   adev->mes.fw[pipe]->data;
+   adev->mes.uc_start_addr[pipe] =
+   le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
+   ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
+   adev->mes.data_start_addr[pipe] =
+   le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
+   ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 
32);
+
+   if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+   int ucode, ucode_data;
+
+   if (pipe == AMDGPU_MES_SCHED_PIPE) {
+   ucode = AMDGPU_UCODE_ID_CP_MES;
+   ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
+   } else {
+   ucode = AMDGPU_UCODE_ID_CP_MES1;
+   ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
+   }
+
+   info = &adev->firmware.ucode[ucode];
+   info->ucode_id = ucode;
+   info->fw = adev->mes.fw[pipe];
+   adev->firmware.fw_size +=
+   ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
+ PAGE_SIZE);
+
+   info = &adev->firmware.ucode[ucode_data];
+   info->ucode_id = ucode_data;
+   info->fw = adev->mes.fw[pipe];
+   adev->firmware.fw_size +=
+   ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
+ PAGE_SIZE);
+   }
+
+   return 0;
+
+out:
+   release_firmware(adev->mes.fw[pipe]);
+   adev->mes.fw[pipe] = NULL;
+   return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 97c05d08a551..547ec35691fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -306,6 +306,7 @@ struct amdgpu_mes_funcs {
 
 int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
 
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
 int amdgpu_mes_init(struct amdgpu_device *adev);
 void amdgpu_mes_fini(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 614394118a53..9c5ff8b7c202 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -379,82 +379,6 @@ static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
.resume_gang = mes_v10_1_resume_gang,
 };
 
-static int mes_v10_1_init_microcode(struct amdgpu_device *adev,
-   enum admgpu_mes_pipe pipe)
-{
-   const char *chip_name;
-   char fw_name[30];
-   int err;
-   const struct mes_firmware_header_v1_0 *mes_hdr;
-   struct amdgpu_firmware_info *info;
-
-

[PATCH v4 15/27] drm/amd: Load GFX9 microcode during early_init

2023-01-03 Thread Mario Limonciello
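GFX9 microcode is currently requested during sw_init. If it is required but
not available, the firmware framebuffer will have already been released by
that point and the screen will freeze. Request it during early_init instead
so that a missing file makes early_init fail.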

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Move microcode load phase to early init
v2->v3:
 * Fix issues found on real hardware where num_gfx_rings not set during
   discovery
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 58 +--
 1 file changed, 9 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 27040821d764..4e9c230e42ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1251,7 +1251,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct 
amdgpu_device *adev)
 }
 
 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
 {
char fw_name[30];
int err;
@@ -1287,7 +1287,7 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct 
amdgpu_device *adev,
 }
 
 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+  char *chip_name)
 {
char fw_name[30];
int err;
@@ -1344,7 +1344,7 @@ static bool gfx_v9_0_load_mec2_fw_bin_support(struct 
amdgpu_device *adev)
 }
 
 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
 {
char fw_name[30];
int err;
@@ -1392,58 +1392,24 @@ static int gfx_v9_0_init_cp_compute_microcode(struct 
amdgpu_device *adev,
 
 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 {
-   const char *chip_name;
+   char ucode_prefix[30];
int r;
 
DRM_DEBUG("\n");
-
-   switch (adev->ip_versions[GC_HWIP][0]) {
-   case IP_VERSION(9, 0, 1):
-   chip_name = "vega10";
-   break;
-   case IP_VERSION(9, 2, 1):
-   chip_name = "vega12";
-   break;
-   case IP_VERSION(9, 4, 0):
-   chip_name = "vega20";
-   break;
-   case IP_VERSION(9, 2, 2):
-   case IP_VERSION(9, 1, 0):
-   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
-   chip_name = "raven2";
-   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
-   chip_name = "picasso";
-   else
-   chip_name = "raven";
-   break;
-   case IP_VERSION(9, 4, 1):
-   chip_name = "arcturus";
-   break;
-   case IP_VERSION(9, 3, 0):
-   if (adev->apu_flags & AMD_APU_IS_RENOIR)
-   chip_name = "renoir";
-   else
-   chip_name = "green_sardine";
-   break;
-   case IP_VERSION(9, 4, 2):
-   chip_name = "aldebaran";
-   break;
-   default:
-   BUG();
-   }
+   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
/* No CPG in Arcturus */
if (adev->gfx.num_gfx_rings) {
-   r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
+   r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
if (r)
return r;
}
 
-   r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
+   r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
if (r)
return r;
 
-   r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
+   r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
if (r)
return r;
 
@@ -2131,12 +2097,6 @@ static int gfx_v9_0_sw_init(void *handle)
 
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
 
-   r = gfx_v9_0_init_microcode(adev);
-   if (r) {
-   DRM_ERROR("Failed to load gfx firmware!\n");
-   return r;
-   }
-
if (adev->gfx.rlc.funcs) {
if (adev->gfx.rlc.funcs->init) {
r = adev->gfx.rlc.funcs->init(adev);
@@ -4578,7 +4538,7 @@ static int gfx_v9_0_early_init(void *handle)
/* init rlcg reg access ctrl */
gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
 
-   return 0;
+   return gfx_v9_0_init_microcode(adev);
 }
 
 static int gfx_v9_0_ecc_late_init(void *handle)
-- 
2.34.1



[PATCH v4 14/27] drm/amd: Use `amdgpu_ucode_load` helper for GFX9

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * new patch
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 43 +--
 1 file changed, 8 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index f202b45c413c..27040821d764 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1257,37 +1257,25 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct 
amdgpu_device *adev,
int err;
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
-   err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.pfp_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
-   err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.me_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.me_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
-   err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.ce_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
 
 out:
if (err) {
-   dev_err(adev->dev,
-   "gfx9: Failed to init firmware \"%s\"\n",
-   fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
release_firmware(adev->gfx.me_fw);
@@ -1328,10 +1316,7 @@ static int gfx_v9_0_init_rlc_microcode(struct 
amdgpu_device *adev,
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", 
chip_name);
else
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
chip_name);
-   err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.rlc_fw, fw_name);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 
*)adev->gfx.rlc_fw->data;
@@ -1341,12 +1326,10 @@ static int gfx_v9_0_init_rlc_microcode(struct 
amdgpu_device *adev,
err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
 out:
if (err) {
-   dev_err(adev->dev,
-   "gfx9: Failed to init firmware \"%s\"\n",
-   fw_name);
release_firmware(adev->gfx.rlc_fw);
adev->gfx.rlc_fw = NULL;
}
+
return err;
 }
 
@@ -1371,12 +1354,9 @@ static int gfx_v9_0_init_cp_compute_microcode(struct 
amdgpu_device *adev,
else
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", 
chip_name);
 
-   err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+   err = amdgpu_ucode_load(adev, &adev->gfx.mec_fw, fw_name);
if (err)
-   goto out;
+   return err;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
 
@@ -1386,11 +1366,8 @@ static int gfx_v9_0_init_cp_compute_microcode(struct 
amdgpu_device *adev,
else
snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_mec2.bin", chip_name);
 
-   err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+   err = amdgpu_ucode_load(adev, &adev->gfx.mec2_fw, fw_name);
if (!err) {
-   err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
-   if (err)
-   goto out;
amdgpu_gfx_cp_init_microcode(adev, 
AMDGPU_UCODE_ID_CP_MEC2);
amdgpu_gfx_cp_init_microcode(adev, 
AMDGPU_UCODE_ID_CP_MEC2_JT);
} else {
@@ -1402,13 +1379,9 @@

[PATCH v4 12/27] drm/amd: Use `amdgpu_ucode_load` helper for MES

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index dd8f35234507..df9efbca0f70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1438,11 +1438,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device 
*adev, int pipe)
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
ucode_prefix,
pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
-   r = request_firmware(&adev->mes.fw[pipe], fw_name, adev->dev);
-   if (r)
-   goto out;
-
-   r = amdgpu_ucode_validate(adev->mes.fw[pipe]);
+   r = amdgpu_ucode_load(adev, &adev->mes.fw[pipe], fw_name);
if (r)
goto out;
 
@@ -1482,7 +1478,6 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, 
int pipe)
}
 
return 0;
-
 out:
release_firmware(adev->mes.fw[pipe]);
adev->mes.fw[pipe] = NULL;
-- 
2.34.1



[PATCH v4 07/27] drm/amd: Convert SDMA to use `amdgpu_ucode_ip_version_decode`

2023-01-03 Thread Mario Limonciello
Simplifies the code so that all SDMA versions will get the firmware
name from `amdgpu_ucode_ip_version_decode`.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Move out of IP discovery and instead simplify early_init
v2->v3:
 * Fix dGPU naming scheme
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c |  7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  4 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 47 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 30 +
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   | 55 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   | 25 +--
 6 files changed, 13 insertions(+), 155 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 9e85a078d918..83e8f0dae647 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -200,15 +200,18 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device 
*adev,
 }
 
 int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
-  char *fw_name, u32 instance,
-  bool duplicate)
+  u32 instance, bool duplicate)
 {
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
int err = 0, i;
const struct sdma_firmware_header_v2_0 *sdma_hdr;
uint16_t version_major;
+   char ucode_prefix[30];
+   char fw_name[40];
 
+   amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s%s.bin", ucode_prefix, 
!instance ? "" : "1");
err = amdgpu_ucode_load(adev, &adev->sdma.instance[instance].fw, 
fw_name);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 7d99205c2e01..2d16e6d36728 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -124,8 +124,8 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device 
*adev,
 int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
  struct amdgpu_irq_src *source,
  struct amdgpu_iv_entry *entry);
-int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
-char *fw_name, u32 instance, bool duplicate);
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
+  bool duplicate);
 void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
 bool duplicate);
 void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4d780e4430e7..017ae298558e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -575,60 +575,17 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device 
*adev)
 // vega10 real chip need to use PSP to load firmware
 static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
 {
-   const char *chip_name;
-   char fw_name[30];
int ret, i;
 
-   DRM_DEBUG("\n");
-
-   switch (adev->ip_versions[SDMA0_HWIP][0]) {
-   case IP_VERSION(4, 0, 0):
-   chip_name = "vega10";
-   break;
-   case IP_VERSION(4, 0, 1):
-   chip_name = "vega12";
-   break;
-   case IP_VERSION(4, 2, 0):
-   chip_name = "vega20";
-   break;
-   case IP_VERSION(4, 1, 0):
-   case IP_VERSION(4, 1, 1):
-   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
-   chip_name = "raven2";
-   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
-   chip_name = "picasso";
-   else
-   chip_name = "raven";
-   break;
-   case IP_VERSION(4, 2, 2):
-   chip_name = "arcturus";
-   break;
-   case IP_VERSION(4, 1, 2):
-   if (adev->apu_flags & AMD_APU_IS_RENOIR)
-   chip_name = "renoir";
-   else
-   chip_name = "green_sardine";
-   break;
-   case IP_VERSION(4, 4, 0):
-   chip_name = "aldebaran";
-   break;
-   default:
-   BUG();
-   }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
-   if (i == 0)
-   snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_sdma.bin", chip_name);
-   else
-   snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_sdma%d.bin", chip_name, i);
if (adev-

[PATCH v4 09/27] drm/amd: Use `amdgpu_ucode_load` helper for VCN

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a23e26b272b4..6d9cb7fb67cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -206,19 +206,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
return -EINVAL;
}
 
-   r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
+   r = amdgpu_ucode_load(adev, &adev->vcn.fw, fw_name);
if (r) {
-   dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
-   fw_name);
-   return r;
-   }
-
-   r = amdgpu_ucode_validate(adev->vcn.fw);
-   if (r) {
-   dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware 
\"%s\"\n",
-   fw_name);
-   release_firmware(adev->vcn.fw);
-   adev->vcn.fw = NULL;
+   if (adev->vcn.fw)
+   release_firmware(adev->vcn.fw);
return r;
}
 
-- 
2.34.1



[PATCH v4 10/27] drm/amd: Load VCN microcode during early_init

2023-01-03 Thread Mario Limonciello
Simplifies the code so that all VCN versions will get the firmware
name from `amdgpu_ucode_ip_version_decode` and then use this filename
to load microcode as part of the early_init process.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Move out of IP discovery and introduce early_init phase for VCN
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 94 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  1 +
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c   |  5 +-
 7 files changed, 52 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 6d9cb7fb67cf..48fc9059c386 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -36,25 +36,25 @@
 #include "soc15d.h"
 
 /* Firmware Names */
-#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
-#define FIRMWARE_PICASSO   "amdgpu/picasso_vcn.bin"
-#define FIRMWARE_RAVEN2"amdgpu/raven2_vcn.bin"
-#define FIRMWARE_ARCTURUS  "amdgpu/arcturus_vcn.bin"
-#define FIRMWARE_RENOIR"amdgpu/renoir_vcn.bin"
-#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
-#define FIRMWARE_NAVI10"amdgpu/navi10_vcn.bin"
-#define FIRMWARE_NAVI14"amdgpu/navi14_vcn.bin"
-#define FIRMWARE_NAVI12"amdgpu/navi12_vcn.bin"
-#define FIRMWARE_SIENNA_CICHLID"amdgpu/sienna_cichlid_vcn.bin"
-#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
-#define FIRMWARE_VANGOGH   "amdgpu/vangogh_vcn.bin"
-#define FIRMWARE_DIMGREY_CAVEFISH  "amdgpu/dimgrey_cavefish_vcn.bin"
-#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
-#define FIRMWARE_BEIGE_GOBY"amdgpu/beige_goby_vcn.bin"
-#define FIRMWARE_YELLOW_CARP   "amdgpu/yellow_carp_vcn.bin"
-#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
-#define FIRMWARE_VCN4_0_0  "amdgpu/vcn_4_0_0.bin"
-#define FIRMWARE_VCN4_0_2  "amdgpu/vcn_4_0_2.bin"
+#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
+#define FIRMWARE_PICASSO   "amdgpu/picasso_vcn.bin"
+#define FIRMWARE_RAVEN2"amdgpu/raven2_vcn.bin"
+#define FIRMWARE_ARCTURUS  "amdgpu/arcturus_vcn.bin"
+#define FIRMWARE_RENOIR"amdgpu/renoir_vcn.bin"
+#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
+#define FIRMWARE_NAVI10"amdgpu/navi10_vcn.bin"
+#define FIRMWARE_NAVI14"amdgpu/navi14_vcn.bin"
+#define FIRMWARE_NAVI12"amdgpu/navi12_vcn.bin"
+#define FIRMWARE_SIENNA_CICHLID"amdgpu/sienna_cichlid_vcn.bin"
+#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
+#define FIRMWARE_VANGOGH   "amdgpu/vangogh_vcn.bin"
+#define FIRMWARE_DIMGREY_CAVEFISH  "amdgpu/dimgrey_cavefish_vcn.bin"
+#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
+#define FIRMWARE_BEIGE_GOBY"amdgpu/beige_goby_vcn.bin"
+#define FIRMWARE_YELLOW_CARP   "amdgpu/yellow_carp_vcn.bin"
+#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
+#define FIRMWARE_VCN4_0_0  "amdgpu/vcn_4_0_0.bin"
+#define FIRMWARE_VCN4_0_2  "amdgpu/vcn_4_0_2.bin"
 #define FIRMWARE_VCN4_0_4  "amdgpu/vcn_4_0_4.bin"
 
 MODULE_FIRMWARE(FIRMWARE_RAVEN);
@@ -80,10 +80,26 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
 
 static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
 
+int amdgpu_vcn_early_init(struct amdgpu_device *adev)
+{
+   char ucode_prefix[30];
+   char fw_name[40];
+   int r;
+
+   amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
+   r = amdgpu_ucode_load(adev, &adev->vcn.fw, fw_name);
+   if (r) {
+   release_firmware(adev->vcn.fw);
+   adev->vcn.fw = NULL;
+   }
+
+   return r;
+}
+
 int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 {
unsigned long bo_size;
-   const char *fw_name;
const struct common_firmware_header *hdr;
unsigned char fw_check;
unsigned int fw_shared_size, log_offset;
@@ -99,46 +115,27 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
switch (adev->ip_versions[UVD_HWIP][0]) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
-   if (adev->apu_flags & AMD_APU_IS_RAVEN2)

[PATCH v4 08/27] drm/amd: Make SDMA firmware load failures less noisy.

2023-01-03 Thread Mario Limonciello
When firmware is missing we get failures at every step.
```
[3.855086] amdgpu :04:00.0: Direct firmware load for 
amdgpu/green_sardine_sdma.bin failed with error -2
[3.855087] [drm:amdgpu_sdma_init_microcode [amdgpu]] *ERROR* SDMA: Failed 
to init firmware "amdgpu/green_sardine_sdma.bin"
[3.855398] [drm:sdma_v4_0_early_init [amdgpu]] *ERROR* Failed to load sdma 
firmware!
```
Realistically we don't need all of these; a user can tell what happened from
the first one, which request_firmware emitted. Drop the others.

Signed-off-by: Mario Limonciello 
---
v3->v4:
* New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 83e8f0dae647..f052173ef1e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -277,10 +277,8 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
}
 
 out:
-   if (err) {
-   DRM_ERROR("SDMA: Failed to init firmware \"%s\"\n", fw_name);
+   if (err)
amdgpu_sdma_destroy_inst_ctx(adev, duplicate);
-   }
return err;
 }
 
-- 
2.34.1



[PATCH v4 03/27] drm/amd: Convert SMUv11 microcode to use `amdgpu_ucode_ip_version_decode`

2023-01-03 Thread Mario Limonciello
Remove the special casing from SMU v11 code. No intended functional
changes.

Signed-off-by: Mario Limonciello 
Acked-by: Christian König 
---
v3->v4:
 * No changes
---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 35 ++-
 1 file changed, 3 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index ad66d57aa102..d4756bd30830 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -93,7 +93,7 @@ static void smu_v11_0_poll_baco_exit(struct smu_context *smu)
 int smu_v11_0_init_microcode(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
-   const char *chip_name;
+   char ucode_prefix[30];
char fw_name[SMU_FW_NAME_LEN];
int err = 0;
const struct smc_firmware_header_v1_0 *hdr;
@@ -105,38 +105,9 @@ int smu_v11_0_init_microcode(struct smu_context *smu)
 (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7
return 0;
 
-   switch (adev->ip_versions[MP1_HWIP][0]) {
-   case IP_VERSION(11, 0, 0):
-   chip_name = "navi10";
-   break;
-   case IP_VERSION(11, 0, 5):
-   chip_name = "navi14";
-   break;
-   case IP_VERSION(11, 0, 9):
-   chip_name = "navi12";
-   break;
-   case IP_VERSION(11, 0, 7):
-   chip_name = "sienna_cichlid";
-   break;
-   case IP_VERSION(11, 0, 11):
-   chip_name = "navy_flounder";
-   break;
-   case IP_VERSION(11, 0, 12):
-   chip_name = "dimgrey_cavefish";
-   break;
-   case IP_VERSION(11, 0, 13):
-   chip_name = "beige_goby";
-   break;
-   case IP_VERSION(11, 0, 2):
-   chip_name = "arcturus";
-   break;
-   default:
-   dev_err(adev->dev, "Unsupported IP version 0x%x\n",
-   adev->ip_versions[MP1_HWIP][0]);
-   return -EINVAL;
-   }
+   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
-- 
2.34.1



[PATCH v4 06/27] drm/amd: Use `amdgpu_ucode_load` helper for SDMA

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index ea5278f094c0..9e85a078d918 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -154,16 +154,11 @@ int amdgpu_sdma_process_ecc_irq(struct amdgpu_device 
*adev,
 
 static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
 {
-   int err = 0;
uint16_t version_major;
const struct common_firmware_header *header = NULL;
const struct sdma_firmware_header_v1_0 *hdr;
const struct sdma_firmware_header_v2_0 *hdr_v2;
 
-   err = amdgpu_ucode_validate(sdma_inst->fw);
-   if (err)
-   return err;
-
header = (const struct common_firmware_header *)
sdma_inst->fw->data;
version_major = le16_to_cpu(header->header_version_major);
@@ -214,7 +209,7 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
const struct sdma_firmware_header_v2_0 *sdma_hdr;
uint16_t version_major;
 
-   err = request_firmware(&adev->sdma.instance[instance].fw, fw_name, 
adev->dev);
+   err = amdgpu_ucode_load(adev, &adev->sdma.instance[instance].fw, 
fw_name);
if (err)
goto out;
 
-- 
2.34.1



[PATCH v4 02/27] drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"

2023-01-03 Thread Mario Limonciello
This will allow other parts of the driver that currently special-case
firmware file names predating IP version style naming to just
have a single call to `amdgpu_ucode_ip_version_decode`.

Signed-off-by: Mario Limonciello 
Acked-by: Christian König 
---
v3->v4:
 * No changes
v2->v3:
 * Fixes for GFX9 SDMA
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 221 ++
 1 file changed, 221 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 5cb62e6249c2..eafcddce58d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1059,12 +1059,233 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
return 0;
 }
 
+static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int 
block_type)
+{
+   if (block_type == MP0_HWIP) {
+   switch (adev->ip_versions[MP0_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   switch (adev->asic_type) {
+   case CHIP_VEGA10:
+   return "vega10";
+   case CHIP_VEGA12:
+   return "vega12";
+   default:
+   return NULL;
+   }
+   break;
+   case IP_VERSION(10, 0, 0):
+   case IP_VERSION(10, 0, 1):
+   if (adev->asic_type == CHIP_RAVEN) {
+   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+   return "raven2";
+   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+   return "picasso";
+   return "raven";
+   }
+   break;
+   case IP_VERSION(11, 0, 0):
+   return "navi10";
+   case IP_VERSION(11, 0, 2):
+   return "vega20";
+   case IP_VERSION(11, 0, 4):
+   return "arcturus";
+   case IP_VERSION(11, 0, 5):
+   return "navi14";
+   case IP_VERSION(11, 0, 7):
+   return "sienna_cichlid";
+   case IP_VERSION(11, 0, 9):
+   return "navi12";
+   case IP_VERSION(11, 0, 11):
+   return "navy_flounder";
+   case IP_VERSION(11, 0, 12):
+   return "dimgrey_cavefish";
+   case IP_VERSION(11, 0, 13):
+   return "beige_goby";
+   case IP_VERSION(11, 5, 0):
+   return "vangogh";
+   case IP_VERSION(12, 0, 1):
+   if (adev->asic_type == CHIP_RENOIR) {
+   if (adev->apu_flags & AMD_APU_IS_RENOIR)
+   return "renoir";
+   return "green_sardine";
+   }
+   break;
+   case IP_VERSION(13, 0, 2):
+   return "aldebaran";
+   case IP_VERSION(13, 0, 1):
+   case IP_VERSION(13, 0, 3):
+   return "yellow_carp";
+   }
+   } else if (block_type == MP1_HWIP) {
+   switch (adev->ip_versions[MP1_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   case IP_VERSION(10, 0, 0):
+   case IP_VERSION(10, 0, 1):
+   case IP_VERSION(11, 0, 2):
+   if (adev->asic_type == CHIP_ARCTURUS)
+   return "arcturus_smc";
+   return NULL;
+   case IP_VERSION(11, 0, 0):
+   return "navi10_smc";
+   case IP_VERSION(11, 0, 5):
+   return "navi14_smc";
+   case IP_VERSION(11, 0, 9):
+   return "navi12_smc";
+   case IP_VERSION(11, 0, 7):
+   return "sienna_cichlid_smc";
+   case IP_VERSION(11, 0, 11):
+   return "navy_flounder_smc";
+   case IP_VERSION(11, 0, 12):
+   return "dimgrey_cavefish_smc";
+   case IP_VERSION(11, 0, 13):
+   return "beige_goby_smc";
+   case IP_VERSION(13, 0, 2):
+   return "aldebaran_smc";
+   }
+   } else if (block_type == SDMA0_HWIP) {
+   switch (adev->ip_versions[SDMA0_HWIP][0]) {
+   case IP_VERSION(4, 

[PATCH v4 05/27] drm/amd: Add a new helper for loading/validating microcode

2023-01-03 Thread Mario Limonciello
All microcode undergoes a basic validation after it's been loaded. Each
IP block, as part of init, will run both the load and the validation.

Introduce a wrapper for request_firmware and amdgpu_ucode_validate.
This wrapper will also remap any error codes from request_firmware
to -ENODEV.  This is so that early_init will fail if firmware couldn't
be loaded instead of the IP block being disabled.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 24 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h |  1 +
 2 files changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index eafcddce58d3..8c4a7b09e344 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1312,3 +1312,27 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device 
*adev, int block_type,
 
snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev);
 }
+
+/*
+ * amdgpu_ucode_load - Load and validate amdgpu microcode
+ *
+ * @adev: amdgpu device
+ * @fw: pointer to load firmware to
+ * @fw_name: firmware to load
+ *
+ * This is a helper that will use request_firmware and amdgpu_ucode_validate
+ * to load and run basic validation on firmware. If the load fails, remap
+ * the error code to -ENODEV, so that early_init functions will fail to load.
+ */
+int amdgpu_ucode_load(struct amdgpu_device *adev, const struct firmware **fw, 
char *fw_name)
+{
+   int err = request_firmware(fw, fw_name, adev->dev);
+
+   if (err)
+   return -ENODEV;
+   err = amdgpu_ucode_validate(*fw);
+   if (err)
+   dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
+
+   return err;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 552e06929229..b9139fb44506 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -544,6 +544,7 @@ void amdgpu_ucode_print_sdma_hdr(const struct 
common_firmware_header *hdr);
 void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
 int amdgpu_ucode_validate(const struct firmware *fw);
+int amdgpu_ucode_load(struct amdgpu_device *adev, const struct firmware **fw, 
char *fw_name);
 bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
 
-- 
2.34.1



[PATCH v4 04/27] drm/amd: Convert SMUv13 microcode to use `amdgpu_ucode_ip_version_decode`

2023-01-03 Thread Mario Limonciello
The special case for the one dGPU has been moved into
`amdgpu_ucode_ip_version_decode`, so simplify this code.

Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
Acked-by: Christian König 
---
v3->v4:
 * No changes
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 0ac9cac805f9..506a49a4b425 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -88,7 +88,6 @@ static const int link_speed[] = {25, 50, 80, 160};
 int smu_v13_0_init_microcode(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
-   const char *chip_name;
char fw_name[30];
char ucode_prefix[30];
int err = 0;
@@ -100,16 +99,9 @@ int smu_v13_0_init_microcode(struct smu_context *smu)
if (amdgpu_sriov_vf(adev))
return 0;
 
-   switch (adev->ip_versions[MP1_HWIP][0]) {
-   case IP_VERSION(13, 0, 2):
-   chip_name = "aldebaran_smc";
-   break;
-   default:
-   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
-   chip_name = ucode_prefix;
-   }
+   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
-- 
2.34.1



[PATCH v4 01/27] drm/amd: Delay removal of the firmware framebuffer

2023-01-03 Thread Mario Limonciello
Removing the firmware framebuffer from the driver means that even
if the driver doesn't support the IP blocks in a GPU, the display will
no longer be functional after the driver fails to initialize.

This change will ensure that a GPU with unsupported IP blocks at least
keeps working with the EFI framebuffer.

Cc: sta...@vger.kernel.org
Suggested-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Drop all R-b/A-b tags.
 * Move to after early IP init instead
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 6 --
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9a1a5c2864a0..cdb681398a99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -37,6 +37,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -89,6 +90,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
 #define AMDGPU_MAX_RETRY_LIMIT 2
 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) 
== -EINVAL)
 
+static const struct drm_driver amdgpu_kms_driver;
+
 const char *amdgpu_asic_name[] = {
"TAHITI",
"PITCAIRN",
@@ -3685,6 +3688,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
 
+   /* Get rid of things like offb */
+   r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
&amdgpu_kms_driver);
+   if (r)
+   return r;
+
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index db7e34eacc35..b9f14ec9edb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,7 +23,6 @@
  */
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -2096,11 +2095,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
}
 #endif
 
-   /* Get rid of things like offb */
-   ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, 
&amdgpu_kms_driver);
-   if (ret)
-   return ret;
-
adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, 
typeof(*adev), ddev);
if (IS_ERR(adev))
return PTR_ERR(adev);
-- 
2.34.1



[PATCH v4 00/27] Recover from failure to probe GPU

2023-01-03 Thread Mario Limonciello
One of the first things that KMS drivers do during initialization is
destroy the system firmware framebuffer by means of
`drm_aperture_remove_conflicting_pci_framebuffers`.

This means that if for any reason the GPU failed to probe, the user
will be stuck with at best a screen frozen at the last thing that
was shown before the KMS driver continued its probe.

The problem is most pronounced when new GPU support is introduced
because users will need to have a recent linux-firmware snapshot
on their system when they boot a kernel with matching support.

However, the problem is further exacerbated in the case of amdgpu because
it has migrated to "IP discovery" where amdgpu will attempt to load
on "ALL" AMD GPUs even if the driver is missing support for IP blocks
contained in that GPU.

IP discovery requires some probing and isn't run until after the
framebuffer has been destroyed.

This means a situation can occur where a user purchases a new GPU not
yet supported by a distribution and when booting the installer it will
"freeze" even if the distribution doesn't have the matching kernel support
for those IP blocks.

The perfect example of this is Ubuntu 22.10 and the new dGPUs just
launched by AMD.  The installation media ships with kernel 5.19 (which
has IP discovery) but the amdgpu support for those IP blocks landed in
kernel 6.0. The matching linux-firmware was released after 22.10's launch.
The screen will freeze without nomodeset. Even if a user manages to install
and then upgrades to kernel 6.0 after install they'll still have the
problem of missing firmware, and the same experience.

This is quite jarring for users, particularly if they don't know
that they have to use "nomodeset" to install.

To help the situation, make changes to GPU discovery:
1) Delay releasing the firmware framebuffer until after early_init has
completed.  This will help the situation of an older kernel that doesn't
yet support the IP blocks probing a new GPU. IP discovery will have failed.
2) Request loading all PSP, VCN, SDMA, SMU, DMCUB, MES and GC microcode
into memory during early_init. This will help the situation of a kernel new
enough for the IP discovery phase to otherwise pass but with microcode
missing from linux-firmware.git.

v3->v4:
 * Rework to delay framebuffer release until early_init is done
 * Make individual IPs load microcode during early init phase
 * Add SMU and DMCUB cases for early_init loading
 * Add some new helper code for wrapping request_firmware calls (needed for
   early_init to return something besides -ENOENT)
v2->v3:
 * Pick up tags for patches 1-10
 * Rework patch 11 to not validate during discovery
 * Fix bugs with GFX9 due to gfx.num_gfx_rings not being set during discovery
 * Fix naming scheme for SDMA on dGPUs
v1->v2:
 * Take the suggestion from v1 thread to delay the framebuffer release until
   ip discovery is done. This patch is CC to stable so that older stable
   kernels with IP discovery won't try to probe unknown IP.
 * Drop changes to drm aperture.
 * Fetch SDMA, VCN, MES, GC and PSP microcode during IP discovery.
Mario Limonciello (27):
  drm/amd: Delay removal of the firmware framebuffer
  drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"
  drm/amd: Convert SMUv11 microcode to use
`amdgpu_ucode_ip_version_decode`
  drm/amd: Convert SMUv13 microcode to use
`amdgpu_ucode_ip_version_decode`
  drm/amd: Add a new helper for loading/validating microcode
  drm/amd: Use `amdgpu_ucode_load` helper for SDMA
  drm/amd: Convert SDMA to use `amdgpu_ucode_ip_version_decode`
  drm/amd: Make SDMA firmware load failures less noisy.
  drm/amd: Use `amdgpu_ucode_load` helper for VCN
  drm/amd: Load VCN microcode during early_init
  drm/amd: Load MES microcode during early_init
  drm/amd: Use `amdgpu_ucode_load` helper for MES
  drm/amd: Remove superfluous assignment for `adev->mes.adev`
  drm/amd: Use `amdgpu_ucode_load` helper for GFX9
  drm/amd: Load GFX9 microcode during early_init
  drm/amd: Use `amdgpu_ucode_load` helper for GFX10
  drm/amd: Load GFX10 microcode during early_init
  drm/amd: Use `amdgpu_ucode_load` helper for GFX11
  drm/amd: Load GFX11 microcode during early_init
  drm/amd: Parse both v1 and v2 TA microcode headers using same function
  drm/amd: Avoid BUG() for case of SRIOV missing IP version
  drm/amd: Load PSP microcode during early_init
  drm/amd: Use `amdgpu_ucode_load` helper for PSP
  drm/amd/display: Load DMUB microcode during early_init
  drm/amd: Use `amdgpu_ucode_load` helper for SMU
  drm/amd: Load SMU microcode during early_init
  drm/amd: Optimize SRIOV switch/case for PSP microcode load

 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   8 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   6 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  60 
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  

[PATCH v3 11/11] drm/amd: Request PSP microcode during IP discovery

2022-12-29 Thread Mario Limonciello
If PSP microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for PSP microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Signed-off-by: Mario Limonciello 
---
v2->v3:
 * Only request_firmware, don't validate during discovery
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 173 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   |  58 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h   |  12 +-
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c|  99 +++--
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c| 191 +-
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c|  98 +++--
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c|  46 +
 drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c  |  18 +-
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c |  16 +-
 9 files changed, 297 insertions(+), 414 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index ce1aa7683738..6b7dd0cf56ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -158,6 +158,40 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
 
+MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven_asd.bin");
+MODULE_FIRMWARE("amdgpu/picasso_asd.bin");
+MODULE_FIRMWARE("amdgpu/raven2_asd.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven_ta.bin");
+MODULE_FIRMWARE("amdgpu/renoir_asd.bin");
+MODULE_FIRMWARE("amdgpu/renoir_ta.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
+MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
+MODULE_FIRMWARE("amdgpu/vega10_cap.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
 
 /* gfx9 */
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
@@ -339,6 +373,13 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
 
+enum amd_psp_microcode_kind {
+   AMD_PSP_MICROCODE_SOS,
+   AMD_PSP_MICROCODE_ASD,
+   AMD_PSP_MICROCODE_TA,
+   AMD_PSP_MICROCODE_TOC,
+};
+
 static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID]  = "MP1",
[MP2_HWID]  = "MP2",
@@ -1856,14 +1897,59 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct 
amdgpu_device *adev)
return 0;
 }
 
+static int amdgpu_discovery_load_psp_fw(struct amdgpu_device *adev,
+   enum amd_psp_microcode_kind kind,
+   const char *chip_name)
+{
+   char fw_name[PSP_FW_NAME_LEN];
+
+   switch (kind) {
+   case AMD_PSP_MICROCODE_SOS:
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", 
chip_name);
+   return request_firmware(&adev->psp.sos_fw, fw_name, adev->dev);
+   case AMD_PSP_MICROCODE_ASD:
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", 
chip_name);
+   return request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
+   case AMD_PSP_MICROCODE_TA:
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", 
chip_name);
+   return request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
+   case AMD_PSP_MICROCODE_TOC:
+  

[PATCH v3 08/11] drm/amd: Request GFX9 microcode during IP discovery

2022-12-29 Thread Mario Limonciello
If GFX9 microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for GFX9 microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Signed-off-by: Mario Limonciello 
---
v2->v3:
 * Fix issues found on real hardware where num_gfx_rings not set during
   discovery
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 147 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 194 ++
 2 files changed, 161 insertions(+), 180 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index edaeec35c39f..0ff1b3872441 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -158,6 +158,68 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
 
+
+/* gfx9 */
+MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega10_me.bin");
+MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega12_me.bin");
+MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega20_me.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/raven_ce.bin");
+MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
+MODULE_FIRMWARE("amdgpu/raven_me.bin");
+MODULE_FIRMWARE("amdgpu/raven_mec.bin");
+MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
+MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
+MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
+MODULE_FIRMWARE("amdgpu/picasso_me.bin");
+MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
+MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
+MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
+MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
+
+MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
+MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/raven2_me.bin");
+MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
+MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
+MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
+MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
+MODULE_FIRMWARE("amdgpu/renoir_me.bin");
+MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
+MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
+
 static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID]  = "MP1",
[MP2_HWID]  = "MP2",
@@ -1845,8 +1907,90 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
amdgpu_device *adev)
return 0;
 }
 
+static int amdgpu_discovery_load_gfx9(struct amdgpu_device *adev, char 
*ucode_prefix)
+{
+   uint32_t smu_version;
+   char fw_name[40];
+   int r;
+
+   switch (adev->ip_versions[GC_HWIP][0]) {
+   /* No CPG in Arcturus */
+   case IP_VERSION(9, 4, 1):
+   case IP_VERSION(9, 4, 2):
+   break;
+   default:
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", 
ucode_prefix);
+   r = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", 
ucode_prefix);
+   r = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+
+   snprintf(fw_name, sizeof

[PATCH v3 09/11] drm/amd: Request GFX10 microcode during IP discovery

2022-12-29 Thread Mario Limonciello
If GFX10 microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for GFX10 microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 137 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 180 +-
 2 files changed, 144 insertions(+), 173 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 0ff1b3872441..439b10fdff1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -220,6 +220,102 @@ MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
 
+/* gfx10 */
+MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
+
+MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi10_me.bin");
+MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi14_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navi12_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi12_me.bin");
+MODULE_FIRMWARE("amdgpu/navi12_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi12_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_ce.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_pfp.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_me.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec2.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navy_flounder_ce.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_me.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_mec.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/vangogh_ce.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_me.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_mec.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ce.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_pfp.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_me.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec2.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/beige_goby_ce.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_pfp.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_me.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_mec.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_mec2.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/yellow_carp_ce.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_pfp.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_me.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin");
+MODULE_FIRMWARE("

[PATCH v3 10/11] drm/amd: Request GFX11 microcode during IP discovery

2022-12-29 Thread Mario Limonciello
If GFX11 microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for GFX11 microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 52 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c| 64 +--
 2 files changed, 53 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 439b10fdff1b..ce1aa7683738 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -316,6 +316,29 @@ MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin");
 MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin");
 
+/* gfx11 */
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
+
 static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID]  = "MP1",
[MP2_HWID]  = "MP2",
@@ -2114,6 +2137,32 @@ static int amdgpu_discovery_load_gfx10(struct 
amdgpu_device *adev, char *ucode_p
r = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
if (r)
return r;
+   return 0;
+}
+
+static int amdgpu_discovery_load_gfx11(struct amdgpu_device *adev, char 
*ucode_prefix)
+{
+   char fw_name[40];
+   int r;
+
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
+   r = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
+   r = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
+   r = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+   if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", 
ucode_prefix);
+   r = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+   }
 
return 0;
 }
@@ -2162,6 +2211,9 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct 
amdgpu_device *adev)
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+   r = amdgpu_discovery_load_gfx11(adev, ucode_prefix);
+   if (r)
+   return r;
amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a56c6e106d00..576fa591c6da 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -60,27 +60,6 @@
 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1  0x4e7e
 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
 
-MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
-

[PATCH v3 06/11] drm/amd: Request VCN microcode during IP discovery

2022-12-29 Thread Mario Limonciello
If VCN microcode is not available during early init, the firmware
framebuffer will have already been released and the screen will
freeze.

Move the request for VCN microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 41 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   | 85 +--
 2 files changed, 41 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 24d54ab0963a..07c05782a0e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -124,6 +124,27 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
 MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
 MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
 
+MODULE_FIRMWARE("amdgpu/raven_vcn.bin");
+MODULE_FIRMWARE("amdgpu/picasso_vcn.bin");
+MODULE_FIRMWARE("amdgpu/raven2_vcn.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_vcn.bin");
+MODULE_FIRMWARE("amdgpu/renoir_vcn.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_vcn.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_vcn.bin");
+MODULE_FIRMWARE("amdgpu/navi10_vcn.bin");
+MODULE_FIRMWARE("amdgpu/navi14_vcn.bin");
+MODULE_FIRMWARE("amdgpu/navi12_vcn.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_vcn.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_vcn.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_vcn.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_vcn.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_vcn.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_vcn.bin");
+MODULE_FIRMWARE("amdgpu/vcn_3_1_2.bin");
+MODULE_FIRMWARE("amdgpu/vcn_4_0_0.bin");
+MODULE_FIRMWARE("amdgpu/vcn_4_0_2.bin");
+MODULE_FIRMWARE("amdgpu/vcn_4_0_4.bin");
+
 static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID]  = "MP1",
[MP2_HWID]  = "MP2",
@@ -1922,8 +1943,23 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct 
amdgpu_device *adev)
return 0;
 }
 
+static int amdgpu_discovery_load_vcn_fw(struct amdgpu_device *adev,
+   char *fname)
+{
+   char fw_name[40];
+
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", fname);
+
+   return request_firmware(&adev->vcn.fw, fw_name, adev->dev);
+}
+
 static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
 {
+   char ucode_prefix[30];
+   int r = 0;
+
+   amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+
if (adev->ip_versions[VCE_HWIP][0]) {
switch (adev->ip_versions[UVD_HWIP][0]) {
case IP_VERSION(7, 0, 0):
@@ -2001,7 +2037,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct 
amdgpu_device *adev)
return -EINVAL;
}
}
-   return 0;
+   if (*ucode_prefix)
+   r = amdgpu_discovery_load_vcn_fw(adev, ucode_prefix);
+   return r;
+}
 }
 
 static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a23e26b272b4..370c9644a3b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -35,55 +35,11 @@
 #include "amdgpu_vcn.h"
 #include "soc15d.h"
 
-/* Firmware Names */
-#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
-#define FIRMWARE_PICASSO   "amdgpu/picasso_vcn.bin"
-#define FIRMWARE_RAVEN2"amdgpu/raven2_vcn.bin"
-#define FIRMWARE_ARCTURUS  "amdgpu/arcturus_vcn.bin"
-#define FIRMWARE_RENOIR"amdgpu/renoir_vcn.bin"
-#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
-#define FIRMWARE_NAVI10"amdgpu/navi10_vcn.bin"
-#define FIRMWARE_NAVI14"amdgpu/navi14_vcn.bin"
-#define FIRMWARE_NAVI12"amdgpu/navi12_vcn.bin"
-#define FIRMWARE_SIENNA_CICHLID"amdgpu/sienna_cichlid_vcn.bin"
-#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
-#define FIRMWARE_VANGOGH   "amdgpu/vangogh_vcn.bin"
-#define FIRMWARE_DIMGREY_CAVEFISH  "amdgpu/dimgrey_cavefish_vcn.bin"
-#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
-#define FIRMWARE_BEIGE_GOBY"amdgpu/beige_goby_vcn.bin"
-#define FIRMWARE_YELLOW_CARP   "amdgpu/yellow_carp_vcn.bin"
-#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
-#define FIRMWARE_VCN4_0_0  "a

[PATCH v3 07/11] drm/amd: Request MES microcode during IP discovery

2022-12-29 Thread Mario Limonciello
If MES microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for MES microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
v2->v3:
 * Add a missing newline
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 40 +++
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c| 28 -
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c| 25 +---
 3 files changed, 41 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 07c05782a0e3..edaeec35c39f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -145,6 +145,19 @@ MODULE_FIRMWARE("amdgpu/vcn_4_0_0.bin");
 MODULE_FIRMWARE("amdgpu/vcn_4_0_2.bin");
 MODULE_FIRMWARE("amdgpu/vcn_4_0_4.bin");
 
+MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes1.bin");
+
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
+
 static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID]  = "MP1",
[MP2_HWID]  = "MP2",
@@ -2041,10 +2054,30 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct 
amdgpu_device *adev)
r = amdgpu_discovery_load_vcn_fw(adev, ucode_prefix);
return r;
 }
+
+static int amdgpu_discovery_load_mes_fw(struct amdgpu_device *adev,
+   enum admgpu_mes_pipe pipe,
+   const char *ucode_prefix)
+{
+   char fw_name[40];
+
+   if (pipe == AMDGPU_MES_SCHED_PIPE)
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
+ucode_prefix);
+   else
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes1.bin",
+ucode_prefix);
+
+   return request_firmware(&adev->mes.fw[pipe], fw_name, adev->dev);
 }
 
 static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
 {
+   char ucode_prefix[30];
+   int pipe, r;
+
+   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
@@ -2077,6 +2110,13 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct 
amdgpu_device *adev)
default:
break;
}
+   if (adev->enable_mes) {
+   for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+   r = amdgpu_discovery_load_mes_fw(adev, pipe, 
ucode_prefix);
+   if (r)
+   return r;
+   }
+   }
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 614394118a53..9faa9867b3c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -37,10 +37,6 @@
 #define mmRLC_CP_SCHEDULERS_Sienna_Cichlid 0x4ca1
 #define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX1
 
-MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
-MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
-MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes1.bin");
-
 static int mes_v10_1_hw_fini(void *handle);
 static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev);
 
@@ -382,34 +378,10 @@ static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
 static int mes_v10_1_init_microcode(struct amdgpu_device *adev,
enum admgpu_mes_pipe pipe)
 {
-   const char *chip_name;
-   char fw_name[30];
int err;
const struct mes_firmware_header_v1_0 *mes_hdr;
struct amdgpu_firmware_info *info;
 
-   switch (adev->ip_versions[GC_HWIP][0]) {
-   case IP_VERSION(10, 1, 10):
-   chip_name = "navi10";
-   break;
-   case IP_VERSION(10, 3, 0):
-   chip_name = "sienna_cichlid";
-   break;
-   default:
-   BUG();
-   }
-
-   if (pipe == AMDGPU_MES_SCHED_PIPE)
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
-  

[PATCH v3 03/11] drm/amd: Convert SMUv11 microcode init to use `amdgpu_ucode_ip_version_decode`

2022-12-29 Thread Mario Limonciello
Remove the special casing from SMU v11 code. No intended functional
changes.

Signed-off-by: Mario Limonciello 
---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 35 ++-
 1 file changed, 3 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index ad66d57aa102..d4756bd30830 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -93,7 +93,7 @@ static void smu_v11_0_poll_baco_exit(struct smu_context *smu)
 int smu_v11_0_init_microcode(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
-   const char *chip_name;
+   char ucode_prefix[30];
char fw_name[SMU_FW_NAME_LEN];
int err = 0;
const struct smc_firmware_header_v1_0 *hdr;
@@ -105,38 +105,9 @@ int smu_v11_0_init_microcode(struct smu_context *smu)
 (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7
return 0;
 
-   switch (adev->ip_versions[MP1_HWIP][0]) {
-   case IP_VERSION(11, 0, 0):
-   chip_name = "navi10";
-   break;
-   case IP_VERSION(11, 0, 5):
-   chip_name = "navi14";
-   break;
-   case IP_VERSION(11, 0, 9):
-   chip_name = "navi12";
-   break;
-   case IP_VERSION(11, 0, 7):
-   chip_name = "sienna_cichlid";
-   break;
-   case IP_VERSION(11, 0, 11):
-   chip_name = "navy_flounder";
-   break;
-   case IP_VERSION(11, 0, 12):
-   chip_name = "dimgrey_cavefish";
-   break;
-   case IP_VERSION(11, 0, 13):
-   chip_name = "beige_goby";
-   break;
-   case IP_VERSION(11, 0, 2):
-   chip_name = "arcturus";
-   break;
-   default:
-   dev_err(adev->dev, "Unsupported IP version 0x%x\n",
-   adev->ip_versions[MP1_HWIP][0]);
-   return -EINVAL;
-   }
+   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
-- 
2.34.1



[PATCH v3 04/11] drm/amd: Convert SMUv13 to use `amdgpu_ucode_ip_version_decode`

2022-12-29 Thread Mario Limonciello
The special case for the one dGPU has been moved into
`amdgpu_ucode_ip_version_decode`, so simplify this code.

Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 0ac9cac805f9..506a49a4b425 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -88,7 +88,6 @@ static const int link_speed[] = {25, 50, 80, 160};
 int smu_v13_0_init_microcode(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
-   const char *chip_name;
char fw_name[30];
char ucode_prefix[30];
int err = 0;
@@ -100,16 +99,9 @@ int smu_v13_0_init_microcode(struct smu_context *smu)
if (amdgpu_sriov_vf(adev))
return 0;
 
-   switch (adev->ip_versions[MP1_HWIP][0]) {
-   case IP_VERSION(13, 0, 2):
-   chip_name = "aldebaran_smc";
-   break;
-   default:
-   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
-   chip_name = ucode_prefix;
-   }
+   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
-- 
2.34.1



[PATCH v3 05/11] drm/amd: Request SDMA microcode during IP discovery

2022-12-29 Thread Mario Limonciello
If SDMA microcode is not available during early init, the firmware
framebuffer will have already been released and the screen will
freeze.

Move the request for SDMA microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Signed-off-by: Mario Limonciello 
---
v2->v3:
 * Fix dGPU naming scheme
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 57 
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c  |  9 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h  |  2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c| 61 +
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c| 42 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c| 65 +--
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c| 30 +
 7 files changed, 66 insertions(+), 200 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index b719852daa07..24d54ab0963a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -90,6 +90,40 @@ MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY);
 #define mmMM_INDEX_HI  0x6
 #define mmMM_DATA  0x1
 
+MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/navi14_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/navi12_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
+MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
+MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin");
+MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sdma.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sdma.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_6.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
+
 static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID]  = "MP1",
[MP2_HWID]  = "MP2",
@@ -1821,8 +1855,26 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct 
amdgpu_device *adev)
return 0;
 }
 
+static int amdgpu_discovery_load_sdma_fw(struct amdgpu_device *adev, u32 
instance,
+const char *chip_name)
+{
+   char fw_name[40];
+
+   if (instance == 0)
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name);
+   else
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s1.bin", chip_name);
+
+   return request_firmware(&adev->sdma.instance[instance].fw, fw_name, 
adev->dev);
+}
+
 static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
 {
+   char ucode_prefix[30];
+   int i, r;
+
+   amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+
switch (adev->ip_versions[SDMA0_HWIP][0]) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 1):
@@ -1862,6 +1914,11 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct 
amdgpu_device *adev)
adev->ip_versions[SDMA0_HWIP][0]);
return -EINVAL;
}
+   for (i = 0; i < adev->sdma.num_instances; i++) {
+   r = amdgpu_discovery_load_sdma_fw(adev, i, ucode_prefix);
+   if (r)
+   return r;
+   }
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index ea5278f094c0..9e46d8034c03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -205,8 +205,7 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device 
*adev,
 }
 
 int amdgpu_sdma_init_microcode(st

[PATCH v3 02/11] drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"

2022-12-29 Thread Mario Limonciello
This will allow other parts of the driver that currently special-case
firmware file names predating IP version style naming to just
have a single call to `amdgpu_ucode_ip_version_decode`.

Signed-off-by: Mario Limonciello 
---
v2->v3:
 * Fixes for GFX9 SDMA
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 221 ++
 1 file changed, 221 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 5cb62e6249c2..eafcddce58d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1059,12 +1059,233 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
return 0;
 }
 
+static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int 
block_type)
+{
+   if (block_type == MP0_HWIP) {
+   switch (adev->ip_versions[MP0_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   switch (adev->asic_type) {
+   case CHIP_VEGA10:
+   return "vega10";
+   case CHIP_VEGA12:
+   return "vega12";
+   default:
+   return NULL;
+   }
+   break;
+   case IP_VERSION(10, 0, 0):
+   case IP_VERSION(10, 0, 1):
+   if (adev->asic_type == CHIP_RAVEN) {
+   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+   return "raven2";
+   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+   return "picasso";
+   return "raven";
+   }
+   break;
+   case IP_VERSION(11, 0, 0):
+   return "navi10";
+   case IP_VERSION(11, 0, 2):
+   return "vega20";
+   case IP_VERSION(11, 0, 4):
+   return "arcturus";
+   case IP_VERSION(11, 0, 5):
+   return "navi14";
+   case IP_VERSION(11, 0, 7):
+   return "sienna_cichlid";
+   case IP_VERSION(11, 0, 9):
+   return "navi12";
+   case IP_VERSION(11, 0, 11):
+   return "navy_flounder";
+   case IP_VERSION(11, 0, 12):
+   return "dimgrey_cavefish";
+   case IP_VERSION(11, 0, 13):
+   return "beige_goby";
+   case IP_VERSION(11, 5, 0):
+   return "vangogh";
+   case IP_VERSION(12, 0, 1):
+   if (adev->asic_type == CHIP_RENOIR) {
+   if (adev->apu_flags & AMD_APU_IS_RENOIR)
+   return "renoir";
+   return "green_sardine";
+   }
+   break;
+   case IP_VERSION(13, 0, 2):
+   return "aldebaran";
+   case IP_VERSION(13, 0, 1):
+   case IP_VERSION(13, 0, 3):
+   return "yellow_carp";
+   }
+   } else if (block_type == MP1_HWIP) {
+   switch (adev->ip_versions[MP1_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   case IP_VERSION(10, 0, 0):
+   case IP_VERSION(10, 0, 1):
+   case IP_VERSION(11, 0, 2):
+   if (adev->asic_type == CHIP_ARCTURUS)
+   return "arcturus_smc";
+   return NULL;
+   case IP_VERSION(11, 0, 0):
+   return "navi10_smc";
+   case IP_VERSION(11, 0, 5):
+   return "navi14_smc";
+   case IP_VERSION(11, 0, 9):
+   return "navi12_smc";
+   case IP_VERSION(11, 0, 7):
+   return "sienna_cichlid_smc";
+   case IP_VERSION(11, 0, 11):
+   return "navy_flounder_smc";
+   case IP_VERSION(11, 0, 12):
+   return "dimgrey_cavefish_smc";
+   case IP_VERSION(11, 0, 13):
+   return "beige_goby_smc";
+   case IP_VERSION(13, 0, 2):
+   return "aldebaran_smc";
+   }
+   } else if (block_type == SDMA0_HWIP) {
+   switch (adev->ip_versions[SDMA0_HWIP][0]) {
+   case IP_VERSION(4, 0, 0):
+   return "vega10_sdma&

[PATCH v3 01/11] drm/amd: Delay removal of the firmware framebuffer

2022-12-29 Thread Mario Limonciello
Removing the firmware framebuffer from the driver means that even
if the driver doesn't support the IP blocks in a GPU it will no
longer be functional after the driver fails to initialize.

This change will ensure that unsupported IP blocks at least cause
the driver to work with the EFI framebuffer.

Cc: sta...@vger.kernel.org
Suggested-by: Alex Deucher 
Reviewed-by: Javier Martinez Canillas 
Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 6 --
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9a1a5c2864a0..84d83be2087c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -37,6 +37,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -89,6 +90,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
 #define AMDGPU_MAX_RETRY_LIMIT 2
 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) 
== -EINVAL)
 
+static const struct drm_driver amdgpu_kms_driver;
+
 const char *amdgpu_asic_name[] = {
"TAHITI",
"PITCAIRN",
@@ -2140,6 +2143,11 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
break;
}
 
+   /* Get rid of things like offb */
+   r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
&amdgpu_kms_driver);
+   if (r)
+   return r;
+
if (amdgpu_has_atpx() &&
(amdgpu_is_atpx_hybrid() ||
 amdgpu_has_atpx_dgpu_power_cntl()) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index db7e34eacc35..b9f14ec9edb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,7 +23,6 @@
  */
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -2096,11 +2095,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
}
 #endif
 
-   /* Get rid of things like offb */
-   ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, 
&amdgpu_kms_driver);
-   if (ret)
-   return ret;
-
adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, 
typeof(*adev), ddev);
if (IS_ERR(adev))
return PTR_ERR(adev);
-- 
2.34.1



[PATCH v3 00/11] Recover from failure to probe GPU

2022-12-29 Thread Mario Limonciello
One of the first things that KMS drivers do during initialization is
destroy the system firmware framebuffer by means of
`drm_aperture_remove_conflicting_pci_framebuffers`.

This means that if for any reason the GPU failed to probe the user
will be stuck with at best a screen frozen at the last thing that
was shown before the KMS driver continued its probe.

The problem is most pronounced when new GPU support is introduced
because users will need to have a recent linux-firmware snapshot
on their system when they boot a kernel with matching support.

However the problem is further exaggerated in the case of amdgpu because
it has migrated to "IP discovery" where amdgpu will attempt to load
on "ALL" AMD GPUs even if the driver is missing support for IP blocks
contained in that GPU.

IP discovery requires some probing and isn't run until after the
framebuffer has been destroyed.

This means a situation can occur where a user purchases a new GPU not
yet supported by a distribution and when booting the installer it will
"freeze" even if the distribution doesn't have the matching kernel support
for those IP blocks.

The perfect example of this is Ubuntu 22.10 and the new dGPUs just
launched by AMD.  The installation media ships with kernel 5.19 (which
has IP discovery) but the amdgpu support for those IP blocks landed in
kernel 6.0. The matching linux-firmware was released after 22.10's launch.
The screen will freeze without nomodeset. Even if a user manages to install
and then upgrades to kernel 6.0 after install they'll still have the
problem of missing firmware, and the same experience.

This is quite jarring for users, particularly if they don't know
that they have to use "nomodeset" to install.

To help the situation, make changes to GPU discovery:
1) Delay releasing the firmware framebuffer until after IP discovery has
completed.  This will help the situation of an older kernel that doesn't
yet support the IP blocks probing a new GPU.
2) Request loading all PSP, VCN, SDMA, MES and GC microcode into memory
during IP discovery. This will help the situation of a new enough kernel for
the IP discovery phase to otherwise pass but missing microcode from
linux-firmware.git.

Not all requested firmware will be loaded during IP discovery as some of it
will require larger driver architecture changes. For example SMU firmware
isn't loaded on certain products, but that's not known until later on when
the early_init phase of the SMU load occurs.

v2->v3:
 * Rework patch 11 to not validate PSP microcode during discovery
 * Fix bugs with GFX9 due to gfx.num_gfx_rings not being set during discovery
 * Fix naming scheme for SDMA on dGPUs
 * Pick up tags for patches 1-10 for patches that didn't change from other
   comments.
v1->v2:
 * Take the suggestion from v1 thread to delay the framebuffer release until
   ip discovery is done. This patch is CC to stable to that older stable
   kernels with IP discovery won't try to probe unknown IP.
 * Drop changes to drm aperature.
 * Fetch SDMA, VCN, MES, GC and PSP microcode during IP discovery.

Mario Limonciello (11):
  drm/amd: Delay removal of the firmware framebuffer
  drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"
  drm/amd: Convert SMUv11 microcode init to use
`amdgpu_ucode_ip_version_decode`
  drm/amd: Convert SMUv13 to use `amdgpu_ucode_ip_version_decode`
  drm/amd: Request SDMA microcode during IP discovery
  drm/amd: Request VCN microcode during IP discovery
  drm/amd: Request MES microcode during IP discovery
  drm/amd: Request GFX9 microcode during IP discovery
  drm/amd: Request GFX10 microcode during IP discovery
  drm/amd: Request GFX11 microcode during IP discovery
  drm/amd: Request PSP microcode during IP discovery

 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   8 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 647 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   6 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   |  58 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h   |  12 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c  |   9 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 221 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  85 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 180 +
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c|  64 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 194 +-
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c|  28 -
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|  25 +-
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c|  99 +--
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c| 191 ++
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c|  98 +--
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c|  46 +-
 drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c  |  18 +-
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c  

[PATCH v2 10/11] drm/amd: Request GFX11 microcode during IP discovery

2022-12-28 Thread Mario Limonciello
If GFX11 microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for GFX11 microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 52 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c| 64 +--
 2 files changed, 53 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index d31559600cae..c8c538a768fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -316,6 +316,29 @@ MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin");
 MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin");
 
+/* gfx11 */
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
+
 static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID]  = "MP1",
[MP2_HWID]  = "MP2",
@@ -2111,6 +2134,32 @@ static int amdgpu_discovery_load_gfx10(struct 
amdgpu_device *adev, char *ucode_p
r = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
if (r)
return r;
+   return 0;
+}
+
+static int amdgpu_discovery_load_gfx11(struct amdgpu_device *adev, char 
*ucode_prefix)
+{
+   char fw_name[40];
+   int r;
+
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
+   r = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
+   r = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
+   r = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+   if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", 
ucode_prefix);
+   r = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+   }
 
return 0;
 }
@@ -2159,6 +2208,9 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct 
amdgpu_device *adev)
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+   r = amdgpu_discovery_load_gfx11(adev, ucode_prefix);
+   if (r)
+   return r;
amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a56c6e106d00..576fa591c6da 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -60,27 +60,6 @@
 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1  0x4e7e
 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
 
-MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
-MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
-MODULE_FIRMWARE("amdgpu

[PATCH v2 06/11] drm/amd: Request VCN microcode during IP discovery

2022-12-28 Thread Mario Limonciello
If VCN microcode is not available during early init, the firmware
framebuffer will have already been released and the screen will
freeze.

Move the request for VCN microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 41 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   | 85 +--
 2 files changed, 41 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index f51ff86293b3..1c26a3a60394 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -124,6 +124,27 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
 MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
 MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
 
+MODULE_FIRMWARE("amdgpu/raven_vcn.bin");
+MODULE_FIRMWARE("amdgpu/picasso_vcn.bin");
+MODULE_FIRMWARE("amdgpu/raven2_vcn.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_vcn.bin");
+MODULE_FIRMWARE("amdgpu/renoir_vcn.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_vcn.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_vcn.bin");
+MODULE_FIRMWARE("amdgpu/navi10_vcn.bin");
+MODULE_FIRMWARE("amdgpu/navi14_vcn.bin");
+MODULE_FIRMWARE("amdgpu/navi12_vcn.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_vcn.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_vcn.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_vcn.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_vcn.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_vcn.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_vcn.bin");
+MODULE_FIRMWARE("amdgpu/vcn_3_1_2.bin");
+MODULE_FIRMWARE("amdgpu/vcn_4_0_0.bin");
+MODULE_FIRMWARE("amdgpu/vcn_4_0_2.bin");
+MODULE_FIRMWARE("amdgpu/vcn_4_0_4.bin");
+
 static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID]  = "MP1",
[MP2_HWID]  = "MP2",
@@ -1922,8 +1943,23 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct 
amdgpu_device *adev)
return 0;
 }
 
+static int amdgpu_discovery_load_vcn_fw(struct amdgpu_device *adev,
+   char *fname)
+{
+   char fw_name[40];
+
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", fname);
+
+   return request_firmware(&adev->vcn.fw, fw_name, adev->dev);
+}
+
 static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
 {
+   char ucode_prefix[30];
+   int r = 0;
+
+   amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+
if (adev->ip_versions[VCE_HWIP][0]) {
switch (adev->ip_versions[UVD_HWIP][0]) {
case IP_VERSION(7, 0, 0):
@@ -2001,7 +2037,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct 
amdgpu_device *adev)
return -EINVAL;
}
}
-   return 0;
+   if (*ucode_prefix)
+   r = amdgpu_discovery_load_vcn_fw(adev, ucode_prefix);
+   return r;
+}
 }
 
 static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a23e26b272b4..370c9644a3b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -35,55 +35,11 @@
 #include "amdgpu_vcn.h"
 #include "soc15d.h"
 
-/* Firmware Names */
-#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
-#define FIRMWARE_PICASSO   "amdgpu/picasso_vcn.bin"
-#define FIRMWARE_RAVEN2"amdgpu/raven2_vcn.bin"
-#define FIRMWARE_ARCTURUS  "amdgpu/arcturus_vcn.bin"
-#define FIRMWARE_RENOIR"amdgpu/renoir_vcn.bin"
-#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
-#define FIRMWARE_NAVI10"amdgpu/navi10_vcn.bin"
-#define FIRMWARE_NAVI14"amdgpu/navi14_vcn.bin"
-#define FIRMWARE_NAVI12"amdgpu/navi12_vcn.bin"
-#define FIRMWARE_SIENNA_CICHLID"amdgpu/sienna_cichlid_vcn.bin"
-#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
-#define FIRMWARE_VANGOGH   "amdgpu/vangogh_vcn.bin"
-#define FIRMWARE_DIMGREY_CAVEFISH  "amdgpu/dimgrey_cavefish_vcn.bin"
-#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
-#define FIRMWARE_BEIGE_GOBY"amdgpu/beige_goby_vcn.bin"
-#define FIRMWARE_YELLOW_CARP   "amdgpu/yellow_carp_vcn.bin"
-#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
-#define FIRMWARE_VCN4_0_0  "amdgpu/vcn_4_0_0.

[PATCH v2 11/11] drm/amd: Request PSP microcode during IP discovery

2022-12-28 Thread Mario Limonciello
If PSP microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for PSP microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 120 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   |   2 -
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c| 106 +++
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c| 165 --
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c| 102 +++
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c|  82 -
 drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c  |  36 
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c |  36 
 8 files changed, 202 insertions(+), 447 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index c8c538a768fe..6199ab078bc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -158,6 +158,40 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
 
+MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven_asd.bin");
+MODULE_FIRMWARE("amdgpu/picasso_asd.bin");
+MODULE_FIRMWARE("amdgpu/raven2_asd.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven_ta.bin");
+MODULE_FIRMWARE("amdgpu/renoir_asd.bin");
+MODULE_FIRMWARE("amdgpu/renoir_ta.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
+MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
+MODULE_FIRMWARE("amdgpu/vega10_cap.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
 
 /* gfx9 */
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
@@ -1858,12 +1892,30 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct 
amdgpu_device *adev)
 
 static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
 {
+   char ucode_prefix[30];
+   int r;
+
+   amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+   adev->psp.adev = adev;
+
switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(9, 0, 0):
+   r = psp_init_sos_microcode(&adev->psp, ucode_prefix);
+   if (r)
+   return r;
+   r = psp_init_asd_microcode(&adev->psp, ucode_prefix);
+   if (r)
+   return r;
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
break;
case IP_VERSION(10, 0, 0):
case IP_VERSION(10, 0, 1):
+   r = psp_init_asd_microcode(&adev->psp, ucode_prefix);
+   if (r)
+   return r;
+   r = psp_init_ta_microcode(&adev->psp, ucode_prefix);
+   if (r)
+   return r;
amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
break;
case IP_VERSION(11, 0, 0):
@@ -1871,11 +1923,34 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct 
amdgpu_device *adev)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 0, 5):
case IP_VERSION(11, 0, 9):
+   r = psp_init_sos_microcode(&adev->psp, ucode_prefix);
+   if (r)
+   return r;
+   r = psp_init_asd_microcode(&adev->psp, ucode_prefix);
+   if (r)
+ 

[PATCH v2 08/11] drm/amd: Request GFX9 microcode during IP discovery

2022-12-28 Thread Mario Limonciello
If GFX9 microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for GFX9 microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 144 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 143 +
 2 files changed, 152 insertions(+), 135 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 479266ed2b7f..0da16abd6b24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -158,6 +158,68 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
 
+
+/* gfx9 */
+MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega10_me.bin");
+MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega12_me.bin");
+MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega20_me.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/raven_ce.bin");
+MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
+MODULE_FIRMWARE("amdgpu/raven_me.bin");
+MODULE_FIRMWARE("amdgpu/raven_mec.bin");
+MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
+MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
+MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
+MODULE_FIRMWARE("amdgpu/picasso_me.bin");
+MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
+MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
+MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
+MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
+
+MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
+MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/raven2_me.bin");
+MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
+MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
+MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
+MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
+MODULE_FIRMWARE("amdgpu/renoir_me.bin");
+MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
+MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
+
 static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID]  = "MP1",
[MP2_HWID]  = "MP2",
@@ -1845,8 +1907,87 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
amdgpu_device *adev)
return 0;
 }
 
+static int amdgpu_discovery_load_gfx9(struct amdgpu_device *adev, char 
*ucode_prefix)
+{
+   uint32_t smu_version;
+   char fw_name[40];
+   int r;
+
+   /* No CPG in Arcturus */
+   if (adev->gfx.num_gfx_rings) {
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", 
ucode_prefix);
+   r = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", 
ucode_prefix);
+   r = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", 
ucode_prefix);
+   r = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
+   if (r)
+   return r;
+   

[PATCH v2 09/11] drm/amd: Request GFX10 microcode during IP discovery

2022-12-28 Thread Mario Limonciello
If GFX10 microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for GFX10 microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 137 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 180 +-
 2 files changed, 144 insertions(+), 173 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 0da16abd6b24..d31559600cae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -220,6 +220,102 @@ MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
 
+/* gfx10 */
+MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
+
+MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi10_me.bin");
+MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi14_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navi12_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi12_me.bin");
+MODULE_FIRMWARE("amdgpu/navi12_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi12_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_ce.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_pfp.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_me.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec2.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navy_flounder_ce.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_me.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_mec.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/vangogh_ce.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_me.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_mec.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ce.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_pfp.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_me.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec2.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/beige_goby_ce.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_pfp.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_me.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_mec.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_mec2.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/yellow_carp_ce.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_pfp.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_me.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2

[PATCH v2 07/11] drm/amd: Request MES microcode during IP discovery

2022-12-28 Thread Mario Limonciello
If MES microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for MES microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 39 +++
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c| 28 -
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c| 25 +---
 3 files changed, 40 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 1c26a3a60394..479266ed2b7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -145,6 +145,19 @@ MODULE_FIRMWARE("amdgpu/vcn_4_0_0.bin");
 MODULE_FIRMWARE("amdgpu/vcn_4_0_2.bin");
 MODULE_FIRMWARE("amdgpu/vcn_4_0_4.bin");
 
+MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes1.bin");
+
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
+
 static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID]  = "MP1",
[MP2_HWID]  = "MP2",
@@ -2041,10 +2054,29 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct 
amdgpu_device *adev)
r = amdgpu_discovery_load_vcn_fw(adev, ucode_prefix);
return r;
 }
+
+static int amdgpu_discovery_load_mes_fw(struct amdgpu_device *adev,
+   enum admgpu_mes_pipe pipe,
+   const char *ucode_prefix)
+{
+   char fw_name[40];
+
+   if (pipe == AMDGPU_MES_SCHED_PIPE)
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
+ucode_prefix);
+   else
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes1.bin",
+ucode_prefix);
+
+   return request_firmware(&adev->mes.fw[pipe], fw_name, adev->dev);
 }
 
 static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
 {
+   char ucode_prefix[30];
+   int pipe, r;
+   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
@@ -2077,6 +2109,13 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct 
amdgpu_device *adev)
default:
break;
}
+   if (adev->enable_mes) {
+   for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+   r = amdgpu_discovery_load_mes_fw(adev, pipe, 
ucode_prefix);
+   if (r)
+   return r;
+   }
+   }
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 614394118a53..9faa9867b3c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -37,10 +37,6 @@
 #define mmRLC_CP_SCHEDULERS_Sienna_Cichlid 0x4ca1
 #define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX1
 
-MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
-MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
-MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes1.bin");
-
 static int mes_v10_1_hw_fini(void *handle);
 static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev);
 
@@ -382,34 +378,10 @@ static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
 static int mes_v10_1_init_microcode(struct amdgpu_device *adev,
enum admgpu_mes_pipe pipe)
 {
-   const char *chip_name;
-   char fw_name[30];
int err;
const struct mes_firmware_header_v1_0 *mes_hdr;
struct amdgpu_firmware_info *info;
 
-   switch (adev->ip_versions[GC_HWIP][0]) {
-   case IP_VERSION(10, 1, 10):
-   chip_name = "navi10";
-   break;
-   case IP_VERSION(10, 3, 0):
-   chip_name = "sienna_cichlid";
-   break;
-   default:
-   BUG();
-   }
-
-   if (pipe == AMDGPU_MES_SCHED_PIPE)
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
-chip_name);
-   else
-   snprintf(fw_name, siz

[PATCH v2 04/11] drm/amd: Convert SMU v13 to use `amdgpu_ucode_ip_version_decode`

2022-12-28 Thread Mario Limonciello
The special case for the one dGPU has been moved into
`amdgpu_ucode_ip_version_decode`, so simplify this code.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 0ac9cac805f9..506a49a4b425 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -88,7 +88,6 @@ static const int link_speed[] = {25, 50, 80, 160};
 int smu_v13_0_init_microcode(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
-   const char *chip_name;
char fw_name[30];
char ucode_prefix[30];
int err = 0;
@@ -100,16 +99,9 @@ int smu_v13_0_init_microcode(struct smu_context *smu)
if (amdgpu_sriov_vf(adev))
return 0;
 
-   switch (adev->ip_versions[MP1_HWIP][0]) {
-   case IP_VERSION(13, 0, 2):
-   chip_name = "aldebaran_smc";
-   break;
-   default:
-   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
-   chip_name = ucode_prefix;
-   }
+   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
-- 
2.34.1



[PATCH v2 05/11] drm/amd: Request SDMA microcode during IP discovery

2022-12-28 Thread Mario Limonciello
If SDMA microcode is not available during early init, the firmware
framebuffer will have already been released and the screen will
freeze.

Move the request for SDMA microcode into the IP discovery phase
so that if it's not available, IP discovery will fail.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 57 
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c  |  9 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h  |  2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c| 61 +
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c| 42 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c| 65 +--
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c| 30 +
 7 files changed, 66 insertions(+), 200 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index b719852daa07..f51ff86293b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -90,6 +90,40 @@ MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY);
 #define mmMM_INDEX_HI  0x6
 #define mmMM_DATA  0x1
 
+MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/navi14_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/navi12_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
+MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
+MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin");
+MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sdma.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sdma.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_6.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
+
 static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID]  = "MP1",
[MP2_HWID]  = "MP2",
@@ -1821,8 +1855,26 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct 
amdgpu_device *adev)
return 0;
 }
 
+static int amdgpu_discovery_load_sdma_fw(struct amdgpu_device *adev, u32 
instance,
+const char *chip_name)
+{
+   char fw_name[40];
+
+   if (instance == 0)
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name);
+   else
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", 
chip_name);
+
+   return request_firmware(&adev->sdma.instance[instance].fw, fw_name, 
adev->dev);
+}
+
 static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
 {
+   char ucode_prefix[30];
+   int i, r;
+
+   amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+
switch (adev->ip_versions[SDMA0_HWIP][0]) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 1):
@@ -1862,6 +1914,11 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct 
amdgpu_device *adev)
adev->ip_versions[SDMA0_HWIP][0]);
return -EINVAL;
}
+   for (i = 0; i < adev->sdma.num_instances; i++) {
+   r = amdgpu_discovery_load_sdma_fw(adev, i, ucode_prefix);
+   if (r)
+   return r;
+   }
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index ea5278f094c0..9e46d8034c03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -205,8 +205,7 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device 
*adev,
 }
 
 int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
- 

[PATCH v2 02/11] drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"

2022-12-28 Thread Mario Limonciello
This will allow other parts of the driver that currently special
case firmware file names to before IP version style naming to just
have a single call to `amdgpu_ucode_ip_version_decode`.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 208 ++
 1 file changed, 208 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 5cb62e6249c2..5392c1fe434b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1059,12 +1059,220 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
return 0;
 }
 
+static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int 
block_type)
+{
+   if (block_type == MP0_HWIP) {
+   switch (adev->ip_versions[MP0_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   switch (adev->asic_type) {
+   case CHIP_VEGA10:
+   return "vega10";
+   case CHIP_VEGA12:
+   return "vega12";
+   default:
+   return NULL;
+   }
+   break;
+   case IP_VERSION(10, 0, 0):
+   case IP_VERSION(10, 0, 1):
+   if (adev->asic_type == CHIP_RAVEN) {
+   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+   return "raven2";
+   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+   return "picasso";
+   else
+   return "raven";
+   }
+   break;
+   case IP_VERSION(11, 0, 0):
+   return "navi10";
+   case IP_VERSION(11, 0, 2):
+   return "vega20";
+   case IP_VERSION(11, 0, 4):
+   return "arcturus";
+   case IP_VERSION(11, 0, 5):
+   return "navi14";
+   case IP_VERSION(11, 0, 7):
+   return "sienna_cichlid";
+   case IP_VERSION(11, 0, 9):
+   return "navi12";
+   case IP_VERSION(11, 0, 11):
+   return "navy_flounder";
+   case IP_VERSION(11, 0, 12):
+   return "dimgrey_cavefish";
+   case IP_VERSION(11, 0, 13):
+   return "beige_goby";
+   case IP_VERSION(11, 5, 0):
+   return "vangogh";
+   case IP_VERSION(12, 0, 1):
+   if (adev->asic_type == CHIP_RENOIR) {
+   if (adev->apu_flags & AMD_APU_IS_RENOIR)
+   return "renoir";
+   else
+   return "green_sardine";
+   }
+   break;
+   case IP_VERSION(13, 0, 2):
+   return "aldebaran";
+   case IP_VERSION(13, 0, 1):
+   case IP_VERSION(13, 0, 3):
+   return "yellow_carp";
+   }
+   } else if (block_type == MP1_HWIP) {
+   switch (adev->ip_versions[MP1_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   case IP_VERSION(10, 0, 0):
+   case IP_VERSION(10, 0, 1):
+   case IP_VERSION(11, 0, 2):
+   if (adev->asic_type == CHIP_ARCTURUS)
+   return "arcturus_smc";
+   return NULL;
+   case IP_VERSION(11, 0, 0):
+   return "navi10_smc";
+   case IP_VERSION(11, 0, 5):
+   return "navi14_smc";
+   case IP_VERSION(11, 0, 9):
+   return "navi12_smc";
+   case IP_VERSION(11, 0, 7):
+   return "sienna_cichlid_smc";
+   case IP_VERSION(11, 0, 11):
+   return "navy_flounder_smc";
+   case IP_VERSION(11, 0, 12):
+   return "dimgrey_cavefish_smc";
+   case IP_VERSION(11, 0, 13):
+   return "beige_goby_smc";
+   case IP_VERSION(13, 0, 2):
+   return "aldebaran_smc";
+   }
+   } else if (block_type == SDMA0_HWIP) {
+   switch (adev->ip_versions[SDMA0_HWIP][0]) {
+   case IP_VERSION(4, 0, 0

[PATCH v2 00/11] Recover from failure to probe GPU

2022-12-28 Thread Mario Limonciello
One of the first things that KMS drivers do during initialization is
destroy the system firmware framebuffer by means of
`drm_aperture_remove_conflicting_pci_framebuffers`

This means that if for any reason the GPU failed to probe, the user
will be stuck with at best a screen frozen at the last thing that
was shown before the KMS driver continued its probe.

The problem is most pronounced when new GPU support is introduced
because users will need to have a recent linux-firmware snapshot
on their system when they boot a kernel with matching support.

However, the problem is further exacerbated in the case of amdgpu because
it has migrated to "IP discovery" where amdgpu will attempt to load
on "ALL" AMD GPUs even if the driver is missing support for IP blocks
contained in that GPU.

IP discovery requires some probing and isn't run until after the
framebuffer has been destroyed.

This means a situation can occur where a user purchases a new GPU not
yet supported by a distribution and when booting the installer it will
"freeze" even if the distribution doesn't have the matching kernel support
for those IP blocks.

The perfect example of this is Ubuntu 22.10 and the new dGPUs just
launched by AMD.  The installation media ships with kernel 5.19 (which
has IP discovery) but the amdgpu support for those IP blocks landed in
kernel 6.0. The matching linux-firmware was released after 22.10's launch.
The screen will freeze without nomodeset. Even if a user manages to install
and then upgrades to kernel 6.0 after install they'll still have the
problem of missing firmware, and the same experience.

This is quite jarring for users, particularly if they don't know
that they have to use "nomodeset" to install.

To help the situation make changes to GPU discovery:
1) Delay releasing the firmware framebuffer until after IP discovery has
completed.  This will help the situation of an older kernel that doesn't
yet support the IP blocks probing a new GPU.
2) Request loading all PSP, VCN, SDMA, MES and GC microcode into memory
during IP discovery. This will help the situation of new enough kernel for
the IP discovery phase to otherwise pass but missing microcode from
linux-firmware.git.

Not all requested firmware will be loaded during IP discovery as some of it
will require larger driver architecture changes. For example SMU firmware
isn't loaded on certain products, but that's not known until later on when
the early_init phase of the SMU load occurs.

v1->v2:
 * Take the suggestion from v1 thread to delay the framebuffer release until
   IP discovery is done. This patch is CC'd to stable so that older stable
   kernels with IP discovery won't try to probe unknown IP.
 * Drop changes to drm aperture.
 * Fetch SDMA, VCN, MES, GC and PSP microcode during IP discovery.

Mario Limonciello (11):
  drm/amd: Delay removal of the firmware framebuffer
  drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"
  drm/amd: Convert SMUv11 microcode init to use
`amdgpu_ucode_ip_version_decode`
  drm/amd: Convert SMU v13 to use `amdgpu_ucode_ip_version_decode`
  drm/amd: Request SDMA microcode during IP discovery
  drm/amd: Request VCN microcode during IP discovery
  drm/amd: Request MES microcode during IP discovery
  drm/amd: Request GFX9 microcode during IP discovery
  drm/amd: Request GFX10 microcode during IP discovery
  drm/amd: Request GFX11 microcode during IP discovery
  drm/amd: Request PSP microcode during IP discovery

 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   8 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 590 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   6 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   |   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c  |   9 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 208 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  85 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 180 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c|  64 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 143 +
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c|  28 -
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|  25 +-
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c| 106 +---
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c| 165 +
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c| 102 +--
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c|  82 ---
 drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c  |  36 --
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c |  36 --
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c|  61 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c|  42 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c|  65 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c|  30 +-
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c|  35 +-
 .../gpu/drm/amd/pm/sw

[PATCH v2 03/11] drm/amd: Convert SMUv11 microcode init to use `amdgpu_ucode_ip_version_decode`

2022-12-28 Thread Mario Limonciello
Remove the special casing from SMU v11 code. No intended functional
changes.

Signed-off-by: Mario Limonciello 
---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 35 ++-
 1 file changed, 3 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index ad66d57aa102..d4756bd30830 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -93,7 +93,7 @@ static void smu_v11_0_poll_baco_exit(struct smu_context *smu)
 int smu_v11_0_init_microcode(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
-   const char *chip_name;
+   char ucode_prefix[30];
char fw_name[SMU_FW_NAME_LEN];
int err = 0;
const struct smc_firmware_header_v1_0 *hdr;
@@ -105,38 +105,9 @@ int smu_v11_0_init_microcode(struct smu_context *smu)
 (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7
return 0;
 
-   switch (adev->ip_versions[MP1_HWIP][0]) {
-   case IP_VERSION(11, 0, 0):
-   chip_name = "navi10";
-   break;
-   case IP_VERSION(11, 0, 5):
-   chip_name = "navi14";
-   break;
-   case IP_VERSION(11, 0, 9):
-   chip_name = "navi12";
-   break;
-   case IP_VERSION(11, 0, 7):
-   chip_name = "sienna_cichlid";
-   break;
-   case IP_VERSION(11, 0, 11):
-   chip_name = "navy_flounder";
-   break;
-   case IP_VERSION(11, 0, 12):
-   chip_name = "dimgrey_cavefish";
-   break;
-   case IP_VERSION(11, 0, 13):
-   chip_name = "beige_goby";
-   break;
-   case IP_VERSION(11, 0, 2):
-   chip_name = "arcturus";
-   break;
-   default:
-   dev_err(adev->dev, "Unsupported IP version 0x%x\n",
-   adev->ip_versions[MP1_HWIP][0]);
-   return -EINVAL;
-   }
+   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
-- 
2.34.1



[PATCH v2 01/11] drm/amd: Delay removal of the firmware framebuffer

2022-12-28 Thread Mario Limonciello
Removing the firmware framebuffer from the driver means that even
if the driver doesn't support the IP blocks in a GPU it will no
longer be functional after the driver fails to initialize.

This change will ensure that unsupported IP blocks at least cause
the driver to work with the EFI framebuffer.

Cc: sta...@vger.kernel.org
Suggested-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 6 --
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9a1a5c2864a0..84d83be2087c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -37,6 +37,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -89,6 +90,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
 #define AMDGPU_MAX_RETRY_LIMIT 2
 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) 
== -EINVAL)
 
+static const struct drm_driver amdgpu_kms_driver;
+
 const char *amdgpu_asic_name[] = {
"TAHITI",
"PITCAIRN",
@@ -2140,6 +2143,11 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
break;
}
 
+   /* Get rid of things like offb */
+   r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
&amdgpu_kms_driver);
+   if (r)
+   return r;
+
if (amdgpu_has_atpx() &&
(amdgpu_is_atpx_hybrid() ||
 amdgpu_has_atpx_dgpu_power_cntl()) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index db7e34eacc35..b9f14ec9edb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,7 +23,6 @@
  */
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -2096,11 +2095,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
}
 #endif
 
-   /* Get rid of things like offb */
-   ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, 
&amdgpu_kms_driver);
-   if (ret)
-   return ret;
-
adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, 
typeof(*adev), ddev);
if (IS_ERR(adev))
return PTR_ERR(adev);
-- 
2.34.1



Re: [PATCH 0/2] Recover from failure to probe GPU

2022-12-23 Thread Mario Limonciello

On 12/22/22 13:41, Javier Martinez Canillas wrote:

[adding Thomas Zimmermann to CC list]

Hello Mario,

Interesting case.

On 12/22/22 19:30, Mario Limonciello wrote:

One of the first things that KMS drivers do during initialization is
destroy the system firmware framebuffer by means of
`drm_aperture_remove_conflicting_pci_framebuffers`



The reason why that's done at the very beginning is that there are no
guarantees that the firmware-provided framebuffer would keep working
after the real display controller driver re-initializes the IP block.


This means that if for any reason the GPU failed to probe the user
will be stuck with at best a screen frozen at the last thing that
was shown before the KMS driver continued its probe.

The problem is most pronounced when new GPU support is introduced
because users will need to have a recent linux-firmware snapshot
on their system when they boot a kernel with matching support.



Right. That's a problem indeed but as mentioned there's a gap between
when the firmware-provided framebuffer is removed and when the real driver
sets up its framebuffer.
  

However the problem is further exaggerated in the case of amdgpu because
it has migrated to "IP discovery" where amdgpu will attempt to load
on "ALL" AMD GPUs even if the driver is missing support for IP blocks
contained in that GPU.

IP discovery requires some probing and isn't run until after the
framebuffer has been destroyed.

This means a situation can occur where a user purchases a new GPU not
yet supported by a distribution and when booting the installer it will
"freeze" even if the distribution doesn't have the matching kernel support
for those IP blocks.

The perfect example of this is Ubuntu 21.10 and the new dGPUs just
launched by AMD.  The installation media ships with kernel 5.19 (which
has IP discovery) but the amdgpu support for those IP blocks landed in
kernel 6.0. The matching linux-firmware was released after 21.10's launch.
The screen will freeze without nomodeset. Even if a user manages to install
and then upgrades to kernel 6.0 after install they'll still have the
problem of missing firmware, and the same experience.


s/21.10/22.10/



This is quite jarring for users, particularly if they don't know
that they have to use "nomodeset" to install.



I'm not familiar with AMD GPUs, but could it be possible for this discovery
and firmware loading step to be done at the beginning, before the firmware FB
is removed? That way the FB removal will not happen unless that succeeds.


Possible?  I think so, but maybe Alex can comment on this after the 
holidays as he's more familiar.


It would mean splitting and introducing an entirely new phase to driver 
initialization.  The information about the discovery table comes from VRAM.


amdgpu_driver_load_kms -> amdgpu_device_init -> amdgpu_device_ip_early_init

Basically that specific code would have to be called earlier and then there 
would need to be a separate set of code for all the IP blocks to *just* 
collect what firmware they need.


  

To help the situation, allow drivers to re-run the init process for the
firmware framebuffer during a failed probe. As this problem is most
pronounced with amdgpu, this is the only driver changed.

But if this makes sense more generally for other KMS drivers, the call
can be added to the cleanup routine for those too.



The problem I see is that depending on how far the driver's probe function
went, it may not be possible to re-run the init process, since the firmware
provided framebuffer may already have been destroyed or the IP block may just
be in a half initialized state.

I'm not against this series if it solves the issue in practice for amdgpu,
but I don't think it is a general solution and would like to know Thomas'
opinion on this first as well.


Running on this idea I'm pretty sure that request_firmware returns 
-ENOENT in this case. So another proposal for when to trigger this flow 
would be to only do it on -ENOENT.  We could then also change 
amdgpu_discovery.c to return -ENOENT when an IP block isn't supported 
instead of the current -EINVAL.


Or we could instead co-opt -ENOTSUPP and remap all the cases that we 
explicitly want the system framebuffer to re-initialize to that.


[PATCH 1/2] firmware: sysfb: Allow re-creating system framebuffer after init

2022-12-22 Thread Mario Limonciello
When GPU kernel drivers have failed to load for any reason the
current experience is that the screen is frozen.  This is because
one of the first things that these drivers do is to call `sysfb_disable`.

For end users this is quite jarring and hard to recover from.  Allow
drivers to request the framebuffer to be re-created for a failure cleanup.

Signed-off-by: Mario Limonciello 
---
 drivers/firmware/efi/sysfb_efi.c  |  6 +++---
 drivers/firmware/sysfb.c  | 15 ++-
 drivers/firmware/sysfb_simplefb.c |  4 ++--
 include/linux/sysfb.h |  5 +
 4 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c
index 7882d4b3f2be..a890cb6d44fa 100644
--- a/drivers/firmware/efi/sysfb_efi.c
+++ b/drivers/firmware/efi/sysfb_efi.c
@@ -185,7 +185,7 @@ static int __init efifb_set_system(const struct 
dmi_system_id *id)
&efifb_dmi_list[enumid] \
}
 
-static const struct dmi_system_id efifb_dmi_system_table[] __initconst = {
+static const struct dmi_system_id efifb_dmi_system_table[] = {
EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac4,1", M_I17),
/* At least one of these two will be right; maybe both? */
EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac5,1", M_I20),
@@ -235,7 +235,7 @@ static const struct dmi_system_id efifb_dmi_system_table[] 
__initconst = {
  * pitch). We simply swap width and height for these devices so that we can
  * correctly deal with some of them coming with multiple resolutions.
  */
-static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = {
+static const struct dmi_system_id efifb_dmi_swap_width_height[] = {
{
/*
 * Lenovo MIIX310-10ICR, only some batches have the troublesome
@@ -333,7 +333,7 @@ static const struct fwnode_operations efifb_fwnode_ops = {
 #ifdef CONFIG_EFI
 static struct fwnode_handle efifb_fwnode;
 
-__init void sysfb_apply_efi_quirks(struct platform_device *pd)
+void sysfb_apply_efi_quirks(struct platform_device *pd)
 {
if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI ||
!(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS))
diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c
index 3fd3563d962b..7f2254bd2071 100644
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -69,7 +69,7 @@ void sysfb_disable(void)
 }
 EXPORT_SYMBOL_GPL(sysfb_disable);
 
-static __init int sysfb_init(void)
+static int sysfb_init(void)
 {
struct screen_info *si = &screen_info;
struct simplefb_platform_data mode;
@@ -124,6 +124,19 @@ static __init int sysfb_init(void)
mutex_unlock(&disable_lock);
return ret;
 }
+/**
+ * sysfb_enable() - re-enable the Generic System Framebuffers support
+ *
+ * This causes the system framebuffer initialization to be re-run.
+ * It is intended to be called by DRM drivers that failed probe for cleanup.
+ *
+ */
+int sysfb_enable(void)
+{
+   disabled = false;
+   return sysfb_init();
+}
+EXPORT_SYMBOL_GPL(sysfb_enable);
 
 /* must execute after PCI subsystem for EFI quirks */
 device_initcall(sysfb_init);
diff --git a/drivers/firmware/sysfb_simplefb.c 
b/drivers/firmware/sysfb_simplefb.c
index a353e27f83f5..82735ff81191 100644
--- a/drivers/firmware/sysfb_simplefb.c
+++ b/drivers/firmware/sysfb_simplefb.c
@@ -24,7 +24,7 @@ static const char simplefb_resname[] = "BOOTFB";
 static const struct simplefb_format formats[] = SIMPLEFB_FORMATS;
 
 /* try parsing screen_info into a simple-framebuffer mode struct */
-__init bool sysfb_parse_mode(const struct screen_info *si,
+bool sysfb_parse_mode(const struct screen_info *si,
 struct simplefb_platform_data *mode)
 {
const struct simplefb_format *f;
@@ -57,7 +57,7 @@ __init bool sysfb_parse_mode(const struct screen_info *si,
return false;
 }
 
-__init struct platform_device *sysfb_create_simplefb(const struct screen_info 
*si,
+struct platform_device *sysfb_create_simplefb(const struct screen_info *si,
 const struct 
simplefb_platform_data *mode)
 {
struct platform_device *pd;
diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h
index 8ba8b5be5567..14d447576e57 100644
--- a/include/linux/sysfb.h
+++ b/include/linux/sysfb.h
@@ -58,6 +58,7 @@ struct efifb_dmi_info {
 #ifdef CONFIG_SYSFB
 
 void sysfb_disable(void);
+int sysfb_enable(void);
 
 #else /* CONFIG_SYSFB */
 
@@ -65,6 +66,10 @@ static inline void sysfb_disable(void)
 {
 }
 
+static int sysfb_enable(void)
+{
+}
+
 #endif /* CONFIG_SYSFB */
 
 #ifdef CONFIG_EFI
-- 
2.34.1



[PATCH 0/2] Recover from failure to probe GPU

2022-12-22 Thread Mario Limonciello
800x32, linelength=11520, pages=1
[5.974807] efifb: scrolling: redraw
[5.974807] efifb: Truecolor: size=8:8:8:8, shift=24:16:8:0
[5.974974] Console: switching to colour frame buffer device 180x56
[5.978181] fb0: EFI VGA frame buffer device
[5.978199] amdgpu: probe of :63:00.0 failed with error -2
[5.978285] [drm] amdgpu: ttm finalized

Now if the user loads the firmware into the system they can re-load the
driver or re-attach using sysfs and it gracefully recovers.

[  665.080480] [drm] Initialized amdgpu 3.49.0 20150101 for :63:00.0 on 
minor 0
[  665.090075] fbcon: amdgpudrmfb (fb0) is primary device
[  665.090248] [drm] DSC precompute is not needed.

Mario Limonciello (2):
  firmware: sysfb: Allow re-creating system framebuffer after init
  drm/amd: Re-create firmware framebuffer on failure to probe

 drivers/firmware/efi/sysfb_efi.c|  6 +++---
 drivers/firmware/sysfb.c| 15 ++-
 drivers/firmware/sysfb_simplefb.c   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  2 ++
 include/linux/sysfb.h   |  5 +
 5 files changed, 26 insertions(+), 6 deletions(-)


base-commit: 830b3c68c1fb1e9176028d02ef86f3cf76aa2476
-- 
2.34.1



[PATCH 2/2] drm/amd: Re-create firmware framebuffer on failure to probe

2022-12-22 Thread Mario Limonciello
If the probe sequence fails then the user is stuck with a frozen
screen and can only really recover via SSH or by rebooting and
applying nomodeset to the kernel command line.

This is particularly problematic as newer GPUs are introduced because
distributions may take some time to land newer GPU firmware.

So when probe fails, re-create the system framebuffer so that the
user at least has basic graphics support.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index bf2d50c8c92a..8961c62ab29b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
@@ -2187,6 +2188,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 
 err_pci:
pci_disable_device(pdev);
+   sysfb_enable();
return ret;
 }
 
-- 
2.34.1



Re: [PATCH v2] drm/amdgpu: skip mes self test after s0i3 resume for MES IP v11.0

2022-12-19 Thread Mario Limonciello

On 12/19/22 20:30, Tim Huang wrote:

MES is part of gfxoff and MES suspend and resume are skipped for S0i3.
But the mes_self_test call path is still in the amdgpu_device_ip_late_init.
It should also be skipped for s0ix as no hardware re-initialization
happened.

Besides, mes_self_test will free the BO that triggers a lot of warning
messages while in the suspend state.

[   81.656085] WARNING: CPU: 2 PID: 1550 at 
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c:425 amdgpu_bo_free_kernel+0xfc/0x110 
[amdgpu]
[   81.679435] Call Trace:
[   81.679726]  
[   81.679981]  amdgpu_mes_remove_hw_queue+0x17a/0x230 [amdgpu]
[   81.680857]  amdgpu_mes_self_test+0x390/0x430 [amdgpu]
[   81.681665]  mes_v11_0_late_init+0x37/0x50 [amdgpu]
[   81.682423]  amdgpu_device_ip_late_init+0x53/0x280 [amdgpu]
[   81.683257]  amdgpu_device_resume+0xae/0x2a0 [amdgpu]
[   81.684043]  amdgpu_pmops_resume+0x37/0x70 [amdgpu]
[   81.684818]  pci_pm_resume+0x5c/0xa0
[   81.685247]  ? pci_pm_thaw+0x90/0x90
[   81.685658]  dpm_run_callback+0x4e/0x160
[   81.686110]  device_resume+0xad/0x210
[   81.686529]  async_resume+0x1e/0x40
[   81.686931]  async_run_entry_fn+0x33/0x120
[   81.687405]  process_one_work+0x21d/0x3f0
[   81.687869]  worker_thread+0x4a/0x3c0
[   81.688293]  ? process_one_work+0x3f0/0x3f0
[   81.688777]  kthread+0xff/0x130
[   81.689157]  ? kthread_complete_and_exit+0x20/0x20
[   81.689707]  ret_from_fork+0x22/0x30
[   81.690118]  
[   81.690380] ---[ end trace  ]---

Signed-off-by: Tim Huang 


Reviewed-by: Mario Limonciello 



v2: make the comment clean and use adev->in_s0ix instead of
adev->suspend
---
  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 5459366f49ff..970b066b37bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -1342,7 +1342,8 @@ static int mes_v11_0_late_init(void *handle)
  {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  
-	if (!amdgpu_in_reset(adev) &&

+   /* it's only intended for use in mes_self_test case, not for s0ix and 
reset */
+   if (!amdgpu_in_reset(adev) && !adev->in_s0ix &&
(adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)))
amdgpu_mes_self_test(adev);
  




[PATCH v3 3/3] ACPI: video: Don't enable fallback path for creating ACPI backlight by default

2022-12-08 Thread Mario Limonciello
The ACPI video detection code has a module parameter
`register_backlight_delay` which is currently configured to 8 seconds.
This means that if after 8 seconds of booting no native driver has created
a backlight device then the code will attempt to make an ACPI video
backlight device.

This was intended as a safety mechanism with the backlight overhaul that
occurred in kernel 6.1, but as it doesn't appear necessary set it to be
disabled by default.

Suggested-by: Hans de Goede 
Signed-off-by: Mario Limonciello 
Reviewed-by: Hans de Goede 
---
v2->v3:
 * Add Hans' R-b
v1->v2:
 * New patch
---
 drivers/acpi/acpi_video.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index f64fdb029090..0c79f463fbfd 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -70,11 +70,7 @@ module_param(device_id_scheme, bool, 0444);
 static int only_lcd = -1;
 module_param(only_lcd, int, 0444);
 
-/*
- * Display probing is known to take up to 5 seconds, so delay the fallback
- * backlight registration by 5 seconds + 3 seconds for some extra margin.
- */
-static int register_backlight_delay = 8;
+static int register_backlight_delay;
 module_param(register_backlight_delay, int, 0444);
 MODULE_PARM_DESC(register_backlight_delay,
"Delay in seconds before doing fallback (non GPU driver triggered) "
-- 
2.34.1



[PATCH v3 1/3] ACPI: video: Allow GPU drivers to report no panels

2022-12-08 Thread Mario Limonciello
The current logic for the ACPI backlight detection will create
a backlight device if no native or vendor drivers have created one
8 seconds after the system has booted if the ACPI tables
included backlight control methods.

If the GPU drivers have loaded, they may be able to report whether
any LCD panels were found.  Allow using this information to factor
in whether to enable the fallback logic for making an acpi_video0
backlight device.

Suggested-by: Hans de Goede 
Signed-off-by: Mario Limonciello 
Reviewed-by: Hans de Goede 
---
v2->v3:
 * Add Hans' R-b
 * Add missing declaration for non CONFIG_ACPI_VIDEO case
v1->v2:
 * Cancel registration for backlight device instead (Hans)
 * drop desktop check (Dan)
---
 drivers/acpi/acpi_video.c | 11 +++
 include/acpi/video.h  |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 32953646caeb..f64fdb029090 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -2178,6 +2178,17 @@ static bool should_check_lcd_flag(void)
return false;
 }
 
+/*
+ * At least one graphics driver has reported that no LCD is connected
+ * via the native interface. cancel the registration for fallback acpi_video0.
+ * If another driver still deems this necessary, it can explicitly register it.
+ */
+void acpi_video_report_nolcd(void)
+{
+   cancel_delayed_work(&video_bus_register_backlight_work);
+}
+EXPORT_SYMBOL(acpi_video_report_nolcd);
+
 int acpi_video_register(void)
 {
int ret = 0;
diff --git a/include/acpi/video.h b/include/acpi/video.h
index a275c35e5249..a56c8d45e9f8 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -53,6 +53,7 @@ enum acpi_backlight_type {
 };
 
 #if IS_ENABLED(CONFIG_ACPI_VIDEO)
+extern void acpi_video_report_nolcd(void);
 extern int acpi_video_register(void);
 extern void acpi_video_unregister(void);
 extern void acpi_video_register_backlight(void);
@@ -69,6 +70,7 @@ extern int acpi_video_get_levels(struct acpi_device *device,
 struct acpi_video_device_brightness **dev_br,
 int *pmax_level);
 #else
+static inline void acpi_video_report_nolcd(void) { return; };
 static inline int acpi_video_register(void) { return -ENODEV; }
 static inline void acpi_video_unregister(void) { return; }
 static inline void acpi_video_register_backlight(void) { return; }
-- 
2.34.1



[PATCH v3 2/3] drm/amd/display: Report to ACPI video if no panels were found

2022-12-08 Thread Mario Limonciello
On desktop APUs amdgpu doesn't create a native backlight device
as no eDP panels are found.  However if the BIOS has reported
backlight control methods in the ACPI tables then an acpi_video0
backlight device will be made 8 seconds after boot.

This has manifested in a power slider on a number of desktop APUs
ranging from Ryzen 5000 through Ryzen 7000 on various motherboard
manufacturers. To avoid this, report to the acpi video detection
that the system does not have any panel connected in the native
driver.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1783786
Reported-by: Hans de Goede 
Signed-off-by: Mario Limonciello 
Reviewed-by: Hans de Goede 
---
v2->v3:
 * Add Hans' R-b
v1->v2:
 * No changes
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 512c32327eb1..b73f61ac5dd5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4371,6 +4371,10 @@ static int amdgpu_dm_initialize_drm_device(struct 
amdgpu_device *adev)
amdgpu_set_panel_orientation(&aconnector->base);
}
 
+   /* If we didn't find a panel, notify the acpi video detection */
+   if (dm->adev->flags & AMD_IS_APU && dm->num_of_edps == 0)
+   acpi_video_report_nolcd();
+
/* Software is initialized. Now we can register interrupt handlers. */
switch (adev->asic_type) {
 #if defined(CONFIG_DRM_AMD_DC_SI)
-- 
2.34.1



[PATCH v3 0/3] Adjust ACPI video detection fallback path

2022-12-08 Thread Mario Limonciello
In kernel 6.1 the backlight registration code was overhauled so that
at most one backlight device got registered. As part of this change
there was code added to still allow making an acpi_video0 device if the
BIOS contained backlight control methods but no native or vendor drivers
registered.

Even after the overhaul this fallback logic is failing on the BIOS from
a number of motherboard manufacturers supporting Ryzen APUs.
What happens is the amdgpu driver finishes registration and as expected
doesn't create a backlight control device since no eDP panels are connected
to a desktop.

Then 8 seconds later the ACPI video detection code creates an
acpi_video0 device that is non-operational. GNOME then creates a
backlight slider.

To prevent this situation from happening make two sets of changes:

Prevent desktop problems w/ fallback logic
--
1) Add support for the video detect code to let native drivers cancel the
fallback logic if they didn't find a panel.

This is done this way so that if another driver decides that the ACPI
mechanism is still needed it can instead directly call the registration
function.

2) Add code to amdgpu to notify the ACPI video detection code that no panel
was detected on an APU.

Disable fallback logic by default
-
This fallback logic was introduced to prevent regressions in the backlight
overhaul.  As it has been deemed unnecessary by Hans explicitly disable the
timeout.  If this turns out to be a mistake and this part is reverted, the
other patches for preventing desktop problems will avoid regressions on
desktops.

Mario Limonciello (3):
  ACPI: video: Allow GPU drivers to report no panels
  drm/amd/display: Report to ACPI video if no panels were found
  ACPI: video: Don't enable fallback path for creating ACPI backlight by
default

 drivers/acpi/acpi_video.c   | 17 -
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   |  4 
 include/acpi/video.h|  2 ++
 3 files changed, 18 insertions(+), 5 deletions(-)

-- 
2.34.1



[PATCH v2 2/3] drm/amd/display: Report to ACPI video if no panels were found

2022-12-07 Thread Mario Limonciello
On desktop APUs amdgpu doesn't create a native backlight device
as no eDP panels are found.  However if the BIOS has reported
backlight control methods in the ACPI tables then an acpi_video0
backlight device will be made 8 seconds after boot.

This has manifested in a power slider on a number of desktop APUs
ranging from Ryzen 5000 through Ryzen 7000 on various motherboard
manufacturers. To avoid this, report to the acpi video detection
that the system does not have any panel connected in the native
driver.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1783786
Reported-by: Hans de Goede 
Signed-off-by: Mario Limonciello 
---
v1->v2:
 * No changes

 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 512c32327eb1..b73f61ac5dd5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4371,6 +4371,10 @@ static int amdgpu_dm_initialize_drm_device(struct 
amdgpu_device *adev)
amdgpu_set_panel_orientation(&aconnector->base);
}
 
+   /* If we didn't find a panel, notify the acpi video detection */
+   if (dm->adev->flags & AMD_IS_APU && dm->num_of_edps == 0)
+   acpi_video_report_nolcd();
+
/* Software is initialized. Now we can register interrupt handlers. */
switch (adev->asic_type) {
 #if defined(CONFIG_DRM_AMD_DC_SI)
-- 
2.34.1



[PATCH v2 3/3] ACPI: video: Don't enable fallback path for creating ACPI backlight by default

2022-12-07 Thread Mario Limonciello
The ACPI video detection code has a module parameter
`register_backlight_delay` which is currently configured to 8 seconds.
This means that if after 8 seconds of booting no native driver has created
a backlight device then the code will attempt to make an ACPI video
backlight device.

This was intended as a safety mechanism with the backlight overhaul that
occurred in kernel 6.1, but as it doesn't appear necessary set it to be
disabled by default.

Suggested-by: Hans de Goede 
Signed-off-by: Mario Limonciello 
---
v1->v2:
 * New patch

 drivers/acpi/acpi_video.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index f64fdb029090..0c79f463fbfd 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -70,11 +70,7 @@ module_param(device_id_scheme, bool, 0444);
 static int only_lcd = -1;
 module_param(only_lcd, int, 0444);
 
-/*
- * Display probing is known to take up to 5 seconds, so delay the fallback
- * backlight registration by 5 seconds + 3 seconds for some extra margin.
- */
-static int register_backlight_delay = 8;
+static int register_backlight_delay;
 module_param(register_backlight_delay, int, 0444);
 MODULE_PARM_DESC(register_backlight_delay,
"Delay in seconds before doing fallback (non GPU driver triggered) "
-- 
2.34.1



[PATCH v2 1/3] ACPI: video: Allow GPU drivers to report no panels

2022-12-07 Thread Mario Limonciello
The current logic for the ACPI backlight detection will create
a backlight device if no native or vendor drivers have created one
8 seconds after the system has booted if the ACPI tables
included backlight control methods.

If the GPU drivers have loaded, they may be able to report whether
any LCD panels were found.  Allow using this information to factor
in whether to enable the fallback logic for making an acpi_video0
backlight device.

Suggested-by: Hans de Goede 
Signed-off-by: Mario Limonciello 
---
v1->v2:
 * Cancel registration for backlight device instead (Hans)
 * drop desktop check (Dan)

 drivers/acpi/acpi_video.c | 11 +++
 include/acpi/video.h  |  1 +
 2 files changed, 12 insertions(+)

diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 32953646caeb..f64fdb029090 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -2178,6 +2178,17 @@ static bool should_check_lcd_flag(void)
return false;
 }
 
+/*
+ * At least one graphics driver has reported that no LCD is connected
+ * via the native interface. cancel the registration for fallback acpi_video0.
+ * If another driver still deems this necessary, it can explicitly register it.
+ */
+void acpi_video_report_nolcd(void)
+{
+   cancel_delayed_work(&video_bus_register_backlight_work);
+}
+EXPORT_SYMBOL(acpi_video_report_nolcd);
+
 int acpi_video_register(void)
 {
int ret = 0;
diff --git a/include/acpi/video.h b/include/acpi/video.h
index a275c35e5249..1fccb111c197 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -53,6 +53,7 @@ enum acpi_backlight_type {
 };
 
 #if IS_ENABLED(CONFIG_ACPI_VIDEO)
+extern void acpi_video_report_nolcd(void);
 extern int acpi_video_register(void);
 extern void acpi_video_unregister(void);
 extern void acpi_video_register_backlight(void);
-- 
2.34.1



[PATCH v2 0/3] Adjust ACPI video detection fallback path

2022-12-07 Thread Mario Limonciello
In kernel 6.1 the backlight registration code was overhauled so that
at most one backlight device got registered. As part of this change
there was code added to still allow making an acpi_video0 device if the
BIOS contained backlight control methods but no native or vendor drivers
registered.

Even after the overhaul this fallback logic is failing on the BIOS from
a number of motherboard manufacturers supporting Ryzen APUs.
What happens is the amdgpu driver finishes registration and as expected
doesn't create a backlight control device since no eDP panels are connected
to a desktop.

Then 8 seconds later the ACPI video detection code creates an
acpi_video0 device that is non-operational. GNOME then creates a
backlight slider.

To prevent this situation from happening make two sets of changes:

Prevent desktop problems w/ fallback logic
--
1) Add support for the video detect code to let native drivers cancel the
fallback logic if they didn't find a panel.

This is done this way so that if another driver decides that the ACPI
mechanism is still needed it can instead directly call the registration
function.

2) Add code to amdgpu to notify the ACPI video detection code that no panel
was detected on an APU.

Disable fallback logic by default
-
This fallback logic was introduced to prevent regressions in the backlight
overhaul.  As it has been deemed unnecessary by Hans explicitly disable the
timeout.  If this turns out to be a mistake and this part is reverted, the
other patches for preventing desktop problems will avoid regressions on
desktops.

Mario Limonciello (3):
  ACPI: video: Allow GPU drivers to report no panels
  drm/amd/display: Report to ACPI video if no panels were found
  ACPI: video: Don't enable fallback path for creating ACPI backlight by
default

 drivers/acpi/acpi_video.c   | 17 -
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   |  4 
 include/acpi/video.h|  1 +
 3 files changed, 17 insertions(+), 5 deletions(-)

-- 
2.34.1



[PATCH 2/2] drm/amd/display: Report to ACPI video if no panels were found

2022-12-07 Thread Mario Limonciello
On desktop APUs amdgpu doesn't create a native backlight device
as no eDP panels are found.  However if the BIOS has reported
backlight control methods in the ACPI tables then an acpi_video0
backlight device will be made 8 seconds after boot.

This has manifested in a power slider on a number of desktop APUs
ranging from Ryzen 5000 through Ryzen 7000 on various motherboard
manufacturers. To avoid this, report to the acpi video detection
that the system does not have any panel connected in the native
driver.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1783786
Reported-by: Hans de Goede 
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 512c32327eb1..b73f61ac5dd5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4371,6 +4371,10 @@ static int amdgpu_dm_initialize_drm_device(struct 
amdgpu_device *adev)
amdgpu_set_panel_orientation(&aconnector->base);
}
 
+   /* If we didn't find a panel, notify the acpi video detection */
+   if (dm->adev->flags & AMD_IS_APU && dm->num_of_edps == 0)
+   acpi_video_report_nolcd();
+
/* Software is initialized. Now we can register interrupt handlers. */
switch (adev->asic_type) {
 #if defined(CONFIG_DRM_AMD_DC_SI)
-- 
2.34.1



[PATCH 1/2] ACPI: video: Allow GPU drivers to report no panels

2022-12-07 Thread Mario Limonciello
The current logic for the ACPI backlight detection will create
a backlight device if no native or vendor drivers have created
one 8 seconds after the system has booted if the ACPI tables
included backlight control methods.

If the GPU drivers have loaded, they may be able to report whether
any LCD panels were found.  Allow using this information to factor
in whether to make an acpi_video0 backlight device.

To avoid risks for regressions on complicated configurations with
muxes and multiple native drivers, only take into account drivers
that have reported this when the system is a desktop.

Suggested-by: Hans de Goede 
Signed-off-by: Mario Limonciello 
---
 drivers/acpi/acpi_video.c | 12 
 include/acpi/video.h  |  1 +
 2 files changed, 13 insertions(+)

diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 32953646caeb..e297f8877797 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -82,6 +82,7 @@ MODULE_PARM_DESC(register_backlight_delay,
 
 static bool may_report_brightness_keys;
 static int register_count;
+static bool native_reported_nolcd;
 static DEFINE_MUTEX(register_count_mutex);
 static DEFINE_MUTEX(video_list_lock);
 static LIST_HEAD(video_bus_head);
@@ -1811,6 +1812,9 @@ static bool acpi_video_should_register_backlight(struct 
acpi_video_device *dev)
return false;
}
 
+   if (native_reported_nolcd)
+   return false;
+
if (only_lcd)
return dev->flags.lcd;
return true;
@@ -2178,6 +2182,14 @@ static bool should_check_lcd_flag(void)
return false;
 }
 
+void acpi_video_report_nolcd(void)
+{
+   /* Only take into account native driver reporting on desktops */
+   if (dmi_is_desktop())
+   native_reported_nolcd = true;
+}
+EXPORT_SYMBOL(acpi_video_report_nolcd);
+
 int acpi_video_register(void)
 {
int ret = 0;
diff --git a/include/acpi/video.h b/include/acpi/video.h
index a275c35e5249..1fccb111c197 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -53,6 +53,7 @@ enum acpi_backlight_type {
 };
 
 #if IS_ENABLED(CONFIG_ACPI_VIDEO)
+extern void acpi_video_report_nolcd(void);
 extern int acpi_video_register(void);
 extern void acpi_video_unregister(void);
 extern void acpi_video_register_backlight(void);
-- 
2.34.1



[PATCH 0/2] Avoid creating acpi_video0 on desktop APUs

2022-12-07 Thread Mario Limonciello
In kernel 6.1 the backlight registration code was overhauled so that
at most one backlight device got registered. As part of this change
there was code added to cover the "nomodeset" case to still allow
making an acpi_video0 device if the BIOS contained backlight control
methods.

This fallback logic however is failing on the BIOS from a number of
motherboard manufacturers supporting Ryzen APUs.  What happens is
the amdgpu driver finishes registration and as expected doesn't
create a backlight control device since no eDP panels are connected
to a desktop.

Then 8 seconds later the ACPI video detection code creates an
acpi_video0 device that is non-operational. GNOME then creates a
backlight slider.

To prevent this situation from happening, add support for video drivers
to notify the ACPI video detection code that no panel was detected.

To reduce the risk of regressions on multi-GPU systems:
* only use this logic when the system is reported as a desktop enclosure.
* in the amdgpu code only report into this for APUs.

Mario Limonciello (2):
  ACPI: video: Allow GPU drivers to report no panels
  drm/amd/display: Report to ACPI video if no panels were found

 drivers/acpi/acpi_video.c | 12 
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  4 
 include/acpi/video.h  |  1 +
 3 files changed, 17 insertions(+)

-- 
2.34.1



Re: [PATCH] drm/amd: Fail the suspend if resources can't be evicted

2022-10-27 Thread Mario Limonciello

On 10/27/22 01:30, Christian König wrote:

Am 26.10.22 um 21:03 schrieb Mario Limonciello:

If a system does not have swap and memory is under 100% usage,
amdgpu will fail to evict resources.  Currently the suspend
carries on proceeding to reset the GPU:

```
[drm] evicting device resources failed
[drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP 
block  failed -12

[drm] free PSP TMR buffer
[TTM] Failed allocating page table
[drm] evicting device resources failed
amdgpu :03:00.0: amdgpu: MODE1 reset
amdgpu :03:00.0: amdgpu: GPU mode1 reset
amdgpu :03:00.0: amdgpu: GPU smu mode1 reset
```

At this point if the suspend actually succeeded I think that amdgpu
would have recovered because the GPU would have power cut off and
restored.  However the kernel fails to continue the suspend from the
memory pressure and amdgpu fails to run the "resume" from the aborted
suspend.

```
ACPI: PM: Preparing to enter system sleep state S3
SLUB: Unable to allocate memory on node -1, 
gfp=0xdc0(GFP_KERNEL|__GFP_ZERO)
   cache: Acpi-State, object size: 80, buffer size: 80, default order: 
0, min order: 0

   node 0: slabs: 22, objs: 1122, free: 0
ACPI Error: AE_NO_MEMORY, Could not update object reference count 
(20210730/utdelete-651)


[drm:psp_hw_start [amdgpu]] *ERROR* PSP load kdb failed!
[drm:psp_resume [amdgpu]] *ERROR* PSP resume failed
[drm:amdgpu_device_fw_loading [amdgpu]] *ERROR* resume of IP block 
 failed -62

amdgpu :03:00.0: amdgpu: amdgpu_device_ip_resume failed (-62).
PM: dpm_run_callback(): pci_pm_resume+0x0/0x100 returns -62
amdgpu :03:00.0: PM: failed to resume async: error -62
```

To avoid this series of unfortunate events, fail amdgpu's suspend
when the memory eviction fails.  This will let the system gracefully
recover and the user can try suspend again when the memory pressure
is relieved.


Yeah, I've been thinking about that handling for a while now as well.

Failing to suspend when we are OOM is certainly the right thing to do 
from a technical perspective.


But it also means that when users close their laptop it can happen that 
it keeps running and draining the battery.


On the other hand when you don't have swap configured it's your fault 
and not the driver's.

It's a trade off and I'm not sure what's better. Alex any comment here?


There are userspace means to handle this (such as systemd-oomd).  If we 
actually fail the suspend and can signal an out of memory error code all 
the way back up then it can work with the oomd processor to make some 
room and try again.




Thanks,
Christian.



Reported-by: p...@davidak.de
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2223
Signed-off-by: Mario Limonciello 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 ++-
  1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 6f958603c8cc2..ae10acede495e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4060,15 +4060,18 @@ void amdgpu_device_fini_sw(struct 
amdgpu_device *adev)

   * at suspend time.
   *
   */
-static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
+static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
  {
+    int ret;
+
  /* No need to evict vram on APUs for suspend to ram or s2idle */
  if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
-    return;
+    return 0;
-    if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
+    ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+    if (ret)
  DRM_WARN("evicting device resources failed\n");
-
+    return ret;
  }
  /*
@@ -4118,7 +4121,9 @@ int amdgpu_device_suspend(struct drm_device 
*dev, bool fbcon)

  if (!adev->in_s0ix)
  amdgpu_amdkfd_suspend(adev, adev->in_runpm);
-    amdgpu_device_evict_resources(adev);
+    r = amdgpu_device_evict_resources(adev);
+    if (r)
+    return r;
  amdgpu_fence_driver_hw_fini(adev);






[PATCH] drm/amd: Fail the suspend if resources can't be evicted

2022-10-26 Thread Mario Limonciello
If a system does not have swap and memory is under 100% usage,
amdgpu will fail to evict resources.  Currently the suspend
carries on proceeding to reset the GPU:

```
[drm] evicting device resources failed
[drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block 
 failed -12
[drm] free PSP TMR buffer
[TTM] Failed allocating page table
[drm] evicting device resources failed
amdgpu :03:00.0: amdgpu: MODE1 reset
amdgpu :03:00.0: amdgpu: GPU mode1 reset
amdgpu :03:00.0: amdgpu: GPU smu mode1 reset
```

At this point if the suspend actually succeeded I think that amdgpu
would have recovered because the GPU would have power cut off and
restored.  However the kernel fails to continue the suspend from the
memory pressure and amdgpu fails to run the "resume" from the aborted
suspend.

```
ACPI: PM: Preparing to enter system sleep state S3
SLUB: Unable to allocate memory on node -1, gfp=0xdc0(GFP_KERNEL|__GFP_ZERO)
  cache: Acpi-State, object size: 80, buffer size: 80, default order: 0, min 
order: 0
  node 0: slabs: 22, objs: 1122, free: 0
ACPI Error: AE_NO_MEMORY, Could not update object reference count 
(20210730/utdelete-651)

[drm:psp_hw_start [amdgpu]] *ERROR* PSP load kdb failed!
[drm:psp_resume [amdgpu]] *ERROR* PSP resume failed
[drm:amdgpu_device_fw_loading [amdgpu]] *ERROR* resume of IP block  failed 
-62
amdgpu :03:00.0: amdgpu: amdgpu_device_ip_resume failed (-62).
PM: dpm_run_callback(): pci_pm_resume+0x0/0x100 returns -62
amdgpu :03:00.0: PM: failed to resume async: error -62
```

To avoid this series of unfortunate events, fail amdgpu's suspend
when the memory eviction fails.  This will let the system gracefully
recover and the user can try suspend again when the memory pressure
is relieved.

Reported-by: p...@davidak.de
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2223
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6f958603c8cc2..ae10acede495e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4060,15 +4060,18 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
  * at suspend time.
  *
  */
-static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
+static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
 {
+   int ret;
+
/* No need to evict vram on APUs for suspend to ram or s2idle */
if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
-   return;
+   return 0;
 
-   if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
+   ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+   if (ret)
DRM_WARN("evicting device resources failed\n");
-
+   return ret;
 }
 
 /*
@@ -4118,7 +4121,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
if (!adev->in_s0ix)
amdgpu_amdkfd_suspend(adev, adev->in_runpm);
 
-   amdgpu_device_evict_resources(adev);
+   r = amdgpu_device_evict_resources(adev);
+   if (r)
+   return r;
 
amdgpu_fence_driver_hw_fini(adev);
 
-- 
2.25.1



[PATCH] drm/amd: Add a new parameter that can force reset on suspend

2022-09-27 Thread Mario Limonciello
We previously had in place some behavior that would cause APU or GPU
to be reset during suspend entry.  This caused various problems, and
eventually was reverted.  There are however users that preferred this
behavior, so add an option that they can use to force it.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216516
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 8 
 3 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ae9371b172e3a..85999f48e2835 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -230,6 +230,7 @@ extern bool pcie_p2p;
 
 extern int amdgpu_tmz;
 extern int amdgpu_reset_method;
+extern int amdgpu_reset_on_suspend;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index b14800ac179ee..17c0a0ec21bd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1051,6 +1051,8 @@ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
  */
 bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
 {
+   if (amdgpu_reset_on_suspend >= 0)
+   return !!amdgpu_reset_on_suspend;
if (adev->flags & AMD_IS_APU)
return false;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 16f6a313335e9..6a786dacf2c2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -179,6 +179,7 @@ int amdgpu_noretry = -1;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = -1; /* auto */
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_reset_on_suspend = -1; /* auto */
 int amdgpu_num_kcq = -1;
 int amdgpu_smartshift_bias;
 int amdgpu_use_xgmi_p2p = 1;
@@ -870,6 +871,13 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+/**
+ * DOC: reset_on_suspend (int)
+ * GPUs should be reset on suspend (-1 = auto (default), 0 = no, 1 = yes)
+ */
+MODULE_PARM_DESC(reset_on_suspend, "GPUs should be reset on suspend (-1 = auto 
(default), 0 = no 1 = yes");
+module_param_named(reset_on_suspend, amdgpu_reset_on_suspend, int, 0444);
+
 /**
  * DOC: bad_page_threshold (int) Bad page threshold is specifies the
  * threshold value of faulty pages detected by RAS ECC, which may
-- 
2.25.1



[PATCH 3/4] Revert "drm/amdgpu: drop amdgpu_display_gem_fb_init()"

2022-08-09 Thread Mario Limonciello
This reverts commit 60da2f7440f22dba48944602c2a0373068f13880.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216119
Fixes: 087451f372bf ("drm/amdgpu: use generic fb helpers instead of setting up 
AMD own's.")
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 25 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h|  5 +
 2 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 97fff4727724..92281e57f3ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1075,6 +1075,31 @@ static int amdgpu_display_get_fb_info(const struct 
amdgpu_framebuffer *amdgpu_fb
return r;
 }
 
+int amdgpu_display_gem_fb_init(struct drm_device *dev,
+  struct amdgpu_framebuffer *rfb,
+  const struct drm_mode_fb_cmd2 *mode_cmd,
+  struct drm_gem_object *obj)
+{
+   int ret;
+
+   rfb->base.obj[0] = obj;
+   drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
+
+   ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
+   if (ret)
+   goto err;
+
+   ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+   if (ret)
+   goto err;
+
+   return 0;
+err:
+   drm_dbg_kms(dev, "Failed to init gem fb: %d\n", ret);
+   rfb->base.obj[0] = NULL;
+   return ret;
+}
+
 static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev,
 struct amdgpu_framebuffer *rfb,
 struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index dbe2904e015b..fb120abeec96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -588,6 +588,11 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device 
*dev,
int *hpos, ktime_t *stime, ktime_t *etime,
const struct drm_display_mode *mode);
 
+int amdgpu_display_gem_fb_init(struct drm_device *dev,
+  struct amdgpu_framebuffer *rfb,
+  const struct drm_mode_fb_cmd2 *mode_cmd,
+  struct drm_gem_object *obj);
+
 int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
 
 void amdgpu_enc_destroy(struct drm_encoder *encoder);
-- 
2.34.1



[PATCH 4/4] Revert "drm/amdgpu: use generic fb helpers instead of setting up AMD own's."

2022-08-09 Thread Mario Limonciello
Although preferable to use the generic fb helpers there is an underlying issue
that causes problems for S4 that isn't yet identified.  As the workaround for
the S4 issue caused other problems for S3, revert the change to generic fb 
helpers.

If/when that underlying issue is identified, the switch to generic fb helpers
can be brought back in for amdgpu.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216119
Fixes: 087451f372bf ("drm/amdgpu: use generic fb helpers instead of setting up 
AMD own's.")
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  12 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  11 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  13 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c  | 388 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  30 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h|  20 +
 7 files changed, 426 insertions(+), 50 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4bde7ff55f8a..224d2dca2b7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -45,7 +45,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \
atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \
amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \
-   amdgpu_gem.o amdgpu_ring.o \
+   amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c84fdef0ac45..a14e2c98d324 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3795,6 +3795,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* Get a log2 for easy divisions. */
adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
 
+   amdgpu_fbdev_init(adev);
+
r = amdgpu_pm_sysfs_init(adev);
if (r) {
adev->pm_sysfs_en = false;
@@ -3956,6 +3958,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
/* disable ras feature must before hw fini */
amdgpu_ras_pre_fini(adev);
 
+   amdgpu_fbdev_fini(adev);
+
amdgpu_device_ip_fini_early(adev);
 
amdgpu_irq_fini_hw(adev);
@@ -4066,7 +4070,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
drm_kms_helper_poll_disable(dev);
 
if (fbcon)
-   
drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
+   amdgpu_fbdev_set_suspend(adev, 1);
 
cancel_delayed_work_sync(&adev->delayed_init_work);
 
@@ -4138,7 +4142,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
flush_delayed_work(&adev->delayed_init_work);
 
if (fbcon)
-   
drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
+   amdgpu_fbdev_set_suspend(adev, 0);
 
drm_kms_helper_poll_enable(dev);
 
@@ -4856,7 +4860,7 @@ int amdgpu_do_asic_reset(struct list_head 
*device_list_handle,
if (r)
goto out;
 
-   
drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
+   amdgpu_fbdev_set_suspend(tmp_adev, 0);
 
/*
 * The GPU enters bad state once faulty pages
@@ -5203,7 +5207,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 */
amdgpu_unregister_gpu_instance(tmp_adev);
 
-   
drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
+   amdgpu_fbdev_set_suspend(tmp_adev, 1);
 
/* disable ras on ALL IPs */
if (!need_emergency_restart &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 92281e57f3ac..eea6f8e34850 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1619,10 +1619,13 @@ int amdgpu_display_suspend_helper(struct amdgpu_device 
*adev)
continue;
}
robj = gem_to_amdgpu_bo(fb->obj[0]);
-   r = amdgpu_bo_reserve(robj, true);
-   if (r == 0) {
-   amdgpu_bo_unpin(robj);
-   amdgpu_bo_unreserve(robj);
+   /* don't unpin kernel fb objects */
+   if (!amdgpu_fbdev_robj_is_fb(ade

[PATCH 0/4] Fix S3 and S4 issues related to fbdev

2022-08-09 Thread Mario Limonciello
Moving to the generic fbdev helper introduced a regression in S4 support
on Hawaii PRO graphics cards.  A workaround commit was introduced to
avoid this S4 regression until the root cause could be found, but that
workaround commit introduced a completely separate regression on S3 on
a WX3200.

This series reverts all the code that moved to generic fbdev helpers to
fix both issues.

Mario Limonciello (4):
  Revert "drm/amdgpu/display: disable prefer_shadow for generic fb
helpers"
  Revert "drm/amdgpu: keep fbdev buffers pinned during suspend"
  Revert "drm/amdgpu: drop amdgpu_display_gem_fb_init()"
  Revert "drm/amdgpu: use generic fb helpers instead of setting up AMD
own's."

 drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|  12 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   |  43 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  13 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c| 388 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  30 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  25 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c  |   3 +-
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c|   3 +-
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c|   3 +-
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c |   3 +-
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c |   3 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   3 +-
 13 files changed, 457 insertions(+), 74 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c

-- 
2.34.1



[PATCH 2/4] Revert "drm/amdgpu: keep fbdev buffers pinned during suspend"

2022-08-09 Thread Mario Limonciello
This reverts commit da427a501e9b5e2cac8c8042df12eb203726903c.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216119
Fixes: 087451f372bf ("drm/amdgpu: use generic fb helpers instead of setting up 
AMD own's.")
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 25 -
 1 file changed, 4 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index c20922a5af9f..97fff4727724 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1559,21 +1559,6 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc 
*crtc,
  stime, etime, mode);
 }
 
-static bool
-amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
-{
-   struct drm_device *dev = adev_to_drm(adev);
-   struct drm_fb_helper *fb_helper = dev->fb_helper;
-
-   if (!fb_helper || !fb_helper->buffer)
-   return false;
-
-   if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj)
-   return false;
-
-   return true;
-}
-
 int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
 {
struct drm_device *dev = adev_to_drm(adev);
@@ -1609,12 +1594,10 @@ int amdgpu_display_suspend_helper(struct amdgpu_device 
*adev)
continue;
}
robj = gem_to_amdgpu_bo(fb->obj[0]);
-   if (!amdgpu_display_robj_is_fb(adev, robj)) {
-   r = amdgpu_bo_reserve(robj, true);
-   if (r == 0) {
-   amdgpu_bo_unpin(robj);
-   amdgpu_bo_unreserve(robj);
-   }
+   r = amdgpu_bo_reserve(robj, true);
+   if (r == 0) {
+   amdgpu_bo_unpin(robj);
+   amdgpu_bo_unreserve(robj);
}
}
return 0;
-- 
2.34.1



[PATCH 1/4] Revert "drm/amdgpu/display: disable prefer_shadow for generic fb helpers"

2022-08-09 Thread Mario Limonciello
Switching to the generic fb helpers caused a regression for S4, but the
workaround for this regression on S4 caused another regression for S3 on
a WX3200.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216119
Fixes: 3a4b1cc28fbd ("drm/amdgpu/display: disable prefer_shadow for generic fb 
helpers")
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c  | 3 +--
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c| 3 +--
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c| 3 +--
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 3 +--
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +--
 6 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 108e8e8a1a36..576849e95296 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -496,8 +496,7 @@ static int amdgpu_vkms_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = YRES_MAX;
 
adev_to_drm(adev)->mode_config.preferred_depth = 24;
-   /* disable prefer shadow for now due to hibernation issues */
-   adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+   adev_to_drm(adev)->mode_config.prefer_shadow = 1;
 
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 9c964cd3b5d4..288fce7dc0ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2796,8 +2796,7 @@ static int dce_v10_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
 
adev_to_drm(adev)->mode_config.preferred_depth = 24;
-   /* disable prefer shadow for now due to hibernation issues */
-   adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+   adev_to_drm(adev)->mode_config.prefer_shadow = 1;
 
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index e0ad9f27dc3f..cbe5250b31cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2914,8 +2914,7 @@ static int dce_v11_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
 
adev_to_drm(adev)->mode_config.preferred_depth = 24;
-   /* disable prefer shadow for now due to hibernation issues */
-   adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+   adev_to_drm(adev)->mode_config.prefer_shadow = 1;
 
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 77f5e998a120..b1c44fab074f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -2673,8 +2673,7 @@ static int dce_v6_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_width = 16384;
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
-   /* disable prefer shadow for now due to hibernation issues */
-   adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+   adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 802e5c753271..572b9fa7298a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2693,8 +2693,7 @@ static int dce_v8_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
 
adev_to_drm(adev)->mode_config.preferred_depth = 24;
-   /* disable prefer shadow for now due to hibernation issues */
-   adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+   adev_to_drm(adev)->mode_config.prefer_shadow = 1;
 
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ddcb7845f642..ee0cd2f279f2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3823,8 +3823,7 @@ static int amdgpu_dm_mode_config_init(struct 
amdgpu_device *adev)
adev_to_drm(adev)->mode_config.max_height = 16384;
 
adev_to_drm(adev)->mode_config.preferred_depth = 24;
-   /* disable prefer shadow for now due to hibernation issues */
-   adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+   adev_to_drm(adev)-&

[PATCH] drm/amdkfd: Select PCI_P2PDMA and DMABUF_MOVE_NOTIFY instead of depending

2022-07-07 Thread Mario Limonciello
By having dependencies on PCI_P2PDMA and DMABUF_MOVE_NOTIFY the option
HSA_AMD_P2P is not discoverable.  The kernel menu configuration hides
it if the dependencies aren't satisfied, making it harder to find.

Instead select these options when enabling HSA_AMD_P2P.  This way
distributions and users can just enable HSA_AMD_P2P.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdkfd/Kconfig | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig 
b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 93bd4eda0d94..b153c26903ee 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -28,7 +28,9 @@ config HSA_AMD_SVM
 
 config HSA_AMD_P2P
bool "HSA kernel driver support for peer-to-peer for AMD GPU devices"
-   depends on HSA_AMD && PCI_P2PDMA && DMABUF_MOVE_NOTIFY
+   depends on HSA_AMD
+   select PCI_P2PDMA
+   select DMABUF_MOVE_NOTIFY
help
  Enable peer-to-peer (P2P) communication between AMD GPUs over
  the PCIe bus. This can improve performance of multi-GPU compute
-- 
2.34.1



[PATCH v3 08/10] drm/amd: Use `pm_suspend_default_s2idle`

2022-06-30 Thread Mario Limonciello
Rather than examining the suspend target, examine what the system is
configured to use.  This should be no functional change; it just improves
readability by using the helper instead.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 2146232c62ef..fc2c6e311979 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1056,7 +1056,7 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device 
*adev)
 {
if (adev->flags & AMD_IS_APU)
return false;
-   return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
+   return !pm_suspend_default_s2idle();
 }
 
 /**
@@ -1069,7 +1069,7 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device 
*adev)
 bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
 {
if (!(adev->flags & AMD_IS_APU) ||
-   (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
+   !pm_suspend_default_s2idle())
return false;
 
if (!pm_suspend_preferred_s2idle()) {
-- 
2.34.1



[PATCH v3 07/10] drm/amd: Use `pm_suspend_preferred_s2idle`

2022-06-30 Thread Mario Limonciello
Drop the direct check from the FADT and use the helper instead.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 98ac53ee6bb5..2146232c62ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1072,7 +1072,7 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device 
*adev)
(pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
return false;
 
-   if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
+   if (!pm_suspend_preferred_s2idle()) {
dev_warn_once(adev->dev,
  "Power consumption will be higher as BIOS has not 
been configured for suspend-to-idle.\n"
  "To use suspend-to-idle change the sleep mode in 
BIOS setup.\n");
-- 
2.34.1



<    3   4   5   6   7   8   9   10   >