Re: [PATCH 3/4] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v4)
On 11/2/2023 8:34 PM, Victor Lu wrote: amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0. Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC and amdgpu_device_xcc_wreg/rreg to use the new xcc_id parameter. Using amdgpu_sriov_runtime to determine whether to access via kiq or RLC is sufficient for now. v4: avoid using amdgpu_sriov_w/rreg v3: use W/RREG32_XCC to handle non-kiq case v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters of amdgpu_device_wreg/rreg Signed-off-by: Victor Lu --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 13 ++- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 91 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 +- 9 files changed, 118 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 43c579f5a95e..e8dc75a3ff44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1162,11 +1162,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr); +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr, u32 reg_data); +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); void amdgpu_mm_wreg8(struct 
amdgpu_device *adev, uint32_t offset, uint8_t value); @@ -1207,8 +1214,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) -#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) -#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0) +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0) #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) @@ -1218,6 +1225,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) +#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst) +#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 490c8f5ddb60..80309d39737a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); for (reg = hqd_base; reg <= hqd_end; reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 51011e8ee90d..9285789b3a42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
[PATCH 2/2] drm/amd/pm: Hide pp_dpm_pcie device attribute
Hide PCIe DPM attribute on SOCs with GC v9.4.2 and GC v9.4.3. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 083048131bca..8f57c77a45dd 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2249,6 +2249,10 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ if (amdgpu_dpm_get_apu_thermal_limit(adev, &limit) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) { + if (gc_ver == IP_VERSION(9, 4, 2) || + gc_ver == IP_VERSION(9, 4, 3)) + *states = ATTR_STATE_UNSUPPORTED; } switch (gc_ver) { -- 2.25.1
[PATCH 1/2] drm/amd/pm: Hide irrelevant pm device attributes
Change return code to EOPNOTSUPP for unsupported functions. Use the error code information to hide sysfs nodes not valid for the SOC. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 12 ++-- drivers/gpu/drm/amd/pm/amdgpu_pm.c| 12 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 4 ++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index aed635e2da9c..aed232d107b6 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -491,7 +491,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t *limit) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (pp_funcs && pp_funcs->get_apu_thermal_limit) { mutex_lock(&adev->pm.mutex); @@ -505,7 +505,7 @@ int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t *limit int amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev, uint32_t limit) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (pp_funcs && pp_funcs->set_apu_thermal_limit) { mutex_lock(&adev->pm.mutex); @@ -1182,7 +1182,7 @@ int amdgpu_dpm_get_sclk_od(struct amdgpu_device *adev) int ret = 0; if (!pp_funcs->get_sclk_od) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); ret = pp_funcs->get_sclk_od(adev->powerplay.pp_handle); @@ -1196,7 +1196,7 @@ int amdgpu_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value) const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; if (is_support_sw_smu(adev)) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); if (pp_funcs->set_sclk_od) @@ -1219,7 +1219,7 @@ int amdgpu_dpm_get_mclk_od(struct amdgpu_device *adev) int ret = 0; if (!pp_funcs->get_mclk_od) - return 0; + return -EOPNOTSUPP; 
mutex_lock(&adev->pm.mutex); ret = pp_funcs->get_mclk_od(adev->powerplay.pp_handle); @@ -1233,7 +1233,7 @@ int amdgpu_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value) const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; if (is_support_sw_smu(adev)) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); if (pp_funcs->set_mclk_od) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 6ad957aaef3c..083048131bca 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2237,6 +2237,18 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) { if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE) *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) { + if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) { + if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(apu_thermal_cap)) { + u32 limit; + + if (amdgpu_dpm_get_apu_thermal_limit(adev, &limit) == + -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; } switch (gc_ver) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 7fe32cdea5a8..6d6221024d7e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2747,7 +2747,7 @@ static int smu_read_sensor(void *handle, static int smu_get_apu_thermal_limit(void *handle, uint32_t *limit) { - int ret = -EINVAL; + int ret = -EOPNOTSUPP; struct smu_context *smu = handle; if (smu->ppt_funcs && smu->ppt_funcs->get_apu_thermal_limit) @@ -2758,7 +2758,7 @@ static int smu_get_apu_thermal_limit(void *handle, uint32_t *limit) static int smu_set_apu_thermal_limit(void *handle, uint32_t limit) { - int ret = -EINVAL; + int ret = -EOPNOTSUPP; struct 
smu_context *smu = handle; if (smu->ppt_funcs && smu->ppt_funcs->set_apu_thermal_limit) -- 2.25.1
[PATCH] drm/amdgpu: Enable MES to handle doorbell ring on unmapped queue
On navi4x and up, HW can monitor up to 2048 doorbells that are not currently mapped and trigger an interrupt to MES when one of these unmapped doorbells is rung. Signed-off-by: shaoyunl --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 24 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index ac41c649caa0..eac34ed1a504 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -455,6 +455,27 @@ static void mes_v12_0_init_aggregated_doorbell(struct amdgpu_mes *mes) WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data); } + +static void mes_v12_0_enable_unmapped_doorbell_handling( + struct amdgpu_mes *mes, bool enable) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t data = RREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL); + + /* +* The default PROC_LSB setting is 0xc which means doorbell +* addr[16:12] gives the doorbell page number. For kfd, each +* process will use 2 pages of doorbell, we need to change the +* setting to 0xd +*/ + data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK; + data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT; + + data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT; + + WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data); +} + static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .add_hw_queue = mes_v12_0_add_hw_queue, .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -1235,6 +1256,9 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_init_aggregated_doorbell(&adev->mes); + /* Enable the MES to handle doorbell ring on unmapped queue */ + mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); + r = mes_v12_0_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); -- 2.34.1
Re: mainline build failure due to 7966f319c66d ("drm/amd/display: Introduce DML2")
On Thu, Nov 2, 2023 at 1:07 PM Sudip Mukherjee wrote: > > On Thu, 2 Nov 2023 at 16:52, Alex Deucher wrote: > > > > On Thu, Nov 2, 2023 at 5:32 AM Sudip Mukherjee (Codethink) > > wrote: > > > > > > Hi All, > > > > > > The latest mainline kernel branch fails to build x86_64 allmodconfig > > > with the error: > > > > > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In > > > function 'dml_prefetch_check': > > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6707:1: > > > error: the frame size of 2056 bytes is larger than 2048 bytes > > > [-Werror=frame-larger-than=] > > > 6707 | } > > > | ^ > > > > > > git bisect pointed to 7966f319c66d ("drm/amd/display: Introduce DML2") > > > > > > I will be happy to test any patch or provide any extra log if needed. > > > > This was reported earlier and fixed by: > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=089dbf6a06f1dcaeed4f8b86d619e8d28b235207 > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b141fa036c901303ca5659cc22e9c08f8b097892 > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5b2c54e0d0ea09f7a3b500510731878326e1117e > > but I guess maybe different compiler versions are still hitting this. > > Yes, I should have mentioned. gcc-11 and gcc-12 failed to build. but > gcc-13 was ok. Should be fixed with Nathan's patch: https://patchwork.freedesktop.org/patch/565675/ Alex
Re: [PATCH] drm/edid: add a quirk for two 240Hz Samsung monitors
On Thu, Nov 2, 2023 at 3:00 PM Hamza Mahfooz wrote: > > On 11/1/23 17:36, Alex Deucher wrote: > > On Wed, Nov 1, 2023 at 5:01 PM Hamza Mahfooz wrote: > >> > >> Without this fix the 5120x1440@240 timing of these monitors > >> leads to screen flickering. > >> > >> Cc: sta...@vger.kernel.org # 6.1+ > >> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1442 > >> Co-developed-by: Harry Wentland > >> Signed-off-by: Harry Wentland > >> Signed-off-by: Hamza Mahfooz > >> --- > >> drivers/gpu/drm/drm_edid.c | 47 +++--- > >> 1 file changed, 44 insertions(+), 3 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c > >> index bca2af4fe1fc..3fdb8907f66b 100644 > >> --- a/drivers/gpu/drm/drm_edid.c > >> +++ b/drivers/gpu/drm/drm_edid.c > >> @@ -89,6 +89,8 @@ static int oui(u8 first, u8 second, u8 third) > >> #define EDID_QUIRK_NON_DESKTOP (1 << 12) > >> /* Cap the DSC target bitrate to 15bpp */ > >> #define EDID_QUIRK_CAP_DSC_15BPP (1 << 13) > >> +/* Fix up a particular 5120x1440@240Hz timing */ > >> +#define EDID_QUIRK_FIXUP_5120_1440_240 (1 << 14) > > > > What is wrong with the original timing that needs to be fixed? > > Apparently, all of the timing values for the 5120x1440@240 mode of these > monitors aren't set correctly (they are all lower than they should be) > in their EDIDs. For what it's worth, the Windows driver has had a quirk > similar to the one proposed in this patch for ~2 years. It would be good to at least include the original mode timings from the EDID and the new ones added by the quirk in the commit message and a description of why they are problematic and why the new ones work. 
Alex > > > > > Alex > > > > > >> > >> #define MICROSOFT_IEEE_OUI 0xca125c > >> > >> @@ -170,6 +172,12 @@ static const struct edid_quirk { > >> EDID_QUIRK('S', 'A', 'M', 596, EDID_QUIRK_PREFER_LARGE_60), > >> EDID_QUIRK('S', 'A', 'M', 638, EDID_QUIRK_PREFER_LARGE_60), > >> > >> + /* Samsung C49G95T */ > >> + EDID_QUIRK('S', 'A', 'M', 0x7053, EDID_QUIRK_FIXUP_5120_1440_240), > >> + > >> + /* Samsung S49AG95 */ > >> + EDID_QUIRK('S', 'A', 'M', 0x71ac, EDID_QUIRK_FIXUP_5120_1440_240), > >> + > >> /* Sony PVM-2541A does up to 12 bpc, but only reports max 8 bpc */ > >> EDID_QUIRK('S', 'N', 'Y', 0x2541, EDID_QUIRK_FORCE_12BPC), > >> > >> @@ -6586,7 +6594,37 @@ static void update_display_info(struct > >> drm_connector *connector, > >> drm_edid_to_eld(connector, drm_edid); > >> } > >> > >> -static struct drm_display_mode *drm_mode_displayid_detailed(struct > >> drm_device *dev, > >> +static void drm_mode_displayid_detailed_edid_quirks(struct drm_connector > >> *connector, > >> + struct > >> drm_display_mode *mode) > >> +{ > >> + unsigned int hsync_width; > >> + unsigned int vsync_width; > >> + > >> + if (connector->display_info.quirks & > >> EDID_QUIRK_FIXUP_5120_1440_240) { > >> + if (mode->hdisplay == 5120 && mode->vdisplay == 1440 && > >> + mode->clock == 1939490) { > >> + hsync_width = mode->hsync_end - mode->hsync_start; > >> + vsync_width = mode->vsync_end - mode->vsync_start; > >> + > >> + mode->clock = 2018490; > >> + mode->hdisplay = 5120; > >> + mode->hsync_start = 5120 + 8; > >> + mode->hsync_end = 5120 + 8 + hsync_width; > >> + mode->htotal = 5200; > >> + > >> + mode->vdisplay = 1440; > >> + mode->vsync_start = 1440 + 165; > >> + mode->vsync_end = 1440 + 165 + vsync_width; > >> + mode->vtotal = 1619; > >> + > >> + drm_dbg_kms(connector->dev, > >> + "[CONNECTOR:%d:%s] Samsung 240Hz mode > >> quirk applied\n", > >> + connector->base.id, connector->name); > >> + } > >> + } > >> +} > >> + > >> +static struct drm_display_mode *drm_mode_displayid_detailed(struct > >> 
drm_connector *connector, > >> struct > >> displayid_detailed_timings_1 *timings, > >> bool type_7) > >> { > >> @@ -6605,7 +6643,7 @@ static struct drm_display_mode > >> *drm_mode_displayid_detailed(struct drm_device *d > >> bool hsync_positive = (timings->hsync[1] >> 7) & 0x1; > >> bool vsync_positive = (timings->vsync[1] >> 7) & 0x1; > >> > >> - mode = drm_mode_create(dev); > >> + mode = drm_mode_create(connector->dev); > >> if (!mode) > >> return NULL; > >> > >> @@ -6628,6 +,9 @@ static struct drm_display_mode > >> *drm_mode_displa
Re: [PATCH] drm/edid: add a quirk for two 240Hz Samsung monitors
On 11/1/23 17:36, Alex Deucher wrote: On Wed, Nov 1, 2023 at 5:01 PM Hamza Mahfooz wrote: Without this fix the 5120x1440@240 timing of these monitors leads to screen flickering. Cc: sta...@vger.kernel.org # 6.1+ Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1442 Co-developed-by: Harry Wentland Signed-off-by: Harry Wentland Signed-off-by: Hamza Mahfooz --- drivers/gpu/drm/drm_edid.c | 47 +++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index bca2af4fe1fc..3fdb8907f66b 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -89,6 +89,8 @@ static int oui(u8 first, u8 second, u8 third) #define EDID_QUIRK_NON_DESKTOP (1 << 12) /* Cap the DSC target bitrate to 15bpp */ #define EDID_QUIRK_CAP_DSC_15BPP (1 << 13) +/* Fix up a particular 5120x1440@240Hz timing */ +#define EDID_QUIRK_FIXUP_5120_1440_240 (1 << 14) What is wrong with the original timing that needs to be fixed? Apparently, all of the timing values for the 5120x1440@240 mode of these monitors aren't set correctly (they are all lower than they should be) in their EDIDs. For what it's worth, the Windows driver has had a quirk similar to the one proposed in this patch for ~2 years. 
Alex #define MICROSOFT_IEEE_OUI 0xca125c @@ -170,6 +172,12 @@ static const struct edid_quirk { EDID_QUIRK('S', 'A', 'M', 596, EDID_QUIRK_PREFER_LARGE_60), EDID_QUIRK('S', 'A', 'M', 638, EDID_QUIRK_PREFER_LARGE_60), + /* Samsung C49G95T */ + EDID_QUIRK('S', 'A', 'M', 0x7053, EDID_QUIRK_FIXUP_5120_1440_240), + + /* Samsung S49AG95 */ + EDID_QUIRK('S', 'A', 'M', 0x71ac, EDID_QUIRK_FIXUP_5120_1440_240), + /* Sony PVM-2541A does up to 12 bpc, but only reports max 8 bpc */ EDID_QUIRK('S', 'N', 'Y', 0x2541, EDID_QUIRK_FORCE_12BPC), @@ -6586,7 +6594,37 @@ static void update_display_info(struct drm_connector *connector, drm_edid_to_eld(connector, drm_edid); } -static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *dev, +static void drm_mode_displayid_detailed_edid_quirks(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + unsigned int hsync_width; + unsigned int vsync_width; + + if (connector->display_info.quirks & EDID_QUIRK_FIXUP_5120_1440_240) { + if (mode->hdisplay == 5120 && mode->vdisplay == 1440 && + mode->clock == 1939490) { + hsync_width = mode->hsync_end - mode->hsync_start; + vsync_width = mode->vsync_end - mode->vsync_start; + + mode->clock = 2018490; + mode->hdisplay = 5120; + mode->hsync_start = 5120 + 8; + mode->hsync_end = 5120 + 8 + hsync_width; + mode->htotal = 5200; + + mode->vdisplay = 1440; + mode->vsync_start = 1440 + 165; + mode->vsync_end = 1440 + 165 + vsync_width; + mode->vtotal = 1619; + + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] Samsung 240Hz mode quirk applied\n", + connector->base.id, connector->name); + } + } +} + +static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_connector *connector, struct displayid_detailed_timings_1 *timings, bool type_7) { @@ -6605,7 +6643,7 @@ static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *d bool hsync_positive = (timings->hsync[1] >> 7) & 0x1; bool vsync_positive = (timings->vsync[1] >> 7) & 0x1; - mode = 
drm_mode_create(dev); + mode = drm_mode_create(connector->dev); if (!mode) return NULL; @@ -6628,6 +,9 @@ static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *d if (timings->flags & 0x80) mode->type |= DRM_MODE_TYPE_PREFERRED; + + drm_mode_displayid_detailed_edid_quirks(connector, mode); + drm_mode_set_name(mode); return mode; @@ -6650,7 +6691,7 @@ static int add_displayid_detailed_1_modes(struct drm_connector *connector, for (i = 0; i < num_timings; i++) { struct displayid_detailed_timings_1 *timings = &det->timings[i]; - newmode = drm_mode_displayid_detailed(connector->dev, timings, type_7); + newmode = drm_mode_displayid_detailed(connector, timings, type_7); if (!newmode) continue; -- 2.42.0 -- Hamza
Re: [PATCH v2] drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2
On Thu, Nov 2, 2023 at 1:41 PM Nathan Chancellor wrote: > > When building ARCH=x86_64 allmodconfig with clang, which will typically > have sanitizers enabled, there is a warning about a large stack frame. > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: > error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' > [-Werror,-Wframe-larger-than] >6265 | static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) > | ^ > 1 error generated. > > Notably, GCC 13.2.0 does not do too much of a better job, as it is right > at the current limit of 2048 (and others have reported being over with > older GCC versions): > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In > function 'dml_prefetch_check': > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: > error: the frame size of 2048 bytes is larger than 1800 bytes > [-Werror=frame-larger-than=] >6705 | } > | ^ > > In the past, these warnings have been avoided by reducing the number of > parameters to various functions so that not as many arguments need to be > passed on the stack. However, these patches take a good amount of effort > to write despite being mechanical due to code structure and complexity > and they are never carried forward to new generations of the code so > that effort has to be expended every new hardware generation, which > becomes harder to justify as time goes on. > > To avoid having a noticeable or lengthy breakage in all{mod,yes}config, > which are easy testing targets that have -Werror enabled, increase the > limit for configurations that have KASAN or KCSAN enabled by 50% so that > cases of extremely poor code generation can still be caught while not > breaking the majority of builds. CONFIG_KMSAN also causes high stack > usage but the frame limit is already set to zero when it is enabled, > which is accounted for by the check for CONFIG_FRAME_WARN=0 in the dml2 > Makefile. 
> > Signed-off-by: Nathan Chancellor > --- > If there is another DRM pull before 6.7-rc1, it would be much > appreciated if this could make that so that other trees are not > potentially broken by this. If not, no worries, as it was my fault for > not sending this sooner. Applied. Thanks! Will send out a PR this week. Alex > > Changes in v2: > - Adjust workaround to check for either CONFIG_KASAN=y or > CONFIG_KCSAN=y, as the same problem has been reported with older > versions of GCC (Hamza, Alex) > - Link to v1: > https://lore.kernel.org/r/20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-v1-1-6eb157352...@kernel.org > --- > drivers/gpu/drm/amd/display/dc/dml2/Makefile | 4 > 1 file changed, 4 insertions(+) > > diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile > b/drivers/gpu/drm/amd/display/dc/dml2/Makefile > index 70ae5eba624e..acff3449b8d7 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile > +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile > @@ -60,8 +60,12 @@ endif > endif > > ifneq ($(CONFIG_FRAME_WARN),0) > +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) > +frame_warn_flag := -Wframe-larger-than=3072 > +else > frame_warn_flag := -Wframe-larger-than=2048 > endif > +endif > > CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) > $(frame_warn_flag) > CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags) > > --- > base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6 > change-id: > 20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871 > > Best regards, > -- > Nathan Chancellor >
Re: [PATCH] drm/amd/display: Increase frame warning limit for clang in dml2
On Thu, Nov 2, 2023 at 1:12 PM Nathan Chancellor wrote: > > On Thu, Nov 02, 2023 at 12:59:00PM -0400, Hamza Mahfooz wrote: > > On 11/2/23 12:24, Nathan Chancellor wrote: > > > When building ARCH=x86_64 allmodconfig with clang, which have sanitizers > > > enabled, there is a warning about a large stack frame. > > > > > > > > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: > > > error: stack frame size (2520) exceeds limit (2048) in > > > 'dml_prefetch_check' [-Werror,-Wframe-larger-than] > > > 6265 | static void dml_prefetch_check(struct display_mode_lib_st > > > *mode_lib) > > > | ^ > > >1 error generated. > > > > > > Notably, GCC 13.2.0 does not do too much of a better job, as it is right > > > at the current limit of 2048: > > > > > >drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In > > > function 'dml_prefetch_check': > > > > > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: > > > error: the frame size of 2048 bytes is larger than 1800 bytes > > > [-Werror=frame-larger-than=] > > > 6705 | } > > > | ^ > > > > > > In the past, these warnings have been avoided by reducing the number of > > > parameters to various functions so that not as many arguments need to be > > > passed on the stack. However, these patches take a good amount of effort > > > to write despite being mechanical due to code structure and complexity > > > and they are never carried forward to new generations of the code so > > > that effort has to be expended every new hardware generation, which > > > becomes harder to justify as time goes on. > > > > > > There is some effort to improve clang's code generation but that may > > > take some time between code review, shifting priorities, and release > > > cycles. 
To avoid having a noticeable or lengthy breakage in > > > all{mod,yes}config, which are easy testing targets that have -Werror > > > enabled, increase the limit for clang by 50% so that cases of extremely > > > poor code generation can still be caught while not breaking the majority > > > of builds. When clang's code generation improves, the limit increase can > > > be restricted to older clang versions. > > > > > > Signed-off-by: Nathan Chancellor > > > --- > > > If there is another DRM pull before 6.7-rc1, it would be much > > > appreciated if this could make that so that other trees are not > > > potentially broken by this. If not, no worries, as it was my fault for > > > not sending this sooner. > > > --- > > > drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +- > > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > > > diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile > > > b/drivers/gpu/drm/amd/display/dc/dml2/Makefile > > > index 70ae5eba624e..dff8237c0999 100644 > > > --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile > > > +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile > > > @@ -60,7 +60,7 @@ endif > > > endif > > > ifneq ($(CONFIG_FRAME_WARN),0) > > > -frame_warn_flag := -Wframe-larger-than=2048 > > > +frame_warn_flag := -Wframe-larger-than=$(if > > > $(CONFIG_CC_IS_CLANG),3072,2048) > > > > I would prefer checking for `CONFIG_KASAN || CONFIG_KCSAN` instead > > since the stack usage shouldn't change much if both of those are disabled. > > So something like this? Or were you talking about replacing the clang > check entirely with the KASAN/KCSAN check? I think replacing the clang check entirely. 
A similar issue was just reported on different GCC versions: https://lists.freedesktop.org/archives/amd-gfx/2023-November/100725.html Alex > > diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile > b/drivers/gpu/drm/amd/display/dc/dml2/Makefile > index 70ae5eba624e..0fc1b13295eb 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile > +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile > @@ -60,8 +60,12 @@ endif > endif > > ifneq ($(CONFIG_FRAME_WARN),0) > +ifeq ($(CONFIG_CC_IS_CLANG)$(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),yy) > +frame_warn_flag := -Wframe-larger-than=3072 > +else > frame_warn_flag := -Wframe-larger-than=2048 > endif > +endif > > CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) > $(frame_warn_flag) > CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags) > > > > endif > > > CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) > > > $(frame_warn_flag) > > > > > > --- > > > base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6 > > > change-id: > > > 20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871 > > > > > > Best regards, > > -- > > Hamza > >
[PATCH v2] drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2
When building ARCH=x86_64 allmodconfig with clang, which will typically have sanitizers enabled, there is a warning about a large stack frame. drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' [-Werror,-Wframe-larger-than] 6265 | static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) | ^ 1 error generated. Notably, GCC 13.2.0 does not do too much of a better job, as it is right at the current limit of 2048 (and others have reported being over with older GCC versions): drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In function 'dml_prefetch_check': drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: error: the frame size of 2048 bytes is larger than 1800 bytes [-Werror=frame-larger-than=] 6705 | } | ^ In the past, these warnings have been avoided by reducing the number of parameters to various functions so that not as many arguments need to be passed on the stack. However, these patches take a good amount of effort to write despite being mechanical due to code structure and complexity and they are never carried forward to new generations of the code so that effort has to be expended every new hardware generation, which becomes harder to justify as time goes on. To avoid having a noticeable or lengthy breakage in all{mod,yes}config, which are easy testing targets that have -Werror enabled, increase the limit for configurations that have KASAN or KCSAN enabled by 50% so that cases of extremely poor code generation can still be caught while not breaking the majority of builds. CONFIG_KMSAN also causes high stack usage but the frame limit is already set to zero when it is enabled, which is accounted for by the check for CONFIG_FRAME_WARN=0 in the dml2 Makefile. 
Signed-off-by: Nathan Chancellor --- If there is another DRM pull before 6.7-rc1, it would be much appreciated if this could make that so that other trees are not potentially broken by this. If not, no worries, as it was my fault for not sending this sooner. Changes in v2: - Adjust workaround to check for either CONFIG_KASAN=y or CONFIG_KCSAN=y, as the same problem has been reported with older versions of GCC (Hamza, Alex) - Link to v1: https://lore.kernel.org/r/20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-v1-1-6eb157352...@kernel.org --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 70ae5eba624e..acff3449b8d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -60,8 +60,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags) --- base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6 change-id: 20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871 Best regards, -- Nathan Chancellor
Re: [PATCH] drm/amd/display: Increase frame warning limit for clang in dml2
On 11/2/23 13:12, Nathan Chancellor wrote: On Thu, Nov 02, 2023 at 12:59:00PM -0400, Hamza Mahfooz wrote: On 11/2/23 12:24, Nathan Chancellor wrote: When building ARCH=x86_64 allmodconfig with clang, which have sanitizers enabled, there is a warning about a large stack frame. drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' [-Werror,-Wframe-larger-than] 6265 | static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) | ^ 1 error generated. Notably, GCC 13.2.0 does not do too much of a better job, as it is right at the current limit of 2048: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In function 'dml_prefetch_check': drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: error: the frame size of 2048 bytes is larger than 1800 bytes [-Werror=frame-larger-than=] 6705 | } | ^ In the past, these warnings have been avoided by reducing the number of parameters to various functions so that not as many arguments need to be passed on the stack. However, these patches take a good amount of effort to write despite being mechanical due to code structure and complexity and they are never carried forward to new generations of the code so that effort has to be expended every new hardware generation, which becomes harder to justify as time goes on. There is some effort to improve clang's code generation but that may take some time between code review, shifting priorities, and release cycles. To avoid having a noticeable or lengthy breakage in all{mod,yes}config, which are easy testing targets that have -Werror enabled, increase the limit for clang by 50% so that cases of extremely poor code generation can still be caught while not breaking the majority of builds. When clang's code generation improves, the limit increase can be restricted to older clang versions. 
Signed-off-by: Nathan Chancellor --- If there is another DRM pull before 6.7-rc1, it would be much appreciated if this could make that so that other trees are not potentially broken by this. If not, no worries, as it was my fault for not sending this sooner. --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 70ae5eba624e..dff8237c0999 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -60,7 +60,7 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) -frame_warn_flag := -Wframe-larger-than=2048 +frame_warn_flag := -Wframe-larger-than=$(if $(CONFIG_CC_IS_CLANG),3072,2048) I would prefer checking for `CONFIG_KASAN || CONFIG_KCSAN` instead since the stack usage shouldn't change much if both of those are disabled. So something like this? Or were you talking about replacing the clang check entirely with the KASAN/KCSAN check? I think for the time being replacing the clang check with a KASAN/KCSAN check would make more sense. Considering that, the allmodconfig for older versions of gcc is also broken (see [1]). 1. 
https://lore.kernel.org/amd-gfx/CADVatmO9NCs=ryng72hnzmdpqg862gpgnnfhq4uwtpekjok...@mail.gmail.com/ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 70ae5eba624e..0fc1b13295eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -60,8 +60,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(CONFIG_CC_IS_CLANG)$(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),yy) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags) endif CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag) --- base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6 change-id: 20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871 Best regards, -- Hamza -- Hamza
[PATCH 3/4] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v4)
amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0. Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC and amdgpu_device_xcc_wreg/rreg to use the new xcc_id parameter. Using amdgpu_sriov_runtime to determine whether to access via kiq or RLC is sufficient for now. v4: avoid using amdgpu_sriov_w/rreg v3: use W/RREG32_XCC to handle non-kiq case v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters of amdgpu_device_wreg/rreg Signed-off-by: Victor Lu --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 13 ++- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 91 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 +- 9 files changed, 118 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 43c579f5a95e..e8dc75a3ff44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1162,11 +1162,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr); +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr, u32 reg_data); +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value); @@ -1207,8 +1214,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) -#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) -#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0) +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0) #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) @@ -1218,6 +1225,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) +#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst) +#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 490c8f5ddb60..80309d39737a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); for (reg = hqd_base; reg <= hqd_end; reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 51011e8ee90d..9285789b3a42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. */ dif
Re: [PATCH] drm/amd/display: Increase frame warning limit for clang in dml2
On Thu, Nov 02, 2023 at 12:59:00PM -0400, Hamza Mahfooz wrote: > On 11/2/23 12:24, Nathan Chancellor wrote: > > When building ARCH=x86_64 allmodconfig with clang, which have sanitizers > > enabled, there is a warning about a large stack frame. > > > > > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: > > error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' > > [-Werror,-Wframe-larger-than] > > 6265 | static void dml_prefetch_check(struct display_mode_lib_st > > *mode_lib) > > | ^ > >1 error generated. > > > > Notably, GCC 13.2.0 does not do too much of a better job, as it is right > > at the current limit of 2048: > > > >drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In > > function 'dml_prefetch_check': > > > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: > > error: the frame size of 2048 bytes is larger than 1800 bytes > > [-Werror=frame-larger-than=] > > 6705 | } > > | ^ > > > > In the past, these warnings have been avoided by reducing the number of > > parameters to various functions so that not as many arguments need to be > > passed on the stack. However, these patches take a good amount of effort > > to write despite being mechanical due to code structure and complexity > > and they are never carried forward to new generations of the code so > > that effort has to be expended every new hardware generation, which > > becomes harder to justify as time goes on. > > > > There is some effort to improve clang's code generation but that may > > take some time between code review, shifting priorities, and release > > cycles. To avoid having a noticeable or lengthy breakage in > > all{mod,yes}config, which are easy testing targets that have -Werror > > enabled, increase the limit for clang by 50% so that cases of extremely > > poor code generation can still be caught while not breaking the majority > > of builds. 
When clang's code generation improves, the limit increase can > > be restricted to older clang versions. > > > > Signed-off-by: Nathan Chancellor > > --- > > If there is another DRM pull before 6.7-rc1, it would be much > > appreciated if this could make that so that other trees are not > > potentially broken by this. If not, no worries, as it was my fault for > > not sending this sooner. > > --- > > drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile > > b/drivers/gpu/drm/amd/display/dc/dml2/Makefile > > index 70ae5eba624e..dff8237c0999 100644 > > --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile > > +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile > > @@ -60,7 +60,7 @@ endif > > endif > > ifneq ($(CONFIG_FRAME_WARN),0) > > -frame_warn_flag := -Wframe-larger-than=2048 > > +frame_warn_flag := -Wframe-larger-than=$(if > > $(CONFIG_CC_IS_CLANG),3072,2048) > > I would prefer checking for `CONFIG_KASAN || CONFIG_KCSAN` instead > since the stack usage shouldn't change much if both of those are disabled. So something like this? Or were you talking about replacing the clang check entirely with the KASAN/KCSAN check? 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 70ae5eba624e..0fc1b13295eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -60,8 +60,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(CONFIG_CC_IS_CLANG)$(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),yy) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags) > > endif > > CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) > > $(frame_warn_flag) > > > > --- > > base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6 > > change-id: > > 20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871 > > > > Best regards, > -- > Hamza >
Re: mainline build failure due to 7966f319c66d ("drm/amd/display: Introduce DML2")
On Thu, 2 Nov 2023 at 16:52, Alex Deucher wrote: > > On Thu, Nov 2, 2023 at 5:32 AM Sudip Mukherjee (Codethink) > wrote: > > > > Hi All, > > > > The latest mainline kernel branch fails to build x86_64 allmodconfig > > with the error: > > > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In > > function 'dml_prefetch_check': > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6707:1: > > error: the frame size of 2056 bytes is larger than 2048 bytes > > [-Werror=frame-larger-than=] > > 6707 | } > > | ^ > > > > git bisect pointed to 7966f319c66d ("drm/amd/display: Introduce DML2") > > > > I will be happy to test any patch or provide any extra log if needed. > > This was reported earlier and fixed by: > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=089dbf6a06f1dcaeed4f8b86d619e8d28b235207 > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b141fa036c901303ca5659cc22e9c08f8b097892 > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5b2c54e0d0ea09f7a3b500510731878326e1117e > but I guess maybe different compiler versions are still hitting this. Yes, I should have mentioned: gcc-11 and gcc-12 failed to build, but gcc-13 was ok. -- Regards Sudip
Re: mainline build failure due to 7966f319c66d ("drm/amd/display: Introduce DML2")
On Thu, Nov 2, 2023 at 5:32 AM Sudip Mukherjee (Codethink) wrote: > > Hi All, > > The latest mainline kernel branch fails to build x86_64 allmodconfig > with the error: > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In > function 'dml_prefetch_check': > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6707:1: > error: the frame size of 2056 bytes is larger than 2048 bytes > [-Werror=frame-larger-than=] > 6707 | } > | ^ > > git bisect pointed to 7966f319c66d ("drm/amd/display: Introduce DML2") > > I will be happy to test any patch or provide any extra log if needed. This was reported earlier and fixed by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=089dbf6a06f1dcaeed4f8b86d619e8d28b235207 https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b141fa036c901303ca5659cc22e9c08f8b097892 https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5b2c54e0d0ea09f7a3b500510731878326e1117e but I guess maybe different compiler versions are still hitting this. Alex > > #regzbot introduced: 7966f319c66d9468623c6a6a017ecbc0dd79be75 > > -- > Regards > Sudip
Re: [PATCH] drm/amd/display: Increase frame warning limit for clang in dml2
On 11/2/23 12:24, Nathan Chancellor wrote: When building ARCH=x86_64 allmodconfig with clang, which have sanitizers enabled, there is a warning about a large stack frame. drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' [-Werror,-Wframe-larger-than] 6265 | static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) | ^ 1 error generated. Notably, GCC 13.2.0 does not do too much of a better job, as it is right at the current limit of 2048: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In function 'dml_prefetch_check': drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: error: the frame size of 2048 bytes is larger than 1800 bytes [-Werror=frame-larger-than=] 6705 | } | ^ In the past, these warnings have been avoided by reducing the number of parameters to various functions so that not as many arguments need to be passed on the stack. However, these patches take a good amount of effort to write despite being mechanical due to code structure and complexity and they are never carried forward to new generations of the code so that effort has to be expended every new hardware generation, which becomes harder to justify as time goes on. There is some effort to improve clang's code generation but that may take some time between code review, shifting priorities, and release cycles. To avoid having a noticeable or lengthy breakage in all{mod,yes}config, which are easy testing targets that have -Werror enabled, increase the limit for clang by 50% so that cases of extremely poor code generation can still be caught while not breaking the majority of builds. When clang's code generation improves, the limit increase can be restricted to older clang versions. 
Signed-off-by: Nathan Chancellor --- If there is another DRM pull before 6.7-rc1, it would be much appreciated if this could make that so that other trees are not potentially broken by this. If not, no worries, as it was my fault for not sending this sooner. --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 70ae5eba624e..dff8237c0999 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -60,7 +60,7 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) -frame_warn_flag := -Wframe-larger-than=2048 +frame_warn_flag := -Wframe-larger-than=$(if $(CONFIG_CC_IS_CLANG),3072,2048) I would prefer checking for `CONFIG_KASAN || CONFIG_KCSAN` instead since the stack usage shouldn't change much if both of those are disabled. endif CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag) --- base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6 change-id: 20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871 Best regards, -- Hamza
RE: [PATCH] drm: Disable XNACK on SRIOV environment
[AMD Official Use Only - General] Looks ok to me . Reviewed-by: Shaoyun.liu -Original Message- From: Kakarya, Surbhi Sent: Thursday, November 2, 2023 12:10 PM To: Kakarya, Surbhi ; amd-gfx@lists.freedesktop.org; Yang, Philip ; Liu, Shaoyun Subject: RE: [PATCH] drm: Disable XNACK on SRIOV environment [AMD Official Use Only - General] Ping.. -Original Message- From: Surbhi Kakarya Sent: Monday, October 30, 2023 9:54 PM To: amd-gfx@lists.freedesktop.org; Yang, Philip Cc: Kakarya, Surbhi Subject: [PATCH] drm: Disable XNACK on SRIOV environment The purpose of this patch is to disable XNACK or set XNACK OFF mode on SRIOV platform which doesn't support it. This will prevent user-space application to fail or result into unexpected behaviour whenever the application need to run test-case in XNACK ON mode. Signed-off-by: Surbhi Kakarya --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 5 - drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 9 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 -- 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 2dce338b0f1e..d582b240f919 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -826,7 +826,10 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) gc_ver == IP_VERSION(9, 4, 3) || gc_ver >= IP_VERSION(10, 3, 0)); - gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; + if (!amdgpu_sriov_xnack_support(adev)) + gmc->norety = 1; + else + gmc->noretry = (amdgpu_noretry == -1) ? 
noretry_default : +amdgpu_noretry; } void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a0aa624f5a92..41c77d5c5a79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1093,3 +1093,12 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, else return RREG32(offset); } +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) { + bool xnack_mode = 1; + + if (amdgpu_sriov_vf(adev) && (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))) + xnack_mode = 0; + + return xnack_mode; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 858ef21ae515..935ca736300e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -365,4 +365,5 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); void amdgpu_virt_post_reset(struct amdgpu_device *adev); +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index fbf053001af9..69954a2a8503 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1416,8 +1416,14 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) * per-process XNACK mode selection. But let the dev->noretry * setting still influence the default XNACK mode. */ - if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) - continue; + if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) { + if (!amdgpu_sriov_xnack_support(dev->kfd->adev)) { + pr_debug("SRIOV platform xnack not supported\n"); + return false; + } + else + continue; + } /* GFXv10 and later GPUs do not support shader preemption * during page faults. 
This can lead to poor QoS for queue -- 2.25.1
[PATCH] drm/amd/display: Increase frame warning limit for clang in dml2
When building ARCH=x86_64 allmodconfig with clang, which have sanitizers enabled, there is a warning about a large stack frame. drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' [-Werror,-Wframe-larger-than] 6265 | static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) | ^ 1 error generated. Notably, GCC 13.2.0 does not do too much of a better job, as it is right at the current limit of 2048: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In function 'dml_prefetch_check': drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: error: the frame size of 2048 bytes is larger than 1800 bytes [-Werror=frame-larger-than=] 6705 | } | ^ In the past, these warnings have been avoided by reducing the number of parameters to various functions so that not as many arguments need to be passed on the stack. However, these patches take a good amount of effort to write despite being mechanical due to code structure and complexity and they are never carried forward to new generations of the code so that effort has to be expended every new hardware generation, which becomes harder to justify as time goes on. There is some effort to improve clang's code generation but that may take some time between code review, shifting priorities, and release cycles. To avoid having a noticeable or lengthy breakage in all{mod,yes}config, which are easy testing targets that have -Werror enabled, increase the limit for clang by 50% so that cases of extremely poor code generation can still be caught while not breaking the majority of builds. When clang's code generation improves, the limit increase can be restricted to older clang versions. Signed-off-by: Nathan Chancellor --- If there is another DRM pull before 6.7-rc1, it would be much appreciated if this could make that so that other trees are not potentially broken by this. 
If not, no worries, as it was my fault for not sending this sooner. --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 70ae5eba624e..dff8237c0999 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -60,7 +60,7 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) -frame_warn_flag := -Wframe-larger-than=2048 +frame_warn_flag := -Wframe-larger-than=$(if $(CONFIG_CC_IS_CLANG),3072,2048) endif CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag) --- base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6 change-id: 20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871 Best regards, -- Nathan Chancellor
RE: [PATCH] drm: Disable XNACK on SRIOV environment
[AMD Official Use Only - General] Ping.. -Original Message- From: Surbhi Kakarya Sent: Monday, October 30, 2023 9:54 PM To: amd-gfx@lists.freedesktop.org; Yang, Philip Cc: Kakarya, Surbhi Subject: [PATCH] drm: Disable XNACK on SRIOV environment The purpose of this patch is to disable XNACK or set XNACK OFF mode on SRIOV platforms that do not support it. This will prevent user-space applications from failing or behaving unexpectedly whenever an application needs to run a test case in XNACK ON mode. Signed-off-by: Surbhi Kakarya --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 5 - drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 9 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 -- 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 2dce338b0f1e..d582b240f919 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -826,7 +826,10 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) gc_ver == IP_VERSION(9, 4, 3) || gc_ver >= IP_VERSION(10, 3, 0)); - gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; + if (!amdgpu_sriov_xnack_support(adev)) + gmc->norety = 1; + else + gmc->noretry = (amdgpu_noretry == -1) ? 
noretry_default : +amdgpu_noretry; } void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a0aa624f5a92..41c77d5c5a79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1093,3 +1093,12 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, else return RREG32(offset); } +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) { + bool xnack_mode = 1; + + if (amdgpu_sriov_vf(adev) && (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))) + xnack_mode = 0; + + return xnack_mode; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 858ef21ae515..935ca736300e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -365,4 +365,5 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); void amdgpu_virt_post_reset(struct amdgpu_device *adev); +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index fbf053001af9..69954a2a8503 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1416,8 +1416,14 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) * per-process XNACK mode selection. But let the dev->noretry * setting still influence the default XNACK mode. */ - if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) - continue; + if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) { + if (!amdgpu_sriov_xnack_support(dev->kfd->adev)) { + pr_debug("SRIOV platform xnack not supported\n"); + return false; + } + else + continue; + } /* GFXv10 and later GPUs do not support shader preemption * during page faults. 
This can lead to poor QoS for queue -- 2.25.1
[PATCH 2/4] drm/amdgpu: Add xcc param to SRIOV kiq write and WREG32_SOC15_IP_NO_KIQ (v4)
WREG32/RREG32_SOC15_IP_NO_KIQ and amdgpu_virt_kiq_reg_write_reg_wait are not using the correct rlcg interface or mec engine, respectively. Add an xcc instance parameter to them. v4: Use GET_INST and squash commit with: "drm/amdgpu: Add xcc_inst param to amdgpu_virt_kiq_reg_write_reg_wait" v3: xcc not needed for MMHUB v2: rebase Signed-off-by: Victor Lu --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c| 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c| 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 +-- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 6 +++--- 6 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a0aa624f5a92..e179f022c428 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask) + uint32_t ref, uint32_t mask, + uint32_t xcc_inst) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst]; struct amdgpu_ring *ring = &kiq->ring; signed long r, cnt = 0; unsigned long flags; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 858ef21ae515..bb436d41b4ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -334,7 +334,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t rreg1, - uint32_t ref, uint32_t mask); + uint32_t ref, uint32_t mask, + uint32_t xcc_inst); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); 
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d8a4fddab9c1..a43d1aa42e11 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid, GET_INST(GC, 0)); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 19eaada35ede..93f100dd5d94 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -228,7 +228,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid, GET_INST(GC, 0)); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3a1050344b59..35ef7529cc8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -817,7 +817,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); - u32 j, inv_req, tmp, sem, req, ack; + u32 j, inv_req, tmp, sem, req, ack, inst; const unsigned int eng = 17; struct amdgpu_vmhub *hub; @@ -832,13 +832,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for a HW workaround under SRIOV as well * as GFXOFF under bare 
metal */ - if (adev->gfx.kiq[0].ring.sched.ready && + if (vmhub >= AMDGPU_MMHUB0(0)) + inst = GET_INST(GC, 0); + else + inst = vmhub; + if (adev->gfx.kiq[inst].ring.sched.ready &&
Re: [PATCH] drm/amdgpu: Fix the vram base start address
Am 01.11.23 um 20:13 schrieb Arunpravin Paneer Selvam: Hi Christian, On 10/30/2023 9:34 PM, Christian König wrote: Am 30.10.23 um 13:22 schrieb Arunpravin Paneer Selvam: If the size returned by the drm buddy allocator is higher than the required size, we take the higher size to calculate the buffer start address. This is required if we couldn't trim the buffer to the requested size. This will fix the display corruption issue on APUs which have limited VRAM size. gitlab issue link: https://gitlab.freedesktop.org/drm/amd/-/issues/2859 JIRA ticket link: https://ontrack-internal.amd.com/browse/SWDEV-425461 Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation") Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König IIRC that hack with the start address is actually not needed any more, but we need to double check this. Okay, can we just remove this hack and keep the vres->base.start value as the start address of the first block from the allocated list? Please double check if we don't have any more cases where we compare the start address against the visible VRAM limit. I think we now fixed all those cases and replaced them with calls to check if all segments are visible, but I'm not 100% sure. Regards, Christian. Thanks, Arun Christian. 
--- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 15 +-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 18f58efc9dc7..08916538a615 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -77,7 +77,16 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) return true; } +static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head) +{ + struct drm_buddy_block *block; + u64 size = 0; + list_for_each_entry(block, head, link) + size += amdgpu_vram_mgr_block_size(block); + + return size; +} /** * DOC: mem_info_vram_total @@ -516,6 +525,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, mutex_unlock(&mgr->lock); vres->base.start = 0; + size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), + vres->base.size); list_for_each_entry(block, &vres->blocks, link) { unsigned long start; @@ -523,8 +534,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, amdgpu_vram_mgr_block_size(block); start >>= PAGE_SHIFT; - if (start > PFN_UP(vres->base.size)) - start -= PFN_UP(vres->base.size); + if (start > PFN_UP(size)) + start -= PFN_UP(size); else start = 0; vres->base.start = max(vres->base.start, start);
[PATCH 4/4] drm/amdgpu: Change WREG32_RLC to WREG32_SOC15_RLC where inst != 0 (v2)
W/RREG32_RLC is hardcoded to use instance 0. W/RREG32_SOC15_RLC should be used instead when inst != 0. v2: rebase Signed-off-by: Victor Lu --- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 38 -- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 40 +-- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 2 +- 3 files changed, 37 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 80309d39737a..f6598b9e4faa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -306,8 +306,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -336,27 +335,24 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO, + lower_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI, + upper_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR, + lower_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, upper_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1), - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, - queue_id)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR), - REG_SET_FIELD(m->cp_hqd_eop_rptr, -CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR, + REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data); kgd_gfx_v9_release_queue(adev, inst); @@ -494,15 +490,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch( VALID, 1); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_H) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_high); + watch_address_high, inst); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_L) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_low); + watch_address_low, inst); return watch_address_cntl; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 9285789b3a42..00fbc0f44c92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi { kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst); - 
WREG32_RLC(SOC15_REG_OFFSET(G
[PATCH 1/4] drm/amdgpu: Add flag to enable indirect RLCG access for gfx v9.4.3
The "rlcg_reg_access_supported" flag is missing. Add it back in. Signed-off-by: Victor Lu --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index a1c2c952d882..ce2a9876369e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1101,6 +1101,7 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX); reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPARE_INT); } + adev->gfx.rlc.rlcg_reg_access_supported = true; } static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) -- 2.34.1
[PATCH v2 1/3] drm/amdgpu: Don't implicit sync PRT maps.
These are considered map operations rather than unmap, and there is no point in doing implicit synchronization here. Signed-off-by: Tatsuyuki Ishi --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f5daadcec865..7b9762f1cddd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -902,7 +902,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, /* Implicitly sync to command submissions in the same VM before * unmapping. Sync to moving fences before mapping. */ - if (!(flags & AMDGPU_PTE_VALID)) + if (!(flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT))) sync_mode = AMDGPU_SYNC_EQ_OWNER; else sync_mode = AMDGPU_SYNC_EXPLICIT; -- 2.42.0
[PATCH v2 2/3] drm/amdgpu: Add flag to disable implicit sync for GEM operations.
In Vulkan, it is the application's responsibility to perform adequate synchronization before a sparse unmap, replace or BO destroy operation. Until now, the kernel applied the same rule as implicitly-synchronized APIs like OpenGL, which with per-VM BOs made page table updates stall the queue completely. The newly added AMDGPU_VM_EXPLICIT_SYNC flag allows drivers to opt-out of this behavior, while still ensuring adequate implicit sync happens for kernel-initiated updates (e.g. BO moves). We record whether to use implicit sync or not for each freed mapping. To avoid increasing the mapping struct's size, this is union-ized with the interval tree field which is unused after the unmap. The reason this is done with a GEM ioctl flag, instead of being a VM / context global setting, is that the current libdrm implementation shares the DRM handle even between different kind of drivers (radeonsi vs radv). Signed-off-by: Tatsuyuki Ishi --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 -- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h| 7 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 6 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 47 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 23 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 +++ include/uapi/drm/amdgpu_drm.h | 2 + 9 files changed, 71 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7d6daf8d2bfa..10e129bff977 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1196,7 +1196,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, struct amdgpu_device *adev = entry->adev; struct amdgpu_vm *vm = bo_va->base.vm; - amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + amdgpu_vm_bo_unmap(adev, bo_va, entry->va, true); amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 720011019741..612279e65bff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -122,7 +122,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, } } - r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr); + r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr, true); if (r) { DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r); goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index a1b15d0d6c48..cca68b89754e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -667,9 +667,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK | - AMDGPU_VM_PAGE_NOALLOC; + AMDGPU_VM_PAGE_NOALLOC | AMDGPU_VM_EXPLICIT_SYNC; const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE | - AMDGPU_VM_PAGE_PRT; + AMDGPU_VM_PAGE_PRT | AMDGPU_VM_EXPLICIT_SYNC; struct drm_amdgpu_gem_va *args = data; struct drm_gem_object *gobj; @@ -680,6 +680,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_exec exec; uint64_t va_flags; uint64_t vm_size; + bool sync_unmap; int r = 0; if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { @@ -715,6 +716,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + sync_unmap = !(args->flags & AMDGPU_VM_EXPLICIT_SYNC); + switch (args->operation) { case AMDGPU_VA_OP_MAP: case AMDGPU_VA_OP_UNMAP: @@ -774,19 +777,20 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, va_flags); break; case AMDGPU_VA_OP_UNMAP: - r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address); + r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address, + sync_unmap); break; case AMDGPU_VA_OP_CLEAR: r = 
amdgpu_vm_bo_clear_mappings(adev, &fpriv->vm, args->va_address, - args->map_size); + args->map_size, sync_unmap); break; case AMDGPU_VA_OP_REPLACE: va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
[PATCH v2 0/3] drm/amdgpu: Add flag to disable implicit sync for GEM operations.
In Vulkan, it is the application's responsibility to perform adequate synchronization before a sparse unmap, replace or BO destroy operation. This adds an option to AMDGPU_VA_OPs to disable redundant implicit sync that happens on sparse unmap or replace operations. This yields a significant reduction in stutter in Forza Horizon 5 and Forza Horizon 4 (games that had significant issues with sparse-binding-related stutter). Compared to the previous series [1], this specifically targets the VM operations and keeps everything else intact, including implicit sync on kernel-initiated moves. I've been able to pass a full Vulkan CTS run on Navi 10 with this. Userspace code for this is available at [2] and a branch for the kernel code is available at [3]. v2 changes: - Drop the changes to flush split bindings eagerly as it's incompatible with TLB flush quirks in current hardware. Drop the refactoring commits related to that change too. - Fixed a missing doc warning. - Removed an accidentally included ioctl change. [1]: https://lore.kernel.org/all/20230821062005.109771-1-ishitatsuy...@gmail.com/ [2]: https://gitlab.freedesktop.org/ishitatsuyuki/mesa/-/commits/vm-explicit-sync [3]: https://github.com/ishitatsuyuki/linux/tree/explicit-sync-drm-misc-next Tatsuyuki Ishi (3): drm/amdgpu: Don't implicit sync PRT maps. drm/amdgpu: Add flag to disable implicit sync for GEM operations. drm/amdgpu: Bump amdgpu driver version. .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 -- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h| 7 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 6 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 47 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 23 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 +++ include/uapi/drm/amdgpu_drm.h | 2 + 10 files changed, 73 insertions(+), 51 deletions(-) -- 2.42.0
[PATCH v2 3/3] drm/amdgpu: Bump amdgpu driver version.
For detection of the new explicit sync functionality without having to try the ioctl. Signed-off-by: Tatsuyuki Ishi --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 81edf66dbea8..2aa406dee192 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -113,9 +113,10 @@ *gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi * 3.53.0 - Support for GFX11 CP GFX shadowing * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support + * - 3.55.0 - Add AMDGPU_VM_EXPLICIT_SYNC flag for GEM operations. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 54 +#define KMS_DRIVER_MINOR 55 #define KMS_DRIVER_PATCHLEVEL 0 unsigned int amdgpu_vram_limit = UINT_MAX; -- 2.42.0
Re: [PATCH 2/2] drm/amdgpu: Use drm_exec for seq64 bo lock
Am 01.11.23 um 17:26 schrieb Arunpravin Paneer Selvam: Replace seq64 bo lock sequences with drm_exec. Signed-off-by: Alex Deucher Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 73 ++- 1 file changed, 33 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index 63d8b68023be..810f7637096e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -25,6 +25,8 @@ #include "amdgpu.h" #include "amdgpu_seq64.h" +#include + /** * DOC: amdgpu_seq64 * @@ -68,11 +70,8 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va) { - struct ttm_validate_buffer seq64_tv; - struct amdgpu_bo_list_entry pd; - struct ww_acquire_ctx ticket; - struct list_head list; struct amdgpu_bo *bo; + struct drm_exec exec; u64 seq64_addr; int r; @@ -80,23 +79,20 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (!bo) return -EINVAL; - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&seq64_tv.head); - - seq64_tv.bo = &bo->tbo; - seq64_tv.num_shared = 1; - - list_add(&seq64_tv.head, &list); - amdgpu_vm_get_pd_bo(vm, &list, &pd); - - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); - if (r) - return r; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(vm, &exec, 0); + if (likely(!r)) + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error; + } *bo_va = amdgpu_vm_bo_add(adev, vm, bo); if (!*bo_va) { r = -ENOMEM; - goto error_vm; + goto error; } seq64_addr = amdgpu_seq64_get_va_base(adev); @@ -104,23 +100,19 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, AMDGPU_PTE_READABLE); if (r) { DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); - goto error_map; + 
amdgpu_vm_bo_del(adev, *bo_va); + goto error; } r = amdgpu_vm_bo_update(adev, *bo_va, false); if (r) { DRM_ERROR("failed to do vm_bo_update on userq sem\n"); - goto error_map; + amdgpu_vm_bo_del(adev, *bo_va); + goto error; } - ttm_eu_backoff_reservation(&ticket, &list); - - return 0; - -error_map: - amdgpu_vm_bo_del(adev, *bo_va); -error_vm: - ttm_eu_backoff_reservation(&ticket, &list); +error: + drm_exec_fini(&exec); return r; } @@ -134,12 +126,10 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, */ void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv) { - struct ttm_validate_buffer seq64_tv; - struct amdgpu_bo_list_entry pd; - struct ww_acquire_ctx ticket; - struct list_head list; struct amdgpu_vm *vm; struct amdgpu_bo *bo; + struct drm_exec exec; + int r; if (!fpriv->seq64_va) return; @@ -149,20 +139,23 @@ void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv) return; vm = &fpriv->vm; - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&seq64_tv.head); - seq64_tv.bo = &bo->tbo; - seq64_tv.num_shared = 1; - - list_add(&seq64_tv.head, &list); - amdgpu_vm_get_pd_bo(vm, &list, &pd); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(vm, &exec, 0); + if (likely(!r)) + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error; + } - ttm_eu_reserve_buffers(&ticket, &list, false, NULL); amdgpu_vm_bo_del(adev, fpriv->seq64_va); - ttm_eu_backoff_reservation(&ticket, &list); fpriv->seq64_va = NULL; + +error: + drm_exec_fini(&exec); } /**
Re: [PATCH] drm/amdgpu: don't put MQDs in VRAM on ARM | ARM64
Am 31.10.23 um 18:54 schrieb Alex Deucher: Issues were reported with commit 1cfb4d612127 ("drm/amdgpu: put MQDs in VRAM") on an ADLINK Ampere Altra Developer Platform (AVA developer platform). Various ARM systems seem to have problems related to PCIe and MMIO access. In this case, I'm not sure if this is specific to the ADLINK platform or ARM in general. Seems to be some coherency issue with VRAM. For now, just don't put MQDs in VRAM on ARM. Link: https://lists.freedesktop.org/archives/amd-gfx/2023-October/100453.html Fixes: 1cfb4d612127 ("drm/amdgpu: put MQDs in VRAM") Signed-off-by: Alex Deucher Cc: alexey.kli...@linaro.org Acked-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c92e0aba69e1..a2a29dcb2422 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -385,9 +385,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, struct amdgpu_ring *ring = &kiq->ring; u32 domain = AMDGPU_GEM_DOMAIN_GTT; +#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64) /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) domain |= AMDGPU_GEM_DOMAIN_VRAM; +#endif /* create MQD for KIQ */ if (!adev->enable_mes_kiq && !ring->mqd_obj) {
Re: [Patch v13 4/9] wifi: mac80211: Add support for WBRF features
On Thu, 2023-11-02 at 14:24 +0200, Ilpo Järvinen wrote: > On Thu, 2 Nov 2023, Johannes Berg wrote: > > On Thu, 2023-11-02 at 13:55 +0200, Ilpo Järvinen wrote: > > > > > > +static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 > > > > *start, u64 *end) > > > > +{ > > > > + bandwidth = MHZ_TO_KHZ(bandwidth); > > > > + center_freq = MHZ_TO_KHZ(center_freq); > > > > > > Please use include/linux/units.h ones for these too. > > > > Now we're feature creeping though - this has existed for *years* in the > > wireless stack with many instances? We can convert them over, I guess, > > but not sure that makes much sense here - we'd want to add such macros > > to units.h, but ... moving them can be independent of this patch? > > What new macros you're talking about? Sorry, I got confused - for some reason I was pretty sure something here was already being added to units.h in this patchset. > Nothing new needs to be added > as there's already KHZ_PER_MHZ so these would just be: > > bandwidth *= KHZ_PER_MHZ; > center_freq *= KHZ_PER_MHZ; Sure, and in this case that's probably pretty much equivalent. But having a MHZ_TO_KHZ() macro isn't inherently *bad*, and I'm not sure you're objecting to it on any grounds other than "it's not defined in units.h". > Everything can of course be postponed by the argument that some > subsystem specific mechanism has been there before the generic one > but the end of that road won't be pretty... What I was trying to do > here was to point out the new stuff introduced by this series into the > direction of the generic thing. I just think that the better course of action would be to eventually move MHZ_TO_KHZ() to units.h ... johannes
Re: [Patch v13 4/9] wifi: mac80211: Add support for WBRF features
On Thu, 2 Nov 2023, Johannes Berg wrote: > On Thu, 2023-11-02 at 13:55 +0200, Ilpo Järvinen wrote: > > > > +static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 > > > *start, u64 *end) > > > +{ > > > + bandwidth = MHZ_TO_KHZ(bandwidth); > > > + center_freq = MHZ_TO_KHZ(center_freq); > > > > Please use include/linux/units.h ones for these too. > > Now we're feature creeping though - this has existed for *years* in the > wireless stack with many instances? We can convert them over, I guess, > but not sure that makes much sense here - we'd want to add such macros > to units.h, but ... moving them can be independent of this patch? What new macros you're talking about? Nothing new needs to be added as there's already KHZ_PER_MHZ so these would just be: bandwidth *= KHZ_PER_MHZ; center_freq *= KHZ_PER_MHZ; Everything can of course be postponed by the argument that some subsystem specific mechanism has been there before the generic one but the end of that road won't be pretty... What I was trying to do here was to point out the new stuff introduced by this series into the direction of the generic thing. -- i.
Re: [Patch v13 4/9] wifi: mac80211: Add support for WBRF features
On Mon, 30 Oct 2023, Ma Jun wrote: > From: Evan Quan > > To support the WBRF mechanism, Wifi adapters utilized in the system must > register the frequencies in use (or unregister those frequencies no longer > used) via the dedicated calls. So that, other drivers responding to the > frequencies can take proper actions to mitigate possible interference. > > Co-developed-by: Mario Limonciello > Signed-off-by: Mario Limonciello > Co-developed-by: Evan Quan > Signed-off-by: Evan Quan > Signed-off-by: Ma Jun > -- > v1->v2: > - place the new added member(`wbrf_supported`) in > ieee80211_local(Johannes) > - handle chandefs change scenario properly(Johannes) > - some minor fixes around code sharing and possible invalid input > checks(Johannes) > v2->v3: > - drop unnecessary input checks and intermediate APIs(Mario) > - Separate some mac80211 common code(Mario, Johannes) > v3->v4: > - some minor fixes around return values(Johannes) > v9->v10: > - get ranges_in->num_of_ranges set and passed in(Johannes) > v12: > - use acpi_amd_wbrf_add_remove to replace the acpi_amd_wbrf_add_exclusion > acpi_amd_wbrf_remove_exclusion > v13: > - Fix the format issue (IIpo Jarvinen) > - Remove KHZ_TO_HZ and use HZ_PER_KHZ in linux/units.h (IIpo Jarvinen) > --- > net/mac80211/Makefile | 2 + > net/mac80211/chan.c| 9 > net/mac80211/ieee80211_i.h | 7 +++ > net/mac80211/main.c| 2 + > net/mac80211/wbrf.c| 95 ++ > 5 files changed, 115 insertions(+) > create mode 100644 net/mac80211/wbrf.c > > diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile > index b8de44da1fb8..d46c36f55fd3 100644 > --- a/net/mac80211/Makefile > +++ b/net/mac80211/Makefile > @@ -65,4 +65,6 @@ rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \ > > mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) > > +mac80211-y += wbrf.o > + > ccflags-y += -DDEBUG > diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c > index 68952752b599..458469c224ae 100644 > --- a/net/mac80211/chan.c > +++ b/net/mac80211/chan.c > @@ 
-506,11 +506,16 @@ static void _ieee80211_change_chanctx(struct > ieee80211_local *local, > > WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef)); > > + ieee80211_remove_wbrf(local, &ctx->conf.def); > + > ctx->conf.def = *chandef; > > /* check if min chanctx also changed */ > changed = IEEE80211_CHANCTX_CHANGE_WIDTH | > _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for); > + > + ieee80211_add_wbrf(local, &ctx->conf.def); > + > drv_change_chanctx(local, ctx, changed); > > if (!local->use_chanctx) { > @@ -668,6 +673,8 @@ static int ieee80211_add_chanctx(struct ieee80211_local > *local, > lockdep_assert_held(&local->mtx); > lockdep_assert_held(&local->chanctx_mtx); > > + ieee80211_add_wbrf(local, &ctx->conf.def); > + > if (!local->use_chanctx) > local->hw.conf.radar_enabled = ctx->conf.radar_enabled; > > @@ -748,6 +755,8 @@ static void ieee80211_del_chanctx(struct ieee80211_local > *local, > } > > ieee80211_recalc_idle(local); > + > + ieee80211_remove_wbrf(local, &ctx->conf.def); > } > > static void ieee80211_free_chanctx(struct ieee80211_local *local, > diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h > index 98ef1fe1226e..1172554bd831 100644 > --- a/net/mac80211/ieee80211_i.h > +++ b/net/mac80211/ieee80211_i.h > @@ -1600,6 +1600,8 @@ struct ieee80211_local { > > /* extended capabilities provided by mac80211 */ > u8 ext_capa[8]; > + > + bool wbrf_supported; > }; > > static inline struct ieee80211_sub_if_data * > @@ -2637,4 +2639,9 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct > ieee80211_sub_if_data *sdata, > const struct ieee80211_eht_cap_elem > *eht_cap_ie_elem, > u8 eht_cap_len, > struct link_sta_info *link_sta); > + > +void ieee80211_check_wbrf_support(struct ieee80211_local *local); > +void ieee80211_add_wbrf(struct ieee80211_local *local, struct > cfg80211_chan_def *chandef); > +void ieee80211_remove_wbrf(struct ieee80211_local *local, struct > cfg80211_chan_def *chandef); > + > #endif /* IEEE80211_I_H */ > diff --git 
a/net/mac80211/main.c b/net/mac80211/main.c > index 24315d7b3126..b20bdaac84db 100644 > --- a/net/mac80211/main.c > +++ b/net/mac80211/main.c > @@ -1396,6 +1396,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) > debugfs_hw_add(local); > rate_control_add_debugfs(local); > > + ieee80211_check_wbrf_support(local); > + > rtnl_lock(); > wiphy_lock(hw->wiphy); > > diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c > new file mode 100644 > index ..ca3f30b58476 > --- /dev/null > +++ b/net/mac80211/wbrf.c > @@ -0,0 +1,95 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* >
mainline build failure due to 7966f319c66d ("drm/amd/display: Introduce DML2")
Hi All, The latest mainline kernel branch fails to build x86_64 allmodconfig with the error: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In function 'dml_prefetch_check': drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6707:1: error: the frame size of 2056 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] 6707 | } | ^ git bisect pointed to 7966f319c66d ("drm/amd/display: Introduce DML2") I will be happy to test any patch or provide any extra log if needed. #regzbot introduced: 7966f319c66d9468623c6a6a017ecbc0dd79be75 -- Regards Sudip
Re: [Patch v13 4/9] wifi: mac80211: Add support for WBRF features
On Thu, 2023-11-02 at 13:55 +0200, Ilpo Järvinen wrote: [please trim your quotes] > > +static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 > > *start, u64 *end) > > +{ > > + bandwidth = MHZ_TO_KHZ(bandwidth); > > + center_freq = MHZ_TO_KHZ(center_freq); > > Please use include/linux/units.h ones for these too. Now we're feature creeping though - this has existed for *years* in the wireless stack with many instances? We can convert them over, I guess, but not sure that makes much sense here - we'd want to add such macros to units.h, but ... moving them can be independent of this patch? johannes
Re: [PATCH 1/2] drm/amdgpu: Enable seq64 manager and fix bugs
Am 01.11.23 um 17:26 schrieb Arunpravin Paneer Selvam: - Enable the seq64 mapping sequence. - Fix wflinfo va conflict and other bugs. v1: - The seq64 area needs to be included in the AMDGPU_VA_RESERVED_SIZE otherwise the areas will conflict with user space allocations (Alex) - It needs to be mapped read only in the user VM (Alex) v2: - Instead of just one define for TOP/BOTTOM reserved space separate them into two (Christian) - Fix the CPU and VA calculations and while at it also cleanup error handling and kerneldoc (Christian) Signed-off-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Arunpravin Paneer Selvam --- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 8 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c| 69 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h| 9 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c| 5 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c| 5 +- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c| 5 +- 11 files changed, 68 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 23d054526e7c..c7622efdafee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -28,7 +28,7 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; - addr -= AMDGPU_VA_RESERVED_SIZE; + addr -= AMDGPU_VA_RESERVED_CSA_SIZE; addr = amdgpu_gmc_sign_extend(addr); return addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 849fffbb367d..f4455ed78e72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -687,10 +687,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 
void *data, uint64_t vm_size; int r = 0; - if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { + if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) { dev_dbg(dev->dev, "va_address 0x%llx is in reserved area 0x%llx\n", - args->va_address, AMDGPU_VA_RESERVED_SIZE); + args->va_address, AMDGPU_VA_RESERVED_BOTTOM); return -EINVAL; } @@ -706,7 +706,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->va_address &= AMDGPU_GMC_HOLE_MASK; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; - vm_size -= AMDGPU_VA_RESERVED_SIZE; + vm_size -= AMDGPU_VA_RESERVED_TOP; if (args->va_address + args->map_size > vm_size) { dev_dbg(dev->dev, "va_address 0x%llx is in top reserved area 0x%llx\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b5ebafd4a3ad..bb4aa14b868c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -894,14 +894,14 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; - vm_size -= AMDGPU_VA_RESERVED_SIZE; + vm_size -= AMDGPU_VA_RESERVED_TOP; /* Older VCE FW versions are buggy and can handle only 40bits */ if (adev->vce.fw_version && adev->vce.fw_version < AMDGPU_VCE_FW_53_45) vm_size = min(vm_size, 1ULL << 40); - dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; + dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_BOTTOM; dev_info->virtual_address_max = min(vm_size, AMDGPU_GMC_HOLE_START); @@ -1365,6 +1365,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto error_vm; } + r = amdgpu_seq64_map(adev, &fpriv->vm, &fpriv->seq64_va); + if (r) + goto error_vm; + mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 
70fe3b39c004..108908a10b92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1325,7 +1325,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev) goto error_fini; } - ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE; + ctx_data.meta_data_gpu_addr = AMDGP
RE: [PATCH v2] drm/amdgpu: fix GRBM read timeout when do mes_self_test
[AMD Official Use Only - General] This patch is: Reviewed-by: Yifan Zhang -----Original Message----- From: Huang, Tim Sent: Wednesday, November 1, 2023 4:53 PM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Zhang, Yifan ; Xiao, Jack ; Huang, Tim Subject: [PATCH v2] drm/amdgpu: fix GRBM read timeout when do mes_self_test Use a proper MEID to make sure the CP_HQD_* and CP_GFX_HQD_* registers can be touched when initializing the compute and gfx MQDs in mes_self_test. Otherwise, we can expect no response from the CP and an eventual GRBM timeout. Signed-off-by: Tim Huang --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 16 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 70fe3b39c004..45280fb0e00c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -556,8 +556,20 @@ static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, mqd_prop.hqd_queue_priority = p->hqd_queue_priority; mqd_prop.hqd_active = false; + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + mutex_lock(&adev->srbm_mutex); + amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); + } + mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + amdgpu_bo_unreserve(q->mqd_obj); } @@ -993,9 +1005,13 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, switch (queue_type) { case AMDGPU_RING_TYPE_GFX: ring->funcs = adev->gfx.gfx_ring[0].funcs; + ring->me = adev->gfx.gfx_ring[0].me; + ring->pipe = adev->gfx.gfx_ring[0].pipe; break; case AMDGPU_RING_TYPE_COMPUTE: ring->funcs = adev->gfx.compute_ring[0].funcs; + ring->me = adev->gfx.compute_ring[0].me; + ring->pipe = adev->gfx.compute_ring[0].pipe; break; case AMDGPU_RING_TYPE_SDMA: 
ring->funcs = adev->sdma.instance[0].ring.funcs; -- 2.39.2
Re: [PATCH] drm/edid: add a quirk for two 240Hz Samsung monitors
On Wed, 01 Nov 2023, Alex Deucher wrote: > On Wed, Nov 1, 2023 at 5:01 PM Hamza Mahfooz wrote: >> >> Without this fix the 5120x1440@240 timing of these monitors >> leads to screen flickering. >> >> Cc: sta...@vger.kernel.org # 6.1+ >> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1442 >> Co-developed-by: Harry Wentland >> Signed-off-by: Harry Wentland >> Signed-off-by: Hamza Mahfooz >> --- >> drivers/gpu/drm/drm_edid.c | 47 +++--- >> 1 file changed, 44 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c >> index bca2af4fe1fc..3fdb8907f66b 100644 >> --- a/drivers/gpu/drm/drm_edid.c >> +++ b/drivers/gpu/drm/drm_edid.c >> @@ -89,6 +89,8 @@ static int oui(u8 first, u8 second, u8 third) >> #define EDID_QUIRK_NON_DESKTOP (1 << 12) >> /* Cap the DSC target bitrate to 15bpp */ >> #define EDID_QUIRK_CAP_DSC_15BPP (1 << 13) >> +/* Fix up a particular 5120x1440@240Hz timing */ >> +#define EDID_QUIRK_FIXUP_5120_1440_240 (1 << 14) > > What is wrong with the original timing that needs to be fixed? Indeed. I'd be wary of applying this quirk as-is, because it'll impact all drivers and all connectors. The bug report does not have a single EDID from the affected displays attached. The quirk sets mode members that apparently do not need to be modified. Cc: Ville BR, Jani. 
> > Alex > > >> >> #define MICROSOFT_IEEE_OUI 0xca125c >> >> @@ -170,6 +172,12 @@ static const struct edid_quirk { >> EDID_QUIRK('S', 'A', 'M', 596, EDID_QUIRK_PREFER_LARGE_60), >> EDID_QUIRK('S', 'A', 'M', 638, EDID_QUIRK_PREFER_LARGE_60), >> >> + /* Samsung C49G95T */ >> + EDID_QUIRK('S', 'A', 'M', 0x7053, EDID_QUIRK_FIXUP_5120_1440_240), >> + >> + /* Samsung S49AG95 */ >> + EDID_QUIRK('S', 'A', 'M', 0x71ac, EDID_QUIRK_FIXUP_5120_1440_240), >> + >> /* Sony PVM-2541A does up to 12 bpc, but only reports max 8 bpc */ >> EDID_QUIRK('S', 'N', 'Y', 0x2541, EDID_QUIRK_FORCE_12BPC), >> >> @@ -6586,7 +6594,37 @@ static void update_display_info(struct drm_connector >> *connector, >> drm_edid_to_eld(connector, drm_edid); >> } >> >> -static struct drm_display_mode *drm_mode_displayid_detailed(struct >> drm_device *dev, >> +static void drm_mode_displayid_detailed_edid_quirks(struct drm_connector >> *connector, >> + struct drm_display_mode >> *mode) >> +{ >> + unsigned int hsync_width; >> + unsigned int vsync_width; >> + >> + if (connector->display_info.quirks & EDID_QUIRK_FIXUP_5120_1440_240) >> { >> + if (mode->hdisplay == 5120 && mode->vdisplay == 1440 && >> + mode->clock == 1939490) { >> + hsync_width = mode->hsync_end - mode->hsync_start; >> + vsync_width = mode->vsync_end - mode->vsync_start; >> + >> + mode->clock = 2018490; >> + mode->hdisplay = 5120; >> + mode->hsync_start = 5120 + 8; >> + mode->hsync_end = 5120 + 8 + hsync_width; >> + mode->htotal = 5200; >> + >> + mode->vdisplay = 1440; >> + mode->vsync_start = 1440 + 165; >> + mode->vsync_end = 1440 + 165 + vsync_width; >> + mode->vtotal = 1619; >> + >> + drm_dbg_kms(connector->dev, >> + "[CONNECTOR:%d:%s] Samsung 240Hz mode >> quirk applied\n", >> + connector->base.id, connector->name); >> + } >> + } >> +} >> + >> +static struct drm_display_mode *drm_mode_displayid_detailed(struct >> drm_connector *connector, >> struct >> displayid_detailed_timings_1 *timings, >> bool type_7) >> { >> @@ -6605,7 +6643,7 @@ static 
struct drm_display_mode >> *drm_mode_displayid_detailed(struct drm_device *d >> bool hsync_positive = (timings->hsync[1] >> 7) & 0x1; >> bool vsync_positive = (timings->vsync[1] >> 7) & 0x1; >> >> - mode = drm_mode_create(dev); >> + mode = drm_mode_create(connector->dev); >> if (!mode) >> return NULL; >> >> @@ -6628,6 +,9 @@ static struct drm_display_mode >> *drm_mode_displayid_detailed(struct drm_device *d >> >> if (timings->flags & 0x80) >> mode->type |= DRM_MODE_TYPE_PREFERRED; >> + >> + drm_mode_displayid_detailed_edid_quirks(connector, mode); >> + >> drm_mode_set_name(mode); >> >> return mode; >> @@ -6650,7 +6691,7 @@ static int add_displayid_detailed_1_modes(struct >> drm_connector *connector, >> for (i = 0; i < num_timings; i++) { >> struct displayid_detailed_timings_1 *timings =
[PATCH] drm/amdgpu: Don't warn for unsupported set_xgmi_plpd_mode
set_xgmi_plpd_mode may be unsupported, and this isn't an error, so there is no need to print a warning for it. v2: add ret2 to save the status of psp_ras_trigger_error. Suggested-by: lijo.la...@amd.com Signed-off-by: Tao Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 0533f873001b..a5a72e5aae94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1131,28 +1131,30 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if, uint32_t instance_mask) { - int ret = 0; + int ret1, ret2; struct ta_ras_trigger_error_input *block_info = (struct ta_ras_trigger_error_input *)inject_if; if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) dev_warn(adev->dev, "Failed to disallow df cstate"); - if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to disallow XGMI power down"); - ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); + ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); if (amdgpu_ras_intr_triggered()) - return ret; + return ret2; - if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to allow XGMI power down"); if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) dev_warn(adev->dev, "Failed to allow df cstate"); - return ret; + return ret2; } struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = { -- 2.35.1
RE: [PATCH] drm/amd/pm: Fix return value and drop redundant param
[AMD Official Use Only - General] Reviewed-by: Kenneth Feng -Original Message- From: Ma, Jun Sent: Thursday, November 2, 2023 3:59 PM To: amd-gfx@lists.freedesktop.org; Feng, Kenneth ; Deucher, Alexander Cc: Ma, Jun Subject: [PATCH] drm/amd/pm:Fix return vlaue and drop redundant param Fix the return value and drop redundant parameter of get_asic_baco_capability function to simplify the code Signed-off-by: Ma Jun --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 2 +- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 8 +++- drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 11 --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c | 7 +++ drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c | 9 - drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c | 9 - drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 12 +--- 11 files changed, 28 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 3201808c2dd8..60e6b82077e8 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -421,7 +421,7 @@ struct amd_pm_funcs { int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock); int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock); int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock); - int (*get_asic_baco_capability)(void *handle, bool *cap); + bool (*get_asic_baco_capability)(void *handle); int (*get_asic_baco_state)(void *handle, int *state); int (*set_asic_baco_state)(void *handle, int state); int (*get_ppfeature_status)(void *handle, char *buf); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index acf3527fff2d..24fd036a15c0 100644 --- 
a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -185,8 +185,7 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; void *pp_handle = adev->powerplay.pp_handle; - bool baco_cap; - int ret = 0; + bool ret; if (!pp_funcs || !pp_funcs->get_asic_baco_capability) return false; @@ -204,12 +203,11 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) mutex_lock(&adev->pm.mutex); - ret = pp_funcs->get_asic_baco_capability(pp_handle, -&baco_cap); + ret = pp_funcs->get_asic_baco_capability(pp_handle); mutex_unlock(&adev->pm.mutex); - return ret ? false : baco_cap; + return ret; } int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 9e4f8a4104a3..e82c2b2fffb5 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1368,21 +1368,18 @@ static int pp_set_active_display_count(void *handle, uint32_t count) return phm_set_active_display_count(hwmgr, count); } -static int pp_get_asic_baco_capability(void *handle, bool *cap) +static bool pp_get_asic_baco_capability(void *handle) { struct pp_hwmgr *hwmgr = handle; - *cap = false; if (!hwmgr) - return -EINVAL; + return false; if (!(hwmgr->not_vf && amdgpu_dpm) || !hwmgr->hwmgr_func->get_asic_baco_capability) - return 0; + return false; - hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr, cap); - - return 0; + return hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr); } static int pp_get_asic_baco_state(void *handle, int *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c index 044cda005aed..e8a9471c1898 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c @@ -33,21 +33,20 @@ #include 
"smu/smu_7_1_2_d.h" #include "smu/smu_7_1_2_sh_mask.h" -int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) +bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); uint32_t reg; - *cap = false; if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) return 0; reg = RREG32(mmCC_BIF_BX_FUSESTRAP0); if (reg & CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_M
[PATCH] drm/amd/pm: Fix return value and drop redundant param
Fix the return value and drop redundant parameter of get_asic_baco_capability function to simplify the code Signed-off-by: Ma Jun --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 2 +- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 8 +++- drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 11 --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c | 7 +++ drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c | 9 - drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c | 9 - drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 12 +--- 11 files changed, 28 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 3201808c2dd8..60e6b82077e8 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -421,7 +421,7 @@ struct amd_pm_funcs { int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock); int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock); int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock); - int (*get_asic_baco_capability)(void *handle, bool *cap); + bool (*get_asic_baco_capability)(void *handle); int (*get_asic_baco_state)(void *handle, int *state); int (*set_asic_baco_state)(void *handle, int state); int (*get_ppfeature_status)(void *handle, char *buf); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index acf3527fff2d..24fd036a15c0 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -185,8 +185,7 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; void *pp_handle = adev->powerplay.pp_handle; - bool baco_cap; - int ret = 0; + 
bool ret; if (!pp_funcs || !pp_funcs->get_asic_baco_capability) return false; @@ -204,12 +203,11 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) mutex_lock(&adev->pm.mutex); - ret = pp_funcs->get_asic_baco_capability(pp_handle, -&baco_cap); + ret = pp_funcs->get_asic_baco_capability(pp_handle); mutex_unlock(&adev->pm.mutex); - return ret ? false : baco_cap; + return ret; } int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 9e4f8a4104a3..e82c2b2fffb5 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1368,21 +1368,18 @@ static int pp_set_active_display_count(void *handle, uint32_t count) return phm_set_active_display_count(hwmgr, count); } -static int pp_get_asic_baco_capability(void *handle, bool *cap) +static bool pp_get_asic_baco_capability(void *handle) { struct pp_hwmgr *hwmgr = handle; - *cap = false; if (!hwmgr) - return -EINVAL; + return false; if (!(hwmgr->not_vf && amdgpu_dpm) || !hwmgr->hwmgr_func->get_asic_baco_capability) - return 0; + return false; - hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr, cap); - - return 0; + return hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr); } static int pp_get_asic_baco_state(void *handle, int *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c index 044cda005aed..e8a9471c1898 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c @@ -33,21 +33,20 @@ #include "smu/smu_7_1_2_d.h" #include "smu/smu_7_1_2_sh_mask.h" -int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) +bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); uint32_t reg; - *cap = false; if 
(!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) return 0; reg = RREG32(mmCC_BIF_BX_FUSESTRAP0); if (reg & CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK) - *cap = true; + return true; - return 0; + return false; } int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu
Re: [PATCH v4 09/32] drm/amd/display: add plane 3D LUT driver-specific properties
Also, Melissa, you cannot do: if (!plane_state->color_mgmt_changed) return 0; in amdgpu_dm_plane_set_color_properties. The allocation for dc_plane_state could be new and zeroed, so it needs to be set every time. (Until AMDGPU has better dedup'ing of stuff there) The reason it looked like it worked for you now is that the duplicate was broken, so color mgmt for planes was always being marked as dirty there. Thanks - Joshie 🐸✨ On 11/2/23 03:48, Joshua Ashton wrote: On 10/5/23 18:15, Melissa Wen wrote: Add 3D LUT property for plane color transformations using a 3D lookup table. 3D LUT allows for highly accurate and complex color transformations and is suitable to adjust the balance between color channels. It's also more complex to manage and requires more computational resources. Since a 3D LUT has a limited number of entries in each dimension we want to use them in an optimal fashion. This means using the 3D LUT in a colorspace that is optimized for human vision, such as sRGB, PQ, or another non-linear space. Therefore, userspace may need one 1D LUT (shaper) before it to delinearize content and another 1D LUT after 3D LUT (blend) to linearize content again for blending. The next patches add these 1D LUTs to the plane color mgmt pipeline. 
v3: - improve commit message about 3D LUT - describe the 3D LUT entries and size (Harry) v4: - advertise 3D LUT max size as the size of a single-dimension Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 18 +++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 9 .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 14 +++ .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 23 +++ 4 files changed, 64 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 62044d41da75..f7adaa52c23f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -363,6 +363,24 @@ struct amdgpu_mode_info { * @plane_hdr_mult_property: */ struct drm_property *plane_hdr_mult_property; + /** + * @plane_lut3d_property: Plane property for color transformation using + * a 3D LUT (pre-blending), a three-dimensional array where each + * element is an RGB triplet. Each dimension has a size of the cubed + * root of lut3d_size. The array contains samples from the approximated + * function. On AMD, values between samples are estimated by + * tetrahedral interpolation. The array is accessed with three indices, + * one for each input dimension (color channel), blue being the + * outermost dimension, red the innermost. + */ + struct drm_property *plane_lut3d_property; + /** + * @plane_degamma_lut_size_property: Plane property to define the max + * size of 3D LUT as supported by the driver (read-only). 
The max size + * is the max size of one dimension and, therefore, the max number of + * entries for 3D LUT array is the 3D LUT size cubed; + */ + struct drm_property *plane_lut3d_size_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index bb2ce843369d..7a2350c62cf1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -784,6 +784,11 @@ struct dm_plane_state { * TF is needed for any subsequent linear-to-non-linear transforms. */ __u64 hdr_mult; + /** + * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of + * &struct drm_color_lut. + */ + struct drm_property_blob *lut3d; }; struct dm_crtc_state { @@ -869,6 +874,10 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); +/* 3D LUT max size is 17x17x17 (4913 entries) */ +#define MAX_COLOR_3DLUT_SIZE 17 +#define MAX_COLOR_3DLUT_BITDEPTH 12 +/* 1D LUT size */ #define MAX_COLOR_LUT_ENTRIES 4096 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */ #define MAX_COLOR_LEGACY_LUT_ENTRIES 256 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index caf49a044ab4..011f2f9ec890 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -230,6 +230,20 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_hdr_mult_property = prop; + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_LUT3D", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_lut3d_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev),