Re: [PATCH 3/4] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v4)

2023-11-02 Thread Lazar, Lijo




On 11/2/2023 8:34 PM, Victor Lu wrote:

amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.

Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC
and amdgpu_device_xcc_wreg/rreg to use the new xcc_id parameter.

Using amdgpu_sriov_runtime to determine whether to access via kiq or
RLC is sufficient for now.

v4: avoid using amdgpu_sriov_w/rreg

v3: use W/RREG32_XCC to handle non-kiq case

v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters
 of amdgpu_device_wreg/rreg

Signed-off-by: Victor Lu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 13 ++-
  .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  2 +-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 91 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   |  8 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  4 +
  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c   |  8 +-
  9 files changed, 118 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 43c579f5a95e..e8dc75a3ff44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1162,11 +1162,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
  u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id);
  void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
  void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
 u64 reg_addr, u32 reg_data);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t v,
+   uint32_t acc_flags,
+   uint32_t xcc_id);
  void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 uint32_t reg, uint32_t v, uint32_t xcc_id);
  void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
@@ -1207,8 +1214,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
  #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
  #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)
  
-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))

-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
  
  #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))

  #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1218,6 +1225,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
  #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
  #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
  #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
+#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, 
inst)
  #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
  #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
  #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 490c8f5ddb60..80309d39737a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_AQL_DISPATCH_ID_HI);
  
  	for (reg = hqd_base; reg <= hqd_end; reg++)

-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
  
  
  	/* Activate doorbell logic before triggering WPTR poll. */

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 51011e8ee90d..9285789b3a42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void 
*mqd,
  
  	for (reg = hqd_base;

 reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
  
  
  

[PATCH 2/2] drm/amd/pm: Hide pp_dpm_pcie device attribute

2023-11-02 Thread Lijo Lazar
Hide PCIe DPM attribute on SOCs with GC v9.4.2 and GC v9.4.3.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 083048131bca..8f57c77a45dd 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2249,6 +2249,10 @@ static int default_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_
if (amdgpu_dpm_get_apu_thermal_limit(adev, &limit) ==
-EOPNOTSUPP)
*states = ATTR_STATE_UNSUPPORTED;
+   } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) {
+   if (gc_ver == IP_VERSION(9, 4, 2) ||
+   gc_ver == IP_VERSION(9, 4, 3))
+   *states = ATTR_STATE_UNSUPPORTED;
}
 
switch (gc_ver) {
-- 
2.25.1



[PATCH 1/2] drm/amd/pm: Hide irrelevant pm device attributes

2023-11-02 Thread Lijo Lazar
Change return code to EOPNOTSUPP for unsupported functions. Use the
error code information to hide sysfs nodes not valid for the SOC.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c   | 12 ++--
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 12 
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  4 ++--
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index aed635e2da9c..aed232d107b6 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -491,7 +491,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum 
amd_pp_sensors senso
 int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t 
*limit)
 {
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-   int ret = -EINVAL;
+   int ret = -EOPNOTSUPP;
 
if (pp_funcs && pp_funcs->get_apu_thermal_limit) {
mutex_lock(&adev->pm.mutex);
@@ -505,7 +505,7 @@ int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device 
*adev, uint32_t *limit
 int amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev, uint32_t 
limit)
 {
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-   int ret = -EINVAL;
+   int ret = -EOPNOTSUPP;
 
if (pp_funcs && pp_funcs->set_apu_thermal_limit) {
mutex_lock(&adev->pm.mutex);
@@ -1182,7 +1182,7 @@ int amdgpu_dpm_get_sclk_od(struct amdgpu_device *adev)
int ret = 0;
 
if (!pp_funcs->get_sclk_od)
-   return 0;
+   return -EOPNOTSUPP;
 
mutex_lock(&adev->pm.mutex);
ret = pp_funcs->get_sclk_od(adev->powerplay.pp_handle);
@@ -1196,7 +1196,7 @@ int amdgpu_dpm_set_sclk_od(struct amdgpu_device *adev, 
uint32_t value)
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
 
if (is_support_sw_smu(adev))
-   return 0;
+   return -EOPNOTSUPP;
 
mutex_lock(&adev->pm.mutex);
if (pp_funcs->set_sclk_od)
@@ -1219,7 +1219,7 @@ int amdgpu_dpm_get_mclk_od(struct amdgpu_device *adev)
int ret = 0;
 
if (!pp_funcs->get_mclk_od)
-   return 0;
+   return -EOPNOTSUPP;
 
mutex_lock(&adev->pm.mutex);
ret = pp_funcs->get_mclk_od(adev->powerplay.pp_handle);
@@ -1233,7 +1233,7 @@ int amdgpu_dpm_set_mclk_od(struct amdgpu_device *adev, 
uint32_t value)
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
 
if (is_support_sw_smu(adev))
-   return 0;
+   return -EOPNOTSUPP;
 
mutex_lock(&adev->pm.mutex);
if (pp_funcs->set_mclk_od)
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 6ad957aaef3c..083048131bca 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2237,6 +2237,18 @@ static int default_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_
} else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) {
if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE)
*states = ATTR_STATE_UNSUPPORTED;
+   } else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) {
+   if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP)
+   *states = ATTR_STATE_UNSUPPORTED;
+   } else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) {
+   if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP)
+   *states = ATTR_STATE_UNSUPPORTED;
+   } else if (DEVICE_ATTR_IS(apu_thermal_cap)) {
+   u32 limit;
+
+   if (amdgpu_dpm_get_apu_thermal_limit(adev, &limit) ==
+   -EOPNOTSUPP)
+   *states = ATTR_STATE_UNSUPPORTED;
}
 
switch (gc_ver) {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 7fe32cdea5a8..6d6221024d7e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2747,7 +2747,7 @@ static int smu_read_sensor(void *handle,
 
 static int smu_get_apu_thermal_limit(void *handle, uint32_t *limit)
 {
-   int ret = -EINVAL;
+   int ret = -EOPNOTSUPP;
struct smu_context *smu = handle;
 
if (smu->ppt_funcs && smu->ppt_funcs->get_apu_thermal_limit)
@@ -2758,7 +2758,7 @@ static int smu_get_apu_thermal_limit(void *handle, 
uint32_t *limit)
 
 static int smu_set_apu_thermal_limit(void *handle, uint32_t limit)
 {
-   int ret = -EINVAL;
+   int ret = -EOPNOTSUPP;
struct smu_context *smu = handle;
 
if (smu->ppt_funcs && smu->ppt_funcs->set_apu_thermal_limit)
-- 
2.25.1



[PATCH] drm/amdgpu: Enable MES to handle doorbell ring on unmapped queue

2023-11-02 Thread shaoyunl
On navi4x and up, HW can monitor up to 2048 doorbells that are not
currently mapped and trigger an interrupt to MES when one of these
unmapped doorbells is rung.

Signed-off-by: shaoyunl 
---
 drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index ac41c649caa0..eac34ed1a504 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -455,6 +455,27 @@ static void mes_v12_0_init_aggregated_doorbell(struct 
amdgpu_mes *mes)
WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data);
 }
 
+
+static void mes_v12_0_enable_unmapped_doorbell_handling(
+   struct amdgpu_mes *mes, bool enable)
+{
+   struct amdgpu_device *adev = mes->adev;
+   uint32_t data = RREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL);
+
+   /*
+* The default PROC_LSB settng is 0xc which means doorbell
+* addr[16:12] gives the doorbell page number. For kfd, each
+* process will use 2 pages of doorbell, we need to change the
+* setting to 0xd
+*/
+   data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
+   data |= 0xd <<  CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;
+
+   data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT;
+
+   WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data);
+}
+
 static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.add_hw_queue = mes_v12_0_add_hw_queue,
.remove_hw_queue = mes_v12_0_remove_hw_queue,
@@ -1235,6 +1256,9 @@ static int mes_v12_0_hw_init(void *handle)
 
mes_v12_0_init_aggregated_doorbell(&adev->mes);
 
+   /* Enable the MES to handle doorbell ring on unmapped queue */
+   mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true);
+
r = mes_v12_0_query_sched_status(&adev->mes);
if (r) {
DRM_ERROR("MES is busy\n");
-- 
2.34.1



Re: mainline build failure due to 7966f319c66d ("drm/amd/display: Introduce DML2")

2023-11-02 Thread Alex Deucher
On Thu, Nov 2, 2023 at 1:07 PM Sudip Mukherjee
 wrote:
>
> On Thu, 2 Nov 2023 at 16:52, Alex Deucher  wrote:
> >
> > On Thu, Nov 2, 2023 at 5:32 AM Sudip Mukherjee (Codethink)
> >  wrote:
> > >
> > > Hi All,
> > >
> > > The latest mainline kernel branch fails to build x86_64 allmodconfig
> > > with the error:
> > >
> > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In 
> > > function 'dml_prefetch_check':
> > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6707:1: 
> > > error: the frame size of 2056 bytes is larger than 2048 bytes 
> > > [-Werror=frame-larger-than=]
> > >  6707 | }
> > >   | ^
> > >
> > > git bisect pointed to 7966f319c66d ("drm/amd/display: Introduce DML2")
> > >
> > > I will be happy to test any patch or provide any extra log if needed.
> >
> > This was reported earlier and fixed by:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=089dbf6a06f1dcaeed4f8b86d619e8d28b235207
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b141fa036c901303ca5659cc22e9c08f8b097892
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5b2c54e0d0ea09f7a3b500510731878326e1117e
> > but I guess maybe different compiler versions are still hitting this.
>
> Yes, I should have mentioned. gcc-11 and gcc-12 failed to build. but
> gcc-13 was ok.

Should be fixed with Nathan's patch:
https://patchwork.freedesktop.org/patch/565675/

Alex


Re: [PATCH] drm/edid: add a quirk for two 240Hz Samsung monitors

2023-11-02 Thread Alex Deucher
On Thu, Nov 2, 2023 at 3:00 PM Hamza Mahfooz  wrote:
>
> On 11/1/23 17:36, Alex Deucher wrote:
> > On Wed, Nov 1, 2023 at 5:01 PM Hamza Mahfooz  wrote:
> >>
> >> Without this fix the 5120x1440@240 timing of these monitors
> >> leads to screen flickering.
> >>
> >> Cc: sta...@vger.kernel.org # 6.1+
> >> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1442
> >> Co-developed-by: Harry Wentland 
> >> Signed-off-by: Harry Wentland 
> >> Signed-off-by: Hamza Mahfooz 
> >> ---
> >>   drivers/gpu/drm/drm_edid.c | 47 +++---
> >>   1 file changed, 44 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
> >> index bca2af4fe1fc..3fdb8907f66b 100644
> >> --- a/drivers/gpu/drm/drm_edid.c
> >> +++ b/drivers/gpu/drm/drm_edid.c
> >> @@ -89,6 +89,8 @@ static int oui(u8 first, u8 second, u8 third)
> >>   #define EDID_QUIRK_NON_DESKTOP (1 << 12)
> >>   /* Cap the DSC target bitrate to 15bpp */
> >>   #define EDID_QUIRK_CAP_DSC_15BPP   (1 << 13)
> >> +/* Fix up a particular 5120x1440@240Hz timing */
> >> +#define EDID_QUIRK_FIXUP_5120_1440_240 (1 << 14)
> >
> > What is wrong with the original timing that needs to be fixed?
>
> Apparently, all of the timing values for the 5120x1440@240 mode of these
> monitors aren't set correctly (they are all lower than they should be)
> in their EDIDs. For what it's worth, the Windows driver has had a quirk
> similar to the one proposed in this patch for ~2 years.

It would be good to at least include the original mode timings from
the EDID and the new ones added by the quirk in the commit message and
a description of why they are problematic and why the new ones work.

Alex


>
> >
> > Alex
> >
> >
> >>
> >>   #define MICROSOFT_IEEE_OUI 0xca125c
> >>
> >> @@ -170,6 +172,12 @@ static const struct edid_quirk {
> >>  EDID_QUIRK('S', 'A', 'M', 596, EDID_QUIRK_PREFER_LARGE_60),
> >>  EDID_QUIRK('S', 'A', 'M', 638, EDID_QUIRK_PREFER_LARGE_60),
> >>
> >> +   /* Samsung C49G95T */
> >> +   EDID_QUIRK('S', 'A', 'M', 0x7053, EDID_QUIRK_FIXUP_5120_1440_240),
> >> +
> >> +   /* Samsung S49AG95 */
> >> +   EDID_QUIRK('S', 'A', 'M', 0x71ac, EDID_QUIRK_FIXUP_5120_1440_240),
> >> +
> >>  /* Sony PVM-2541A does up to 12 bpc, but only reports max 8 bpc */
> >>  EDID_QUIRK('S', 'N', 'Y', 0x2541, EDID_QUIRK_FORCE_12BPC),
> >>
> >> @@ -6586,7 +6594,37 @@ static void update_display_info(struct 
> >> drm_connector *connector,
> >>  drm_edid_to_eld(connector, drm_edid);
> >>   }
> >>
> >> -static struct drm_display_mode *drm_mode_displayid_detailed(struct 
> >> drm_device *dev,
> >> +static void drm_mode_displayid_detailed_edid_quirks(struct drm_connector 
> >> *connector,
> >> +   struct 
> >> drm_display_mode *mode)
> >> +{
> >> +   unsigned int hsync_width;
> >> +   unsigned int vsync_width;
> >> +
> >> +   if (connector->display_info.quirks & 
> >> EDID_QUIRK_FIXUP_5120_1440_240) {
> >> +   if (mode->hdisplay == 5120 && mode->vdisplay == 1440 &&
> >> +   mode->clock == 1939490) {
> >> +   hsync_width = mode->hsync_end - mode->hsync_start;
> >> +   vsync_width = mode->vsync_end - mode->vsync_start;
> >> +
> >> +   mode->clock = 2018490;
> >> +   mode->hdisplay = 5120;
> >> +   mode->hsync_start = 5120 + 8;
> >> +   mode->hsync_end = 5120 + 8 + hsync_width;
> >> +   mode->htotal = 5200;
> >> +
> >> +   mode->vdisplay = 1440;
> >> +   mode->vsync_start = 1440 + 165;
> >> +   mode->vsync_end = 1440 + 165 + vsync_width;
> >> +   mode->vtotal = 1619;
> >> +
> >> +   drm_dbg_kms(connector->dev,
> >> +   "[CONNECTOR:%d:%s] Samsung 240Hz mode 
> >> quirk applied\n",
> >> +   connector->base.id, connector->name);
> >> +   }
> >> +   }
> >> +}
> >> +
> >> +static struct drm_display_mode *drm_mode_displayid_detailed(struct 
> >> drm_connector *connector,
> >>  struct 
> >> displayid_detailed_timings_1 *timings,
> >>  bool type_7)
> >>   {
> >> @@ -6605,7 +6643,7 @@ static struct drm_display_mode 
> >> *drm_mode_displayid_detailed(struct drm_device *d
> >>  bool hsync_positive = (timings->hsync[1] >> 7) & 0x1;
> >>  bool vsync_positive = (timings->vsync[1] >> 7) & 0x1;
> >>
> >> -   mode = drm_mode_create(dev);
> >> +   mode = drm_mode_create(connector->dev);
> >>  if (!mode)
> >>  return NULL;
> >>
> >> @@ -6628,6 +6666,9 @@ static struct drm_display_mode 
> >> *drm_mode_displa

Re: [PATCH] drm/edid: add a quirk for two 240Hz Samsung monitors

2023-11-02 Thread Hamza Mahfooz

On 11/1/23 17:36, Alex Deucher wrote:

On Wed, Nov 1, 2023 at 5:01 PM Hamza Mahfooz  wrote:


Without this fix the 5120x1440@240 timing of these monitors
leads to screen flickering.

Cc: sta...@vger.kernel.org # 6.1+
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1442
Co-developed-by: Harry Wentland 
Signed-off-by: Harry Wentland 
Signed-off-by: Hamza Mahfooz 
---
  drivers/gpu/drm/drm_edid.c | 47 +++---
  1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index bca2af4fe1fc..3fdb8907f66b 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -89,6 +89,8 @@ static int oui(u8 first, u8 second, u8 third)
  #define EDID_QUIRK_NON_DESKTOP (1 << 12)
  /* Cap the DSC target bitrate to 15bpp */
  #define EDID_QUIRK_CAP_DSC_15BPP   (1 << 13)
+/* Fix up a particular 5120x1440@240Hz timing */
+#define EDID_QUIRK_FIXUP_5120_1440_240 (1 << 14)


What is wrong with the original timing that needs to be fixed?


Apparently, all of the timing values for the 5120x1440@240 mode of these
monitors aren't set correctly (they are all lower than they should be)
in their EDIDs. For what it's worth, the Windows driver has had a quirk
similar to the one proposed in this patch for ~2 years.



Alex




  #define MICROSOFT_IEEE_OUI 0xca125c

@@ -170,6 +172,12 @@ static const struct edid_quirk {
 EDID_QUIRK('S', 'A', 'M', 596, EDID_QUIRK_PREFER_LARGE_60),
 EDID_QUIRK('S', 'A', 'M', 638, EDID_QUIRK_PREFER_LARGE_60),

+   /* Samsung C49G95T */
+   EDID_QUIRK('S', 'A', 'M', 0x7053, EDID_QUIRK_FIXUP_5120_1440_240),
+
+   /* Samsung S49AG95 */
+   EDID_QUIRK('S', 'A', 'M', 0x71ac, EDID_QUIRK_FIXUP_5120_1440_240),
+
 /* Sony PVM-2541A does up to 12 bpc, but only reports max 8 bpc */
 EDID_QUIRK('S', 'N', 'Y', 0x2541, EDID_QUIRK_FORCE_12BPC),

@@ -6586,7 +6594,37 @@ static void update_display_info(struct drm_connector 
*connector,
 drm_edid_to_eld(connector, drm_edid);
  }

-static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device 
*dev,
+static void drm_mode_displayid_detailed_edid_quirks(struct drm_connector 
*connector,
+   struct drm_display_mode 
*mode)
+{
+   unsigned int hsync_width;
+   unsigned int vsync_width;
+
+   if (connector->display_info.quirks & EDID_QUIRK_FIXUP_5120_1440_240) {
+   if (mode->hdisplay == 5120 && mode->vdisplay == 1440 &&
+   mode->clock == 1939490) {
+   hsync_width = mode->hsync_end - mode->hsync_start;
+   vsync_width = mode->vsync_end - mode->vsync_start;
+
+   mode->clock = 2018490;
+   mode->hdisplay = 5120;
+   mode->hsync_start = 5120 + 8;
+   mode->hsync_end = 5120 + 8 + hsync_width;
+   mode->htotal = 5200;
+
+   mode->vdisplay = 1440;
+   mode->vsync_start = 1440 + 165;
+   mode->vsync_end = 1440 + 165 + vsync_width;
+   mode->vtotal = 1619;
+
+   drm_dbg_kms(connector->dev,
+   "[CONNECTOR:%d:%s] Samsung 240Hz mode quirk 
applied\n",
+   connector->base.id, connector->name);
+   }
+   }
+}
+
+static struct drm_display_mode *drm_mode_displayid_detailed(struct 
drm_connector *connector,
 struct 
displayid_detailed_timings_1 *timings,
 bool type_7)
  {
@@ -6605,7 +6643,7 @@ static struct drm_display_mode 
*drm_mode_displayid_detailed(struct drm_device *d
 bool hsync_positive = (timings->hsync[1] >> 7) & 0x1;
 bool vsync_positive = (timings->vsync[1] >> 7) & 0x1;

-   mode = drm_mode_create(dev);
+   mode = drm_mode_create(connector->dev);
 if (!mode)
 return NULL;

@@ -6628,6 +6666,9 @@ static struct drm_display_mode 
*drm_mode_displayid_detailed(struct drm_device *d

 if (timings->flags & 0x80)
 mode->type |= DRM_MODE_TYPE_PREFERRED;
+
+   drm_mode_displayid_detailed_edid_quirks(connector, mode);
+
 drm_mode_set_name(mode);

 return mode;
@@ -6650,7 +6691,7 @@ static int add_displayid_detailed_1_modes(struct 
drm_connector *connector,
 for (i = 0; i < num_timings; i++) {
 struct displayid_detailed_timings_1 *timings = 
&det->timings[i];

-   newmode = drm_mode_displayid_detailed(connector->dev, timings, 
type_7);
+   newmode = drm_mode_displayid_detailed(connector, timings, 
type_7);
 if (!newmode)
 continue;

--
2.42.0


--
Hamza



Re: [PATCH v2] drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2

2023-11-02 Thread Alex Deucher
On Thu, Nov 2, 2023 at 1:41 PM Nathan Chancellor  wrote:
>
> When building ARCH=x86_64 allmodconfig with clang, which will typically
> have sanitizers enabled, there is a warning about a large stack frame.
>
>   drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: 
> error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' 
> [-Werror,-Wframe-larger-than]
>6265 | static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
> | ^
>   1 error generated.
>
> Notably, GCC 13.2.0 does not do too much of a better job, as it is right
> at the current limit of 2048 (and others have reported being over with
> older GCC versions):
>
>   drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In 
> function 'dml_prefetch_check':
>   drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: 
> error: the frame size of 2048 bytes is larger than 1800 bytes 
> [-Werror=frame-larger-than=]
>6705 | }
> | ^
>
> In the past, these warnings have been avoided by reducing the number of
> parameters to various functions so that not as many arguments need to be
> passed on the stack. However, these patches take a good amount of effort
> to write despite being mechanical due to code structure and complexity
> and they are never carried forward to new generations of the code so
> that effort has to be expended every new hardware generation, which
> becomes harder to justify as time goes on.
>
> To avoid having a noticeable or lengthy breakage in all{mod,yes}config,
> which are easy testing targets that have -Werror enabled, increase the
> limit for configurations that have KASAN or KCSAN enabled by 50% so that
> cases of extremely poor code generation can still be caught while not
> breaking the majority of builds. CONFIG_KMSAN also causes high stack
> usage but the frame limit is already set to zero when it is enabled,
> which is accounted for by the check for CONFIG_FRAME_WARN=0 in the dml2
> Makefile.
>
> Signed-off-by: Nathan Chancellor 
> ---
> If there is another DRM pull before 6.7-rc1, it would be much
> appreciated if this could make that so that other trees are not
> potentially broken by this. If not, no worries, as it was my fault for
> not sending this sooner.

Applied.  Thanks!  Will send out a PR this week.

Alex


>
> Changes in v2:
> - Adjust workaround to check for either CONFIG_KASAN=y or
>   CONFIG_KCSAN=y, as the same problem has been reported with older
>   versions of GCC (Hamza, Alex)
> - Link to v1: 
> https://lore.kernel.org/r/20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-v1-1-6eb157352...@kernel.org
> ---
>  drivers/gpu/drm/amd/display/dc/dml2/Makefile | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
> b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> index 70ae5eba624e..acff3449b8d7 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> @@ -60,8 +60,12 @@ endif
>  endif
>
>  ifneq ($(CONFIG_FRAME_WARN),0)
> +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
> +frame_warn_flag := -Wframe-larger-than=3072
> +else
>  frame_warn_flag := -Wframe-larger-than=2048
>  endif
> +endif
>
>  CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) 
> $(frame_warn_flag)
>  CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags)
>
> ---
> base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6
> change-id: 
> 20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871
>
> Best regards,
> --
> Nathan Chancellor 
>


Re: [PATCH] drm/amd/display: Increase frame warning limit for clang in dml2

2023-11-02 Thread Alex Deucher
On Thu, Nov 2, 2023 at 1:12 PM Nathan Chancellor  wrote:
>
> On Thu, Nov 02, 2023 at 12:59:00PM -0400, Hamza Mahfooz wrote:
> > On 11/2/23 12:24, Nathan Chancellor wrote:
> > > When building ARCH=x86_64 allmodconfig with clang, which have sanitizers
> > > enabled, there is a warning about a large stack frame.
> > >
> > >
> > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13:
> > >  error: stack frame size (2520) exceeds limit (2048) in 
> > > 'dml_prefetch_check' [-Werror,-Wframe-larger-than]
> > > 6265 | static void dml_prefetch_check(struct display_mode_lib_st 
> > > *mode_lib)
> > >  | ^
> > >1 error generated.
> > >
> > > Notably, GCC 13.2.0 does not do too much of a better job, as it is right
> > > at the current limit of 2048:
> > >
> > >drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In 
> > > function 'dml_prefetch_check':
> > >
> > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: 
> > > error: the frame size of 2048 bytes is larger than 1800 bytes 
> > > [-Werror=frame-larger-than=]
> > > 6705 | }
> > >  | ^
> > >
> > > In the past, these warnings have been avoided by reducing the number of
> > > parameters to various functions so that not as many arguments need to be
> > > passed on the stack. However, these patches take a good amount of effort
> > > to write despite being mechanical due to code structure and complexity
> > > and they are never carried forward to new generations of the code so
> > > that effort has to be expended every new hardware generation, which
> > > becomes harder to justify as time goes on.
> > >
> > > There is some effort to improve clang's code generation but that may
> > > take some time between code review, shifting priorities, and release
> > > cycles. To avoid having a noticeable or lengthy breakage in
> > > all{mod,yes}config, which are easy testing targets that have -Werror
> > > enabled, increase the limit for clang by 50% so that cases of extremely
> > > poor code generation can still be caught while not breaking the majority
> > > of builds. When clang's code generation improves, the limit increase can
> > > be restricted to older clang versions.
> > >
> > > Signed-off-by: Nathan Chancellor 
> > > ---
> > > If there is another DRM pull before 6.7-rc1, it would be much
> > > appreciated if this could make that so that other trees are not
> > > potentially broken by this. If not, no worries, as it was my fault for
> > > not sending this sooner.
> > > ---
> > >   drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +-
> > >   1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
> > > b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> > > index 70ae5eba624e..dff8237c0999 100644
> > > --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> > > +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> > > @@ -60,7 +60,7 @@ endif
> > >   endif
> > >   ifneq ($(CONFIG_FRAME_WARN),0)
> > > -frame_warn_flag := -Wframe-larger-than=2048
> > > +frame_warn_flag := -Wframe-larger-than=$(if 
> > > $(CONFIG_CC_IS_CLANG),3072,2048)
> >
> > I would prefer checking for `CONFIG_KASAN || CONFIG_KCSAN` instead
> > since the stack usage shouldn't change much if both of those are disabled.
>
> So something like this? Or were you talking about replacing the clang
> check entirely with the KASAN/KCSAN check?

I think replacing the clang check entirely.  A similar issue was just
reported on different GCC versions:
https://lists.freedesktop.org/archives/amd-gfx/2023-November/100725.html

Alex

>
> diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
> b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> index 70ae5eba624e..0fc1b13295eb 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> @@ -60,8 +60,12 @@ endif
>  endif
>
>  ifneq ($(CONFIG_FRAME_WARN),0)
> +ifeq ($(CONFIG_CC_IS_CLANG)$(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),yy)
> +frame_warn_flag := -Wframe-larger-than=3072
> +else
>  frame_warn_flag := -Wframe-larger-than=2048
>  endif
> +endif
>
>  CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) 
> $(frame_warn_flag)
>  CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags)
>
> > >   endif
> > >   CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) 
> > > $(frame_warn_flag)
> > >
> > > ---
> > > base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6
> > > change-id: 
> > > 20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871
> > >
> > > Best regards,
> > --
> > Hamza
> >


[PATCH v2] drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2

2023-11-02 Thread Nathan Chancellor
When building ARCH=x86_64 allmodconfig with clang, which will typically
have sanitizers enabled, there is a warning about a large stack frame.

  drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: 
error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' 
[-Werror,-Wframe-larger-than]
   6265 | static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
| ^
  1 error generated.

Notably, GCC 13.2.0 does not do too much of a better job, as it is right
at the current limit of 2048 (and others have reported being over with
older GCC versions):

  drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In 
function 'dml_prefetch_check':
  drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: 
error: the frame size of 2048 bytes is larger than 1800 bytes 
[-Werror=frame-larger-than=]
   6705 | }
| ^

In the past, these warnings have been avoided by reducing the number of
parameters to various functions so that not as many arguments need to be
passed on the stack. However, these patches take a good amount of effort
to write despite being mechanical due to code structure and complexity
and they are never carried forward to new generations of the code so
that effort has to be expended every new hardware generation, which
becomes harder to justify as time goes on.

To avoid having a noticeable or lengthy breakage in all{mod,yes}config,
which are easy testing targets that have -Werror enabled, increase the
limit for configurations that have KASAN or KCSAN enabled by 50% so that
cases of extremely poor code generation can still be caught while not
breaking the majority of builds. CONFIG_KMSAN also causes high stack
usage but the frame limit is already set to zero when it is enabled,
which is accounted for by the check for CONFIG_FRAME_WARN=0 in the dml2
Makefile.

Signed-off-by: Nathan Chancellor 
---
If there is another DRM pull before 6.7-rc1, it would be much
appreciated if this could make that so that other trees are not
potentially broken by this. If not, no worries, as it was my fault for
not sending this sooner.

Changes in v2:
- Adjust workaround to check for either CONFIG_KASAN=y or
  CONFIG_KCSAN=y, as the same problem has been reported with older
  versions of GCC (Hamza, Alex)
- Link to v1: 
https://lore.kernel.org/r/20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-v1-1-6eb157352...@kernel.org
---
 drivers/gpu/drm/amd/display/dc/dml2/Makefile | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index 70ae5eba624e..acff3449b8d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -60,8 +60,12 @@ endif
 endif
 
 ifneq ($(CONFIG_FRAME_WARN),0)
+ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+frame_warn_flag := -Wframe-larger-than=3072
+else
 frame_warn_flag := -Wframe-larger-than=2048
 endif
+endif
 
 CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) 
$(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags)

---
base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6
change-id: 
20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871

Best regards,
-- 
Nathan Chancellor 



Re: [PATCH] drm/amd/display: Increase frame warning limit for clang in dml2

2023-11-02 Thread Hamza Mahfooz

On 11/2/23 13:12, Nathan Chancellor wrote:

On Thu, Nov 02, 2023 at 12:59:00PM -0400, Hamza Mahfooz wrote:

On 11/2/23 12:24, Nathan Chancellor wrote:

When building ARCH=x86_64 allmodconfig with clang, which has sanitizers
enabled, there is a warning about a large stack frame.

drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: 
error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' 
[-Werror,-Wframe-larger-than]
 6265 | static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
  | ^
1 error generated.

Notably, GCC 13.2.0 does not do too much of a better job, as it is right
at the current limit of 2048:

drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In 
function 'dml_prefetch_check':
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: 
error: the frame size of 2048 bytes is larger than 1800 bytes 
[-Werror=frame-larger-than=]
 6705 | }
  | ^

In the past, these warnings have been avoided by reducing the number of
parameters to various functions so that not as many arguments need to be
passed on the stack. However, these patches take a good amount of effort
to write despite being mechanical due to code structure and complexity
and they are never carried forward to new generations of the code so
that effort has to be expended every new hardware generation, which
becomes harder to justify as time goes on.

There is some effort to improve clang's code generation but that may
take some time between code review, shifting priorities, and release
cycles. To avoid having a noticeable or lengthy breakage in
all{mod,yes}config, which are easy testing targets that have -Werror
enabled, increase the limit for clang by 50% so that cases of extremely
poor code generation can still be caught while not breaking the majority
of builds. When clang's code generation improves, the limit increase can
be restricted to older clang versions.

Signed-off-by: Nathan Chancellor 
---
If there is another DRM pull before 6.7-rc1, it would be much
appreciated if this could make that so that other trees are not
potentially broken by this. If not, no worries, as it was my fault for
not sending this sooner.
---
   drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index 70ae5eba624e..dff8237c0999 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -60,7 +60,7 @@ endif
   endif
   ifneq ($(CONFIG_FRAME_WARN),0)
-frame_warn_flag := -Wframe-larger-than=2048
+frame_warn_flag := -Wframe-larger-than=$(if $(CONFIG_CC_IS_CLANG),3072,2048)


I would prefer checking for `CONFIG_KASAN || CONFIG_KCSAN` instead
since the stack usage shouldn't change much if both of those are disabled.


So something like this? Or were you talking about replacing the clang
check entirely with the KASAN/KCSAN check?


I think for the time being replacing the clang check with a KASAN/KCSAN
check would make more sense, considering that the allmodconfig for older
versions of gcc is also broken (see [1]).

1. 
https://lore.kernel.org/amd-gfx/CADVatmO9NCs=ryng72hnzmdpqg862gpgnnfhq4uwtpekjok...@mail.gmail.com/




diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index 70ae5eba624e..0fc1b13295eb 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -60,8 +60,12 @@ endif
  endif
  
  ifneq ($(CONFIG_FRAME_WARN),0)

+ifeq ($(CONFIG_CC_IS_CLANG)$(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),yy)
+frame_warn_flag := -Wframe-larger-than=3072
+else
  frame_warn_flag := -Wframe-larger-than=2048
  endif
+endif
  
  CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag)

  CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags)


   endif
   CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) 
$(frame_warn_flag)

---
base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6
change-id: 
20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871

Best regards,

--
Hamza


--
Hamza



[PATCH 3/4] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v4)

2023-11-02 Thread Victor Lu
amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.

Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC
and amdgpu_device_xcc_wreg/rreg to use the new xcc_id parameter.

Using amdgpu_sriov_runtime to determine whether to access via kiq or
RLC is sufficient for now.

v4: avoid using amdgpu_sriov_w/rreg

v3: use W/RREG32_XCC to handle non-kiq case

v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters
of amdgpu_device_wreg/rreg

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 13 ++-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 91 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   |  8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  4 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c   |  8 +-
 9 files changed, 118 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 43c579f5a95e..e8dc75a3ff44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1162,11 +1162,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
 u64 reg_addr, u32 reg_data);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t v,
+   uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 uint32_t reg, uint32_t v, uint32_t xcc_id);
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
@@ -1207,8 +1214,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
 #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)
 
-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
 
 #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
 #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1218,6 +1225,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
+#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, 
inst)
 #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
 #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
 #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 490c8f5ddb60..80309d39737a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_AQL_DISPATCH_ID_HI);
 
for (reg = hqd_base; reg <= hqd_end; reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 51011e8ee90d..9285789b3a42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void 
*mqd,
 
for (reg = hqd_base;
 reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
dif

Re: [PATCH] drm/amd/display: Increase frame warning limit for clang in dml2

2023-11-02 Thread Nathan Chancellor
On Thu, Nov 02, 2023 at 12:59:00PM -0400, Hamza Mahfooz wrote:
> On 11/2/23 12:24, Nathan Chancellor wrote:
> > When building ARCH=x86_64 allmodconfig with clang, which has sanitizers
> > enabled, there is a warning about a large stack frame.
> > 
> >
> > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: 
> > error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' 
> > [-Werror,-Wframe-larger-than]
> > 6265 | static void dml_prefetch_check(struct display_mode_lib_st 
> > *mode_lib)
> >  | ^
> >1 error generated.
> > 
> > Notably, GCC 13.2.0 does not do too much of a better job, as it is right
> > at the current limit of 2048:
> > 
> >drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In 
> > function 'dml_prefetch_check':
> >
> > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: 
> > error: the frame size of 2048 bytes is larger than 1800 bytes 
> > [-Werror=frame-larger-than=]
> > 6705 | }
> >  | ^
> > 
> > In the past, these warnings have been avoided by reducing the number of
> > parameters to various functions so that not as many arguments need to be
> > passed on the stack. However, these patches take a good amount of effort
> > to write despite being mechanical due to code structure and complexity
> > and they are never carried forward to new generations of the code so
> > that effort has to be expended every new hardware generation, which
> > becomes harder to justify as time goes on.
> > 
> > There is some effort to improve clang's code generation but that may
> > take some time between code review, shifting priorities, and release
> > cycles. To avoid having a noticeable or lengthy breakage in
> > all{mod,yes}config, which are easy testing targets that have -Werror
> > enabled, increase the limit for clang by 50% so that cases of extremely
> > poor code generation can still be caught while not breaking the majority
> > of builds. When clang's code generation improves, the limit increase can
> > be restricted to older clang versions.
> > 
> > Signed-off-by: Nathan Chancellor 
> > ---
> > If there is another DRM pull before 6.7-rc1, it would be much
> > appreciated if this could make that so that other trees are not
> > potentially broken by this. If not, no worries, as it was my fault for
> > not sending this sooner.
> > ---
> >   drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
> > b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> > index 70ae5eba624e..dff8237c0999 100644
> > --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> > +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> > @@ -60,7 +60,7 @@ endif
> >   endif
> >   ifneq ($(CONFIG_FRAME_WARN),0)
> > -frame_warn_flag := -Wframe-larger-than=2048
> > +frame_warn_flag := -Wframe-larger-than=$(if 
> > $(CONFIG_CC_IS_CLANG),3072,2048)
> 
> I would prefer checking for `CONFIG_KASAN || CONFIG_KCSAN` instead
> since the stack usage shouldn't change much if both of those are disabled.

So something like this? Or were you talking about replacing the clang
check entirely with the KASAN/KCSAN check?

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index 70ae5eba624e..0fc1b13295eb 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -60,8 +60,12 @@ endif
 endif
 
 ifneq ($(CONFIG_FRAME_WARN),0)
+ifeq ($(CONFIG_CC_IS_CLANG)$(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),yy)
+frame_warn_flag := -Wframe-larger-than=3072
+else
 frame_warn_flag := -Wframe-larger-than=2048
 endif
+endif
 
 CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) 
$(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags)

> >   endif
> >   CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) 
> > $(frame_warn_flag)
> > 
> > ---
> > base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6
> > change-id: 
> > 20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871
> > 
> > Best regards,
> -- 
> Hamza
> 


Re: mainline build failure due to 7966f319c66d ("drm/amd/display: Introduce DML2")

2023-11-02 Thread Sudip Mukherjee
On Thu, 2 Nov 2023 at 16:52, Alex Deucher  wrote:
>
> On Thu, Nov 2, 2023 at 5:32 AM Sudip Mukherjee (Codethink)
>  wrote:
> >
> > Hi All,
> >
> > The latest mainline kernel branch fails to build x86_64 allmodconfig
> > with the error:
> >
> > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In 
> > function 'dml_prefetch_check':
> > drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6707:1: 
> > error: the frame size of 2056 bytes is larger than 2048 bytes 
> > [-Werror=frame-larger-than=]
> >  6707 | }
> >   | ^
> >
> > git bisect pointed to 7966f319c66d ("drm/amd/display: Introduce DML2")
> >
> > I will be happy to test any patch or provide any extra log if needed.
>
> This was reported earlier and fixed by:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=089dbf6a06f1dcaeed4f8b86d619e8d28b235207
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b141fa036c901303ca5659cc22e9c08f8b097892
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5b2c54e0d0ea09f7a3b500510731878326e1117e
> but I guess maybe different compiler versions are still hitting this.

Yes, I should have mentioned: gcc-11 and gcc-12 failed to build, but
gcc-13 was ok.


-- 
Regards
Sudip


Re: mainline build failure due to 7966f319c66d ("drm/amd/display: Introduce DML2")

2023-11-02 Thread Alex Deucher
On Thu, Nov 2, 2023 at 5:32 AM Sudip Mukherjee (Codethink)
 wrote:
>
> Hi All,
>
> The latest mainline kernel branch fails to build x86_64 allmodconfig
> with the error:
>
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In 
> function 'dml_prefetch_check':
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6707:1: 
> error: the frame size of 2056 bytes is larger than 2048 bytes 
> [-Werror=frame-larger-than=]
>  6707 | }
>   | ^
>
> git bisect pointed to 7966f319c66d ("drm/amd/display: Introduce DML2")
>
> I will be happy to test any patch or provide any extra log if needed.

This was reported earlier and fixed by:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=089dbf6a06f1dcaeed4f8b86d619e8d28b235207
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b141fa036c901303ca5659cc22e9c08f8b097892
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5b2c54e0d0ea09f7a3b500510731878326e1117e
but I guess maybe different compiler versions are still hitting this.

Alex

>
> #regzbot introduced: 7966f319c66d9468623c6a6a017ecbc0dd79be75
>
> --
> Regards
> Sudip


Re: [PATCH] drm/amd/display: Increase frame warning limit for clang in dml2

2023-11-02 Thread Hamza Mahfooz

On 11/2/23 12:24, Nathan Chancellor wrote:

When building ARCH=x86_64 allmodconfig with clang, which has sanitizers
enabled, there is a warning about a large stack frame.

   drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: 
error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' 
[-Werror,-Wframe-larger-than]
6265 | static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
 | ^
   1 error generated.

Notably, GCC 13.2.0 does not do too much of a better job, as it is right
at the current limit of 2048:

   drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In 
function 'dml_prefetch_check':
   drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: 
error: the frame size of 2048 bytes is larger than 1800 bytes 
[-Werror=frame-larger-than=]
6705 | }
 | ^

In the past, these warnings have been avoided by reducing the number of
parameters to various functions so that not as many arguments need to be
passed on the stack. However, these patches take a good amount of effort
to write despite being mechanical due to code structure and complexity
and they are never carried forward to new generations of the code so
that effort has to be expended every new hardware generation, which
becomes harder to justify as time goes on.

There is some effort to improve clang's code generation but that may
take some time between code review, shifting priorities, and release
cycles. To avoid having a noticeable or lengthy breakage in
all{mod,yes}config, which are easy testing targets that have -Werror
enabled, increase the limit for clang by 50% so that cases of extremely
poor code generation can still be caught while not breaking the majority
of builds. When clang's code generation improves, the limit increase can
be restricted to older clang versions.

Signed-off-by: Nathan Chancellor 
---
If there is another DRM pull before 6.7-rc1, it would be much
appreciated if this could make that so that other trees are not
potentially broken by this. If not, no worries, as it was my fault for
not sending this sooner.
---
  drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index 70ae5eba624e..dff8237c0999 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -60,7 +60,7 @@ endif
  endif
  
  ifneq ($(CONFIG_FRAME_WARN),0)

-frame_warn_flag := -Wframe-larger-than=2048
+frame_warn_flag := -Wframe-larger-than=$(if $(CONFIG_CC_IS_CLANG),3072,2048)


I would prefer checking for `CONFIG_KASAN || CONFIG_KCSAN` instead
since the stack usage shouldn't change much if both of those are disabled.


  endif
  
  CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag)


---
base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6
change-id: 
20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871

Best regards,

--
Hamza



RE: [PATCH] drm: Disable XNACK on SRIOV environment

2023-11-02 Thread Liu, Shaoyun
[AMD Official Use Only - General]

Looks ok to me.
Reviewed-by: Shaoyun.liu 

-Original Message-
From: Kakarya, Surbhi 
Sent: Thursday, November 2, 2023 12:10 PM
To: Kakarya, Surbhi ; amd-gfx@lists.freedesktop.org; 
Yang, Philip ; Liu, Shaoyun 
Subject: RE: [PATCH] drm: Disable XNACK on SRIOV environment

[AMD Official Use Only - General]

Ping..

-Original Message-
From: Surbhi Kakarya 
Sent: Monday, October 30, 2023 9:54 PM
To: amd-gfx@lists.freedesktop.org; Yang, Philip 
Cc: Kakarya, Surbhi 
Subject: [PATCH] drm: Disable XNACK on SRIOV environment

The purpose of this patch is to disable XNACK (i.e. set XNACK OFF mode) on
SRIOV platforms which don't support it.

This will prevent user-space applications from failing or behaving
unexpectedly whenever an application needs to run a test case in XNACK ON mode.

Signed-off-by: Surbhi Kakarya 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  5 -  
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  9 +  
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  1 +  
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 --
 4 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 2dce338b0f1e..d582b240f919 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -826,7 +826,10 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
gc_ver == IP_VERSION(9, 4, 3) ||
gc_ver >= IP_VERSION(10, 3, 0));

-   gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : 
amdgpu_noretry;
+   if (!amdgpu_sriov_xnack_support(adev))
+   gmc->noretry = 1;
+   else
+   gmc->noretry = (amdgpu_noretry == -1) ? noretry_default :
+amdgpu_noretry;
 }

 void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index a0aa624f5a92..41c77d5c5a79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1093,3 +1093,12 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
else
return RREG32(offset);
 }
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) {
+   bool xnack_mode = 1;
+
+   if (amdgpu_sriov_vf(adev) && (adev->ip_versions[GC_HWIP][0] == 
IP_VERSION(9, 4, 2)))
+   xnack_mode = 0;
+
+   return xnack_mode;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 858ef21ae515..935ca736300e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -365,4 +365,5 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,  bool 
amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
uint32_t ucode_id);  void amdgpu_virt_post_reset(struct 
amdgpu_device *adev);
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index fbf053001af9..69954a2a8503 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1416,8 +1416,14 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool 
supported)
 * per-process XNACK mode selection. But let the dev->noretry
 * setting still influence the default XNACK mode.
 */
-   if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev))
-   continue;
+   if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) {
+   if (!amdgpu_sriov_xnack_support(dev->kfd->adev)) {
+   pr_debug("SRIOV platform xnack not 
supported\n");
+   return false;
+   }
+   else
+   continue;
+   }

/* GFXv10 and later GPUs do not support shader preemption
 * during page faults. This can lead to poor QoS for queue
--
2.25.1




[PATCH] drm/amd/display: Increase frame warning limit for clang in dml2

2023-11-02 Thread Nathan Chancellor
When building ARCH=x86_64 allmodconfig with clang, which has sanitizers
enabled, there is a warning about a large stack frame.

  drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6265:13: 
error: stack frame size (2520) exceeds limit (2048) in 'dml_prefetch_check' 
[-Werror,-Wframe-larger-than]
   6265 | static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
| ^
  1 error generated.

Notably, GCC 13.2.0 does not do too much of a better job, as it is right
at the current limit of 2048:

  drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In 
function 'dml_prefetch_check':
  drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6705:1: 
error: the frame size of 2048 bytes is larger than 1800 bytes 
[-Werror=frame-larger-than=]
   6705 | }
| ^

In the past, these warnings have been avoided by reducing the number of
parameters to various functions so that not as many arguments need to be
passed on the stack. However, these patches take a good amount of effort
to write despite being mechanical due to code structure and complexity
and they are never carried forward to new generations of the code so
that effort has to be expended every new hardware generation, which
becomes harder to justify as time goes on.

There is some effort to improve clang's code generation but that may
take some time between code review, shifting priorities, and release
cycles. To avoid having a noticeable or lengthy breakage in
all{mod,yes}config, which are easy testing targets that have -Werror
enabled, increase the limit for clang by 50% so that cases of extremely
poor code generation can still be caught while not breaking the majority
of builds. When clang's code generation improves, the limit increase can
be restricted to older clang versions.

Signed-off-by: Nathan Chancellor 
---
If there is another DRM pull before 6.7-rc1, it would be much
appreciated if this could make that so that other trees are not
potentially broken by this. If not, no worries, as it was my fault for
not sending this sooner.
---
 drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index 70ae5eba624e..dff8237c0999 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -60,7 +60,7 @@ endif
 endif
 
 ifneq ($(CONFIG_FRAME_WARN),0)
-frame_warn_flag := -Wframe-larger-than=2048
+frame_warn_flag := -Wframe-larger-than=$(if $(CONFIG_CC_IS_CLANG),3072,2048)
 endif
 
 CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) 
$(frame_warn_flag)

---
base-commit: 21e80f3841c01aeaf32d7aee7bbc87b3db1aa0c6
change-id: 
20231102-amdgpu-dml2-increase-frame-size-warning-for-clang-c93bd2d6a871

Best regards,
-- 
Nathan Chancellor 



RE: [PATCH] drm: Disable XNACK on SRIOV environment

2023-11-02 Thread Kakarya, Surbhi
[AMD Official Use Only - General]

Ping..

-Original Message-
From: Surbhi Kakarya 
Sent: Monday, October 30, 2023 9:54 PM
To: amd-gfx@lists.freedesktop.org; Yang, Philip 
Cc: Kakarya, Surbhi 
Subject: [PATCH] drm: Disable XNACK on SRIOV environment

The purpose of this patch is to disable XNACK (i.e. set XNACK OFF mode) on
SRIOV platforms which don't support it.

This will prevent user-space applications from failing or behaving
unexpectedly whenever an application needs to run a test case in XNACK ON mode.

Signed-off-by: Surbhi Kakarya 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  5 -  
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  9 +  
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  1 +  
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 --
 4 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 2dce338b0f1e..d582b240f919 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -826,7 +826,10 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
gc_ver == IP_VERSION(9, 4, 3) ||
gc_ver >= IP_VERSION(10, 3, 0));

-   gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : 
amdgpu_noretry;
+   if (!amdgpu_sriov_xnack_support(adev))
+   gmc->noretry = 1;
+   else
+   gmc->noretry = (amdgpu_noretry == -1) ? noretry_default :
+amdgpu_noretry;
 }

 void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index a0aa624f5a92..41c77d5c5a79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1093,3 +1093,12 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
else
return RREG32(offset);
 }
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) {
+   bool xnack_mode = 1;
+
+   if (amdgpu_sriov_vf(adev) && (adev->ip_versions[GC_HWIP][0] == 
IP_VERSION(9, 4, 2)))
+   xnack_mode = 0;
+
+   return xnack_mode;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 858ef21ae515..935ca736300e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -365,4 +365,5 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,  bool 
amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
uint32_t ucode_id);
 void amdgpu_virt_post_reset(struct amdgpu_device *adev);
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index fbf053001af9..69954a2a8503 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1416,8 +1416,14 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool 
supported)
 * per-process XNACK mode selection. But let the dev->noretry
 * setting still influence the default XNACK mode.
 */
-   if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev))
-   continue;
+   if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) {
+   if (!amdgpu_sriov_xnack_support(dev->kfd->adev)) {
+   pr_debug("SRIOV platform xnack not 
supported\n");
+   return false;
+   }
+   else
+   continue;
+   }

/* GFXv10 and later GPUs do not support shader preemption
 * during page faults. This can lead to poor QoS for queue
--
2.25.1



[PATCH 2/4] drm/amdgpu: Add xcc param to SRIOV kiq write and WREG32_SOC15_IP_NO_KIQ (v4)

2023-11-02 Thread Victor Lu
WREG32/RREG32_SOC15_IP_NO_KIQ and amdgpu_virt_kiq_reg_write_reg_wait
are not using the correct rlcg interface or mec engine, respectively.

Add xcc instance parameter to them.

v4: Use GET_INST and squash commit with:
"drm/amdgpu: Add xcc_inst param to amdgpu_virt_kiq_reg_write_reg_wait"

v3: xcc not needed for MMHUB

v2: rebase

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  6 +++---
 6 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index a0aa624f5a92..e179f022c428 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
-   uint32_t ref, uint32_t mask)
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst)
 {
-   struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+   struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst];
struct amdgpu_ring *ring = &kiq->ring;
signed long r, cnt = 0;
unsigned long flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 858ef21ae515..bb436d41b4ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -334,7 +334,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
-   uint32_t ref, uint32_t mask);
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d8a4fddab9c1..a43d1aa42e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, GET_INST(GC, 0));
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 19eaada35ede..93f100dd5d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -228,7 +228,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, GET_INST(GC, 0));
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3a1050344b59..35ef7529cc8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -817,7 +817,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
 {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
-   u32 j, inv_req, tmp, sem, req, ack;
+   u32 j, inv_req, tmp, sem, req, ack, inst;
const unsigned int eng = 17;
struct amdgpu_vmhub *hub;
 
@@ -832,13 +832,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
/* This is necessary for a HW workaround under SRIOV as well
 * as GFXOFF under bare metal
 */
-   if (adev->gfx.kiq[0].ring.sched.ready &&
+   if (vmhub >= AMDGPU_MMHUB0(0))
+   inst = GET_INST(GC, 0);
+   else
+   inst = vmhub;
+   if (adev->gfx.kiq[inst].ring.sched.ready &&
  

Re: [PATCH] drm/amdgpu: Fix the vram base start address

2023-11-02 Thread Christian König

Am 01.11.23 um 20:13 schrieb Arunpravin Paneer Selvam:

Hi Christian,

On 10/30/2023 9:34 PM, Christian König wrote:



Am 30.10.23 um 13:22 schrieb Arunpravin Paneer Selvam:

If the size returned by drm buddy allocator is higher than
the required size, we take the higher size to calculate
the buffer start address. This is required if we couldn't
trim the buffer to the requested size. This will fix the
display corruption issue on APU's which has limited VRAM
size.

gitlab issue link: https://gitlab.freedesktop.org/drm/amd/-/issues/2859
JIRA ticket link: https://ontrack-internal.amd.com/browse/SWDEV-425461

Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 



Acked-by: Christian König 

IIRC that hack with the start address is actually not needed any 
more, but we need to double check this.
Okay, can we just remove this hack and keep the vres->base.start value
as the start address of the first block from the allocated list?


Please double check if we don't have any more cases where we compare the 
start address against the visible VRAM limit.


I think we now fixed all those cases and replaced them with calls to 
check if all segments are visible, but I'm not 100% sure.


Regards,
Christian.



Thanks,
Arun


Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 15 +--
  1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 18f58efc9dc7..08916538a615 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -77,7 +77,16 @@ static inline bool 
amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)

  return true;
  }
  +static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head 
*head)

+{
+    struct drm_buddy_block *block;
+    u64 size = 0;
  +    list_for_each_entry(block, head, link)
+    size += amdgpu_vram_mgr_block_size(block);
+
+    return size;
+}
    /**
   * DOC: mem_info_vram_total
@@ -516,6 +525,8 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  mutex_unlock(&mgr->lock);
    vres->base.start = 0;
+    size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
+ vres->base.size);
  list_for_each_entry(block, &vres->blocks, link) {
  unsigned long start;
  @@ -523,8 +534,8 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  amdgpu_vram_mgr_block_size(block);
  start >>= PAGE_SHIFT;
  -    if (start > PFN_UP(vres->base.size))
-    start -= PFN_UP(vres->base.size);
+    if (start > PFN_UP(size))
+    start -= PFN_UP(size);
  else
  start = 0;
  vres->base.start = max(vres->base.start, start);








[PATCH 4/4] drm/amdgpu: Change WREG32_RLC to WREG32_SOC15_RLC where inst != 0 (v2)

2023-11-02 Thread Victor Lu
W/RREG32_RLC is hardcoded to use instance 0. W/RREG32_SOC15_RLC
should be used instead when inst != 0.

v2: rebase

Signed-off-by: Victor Lu 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 38 --
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 40 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  2 +-
 3 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 80309d39737a..f6598b9e4faa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -306,8 +306,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_DOORBELL_CONTROL),
-   data);
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, 
data);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -336,27 +335,24 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_LO),
-  lower_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_HI),
-  upper_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR),
-  lower_32_bits((uintptr_t)wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
-   regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO,
+   lower_32_bits(guessed_wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI,
+   upper_32_bits(guessed_wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR,
+   lower_32_bits((uintptr_t)wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
-  queue_id));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1,
+   (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, 
queue_id));
}
 
/* Start the EOP fetcher */
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
-  REG_SET_FIELD(m->cp_hqd_eop_rptr,
-CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR,
+  REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 
1));
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), 
data);
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data);
 
kgd_gfx_v9_release_queue(adev, inst);
 
@@ -494,15 +490,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
VALID,
1);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_high);
+   watch_address_high, inst);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_low);
+   watch_address_low, inst);
 
return watch_address_cntl;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 9285789b3a42..00fbc0f44c92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device 
*adev, uint32_t vmi
 {
kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
 
-   WREG32_RLC(SOC15_REG_OFFSET(G

[PATCH 1/4] drm/amdgpu: Add flag to enable indirect RLCG access for gfx v9.4.3

2023-11-02 Thread Victor Lu
The "rlcg_reg_access_supported" flag is missing. Add it back in.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index a1c2c952d882..ce2a9876369e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1101,6 +1101,7 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct 
amdgpu_device *adev)
reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regGRBM_GFX_INDEX);
reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regRLC_SPARE_INT);
}
+   adev->gfx.rlc.rlcg_reg_access_supported = true;
 }
 
 static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
-- 
2.34.1



[PATCH v2 1/3] drm/amdgpu: Don't implicit sync PRT maps.

2023-11-02 Thread Tatsuyuki Ishi
These are considered map operations rather than unmap, and there is no
point in doing implicit synchronization here.

Signed-off-by: Tatsuyuki Ishi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f5daadcec865..7b9762f1cddd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -902,7 +902,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
/* Implicitly sync to command submissions in the same VM before
 * unmapping. Sync to moving fences before mapping.
 */
-   if (!(flags & AMDGPU_PTE_VALID))
+   if (!(flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)))
sync_mode = AMDGPU_SYNC_EQ_OWNER;
else
sync_mode = AMDGPU_SYNC_EXPLICIT;
-- 
2.42.0



[PATCH v2 2/3] drm/amdgpu: Add flag to disable implicit sync for GEM operations.

2023-11-02 Thread Tatsuyuki Ishi
In Vulkan, it is the application's responsibility to perform adequate
synchronization before a sparse unmap, replace or BO destroy operation.
Until now, the kernel applied the same rule as implicitly-synchronized
APIs like OpenGL, which with per-VM BOs made page table updates stall the
queue completely. The newly added AMDGPU_VM_EXPLICIT_SYNC flag allows
drivers to opt-out of this behavior, while still ensuring adequate implicit
sync happens for kernel-initiated updates (e.g. BO moves).

We record whether to use implicit sync or not for each freed mapping. To
avoid increasing the mapping struct's size, this is union-ized with the
interval tree field which is unused after the unmap.

The reason this is done with a GEM ioctl flag, instead of being a VM /
context global setting, is that the current libdrm implementation shares
the DRM handle even between different kinds of drivers (radeonsi vs radv).

Signed-off-by: Tatsuyuki Ishi 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 14 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|  7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h |  6 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 47 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 23 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 18 +++
 include/uapi/drm/amdgpu_drm.h |  2 +
 9 files changed, 71 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 7d6daf8d2bfa..10e129bff977 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1196,7 +1196,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
struct amdgpu_device *adev = entry->adev;
struct amdgpu_vm *vm = bo_va->base.vm;
 
-   amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+   amdgpu_vm_bo_unmap(adev, bo_va, entry->va, true);
 
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 720011019741..612279e65bff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -122,7 +122,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
}
}
 
-   r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr);
+   r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr, true);
if (r) {
DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r);
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index a1b15d0d6c48..cca68b89754e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -667,9 +667,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK |
-   AMDGPU_VM_PAGE_NOALLOC;
+   AMDGPU_VM_PAGE_NOALLOC | AMDGPU_VM_EXPLICIT_SYNC;
const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
-   AMDGPU_VM_PAGE_PRT;
+   AMDGPU_VM_PAGE_PRT | AMDGPU_VM_EXPLICIT_SYNC;
 
struct drm_amdgpu_gem_va *args = data;
struct drm_gem_object *gobj;
@@ -680,6 +680,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_exec exec;
uint64_t va_flags;
uint64_t vm_size;
+   bool sync_unmap;
int r = 0;
 
if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
@@ -715,6 +716,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
 
+   sync_unmap = !(args->flags & AMDGPU_VM_EXPLICIT_SYNC);
+
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
case AMDGPU_VA_OP_UNMAP:
@@ -774,19 +777,20 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
*data,
 va_flags);
break;
case AMDGPU_VA_OP_UNMAP:
-   r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address);
+   r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address,
+  sync_unmap);
break;
 
case AMDGPU_VA_OP_CLEAR:
r = amdgpu_vm_bo_clear_mappings(adev, &fpriv->vm,
args->va_address,
-   args->map_size);
+   args->map_size, sync_unmap);
break;
case AMDGPU_VA_OP_REPLACE:
va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
 

[PATCH v2 0/3] drm/amdgpu: Add flag to disable implicit sync for GEM operations.

2023-11-02 Thread Tatsuyuki Ishi
In Vulkan, it is the application's responsibility to perform adequate
synchronization before a sparse unmap, replace or BO destroy operation.
This adds an option to AMDGPU_VA_OPs to disable redundant implicit sync
that happens on sparse unmap or replace operations.

This has seen a significant improvement in stutter in Forza Horizon 5
and Forza Horizon 4. (As games that had significant issues in sparse
binding related stutter).

Compared to the previous series [1], this specifically targets the VM
operations and keeps everything else intact, including implicit sync on
kernel-initiated moves.

I've been able to pass a full Vulkan CTS run on Navi 10 with this.

Userspace code for this is available at [2] and a branch for the kernel
code is available at [3].

v2 changes:
- Drop the changes to flush split bindings eagerly as it's incompatible
  with TLB flush quirks in current hardware. Drop the refactoring
  commits related to that change too.
- Fixed a missing doc warning.
- Removed an accidentally included ioctl change.

[1]: 
https://lore.kernel.org/all/20230821062005.109771-1-ishitatsuy...@gmail.com/
[2]: 
https://gitlab.freedesktop.org/ishitatsuyuki/mesa/-/commits/vm-explicit-sync
[3]: https://github.com/ishitatsuyuki/linux/tree/explicit-sync-drm-misc-next

Tatsuyuki Ishi (3):
  drm/amdgpu: Don't implicit sync PRT maps.
  drm/amdgpu: Add flag to disable implicit sync for GEM operations.
  drm/amdgpu: Bump amdgpu driver version.

 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 14 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|  7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h |  6 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 47 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 23 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 18 +++
 include/uapi/drm/amdgpu_drm.h |  2 +
 10 files changed, 73 insertions(+), 51 deletions(-)

-- 
2.42.0



[PATCH v2 3/3] drm/amdgpu: Bump amdgpu driver version.

2023-11-02 Thread Tatsuyuki Ishi
For detection of the new explicit sync functionality without having to try
the ioctl.

Signed-off-by: Tatsuyuki Ishi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 81edf66dbea8..2aa406dee192 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -113,9 +113,10 @@
  *gl1c_cache_size, gl2c_cache_size, mall_size, 
enabled_rb_pipes_mask_hi
  *   3.53.0 - Support for GFX11 CP GFX shadowing
  *   3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
+ * - 3.55.0 - Add AMDGPU_VM_EXPLICIT_SYNC flag for GEM operations.
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   54
+#define KMS_DRIVER_MINOR   55
 #define KMS_DRIVER_PATCHLEVEL  0
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
-- 
2.42.0



Re: [PATCH 2/2] drm/amdgpu: Use drm_exec for seq64 bo lock

2023-11-02 Thread Christian König

Am 01.11.23 um 17:26 schrieb Arunpravin Paneer Selvam:

Replace seq64 bo lock sequences with drm_exec.

Signed-off-by: Alex Deucher 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 73 ++-
  1 file changed, 33 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
index 63d8b68023be..810f7637096e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -25,6 +25,8 @@
  #include "amdgpu.h"
  #include "amdgpu_seq64.h"
  
+#include 

+
  /**
   * DOC: amdgpu_seq64
   *
@@ -68,11 +70,8 @@ static inline u64 amdgpu_seq64_get_va_base(struct 
amdgpu_device *adev)
  int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 struct amdgpu_bo_va **bo_va)
  {
-   struct ttm_validate_buffer seq64_tv;
-   struct amdgpu_bo_list_entry pd;
-   struct ww_acquire_ctx ticket;
-   struct list_head list;
struct amdgpu_bo *bo;
+   struct drm_exec exec;
u64 seq64_addr;
int r;
  
@@ -80,23 +79,20 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,

if (!bo)
return -EINVAL;
  
-	INIT_LIST_HEAD(&list);

-   INIT_LIST_HEAD(&seq64_tv.head);
-
-   seq64_tv.bo = &bo->tbo;
-   seq64_tv.num_shared = 1;
-
-   list_add(&seq64_tv.head, &list);
-   amdgpu_vm_get_pd_bo(vm, &list, &pd);
-
-   r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
-   if (r)
-   return r;
+   drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+   drm_exec_until_all_locked(&exec) {
+   r = amdgpu_vm_lock_pd(vm, &exec, 0);
+   if (likely(!r))
+   r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+   drm_exec_retry_on_contention(&exec);
+   if (unlikely(r))
+   goto error;
+   }
  
  	*bo_va = amdgpu_vm_bo_add(adev, vm, bo);

if (!*bo_va) {
r = -ENOMEM;
-   goto error_vm;
+   goto error;
}
  
  	seq64_addr = amdgpu_seq64_get_va_base(adev);

@@ -104,23 +100,19 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
 AMDGPU_PTE_READABLE);
if (r) {
DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
-   goto error_map;
+   amdgpu_vm_bo_del(adev, *bo_va);
+   goto error;
}
  
  	r = amdgpu_vm_bo_update(adev, *bo_va, false);

if (r) {
DRM_ERROR("failed to do vm_bo_update on userq sem\n");
-   goto error_map;
+   amdgpu_vm_bo_del(adev, *bo_va);
+   goto error;
}
  
-	ttm_eu_backoff_reservation(&ticket, &list);

-
-   return 0;
-
-error_map:
-   amdgpu_vm_bo_del(adev, *bo_va);
-error_vm:
-   ttm_eu_backoff_reservation(&ticket, &list);
+error:
+   drm_exec_fini(&exec);
return r;
  }
  
@@ -134,12 +126,10 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,

   */
  void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv 
*fpriv)
  {
-   struct ttm_validate_buffer seq64_tv;
-   struct amdgpu_bo_list_entry pd;
-   struct ww_acquire_ctx ticket;
-   struct list_head list;
struct amdgpu_vm *vm;
struct amdgpu_bo *bo;
+   struct drm_exec exec;
+   int r;
  
  	if (!fpriv->seq64_va)

return;
@@ -149,20 +139,23 @@ void amdgpu_seq64_unmap(struct amdgpu_device *adev, 
struct amdgpu_fpriv *fpriv)
return;
  
  	vm = &fpriv->vm;

-   INIT_LIST_HEAD(&list);
-   INIT_LIST_HEAD(&seq64_tv.head);
  
-	seq64_tv.bo = &bo->tbo;

-   seq64_tv.num_shared = 1;
-
-   list_add(&seq64_tv.head, &list);
-   amdgpu_vm_get_pd_bo(vm, &list, &pd);
+   drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+   drm_exec_until_all_locked(&exec) {
+   r = amdgpu_vm_lock_pd(vm, &exec, 0);
+   if (likely(!r))
+   r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+   drm_exec_retry_on_contention(&exec);
+   if (unlikely(r))
+   goto error;
+   }
  
-	ttm_eu_reserve_buffers(&ticket, &list, false, NULL);

amdgpu_vm_bo_del(adev, fpriv->seq64_va);
-   ttm_eu_backoff_reservation(&ticket, &list);
  
  	fpriv->seq64_va = NULL;

+
+error:
+   drm_exec_fini(&exec);
  }
  
  /**




Re: [PATCH] drm/amdgpu: don't put MQDs in VRAM on ARM | ARM64

2023-11-02 Thread Christian König

Am 31.10.23 um 18:54 schrieb Alex Deucher:

Issues were reported with commit 1cfb4d612127
("drm/amdgpu: put MQDs in VRAM") on an ADLINK Ampere
Altra Developer Platform (AVA developer platform).

Various ARM systems seem to have problems related
to PCIe and MMIO access.  In this case, I'm not sure
if this is specific to the ADLINK platform or ARM
in general.  Seems to be some coherency issue with
VRAM.  For now, just don't put MQDs in VRAM on ARM.

Link: https://lists.freedesktop.org/archives/amd-gfx/2023-October/100453.html
Fixes: 1cfb4d612127 ("drm/amdgpu: put MQDs in VRAM")
Signed-off-by: Alex Deucher 
Cc: alexey.kli...@linaro.org


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index c92e0aba69e1..a2a29dcb2422 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -385,9 +385,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring = &kiq->ring;
u32 domain = AMDGPU_GEM_DOMAIN_GTT;
  
+#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)

/* Only enable on gfx10 and 11 for now to avoid changing behavior on 
older chips */
if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
domain |= AMDGPU_GEM_DOMAIN_VRAM;
+#endif
  
  	/* create MQD for KIQ */

if (!adev->enable_mes_kiq && !ring->mqd_obj) {




Re: [Patch v13 4/9] wifi: mac80211: Add support for WBRF features

2023-11-02 Thread Johannes Berg
On Thu, 2023-11-02 at 14:24 +0200, Ilpo Järvinen wrote:
> On Thu, 2 Nov 2023, Johannes Berg wrote:
> > On Thu, 2023-11-02 at 13:55 +0200, Ilpo Järvinen wrote:
> > 
> > > > +static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 
> > > > *start, u64 *end)
> > > > +{
> > > > +   bandwidth = MHZ_TO_KHZ(bandwidth);
> > > > +   center_freq = MHZ_TO_KHZ(center_freq);
> > > 
> > > Please use include/linux/units.h ones for these too.
> > 
> > Now we're feature creeping though - this has existed for *years* in the
> > wireless stack with many instances? We can convert them over, I guess,
> > but not sure that makes much sense here - we'd want to add such macros
> > to units.h, but ... moving them can be independent of this patch?
> 
> What new macros you're talking about? 

Sorry, I got confused - for some reason I was pretty sure something here
was already being added to units.h in this patchset.

> Nothing new needs to be added 
> as there's already KHZ_PER_MHZ so these would just be:
> 
>   bandwidth *= KHZ_PER_MHZ;
>   center_freq *= KHZ_PER_MHZ;

Sure, and in this case that's probably pretty much equivalent. But
having a MHZ_TO_KHZ() macro isn't inherently *bad*, and I'm not sure
your objection to it rests on anything other than "it's not defined in
units.h".

> Everything can of course be postponed by the argument that some 
> subsystem specific mechanism has been there before the generic one
> but the end of that road won't be pretty... What I was trying to do
> here was to point out the new stuff introduced by this series into the 
> direction of the generic thing.

I just think that the better course of action would be to eventually
move MHZ_TO_KHZ() to units.h ...

johannes


Re: [Patch v13 4/9] wifi: mac80211: Add support for WBRF features

2023-11-02 Thread Ilpo Järvinen
On Thu, 2 Nov 2023, Johannes Berg wrote:
> On Thu, 2023-11-02 at 13:55 +0200, Ilpo Järvinen wrote:
> 
> > > +static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 
> > > *start, u64 *end)
> > > +{
> > > + bandwidth = MHZ_TO_KHZ(bandwidth);
> > > + center_freq = MHZ_TO_KHZ(center_freq);
> > 
> > Please use include/linux/units.h ones for these too.
> 
> Now we're feature creeping though - this has existed for *years* in the
> wireless stack with many instances? We can convert them over, I guess,
> but not sure that makes much sense here - we'd want to add such macros
> to units.h, but ... moving them can be independent of this patch?

What new macros you're talking about? Nothing new needs to be added 
as there's already KHZ_PER_MHZ so these would just be:

bandwidth *= KHZ_PER_MHZ;
center_freq *= KHZ_PER_MHZ;

Everything can of course be postponed by the argument that some 
subsystem specific mechanism has been there before the generic one
but the end of that road won't be pretty... What I was trying to do
here was to point out the new stuff introduced by this series into the 
direction of the generic thing.

-- 
 i.

Re: [Patch v13 4/9] wifi: mac80211: Add support for WBRF features

2023-11-02 Thread Ilpo Järvinen
On Mon, 30 Oct 2023, Ma Jun wrote:

> From: Evan Quan 
> 
> To support the WBRF mechanism, Wifi adapters utilized in the system must
> register the frequencies in use (or unregister those frequencies no longer
> used) via the dedicated calls. So that, other drivers responding to the
> frequencies can take proper actions to mitigate possible interference.
> 
> Co-developed-by: Mario Limonciello 
> Signed-off-by: Mario Limonciello 
> Co-developed-by: Evan Quan 
> Signed-off-by: Evan Quan 
> Signed-off-by: Ma Jun 
> --
> v1->v2:
>   - place the new added member(`wbrf_supported`) in
> ieee80211_local(Johannes)
>   - handle chandefs change scenario properly(Johannes)
>   - some minor fixes around code sharing and possible invalid input
> checks(Johannes)
> v2->v3:
>   - drop unnecessary input checks and intermediate APIs(Mario)
>   - Separate some mac80211 common code(Mario, Johannes)
> v3->v4:
>   - some minor fixes around return values(Johannes)
> v9->v10:
>   - get ranges_in->num_of_ranges set and passed in(Johannes)
> v12:
>   - use acpi_amd_wbrf_add_remove to replace the acpi_amd_wbrf_add_exclusion
> acpi_amd_wbrf_remove_exclusion
> v13:
>   - Fix the format issue (Ilpo Jarvinen)
>   - Remove KHZ_TO_HZ and use HZ_PER_KHZ in linux/units.h (Ilpo Jarvinen)
> ---
>  net/mac80211/Makefile  |  2 +
>  net/mac80211/chan.c|  9 
>  net/mac80211/ieee80211_i.h |  7 +++
>  net/mac80211/main.c|  2 +
>  net/mac80211/wbrf.c| 95 ++
>  5 files changed, 115 insertions(+)
>  create mode 100644 net/mac80211/wbrf.c
> 
> diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
> index b8de44da1fb8..d46c36f55fd3 100644
> --- a/net/mac80211/Makefile
> +++ b/net/mac80211/Makefile
> @@ -65,4 +65,6 @@ rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \
>  
>  mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
>  
> +mac80211-y += wbrf.o
> +
>  ccflags-y += -DDEBUG
> diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
> index 68952752b599..458469c224ae 100644
> --- a/net/mac80211/chan.c
> +++ b/net/mac80211/chan.c
> @@ -506,11 +506,16 @@ static void _ieee80211_change_chanctx(struct 
> ieee80211_local *local,
>  
>   WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
>  
> + ieee80211_remove_wbrf(local, &ctx->conf.def);
> +
>   ctx->conf.def = *chandef;
>  
>   /* check if min chanctx also changed */
>   changed = IEEE80211_CHANCTX_CHANGE_WIDTH |
> _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
> +
> + ieee80211_add_wbrf(local, &ctx->conf.def);
> +
>   drv_change_chanctx(local, ctx, changed);
>  
>   if (!local->use_chanctx) {
> @@ -668,6 +673,8 @@ static int ieee80211_add_chanctx(struct ieee80211_local 
> *local,
>   lockdep_assert_held(&local->mtx);
>   lockdep_assert_held(&local->chanctx_mtx);
>  
> + ieee80211_add_wbrf(local, &ctx->conf.def);
> +
>   if (!local->use_chanctx)
>   local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
>  
> @@ -748,6 +755,8 @@ static void ieee80211_del_chanctx(struct ieee80211_local 
> *local,
>   }
>  
>   ieee80211_recalc_idle(local);
> +
> + ieee80211_remove_wbrf(local, &ctx->conf.def);
>  }
>  
>  static void ieee80211_free_chanctx(struct ieee80211_local *local,
> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
> index 98ef1fe1226e..1172554bd831 100644
> --- a/net/mac80211/ieee80211_i.h
> +++ b/net/mac80211/ieee80211_i.h
> @@ -1600,6 +1600,8 @@ struct ieee80211_local {
>  
>   /* extended capabilities provided by mac80211 */
>   u8 ext_capa[8];
> +
> + bool wbrf_supported;
>  };
>  
>  static inline struct ieee80211_sub_if_data *
> @@ -2637,4 +2639,9 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct 
> ieee80211_sub_if_data *sdata,
>   const struct ieee80211_eht_cap_elem 
> *eht_cap_ie_elem,
>   u8 eht_cap_len,
>   struct link_sta_info *link_sta);
> +
> +void ieee80211_check_wbrf_support(struct ieee80211_local *local);
> +void ieee80211_add_wbrf(struct ieee80211_local *local, struct 
> cfg80211_chan_def *chandef);
> +void ieee80211_remove_wbrf(struct ieee80211_local *local, struct 
> cfg80211_chan_def *chandef);
> +
>  #endif /* IEEE80211_I_H */
> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
> index 24315d7b3126..b20bdaac84db 100644
> --- a/net/mac80211/main.c
> +++ b/net/mac80211/main.c
> @@ -1396,6 +1396,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
>   debugfs_hw_add(local);
>   rate_control_add_debugfs(local);
>  
> + ieee80211_check_wbrf_support(local);
> +
>   rtnl_lock();
>   wiphy_lock(hw->wiphy);
>  
> diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c
> new file mode 100644
> index ..ca3f30b58476
> --- /dev/null
> +++ b/net/mac80211/wbrf.c
> @@ -0,0 +1,95 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> 

mainline build failure due to 7966f319c66d ("drm/amd/display: Introduce DML2")

2023-11-02 Thread Sudip Mukherjee (Codethink)
Hi All,

The latest mainline kernel branch fails to build x86_64 allmodconfig
with the error:

drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In function 
'dml_prefetch_check':
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:6707:1: 
error: the frame size of 2056 bytes is larger than 2048 bytes 
[-Werror=frame-larger-than=]
 6707 | }
  | ^

git bisect pointed to 7966f319c66d ("drm/amd/display: Introduce DML2")

I will be happy to test any patch or provide any extra log if needed.

#regzbot introduced: 7966f319c66d9468623c6a6a017ecbc0dd79be75

-- 
Regards
Sudip


Re: [Patch v13 4/9] wifi: mac80211: Add support for WBRF features

2023-11-02 Thread Johannes Berg
On Thu, 2023-11-02 at 13:55 +0200, Ilpo Järvinen wrote:


[please trim your quotes]

> > +static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 
> > *start, u64 *end)
> > +{
> > +   bandwidth = MHZ_TO_KHZ(bandwidth);
> > +   center_freq = MHZ_TO_KHZ(center_freq);
> 
> Please use include/linux/units.h ones for these too.

Now we're feature creeping though - this has existed for *years* in the
wireless stack with many instances? We can convert them over, I guess,
but not sure that makes much sense here - we'd want to add such macros
to units.h, but ... moving them can be independent of this patch?

johannes


Re: [PATCH 1/2] drm/amdgpu: Enable seq64 manager and fix bugs

2023-11-02 Thread Christian König

Am 01.11.23 um 17:26 schrieb Arunpravin Paneer Selvam:

- Enable the seq64 mapping sequence.
- Fix wflinfo va conflict and other bugs.

v1:
   - The seq64 area needs to be included in the AMDGPU_VA_RESERVED_SIZE
 otherwise the areas will conflict with user space allocations (Alex)

   - It needs to be mapped read only in the user VM (Alex)

v2:
   - Instead of just one define for TOP/BOTTOM
 reserved space separate them into two (Christian)

   - Fix the CPU and VA calculations and while at it
 also cleanup error handling and kerneldoc (Christian)

Signed-off-by: Christian König 
Signed-off-by: Alex Deucher 
Signed-off-by: Arunpravin Paneer Selvam 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c  |  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  |  8 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c| 69 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h|  9 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |  5 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c|  5 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c|  5 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c|  5 +-
  11 files changed, 68 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 23d054526e7c..c7622efdafee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -28,7 +28,7 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
  {
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
  
-	addr -= AMDGPU_VA_RESERVED_SIZE;

+   addr -= AMDGPU_VA_RESERVED_CSA_SIZE;
addr = amdgpu_gmc_sign_extend(addr);
  
  	return addr;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 849fffbb367d..f4455ed78e72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -687,10 +687,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
*data,
uint64_t vm_size;
int r = 0;
  
-	if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {

+   if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) {
dev_dbg(dev->dev,
"va_address 0x%llx is in reserved area 0x%llx\n",
-   args->va_address, AMDGPU_VA_RESERVED_SIZE);
+   args->va_address, AMDGPU_VA_RESERVED_BOTTOM);
return -EINVAL;
}
  
@@ -706,7 +706,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

args->va_address &= AMDGPU_GMC_HOLE_MASK;
  
  	vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;

-   vm_size -= AMDGPU_VA_RESERVED_SIZE;
+   vm_size -= AMDGPU_VA_RESERVED_TOP;
if (args->va_address + args->map_size > vm_size) {
dev_dbg(dev->dev,
"va_address 0x%llx is in top reserved area 0x%llx\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b5ebafd4a3ad..bb4aa14b868c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -894,14 +894,14 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
dev_info->ids_flags |= 
AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
  
  		vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;

-   vm_size -= AMDGPU_VA_RESERVED_SIZE;
+   vm_size -= AMDGPU_VA_RESERVED_TOP;
  
  		/* Older VCE FW versions are buggy and can handle only 40bits */

if (adev->vce.fw_version &&
adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
vm_size = min(vm_size, 1ULL << 40);
  
-		dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;

+   dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_BOTTOM;
dev_info->virtual_address_max =
min(vm_size, AMDGPU_GMC_HOLE_START);
  
@@ -1365,6 +1365,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)

goto error_vm;
}
  
+	r = amdgpu_seq64_map(adev, &fpriv->vm, &fpriv->seq64_va);

+   if (r)
+   goto error_vm;
+
mutex_init(&fpriv->bo_list_lock);
idr_init_base(&fpriv->bo_list_handles, 1);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c

index 70fe3b39c004..108908a10b92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1325,7 +1325,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
goto error_fini;
}
  
-	ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;

+   ctx_data.meta_data_gpu_addr = AMDGP

RE: [PATCH v2] drm/amdgpu: fix GRBM read timeout when do mes_self_test

2023-11-02 Thread Zhang, Yifan
[AMD Official Use Only - General]

This patch is :

Reviewed-by: Yifan Zhang 

-Original Message-
From: Huang, Tim 
Sent: Wednesday, November 1, 2023 4:53 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Zhang, Yifan 
; Xiao, Jack ; Huang, Tim 

Subject: [PATCH v2] drm/amdgpu: fix GRBM read timeout when do mes_self_test

Use a proper MEID to make sure the CP_HQD_* and CP_GFX_HQD_* registers can be 
touched when initializing the compute and gfx mqd in mes_self_test.
Otherwise, we expect no response from CP and an eventual GRBM timeout.

Signed-off-by: Tim Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 70fe3b39c004..45280fb0e00c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -556,8 +556,20 @@ static void amdgpu_mes_queue_init_mqd(struct amdgpu_device 
*adev,
mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
mqd_prop.hqd_active = false;

+   if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
+   p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+   mutex_lock(&adev->srbm_mutex);
+   amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 
0, 0, 0);
+   }
+
mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);

+   if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
+   p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+   amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
+   mutex_unlock(&adev->srbm_mutex);
+   }
+
amdgpu_bo_unreserve(q->mqd_obj);
 }

@@ -993,9 +1005,13 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int 
gang_id,
switch (queue_type) {
case AMDGPU_RING_TYPE_GFX:
ring->funcs = adev->gfx.gfx_ring[0].funcs;
+   ring->me = adev->gfx.gfx_ring[0].me;
+   ring->pipe = adev->gfx.gfx_ring[0].pipe;
break;
case AMDGPU_RING_TYPE_COMPUTE:
ring->funcs = adev->gfx.compute_ring[0].funcs;
+   ring->me = adev->gfx.compute_ring[0].me;
+   ring->pipe = adev->gfx.compute_ring[0].pipe;
break;
case AMDGPU_RING_TYPE_SDMA:
ring->funcs = adev->sdma.instance[0].ring.funcs;
--
2.39.2



Re: [PATCH] drm/edid: add a quirk for two 240Hz Samsung monitors

2023-11-02 Thread Jani Nikula
On Wed, 01 Nov 2023, Alex Deucher  wrote:
> On Wed, Nov 1, 2023 at 5:01 PM Hamza Mahfooz  wrote:
>>
>> Without this fix the 5120x1440@240 timing of these monitors
>> leads to screen flickering.
>>
>> Cc: sta...@vger.kernel.org # 6.1+
>> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1442
>> Co-developed-by: Harry Wentland 
>> Signed-off-by: Harry Wentland 
>> Signed-off-by: Hamza Mahfooz 
>> ---
>>  drivers/gpu/drm/drm_edid.c | 47 +++---
>>  1 file changed, 44 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
>> index bca2af4fe1fc..3fdb8907f66b 100644
>> --- a/drivers/gpu/drm/drm_edid.c
>> +++ b/drivers/gpu/drm/drm_edid.c
>> @@ -89,6 +89,8 @@ static int oui(u8 first, u8 second, u8 third)
>>  #define EDID_QUIRK_NON_DESKTOP (1 << 12)
>>  /* Cap the DSC target bitrate to 15bpp */
>>  #define EDID_QUIRK_CAP_DSC_15BPP   (1 << 13)
>> +/* Fix up a particular 5120x1440@240Hz timing */
>> +#define EDID_QUIRK_FIXUP_5120_1440_240 (1 << 14)
>
> What is wrong with the original timing that needs to be fixed?

Indeed. I'd be wary of applying this quirk as-is, because it'll impact
all drivers and all connectors.

The bug report does not have a single EDID from the affected displays
attached.

The quirk sets mode members that apparently do not need to be modified.

Cc: Ville


BR,
Jani.


>
> Alex
>
>
>>
>>  #define MICROSOFT_IEEE_OUI 0xca125c
>>
>> @@ -170,6 +172,12 @@ static const struct edid_quirk {
>> EDID_QUIRK('S', 'A', 'M', 596, EDID_QUIRK_PREFER_LARGE_60),
>> EDID_QUIRK('S', 'A', 'M', 638, EDID_QUIRK_PREFER_LARGE_60),
>>
>> +   /* Samsung C49G95T */
>> +   EDID_QUIRK('S', 'A', 'M', 0x7053, EDID_QUIRK_FIXUP_5120_1440_240),
>> +
>> +   /* Samsung S49AG95 */
>> +   EDID_QUIRK('S', 'A', 'M', 0x71ac, EDID_QUIRK_FIXUP_5120_1440_240),
>> +
>> /* Sony PVM-2541A does up to 12 bpc, but only reports max 8 bpc */
>> EDID_QUIRK('S', 'N', 'Y', 0x2541, EDID_QUIRK_FORCE_12BPC),
>>
>> @@ -6586,7 +6594,37 @@ static void update_display_info(struct drm_connector 
>> *connector,
>> drm_edid_to_eld(connector, drm_edid);
>>  }
>>
>> -static struct drm_display_mode *drm_mode_displayid_detailed(struct 
>> drm_device *dev,
>> +static void drm_mode_displayid_detailed_edid_quirks(struct drm_connector 
>> *connector,
>> +   struct drm_display_mode 
>> *mode)
>> +{
>> +   unsigned int hsync_width;
>> +   unsigned int vsync_width;
>> +
>> +   if (connector->display_info.quirks & EDID_QUIRK_FIXUP_5120_1440_240) 
>> {
>> +   if (mode->hdisplay == 5120 && mode->vdisplay == 1440 &&
>> +   mode->clock == 1939490) {
>> +   hsync_width = mode->hsync_end - mode->hsync_start;
>> +   vsync_width = mode->vsync_end - mode->vsync_start;
>> +
>> +   mode->clock = 2018490;
>> +   mode->hdisplay = 5120;
>> +   mode->hsync_start = 5120 + 8;
>> +   mode->hsync_end = 5120 + 8 + hsync_width;
>> +   mode->htotal = 5200;
>> +
>> +   mode->vdisplay = 1440;
>> +   mode->vsync_start = 1440 + 165;
>> +   mode->vsync_end = 1440 + 165 + vsync_width;
>> +   mode->vtotal = 1619;
>> +
>> +   drm_dbg_kms(connector->dev,
>> +   "[CONNECTOR:%d:%s] Samsung 240Hz mode 
>> quirk applied\n",
>> +   connector->base.id, connector->name);
>> +   }
>> +   }
>> +}
>> +
>> +static struct drm_display_mode *drm_mode_displayid_detailed(struct 
>> drm_connector *connector,
>> struct 
>> displayid_detailed_timings_1 *timings,
>> bool type_7)
>>  {
>> @@ -6605,7 +6643,7 @@ static struct drm_display_mode 
>> *drm_mode_displayid_detailed(struct drm_device *d
>> bool hsync_positive = (timings->hsync[1] >> 7) & 0x1;
>> bool vsync_positive = (timings->vsync[1] >> 7) & 0x1;
>>
>> -   mode = drm_mode_create(dev);
>> +   mode = drm_mode_create(connector->dev);
>> if (!mode)
>> return NULL;
>>
>> @@ -6628,6 +,9 @@ static struct drm_display_mode 
>> *drm_mode_displayid_detailed(struct drm_device *d
>>
>> if (timings->flags & 0x80)
>> mode->type |= DRM_MODE_TYPE_PREFERRED;
>> +
>> +   drm_mode_displayid_detailed_edid_quirks(connector, mode);
>> +
>> drm_mode_set_name(mode);
>>
>> return mode;
>> @@ -6650,7 +6691,7 @@ static int add_displayid_detailed_1_modes(struct 
>> drm_connector *connector,
>> for (i = 0; i < num_timings; i++) {
>> struct displayid_detailed_timings_1 *timings = 

[PATCH] drm/amdgpu: Don't warn for unsupported set_xgmi_plpd_mode

2023-11-02 Thread Tao Zhou
set_xgmi_plpd_mode may be unsupported, and this isn't an error, so there is
no need to print a warning for it.

v2: add ret2 to save the status of psp_ras_trigger_error.

Suggested-by: lijo.la...@amd.com
Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 0533f873001b..a5a72e5aae94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -1131,28 +1131,30 @@ static void amdgpu_xgmi_query_ras_error_count(struct 
amdgpu_device *adev,
 static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
void *inject_if, uint32_t instance_mask)
 {
-   int ret = 0;
+   int ret1, ret2;
struct ta_ras_trigger_error_input *block_info =
(struct ta_ras_trigger_error_input *)inject_if;
 
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
dev_warn(adev->dev, "Failed to disallow df cstate");
 
-   if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW))
+   ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW);
+   if (ret1 && ret1 != -EOPNOTSUPP)
dev_warn(adev->dev, "Failed to disallow XGMI power down");
 
-   ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
+   ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
 
if (amdgpu_ras_intr_triggered())
-   return ret;
+   return ret2;
 
-   if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT))
+   ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT);
+   if (ret1 && ret1 != -EOPNOTSUPP)
dev_warn(adev->dev, "Failed to allow XGMI power down");
 
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
dev_warn(adev->dev, "Failed to allow df cstate");
 
-   return ret;
+   return ret2;
 }
 
 struct amdgpu_ras_block_hw_ops  xgmi_ras_hw_ops = {
-- 
2.35.1



RE: [PATCH] drm/amd/pm:Fix return value and drop redundant param

2023-11-02 Thread Feng, Kenneth
[AMD Official Use Only - General]

Reviewed-by: Kenneth Feng 


-Original Message-
From: Ma, Jun 
Sent: Thursday, November 2, 2023 3:59 PM
To: amd-gfx@lists.freedesktop.org; Feng, Kenneth ; 
Deucher, Alexander 
Cc: Ma, Jun 
Subject: [PATCH] drm/amd/pm:Fix return value and drop redundant param

Fix the return value and drop the redundant parameter of the 
get_asic_baco_capability function to simplify the code.

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/include/kgd_pp_interface.h   |  2 +-
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c  |  8 +++-
 drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 11 ---
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c   |  7 +++
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h   |  2 +-
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c   |  9 -
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h   |  2 +-
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c |  9 -
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h |  2 +-
 drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h |  2 +-
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 12 +---
 11 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 3201808c2dd8..60e6b82077e8 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -421,7 +421,7 @@ struct amd_pm_funcs {
int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock);
int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
-   int (*get_asic_baco_capability)(void *handle, bool *cap);
+   bool (*get_asic_baco_capability)(void *handle);
int (*get_asic_baco_state)(void *handle, int *state);
int (*set_asic_baco_state)(void *handle, int state);
int (*get_ppfeature_status)(void *handle, char *buf); diff --git 
a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index acf3527fff2d..24fd036a15c0 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -185,8 +185,7 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device 
*adev)  {
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
void *pp_handle = adev->powerplay.pp_handle;
-   bool baco_cap;
-   int ret = 0;
+   bool ret;

if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
return false;
@@ -204,12 +203,11 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device 
*adev)

mutex_lock(&adev->pm.mutex);

-   ret = pp_funcs->get_asic_baco_capability(pp_handle,
-&baco_cap);
+   ret = pp_funcs->get_asic_baco_capability(pp_handle);

mutex_unlock(&adev->pm.mutex);

-   return ret ? false : baco_cap;
+   return ret;
 }

 int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev) diff --git 
a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c 
b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index 9e4f8a4104a3..e82c2b2fffb5 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -1368,21 +1368,18 @@ static int pp_set_active_display_count(void *handle, 
uint32_t count)
return phm_set_active_display_count(hwmgr, count);  }

-static int pp_get_asic_baco_capability(void *handle, bool *cap)
+static bool pp_get_asic_baco_capability(void *handle)
 {
struct pp_hwmgr *hwmgr = handle;

-   *cap = false;
if (!hwmgr)
-   return -EINVAL;
+   return false;

if (!(hwmgr->not_vf && amdgpu_dpm) ||
!hwmgr->hwmgr_func->get_asic_baco_capability)
-   return 0;
+   return false;

-   hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr, cap);
-
-   return 0;
+   return hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr);
 }

 static int pp_get_asic_baco_state(void *handle, int *state) diff --git 
a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
index 044cda005aed..e8a9471c1898 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
@@ -33,21 +33,20 @@
 #include "smu/smu_7_1_2_d.h"
 #include "smu/smu_7_1_2_sh_mask.h"

-int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
uint32_t reg;

-   *cap = false;
if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, 
PHM_PlatformCaps_BACO))
return 0;

reg = RREG32(mmCC_BIF_BX_FUSESTRAP0);

if (reg & CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_M

[PATCH] drm/amd/pm:Fix return value and drop redundant param

2023-11-02 Thread Ma Jun
Fix the return value and drop the redundant parameter of the
get_asic_baco_capability function to simplify the code.

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/include/kgd_pp_interface.h   |  2 +-
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c  |  8 +++-
 drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 11 ---
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c   |  7 +++
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h   |  2 +-
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c   |  9 -
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h   |  2 +-
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c |  9 -
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h |  2 +-
 drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h |  2 +-
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 12 +---
 11 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 3201808c2dd8..60e6b82077e8 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -421,7 +421,7 @@ struct amd_pm_funcs {
int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock);
int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
-   int (*get_asic_baco_capability)(void *handle, bool *cap);
+   bool (*get_asic_baco_capability)(void *handle);
int (*get_asic_baco_state)(void *handle, int *state);
int (*set_asic_baco_state)(void *handle, int state);
int (*get_ppfeature_status)(void *handle, char *buf);
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index acf3527fff2d..24fd036a15c0 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -185,8 +185,7 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device 
*adev)
 {
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
void *pp_handle = adev->powerplay.pp_handle;
-   bool baco_cap;
-   int ret = 0;
+   bool ret;
 
if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
return false;
@@ -204,12 +203,11 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device 
*adev)
 
mutex_lock(&adev->pm.mutex);
 
-   ret = pp_funcs->get_asic_baco_capability(pp_handle,
-&baco_cap);
+   ret = pp_funcs->get_asic_baco_capability(pp_handle);
 
mutex_unlock(&adev->pm.mutex);
 
-   return ret ? false : baco_cap;
+   return ret;
 }
 
 int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c 
b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index 9e4f8a4104a3..e82c2b2fffb5 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -1368,21 +1368,18 @@ static int pp_set_active_display_count(void *handle, 
uint32_t count)
return phm_set_active_display_count(hwmgr, count);
 }
 
-static int pp_get_asic_baco_capability(void *handle, bool *cap)
+static bool pp_get_asic_baco_capability(void *handle)
 {
struct pp_hwmgr *hwmgr = handle;
 
-   *cap = false;
if (!hwmgr)
-   return -EINVAL;
+   return false;
 
if (!(hwmgr->not_vf && amdgpu_dpm) ||
!hwmgr->hwmgr_func->get_asic_baco_capability)
-   return 0;
+   return false;
 
-   hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr, cap);
-
-   return 0;
+   return hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr);
 }
 
 static int pp_get_asic_baco_state(void *handle, int *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
index 044cda005aed..e8a9471c1898 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
@@ -33,21 +33,20 @@
 #include "smu/smu_7_1_2_d.h"
 #include "smu/smu_7_1_2_sh_mask.h"
 
-int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
uint32_t reg;
 
-   *cap = false;
if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, 
PHM_PlatformCaps_BACO))
return 0;
 
reg = RREG32(mmCC_BIF_BX_FUSESTRAP0);
 
if (reg & CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK)
-   *cap = true;
+   return true;
 
-   return 0;
+   return false;
 }
 
 int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu

Re: [PATCH v4 09/32] drm/amd/display: add plane 3D LUT driver-specific properties

2023-11-02 Thread Joshua Ashton

Also, Melissa, you cannot do:

if (!plane_state->color_mgmt_changed)
return 0;

in amdgpu_dm_plane_set_color_properties.

The allocation for dc_plane_state could be new and zero'ed so it needs 
to be set every time. (Until AMDGPU has better dedup'ing of stuff there)


The reason it looked like it worked for you now is because the duplicate 
was broken, so color mgmt for planes was always being marked as dirty there.


Thanks

- Joshie 🐸✨

On 11/2/23 03:48, Joshua Ashton wrote:



On 10/5/23 18:15, Melissa Wen wrote:

Add 3D LUT property for plane color transformations using a 3D lookup
table. 3D LUT allows for highly accurate and complex color
transformations and is suitable to adjust the balance between color
channels. It's also more complex to manage and requires more
computational resources.

Since a 3D LUT has a limited number of entries in each dimension we want
to use them in an optimal fashion. This means using the 3D LUT in a
colorspace that is optimized for human vision, such as sRGB, PQ, or
another non-linear space. Therefore, userspace may need one 1D LUT
(shaper) before it to delinearize content and another 1D LUT after 3D
LUT (blend) to linearize content again for blending. The next patches
add these 1D LUTs to the plane color mgmt pipeline.

v3:
- improve commit message about 3D LUT
- describe the 3D LUT entries and size (Harry)

v4:
- advertise 3D LUT max size as the size of a single-dimension

Signed-off-by: Melissa Wen 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  | 18 +++
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  9 
  .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 14 +++
  .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 23 +++
  4 files changed, 64 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h

index 62044d41da75..f7adaa52c23f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -363,6 +363,24 @@ struct amdgpu_mode_info {
   * @plane_hdr_mult_property:
   */
  struct drm_property *plane_hdr_mult_property;
+    /**
+ * @plane_lut3d_property: Plane property for color transformation 
using

+ * a 3D LUT (pre-blending), a three-dimensional array where each
+ * element is an RGB triplet. Each dimension has a size of the cubed
+ * root of lut3d_size. The array contains samples from the 
approximated

+ * function. On AMD, values between samples are estimated by
+ * tetrahedral interpolation. The array is accessed with three 
indices,

+ * one for each input dimension (color channel), blue being the
+ * outermost dimension, red the innermost.
+ */
+    struct drm_property *plane_lut3d_property;
+    /**
+ * @plane_degamma_lut_size_property: Plane property to define the 
max
+ * size of 3D LUT as supported by the driver (read-only). The max 
size
+ * is the max size of one dimension and, therefore, the max 
number of

+ * entries for 3D LUT array is the 3D LUT size cubed;
+ */
+    struct drm_property *plane_lut3d_size_property;
  };
  #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

index bb2ce843369d..7a2350c62cf1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -784,6 +784,11 @@ struct dm_plane_state {
   * TF is needed for any subsequent linear-to-non-linear transforms.
   */
  __u64 hdr_mult;
+    /**
+ * @lut3d: 3D lookup table blob. The blob (if not NULL) is an 
array of

+ * &struct drm_color_lut.
+ */
+    struct drm_property_blob *lut3d;
  };
  struct dm_crtc_state {
@@ -869,6 +874,10 @@ void amdgpu_dm_update_freesync_caps(struct 
drm_connector *connector,

  void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
+/* 3D LUT max size is 17x17x17 (4913 entries) */
+#define MAX_COLOR_3DLUT_SIZE 17
+#define MAX_COLOR_3DLUT_BITDEPTH 12
+/* 1D LUT size */
  #define MAX_COLOR_LUT_ENTRIES 4096
  /* Legacy gamm LUT users such as X doesn't like large LUT sizes */
  #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c

index caf49a044ab4..011f2f9ec890 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -230,6 +230,20 @@ amdgpu_dm_create_color_properties(struct 
amdgpu_device *adev)

  return -ENOMEM;
  adev->mode_info.plane_hdr_mult_property = prop;
+    prop = drm_property_create(adev_to_drm(adev),
+   DRM_MODE_PROP_BLOB,
+   "AMD_PLANE_LUT3D", 0);
+    if (!prop)
+    return -ENOMEM;
+    adev->mode_info.plane_lut3d_property = prop;
+
+    prop = drm_property_create_range(adev_to_drm(adev),