[PATCH] drm/amdgpu: fix overflowed array index read warning

2024-04-24 Thread Tim Huang
From: Tim Huang 

Clear the warning that a cast operation might have overflowed.

Signed-off-by: Tim Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 06f0a6534a94..6dfcd62e83ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -473,7 +473,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, 
char __user *buf,
size_t size, loff_t *pos)
 {
struct amdgpu_ring *ring = file_inode(f)->i_private;
-   int r, i;
+   int r;
uint32_t value, result, early[3];
 
if (*pos & 3 || size & 3)
@@ -485,7 +485,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, 
char __user *buf,
early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
early[2] = ring->wptr & ring->buf_mask;
-   for (i = *pos / 4; i < 3 && size; i++) {
+   for (loff_t i = *pos / 4; i < 3 && size; i++) {
r = put_user(early[i], (uint32_t *)buf);
if (r)
return r;
-- 
2.39.2



[PATCH] drm/amdgpu: fix potential resource leak warning

2024-04-24 Thread Tim Huang
From: Tim Huang 

Clear the resource leak warning: when the prepare step fails,
the allocated amdgpu job object will never be released.

Signed-off-by: Tim Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 66e8a016126b..9b748d7058b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -102,6 +102,11 @@ static int amdgpu_vm_sdma_prepare(struct 
amdgpu_vm_update_params *p,
if (!r)
r = amdgpu_sync_push_to_job(, p->job);
amdgpu_sync_free();
+
+   if (r) {
+   p->num_dw_left = 0;
+   amdgpu_job_free(p->job);
+   }
return r;
 }
 
-- 
2.39.2



RE: [PATCH 04/15] drm/amdgpu: add poison creation handler

2024-04-24 Thread Chai, Thomas
[AMD Official Use Only - General]

OK, I will do this.


-
Best Regards,
Thomas

-Original Message-
From: Zhang, Hawking 
Sent: Thursday, April 25, 2024 10:33 AM
To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
Cc: Chai, Thomas ; Zhou1, Tao ; Li, 
Candice ; Wang, Yang(Kevin) ; Yang, 
Stanley ; Chai, Thomas 
Subject: RE: [PATCH 04/15] drm/amdgpu: add poison creation handler

[AMD Official Use Only - General]

Is it okay to drop the below static function and just implement the logic in the poison 
creation handler, leveraging the RAS query API amdgpu_ras_query_error_status?

It seems to me the static function may not be able to be used for other IP 
blocks.

Regards,
Hawking

+ static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev,
+   enum amdgpu_ras_block ras_block, uint32_t timeout_ms) {
+   int ret = 0;
+   struct ras_ecc_log_info *ecc_log;
+   struct ras_query_if info;
+   uint32_t timeout = timeout_ms;
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+   memset(, 0, sizeof(info));
+   info.head.block = ras_block;
+
+   ecc_log = >umc_ecc_log;
+   ecc_log->de_updated = false;
+   do {
+   ret = amdgpu_ras_query_error_status(adev, );
+   if (ret) {
+   dev_err(adev->dev, "Failed to query ras error! 
ret:%d\n", ret);
+   return ret;
+   }
+
+   if (timeout && !ecc_log->de_updated) {
+   msleep(1);
+   timeout--;
+   }
+   } while (timeout && !ecc_log->de_updated);
+
+   if (timeout_ms && !timeout) {
+   dev_warn(adev->dev, "Can't find deferred error\n");
+   return -ETIMEDOUT;
+   }
+
+   return 0;
+}
+
+static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
+   uint32_t timeout) {
+   amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC,
+timeout); }
+

-Original Message-
From: amd-gfx  On Behalf Of YiPeng Chai
Sent: Thursday, April 18, 2024 10:58
To: amd-gfx@lists.freedesktop.org
Cc: Chai, Thomas ; Zhang, Hawking ; 
Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley ; 
Chai, Thomas 
Subject: [PATCH 04/15] drm/amdgpu: add poison creation handler

Add poison creation handler.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 74 +++--
 1 file changed, 69 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 64e6e20c6de7..126616eaeec1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2080,6 +2080,17 @@ static void 
amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj  {
dev_info(obj->adev->dev,
"Poison is created\n");
+
+   if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
+   struct amdgpu_ras *con =
+ amdgpu_ras_get_context(obj->adev);
+
+   amdgpu_ras_put_poison_req(obj->adev,
+   AMDGPU_RAS_BLOCK__UMC, 0, NULL, NULL, false);
+
+   atomic_inc(>page_retirement_req_cnt);
+
+   wake_up(>page_retirement_wq);
+   }
 }

 static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, @@ 
-2754,10 +2765,54 @@ static void amdgpu_ras_ecc_log_fini(struct 
ras_ecc_log_info *ecc_log)
mutex_destroy(_log->lock);
ecc_log->de_updated = false;
 }
+
+static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev,
+   enum amdgpu_ras_block ras_block, uint32_t timeout_ms) {
+   int ret = 0;
+   struct ras_ecc_log_info *ecc_log;
+   struct ras_query_if info;
+   uint32_t timeout = timeout_ms;
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+   memset(, 0, sizeof(info));
+   info.head.block = ras_block;
+
+   ecc_log = >umc_ecc_log;
+   ecc_log->de_updated = false;
+   do {
+   ret = amdgpu_ras_query_error_status(adev, );
+   if (ret) {
+   dev_err(adev->dev, "Failed to query ras error! 
ret:%d\n", ret);
+   return ret;
+   }
+
+   if (timeout && !ecc_log->de_updated) {
+   msleep(1);
+   timeout--;
+   }
+   } while (timeout && !ecc_log->de_updated);
+
+   if (timeout_ms && !timeout) {
+   dev_warn(adev->dev, "Can't find deferred error\n");
+   return -ETIMEDOUT;
+   }
+
+   return 0;
+}
+
+static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
+   uint32_t timeout) {
+   amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC,
+timeout); }
+
 static int amdgpu_ras_page_retirement_thread(void *param)  {
struct 

RE: [PATCH 06/15] drm/amdgpu: umc v12_0 converts error address

2024-04-24 Thread Chai, Thomas
[AMD Official Use Only - General]

amdgpu_umc_fill_error_record is called directly in umc_v12_0_convert_error_address 
to prepare for page retirement.
The new path needs to check whether these converted pages already exist before 
filling the error page; umc_v12_0_convert_error_address is not suitable for 
the new requirements, so I created a new interface.

-
Best Regards,
Thomas

-Original Message-
From: Zhang, Hawking 
Sent: Thursday, April 25, 2024 11:03 AM
To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley 
Subject: RE: [PATCH 06/15] drm/amdgpu: umc v12_0 converts error address

[AMD Official Use Only - General]

I might lose some context here. Can you please elaborate why we don't leverage 
the existing umc_v12_0_convert_error_address implementation?

Regards,
Hawking

-Original Message-
From: Chai, Thomas 
Sent: Thursday, April 18, 2024 10:58
To: amd-gfx@lists.freedesktop.org
Cc: Chai, Thomas ; Zhang, Hawking ; 
Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley ; 
Chai, Thomas 
Subject: [PATCH 06/15] drm/amdgpu: umc v12_0 converts error address

Umc v12_0 converts error address.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 94 +-  
drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 12 
 2 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 81435533c4a7..085dcfe16b5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -222,6 +222,66 @@ static void umc_v12_0_convert_error_address(struct 
amdgpu_device *adev,
}
 }

+static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev,
+   struct ta_ras_query_address_input *addr_in,
+   uint64_t *pfns, int len) {
+   uint32_t col, row, row_xor, bank, channel_index;
+   uint64_t soc_pa, retired_page, column, err_addr;
+   struct ta_ras_query_address_output addr_out;
+   uint32_t pos = 0;
+
+   err_addr = addr_in->ma.err_addr;
+   addr_in->addr_type = TA_RAS_MCA_TO_PA;
+   if (psp_ras_query_address(>psp, addr_in, _out)) {
+   dev_warn(adev->dev, "Failed to query RAS physical address for 
0x%llx",
+   err_addr);
+   return 0;
+   }
+
+   soc_pa = addr_out.pa.pa;
+   bank = addr_out.pa.bank;
+   channel_index = addr_out.pa.channel_idx;
+
+   col = (err_addr >> 1) & 0x1fULL;
+   row = (err_addr >> 10) & 0x3fffULL;
+   row_xor = row ^ (0x1ULL << 13);
+   /* clear [C3 C2] in soc physical address */
+   soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
+   /* clear [C4] in soc physical address */
+   soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);
+
+   /* loop for all possibilities of [C4 C3 C2] */
+   for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
+   retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
+   retired_page |= (((column & 0x4) >> 2) <<
+ UMC_V12_0_PA_C4_BIT);
+
+   if (pos >= len)
+   return 0;
+   pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+
+   /* include column bit 0 and 1 */
+   col &= 0x3;
+   col |= (column << 2);
+   dev_info(adev->dev,
+   "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x 
Bank:0x%x Channel:0x%x\n",
+   retired_page, row, col, bank, channel_index);
+
+   /* shift R13 bit */
+   retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
+
+   if (pos >= len)
+   return 0;
+   pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+
+   dev_info(adev->dev,
+   "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x 
Bank:0x%x Channel:0x%x\n",
+   retired_page, row_xor, col, bank, channel_index);
+   }
+
+   return pos;
+}
+
 static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
uint32_t node_inst, uint32_t umc_inst,
uint32_t ch_inst, void *data) @@ -482,8 
+542,12 @@ static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, 
struct ras_common  static int umc_v12_0_update_ecc_status(struct amdgpu_device 
*adev,
uint64_t status, uint64_t ipid, uint64_t addr)  {
-   uint16_t hwid, mcatype;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+   uint16_t hwid, mcatype;
+   struct ta_ras_query_address_input addr_in;
+   uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
+   uint64_t err_addr;
+   int count;

hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
mcatype = REG_GET_FIELD(ipid, 

RE: [PATCH V2] drm/amdgpu: Fix ras mode2 reset failure in ras aca mode

2024-04-24 Thread Wang, Yang(Kevin)
[AMD Official Use Only - General]

>> Alternatively, we need to explore the opportunity to centralize legacy ras 
>> and aca ras implementation in the same API. Take sysfs create/remove 
>> interface for example, legacy RAS and ACA RAS do share the same logic, just 
>> have different filesystem node.
>> For now, ACA RAS is trending to back to IP specific ras late init. Let's 
>> revisit the code to see if we can re-use the common ras_late_init or create 
>> aca_ras_late_init api.

Sure, thanks.
We will make improvements in this direction.

Best Regards,
Kevin

-Original Message-
From: Zhang, Hawking 
Sent: Thursday, April 25, 2024 10:46 AM
To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley 
Subject: RE: [PATCH V2] drm/amdgpu: Fix ras mode2 reset failure in ras aca mode

[AMD Official Use Only - General]

The patch is Reviewed-by: Hawking Zhang 

Kevin, Thomas,

Alternatively, we need to explore the opportunity to centralize legacy ras and 
aca ras implementation in the same API. Take sysfs create/remove interface for 
example, legacy RAS and ACA RAS do share the same logic, just have different 
filesystem node.

For now, ACA RAS is trending to back to IP specific ras late init. Let's 
revisit the code to see if we can re-use the common ras_late_init or create 
aca_ras_late_init api.

Regards,
Hawking

-Original Message-
From: Chai, Thomas 
Sent: Wednesday, April 24, 2024 13:52
To: amd-gfx@lists.freedesktop.org
Cc: Chai, Thomas ; Zhang, Hawking ; 
Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley ; 
Chai, Thomas 
Subject: [PATCH V2] drm/amdgpu: Fix ras mode2 reset failure in ras aca mode

Fix ras mode2 reset failure in ras aca mode.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index edb3cd0cef96..11a70991152c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1254,6 +1254,10 @@ int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum 
amdgpu_ras_block blk,  {
struct ras_manager *obj;

+   /* in resume phase, no need to create aca fs node */
+   if (adev->in_suspend || amdgpu_in_reset(adev))
+   return 0;
+
obj = get_ras_manager(adev, blk);
if (!obj)
return -EINVAL;
--
2.34.1




RE: [PATCH 11/15] drm/amdgpu: prepare to handle pasid poison consumption

2024-04-24 Thread Chai, Thomas
[AMD Official Use Only - General]

-
Best Regards,
Thomas

-Original Message-
From: Zhang, Hawking 
Sent: Thursday, April 25, 2024 11:01 AM
To: Chai, Thomas ; amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley 
Subject: RE: [PATCH 11/15] drm/amdgpu: prepare to handle pasid poison 
consumption

[AMD Official Use Only - General]

+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device 
*adev,
+   enum amdgpu_ras_block block, uint16_t pasid,
+   pasid_notify pasid_fn, void *data, uint32_t
+reset);

> So we ultimately switch to above poison consumption handler for all the 
> existing v9 adapters, right? If so, we shall be able to make this function 
> backwards compatible. I'm wondering if we can just change the existing 
> amdgpu_amdkfd_ras_poison_consumption_handler.

> Pasid_poison_consumption_handler is a little bit confusing.

[Thomas] No. Only for UMC_HWIP greater than or equal to IP_VERSION(12, 0, 0) does it 
work on the new path. The IP check is in the amdgpu_umc_pasid_poison_handler 
function.



Regards,
Hawking

-Original Message-
From: Chai, Thomas 
Sent: Thursday, April 18, 2024 10:59
To: amd-gfx@lists.freedesktop.org
Cc: Chai, Thomas ; Zhang, Hawking ; 
Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley ; 
Chai, Thomas 
Subject: [PATCH 11/15] drm/amdgpu: prepare to handle pasid poison consumption

Prepare to handle pasid poison consumption.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  9 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c   | 20 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h   |  3 +++
 .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  3 ++-
 5 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 66753940bb4d..287ce431901c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -759,10 +759,17 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
return amdgpu_ras_get_fed_status(adev);  }

+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device 
*adev,
+   enum amdgpu_ras_block block, uint16_t pasid,
+   pasid_notify pasid_fn, void *data, uint32_t 
reset) {
+   amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn,
+data, reset); }
+
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
enum amdgpu_ras_block block, uint32_t reset)  {
-   amdgpu_umc_poison_handler(adev, block, reset);
+   amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL,
+ reset);
 }

 int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index ad50c7bbc326..54e15994d02b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -401,6 +401,11 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device 
*adev,
struct tile_config *config);  void 
amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
enum amdgpu_ras_block block, uint32_t reset);
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device 
*adev,
+   enum amdgpu_ras_block block, uint16_t pasid,
+   pasid_notify pasid_fn, void *data, uint32_t
+reset);
+
 bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);  bool 
amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem 
*mem);  void amdgpu_amdkfd_block_mmu_notifications(void *p); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index dcda3d24bee3..8ebbca9e2e22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -252,8 +252,9 @@ int amdgpu_umc_bad_page_polling_timeout(struct 
amdgpu_device *adev,
return 0;
 }

-int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
-   enum amdgpu_ras_block block, uint32_t reset)
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+   enum amdgpu_ras_block block, uint16_t pasid,
+   pasid_notify pasid_fn, void *data, uint32_t
+reset)
 {
int ret = AMDGPU_RAS_SUCCESS;

@@ -291,16 +292,14 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,

amdgpu_ras_error_data_fini(_data);
} else {
-   if (reset) {
-   amdgpu_umc_bad_page_polling_timeout(adev,
-   reset, 

RE: [PATCH 06/15] drm/amdgpu: umc v12_0 converts error address

2024-04-24 Thread Zhang, Hawking
[AMD Official Use Only - General]

I might lose some context here. Can you please elaborate why we don't leverage 
the existing umc_v12_0_convert_error_address implementation?

Regards,
Hawking

-Original Message-
From: Chai, Thomas 
Sent: Thursday, April 18, 2024 10:58
To: amd-gfx@lists.freedesktop.org
Cc: Chai, Thomas ; Zhang, Hawking ; 
Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley ; 
Chai, Thomas 
Subject: [PATCH 06/15] drm/amdgpu: umc v12_0 converts error address

Umc v12_0 converts error address.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 94 +-  
drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 12 
 2 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 81435533c4a7..085dcfe16b5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -222,6 +222,66 @@ static void umc_v12_0_convert_error_address(struct 
amdgpu_device *adev,
}
 }

+static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev,
+   struct ta_ras_query_address_input *addr_in,
+   uint64_t *pfns, int len)
+{
+   uint32_t col, row, row_xor, bank, channel_index;
+   uint64_t soc_pa, retired_page, column, err_addr;
+   struct ta_ras_query_address_output addr_out;
+   uint32_t pos = 0;
+
+   err_addr = addr_in->ma.err_addr;
+   addr_in->addr_type = TA_RAS_MCA_TO_PA;
+   if (psp_ras_query_address(>psp, addr_in, _out)) {
+   dev_warn(adev->dev, "Failed to query RAS physical address for 
0x%llx",
+   err_addr);
+   return 0;
+   }
+
+   soc_pa = addr_out.pa.pa;
+   bank = addr_out.pa.bank;
+   channel_index = addr_out.pa.channel_idx;
+
+   col = (err_addr >> 1) & 0x1fULL;
+   row = (err_addr >> 10) & 0x3fffULL;
+   row_xor = row ^ (0x1ULL << 13);
+   /* clear [C3 C2] in soc physical address */
+   soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
+   /* clear [C4] in soc physical address */
+   soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);
+
+   /* loop for all possibilities of [C4 C3 C2] */
+   for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
+   retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
+   retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);
+
+   if (pos >= len)
+   return 0;
+   pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+
+   /* include column bit 0 and 1 */
+   col &= 0x3;
+   col |= (column << 2);
+   dev_info(adev->dev,
+   "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x 
Bank:0x%x Channel:0x%x\n",
+   retired_page, row, col, bank, channel_index);
+
+   /* shift R13 bit */
+   retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
+
+   if (pos >= len)
+   return 0;
+   pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+
+   dev_info(adev->dev,
+   "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x 
Bank:0x%x Channel:0x%x\n",
+   retired_page, row_xor, col, bank, channel_index);
+   }
+
+   return pos;
+}
+
 static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
uint32_t node_inst, uint32_t umc_inst,
uint32_t ch_inst, void *data)
@@ -482,8 +542,12 @@ static int umc_v12_0_ras_late_init(struct amdgpu_device 
*adev, struct ras_common  static int umc_v12_0_update_ecc_status(struct 
amdgpu_device *adev,
uint64_t status, uint64_t ipid, uint64_t addr)  {
-   uint16_t hwid, mcatype;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+   uint16_t hwid, mcatype;
+   struct ta_ras_query_address_input addr_in;
+   uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
+   uint64_t err_addr;
+   int count;

hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); @@ -497,6 +561,34 
@@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
if (!umc_v12_0_is_deferred_error(adev, status))
return 0;

+   err_addr = REG_GET_FIELD(addr,
+   MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+   dev_info(adev->dev,
+   "UMC:IPID:0x%llx, socket:%llu, aid:%llu, inst:%llu, ch:%llu, 
err_addr:0x%llx\n",
+   ipid,
+   MCA_IPID_2_SOCKET_ID(ipid),
+   MCA_IPID_2_DIE_ID(ipid),
+   MCA_IPID_2_UMC_INST(ipid),
+   MCA_IPID_2_UMC_CH(ipid),
+   err_addr);
+
+   

RE: [PATCH 11/15] drm/amdgpu: prepare to handle pasid poison consumption

2024-04-24 Thread Zhang, Hawking
[AMD Official Use Only - General]

+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device 
*adev,
+   enum amdgpu_ras_block block, uint16_t pasid,
+   pasid_notify pasid_fn, void *data, uint32_t reset);

So we ultimately switch to above poison consumption handler for all the 
existing v9 adapters, right? If so, we shall be able to make this function 
backwards compatible. I'm wondering if we can just change the existing 
amdgpu_amdkfd_ras_poison_consumption_handler.

Pasid_poison_consumption_handler is a little bit confusing.

Regards,
Hawking

-Original Message-
From: Chai, Thomas 
Sent: Thursday, April 18, 2024 10:59
To: amd-gfx@lists.freedesktop.org
Cc: Chai, Thomas ; Zhang, Hawking ; 
Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley ; 
Chai, Thomas 
Subject: [PATCH 11/15] drm/amdgpu: prepare to handle pasid poison consumption

Prepare to handle pasid poison consumption.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  9 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c   | 20 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h   |  3 +++
 .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  3 ++-
 5 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 66753940bb4d..287ce431901c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -759,10 +759,17 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
return amdgpu_ras_get_fed_status(adev);  }

+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device 
*adev,
+   enum amdgpu_ras_block block, uint16_t pasid,
+   pasid_notify pasid_fn, void *data, uint32_t 
reset) {
+   amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data,
+reset); }
+
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
enum amdgpu_ras_block block, uint32_t reset)  {
-   amdgpu_umc_poison_handler(adev, block, reset);
+   amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
 }

 int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index ad50c7bbc326..54e15994d02b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -401,6 +401,11 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device 
*adev,
struct tile_config *config);
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
enum amdgpu_ras_block block, uint32_t reset);
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device 
*adev,
+   enum amdgpu_ras_block block, uint16_t pasid,
+   pasid_notify pasid_fn, void *data, uint32_t reset);
+
 bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);  bool 
amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem 
*mem);  void amdgpu_amdkfd_block_mmu_notifications(void *p); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index dcda3d24bee3..8ebbca9e2e22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -252,8 +252,9 @@ int amdgpu_umc_bad_page_polling_timeout(struct 
amdgpu_device *adev,
return 0;
 }

-int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
-   enum amdgpu_ras_block block, uint32_t reset)
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+   enum amdgpu_ras_block block, uint16_t pasid,
+   pasid_notify pasid_fn, void *data, uint32_t reset)
 {
int ret = AMDGPU_RAS_SUCCESS;

@@ -291,16 +292,14 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,

amdgpu_ras_error_data_fini(_data);
} else {
-   if (reset) {
-   amdgpu_umc_bad_page_polling_timeout(adev,
-   reset, 
MAX_UMC_POISON_POLLING_TIME_SYNC);
-   } else {
struct amdgpu_ras *con = 
amdgpu_ras_get_context(adev);

+   amdgpu_ras_put_poison_req(adev,
+   block, pasid, pasid_fn, data, reset);
+
atomic_inc(>page_retirement_req_cnt);

wake_up(>page_retirement_wq);
-   }
}
} else {
if (adev->virt.ops && 

RE: [PATCH V2] drm/amdgpu: Fix ras mode2 reset failure in ras aca mode

2024-04-24 Thread Zhang, Hawking
[AMD Official Use Only - General]

The patch is Reviewed-by: Hawking Zhang 

Kevin, Thomas,

Alternatively, we need to explore the opportunity to centralize legacy ras and 
aca ras implementation in the same API. Take sysfs create/remove interface for 
example, legacy RAS and ACA RAS do share the same logic, just have different 
filesystem node.

For now, ACA RAS is trending to back to IP specific ras late init. Let's 
revisit the code to see if we can re-use the common ras_late_init or create 
aca_ras_late_init api.

Regards,
Hawking

-Original Message-
From: Chai, Thomas 
Sent: Wednesday, April 24, 2024 13:52
To: amd-gfx@lists.freedesktop.org
Cc: Chai, Thomas ; Zhang, Hawking ; 
Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley ; 
Chai, Thomas 
Subject: [PATCH V2] drm/amdgpu: Fix ras mode2 reset failure in ras aca mode

Fix ras mode2 reset failure in ras aca mode.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index edb3cd0cef96..11a70991152c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1254,6 +1254,10 @@ int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum 
amdgpu_ras_block blk,  {
struct ras_manager *obj;

+   /* in resume phase, no need to create aca fs node */
+   if (adev->in_suspend || amdgpu_in_reset(adev))
+   return 0;
+
obj = get_ras_manager(adev, blk);
if (!obj)
return -EINVAL;
--
2.34.1



RE: [PATCH 04/15] drm/amdgpu: add poison creation handler

2024-04-24 Thread Zhang, Hawking
[AMD Official Use Only - General]

Is it okay to drop the below static function and just implement the logic in the poison 
creation handler, leveraging the RAS query API amdgpu_ras_query_error_status?

It seems to me the static function may not be able to be used for other IP 
blocks.

Regards,
Hawking

+ static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev,
+   enum amdgpu_ras_block ras_block, uint32_t timeout_ms) {
+   int ret = 0;
+   struct ras_ecc_log_info *ecc_log;
+   struct ras_query_if info;
+   uint32_t timeout = timeout_ms;
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+   memset(, 0, sizeof(info));
+   info.head.block = ras_block;
+
+   ecc_log = >umc_ecc_log;
+   ecc_log->de_updated = false;
+   do {
+   ret = amdgpu_ras_query_error_status(adev, );
+   if (ret) {
+   dev_err(adev->dev, "Failed to query ras error! 
ret:%d\n", ret);
+   return ret;
+   }
+
+   if (timeout && !ecc_log->de_updated) {
+   msleep(1);
+   timeout--;
+   }
+   } while (timeout && !ecc_log->de_updated);
+
+   if (timeout_ms && !timeout) {
+   dev_warn(adev->dev, "Can't find deferred error\n");
+   return -ETIMEDOUT;
+   }
+
+   return 0;
+}
+
+static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
+   uint32_t timeout)
+{
+   amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC, timeout); }
+

-Original Message-
From: amd-gfx  On Behalf Of YiPeng Chai
Sent: Thursday, April 18, 2024 10:58
To: amd-gfx@lists.freedesktop.org
Cc: Chai, Thomas ; Zhang, Hawking ; 
Zhou1, Tao ; Li, Candice ; Wang, 
Yang(Kevin) ; Yang, Stanley ; 
Chai, Thomas 
Subject: [PATCH 04/15] drm/amdgpu: add poison creation handler

Add poison creation handler.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 74 +++--
 1 file changed, 69 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 64e6e20c6de7..126616eaeec1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2080,6 +2080,17 @@ static void 
amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj  {
dev_info(obj->adev->dev,
"Poison is created\n");
+
+   if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
+   struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
+
+   amdgpu_ras_put_poison_req(obj->adev,
+   AMDGPU_RAS_BLOCK__UMC, 0, NULL, NULL, false);
+
+   atomic_inc(>page_retirement_req_cnt);
+
+   wake_up(>page_retirement_wq);
+   }
 }

 static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, @@ 
-2754,10 +2765,54 @@ static void amdgpu_ras_ecc_log_fini(struct 
ras_ecc_log_info *ecc_log)
mutex_destroy(_log->lock);
ecc_log->de_updated = false;
 }
+
+static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev,
+   enum amdgpu_ras_block ras_block, uint32_t timeout_ms) {
+   int ret = 0;
+   struct ras_ecc_log_info *ecc_log;
+   struct ras_query_if info;
+   uint32_t timeout = timeout_ms;
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+   memset(, 0, sizeof(info));
+   info.head.block = ras_block;
+
+   ecc_log = >umc_ecc_log;
+   ecc_log->de_updated = false;
+   do {
+   ret = amdgpu_ras_query_error_status(adev, );
+   if (ret) {
+   dev_err(adev->dev, "Failed to query ras error! 
ret:%d\n", ret);
+   return ret;
+   }
+
+   if (timeout && !ecc_log->de_updated) {
+   msleep(1);
+   timeout--;
+   }
+   } while (timeout && !ecc_log->de_updated);
+
+   if (timeout_ms && !timeout) {
+   dev_warn(adev->dev, "Can't find deferred error\n");
+   return -ETIMEDOUT;
+   }
+
+   return 0;
+}
+
+static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
+   uint32_t timeout)
+{
+   amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC, timeout); }
+
 static int amdgpu_ras_page_retirement_thread(void *param)  {
struct amdgpu_device *adev = (struct amdgpu_device *)param;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+   struct ras_poison_msg poison_msg;
+   enum amdgpu_ras_block ras_block;

while (!kthread_should_stop()) {

@@ -2768,13 +2823,22 @@ static int amdgpu_ras_page_retirement_thread(void 
*param)
if (kthread_should_stop())
break;

-   

[linux-next:master] BUILD REGRESSION 5e4f84f18c4ee9b0ccdc19e39b7de41df21699dd

2024-04-24 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 5e4f84f18c4ee9b0ccdc19e39b7de41df21699dd  Add linux-next specific 
files for 20240424

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202404242144.8931hnhx-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202404242330.fb2cmamd-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202404242344.myso5vxe-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202404250156.2pqrwmex-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202404250209.hmhcgegb-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202404250552.gnss0wy7-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202404250558.hmgiruu0-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

/usr/bin/ld: pse_core.c:(.text+0x72): undefined reference to `rdev_get_id'
ERROR: modpost: "__spi_register_driver" [drivers/iio/dac/ad9739a.ko] undefined!
ERROR: modpost: "spi_async" [drivers/base/regmap/regmap-spi.ko] undefined!
ERROR: modpost: "spi_sync" [drivers/base/regmap/regmap-spi.ko] undefined!
ERROR: modpost: "spi_write_then_read" [drivers/base/regmap/regmap-spi.ko] 
undefined!
WARNING: modpost: vmlinux: section mismatch in reference: dentry_name+0x7c 
(section: .text) -> .LVL1195 (section: .init.text)
WARNING: modpost: vmlinux: section mismatch in reference: fwnode_string+0x230 
(section: .text) -> .LVL1131 (section: .init.text)
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:379:52: error: '%s' directive output 
may be truncated writing up to 29 bytes into a region of size 23 
[-Werror=format-truncation=]
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:379:52: warning: '%s' directive output 
may be truncated writing up to 29 bytes into a region of size 23 
[-Wformat-truncation=]
kismet: WARNING: unmet direct dependencies detected for REGMAP_SPI when 
selected by AD9739A
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn401/dcn401_fpu.o uses hard 
float, drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses 
soft float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/dml21_translation_helper.o 
uses hard float, 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses soft 
float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/dml21_utils.o uses hard 
float, drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses 
soft float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o
 uses hard float, 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses soft 
float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o
 uses hard float, 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses soft 
float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o
 uses hard float, 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses soft 
float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o
 uses hard float, 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses soft 
float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o
 uses hard float, 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses soft 
float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o
 uses hard float, 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses soft 
float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.o
 uses hard float, 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses soft 
float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o
 uses hard float, 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses soft 
float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o
 uses hard float, 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses soft 
float
powerpc-linux-ld: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.o
 uses hard float, 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_helpers.o uses soft 
float
powerpc-linux-ld: drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.o uses 
hard float, arch/powerpc/kernel/udbg.o uses soft float
powerpc-linux-ld: 
drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.o uses hard 
float, arch/powerpc/kernel/udbg.o uses soft float
powerpc-linux-ld: drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.o uses 
hard float, arch/powerpc/kernel/udbg.o uses soft float

Re: [PATCH 1/2] drm/print: drop include debugfs.h and include where needed

2024-04-24 Thread Lucas De Marchi

On Mon, Apr 22, 2024 at 03:10:10PM GMT, Jani Nikula wrote:

drivers/gpu/drm/xe/xe_debugfs.c | 1 +
drivers/gpu/drm/xe/xe_gt_debugfs.c  | 2 ++
drivers/gpu/drm/xe/xe_uc_debugfs.c  | 2 ++



Acked-by: Lucas De Marchi 

thanks
Lucas De Marchi


Re: [PATCH 1/2] drm/print: drop include debugfs.h and include where needed

2024-04-24 Thread Lyude Paul
For the nouveau bits:

Reviewed-by: Lyude Paul 

On Mon, 2024-04-22 at 15:10 +0300, Jani Nikula wrote:
> Surprisingly many places depend on debugfs.h to be included via
> drm_print.h. Fix them.
> 
> v3: Also fix armada, ite-it6505, imagination, msm, sti, vc4, and xe
> 
> v2: Also fix ivpu and vmwgfx
> 
> Reviewed-by: Andrzej Hajda 
> Acked-by: Maxime Ripard 
> Link:
> https://patchwork.freedesktop.org/patch/msgid/20240410141434.157908-1-jani.nik...@intel.com
> Signed-off-by: Jani Nikula 
> 
> ---
> 
> Cc: Jacek Lawrynowicz 
> Cc: Stanislaw Gruszka 
> Cc: Oded Gabbay 
> Cc: Russell King 
> Cc: David Airlie 
> Cc: Daniel Vetter 
> Cc: Andrzej Hajda 
> Cc: Neil Armstrong 
> Cc: Robert Foss 
> Cc: Laurent Pinchart 
> Cc: Jonas Karlman 
> Cc: Jernej Skrabec 
> Cc: Maarten Lankhorst 
> Cc: Maxime Ripard 
> Cc: Thomas Zimmermann 
> Cc: Jani Nikula 
> Cc: Rodrigo Vivi 
> Cc: Joonas Lahtinen 
> Cc: Tvrtko Ursulin 
> Cc: Frank Binns 
> Cc: Matt Coster 
> Cc: Rob Clark 
> Cc: Abhinav Kumar 
> Cc: Dmitry Baryshkov 
> Cc: Sean Paul 
> Cc: Marijn Suijten 
> Cc: Karol Herbst 
> Cc: Lyude Paul 
> Cc: Danilo Krummrich 
> Cc: Alex Deucher 
> Cc: "Christian König" 
> Cc: "Pan, Xinhui" 
> Cc: Alain Volmat 
> Cc: Huang Rui 
> Cc: Zack Rusin 
> Cc: Broadcom internal kernel review list
> 
> Cc: Lucas De Marchi 
> Cc: "Thomas Hellström" 
> Cc: dri-de...@lists.freedesktop.org
> Cc: intel-...@lists.freedesktop.org
> Cc: intel...@lists.freedesktop.org
> Cc: linux-arm-...@vger.kernel.org
> Cc: freedr...@lists.freedesktop.org
> Cc: nouv...@lists.freedesktop.org
> Cc: amd-gfx@lists.freedesktop.org
> ---
>  drivers/accel/ivpu/ivpu_debugfs.c   | 2 ++
>  drivers/gpu/drm/armada/armada_debugfs.c | 1 +
>  drivers/gpu/drm/bridge/ite-it6505.c | 1 +
>  drivers/gpu/drm/bridge/panel.c  | 2 ++
>  drivers/gpu/drm/drm_print.c | 6 +++---
>  drivers/gpu/drm/i915/display/intel_dmc.c    | 1 +
>  drivers/gpu/drm/imagination/pvr_fw_trace.c  | 1 +
>  drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c | 2 ++
>  drivers/gpu/drm/nouveau/dispnv50/crc.c  | 2 ++
>  drivers/gpu/drm/radeon/r100.c   | 1 +
>  drivers/gpu/drm/radeon/r300.c   | 1 +
>  drivers/gpu/drm/radeon/r420.c   | 1 +
>  drivers/gpu/drm/radeon/r600.c   | 3 ++-
>  drivers/gpu/drm/radeon/radeon_fence.c   | 1 +
>  drivers/gpu/drm/radeon/radeon_gem.c | 1 +
>  drivers/gpu/drm/radeon/radeon_ib.c  | 2 ++
>  drivers/gpu/drm/radeon/radeon_pm.c  | 1 +
>  drivers/gpu/drm/radeon/radeon_ring.c    | 2 ++
>  drivers/gpu/drm/radeon/radeon_ttm.c | 1 +
>  drivers/gpu/drm/radeon/rs400.c  | 1 +
>  drivers/gpu/drm/radeon/rv515.c  | 1 +
>  drivers/gpu/drm/sti/sti_drv.c   | 1 +
>  drivers/gpu/drm/ttm/ttm_device.c    | 1 +
>  drivers/gpu/drm/ttm/ttm_resource.c  | 3 ++-
>  drivers/gpu/drm/ttm/ttm_tt.c    | 5 +++--
>  drivers/gpu/drm/vc4/vc4_drv.h   | 1 +
>  drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 2 ++
>  drivers/gpu/drm/xe/xe_debugfs.c | 1 +
>  drivers/gpu/drm/xe/xe_gt_debugfs.c  | 2 ++
>  drivers/gpu/drm/xe/xe_uc_debugfs.c  | 2 ++
>  include/drm/drm_print.h | 2 +-
>  31 files changed, 46 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/accel/ivpu/ivpu_debugfs.c
> b/drivers/accel/ivpu/ivpu_debugfs.c
> index d09d29775b3f..e07e447d08d1 100644
> --- a/drivers/accel/ivpu/ivpu_debugfs.c
> +++ b/drivers/accel/ivpu/ivpu_debugfs.c
> @@ -3,6 +3,8 @@
>   * Copyright (C) 2020-2023 Intel Corporation
>   */
>  
> +#include 
> +
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/gpu/drm/armada/armada_debugfs.c
> b/drivers/gpu/drm/armada/armada_debugfs.c
> index 29f4b52e3c8d..a763349dd89f 100644
> --- a/drivers/gpu/drm/armada/armada_debugfs.c
> +++ b/drivers/gpu/drm/armada/armada_debugfs.c
> @@ -5,6 +5,7 @@
>   */
>  
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/gpu/drm/bridge/ite-it6505.c
> b/drivers/gpu/drm/bridge/ite-it6505.c
> index 27334173e911..3f68c82888c2 100644
> --- a/drivers/gpu/drm/bridge/ite-it6505.c
> +++ b/drivers/gpu/drm/bridge/ite-it6505.c
> @@ -3,6 +3,7 @@
>   * Copyright (c) 2020, The Linux Foundation. All rights reserved.
>   */
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/gpu/drm/bridge/panel.c
> b/drivers/gpu/drm/bridge/panel.c
> index 7f41525f7a6e..32506524d9a2 100644
> --- a/drivers/gpu/drm/bridge/panel.c
> +++ b/drivers/gpu/drm/bridge/panel.c
> @@ -4,6 +4,8 @@
>   * Copyright (C) 2017 Broadcom
>   */
>  
> +#include 
> +
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/gpu/drm/drm_print.c
> b/drivers/gpu/drm/drm_print.c
> index 699b7dbffd7b..cf2efb44722c 100644
> --- a/drivers/gpu/drm/drm_print.c
> +++ b/drivers/gpu/drm/drm_print.c
> @@ -23,13 +23,13 @@
>   * Rob Clark 
>   */
>  
> -#include 
> -
> +#include 
> +#include 
>  #include 
>  

[pull] amdgpu, amdkfd drm-fixes-6.9

2024-04-24 Thread Alex Deucher
Hi Dave, Sima,

Fixes for 6.9.

The following changes since commit ed30a4a51bb196781c8058073ea720133a65596f:

  Linux 6.9-rc5 (2024-04-21 12:35:54 -0700)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-6.9-2024-04-24

for you to fetch changes up to 948255282074d9367e01908b3f5dcf8c10fc9c3d:

  drm/amdgpu/mes: fix use-after-free issue (2024-04-23 23:23:46 -0400)


amd-drm-fixes-6.9-2024-04-24:

amdgpu:
- Suspend/resume fix
- Don't expose gpu_od directory if it's empty
- SDMA 4.4.2 fix
- VPE fix
- BO eviction fix
- UMSCH fix
- SMU 13.0.6 reset fixes
- GPUVM flush accounting fix
- SDMA 5.2 fix
- Fix possible UAF in mes code

amdkfd:
- Eviction fence handling fix
- Fix memory leak when GPU memory allocation fails
- Fix dma-buf validation
- Fix rescheduling of restore worker
- SVM fix


Alex Deucher (1):
  drm/amdgpu/sdma5.2: use legacy HDP flush for SDMA2/3

Felix Kuehling (3):
  drm/amdkfd: Fix eviction fence handling
  drm/amdgpu: Update BO eviction priorities
  drm/amdkfd: Fix rescheduling of restore worker

Jack Xiao (1):
  drm/amdgpu/mes: fix use-after-free issue

Joshua Ashton (1):
  drm/amd/display: Set color_mgmt_changed to true on unsuspend

Lang Yu (2):
  drm/amdkfd: make sure VM is ready for updating operations
  drm/amdgpu/umsch: don't execute umsch test when GPU is in reset/suspend

Lijo Lazar (2):
  drm/amdgpu: Assign correct bits for SDMA HDP flush
  drm/amd/pm: Restore config space after reset

Ma Jun (1):
  drm/amdgpu/pm: Remove gpu_od if it's an empty directory

Mukul Joshi (2):
  drm/amdgpu: Fix leak when GPU memory allocation fails
  drm/amdkfd: Add VRAM accounting for SVM migration

Peyton Lee (1):
  drm/amdgpu/vpe: fix vpe dpm setup failed

Prike Liang (1):
  drm/amdgpu: Fix the ring buffer size for queue VM flush

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c   | 35 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c   |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c |  3 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c |  3 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  2 --
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c   |  3 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 26 +---
 drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c  | 14 -
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c   | 16 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 15 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c   |  2 +-
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  |  1 +
 drivers/gpu/drm/amd/pm/amdgpu_pm.c |  7 +
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c   | 25 
 17 files changed, 111 insertions(+), 49 deletions(-)


RE: [PATCH 1/2] drm/print: drop include debugfs.h and include where needed

2024-04-24 Thread Deucher, Alexander
[Public]

> -Original Message-
> From: Jani Nikula 
> Sent: Wednesday, April 24, 2024 9:55 AM
> To: dri-de...@lists.freedesktop.org
> Cc: Andrzej Hajda ; Maxime Ripard
> ; Jacek Lawrynowicz
> ; Stanislaw Gruszka
> ; Oded Gabbay ;
> Russell King ; David Airlie ; Daniel
> Vetter ; Neil Armstrong ; Robert
> Foss ; Laurent Pinchart
> ; Jonas Karlman ;
> Jernej Skrabec ; Maarten Lankhorst
> ; Thomas Zimmermann
> ; Rodrigo Vivi ; Joonas
> Lahtinen ; Tvrtko Ursulin
> ; Frank Binns ; Matt Coster
> ; Rob Clark ; Abhinav
> Kumar ; Dmitry Baryshkov
> ; Sean Paul ; Marijn Suijten
> ; Karol Herbst ; Lyude
> Paul ; Danilo Krummrich ; Deucher,
> Alexander ; Koenig, Christian
> ; Pan, Xinhui ; Alain
> Volmat ; Huang, Ray ;
> Zack Rusin ; Broadcom internal kernel review list
> ; Lucas De Marchi
> ; Thomas Hellström
> ; intel-...@lists.freedesktop.org; intel-
> x...@lists.freedesktop.org; linux-arm-...@vger.kernel.org;
> freedr...@lists.freedesktop.org; nouv...@lists.freedesktop.org; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH 1/2] drm/print: drop include debugfs.h and include where
> needed
>
> On Mon, 22 Apr 2024, Jani Nikula  wrote:
> > Surprisingly many places depend on debugfs.h to be included via
> > drm_print.h. Fix them.
> >
> > v3: Also fix armada, ite-it6505, imagination, msm, sti, vc4, and xe
> >
> > v2: Also fix ivpu and vmwgfx
> >
> > Reviewed-by: Andrzej Hajda 
> > Acked-by: Maxime Ripard 
> > Link:
> >
> https://patchwork.freedesktop.org/patch/msgid/20240410141434.157908
> -1-
> > jani.nik...@intel.com
> > Signed-off-by: Jani Nikula 
>
> While the changes all over the place are small, mostly just adding the
> debugfs.h include, please consider acking. I've sent this a few times already.
>

For radeon:
Acked-by: Alex Deucher 

> Otherwise, I'll merge this by the end of the week, acks or not.
>
> Thanks,
> Jani.
>
>
>
> >
> > ---
> >
> > Cc: Jacek Lawrynowicz 
> > Cc: Stanislaw Gruszka 
> > Cc: Oded Gabbay 
> > Cc: Russell King 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: Andrzej Hajda 
> > Cc: Neil Armstrong 
> > Cc: Robert Foss 
> > Cc: Laurent Pinchart 
> > Cc: Jonas Karlman 
> > Cc: Jernej Skrabec 
> > Cc: Maarten Lankhorst 
> > Cc: Maxime Ripard 
> > Cc: Thomas Zimmermann 
> > Cc: Jani Nikula 
> > Cc: Rodrigo Vivi 
> > Cc: Joonas Lahtinen 
> > Cc: Tvrtko Ursulin 
> > Cc: Frank Binns 
> > Cc: Matt Coster 
> > Cc: Rob Clark 
> > Cc: Abhinav Kumar 
> > Cc: Dmitry Baryshkov 
> > Cc: Sean Paul 
> > Cc: Marijn Suijten 
> > Cc: Karol Herbst 
> > Cc: Lyude Paul 
> > Cc: Danilo Krummrich 
> > Cc: Alex Deucher 
> > Cc: "Christian König" 
> > Cc: "Pan, Xinhui" 
> > Cc: Alain Volmat 
> > Cc: Huang Rui 
> > Cc: Zack Rusin 
> > Cc: Broadcom internal kernel review list
> > 
> > Cc: Lucas De Marchi 
> > Cc: "Thomas Hellström" 
> > Cc: dri-de...@lists.freedesktop.org
> > Cc: intel-...@lists.freedesktop.org
> > Cc: intel...@lists.freedesktop.org
> > Cc: linux-arm-...@vger.kernel.org
> > Cc: freedr...@lists.freedesktop.org
> > Cc: nouv...@lists.freedesktop.org
> > Cc: amd-gfx@lists.freedesktop.org
> > ---
> >  drivers/accel/ivpu/ivpu_debugfs.c   | 2 ++
> >  drivers/gpu/drm/armada/armada_debugfs.c | 1 +
> >  drivers/gpu/drm/bridge/ite-it6505.c | 1 +
> >  drivers/gpu/drm/bridge/panel.c  | 2 ++
> >  drivers/gpu/drm/drm_print.c | 6 +++---
> >  drivers/gpu/drm/i915/display/intel_dmc.c| 1 +
> >  drivers/gpu/drm/imagination/pvr_fw_trace.c  | 1 +
> > drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c | 2 ++
> >  drivers/gpu/drm/nouveau/dispnv50/crc.c  | 2 ++
> >  drivers/gpu/drm/radeon/r100.c   | 1 +
> >  drivers/gpu/drm/radeon/r300.c   | 1 +
> >  drivers/gpu/drm/radeon/r420.c   | 1 +
> >  drivers/gpu/drm/radeon/r600.c   | 3 ++-
> >  drivers/gpu/drm/radeon/radeon_fence.c   | 1 +
> >  drivers/gpu/drm/radeon/radeon_gem.c | 1 +
> >  drivers/gpu/drm/radeon/radeon_ib.c  | 2 ++
> >  drivers/gpu/drm/radeon/radeon_pm.c  | 1 +
> >  drivers/gpu/drm/radeon/radeon_ring.c| 2 ++
> >  drivers/gpu/drm/radeon/radeon_ttm.c | 1 +
> >  drivers/gpu/drm/radeon/rs400.c  | 1 +
> >  drivers/gpu/drm/radeon/rv515.c  | 1 +
> >  drivers/gpu/drm/sti/sti_drv.c   | 1 +
> >  drivers/gpu/drm/ttm/ttm_device.c| 1 +
> >  drivers/gpu/drm/ttm/ttm_resource.c  | 3 ++-
> >  drivers/gpu/drm/ttm/ttm_tt.c| 5 +++--
> >  drivers/gpu/drm/vc4/vc4_drv.h   | 1 +
> >  drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 2 ++
> >  drivers/gpu/drm/xe/xe_debugfs.c | 1 +
> >  drivers/gpu/drm/xe/xe_gt_debugfs.c  | 2 ++
> >  drivers/gpu/drm/xe/xe_uc_debugfs.c  | 2 ++
> >  include/drm/drm_print.h | 2 +-
> >  31 files changed, 46 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/accel/ivpu/ivpu_debugfs.c
> > b/drivers/accel/ivpu/ivpu_debugfs.c
> > index d09d29775b3f..e07e447d08d1 100644
> > 

Re: [PATCH 2/2] drm/amd/display: Fix CFLAGS for dml2_core_dcn4_calcs.o

2024-04-24 Thread Pillai, Aurabindo
[AMD Official Use Only - General]

Thanks for the fix.

Reviewed-by: Aurabindo Pillai 

--

Regards,
Jay

From: Nathan Chancellor 
Sent: Wednesday, April 24, 2024 2:19 PM
To: Wentland, Harry ; Li, Sun peng (Leo) 
; Siqueira, Rodrigo ; Deucher, 
Alexander ; Koenig, Christian 
; Pan, Xinhui 
Cc: Pillai, Aurabindo ; amd-gfx@lists.freedesktop.org 
; dri-de...@lists.freedesktop.org 
; l...@lists.linux.dev ; 
patc...@lists.linux.dev ; Nathan Chancellor 

Subject: [PATCH 2/2] drm/amd/display: Fix CFLAGS for dml2_core_dcn4_calcs.o

-Wframe-larger-than=2048 is a part of both CFLAGS and CFLAGS_REMOVE for
dml2_core_dcn4_calcs.o, which means that it ultimately gets removed
altogether for 64-bit targets, as 2048 is the default FRAME_WARN value
for 64-bit platforms, resulting in no -Wframe-larger-than coverage for
this file.

Remove -Wframe-larger-than from CFLAGS_REMOVE_dml2_core_dcn4_calcs.o and
move to $(frame_warn_flag) for CFLAGS_dml2_core_dcn4_calcs.o, as that
accounts for the fact that -Wframe-larger-than may need to be larger
than 2048 in certain situations, such as when the sanitizers are
enabled.

Fixes: d546a39c6b10 ("drm/amd/display: Add misc DC changes for DCN401")
Signed-off-by: Nathan Chancellor 
---
 drivers/gpu/drm/amd/display/dc/dml2/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index c35212a4a968..904a2d419638 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -111,7 +111,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top.o 
:= $(dml2_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top_mcache.o := 
$(dml2_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization := 
$(dml2_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := 
$(dml2_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := 
$(dml2_ccflags) -Wframe-larger-than=2048
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := 
$(dml2_ccflags) $(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := 
$(dml2_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := 
$(dml2_ccflags) $(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := 
$(dml2_ccflags)
@@ -134,7 +134,7 @@ 
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top.o := $(dml2_rcfla
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top_mcache.o := 
$(dml2_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.o 
:= $(dml2_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := 
$(dml2_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o 
:= $(dml2_rcflags) -Wframe-larger-than=2048
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o 
:= $(dml2_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := 
$(dml2_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := 
$(dml2_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := 
$(dml2_rcflags)

--
2.44.0



Re: [PATCH] drm/amdkfd: Enforce queue BO's adev

2024-04-24 Thread Felix Kuehling

On 2024-04-24 13:40, Harish Kasiviswanathan wrote:

Queue buffer, though it is in system memory, has to be created using the
correct amdgpu device. Enforce this as the BO needs to mapped to the
GART for MES Hardware scheduler to access it.

Signed-off-by: Harish Kasiviswanathan 


I guess this doesn't break existing user mode. It only makes it fail in 
a more obvious way. If that's the case, the patch is


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 8fd5e0da628c..963cf6d657cb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -373,6 +373,11 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
err = -EINVAL;
goto err_wptr_map_gart;
}
+   if (dev->adev != amdgpu_ttm_adev(wptr_bo->tbo.bdev)) {
+   pr_err("Queue memory allocated to wrong device\n");
+   err = -EINVAL;
+   goto err_wptr_map_gart;
+   }
  
  		err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);

if (err) {


RE: [PATCH 3/3] drm/amdgpu: Fix the uninitialized variable warning

2024-04-24 Thread Deucher, Alexander
[AMD Official Use Only - General]

> -Original Message-
> From: Ma, Jun 
> Sent: Wednesday, April 24, 2024 6:04 AM
> To: amd-gfx@lists.freedesktop.org; Koenig, Christian
> ; Deucher, Alexander
> 
> Cc: Ma, Jun 
> Subject: [PATCH 3/3] drm/amdgpu: Fix the uninitialized variable warning
>
> Initialize the phy_id to 0 to fix the warning of "Using uninitialized value 
> phy_id"
>
> Signed-off-by: Ma Jun 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
> index 8ed0e073656f..df81078aa26d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
> @@ -95,7 +95,7 @@ static ssize_t
> amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
>   struct psp_context *psp = >psp;
>   struct ta_securedisplay_cmd *securedisplay_cmd;
>   struct drm_device *dev = adev_to_drm(adev);
> - uint32_t phy_id;
> + uint32_t phy_id = 0;

Would be better to return an error in case 2: below if size < 3.  Otherwise we 
are just blindly using 0 for phy id.

Alex

>   uint32_t op;
>   char str[64];
>   int ret;
> --
> 2.34.1



[PATCH 2/2] drm/amd/display: Fix CFLAGS for dml2_core_dcn4_calcs.o

2024-04-24 Thread Nathan Chancellor
-Wframe-larger-than=2048 is a part of both CFLAGS and CFLAGS_REMOVE for
dml2_core_dcn4_calcs.o, which means that it ultimately gets removed
altogether for 64-bit targets, as 2048 is the default FRAME_WARN value
for 64-bit platforms, resulting in no -Wframe-larger-than coverage for
this file.

Remove -Wframe-larger-than from CFLAGS_REMOVE_dml2_core_dcn4_calcs.o and
move to $(frame_warn_flag) for CFLAGS_dml2_core_dcn4_calcs.o, as that
accounts for the fact that -Wframe-larger-than may need to be larger
than 2048 in certain situations, such as when the sanitizers are
enabled.

Fixes: d546a39c6b10 ("drm/amd/display: Add misc DC changes for DCN401")
Signed-off-by: Nathan Chancellor 
---
 drivers/gpu/drm/amd/display/dc/dml2/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index c35212a4a968..904a2d419638 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -111,7 +111,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top.o 
:= $(dml2_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top_mcache.o := 
$(dml2_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization := 
$(dml2_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := 
$(dml2_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := 
$(dml2_ccflags) -Wframe-larger-than=2048
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := 
$(dml2_ccflags) $(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := 
$(dml2_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := 
$(dml2_ccflags) $(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := 
$(dml2_ccflags)
@@ -134,7 +134,7 @@ 
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top.o := $(dml2_rcfla
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top_mcache.o := 
$(dml2_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.o 
:= $(dml2_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := 
$(dml2_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o 
:= $(dml2_rcflags) -Wframe-larger-than=2048
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o 
:= $(dml2_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := 
$(dml2_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := 
$(dml2_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := 
$(dml2_rcflags)

-- 
2.44.0



[PATCH 1/2] drm/amd/display: Add frame_warn_flag to dml2_core_shared.o

2024-04-24 Thread Nathan Chancellor
When building with tip of tree Clang, there are some new instances of
-Wframe-larger-than from the new display code (which become fatal with
CONFIG_WERROR=y):

  
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c:754:6:
 error: stack frame size (2488) exceeds limit (2048) in 
'dml2_core_shared_mode_support' [-Werror,-Wframe-larger-than]
754 | bool dml2_core_shared_mode_support(struct 
dml2_core_calcs_mode_support_ex *in_out_params)
|  ^
  
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c:9834:6:
 error: stack frame size (2152) exceeds limit (2048) in 
'dml2_core_shared_mode_programming' [-Werror,-Wframe-larger-than]
   9834 | bool dml2_core_shared_mode_programming(struct 
dml2_core_calcs_mode_programming_ex *in_out_params)
|  ^
  2 errors generated.

These warnings do not occur when CONFIG_K{A,C,M}SAN are disabled, so add
$(frame_warn_flag) to dml2_core_shared.o's CFLAGS, which was added in
commit 6740ec97bcdb ("drm/amd/display: Increase frame warning limit with
KASAN or KCSAN in dml2") to account for this situation.

Fixes: d546a39c6b10 ("drm/amd/display: Add misc DC changes for DCN401")
Signed-off-by: Nathan Chancellor 
---
 drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index 6c76f346b237..c35212a4a968 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -113,7 +113,7 @@ 
CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization := $(dml2_
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := 
$(dml2_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := 
$(dml2_ccflags) -Wframe-larger-than=2048
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := 
$(dml2_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := 
$(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := 
$(dml2_ccflags) $(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := 
$(dml2_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := 
$(dml2_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := 
$(dml2_ccflags)

-- 
2.44.0



[PATCH 0/2] drm/amd/display: Use frame_warn_flag consistently in dml2 Makefile

2024-04-24 Thread Nathan Chancellor
Hi all,

This series resolves a couple instances of -Wframe-larger-than from
the new display code that appear with newer versions of clang along
without another inconsistency I noticed while fixing this, which have
been accounted for with the $(frame_warn_flag) variable.

---
Nathan Chancellor (2):
  drm/amd/display: Add frame_warn_flag to dml2_core_shared.o
  drm/amd/display: Fix CFLAGS for dml2_core_dcn4_calcs.o

 drivers/gpu/drm/amd/display/dc/dml2/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
---
base-commit: d60dc4dd72412d5d9566fdf391e4202b05f88912
change-id: 20240424-amdgpu-dml2-fix-frame-larger-than-dcn401-48ff7e1f51ea

Best regards,
-- 
Nathan Chancellor 



RE: [PATCH 1/3] drm/amdgpu: Fix uninitialized variable warning in amdgpu_afmt_acr

2024-04-24 Thread Deucher, Alexander
[AMD Official Use Only - General]

> -Original Message-
> From: Ma, Jun 
> Sent: Wednesday, April 24, 2024 6:04 AM
> To: amd-gfx@lists.freedesktop.org; Koenig, Christian
> ; Deucher, Alexander
> 
> Cc: Ma, Jun 
> Subject: [PATCH 1/3] drm/amdgpu: Fix uninitialized variable warning in
> amdgpu_afmt_acr
>
> Assign value to clock to fix the warning below:
> "Using uninitialized value res. Field res.clock is uninitialized"
>
> Signed-off-by: Ma Jun 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
> index a4d65973bf7c..9e3442b2d2ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
> @@ -87,7 +87,7 @@ static void amdgpu_afmt_calc_cts(uint32_t clock, int
> *CTS, int *N, int freq)
>
>  struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)  {
> - struct amdgpu_afmt_acr res;
> + struct amdgpu_afmt_acr res = {0};

I think you can drop this hunk since all of the fields are initialized below.

Alex

>   u8 i;
>
>   /* Precalculated values for common clocks */ @@ -100,6 +100,7 @@
> struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)
>   amdgpu_afmt_calc_cts(clock, _32khz, _32khz, 32000);
>   amdgpu_afmt_calc_cts(clock, _44_1khz, _44_1khz,
> 44100);
>   amdgpu_afmt_calc_cts(clock, _48khz, _48khz, 48000);
> + res.clock = clock;
>
>   return res;
>  }
> --
> 2.34.1



Re: [PATCH] drm/amdkfd: Enforce queue BO's adev

2024-04-24 Thread Alex Deucher
On Wed, Apr 24, 2024 at 1:57 PM Harish Kasiviswanathan
 wrote:
>
> Queue buffer, though it is in system memory, has to be created using the
> correct amdgpu device. Enforce this as the BO needs to mapped to the
> GART for MES Hardware scheduler to access it.
>
> Signed-off-by: Harish Kasiviswanathan 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 8fd5e0da628c..963cf6d657cb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -373,6 +373,11 @@ static int kfd_ioctl_create_queue(struct file *filep, 
> struct kfd_process *p,
> err = -EINVAL;
> goto err_wptr_map_gart;
> }
> +   if (dev->adev != amdgpu_ttm_adev(wptr_bo->tbo.bdev)) {
> +   pr_err("Queue memory allocated to wrong device\n");
> +   err = -EINVAL;
> +   goto err_wptr_map_gart;
> +   }
>
> err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
> if (err) {
> --
> 2.34.1
>


[PATCH] drm/amdkfd: Enforce queue BO's adev

2024-04-24 Thread Harish Kasiviswanathan
Queue buffer, though it is in system memory, has to be created using the
correct amdgpu device. Enforce this as the BO needs to mapped to the
GART for MES Hardware scheduler to access it.

Signed-off-by: Harish Kasiviswanathan 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 8fd5e0da628c..963cf6d657cb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -373,6 +373,11 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
err = -EINVAL;
goto err_wptr_map_gart;
}
+   if (dev->adev != amdgpu_ttm_adev(wptr_bo->tbo.bdev)) {
+   pr_err("Queue memory allocated to wrong device\n");
+   err = -EINVAL;
+   goto err_wptr_map_gart;
+   }
 
err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
if (err) {
-- 
2.34.1



Re: [PATCH 1/2] drm/amdgpu: fix double free err_addr pointer warnings

2024-04-24 Thread Deucher, Alexander
[AMD Official Use Only - General]

Acked-by: Alex Deucher 

From: Bob Zhou 
Sent: Tuesday, April 23, 2024 1:32 AM
To: amd-gfx@lists.freedesktop.org ; Deucher, 
Alexander ; Koenig, Christian 

Cc: Zhou, Bob 
Subject: [PATCH 1/2] drm/amdgpu: fix double free err_addr pointer warnings

In amdgpu_umc_bad_page_polling_timeout, amdgpu_umc_handle_bad_pages
may be run many times, which can lead to a double free of err_addr in
some special cases. So set err_addr to NULL to avoid the warnings.

Signed-off-by: Bob Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index f486510fc94c..32e818d182fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -170,6 +170,7 @@ static void amdgpu_umc_handle_bad_pages(struct 
amdgpu_device *adev,
 }

 kfree(err_data->err_addr);
+   err_data->err_addr = NULL;

 mutex_unlock(>page_retirement_lock);
 }
--
2.34.1



[RFC PATCH 11/18] drm/ttm: Bump BO priority count

2024-04-24 Thread Friedrich Vock
For adjustable priorities by userspace, it is nice to have a bit more
granularity.

Signed-off-by: Friedrich Vock 
---
 include/drm/ttm/ttm_resource.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 7d1ce059c8805..241643447488a 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -35,7 +35,7 @@
 #include 
 #include 

-#define TTM_MAX_BO_PRIORITY4U
+#define TTM_MAX_BO_PRIORITY8U
 #define TTM_NUM_MEM_TYPES 8

 struct ttm_device;
--
2.44.0



[RFC PATCH 17/18] drm/amdgpu: Implement EVICTED_VRAM query

2024-04-24 Thread Friedrich Vock
Used by userspace to gauge the severity of memory overcommit and make
prioritization decisions based on it.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++
 include/uapi/drm/amdgpu_drm.h   | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 924baf58e3226..8cba30144bac6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1246,6 +1246,9 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
return copy_to_user(out, _fault,
min((size_t)size, sizeof(gpuvm_fault))) ? 
-EFAULT : 0;
}
+   case AMDGPU_INFO_EVICTED_VRAM:
+   ui64 = 
ttm_resource_manager_evicted_bytes(>mman.vram_mgr.manager);
+   return copy_to_user(out, , min(size, 8u)) ? -EFAULT : 0;
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 53552dd489b9b..5d04719386686 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -920,6 +920,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
 #define AMDGPU_INFO_MAX_IBS0x22
 /* query last page fault info */
 #define AMDGPU_INFO_GPUVM_FAULT0x23
+/* query size of evicted vram allocations */
+#define AMDGPU_INFO_EVICTED_VRAM0x24

 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
 #define AMDGPU_INFO_MMR_SE_INDEX_MASK  0xff
--
2.44.0



[RFC PATCH 16/18] drm/amdgpu: Implement SET_PRIORITY GEM op

2024-04-24 Thread Friedrich Vock
Used by userspace to adjust buffer priorities in response to changes in
application demand and memory pressure.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 20 
 include/uapi/drm/amdgpu_drm.h   |  1 +
 2 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 5ca13e2e50f50..6107810a9c205 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -836,8 +836,10 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 {
struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_amdgpu_gem_op *args = data;
+   struct ttm_resource_manager *man;
struct drm_gem_object *gobj;
struct amdgpu_vm_bo_base *base;
+   struct ttm_operation_ctx ctx;
struct amdgpu_bo *robj;
int r;

@@ -851,6 +853,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
if (unlikely(r))
goto out;

+   memset(, 0, sizeof(ctx));
+   ctx.interruptible = true;
+
switch (args->op) {
case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: {
struct drm_amdgpu_gem_create_in info;
@@ -898,6 +903,21 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,

amdgpu_bo_unreserve(robj);
break;
+   case AMDGPU_GEM_OP_SET_PRIORITY:
+   if (args->value > AMDGPU_BO_PRIORITY_MAX_USER)
+   args->value = AMDGPU_BO_PRIORITY_MAX_USER;
+   ttm_bo_update_priority(>tbo, args->value);
+   if (robj->tbo.evicted_type != TTM_NUM_MEM_TYPES) {
+   ttm_bo_try_unevict(>tbo, );
+   amdgpu_bo_unreserve(robj);
+   } else {
+   amdgpu_bo_unreserve(robj);
+   man = ttm_manager_type(robj->tbo.bdev,
+   robj->tbo.resource->mem_type);
+   ttm_mem_unevict_evicted(robj->tbo.bdev, man,
+   true);
+   }
+   break;
default:
amdgpu_bo_unreserve(robj);
r = -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index bdbe6b262a78d..53552dd489b9b 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -531,6 +531,7 @@ union drm_amdgpu_wait_fences {

 #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO  0
 #define AMDGPU_GEM_OP_SET_PLACEMENT1
+#define AMDGPU_GEM_OP_SET_PRIORITY  2

 /* Sets or returns a value associated with a buffer. */
 struct drm_amdgpu_gem_op {
--
2.44.0



[RFC PATCH 08/18] drm/amdgpu: Don't try moving BOs to preferred domain before submit

2024-04-24 Thread Friedrich Vock
TTM now takes care of moving buffers to the best possible domain.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 191 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h |   4 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   7 -
 4 files changed, 3 insertions(+), 201 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cac0ca64367b3..3004adc6fa679 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1404,8 +1404,6 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev);
 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev);
 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);

-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
- u64 num_vis_bytes);
 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 const u32 *registers,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e9168677ef0a6..92a0cffc1adc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -638,196 +638,19 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
return 0;
 }

-/* Convert microseconds to bytes. */
-static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
-{
-   if (us <= 0 || !adev->mm_stats.log2_max_MBps)
-   return 0;
-
-   /* Since accum_us is incremented by a million per second, just
-* multiply it by the number of MB/s to get the number of bytes.
-*/
-   return us << adev->mm_stats.log2_max_MBps;
-}
-
-static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
-{
-   if (!adev->mm_stats.log2_max_MBps)
-   return 0;
-
-   return bytes >> adev->mm_stats.log2_max_MBps;
-}
-
-/* Returns how many bytes TTM can move right now. If no bytes can be moved,
- * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
- * which means it can go over the threshold once. If that happens, the driver
- * will be in debt and no other buffer migrations can be done until that debt
- * is repaid.
- *
- * This approach allows moving a buffer of any size (it's important to allow
- * that).
- *
- * The currency is simply time in microseconds and it increases as the clock
- * ticks. The accumulated microseconds (us) are converted to bytes and
- * returned.
- */
-static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
- u64 *max_bytes,
- u64 *max_vis_bytes)
-{
-   s64 time_us, increment_us;
-   u64 free_vram, total_vram, used_vram;
-   /* Allow a maximum of 200 accumulated ms. This is basically per-IB
-* throttling.
-*
-* It means that in order to get full max MBps, at least 5 IBs per
-* second must be submitted and not more than 200ms apart from each
-* other.
-*/
-   const s64 us_upper_bound = 20;
-
-   if (!adev->mm_stats.log2_max_MBps) {
-   *max_bytes = 0;
-   *max_vis_bytes = 0;
-   return;
-   }
-
-   total_vram = adev->gmc.real_vram_size - 
atomic64_read(>vram_pin_size);
-   used_vram = ttm_resource_manager_usage(>mman.vram_mgr.manager);
-   free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
-
-   spin_lock(>mm_stats.lock);
-
-   /* Increase the amount of accumulated us. */
-   time_us = ktime_to_us(ktime_get());
-   increment_us = time_us - adev->mm_stats.last_update_us;
-   adev->mm_stats.last_update_us = time_us;
-   adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
- us_upper_bound);
-
-   /* This prevents the short period of low performance when the VRAM
-* usage is low and the driver is in debt or doesn't have enough
-* accumulated us to fill VRAM quickly.
-*
-* The situation can occur in these cases:
-* - a lot of VRAM is freed by userspace
-* - the presence of a big buffer causes a lot of evictions
-*   (solution: split buffers into smaller ones)
-*
-* If 128 MB or 1/8th of VRAM is free, start filling it now by setting
-* accum_us to a positive number.
-*/
-   if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
-   s64 min_us;
-
-   /* Be more aggressive on dGPUs. Try to fill a portion of free
-* VRAM now.
-*/
-   if (!(adev->flags & AMD_IS_APU))
-   min_us = bytes_to_us(adev, free_vram / 4);
-   else
-   

[RFC PATCH 01/18] drm/ttm: Add tracking for evicted memory

2024-04-24 Thread Friedrich Vock
These utilities will be used to keep track of what buffers have been
evicted from any particular place, to try and decide when to try undoing
the eviction.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/ttm/ttm_device.c   |  1 +
 drivers/gpu/drm/ttm/ttm_resource.c | 14 ++
 include/drm/ttm/ttm_device.h   |  5 +
 include/drm/ttm/ttm_resource.h |  9 +
 4 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index f5187b384ae9a..969d627ba06c0 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -219,6 +219,7 @@ int ttm_device_init(struct ttm_device *bdev, const struct 
ttm_device_funcs *func

bdev->vma_manager = vma_manager;
spin_lock_init(>lru_lock);
+   spin_lock_init(>unevict_lock);
INIT_LIST_HEAD(>pinned);
bdev->dev_mapping = mapping;
mutex_lock(_global_mutex);
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 46ff9c75bb124..1d6755a1153b1 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 

 #include 
 #include 
@@ -392,9 +393,11 @@ void ttm_resource_manager_init(struct ttm_resource_manager 
*man,
man->bdev = bdev;
man->size = size;
man->usage = 0;
+   man->evicted_bytes = 0;

for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
INIT_LIST_HEAD(>lru[i]);
+   INIT_LIST_HEAD(>evicted);
man->move = NULL;
 }
 EXPORT_SYMBOL(ttm_resource_manager_init);
@@ -470,6 +473,17 @@ uint64_t ttm_resource_manager_usage(struct 
ttm_resource_manager *man)
 }
 EXPORT_SYMBOL(ttm_resource_manager_usage);

+uint64_t ttm_resource_manager_evicted_bytes(struct ttm_resource_manager *man)
+{
+   uint64_t evicted;
+
+   spin_lock(>bdev->unevict_lock);
+   evicted = man->evicted_bytes;
+   spin_unlock(>bdev->unevict_lock);
+   return evicted;
+}
+EXPORT_SYMBOL(ttm_resource_manager_evicted_bytes);
+
 /**
  * ttm_resource_manager_debug
  *
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index c22f30535c848..baa264efe483d 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -251,6 +251,11 @@ struct ttm_device {
 */
spinlock_t lru_lock;

+   /**
+* @unevict_lock: Protection for per-manager uneviction tracking
+*/
+   spinlock_t unevict_lock;
+
/**
 * @pinned: Buffer objects which are pinned and so not on any LRU list.
 */
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 78a226eba953c..7d1ce059c8805 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -145,6 +145,7 @@ struct ttm_resource_manager_func {
  * @move_lock: lock for move fence
  * @move: The fence of the last pipelined move operation.
  * @lru: The lru list for this memory type.
+ * @evicted: List of bos evicted from this memory type
  *
  * This structure is used to identify and manage memory types for a device.
  */
@@ -163,6 +164,7 @@ struct ttm_resource_manager {
 * Protected by @move_lock.
 */
struct dma_fence *move;
+   struct list_head evicted;

/*
 * Protected by the bdev->lru_lock.
@@ -174,6 +176,12 @@ struct ttm_resource_manager {
 * bdev->lru_lock.
 */
uint64_t usage;
+
+   /**
+* @evicted_bytes: How many bytes are evicted from this manager,
+* protected by bdev->unevict_lock
+*/
+   uint64_t evicted_bytes;
 };

 /**
@@ -382,6 +390,7 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
   struct ttm_resource_manager *man);

 uint64_t ttm_resource_manager_usage(struct ttm_resource_manager *man);
+uint64_t ttm_resource_manager_evicted_bytes(struct ttm_resource_manager *man);
 void ttm_resource_manager_debug(struct ttm_resource_manager *man,
struct drm_printer *p);

--
2.44.0



[RFC PATCH 07/18] drm/amdgpu: Add TTM uneviction control functions

2024-04-24 Thread Friedrich Vock
Try unevicting only VRAM/GTT BOs.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 50 +
 1 file changed, 50 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 64f5001a7dc5d..98e8a40408804 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -166,6 +166,31 @@ static void amdgpu_evict_flags(struct ttm_buffer_object 
*bo,
*placement = abo->placement;
 }

+/**
+ * amdgpu_unevict_flags - Compute placement flags
+ *
+ * @bo: The buffer object to unevict
+ * @dest: Destination for unevicted BO
+ *
+ * Fill in placement data for restoring evicted BOs
+ */
+static void amdgpu_unevict_flags(struct ttm_buffer_object *bo,
+struct ttm_placement *dest)
+{
+   struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+
+   WARN_ON(bo->evicted_type == AMDGPU_PL_GDS ||
+   bo->evicted_type == AMDGPU_PL_GWS ||
+   bo->evicted_type == AMDGPU_PL_OA ||
+   bo->evicted_type == AMDGPU_PL_DOORBELL);
+   WARN_ON(bo->evicted_type == TTM_NUM_MEM_TYPES);
+
+   amdgpu_bo_placement_from_domain(abo, abo->preferred_domains);
+   *dest = abo->placement;
+   dest->num_placement = 1;
+   dest->num_busy_placement = 1;
+}
+
 /**
  * amdgpu_ttm_map_buffer - Map memory into the GART windows
  * @bo: buffer object to map
@@ -1424,6 +1449,29 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct 
ttm_buffer_object *bo,
return ttm_bo_eviction_valuable(bo, place);
 }

+/*
+ * amdgpu_ttm_bo_uneviction_valuable - Check to see if we can unevict a
+ * buffer object.
+ *
+ * Return true if uneviction is sensible. Called by ttm_bo_evict to
+ * decide whether to consider the buffer object for uneviction later.
+ */
+static bool amdgpu_ttm_bo_uneviction_valuable(struct ttm_buffer_object *bo)
+{
+   struct amdgpu_bo *abo;
+
+   if (!amdgpu_bo_is_amdgpu_bo(bo))
+   return false;
+
+   abo = ttm_to_amdgpu_bo(bo);
+
+   if (bo->type != ttm_bo_type_device)
+   return false;
+
+   return (abo->preferred_domains &
+   (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) != 0;
+}
+
 static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
  void *buf, size_t size, bool write)
 {
@@ -1581,6 +1629,8 @@ static struct ttm_device_funcs amdgpu_bo_driver = {
.ttm_tt_destroy = _ttm_backend_destroy,
.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
.evict_flags = _evict_flags,
+   .uneviction_valuable = _ttm_bo_uneviction_valuable,
+   .unevict_flags = _unevict_flags,
.move = _bo_move,
.delete_mem_notify = _bo_delete_mem_notify,
.release_notify = _bo_release_notify,
--
2.44.0



[RFC PATCH 10/18] drm/amdgpu: Don't add GTT to initial domains after failing to allocate VRAM

2024-04-24 Thread Friedrich Vock
This adds GTT to the "preferred domains" of this buffer object, which
will also prevent any attempts at moving the buffer back to VRAM if
there is space. If VRAM is full, GTT will already be chosen as a
fallback.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 6bbab141eaaeb..aea3770d3ea2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -378,10 +378,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void 
*data,
goto retry;
}

-   if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
-   initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
-   goto retry;
-   }
DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, 
%d)\n",
size, initial_domain, args->in.alignment, r);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 85c10d8086188..9978b85ed6f40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -619,7 +619,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
  AMDGPU_GEM_DOMAIN_GDS))
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
else
-   amdgpu_bo_placement_from_domain(bo, bp->domain);
+   amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
if (bp->type == ttm_bo_type_kernel)
bo->tbo.priority = 2;
else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE))
--
2.44.0



[RFC PATCH 14/18] drm/ttm: Consider BOs placed in non-favorite locations evicted

2024-04-24 Thread Friedrich Vock
If we didn't get the favorite placement because it was full, we should
try moving it into the favorite placement once there is space.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 28 +++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6ac939c58a6b8..af8209f3bc894 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -,7 +,9 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
struct ttm_placement *placement,
struct ttm_operation_ctx *ctx)
 {
-   int ret;
+   struct ttm_resource_manager *man;
+   int favorite_mem_type;
+   int ret, i;

dma_resv_assert_held(bo->base.resv);

@@ -1133,6 +1135,30 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
if (ret)
return ret;

+   if (bo->resource) {
+   /*
+* Also mark the buffer as evicted if we ended up in a
+* non-favorite busy placement, so the buffer get
+* moved into the favorite spot if possible.
+*/
+   for (i = 1; i < placement->num_busy_placement; ++i) {
+   if (bo->resource->mem_type !=
+   placement->busy_placement[i].mem_type)
+   continue;
+
+   favorite_mem_type =
+   placement->busy_placement[0].mem_type;
+   man = ttm_manager_type(bo->bdev,
+  favorite_mem_type);
+
+   spin_lock(>bdev->unevict_lock);
+   list_add_tail(>evicted, >evicted);
+   man->evicted_bytes += bo->base.size;
+   spin_unlock(>bdev->unevict_lock);
+   bo->evicted_type = favorite_mem_type;
+   }
+   }
+
/*
 * We might need to add a TTM.
 */
--
2.44.0



[RFC PATCH 13/18] drm/ttm: Implement ttm_bo_update_priority

2024-04-24 Thread Friedrich Vock
Used to dynamically adjust priorities of buffers at runtime, to react to
changes in memory pressure/usage patterns.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 17 +
 include/drm/ttm/ttm_bo.h |  2 ++
 2 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index eae54cd4a7ce9..6ac939c58a6b8 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -112,6 +112,23 @@ void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_set_bulk_move);

+void ttm_bo_update_priority(struct ttm_buffer_object *bo, unsigned int 
new_prio)
+{
+   struct ttm_resource_manager *man;
+
+   if (!bo->resource)
+   return;
+
+   man = ttm_manager_type(bo->bdev, bo->resource->mem_type);
+
+   spin_lock(>bdev->lru_lock);
+   ttm_resource_del_bulk_move(bo->resource, bo);
+   bo->priority = new_prio;
+   ttm_resource_add_bulk_move(bo->resource, bo);
+   spin_unlock(>bdev->lru_lock);
+}
+EXPORT_SYMBOL(ttm_bo_update_priority);
+
 static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
  struct ttm_resource *mem, bool evict,
  struct ttm_operation_ctx *ctx,
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 91299a3b6fcfa..51040bc443ea0 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -359,6 +359,8 @@ static inline void *ttm_kmap_obj_virtual(struct 
ttm_bo_kmap_obj *map,
return map->virtual;
 }

+void ttm_bo_update_priority(struct ttm_buffer_object *bo,
+   unsigned int new_prio);

 int ttm_bo_wait_ctx(struct ttm_buffer_object *bo,
struct ttm_operation_ctx *ctx);
--
2.44.0



[RFC PATCH 12/18] drm/ttm: Do not evict BOs with higher priority

2024-04-24 Thread Friedrich Vock
This makes buffer eviction significantly more stable by avoiding
ping-ponging caused by low-priority buffers evicting high-priority
buffers and vice versa.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/ttm/ttm_bo.c   | 9 +++--
 drivers/gpu/drm/ttm/ttm_resource.c | 5 +++--
 include/drm/ttm/ttm_bo.h   | 1 +
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3047c763eb4eb..eae54cd4a7ce9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -776,6 +776,7 @@ static int ttm_mem_evict_wait_busy(struct ttm_buffer_object 
*busy_bo,
 int ttm_mem_evict_first(struct ttm_device *bdev,
struct ttm_resource_manager *man,
const struct ttm_place *place,
+   unsigned int max_priority,
struct ttm_operation_ctx *ctx,
struct ww_acquire_ctx *ticket)
 {
@@ -788,6 +789,8 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
spin_lock(>lru_lock);
ttm_resource_manager_for_each_res(man, , res) {
bool busy;
+   if (res->bo->priority > max_priority)
+   break;

if (!ttm_bo_evict_swapout_allowable(res->bo, ctx, place,
, )) {
@@ -930,8 +933,10 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object 
*bo,
return ret;
if (ctx->no_evict)
return -ENOSPC;
-   ret = ttm_mem_evict_first(bdev, man, place, ctx,
- ticket);
+   if (!bo->priority)
+   return -ENOSPC;
+   ret = ttm_mem_evict_first(bdev, man, place, bo->priority - 1,
+ ctx, ticket);
if (unlikely(ret != 0))
return ret;
} while (1);
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 1d6755a1153b1..63d4371adb519 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -431,8 +431,9 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
while (!list_empty(>lru[i])) {
spin_unlock(>lru_lock);
-   ret = ttm_mem_evict_first(bdev, man, NULL, ,
- NULL);
+   ret = ttm_mem_evict_first(bdev, man, NULL,
+ TTM_MAX_BO_PRIORITY,
+ , NULL);
if (ret)
return ret;
spin_lock(>lru_lock);
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 8f4e6366c0417..91299a3b6fcfa 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -396,6 +396,7 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo);
 int ttm_mem_evict_first(struct ttm_device *bdev,
struct ttm_resource_manager *man,
const struct ttm_place *place,
+   unsigned int max_priority,
struct ttm_operation_ctx *ctx,
struct ww_acquire_ctx *ticket);
 void ttm_mem_unevict_evicted(struct ttm_device *bdev,
--
2.44.0



[RFC PATCH 05/18] drm/ttm: Add option to evict no BOs in operation

2024-04-24 Thread Friedrich Vock
When undoing evictions because of decreased memory pressure, it makes no
sense to try evicting other buffers.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 2 ++
 include/drm/ttm/ttm_bo.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9a0efbf79316c..3b89fabc2f00a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -764,6 +764,8 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object 
*bo,
break;
if (unlikely(ret != -ENOSPC))
return ret;
+   if (ctx->no_evict)
+   return -ENOSPC;
ret = ttm_mem_evict_first(bdev, man, place, ctx,
  ticket);
if (unlikely(ret != 0))
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 8a1a29c6fbc50..a8f21092403d6 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -192,6 +192,7 @@ struct ttm_operation_ctx {
bool gfp_retry_mayfail;
bool allow_res_evict;
bool force_alloc;
+   bool no_evict;
struct dma_resv *resv;
uint64_t bytes_moved;
 };
@@ -358,6 +359,7 @@ static inline void *ttm_kmap_obj_virtual(struct 
ttm_bo_kmap_obj *map,
return map->virtual;
 }

+
 int ttm_bo_wait_ctx(struct ttm_buffer_object *bo,
struct ttm_operation_ctx *ctx);
 int ttm_bo_validate(struct ttm_buffer_object *bo,
--
2.44.0



[RFC PATCH 18/18] drm/amdgpu: Bump minor version

2024-04-24 Thread Friedrich Vock
Indicates support for EVICTED_VRAM queries and
AMDGPU_GEM_OP_SET_PRIORITY

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ea14f1c8f4304..4f8b62dbba17f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -116,9 +116,10 @@
  * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
  * - 3.56.0 - Update IB start address and size alignment for decode and encode
  * - 3.57.0 - Compute tunneling on GFX10+
+ * - 3.58.0 - Per-BO priorities and evicted memory size queries
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   57
+#define KMS_DRIVER_MINOR   58
 #define KMS_DRIVER_PATCHLEVEL  0

 /*
--
2.44.0



[RFC PATCH 15/18] drm/amdgpu: Set a default priority for user/kernel BOs

2024-04-24 Thread Friedrich Vock
Reserve the highest priority for the kernel, and choose a balanced value
as userspace default. Userspace is intended to be able to modify these
later to mark buffers as important/unimportant.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index aea3770d3ea2e..5ca13e2e50f50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -114,6 +114,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, 
unsigned long size,
bp.type = type;
bp.resv = resv;
bp.preferred_domain = initial_domain;
+   bp.priority = 4;
bp.flags = flags;
bp.domain = initial_domain;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 9978b85ed6f40..0e9ea11a873ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -621,9 +621,9 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
else
amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
if (bp->type == ttm_bo_type_kernel)
-   bo->tbo.priority = 2;
+   bo->tbo.priority = AMDGPU_BO_PRIORITY_KERNEL;
else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE))
-   bo->tbo.priority = 1;
+   bo->tbo.priority = bp->priority;

if (!bp->destroy)
bp->destroy = _bo_destroy;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 0f277bc6a2e32..36513da0ec767 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -42,6 +42,9 @@
 /* BO flag to indicate a KFD userptr BO */
 #define AMDGPU_AMDKFD_CREATE_USERPTR_BO(1ULL << 63)

+#define AMDGPU_BO_PRIORITY_KERNEL(TTM_MAX_BO_PRIORITY - 1)
+#define AMDGPU_BO_PRIORITY_MAX_USER  (TTM_MAX_BO_PRIORITY - 2)
+
 #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
 #define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo)

@@ -52,6 +55,7 @@ struct amdgpu_bo_param {
u32 domain;
u32 preferred_domain;
u64 flags;
+   unsigned intpriority;
enum ttm_bo_typetype;
boolno_wait_gpu;
struct dma_resv *resv;
--
2.44.0



[RFC PATCH 09/18] drm/amdgpu: Don't mark VRAM as a busy placement for VRAM|GTT resources

2024-04-24 Thread Friedrich Vock
We will never try evicting things from VRAM for these resources anyway.
This affects TTM buffer uneviction logic, which would otherwise try to
move these buffers into VRAM (clashing with VRAM-only allocations).

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 5834a95d680d9..85c10d8086188 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -127,6 +127,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, 
u32 domain)
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
struct ttm_placement *placement = >placement;
struct ttm_place *places = abo->placements;
+   bool skip_vram_busy = false;
u64 flags = abo->flags;
u32 c = 0;

@@ -156,6 +157,13 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo 
*abo, u32 domain)
if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
c++;
+
+   /*
+* If GTT is preferred by the buffer as well, don't try VRAM 
when it's
+* busy.
+*/
+   if ((domain & abo->preferred_domains) & AMDGPU_GEM_DOMAIN_GTT)
+   skip_vram_busy = true;
}

if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
@@ -223,6 +231,11 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo 
*abo, u32 domain)

placement->num_busy_placement = c;
placement->busy_placement = places;
+
+   if (skip_vram_busy) {
+   --placement->num_busy_placement;
+   ++placement->busy_placement;
+   }
 }

 /**
--
2.44.0



[RFC PATCH 04/18] drm/ttm: Add driver funcs for uneviction control

2024-04-24 Thread Friedrich Vock
Provides fine-grained control for drivers over which buffers should be
considered when attempting to undo evictions.

Signed-off-by: Friedrich Vock 
---
 include/drm/ttm/ttm_device.h | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index baa264efe483d..283795d674189 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -133,6 +133,29 @@ struct ttm_device_funcs {
void (*evict_flags)(struct ttm_buffer_object *bo,
struct ttm_placement *placement);

+   /**
+* struct ttm_bo_driver member uneviction_valuable
+*
+* @bo: the buffer object to be unevicted
+*
+* Check with the driver if it is valuable to unevict a BO,
+* that is to move it back to its placement before it was
+* evicted.
+*/
+   bool (*uneviction_valuable)(struct ttm_buffer_object *bo);
+   /**
+* struct ttm_bo_driver member unevict_flags:
+*
+* @bo: the buffer object that can be unevicted
+* @dest: The placement for the unevicted buffer
+*
+* This should not cause multihop evictions, and the core will warn
+* if one is proposed.
+*/
+
+   void (*unevict_flags)(struct ttm_buffer_object *bo,
+ struct ttm_placement *dest);
+
/**
 * struct ttm_bo_driver member move:
 *
--
2.44.0



[RFC PATCH 00/18] TTM interface for managing VRAM oversubscription

2024-04-24 Thread Friedrich Vock
Hi everyone,

recently I've been looking into remedies for apps (in particular, newer
games) that experience significant performance loss when they start to
hit VRAM limits, especially on older or lower-end cards that struggle
to fit both desktop apps and all the game data into VRAM at once.

The root of the problem lies in the fact that from userspace's POV,
buffer eviction is very opaque: Userspace applications/drivers cannot
tell how oversubscribed VRAM is, nor do they have fine-grained control
over which buffers get evicted.  At the same time, with GPU APIs becoming
increasingly lower-level and GPU-driven, only the application itself
can know which buffers are used within a particular submission, and
how important each buffer is. For this, GPU APIs include interfaces
to query oversubscription and specify memory priorities: In Vulkan,
oversubscription can be queried through the VK_EXT_memory_budget
extension. Different buffers can also be assigned priorities via the
VK_EXT_pageable_device_local_memory extension. Modern games, especially
D3D12 games via vkd3d-proton, rely on oversubscription being reported and
priorities being respected in order to perform their memory management.

However, relaying this information to the kernel via the current KMD uAPIs
is not possible. On AMDGPU for example, all work submissions include a
"bo list" that contains any buffer object that is accessed during the
course of the submission. If VRAM is oversubscribed and a buffer in the
list was evicted to system memory, that buffer is moved back to VRAM
(potentially evicting other unused buffers).

Since the usermode driver doesn't know what buffers are used by the
application, its only choice is to submit a bo list that contains every
buffer the application has allocated. In case of VRAM oversubscription,
it is highly likely that some of the application's buffers were evicted,
which almost guarantees that some buffers will get moved around. Since
the bo list is only known at submit time, this also means the buffers
will get moved right before submitting application work, which is the
worst possible time to move buffers from a latency perspective. Another
consequence of the large bo list is that nearly all memory from other
applications will be evicted, too. When different applications (e.g. game
and compositor) submit work one after the other, this causes a ping-pong
effect where each app's submission evicts the other app's memory,
resulting in a large amount of unnecessary moves.

This overly aggressive eviction behavior led to RADV adopting a change
that effectively allows all VRAM applications to reside in system memory
[1].  This worked around the ping-ponging/excessive buffer moving problem,
but also meant that any memory evicted to system memory would forever
stay there, regardless of how VRAM is used.

My proposal aims at providing a middle ground between these extremes.
The goals I want to meet are:
- Userspace is accurately informed about VRAM oversubscription/how much
  VRAM has been evicted
- Buffer eviction respects priorities set by userspace - Wasteful
  ping-ponging is avoided to the extent possible

I have been testing out some prototypes, and came up with this rough
sketch of an API:

- For each ttm_resource_manager, the amount of evicted memory is tracked
  (similarly to how "usage" tracks the memory usage). When memory is
  evicted via ttm_bo_evict, the size of the evicted memory is added, when
  memory is un-evicted (see below), its size is subtracted. The amount of
  evicted memory for e.g. VRAM can be queried by userspace via an ioctl.

- Each ttm_resource_manager maintains a list of evicted buffer objects.

- ttm_mem_unevict walks the list of evicted bos for a given
  ttm_resource_manager and tries moving evicted resources back. When a
  buffer is freed, this function is called to immediately restore some
  evicted memory.

- Each ttm_buffer_object independently tracks the mem_type it wants
  to reside in.

- ttm_bo_try_unevict is added as a helper function which attempts to
  move the buffer to its preferred mem_type. If no space is available
  there, it fails with -ENOSPC/-ENOMEM.

- Similar to how ttm_bo_evict works, each driver can implement
  uneviction_valuable/unevict_flags callbacks to control buffer
  un-eviction.

This is what patches 1-10 accomplish (together with an amdgpu
implementation utilizing the new API).

Userspace priorities could then be implemented as follows:

- TTM already manages priorities for each buffer object. These priorities
  can be updated by userspace via a GEM_OP ioctl to inform the kernel
  which buffers should be evicted before others. If an ioctl increases
  the priority of a buffer, ttm_bo_try_unevict is called on that buffer to
  try and move it back (potentially evicting buffers with a lower
  priority)

- Buffers should never be evicted by other buffers with equal/lower
  priority, but if there is a buffer with lower priority occupying VRAM,
  it should be 

[RFC PATCH 02/18] drm/ttm: Add per-BO eviction tracking

2024-04-24 Thread Friedrich Vock
Make each buffer object aware of whether it has been evicted or not.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/ttm/ttm_bo.c |  1 +
 include/drm/ttm/ttm_bo.h | 11 +++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index edf10618fe2b2..3968b17453569 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -980,6 +980,7 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct 
ttm_buffer_object *bo,
bo->pin_count = 0;
bo->sg = sg;
bo->bulk_move = NULL;
+   bo->evicted_type = TTM_NUM_MEM_TYPES;
if (resv)
bo->base.resv = resv;
else
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 0223a41a64b24..8a1a29c6fbc50 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -121,6 +121,17 @@ struct ttm_buffer_object {
unsigned priority;
unsigned pin_count;

+   /**
+* @evicted_type: Memory type this BO was evicted from, if any.
+* TTM_NUM_MEM_TYPES if this BO was not evicted.
+*/
+   int evicted_type;
+   /**
+* @evicted: Entry in the evicted list for the resource manager
+* this BO was evicted from.
+*/
+   struct list_head evicted;
+
/**
 * @delayed_delete: Work item used when we can't delete the BO
 * immediately
--
2.44.0



[RFC PATCH 03/18] drm/ttm: Implement BO eviction tracking

2024-04-24 Thread Friedrich Vock
For each buffer object, remember evictions and try undoing them if
memory pressure gets lower again.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/ttm/ttm_bo.c  | 28 +++-
 drivers/gpu/drm/ttm/ttm_bo_util.c |  3 +++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3968b17453569..9a0efbf79316c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -178,6 +178,12 @@ static void ttm_bo_cleanup_memtype_use(struct 
ttm_buffer_object *bo)
 {
if (bo->bdev->funcs->delete_mem_notify)
bo->bdev->funcs->delete_mem_notify(bo);
+   if (bo->evicted_type != TTM_NUM_MEM_TYPES) {
+   spin_lock(>bdev->unevict_lock);
+   list_del_init(>evicted);
+   man->evicted_bytes -= bo->base.size;
+   spin_unlock(>bdev->unevict_lock);
+   }

ttm_bo_tt_destroy(bo);
ttm_resource_free(bo, >resource);
@@ -429,7 +435,9 @@ static int ttm_bo_bounce_temp_buffer(struct 
ttm_buffer_object *bo,
 static int ttm_bo_evict(struct ttm_buffer_object *bo,
struct ttm_operation_ctx *ctx)
 {
+   int evicted_type = bo->resource->mem_type;
struct ttm_device *bdev = bo->bdev;
+   struct ttm_resource_manager *man;
struct ttm_resource *evict_mem;
struct ttm_placement placement;
struct ttm_place hop;
@@ -438,6 +446,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
memset(, 0, sizeof(hop));

dma_resv_assert_held(bo->base.resv);
+   man = ttm_manager_type(bdev, evicted_type);

placement.num_placement = 0;
placement.num_busy_placement = 0;
@@ -477,6 +486,14 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
ttm_resource_free(bo, _mem);
if (ret != -ERESTARTSYS && ret != -EINTR)
pr_err("Buffer eviction failed\n");
+   } else if (bo->evicted_type == TTM_NUM_MEM_TYPES &&
+  bo->bdev->funcs->uneviction_valuable &&
+  bo->bdev->funcs->uneviction_valuable(bo)) {
+   bo->evicted_type = evicted_type;
+   spin_lock(>bdev->unevict_lock);
+   list_add_tail(>evicted, >evicted);
+   man->evicted_bytes += bo->base.size;
+   spin_unlock(>bdev->unevict_lock);
}
 out:
return ret;
@@ -845,6 +862,7 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
  struct ttm_placement *placement,
  struct ttm_operation_ctx *ctx)
 {
+   struct ttm_resource_manager *man;
struct ttm_resource *mem;
struct ttm_place hop;
int ret;
@@ -873,8 +891,16 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
goto bounce;
}
 out:
-   if (ret)
+   if (ret) {
ttm_resource_free(bo, );
+   } else if (bo->evicted_type != TTM_NUM_MEM_TYPES) {
+   man = ttm_manager_type(bo->bdev, bo->evicted_type);
+   spin_lock(>bdev->unevict_lock);
+   list_del_init(>evicted);
+   man->evicted_bytes -= bo->base.size;
+   spin_unlock(>bdev->unevict_lock);
+   bo->evicted_type = TTM_NUM_MEM_TYPES;
+   }
return ret;
 }

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index fd9fd3d15101c..119291c5ed85e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -262,6 +262,9 @@ static int ttm_buffer_object_transfer(struct 
ttm_buffer_object *bo,
fbo->base.bulk_move = NULL;
}

+   INIT_LIST_HEAD(>base.evicted);
+   fbo->base.evicted_type = TTM_NUM_MEM_TYPES;
+
ret = dma_resv_reserve_fences(>base.base._resv, 1);
if (ret) {
kfree(fbo);
--
2.44.0



[RFC PATCH 06/18] drm/ttm: Add public buffer eviction/uneviction functions

2024-04-24 Thread Friedrich Vock
For now, they are only used internally inside TTM, but this will change
with the introduction of dynamic buffer priorities.

Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 168 ++-
 include/drm/ttm/ttm_bo.h |   6 ++
 2 files changed, 172 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3b89fabc2f00a..3047c763eb4eb 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -166,6 +166,111 @@ static int ttm_bo_handle_move_mem(struct 
ttm_buffer_object *bo,
return ret;
 }

+/**
+ * Fetches the next BO from the manager's list of evicted BOs.
+ * bdev->unevict_lock should be held when calling this function.
+ */
+static struct ttm_buffer_object *ttm_next_evicted_bo(struct ttm_device *bdev,
+struct 
ttm_resource_manager *man,
+struct ttm_buffer_object 
*cursor)
+{
+   struct ttm_buffer_object *bo = NULL;
+
+   if (cursor)
+   cursor = list_next_entry(cursor, evicted);
+   else
+   cursor = list_first_entry(>evicted, struct 
ttm_buffer_object, evicted);
+
+   if (!list_entry_is_head(cursor, >evicted, evicted))
+   bo = ttm_bo_get_unless_zero(cursor);
+   return bo;
+}
+
+void ttm_mem_unevict_evicted(struct ttm_device *bdev,
+struct ttm_resource_manager *man,
+bool interruptible)
+{
+   struct ttm_buffer_object *evicted_bo = NULL, *next_evicted_bo = NULL;
+   struct ttm_operation_ctx ctx;
+   int ret;
+
+   memset(, 0, sizeof(ctx));
+   ctx.interruptible = interruptible;
+   ctx.no_evict = true;
+
+   spin_lock(>unevict_lock);
+   evicted_bo = ttm_next_evicted_bo(bdev, man, NULL);
+   spin_unlock(>unevict_lock);
+
+   while (evicted_bo) {
+   if (interruptible)
+   ret = dma_resv_lock_interruptible(
+   evicted_bo->base.resv, NULL);
+   else
+   ret = dma_resv_lock(evicted_bo->base.resv,
+   NULL);
+   if (ret) {
+   ttm_bo_put(evicted_bo);
+   break;
+   }
+
+   /* If we raced with another thread (and lost), the
+* other thread already removed the buffer from the
+* list. In that case, we need to start over because
+* our current cursor got removed.
+*/
+   if (evicted_bo->evicted_type == TTM_NUM_MEM_TYPES)
+   ret = 0;
+   else
+   ret = ttm_bo_try_unevict(evicted_bo, );
+
+   next_evicted_bo = ret ? evicted_bo : NULL;
+
+   spin_lock(>unevict_lock);
+   next_evicted_bo = ttm_next_evicted_bo(bdev, man,
+ next_evicted_bo);
+   spin_unlock(>unevict_lock);
+
+   dma_resv_unlock(evicted_bo->base.resv);
+   ttm_bo_put(evicted_bo);
+
+   evicted_bo = next_evicted_bo;
+   }
+}
+EXPORT_SYMBOL(ttm_mem_unevict_evicted);
+
+struct ttm_mem_unevict_work {
+   struct work_struct work;
+   struct ttm_device *bdev;
+   struct ttm_resource_manager *man;
+};
+
+static void ttm_mem_unevict_work(struct work_struct *work)
+{
+   struct ttm_mem_unevict_work *unevict_work;
+
+   unevict_work = container_of(work, typeof(*unevict_work), work);
+
+   ttm_mem_unevict_evicted(unevict_work->bdev, unevict_work->man,
+   false);
+}
+
+static void ttm_mem_queue_unevict(struct ttm_device *bdev,
+ struct ttm_resource_manager *man)
+{
+   struct ttm_mem_unevict_work *work;
+
+   work = kzalloc(sizeof(*work), GFP_KERNEL);
+
+   if (!work)
+   return;
+
+   INIT_WORK(>work, ttm_mem_unevict_work);
+   work->bdev = bdev;
+   work->man = man;
+   queue_work_node(bdev->pool.nid, bdev->wq, >work);
+}
+
 /*
  * Call bo::reserved.
  * Will release GPU memory type usage on destruction.
@@ -176,6 +281,12 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,

 static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 {
+   struct ttm_resource_manager *man = NULL;
+   struct ttm_device *bdev = bo->bdev;
+
+   if (bo->resource)
+   man = ttm_manager_type(bo->bdev, bo->resource->mem_type);
+
if (bo->bdev->funcs->delete_mem_notify)
bo->bdev->funcs->delete_mem_notify(bo);
if (bo->evicted_type != TTM_NUM_MEM_TYPES) {
@@ -187,6 +298,9 @@ static void ttm_bo_cleanup_memtype_use(struct 
ttm_buffer_object *bo)

ttm_bo_tt_destroy(bo);
ttm_resource_free(bo, >resource);
+
+   if 

Re: [PATCH v6 0/5] Best effort contiguous VRAM allocation

2024-04-24 Thread Felix Kuehling

The series is

Reviewed-by: Felix Kuehling 

On 2024-04-24 11:27, Philip Yang wrote:

This patch series implement new KFD memory alloc flag for best effort contiguous
VRAM allocation, to support peer direct access RDMA device with limited 
scatter-gather
dma capability.

v2: rebase on patch ("drm/amdgpu: Modify the contiguous flags behaviour")
 to avoid adding the new GEM flag

v3: add patch 2 to handle sg segment size limit (Christian)

v4: remove the buddy block size limit from vram mgr because sg table creation 
already
 remove the limit, and resource uses u64 to handle block start, size 
(Christian)

v5: remove patch 7 which is not for upstream, add AMDGPU prefix to the macro 
name.

v6: use shorter flag name, use interruptible wait ctx, drop patch 5/6 (Felix)

Philip Yang (5):
   drm/amdgpu: Support contiguous VRAM allocation
   drm/amdgpu: Handle sg size limit for contiguous allocation
   drm/amdgpu: Evict BOs from same process for contiguous allocation
   drm/amdkfd: Evict BO itself for contiguous allocation
   drm/amdkfd: Bump kfd version for contiguous VRAM allocation

  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 23 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 12 +-
  include/uapi/linux/kfd_ioctl.h|  4 +++-
  4 files changed, 33 insertions(+), 9 deletions(-)



[PATCH] drm/amd/display: Avoid -Wenum-float-conversion in add_margin_and_round_to_dfs_grainularity()

2024-04-24 Thread Nathan Chancellor
When building with clang 19 or newer (which strengthened some of the
enum conversion warnings for C), there is a warning (or error with
CONFIG_WERROR=y) around doing arithmetic with an enumerated type and a
floating point expression.

  
drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c:181:58:
 error: arithmetic between enumeration type 'enum dentist_divider_range' and 
floating-point type 'double' [-Werror,-Wenum-float-conversion]
181 | divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * 
(vco_freq_khz / clock_khz));
|  ~~ ^ 
~~
  1 error generated.

This conversion is expected due to the nature of the enumerated value
and definition, so silence the warning by casting the enumeration to an
integer explicitly to make it clear to the compiler.

Fixes: 3df48ddedee4 ("drm/amd/display: Add new DCN401 sources")
Signed-off-by: Nathan Chancellor 
---
Alternatively, perhaps the potential truncation could happen before the
multiplication?

  divider = DFS_DIVIDER_RANGE_SCALE_FACTOR * (unsigned int)(vco_freq_khz / 
clock_khz);

I suspect the result of the division is probably not very large
(certainly not within UINT_MAX / 4), so I would not expect the
multiplication to overflow, but I was not sure so I decided to take the
safer, NFC change. I am happy to respin as necessary.
---
 .../gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c 
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
index e6698ee65843..65eb0187e965 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
@@ -178,7 +178,7 @@ static bool add_margin_and_round_to_dfs_grainularity(double 
clock_khz, double ma
 
clock_khz *= 1.0 + margin;
 
-   divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz 
/ clock_khz));
+   divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * 
(vco_freq_khz / clock_khz));
 
/* we want to floor here to get higher clock than required rather than 
lower */
if (divider < DFS_DIVIDER_RANGE_2_START) {

---
base-commit: d60dc4dd72412d5d9566fdf391e4202b05f88912
change-id: 20240424-amdgpu-display-dcn401-enum-float-conversion-c09cc1826ea2

Best regards,
-- 
Nathan Chancellor 



Re: [PATCH] drm/amdgpu: Fix buffer size in gfx_v9_4_3_init_ cp_compute_microcode() and rlc_microcode()

2024-04-24 Thread Lazar, Lijo



On 4/23/2024 7:13 AM, Srinivasan Shanmugam wrote:
> The buffer size is determined by the declaration char fw_name[30]; This
> means fw_name can hold up to 30 characters, including the null character
> that marks the end of the string.
> 
> The string to be written is "amdgpu/%s_mec.bin" or "amdgpu/%s_rlc.bin",
> where %s will be replaced by the value of chip_name.
> 
> The length of the string "amdgpu/%s_mec.bin" or "amdgpu/%s_rlc.bin"
> without the %s is 16 characters.
> 
> The warning message is saying that the chip_name could be up to 29
> characters long. If we add the 16 characters from the string
> "amdgpu/%s_mec.bin" or "amdgpu/%s_rlc.bin", we get a total of 45
> characters.
> 
> This is potentially longer than the buffer size of 30 characters.
> 
> if chip_name is longer than 14 characters (30 buffer size - 16
> characters from the string), the resulting string will not fit into the
> fw_name buffer, Thus increasing fw_name buffer size to 50
> 
> Fixes the below with gcc W=1:
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c: In function ‘gfx_v9_4_3_early_init’:
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:379:52: warning: ‘%s’ directive 
> output may be truncated writing up to 29 bytes into a region of size 23 
> [-Wformat-truncation=]
>   379 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
> chip_name);
>   |^~
> ..
>   439 | r = gfx_v9_4_3_init_rlc_microcode(adev, ucode_prefix);
>   | 
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:379:9: note: ‘snprintf’ output 
> between 16 and 45 bytes into a destination of size 30
>   379 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
> chip_name);
>   | 
> ^~
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:413:52: warning: ‘%s’ directive 
> output may be truncated writing up to 29 bytes into a region of size 23 
> [-Wformat-truncation=]
>   413 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", 
> chip_name);
>   |^~
> ..
>   443 | r = gfx_v9_4_3_init_cp_compute_microcode(adev, ucode_prefix);
>   |
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:413:9: note: ‘snprintf’ output 
> between 16 and 45 bytes into a destination of size 30
>   413 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", 
> chip_name);
>   | 
> ^~
> 
> Fixes: 86301129698b ("drm/amdgpu: split gc v9_4_3 functionality from gc v9_0")
> Cc: Hawking Zhang 
> Cc: Christian König 
> Cc: Alex Deucher 
> Cc: Lijo Lazar 
> Signed-off-by: Srinivasan Shanmugam 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index 0e429b7ed036..1c46d5f6677f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -370,7 +370,7 @@ static void gfx_v9_4_3_free_microcode(struct 
> amdgpu_device *adev)
>  static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev,
> const char *chip_name)
>  {
> - char fw_name[30];
> + char fw_name[50];

Thanks for the patch. You may solve this by changing ucode_prefix to 15
in gfx_v9_4_3_init_microcode.

Thanks,
Lijo

>   int err;
>   const struct rlc_firmware_header_v2_0 *rlc_hdr;
>   uint16_t version_major;
> @@ -407,7 +407,7 @@ static void gfx_v9_4_3_check_if_need_gfxoff(struct 
> amdgpu_device *adev)
>  static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
> const char *chip_name)
>  {
> - char fw_name[30];
> + char fw_name[50];
>   int err;
>  
>   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);


[PATCH v6 1/5] drm/amdgpu: Support contiguous VRAM allocation

2024-04-24 Thread Philip Yang
RDMA device with limited scatter-gather ability requires contiguous VRAM
buffer allocation for RDMA peer direct support.

Add a new KFD alloc memory flag and store as bo alloc flag
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS. When pinning this bo to export for RDMA
peerdirect access, this will set TTM_PL_FLAG_CONTIGUOUS flag, and ask
VRAM buddy allocator to get contiguous VRAM.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 
 include/uapi/linux/kfd_ioctl.h   | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f672205243e0..02d66faaade5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1712,6 +1712,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) 
?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+
+   /* For contiguous VRAM allocation */
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS)
+   alloc_flags |= 
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
}
xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
0 : fpriv->xcp_id;
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 2040a470ddb4..d09c4a18e571 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -407,6 +407,7 @@ struct kfd_ioctl_acquire_vm_args {
 #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT   (1 << 26)
 #define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED   (1 << 25)
 #define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT   (1 << 24)
+#define KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS (1 << 23)
 
 /* Allocate memory for later SVM (shared virtual memory) mapping.
  *
-- 
2.43.2



[PATCH v6 5/5] drm/amdkfd: Bump kfd version for contiguous VRAM allocation

2024-04-24 Thread Philip Yang
Bump the kfd ioctl minor version to declare the contiguous VRAM
allocation flag support.

Signed-off-by: Philip Yang 
---
 include/uapi/linux/kfd_ioctl.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index d09c4a18e571..f8e9d3c1d117 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -41,9 +41,10 @@
  * - 1.13 - Add debugger API
  * - 1.14 - Update kfd_event_data
  * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl
+ * - 1.16 - Add contiguous VRAM allocation flag
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 15
+#define KFD_IOCTL_MINOR_VERSION 16
 
 struct kfd_ioctl_get_version_args {
__u32 major_version;/* from KFD */
-- 
2.43.2



[PATCH v6 4/5] drm/amdkfd: Evict BO itself for contiguous allocation

2024-04-24 Thread Philip Yang
If the BO pages pinned for RDMA is not contiguous on VRAM, evict it to
system memory first to free the VRAM space, then allocate contiguous
VRAM space, and then move it from system memory back to VRAM.

v6: user context should use interruptible call (Felix)

Signed-off-by: Philip Yang 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 02d66faaade5..acc825b84113 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1470,13 +1470,30 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo 
*bo, u32 domain)
if (unlikely(ret))
return ret;
 
+   if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+   /*
+* If bo is not contiguous on VRAM, move to system memory first 
to ensure
+* we can get contiguous VRAM space after evicting other BOs.
+*/
+   if (!(bo->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+   struct ttm_operation_ctx ctx = { true, false };
+
+   amdgpu_bo_placement_from_domain(bo, 
AMDGPU_GEM_DOMAIN_GTT);
+   ret = ttm_bo_validate(>tbo, >placement, );
+   if (unlikely(ret)) {
+   pr_debug("validate bo 0x%p to GTT failed %d\n", 
>tbo, ret);
+   goto out;
+   }
+   }
+   }
+
ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
if (ret)
pr_err("Error in Pinning BO to domain: %d\n", domain);
 
amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+out:
amdgpu_bo_unreserve(bo);
-
return ret;
 }
 
-- 
2.43.2



[PATCH v6 0/5] Best effort contiguous VRAM allocation

2024-04-24 Thread Philip Yang
This patch series implement new KFD memory alloc flag for best effort contiguous
VRAM allocation, to support peer direct access RDMA device with limited 
scatter-gather
dma capability.

v2: rebase on patch ("drm/amdgpu: Modify the contiguous flags behaviour")
to avoid adding the new GEM flag

v3: add patch 2 to handle sg segment size limit (Christian)

v4: remove the buddy block size limit from vram mgr because sg table creation 
already
remove the limit, and resource uses u64 to handle block start, size 
(Christian)

v5: remove patch 7 which is not for upstream, add AMDGPU prefix to the macro 
name.

v6: use shorter flag name, use interruptible wait ctx, drop patch 5/6 (Felix)

Philip Yang (5):
  drm/amdgpu: Support contiguous VRAM allocation
  drm/amdgpu: Handle sg size limit for contiguous allocation
  drm/amdgpu: Evict BOs from same process for contiguous allocation
  drm/amdkfd: Evict BO itself for contiguous allocation
  drm/amdkfd: Bump kfd version for contiguous VRAM allocation

 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 23 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 12 +-
 include/uapi/linux/kfd_ioctl.h|  4 +++-
 4 files changed, 33 insertions(+), 9 deletions(-)

-- 
2.43.2



[PATCH v6 3/5] drm/amdgpu: Evict BOs from same process for contiguous allocation

2024-04-24 Thread Philip Yang
When TTM failed to alloc VRAM, TTM try evict BOs from VRAM to system
memory then retry the allocation, this skips the KFD BOs from the same
process because KFD require all BOs are resident for user queues.

If TTM with TTM_PL_FLAG_CONTIGUOUS flag to alloc contiguous VRAM, allow
TTM evict KFD BOs from the same process, this will evict the user queues
first, and restore the queues later after contiguous VRAM allocation.

Signed-off-by: Philip Yang 
Reviewed-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 64f5001a7dc5..c21ea808f931 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1403,7 +1403,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct 
ttm_buffer_object *bo,
 */
dma_resv_for_each_fence(_cursor, bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP, f) {
-   if (amdkfd_fence_check_mm(f, current->mm))
+   if (amdkfd_fence_check_mm(f, current->mm) &&
+   !(place->flags & TTM_PL_FLAG_CONTIGUOUS))
return false;
}
 
-- 
2.43.2



[PATCH v6 2/5] drm/amdgpu: Handle sg size limit for contiguous allocation

2024-04-24 Thread Philip Yang
Define macro AMDGPU_MAX_SG_SEGMENT_SIZE 2GB, because struct scatterlist
length is unsigned int, and some users of it cast to a signed int, so
every segment of sg table is limited to size 2GB maximum.

For contiguous VRAM allocation, don't limit the max buddy block size in
order to get contiguous VRAM memory. To workaround the sg table segment
size limit, allocate multiple segments if contiguous size is bigger than
AMDGPU_MAX_SG_SEGMENT_SIZE.

Signed-off-by: Philip Yang 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 4be8b091099a..ebffb58ea53a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -31,6 +31,8 @@
 #include "amdgpu_atomfirmware.h"
 #include "atom.h"
 
+#define AMDGPU_MAX_SG_SEGMENT_SIZE (2UL << 30)
+
 struct amdgpu_vram_reservation {
u64 start;
u64 size;
@@ -532,9 +534,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
 
BUG_ON(min_block_size < mm->chunk_size);
 
-   /* Limit maximum size to 2GiB due to SG table limitations */
-   size = min(remaining_size, 2ULL << 30);
-
+   size = remaining_size;
if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
!(size & (((u64)pages_per_block << PAGE_SHIFT) 
- 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT;
@@ -675,7 +675,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
amdgpu_res_first(res, offset, length, );
while (cursor.remaining) {
num_entries++;
-   amdgpu_res_next(, cursor.size);
+   amdgpu_res_next(, min(cursor.size, 
AMDGPU_MAX_SG_SEGMENT_SIZE));
}
 
r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
@@ -695,7 +695,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
amdgpu_res_first(res, offset, length, );
for_each_sgtable_sg((*sgt), sg, i) {
phys_addr_t phys = cursor.start + adev->gmc.aper_base;
-   size_t size = cursor.size;
+   unsigned long size = min(cursor.size, 
AMDGPU_MAX_SG_SEGMENT_SIZE);
dma_addr_t addr;
 
addr = dma_map_resource(dev, phys, size, dir,
@@ -708,7 +708,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
sg_dma_address(sg) = addr;
sg_dma_len(sg) = size;
 
-   amdgpu_res_next(, cursor.size);
+   amdgpu_res_next(, size);
}
 
return 0;
-- 
2.43.2



Re: [PATCH v3] drm/amdgpu: fix uninitialized scalar variable warning

2024-04-24 Thread Christian König

Am 23.04.24 um 16:31 schrieb Tim Huang:

From: Tim Huang 

Clear warning that uses uninitialized value fw_size.

Signed-off-by: Tim Huang 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index d9dc5485..fb5de23fa8d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1205,7 +1205,8 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device 
*adev,
fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
break;
default:
-   break;
+   dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
+   return;
}
  
  	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {




Re: [PATCH v5 1/6] drm/amdgpu: Support contiguous VRAM allocation

2024-04-24 Thread Philip Yang

  


On 2024-04-23 18:17, Felix Kuehling
  wrote:


  
  On 2024-04-23 11:28, Philip Yang wrote:
  
  RDMA device with limited scatter-gather
ability requires contiguous VRAM

buffer allocation for RDMA peer direct support.


Add a new KFD alloc memory flag and store as bo alloc flag

AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS. When pin this bo to export
for RDMA

peerdirect access, this will set TTM_PL_FLAG_CONTIGUOUS flag,
and ask

VRAM buddy allocator to get contiguous VRAM.


Signed-off-by: Philip Yang 

---

  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 

  include/uapi/linux/kfd_ioctl.h   | 1 +

  2 files changed, 5 insertions(+)


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index 0ae9fd844623..ef9154043757 100644

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

@@ -1712,6 +1712,10 @@ int
amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(

  alloc_flags =
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;

  alloc_flags |= (flags &
KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?

  AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;

+

+    /* For contiguous VRAM allocation */

+    if (flags &
KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS_BEST_EFFORT)

+    alloc_flags |=
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;

  }

  xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?

  0 : fpriv->xcp_id;

diff --git a/include/uapi/linux/kfd_ioctl.h
b/include/uapi/linux/kfd_ioctl.h

index 2040a470ddb4..c1394c162d4e 100644

--- a/include/uapi/linux/kfd_ioctl.h

+++ b/include/uapi/linux/kfd_ioctl.h

@@ -407,6 +407,7 @@ struct kfd_ioctl_acquire_vm_args {

  #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT    (1 << 26)

  #define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED    (1 << 25)

  #define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT    (1 <<
24)

+#define KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS_BEST_EFFORT    (1
<< 23)

  
  
  If I understand it correctly, AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS
  was redefined to mean "best effort". Maybe we can drop the
  explicit "BEST_EFFORT" from this flag as well to keep the name to
  a reasonable length.
  

yes, AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS is redefined, to implement
  "best effort" without adding new upstream GEM flag, so we may get
  scattered allocation if contiguous allocation failed. If we drop
  the "BEST_EFFORT" from flag name, this may mislead the users.
Regards,
Philip  

  
  Regards,
  
    Felix
  
  
  
      /* Allocate memory for later SVM
(shared virtual memory) mapping.

   *

  

  



RE: [PATCH 28/46] drm/amd/display: Enable RCO for PHYSYMCLK in DCN35

2024-04-24 Thread Li, Roman
[Public]

> -Original Message-
> From: Wayne Lin 
> Sent: Wednesday, April 24, 2024 4:49 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Wentland, Harry ; Li, Sun peng (Leo)
> ; Siqueira, Rodrigo ;
> Pillai, Aurabindo ; Li, Roman
> ; Lin, Wayne ; Gutierrez,
> Agustin ; Chung, ChiaHsuan (Tom)
> ; Wu, Hersen ; Zuo,
> Jerry ; Miess, Daniel ; Liu,
> Charlene 
> Subject: [PATCH 28/46] drm/amd/display: Enable RCO for PHYSYMCLK in
> DCN35
>
> From: Daniel Miess 
>
> [Why & How]
> Enable root clock optimization for PHYSYMCLK and only disable it when it's
> actively being used
>
> Reviewed-by: Charlene Liu 
> Acked-by: Wayne Lin 
> Signed-off-by: Daniel Miess 
> ---
>  drivers/gpu/drm/amd/display/dc/dc.h   |  1 +
>  .../gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c | 45 ---
>  .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   | 32 +
>  .../amd/display/dc/hwss/dcn35/dcn35_hwseq.h   |  2 +
>  .../amd/display/dc/hwss/dcn35/dcn35_init.c|  1 +
>  .../amd/display/dc/hwss/dcn351/dcn351_init.c  |  1 +
>  .../display/dc/hwss/hw_sequencer_private.h|  4 ++
>  7 files changed, 41 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dc.h
> b/drivers/gpu/drm/amd/display/dc/dc.h
> index 3048d5a0e87d..dd8940c2a4bf 100644
> --- a/drivers/gpu/drm/amd/display/dc/dc.h
> +++ b/drivers/gpu/drm/amd/display/dc/dc.h
> @@ -724,6 +724,7 @@ enum pg_hw_pipe_resources {
>   PG_OPTC,
>   PG_DPSTREAM,
>   PG_HDMISTREAM,
> + PG_PHYSYMCLK,
>   PG_HW_PIPE_RESOURCES_NUM_ELEMENT
>  };
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
> b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
> index 4b282b7e0996..795320a25fd2 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
> @@ -461,32 +461,22 @@ static void
> dccg35_set_physymclk_root_clock_gating(
>   case 0:
>   REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
>   PHYASYMCLK_ROOT_GATE_DISABLE, enable ?
> 1 : 0);
> -//   REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
> -//   PHYA_REFCLK_ROOT_GATE_DISABLE, enable ?
> 1 : 0);
>   break;
>   case 1:
>   REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
>   PHYBSYMCLK_ROOT_GATE_DISABLE, enable ?
> 1 : 0);
> -//   REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
> -//   PHYB_REFCLK_ROOT_GATE_DISABLE, enable ?
> 1 : 0);
>   break;
>   case 2:
>   REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
>   PHYCSYMCLK_ROOT_GATE_DISABLE, enable ?
> 1 : 0);
> -//   REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
> -//   PHYC_REFCLK_ROOT_GATE_DISABLE, enable ?
> 1 : 0);
>   break;
>   case 3:
>   REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
>   PHYDSYMCLK_ROOT_GATE_DISABLE, enable ?
> 1 : 0);
> -//   REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
> -//   PHYD_REFCLK_ROOT_GATE_DISABLE, enable ?
> 1 : 0);
>   break;
>   case 4:
>   REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
>   PHYESYMCLK_ROOT_GATE_DISABLE, enable ?
> 1 : 0);
> -//   REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
> -//   PHYE_REFCLK_ROOT_GATE_DISABLE, enable ?
> 1 : 0);
>   break;
>   default:
>   BREAK_TO_DEBUGGER();
> @@ -509,16 +499,10 @@ static void dccg35_set_physymclk(
>   REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
>   PHYASYMCLK_EN, 1,
>   PHYASYMCLK_SRC_SEL, clk_src);
> -//   if (dccg->ctx->dc-
> >debug.root_clock_optimization.bits.physymclk)
> -//   REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
> -//
>   PHYA_REFCLK_ROOT_GATE_DISABLE, 0);
>   } else {
>   REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
>   PHYASYMCLK_EN, 0,
>   PHYASYMCLK_SRC_SEL, 0);
> -//   if (dccg->ctx->dc-
> >debug.root_clock_optimization.bits.physymclk)
> -//   REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
> -//
>   PHYA_REFCLK_ROOT_GATE_DISABLE, 1);
>   }
>   break;
>   case 1:
> @@ -526,16 +510,10 @@ static void dccg35_set_physymclk(
>   REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
>   PHYBSYMCLK_EN, 1,
>   PHYBSYMCLK_SRC_SEL, clk_src);
> -//   if (dccg->ctx->dc-
> >debug.root_clock_optimization.bits.physymclk)
> -//   REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
> -//
>   PHYB_REFCLK_ROOT_GATE_DISABLE, 0);
>   } else {
>   REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
>  

Re: [PATCH v5 4/6] drm/amdkfd: Evict BO itself for contiguous allocation

2024-04-24 Thread Philip Yang

  


On 2024-04-23 18:15, Felix Kuehling
  wrote:

On
  2024-04-23 11:28, Philip Yang wrote:
  
  If the BO pages pinned for RDMA are not
contiguous on VRAM, evict it to

system memory first to free the VRAM space, then allocate
contiguous

VRAM space, and then move it from system memory back to VRAM.


Signed-off-by: Philip Yang 

---

  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16
+++-

  1 file changed, 15 insertions(+), 1 deletion(-)


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index ef9154043757..5d118e5580ce 100644

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

@@ -1470,13 +1470,27 @@ static int
amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)

  if (unlikely(ret))

  return ret;

  +    if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
{

+    /*

+ * If bo is not contiguous on VRAM, move to system
memory first to ensure

+ * we can get contiguous VRAM space after evicting
other BOs.

+ */

+    if (!(bo->tbo.resource->placement &
TTM_PL_FLAG_CONTIGUOUS)) {

+    ret = amdgpu_amdkfd_bo_validate(bo,
AMDGPU_GEM_DOMAIN_GTT, false);

  
  
  amdgpu_amdkfd_bo_validate is meant for use in kernel threads. It
  always runs uninterruptible. I believe pin_bo runs in the context
  of ioctls from user mode. So it should be interruptible.
  

yes, pin_bo is in the context of user mode, from KFD alloc memory
  or from rdma driver get pages, should use interruptible wait.
amdgpu_amdkfd_bo_validate is currently used by kernel threads and
  ioctl amdgpu_amdkfd_add_gws_to_process (this seems to be a bug), does it
  make sense to add parameter interruptible, then we can remove many
  duplicate code amdgpu_bo_placement_from_domain + ttm_bo_validate
  or I can fix it here and leave the cleanup and bug fix in the
  future?
Regards,
Philip


  
  Regards,
  
    Felix
  
  
  
  +    if (unlikely(ret)) {

+    pr_debug("validate bo 0x%p to GTT failed %d\n",
>tbo, ret);

+    goto out;

+    }

+    }

+    }

+

  ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);

  if (ret)

  pr_err("Error in Pinning BO to domain: %d\n", domain);

    amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);

+out:

  amdgpu_bo_unreserve(bo);

-

  return ret;

  }

  

  



Re: [PATCH] drm/amd/display: re-indent dc_power_down_on_boot()

2024-04-24 Thread Christian König

Am 24.04.24 um 15:20 schrieb Dan Carpenter:

On Wed, Apr 24, 2024 at 03:11:08PM +0200, Christian König wrote:

Am 24.04.24 um 13:41 schrieb Dan Carpenter:

These lines are indented too far.  Clean the whitespace.

Signed-off-by: Dan Carpenter 
---
   drivers/gpu/drm/amd/display/dc/core/dc.c | 7 +++
   1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 8eefba757da4..f64d7229eb6c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -5043,11 +5043,10 @@ void dc_interrupt_ack(struct dc *dc, enum dc_irq_source 
src)
   void dc_power_down_on_boot(struct dc *dc)
   {
if (dc->ctx->dce_environment != DCE_ENV_VIRTUAL_HW &&
-   dc->hwss.power_down_on_boot) {
-
-   if (dc->caps.ips_support)
-   dc_exit_ips_for_hw_access(dc);
+   dc->hwss.power_down_on_boot) {
+   if (dc->caps.ips_support)
+   dc_exit_ips_for_hw_access(dc);

Well while at it can't the two ifs be merged here?

(I don't know this code too well, but it looks like it).


I'm sorry, I don't see what you're saying.


The indentation was so messed up that I thought the call to 
power_down_on_boot() was after both ifs, but it is still inside the first.


So your patch is actually right, sorry for the noise.

Regards,
Christian.



I probably should have deleted the other blank line as well, though.
It introduces a checkpatch.pl --strict warning.

regards,
dan carpenter





RE: [PATCH] drm/amdgpu: Fix two reset triggered in a row

2024-04-24 Thread Li, Yunxiang (Teddy)
[Public]

> We have the KFD, FLR, the per engine one in the scheduler and IIRC one more 
> for the CP (illegal operation and register write).
>
> I'm not sure about the CP one, but all others should be handled correctly 
> with the V2 patch as far as I can see.

Where can I find the CP one? Nothing came up when I search for 
amdgpu_device_gpu_recover


Re: [PATCH] drm/amd/display: re-indent dc_power_down_on_boot()

2024-04-24 Thread Christian König

Am 24.04.24 um 13:41 schrieb Dan Carpenter:

These lines are indented too far.  Clean the whitespace.

Signed-off-by: Dan Carpenter 
---
  drivers/gpu/drm/amd/display/dc/core/dc.c | 7 +++
  1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 8eefba757da4..f64d7229eb6c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -5043,11 +5043,10 @@ void dc_interrupt_ack(struct dc *dc, enum dc_irq_source 
src)
  void dc_power_down_on_boot(struct dc *dc)
  {
if (dc->ctx->dce_environment != DCE_ENV_VIRTUAL_HW &&
-   dc->hwss.power_down_on_boot) {
-
-   if (dc->caps.ips_support)
-   dc_exit_ips_for_hw_access(dc);
+   dc->hwss.power_down_on_boot) {
  
+		if (dc->caps.ips_support)

+   dc_exit_ips_for_hw_access(dc);


Well while at it can't the two ifs be merged here?

(I don't know this code too well, but it looks like it).

Regards,
Christian.


dc->hwss.power_down_on_boot(dc);
}
  }




Re: [PATCH 2/3] drm/amdgpu: Initialize timestamp for some legacy SOCs

2024-04-24 Thread Christian König

Am 24.04.24 um 12:03 schrieb Ma Jun:

Initialize the interrupt timestamp for some legacy SOCs
to fix the coverity issue "Uninitialized scalar variable"

Signed-off-by: Ma Jun 
Suggested-by: Christian König 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 7e6d09730e6d..665c63f55278 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -445,6 +445,14 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
  
  	entry.ih = ih;

entry.iv_entry = (const uint32_t *)>ring[ring_index];
+
+   /*
+* timestamp is not supported on some legacy SOCs (cik, cz, iceland,
+* si and tonga), so initialize timestamp and timestamp_src to 0
+*/
+   entry.timestamp = 0;
+   entry.timestamp_src = 0;
+
amdgpu_ih_decode_iv(adev, );
  
  	trace_amdgpu_iv(ih - >irq.ih, );




[PATCH 3/3] drm/amdgpu: Fix the uninitialized variable warning

2024-04-24 Thread Ma Jun
Initialize the phy_id to 0 to fix the warning of
"Using uninitialized value phy_id"

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
index 8ed0e073656f..df81078aa26d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -95,7 +95,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file 
*f, const char __u
struct psp_context *psp = >psp;
struct ta_securedisplay_cmd *securedisplay_cmd;
struct drm_device *dev = adev_to_drm(adev);
-   uint32_t phy_id;
+   uint32_t phy_id = 0;
uint32_t op;
char str[64];
int ret;
-- 
2.34.1



[PATCH 2/3] drm/amdgpu: Initialize timestamp for some legacy SOCs

2024-04-24 Thread Ma Jun
Initialize the interrupt timestamp for some legacy SOCs
to fix the coverity issue "Uninitialized scalar variable"

Signed-off-by: Ma Jun 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 7e6d09730e6d..665c63f55278 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -445,6 +445,14 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
 
entry.ih = ih;
entry.iv_entry = (const uint32_t *)>ring[ring_index];
+
+   /*
+* timestamp is not supported on some legacy SOCs (cik, cz, iceland,
+* si and tonga), so initialize timestamp and timestamp_src to 0
+*/
+   entry.timestamp = 0;
+   entry.timestamp_src = 0;
+
amdgpu_ih_decode_iv(adev, );
 
trace_amdgpu_iv(ih - >irq.ih, );
-- 
2.34.1



[PATCH 1/3] drm/amdgpu: Fix uninitialized variable warning in amdgpu_afmt_acr

2024-04-24 Thread Ma Jun
Assign value to clock to fix the warning below:
"Using uninitialized value res. Field res.clock is uninitialized"

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
index a4d65973bf7c..9e3442b2d2ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
@@ -87,7 +87,7 @@ static void amdgpu_afmt_calc_cts(uint32_t clock, int *CTS, 
int *N, int freq)
 
 struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)
 {
-   struct amdgpu_afmt_acr res;
+   struct amdgpu_afmt_acr res = {0};
u8 i;
 
/* Precalculated values for common clocks */
@@ -100,6 +100,7 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)
amdgpu_afmt_calc_cts(clock, _32khz, _32khz, 32000);
amdgpu_afmt_calc_cts(clock, _44_1khz, _44_1khz, 44100);
amdgpu_afmt_calc_cts(clock, _48khz, _48khz, 48000);
+   res.clock = clock;
 
return res;
 }
-- 
2.34.1



Re: [PATCH v3] drm/amdgpu: add return result for amdgpu_i2c_{get/put}_byte

2024-04-24 Thread Christian König

Am 24.04.24 um 11:36 schrieb Bob Zhou:

After amdgpu_i2c_get_byte fail, amdgpu_i2c_put_byte shouldn't be
conducted to put wrong value.
So return and check the i2c transfer result.

Signed-off-by: Bob Zhou 
Suggested-by: Christian König 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c | 47 +++--
  1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index 82608df43396..e0f3bff335c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -280,7 +280,7 @@ amdgpu_i2c_lookup(struct amdgpu_device *adev,
return NULL;
  }
  
-static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,

+static int amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
 u8 slave_addr,
 u8 addr,
 u8 *val)
@@ -305,16 +305,18 @@ static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan 
*i2c_bus,
out_buf[0] = addr;
out_buf[1] = 0;
  
-	if (i2c_transfer(_bus->adapter, msgs, 2) == 2) {

-   *val = in_buf[0];
-   DRM_DEBUG("val = 0x%02x\n", *val);
-   } else {
-   DRM_DEBUG("i2c 0x%02x 0x%02x read failed\n",
- addr, *val);
+   if (i2c_transfer(_bus->adapter, msgs, 2) != 2) {
+   DRM_DEBUG("i2c 0x%02x read failed\n", addr);
+   return -EIO;
}
+
+   *val = in_buf[0];
+   DRM_DEBUG("val = 0x%02x\n", *val);
+
+   return 0;
  }
  
-static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,

+static int amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
 u8 slave_addr,
 u8 addr,
 u8 val)
@@ -330,9 +332,12 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan 
*i2c_bus,
out_buf[0] = addr;
out_buf[1] = val;
  
-	if (i2c_transfer(_bus->adapter, , 1) != 1)

-   DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n",
- addr, val);
+   if (i2c_transfer(_bus->adapter, , 1) != 1) {
+   DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", addr, val);
+   return -EIO;
+   }
+
+   return 0;
  }
  
  /* ddc router switching */

@@ -347,16 +352,18 @@ amdgpu_i2c_router_select_ddc_port(const struct 
amdgpu_connector *amdgpu_connecto
if (!amdgpu_connector->router_bus)
return;
  
-	amdgpu_i2c_get_byte(amdgpu_connector->router_bus,

+   if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
-   0x3, );
+   0x3, ))
+   return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
-   amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+   if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
-   0x1, );
+   0x1, ))
+   return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
val |= amdgpu_connector->router.ddc_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
@@ -376,16 +383,18 @@ amdgpu_i2c_router_select_cd_port(const struct 
amdgpu_connector *amdgpu_connector
if (!amdgpu_connector->router_bus)
return;
  
-	amdgpu_i2c_get_byte(amdgpu_connector->router_bus,

+   if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
-   0x3, );
+   0x3, ))
+   return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
-   amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+   if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
-   0x1, );
+   0x1, ))
+   return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
val |= amdgpu_connector->router.cd_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,




[PATCH v3] drm/amdgpu: add return result for amdgpu_i2c_{get/put}_byte

2024-04-24 Thread Bob Zhou
After amdgpu_i2c_get_byte fail, amdgpu_i2c_put_byte shouldn't be
conducted to put wrong value.
So return and check the i2c transfer result.

Signed-off-by: Bob Zhou 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c | 47 +++--
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index 82608df43396..e0f3bff335c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -280,7 +280,7 @@ amdgpu_i2c_lookup(struct amdgpu_device *adev,
return NULL;
 }
 
-static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
+static int amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
 u8 slave_addr,
 u8 addr,
 u8 *val)
@@ -305,16 +305,18 @@ static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan 
*i2c_bus,
out_buf[0] = addr;
out_buf[1] = 0;
 
-   if (i2c_transfer(_bus->adapter, msgs, 2) == 2) {
-   *val = in_buf[0];
-   DRM_DEBUG("val = 0x%02x\n", *val);
-   } else {
-   DRM_DEBUG("i2c 0x%02x 0x%02x read failed\n",
- addr, *val);
+   if (i2c_transfer(_bus->adapter, msgs, 2) != 2) {
+   DRM_DEBUG("i2c 0x%02x read failed\n", addr);
+   return -EIO;
}
+
+   *val = in_buf[0];
+   DRM_DEBUG("val = 0x%02x\n", *val);
+
+   return 0;
 }
 
-static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
+static int amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
 u8 slave_addr,
 u8 addr,
 u8 val)
@@ -330,9 +332,12 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan 
*i2c_bus,
out_buf[0] = addr;
out_buf[1] = val;
 
-   if (i2c_transfer(_bus->adapter, , 1) != 1)
-   DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n",
- addr, val);
+   if (i2c_transfer(_bus->adapter, , 1) != 1) {
+   DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", addr, val);
+   return -EIO;
+   }
+
+   return 0;
 }
 
 /* ddc router switching */
@@ -347,16 +352,18 @@ amdgpu_i2c_router_select_ddc_port(const struct 
amdgpu_connector *amdgpu_connecto
if (!amdgpu_connector->router_bus)
return;
 
-   amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+   if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
-   0x3, );
+   0x3, ))
+   return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
-   amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+   if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
-   0x1, );
+   0x1, ))
+   return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
val |= amdgpu_connector->router.ddc_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
@@ -376,16 +383,18 @@ amdgpu_i2c_router_select_cd_port(const struct 
amdgpu_connector *amdgpu_connector
if (!amdgpu_connector->router_bus)
return;
 
-   amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+   if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
-   0x3, );
+   0x3, ))
+   return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
-   amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+   if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
-   0x1, );
+   0x1, ))
+   return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
val |= amdgpu_connector->router.cd_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
-- 
2.34.1



Re: [PATCH 4/4 V2] drm/amdgpu: Using uninitialized value *size when calling amdgpu_vce_cs_reloc

2024-04-24 Thread Christian König

Am 24.04.24 um 11:04 schrieb Jesse Zhang:

Initialize the size before calling amdgpu_vce_cs_reloc, such as case 0x0301.
V2: To really improve the handling we would actually
need to have a separate value of 0xffffffff. (Christian)

Signed-off-by: Jesse Zhang 
Suggested-by: Christian König 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 59acf424a078..968ca2c84ef7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -743,7 +743,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
uint32_t created = 0;
uint32_t allocated = 0;
uint32_t tmp, handle = 0;
-   uint32_t *size = &tmp;
+   uint32_t dummy = 0xffffffff;
+   uint32_t *size = &dummy;
unsigned int idx;
int i, r = 0;
  




[PATCH 4/4 V2] drm/amdgpu: Using uninitialized value *size when calling amdgpu_vce_cs_reloc

2024-04-24 Thread Jesse Zhang
Initialize the size before calling amdgpu_vce_cs_reloc, such as case 0x0301.
V2: To really improve the handling we would actually
   need to have a separate value of 0xffffffff. (Christian)

Signed-off-by: Jesse Zhang 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 59acf424a078..968ca2c84ef7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -743,7 +743,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
uint32_t created = 0;
uint32_t allocated = 0;
uint32_t tmp, handle = 0;
-   uint32_t *size = &tmp;
+   uint32_t dummy = 0xffffffff;
+   uint32_t *size = &dummy;
unsigned int idx;
int i, r = 0;
 
-- 
2.25.1



[PATCH 46/46] drm/amd/display: 3.2.283

2024-04-24 Thread Wayne Lin
From: Aric Cyr 

This version brings along following fixes:
- Disable seamless boot on 128b/132b encoding
- Have cursor and surface updates together
- Change ASSR disable sequence to avoid corruption
- Fix few IPS problems
- Enable Replay for DCN315
- Fix few ODM problems
- Fix FEC_READY write timing
- Fix few FPO problems
- Adjust DML21 gpuvm_enable assignment
- Fix divide by 0 error in VM environment
- Fix few DCN35 problems
- Fix flickering on DCN321
- Fix mst resume problem
- Fix multi-disp FAMS problem
- Refactor Replay
- Update some of the dcn303 parameters
- Enable legacy fast update for dcn301
- Add VCO parameter for DCN31 FPU
- Fix problems reported by Coverity

Acked-by: Wayne Lin 
Signed-off-by: Aric Cyr 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index dd8940c2a4bf..b6e92dda4b2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -55,7 +55,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.282"
+#define DC_VER "3.2.283"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.37.3



[PATCH 45/46] drm/amd/display: Disable seamless boot on 128b/132b encoding

2024-04-24 Thread Wayne Lin
From: Sung Joon Kim 

[why]
preOS will not support display mode programming and link training
for UHBR rates.

[how]
If we detect a sink that's UHBR capable, disable seamless boot

Reviewed-by: Anthony Koo 
Acked-by: Wayne Lin 
Signed-off-by: Sung Joon Kim 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index a8eb286ee4ff..9f56b2743f80 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1838,6 +1838,9 @@ bool dc_validate_boot_timing(const struct dc *dc,
return false;
}
 
+   if (link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)
+   return false;
+
if (dc->link_srv->edp_is_ilr_optimization_required(link, crtc_timing)) {
DC_LOG_EVENT_LINK_TRAINING("Seamless boot disabled to optimize 
eDP link rate\n");
return false;
-- 
2.37.3



[PATCH 44/46] drm/amd/display: Fix uninitialized variables in DC

2024-04-24 Thread Wayne Lin
From: Alex Hung 

This fixes 29 UNINIT issues reported by Coverity.

Reviewed-by: Hersen Wu 
Acked-by: Wayne Lin 
Signed-off-by: Alex Hung 
---
 drivers/gpu/drm/amd/display/dc/bios/command_table.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/bios/command_table2.c  | 2 +-
 drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 4 ++--
 drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c  | 2 +-
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 2 +-
 .../drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c   | 4 ++--
 drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 4 ++--
 drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c | 2 +-
 drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c   | 4 ++--
 drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/link/link_detection.c  | 4 ++--
 .../gpu/drm/amd/display/dc/link/protocols/link_dp_training.c  | 2 +-
 .../gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c  | 2 +-
 14 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c 
b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
index 86f9198e7501..2bcae0643e61 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
@@ -399,7 +399,7 @@ static enum bp_result transmitter_control_v1_6(
 static void init_transmitter_control(struct bios_parser *bp)
 {
uint8_t frev;
-   uint8_t crev;
+   uint8_t crev = 0;
 
if (BIOS_CMD_TABLE_REVISION(UNIPHYTransmitterControl,
frev, crev) == false)
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c 
b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index cbae1be7b009..cc000833d300 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -225,7 +225,7 @@ static enum bp_result transmitter_control_fallback(
 static void init_transmitter_control(struct bios_parser *bp)
 {
uint8_t frev;
-   uint8_t crev;
+   uint8_t crev = 0;
 
BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index 25d46c69464f..74da9ebda016 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -2372,7 +2372,7 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc, struct 
dc_state *context,
 
 static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct 
clk_limit_table *clk_table, unsigned int high_voltage_lvl)
 {
-   struct _vcs_dpi_voltage_scaling_st low_pstate_lvl;
+   struct _vcs_dpi_voltage_scaling_st low_pstate_lvl = {0};
int i;
 
low_pstate_lvl.state = 1;
@@ -2477,7 +2477,7 @@ void 
dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc,
int pipe_cnt, i, j;
double max_calc_writeback_dispclk;
double writeback_dispclk;
-   struct writeback_st dout_wb;
+   struct writeback_st dout_wb = {0};
 
dc_assert_fp_enabled();
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
index ccb4ad78f667..81f7b90849ce 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
@@ -260,7 +260,7 @@ void dcn30_fpu_populate_dml_writeback_from_context(
int pipe_cnt, i, j;
double max_calc_writeback_dispclk;
double writeback_dispclk;
-   struct writeback_st dout_wb;
+   struct writeback_st dout_wb = {0};
 
dc_assert_fp_enabled();
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 8912475f01e2..d74f51efb703 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -723,7 +723,7 @@ static bool dcn32_enough_pipes_for_subvp(struct dc *dc, 
struct dc_state *context
  */
 static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context)
 {
-   struct pipe_ctx *subvp_pipes[2];
+   struct pipe_ctx *subvp_pipes[2] = {0};
struct dc_stream_state *phantom = NULL;
uint32_t microschedule_lines = 0;
uint32_t index = 0;
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index 76399c66bc3b..ba1310c8fd77 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -1973,8 +1973,8 @@ void dml32_CalculateVMRowAndSwath(

[PATCH 43/46] drm/amd/display: Change ASSR disable sequence

2024-04-24 Thread Wayne Lin
From: Swapnil Patel 

[Why]
Currently disabling ASSR before stream is disabled causes visible
display corruption.

[How]
Move disable ASSR command to after stream has been disabled.

Cc: Mario Limonciello 
Cc: Alex Deucher 
Cc: sta...@vger.kernel.org
Reviewed-by: Wenjing Liu 
Acked-by: Wayne Lin 
Signed-off-by: Swapnil Patel 
---
 drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c 
b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 16549068d836..8402ca0695cc 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -2317,8 +2317,6 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
 
dc->hwss.disable_audio_stream(pipe_ctx);
 
-   edp_set_panel_assr(link, pipe_ctx, _mode_dp, false);
-
update_psp_stream_config(pipe_ctx, true);
dc->hwss.blank_stream(pipe_ctx);
 
@@ -2372,6 +2370,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
dc->hwss.disable_stream(pipe_ctx);
disable_link(pipe_ctx->stream->link, _ctx->link_res, 
pipe_ctx->stream->signal);
}
+   edp_set_panel_assr(link, pipe_ctx, _mode_dp, false);
 
if (pipe_ctx->stream->timing.flags.DSC) {
if (dc_is_dp_signal(pipe_ctx->stream->signal))
-- 
2.37.3



[PATCH 42/46] drm/amd/display: Add periodic detection for IPS

2024-04-24 Thread Wayne Lin
From: Roman Li 

[Why]
HPD interrupt cannot be handled in IPS2 state.
So if there's a display topology change while the system is in IPS2,
it can be missed.

[How]
Implement a worker that checks for HPD every 5 seconds while in IPS.

Reviewed-by: Hamza Mahfooz 
Acked-by: Wayne Lin 
Signed-off-by: Roman Li 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  9 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c| 59 ++-
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |  5 +-
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 28 +
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h |  1 +
 6 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 29b5c953a656..eb946f1ad4d5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1838,6 +1838,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
DRM_ERROR("amdgpu: failed to initialize 
vblank_workqueue.\n");
}
 
+   if (adev->dm.dc->caps.ips_support && adev->dm.dc->config.disable_ips == 
DMUB_IPS_ENABLE)
+   adev->dm.idle_workqueue = idle_create_workqueue(adev);
+
if (adev->dm.dc->caps.max_links > 0 && adev->family >= 
AMDGPU_FAMILY_RV) {
adev->dm.hdcp_workqueue = hdcp_create_workqueue(adev, 
_params.cp_psp, adev->dm.dc);
 
@@ -1935,6 +1938,16 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
adev->dm.vblank_control_workqueue = NULL;
}
 
+   if (adev->dm.idle_workqueue) {
+   if (adev->dm.idle_workqueue->running) {
+   adev->dm.idle_workqueue->enable = false;
+   flush_work(>dm.idle_workqueue->work);
+   }
+
+   kfree(adev->dm.idle_workqueue);
+   adev->dm.idle_workqueue = NULL;
+   }
+
amdgpu_dm_destroy_drm_device(>dm);
 
 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 09519b7abf67..79469cdc3b10 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -137,6 +137,13 @@ struct vblank_control_work {
bool enable;
 };
 
+struct idle_workqueue {
+   struct work_struct work;
+   struct amdgpu_display_manager *dm;
+   bool enable;
+   bool running;
+};
+
 /**
  * struct amdgpu_dm_backlight_caps - Information about backlight
  *
@@ -487,6 +494,7 @@ struct amdgpu_display_manager {
 * Deferred work for vblank control events.
 */
struct workqueue_struct *vblank_control_workqueue;
+   struct idle_workqueue *idle_workqueue;
 
struct drm_atomic_state *cached_state;
struct dc_state *cached_dc_state;
@@ -956,4 +964,5 @@ amdgpu_dm_find_first_crtc_matching_connector(struct 
drm_atomic_state *state,
 struct drm_crtc *crtc);
 
 int convert_dc_color_depth_into_bpc(enum dc_color_depth display_color_depth);
+struct idle_workqueue *idle_create_workqueue(struct amdgpu_device *adev);
 #endif /* __AMDGPU_DM_H__ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index e23a0a276e33..83ea0afddda7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -35,6 +35,9 @@
 #include "amdgpu_dm_trace.h"
 #include "amdgpu_dm_debugfs.h"
 
+#define HPD_DETECTION_PERIOD_uS 500
+#define HPD_DETECTION_TIME_uS 1000
+
 void amdgpu_dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc)
 {
struct drm_crtc *crtc = >base;
@@ -146,11 +149,65 @@ static void amdgpu_dm_crtc_set_panel_sr_feature(
struct amdgpu_dm_connector *aconn =
(struct amdgpu_dm_connector *) 
vblank_work->stream->dm_stream_context;
 
-   if (!aconn->disallow_edp_enter_psr)
+   if (!aconn->disallow_edp_enter_psr) {
+   struct amdgpu_display_manager *dm = vblank_work->dm;
+
amdgpu_dm_psr_enable(vblank_work->stream);
+   if (dm->idle_workqueue &&
+   dm->dc->idle_optimizations_allowed &&
+   dm->idle_workqueue->enable &&
+   !dm->idle_workqueue->running)
+   schedule_work(>idle_workqueue->work);
+   }
}
 }
 
+static void amdgpu_dm_idle_worker(struct work_struct *work)
+{
+   struct idle_workqueue *idle_work;
+
+   idle_work = container_of(work, struct idle_workqueue, work);
+   idle_work->dm->idle_workqueue->running = true;
+   fsleep(HPD_DETECTION_PERIOD_uS);
+   mutex_lock(_work->dm->dc_lock);
+  

[PATCH 41/46] drm/amd/display: Notify idle link detection through shared state

2024-04-24 Thread Wayne Lin
From: Nicholas Kazlauskas 

[Why]
We can hang in IPS2 checking DMCUB_SCRATCH0 for link detection state.

[How]
Replace the HW access with a check on the shared state bit. This will
work the same way as the SCRATCH0 but won't require a wake in the case
where link detection isn't required.

Reviewed-by: Duncan Ma 
Acked-by: Wayne Lin 
Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  | 30 +++
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  | 10 +++
 drivers/gpu/drm/amd/display/dmub/dmub_srv.h   |  1 +
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   | 15 +-
 .../gpu/drm/amd/display/dmub/src/dmub_srv.c   |  2 ++
 5 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c 
b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 33d3307f5c1c..364ef9ae32f1 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -1460,6 +1460,36 @@ void dc_dmub_srv_set_power_state(struct dc_dmub_srv 
*dc_dmub_srv, enum dc_acpi_c
dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3);
 }
 
+bool dc_dmub_srv_should_detect(struct dc_dmub_srv *dc_dmub_srv)
+{
+   volatile const struct dmub_shared_state_ips_fw *ips_fw;
+   bool reallow_idle = false, should_detect = false;
+
+   if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+   return false;
+
+   if (dc_dmub_srv->dmub->shared_state &&
+   
dc_dmub_srv->dmub->meta_info.feature_bits.bits.shared_state_link_detection) {
+   ips_fw = 
_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_FW].data.ips_fw;
+   return ips_fw->signals.bits.detection_required;
+   }
+
+   /* Detection may require reading scratch 0 - exit out of idle prior to 
the read. */
+   if (dc_dmub_srv->idle_allowed) {
+   
dc_dmub_srv_apply_idle_power_optimizations(dc_dmub_srv->ctx->dc, false);
+   reallow_idle = true;
+   }
+
+   should_detect = dmub_srv_should_detect(dc_dmub_srv->dmub);
+
+   /* Re-enter idle if we're not about to immediately redetect links. */
+   if (!should_detect && reallow_idle && dc_dmub_srv->idle_exit_counter == 
0 &&
+   !dc_dmub_srv->ctx->dc->debug.disable_dmub_reallow_idle)
+   
dc_dmub_srv_apply_idle_power_optimizations(dc_dmub_srv->ctx->dc, true);
+
+   return should_detect;
+}
+
 void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool 
allow_idle)
 {
struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h 
b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index 3297c5b33265..580940222777 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -111,6 +111,16 @@ void dc_dmub_srv_apply_idle_power_optimizations(const 
struct dc *dc, bool allow_
 
 void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum 
dc_acpi_cm_power_state powerState);
 
+/**
+ * @dc_dmub_srv_should_detect() - Checks if link detection is required.
+ *
+ * While in idle power states we may need driver to manually redetect in
+ * the case of a missing hotplug. Should be called from a polling timer.
+ *
+ * Return: true if redetection is required.
+ */
+bool dc_dmub_srv_should_detect(struct dc_dmub_srv *dc_dmub_srv);
+
 /**
  * dc_wake_and_execute_dmub_cmd() - Wrapper for DMUB command execution.
  *
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h 
b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index cec8aa1face5..cd51c91a822b 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -529,6 +529,7 @@ struct dmub_srv {
uint32_t psp_version;
 
/* Feature capabilities reported by fw */
+   struct dmub_fw_meta_info meta_info;
struct dmub_feature_caps feature_caps;
struct dmub_visual_confirm_color visual_confirm_color;
 
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h 
b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 7a0574e6c129..35096aa3d85b 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -496,6 +496,17 @@ struct dmub_visual_confirm_color {
 /* Offset from the end of the file to the dmub_fw_meta_info */
 #define DMUB_FW_META_OFFSET 0x24
 
+/**
+ * union dmub_fw_meta_feature_bits - Static feature bits for pre-initialization
+ */
+union dmub_fw_meta_feature_bits {
+   struct {
+   uint32_t shared_state_link_detection : 1; /**< 1 supports link 
detection via shared state */
+   uint32_t reserved : 31;
+   } bits; /**< status bits */
+   uint32_t all; /**< 32-bit access to status bits */
+};
+
 /**
  * struct dmub_fw_meta_info - metadata associated with fw binary
  *
@@ -521,6 +532,7 @@ struct dmub_fw_meta_info {
uint32_t shared_state_size; /**< size of 

[PATCH 40/46] drm/amd/display: Enable Replay for DCN315

2024-04-24 Thread Wayne Lin
From: Joan Lee 

[why & how]
Enable Replay for DCN315.

Reviewed-by: Robin Chen 
Acked-by: Wayne Lin 
Signed-off-by: Joan Lee 
---
 .../amd/display/dc/resource/dcn315/dcn315_resource.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
index 4ce0f4bf1d9b..ad40a657e173 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
@@ -125,6 +125,7 @@
 #include "reg_helper.h"
 #include "dce/dmub_abm.h"
 #include "dce/dmub_psr.h"
+#include "dce/dmub_replay.h"
 #include "dce/dce_aux.h"
 #include "dce/dce_i2c.h"
 
@@ -1484,6 +1485,9 @@ static void dcn315_resource_destruct(struct 
dcn315_resource_pool *pool)
if (pool->base.psr != NULL)
dmub_psr_destroy(>base.psr);
 
+   if (pool->base.replay != NULL)
+   dmub_replay_destroy(>base.replay);
+
if (pool->base.dccg != NULL)
dcn_dccg_destroy(>base.dccg);
 }
@@ -2048,6 +2052,14 @@ static bool dcn315_resource_construct(
goto create_fail;
}
 
+   /* Replay */
+   pool->base.replay = dmub_replay_create(ctx);
+   if (pool->base.replay == NULL) {
+   dm_error("DC: failed to create replay obj!\n");
+   BREAK_TO_DEBUGGER();
+   goto create_fail;
+   }
+
/* ABM */
for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
pool->base.multiple_abms[i] = dmub_abm_create(ctx,
-- 
2.37.3



[PATCH 39/46] drm/amd/display: use even ODM slice width for two pixels per container

2024-04-24 Thread Wayne Lin
From: Wenjing Liu 

[why]
When optc uses two pixel per container, each ODM slice width must be an
even number.

[how]
If the ODM slice width is an odd number, increase it by 1.

Reviewed-by: Dillon Varone 
Acked-by: Wayne Lin 
Signed-off-by: Wenjing Liu 
---
 .../gpu/drm/amd/display/dc/core/dc_resource.c |  8 
 .../dc/dce110/dce110_timing_generator.c   | 18 
 .../dc/dce110/dce110_timing_generator.h   |  2 +
 .../dc/dce110/dce110_timing_generator_v.c |  3 +-
 .../dc/dce120/dce120_timing_generator.c   |  1 +
 .../display/dc/dce80/dce80_timing_generator.c |  1 +
 .../display/dc/dml2/dml2_translation_helper.c |  2 +-
 .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c   | 19 ++--
 .../amd/display/dc/hwss/dcn201/dcn201_hwseq.c |  2 +-
 .../amd/display/dc/hwss/dcn314/dcn314_hwseq.c |  4 +-
 .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c   |  6 +--
 drivers/gpu/drm/amd/display/dc/inc/hw/optc.h  |  4 +-
 .../amd/display/dc/inc/hw/timing_generator.h  |  1 +
 .../amd/display/dc/optc/dcn10/dcn10_optc.c| 46 +--
 .../amd/display/dc/optc/dcn20/dcn20_optc.c| 10 +---
 .../amd/display/dc/optc/dcn20/dcn20_optc.h|  1 -
 .../amd/display/dc/optc/dcn201/dcn201_optc.c  |  7 +--
 .../amd/display/dc/optc/dcn201/dcn201_optc.h  |  3 --
 .../amd/display/dc/optc/dcn30/dcn30_optc.c|  3 +-
 .../amd/display/dc/optc/dcn301/dcn301_optc.c  |  1 +
 .../amd/display/dc/optc/dcn31/dcn31_optc.c|  1 +
 .../amd/display/dc/optc/dcn314/dcn314_optc.c  |  3 +-
 .../amd/display/dc/optc/dcn32/dcn32_optc.c|  3 +-
 .../amd/display/dc/optc/dcn35/dcn35_optc.c|  1 +
 .../amd/display/dc/optc/dcn401/dcn401_optc.c  | 15 --
 .../dc/resource/dcn20/dcn20_resource.c|  2 +-
 26 files changed, 103 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index ebbeb37f36a6..8dcd7eac4b2b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -827,6 +827,11 @@ static struct rect 
calculate_odm_slice_in_timing_active(struct pipe_ctx *pipe_ct
stream->timing.h_border_right;
int odm_slice_width = h_active / odm_slice_count;
struct rect odm_rec;
+   bool is_two_pixels_per_container =
+   
pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(>timing);
+
+   if ((odm_slice_width % 2) && is_two_pixels_per_container)
+   odm_slice_width++;
 
odm_rec.x = odm_slice_width * odm_slice_idx;
odm_rec.width = is_last_odm_slice ?
@@ -1464,6 +1469,7 @@ void resource_build_test_pattern_params(struct 
resource_context *res_ctx,
int v_active = otg_master->stream->timing.v_addressable +
otg_master->stream->timing.v_border_bottom +
otg_master->stream->timing.v_border_top;
+   bool is_two_pixels_per_container = 
otg_master->stream_res.tg->funcs->is_two_pixels_per_container(_master->stream->timing);
int i;
 
controller_test_pattern = convert_dp_to_controller_test_pattern(
@@ -1477,6 +1483,8 @@ void resource_build_test_pattern_params(struct 
resource_context *res_ctx,
odm_cnt = resource_get_opp_heads_for_otg_master(otg_master, res_ctx, 
opp_heads);
 
odm_slice_width = h_active / odm_cnt;
+   if ((odm_slice_width % 2) && is_two_pixels_per_container)
+   odm_slice_width++;
last_odm_slice_width = h_active - odm_slice_width * (odm_cnt - 1);
 
for (i = 0; i < odm_cnt; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
index 6424e7f279dc..49bcfe6ec999 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
@@ -2015,6 +2015,23 @@ bool dce110_tg_validate_timing(struct timing_generator 
*tg,
return dce110_timing_generator_validate_timing(tg, timing, 
SIGNAL_TYPE_NONE);
 }
 
+/* "Container" vs. "pixel" is a concept within HW blocks, mostly those closer 
to the back-end. It works like this:
+ *
+ * - In most of the formats (RGB or YCbCr 4:4:4, 4:2:2 uncompressed and DSC 
4:2:2 Simple) pixel rate is the same as
+ *   container rate.
+ *
+ * - In 4:2:0 (DSC or uncompressed) there are two pixels per container, hence 
the target container rate has to be
+ *   halved to maintain the correct pixel rate.
+ *
+ * - Unlike 4:2:2 uncompressed, DSC 4:2:2 Native also has two pixels per 
container (this happens when DSC is applied
+ *   to it) and has to be treated the same as 4:2:0, i.e. target containter 
rate has to be halved in this case as well.
+ *
+ */
+bool dce110_is_two_pixels_per_container(const struct dc_crtc_timing *timing)
+{
+   return timing->pixel_encoding == PIXEL_ENCODING_YCBCR420;
+}
+
 void dce110_tg_wait_for_state(struct timing_generator *tg,
enum crtc_state 

[PATCH 38/46] drm/amd/display: Fix FEC_READY write on DP LT

2024-04-24 Thread Wayne Lin
From: Ilya Bakoulin 

[Why/How]
We can miss writing FEC_READY in some cases before LT start, which
violates DP spec. Remove the condition guarding the DPCD write so that
the write happens unconditionally.

Cc: Mario Limonciello 
Cc: Alex Deucher 
Cc: sta...@vger.kernel.org
Reviewed-by: Wenjing Liu 
Acked-by: Wayne Lin 
Signed-off-by: Ilya Bakoulin 
---
 .../amd/display/dc/link/protocols/link_dp_phy.c| 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c 
b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c
index 5cbf5f93e584..bafa52a0165a 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c
@@ -151,16 +151,14 @@ enum dc_status dp_set_fec_ready(struct dc_link *link, 
const struct link_resource
return DC_NOT_SUPPORTED;
 
if (ready && dp_should_enable_fec(link)) {
-   if (link->fec_state == dc_link_fec_not_ready) {
-   fec_config = 1;
+   fec_config = 1;
 
-   status = core_link_write_dpcd(link, 
DP_FEC_CONFIGURATION,
-   _config, sizeof(fec_config));
+   status = core_link_write_dpcd(link, DP_FEC_CONFIGURATION,
+   _config, sizeof(fec_config));
 
-   if (status == DC_OK) {
-   link_enc->funcs->fec_set_ready(link_enc, true);
-   link->fec_state = dc_link_fec_ready;
-   }
+   if (status == DC_OK) {
+   link_enc->funcs->fec_set_ready(link_enc, true);
+   link->fec_state = dc_link_fec_ready;
}
} else {
if (link->fec_state == dc_link_fec_ready) {
-- 
2.37.3



[PATCH 37/46] drm/amd/display: Fix uninitialized variables in DC

2024-04-24 Thread Wayne Lin
From: Alex Hung 

This fixes 49 UNINIT issues reported by Coverity.

Reviewed-by: Hersen Wu 
Acked-by: Wayne Lin 
Signed-off-by: Alex Hung 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c |  4 ++--
 .../gpu/drm/amd/display/dc/core/dc_resource.c|  2 +-
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c |  2 +-
 .../amd/display/dc/dml2/dml2_dc_resource_mgmt.c  |  2 +-
 .../drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c  |  2 +-
 .../drm/amd/display/dc/dpp/dcn20/dcn20_dpp_cm.c  |  2 +-
 .../drm/amd/display/dc/dpp/dcn30/dcn30_dpp_cm.c  |  2 +-
 .../gpu/drm/amd/display/dc/gpio/gpio_service.c   |  6 +++---
 .../amd/display/dc/hwss/dce110/dce110_hwseq.c|  4 ++--
 .../drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c  |  6 +++---
 .../amd/display/dc/hwss/dcn201/dcn201_hwseq.c|  2 +-
 .../drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c  |  2 +-
 .../drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c  |  2 +-
 drivers/gpu/drm/amd/display/dc/link/link_dpms.c  |  8 
 .../dc/link/protocols/link_dp_capability.c   | 16 
 .../dc/link/protocols/link_dp_irq_handler.c  | 10 +-
 .../dc/link/protocols/link_edp_panel_control.c   |  4 ++--
 .../drm/amd/display/dc/link/protocols/link_hpd.c |  2 +-
 18 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 71f211bb4ed8..a8eb286ee4ff 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1310,7 +1310,7 @@ static void disable_vbios_mode_if_required(
 
if (link != NULL && 
link->link_enc->funcs->is_dig_enabled(link->link_enc)) {
unsigned int enc_inst, tg_inst = 0;
-   unsigned int pix_clk_100hz;
+   unsigned int pix_clk_100hz = 0;
 
enc_inst = 
link->link_enc->funcs->get_dig_frontend(link->link_enc);
if (enc_inst != ENGINE_ID_UNKNOWN) {
@@ -1796,7 +1796,7 @@ bool dc_validate_boot_timing(const struct dc *dc,
return false;
 
if (dc_is_dp_signal(link->connector_signal)) {
-   unsigned int pix_clk_100hz;
+   unsigned int pix_clk_100hz = 0;
uint32_t numOdmPipes = 1;
uint32_t id_src[4] = {0};
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 263e21756481..ebbeb37f36a6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -3084,7 +3084,7 @@ bool resource_update_pipes_for_plane_with_slice_count(
int i;
int dpp_pipe_count;
int cur_slice_count;
-   struct pipe_ctx *dpp_pipes[MAX_PIPES];
+   struct pipe_ctx *dpp_pipes[MAX_PIPES] = {0};
bool result = true;
 
dpp_pipe_count = resource_get_dpp_pipes_for_plane(plane,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
index ebf6e9458be8..3aeb85ec40b0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
@@ -1183,7 +1183,7 @@ void mpc3_get_gamut_remap(struct mpc *mpc,
  struct mpc_grph_gamut_adjustment *adjust)
 {
struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
-   uint16_t arr_reg_val[12];
+   uint16_t arr_reg_val[12] = {0};
int select;
 
read_gamut_remap(mpc30, mpcc_id, arr_reg_val, );
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c 
b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
index a2ced0bc772c..507cff525f97 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
@@ -901,7 +901,7 @@ static unsigned int get_source_mpc_factor(const struct 
dml2_context *ctx,
struct dc_state *state,
const struct dc_plane_state *plane)
 {
-   struct pipe_ctx *dpp_pipes[MAX_PIPES];
+   struct pipe_ctx *dpp_pipes[MAX_PIPES] = {0};
int dpp_pipe_count = 
ctx->config.callbacks.get_dpp_pipes_for_plane(plane,
>res_ctx, dpp_pipes);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c 
b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c
index 2d5d64276cb0..f2a2d53e9689 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c
@@ -234,7 +234,7 @@ void dpp1_cm_get_gamut_remap(struct dpp *dpp_base,
 struct dpp_grph_csc_adjustment *adjust)
 {
struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
-   uint16_t arr_reg_val[12];
+   uint16_t arr_reg_val[12] = {0};
enum gamut_remap_select select;
 
read_gamut_remap(dpp, arr_reg_val, );
diff --git 

[PATCH 36/46] drm/amd/display: Fix uninitialized variables in DM

2024-04-24 Thread Wayne Lin
From: Alex Hung 

This fixes 11 UNINIT issues reported by Coverity.

Reviewed-by: Hersen Wu 
Acked-by: Wayne Lin 
Signed-off-by: Alex Hung 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 3ece11883941..29b5c953a656 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -277,7 +277,7 @@ static u32 dm_vblank_get_counter(struct amdgpu_device 
*adev, int crtc)
 static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
  u32 *vbl, u32 *position)
 {
-   u32 v_blank_start, v_blank_end, h_position, v_position;
+   u32 v_blank_start = 0, v_blank_end = 0, h_position = 0, v_position = 0;
struct amdgpu_crtc *acrtc = NULL;
struct dc *dc = adev->dm.dc;
 
@@ -851,7 +851,7 @@ static void dm_handle_hpd_work(struct work_struct *work)
  */
 static void dm_dmub_outbox1_low_irq(void *interrupt_params)
 {
-   struct dmub_notification notify;
+   struct dmub_notification notify = {0};
struct common_irq_params *irq_params = interrupt_params;
struct amdgpu_device *adev = irq_params->adev;
struct amdgpu_display_manager *dm = >dm;
@@ -7228,7 +7228,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct 
drm_atomic_state *state,
struct amdgpu_dm_connector *aconnector;
struct dm_connector_state *dm_conn_state;
int i, j, ret;
-   int vcpi, pbn_div, pbn, slot_num = 0;
+   int vcpi, pbn_div, pbn = 0, slot_num = 0;
 
for_each_new_connector_in_state(state, connector, new_con_state, i) {
 
@@ -10713,7 +10713,7 @@ static int amdgpu_dm_atomic_check(struct drm_device 
*dev,
struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
struct drm_dp_mst_topology_mgr *mgr;
struct drm_dp_mst_topology_state *mst_state;
-   struct dsc_mst_fairness_vars vars[MAX_PIPES];
+   struct dsc_mst_fairness_vars vars[MAX_PIPES] = {0};
 
trace_amdgpu_dm_atomic_check_begin(state);
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index c7715a17f388..4d7a5d470b1e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -1249,7 +1249,7 @@ static ssize_t dp_sdp_message_debugfs_write(struct file 
*f, const char __user *b
 size_t size, loff_t *pos)
 {
int r;
-   uint8_t data[36];
+   uint8_t data[36] = {0};
struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
struct dm_crtc_state *acrtc_state;
uint32_t write_size = 36;
@@ -2960,7 +2960,7 @@ static int psr_read_residency(void *data, u64 *val)
 {
struct amdgpu_dm_connector *connector = data;
struct dc_link *link = connector->dc_link;
-   u32 residency;
+   u32 residency = 0;
 
link->dc->link_srv->edp_get_psr_residency(link, );
 
-- 
2.37.3



[PATCH 35/46] drm/amd/display: For FPO + Vactive check that all pipes support VA

2024-04-24 Thread Wayne Lin
From: Alvin Lee 

[Description]
For FPO + Vactive scenarios we must check that all non-FPO pipes
have VACTIVE margin to allow it. The previous check only confirmed
that there is at least one pipe that has vactive margin, but this
is incorrect as the vactive display could be using two pipes (MPO)
where the desktop plane has vactive margin, and the video plane
does not.

Reviewed-by: Samson Tam 
Reviewed-by: Chaitanya Dhere 
Acked-by: Wayne Lin 
Signed-off-by: Alvin Lee 
---
 .../display/dc/dcn32/dcn32_resource_helpers.c |  2 +-
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 20 ++-
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |  2 +-
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index a8c36eda1d09..eba7bfc7e4af 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -545,7 +545,7 @@ struct dc_stream_state 
*dcn32_can_support_mclk_switch_using_fw_based_vblank_stre
if (fpo_candidate_stream)
fpo_stream_status = dc_state_get_stream_status(context, 
fpo_candidate_stream);
DC_FP_START();
-   is_fpo_vactive = dcn32_find_vactive_pipe(dc, context, 
dc->debug.fpo_vactive_min_active_margin_us);
+   is_fpo_vactive = dcn32_find_vactive_pipe(dc, context, 
fpo_candidate_stream, dc->debug.fpo_vactive_min_active_margin_us);
DC_FP_END();
if (!is_fpo_vactive || dc->debug.disable_fpo_vactive)
return NULL;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 5be976fa44f9..8912475f01e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -3521,15 +3521,16 @@ void dcn32_assign_fpo_vactive_candidate(struct dc *dc, 
const struct dc_state *co
  *
  * @dc: current dc state
  * @context: new dc state
+ * @fpo_candidate_stream: candidate stream to be chosen for FPO
  * @vactive_margin_req_us: The vactive marign required for a vactive pipe to 
be considered "found"
  *
  * Return: True if VACTIVE display is found, false otherwise
  */
-bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, 
uint32_t vactive_margin_req_us)
+bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, 
struct dc_stream_state *fpo_candidate_stream, uint32_t vactive_margin_req_us)
 {
unsigned int i, pipe_idx;
const struct vba_vars_st *vba = >bw_ctx.dml.vba;
-   bool vactive_found = false;
+   bool vactive_found = true;
unsigned int blank_us = 0;
 
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
@@ -3538,11 +3539,20 @@ bool dcn32_find_vactive_pipe(struct dc *dc, const 
struct dc_state *context, uint
if (!pipe->stream)
continue;
 
+   /* Don't need to check for vactive margin on the FPO candidate 
stream */
+   if (fpo_candidate_stream && pipe->stream == 
fpo_candidate_stream) {
+   pipe_idx++;
+   continue;
+   }
+
+   /* Every plane (apart from the ones driven by the FPO pipes) 
needs to have active margin
+* in order for us to have found a valid "vactive" config for 
FPO + Vactive
+*/
blank_us = ((pipe->stream->timing.v_total - 
pipe->stream->timing.v_addressable) * pipe->stream->timing.h_total /
(double)(pipe->stream->timing.pix_clk_100hz * 
100)) * 100;
-   if 
(vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]]
 >= vactive_margin_req_us &&
-   !(pipe->stream->vrr_active_variable || 
pipe->stream->vrr_active_fixed) && blank_us < 
dc->debug.fpo_vactive_max_blank_us) {
-   vactive_found = true;
+   if 
(vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]]
 < vactive_margin_req_us ||
+   pipe->stream->vrr_active_variable || 
pipe->stream->vrr_active_fixed || blank_us >= 
dc->debug.fpo_vactive_max_blank_us) {
+   vactive_found = false;
break;
}
pipe_idx++;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index d25c3f730a59..276e90e4e0ce 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -71,7 +71,7 @@ void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st 
*pipes,
 
 void 

[PATCH 34/46] drm/amd/display: gpuvm handling in DML21

2024-04-24 Thread Wayne Lin
From: Nevenko Stupar 

[Why & How]
Currently in DML2.1, gpuvm_enable is hardcoded.

Use passed info from DC for DML21 to be in sync with
what is used in DC.

Reviewed-by: Chaitanya Dhere 
Acked-by: Wayne Lin 
Signed-off-by: Nevenko Stupar 
---
 drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c  | 1 +
 .../drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c| 2 +-
 drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h  | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
index eda2152dcd1f..d1e68dc57a2a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
@@ -47,6 +47,7 @@ int dc_setup_system_context(struct dc *dc, struct 
dc_phy_addr_space_config *pa_c
 */
memcpy(>vm_pa_config, pa_config, sizeof(struct 
dc_phy_addr_space_config));
dc->vm_pa_config.valid = true;
+   dc->dml2_options.gpuvm_enable = true;
dc_z10_save_init(dc);
}
 
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c 
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index b3602f897872..63f9bda3b130 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -943,7 +943,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct 
dc *in_dc, struct dc_s
 
memset(_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct 
dml2_dml_to_dc_pipe_mapping));
 
-   dml_dispcfg->gpuvm_enable = true;
+   dml_dispcfg->gpuvm_enable = dml_ctx->config.gpuvm_enable;
dml_dispcfg->gpuvm_max_page_table_levels = 4;
dml_dispcfg->hostvm_enable = false;
dml_dispcfg->minimize_det_reallocation = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h 
b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
index 97e013ce5516..4e4ed1678d91 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
@@ -232,6 +232,7 @@ struct dml2_configuration_options {
bool map_dc_pipes_with_callbacks;
 
bool use_clock_dc_limits;
+   bool gpuvm_enable;
 };
 
 /*
-- 
2.37.3



[PATCH 33/46] drm/amd/display: Assign linear_pitch_alignment even for VM

2024-04-24 Thread Wayne Lin
From: Alvin Lee 

[Description]
Assign linear_pitch_alignment so we don't cause a divide by 0
error in VM environments

Reviewed-by: Sohaib Nadeem 
Acked-by: Wayne Lin 
Signed-off-by: Alvin Lee 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index e955c97697ff..71f211bb4ed8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1425,6 +1425,7 @@ struct dc *dc_create(const struct dc_init_data 
*init_params)
return NULL;
 
if (init_params->dce_environment == DCE_ENV_VIRTUAL_HW) {
+   dc->caps.linear_pitch_alignment = 64;
if (!dc_construct_ctx(dc, init_params))
goto destruct_dc;
} else {
-- 
2.37.3



[PATCH 32/46] drm/amd/display: Refactor HUBBUB into component folder

2024-04-24 Thread Wayne Lin
From: Revalla Hari Krishna 

[why]
The code cleanup refactor requires HUBBUB to be in its own component.

[how]
Move all files under newly created hubbub folder and fix the makefiles.

Reviewed-by: Martin Leung 
Acked-by: Wayne Lin 
Signed-off-by: Revalla Hari Krishna 
---
 drivers/gpu/drm/amd/display/Makefile  |   1 +
 drivers/gpu/drm/amd/display/dc/Makefile   |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn10/Makefile |   2 +-
 .../dc/dcn10/dcn10_hw_sequencer_debug.c   |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn20/Makefile |   2 +-
 .../gpu/drm/amd/display/dc/dcn201/Makefile|   3 +-
 drivers/gpu/drm/amd/display/dc/dcn21/Makefile |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn30/Makefile |   3 +-
 .../gpu/drm/amd/display/dc/dcn301/Makefile|   2 +-
 drivers/gpu/drm/amd/display/dc/dcn31/Makefile |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn32/Makefile |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn35/Makefile |   2 +-
 .../gpu/drm/amd/display/dc/hubbub/Makefile| 100 ++
 .../dc/{ => hubbub}/dcn10/dcn10_hubbub.c  |   2 +-
 .../dc/{ => hubbub}/dcn10/dcn10_hubbub.h  |   0
 .../dc/{ => hubbub}/dcn20/dcn20_hubbub.c  |   0
 .../dc/{ => hubbub}/dcn20/dcn20_hubbub.h  |   2 +-
 .../dc/{ => hubbub}/dcn201/dcn201_hubbub.c|   0
 .../dc/{ => hubbub}/dcn201/dcn201_hubbub.h|   0
 .../dc/{ => hubbub}/dcn21/dcn21_hubbub.c  |   0
 .../dc/{ => hubbub}/dcn21/dcn21_hubbub.h  |   0
 .../dc/{ => hubbub}/dcn30/dcn30_hubbub.c  |   0
 .../dc/{ => hubbub}/dcn30/dcn30_hubbub.h  |   0
 .../dc/{ => hubbub}/dcn301/dcn301_hubbub.c|   0
 .../dc/{ => hubbub}/dcn301/dcn301_hubbub.h|   0
 .../dc/{ => hubbub}/dcn31/dcn31_hubbub.c  |   0
 .../dc/{ => hubbub}/dcn31/dcn31_hubbub.h  |   0
 .../dc/{ => hubbub}/dcn32/dcn32_hubbub.c  |   0
 .../dc/{ => hubbub}/dcn32/dcn32_hubbub.h  |   0
 .../dc/{ => hubbub}/dcn35/dcn35_hubbub.c  |   0
 .../dc/{ => hubbub}/dcn35/dcn35_hubbub.h  |   0
 31 files changed, 114 insertions(+), 15 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/hubbub/Makefile
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn10/dcn10_hubbub.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn10/dcn10_hubbub.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn20/dcn20_hubbub.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn20/dcn20_hubbub.h (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn201/dcn201_hubbub.c 
(100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn201/dcn201_hubbub.h 
(100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn21/dcn21_hubbub.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn21/dcn21_hubbub.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn30/dcn30_hubbub.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn30/dcn30_hubbub.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn301/dcn301_hubbub.c 
(100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn301/dcn301_hubbub.h 
(100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn31/dcn31_hubbub.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn31/dcn31_hubbub.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn32/dcn32_hubbub.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn32/dcn32_hubbub.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn35/dcn35_hubbub.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hubbub}/dcn35/dcn35_hubbub.h (100%)

diff --git a/drivers/gpu/drm/amd/display/Makefile 
b/drivers/gpu/drm/amd/display/Makefile
index 9a5bcafbf730..839e71aa7d0c 100644
--- a/drivers/gpu/drm/amd/display/Makefile
+++ b/drivers/gpu/drm/amd/display/Makefile
@@ -34,6 +34,7 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dsc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/optc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dpp
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hubbub
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile 
b/drivers/gpu/drm/amd/display/dc/Makefile
index 8d963befc756..f1b0b1f66fb0 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -22,7 +22,7 @@
 #
 # Makefile for Display Core (dc) component.
 
-DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource 
optc dpp
+DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource 
optc dpp hubbub
 
 ifdef CONFIG_DRM_AMD_DC_FP
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 8dc7938c36d8..508306baa65a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ 

[PATCH 30/46] drm/amd/display: Only program P-State force if pipe config changed

2024-04-24 Thread Wayne Lin
From: Alvin Lee 

[Description]
Today for MED update type we do not call update clocks. However, for FPO
the assumption is that update clocks should be called to disable P-State
switch before any HW programming since FPO in FW and driver are not
synchronized. This causes an issue where on a MED update, an FPO P-State
switch could be taking place, then driver forces P-State disallow in the below
code and prevents FPO from completing the sequence. In this case we add a check
to avoid re-programming (and thus re-setting) the P-State force register by
only reprogramming if the pipe was not previously Subvp or FPO. The assumption
is that the P-State force register should be programmed correctly the first
time SubVP / FPO was enabled, so there's no need to update / reset it if the
pipe config has never exited SubVP / FPO.

Reviewed-by: Samson Tam 
Acked-by: Wayne Lin 
Signed-off-by: Alvin Lee 
---
 .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c| 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index 9f1a86ddadb5..272c4cdfbfe3 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -614,10 +614,26 @@ void dcn32_update_force_pstate(struct dc *dc, struct 
dc_state *context)
 */
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = >res_ctx.pipe_ctx[i];
+   struct pipe_ctx *old_pipe = 
>current_state->res_ctx.pipe_ctx[i];
struct hubp *hubp = pipe->plane_res.hubp;
 
+   /* Today for MED update type we do not call update clocks. 
However, for FPO
+* the assumption is that update clocks should be called to 
disable P-State
+* switch before any HW programming since FPO in FW and driver 
are not
+* synchronized. This causes an issue where on a MED update, an 
FPO P-State
+* switch could be taking place, then driver forces P-State 
disallow in the below
+* code and prevents FPO from completing the sequence. In this 
case we add a check
+* to avoid re-programming (and thus re-setting) the P-State 
force register by
+* only reprogramming if the pipe was not previously Subvp or 
FPO. The assumption
+* is that the P-State force register should be programmed 
correctly the first
+* time SubVP / FPO was enabled, so there's no need to update / 
reset it if the
+* pipe config has never exited SubVP / FPO.
+*/
if (pipe->stream && (dc_state_get_pipe_subvp_type(context, 
pipe) == SUBVP_MAIN ||
-   pipe->stream->fpo_in_use)) {
+   pipe->stream->fpo_in_use) &&
+   (!old_pipe->stream ||
+   (dc_state_get_pipe_subvp_type(context, 
old_pipe) != SUBVP_MAIN &&
+   !old_pipe->stream->fpo_in_use))) {
if (hubp && 
hubp->funcs->hubp_update_force_pstate_disallow)

hubp->funcs->hubp_update_force_pstate_disallow(hubp, true);
if (hubp && 
hubp->funcs->hubp_update_force_cursor_pstate_disallow)
-- 
2.37.3



[PATCH 31/46] drm/amd/display: Remove redundant include file

2024-04-24 Thread Wayne Lin
From: Alex Hung 

This fixes 1 PW.INCLUDE_RECURSION reported by Coverity.

"./drivers/gpu/drm/amd/amdgpu/../display/dc/dc_types.h"
includes itself: dc_types.h -> dal_types.h -> dc_types.h

Acked-by: Wayne Lin 
Signed-off-by: Alex Hung 
---
 drivers/gpu/drm/amd/display/include/dal_types.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h 
b/drivers/gpu/drm/amd/display/include/dal_types.h
index e9591d4aded5..654387cf057f 100644
--- a/drivers/gpu/drm/amd/display/include/dal_types.h
+++ b/drivers/gpu/drm/amd/display/include/dal_types.h
@@ -27,7 +27,6 @@
 #define __DAL_TYPES_H__
 
 #include "signal_types.h"
-#include "dc_types.h"
 
 struct dal_logger;
 struct dc_bios;
-- 
2.37.3



[PATCH 29/46] drm/amd/display: Revert "dc: Keep VBios pixel rate div setting util next mode set"

2024-04-24 Thread Wayne Lin
From: Webb Chen 

This reverts commit f7131558f362 ("drm/amd/display: Keep VBios pixel rate div
setting util next mode set"), which causes an issue.

Reviewed-by: Charlene Liu 
Acked-by: Wayne Lin 
Signed-off-by: Webb Chen 
---
 .../gpu/drm/amd/display/dc/core/dc_resource.c |  4 --
 .../drm/amd/display/dc/dcn314/dcn314_dccg.c   | 12 +++--
 .../gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c | 12 +++--
 .../dc/dcn32/dcn32_dio_stream_encoder.c   | 40 +++--
 .../gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c |  8 ++--
 .../dc/dcn35/dcn35_dio_stream_encoder.c   | 36 ++-
 .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 24 ++
 .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c   | 21 ++---
 .../amd/display/dc/hwss/dcn314/dcn314_hwseq.c | 23 --
 .../amd/display/dc/hwss/dcn314/dcn314_hwseq.h |  4 --
 .../amd/display/dc/hwss/dcn314/dcn314_init.c  |  1 -
 .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c   | 44 ---
 .../amd/display/dc/hwss/dcn32/dcn32_hwseq.h   |  4 --
 .../amd/display/dc/hwss/dcn32/dcn32_init.c|  1 -
 .../amd/display/dc/hwss/dcn35/dcn35_init.c|  1 -
 .../amd/display/dc/hwss/dcn351/dcn351_init.c  |  1 -
 .../display/dc/hwss/hw_sequencer_private.h|  3 --
 .../gpu/drm/amd/display/dc/inc/core_types.h   |  7 ---
 drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h  |  5 ---
 .../amd/display/dc/inc/hw/stream_encoder.h|  1 -
 20 files changed, 111 insertions(+), 141 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 25c64fdcfa44..263e21756481 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -49,7 +49,6 @@
 #include "link/hwss/link_hwss_hpo_dp.h"
 #include "link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h"
 #include "link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h"
-#include "hw_sequencer_private.h"
 
 #if defined(CONFIG_DRM_AMD_DC_SI)
 #include "dce60/dce60_resource.h"
@@ -3903,9 +3902,6 @@ enum dc_status dc_validate_with_context(struct dc *dc,
if (res != DC_OK)
goto fail;
 
-   if (dc->hwseq->funcs.calculate_pix_rate_divider)
-   dc->hwseq->funcs.calculate_pix_rate_divider(dc, 
context, add_streams[i]);
-
if (!add_all_planes_for_stream(dc, add_streams[i], set, 
set_count, context)) {
res = DC_FAIL_ATTACH_SURFACES;
goto fail;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c 
b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c
index 8f6edd8e9beb..17a1174b8d80 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c
@@ -58,8 +58,8 @@ static void dccg314_trigger_dio_fifo_resync(
 static void dccg314_get_pixel_rate_div(
struct dccg *dccg,
uint32_t otg_inst,
-   uint32_t *k1,
-   uint32_t *k2)
+   enum pixel_rate_div *k1,
+   enum pixel_rate_div *k2)
 {
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
uint32_t val_k1 = PIXEL_RATE_DIV_NA, val_k2 = PIXEL_RATE_DIV_NA;
@@ -93,8 +93,8 @@ static void dccg314_get_pixel_rate_div(
return;
}
 
-   *k1 = val_k1;
-   *k2 = val_k2;
+   *k1 = (enum pixel_rate_div)val_k1;
+   *k2 = (enum pixel_rate_div)val_k2;
 }
 
 static void dccg314_set_pixel_rate_div(
@@ -104,8 +104,7 @@ static void dccg314_set_pixel_rate_div(
enum pixel_rate_div k2)
 {
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-   uint32_t cur_k1 = PIXEL_RATE_DIV_NA;
-   uint32_t cur_k2 = PIXEL_RATE_DIV_NA;
+   enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = 
PIXEL_RATE_DIV_NA;
 
// Don't program 0xF into the register field. Not valid since
// K1 / K2 field is only 1 / 2 bits wide
@@ -374,7 +373,6 @@ static const struct dccg_funcs dccg314_funcs = {
.disable_dsc = dccg31_disable_dscclk,
.enable_dsc = dccg31_enable_dscclk,
.set_pixel_rate_div = dccg314_set_pixel_rate_div,
-   .get_pixel_rate_div = dccg314_get_pixel_rate_div,
.trigger_dio_fifo_resync = dccg314_trigger_dio_fifo_resync,
.set_valid_pixel_rate = dccg314_set_valid_pixel_rate,
.set_dtbclk_p_src = dccg314_set_dtbclk_p_src
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
index 21a6ca5ca192..56385cede113 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
@@ -58,8 +58,8 @@ static void dccg32_trigger_dio_fifo_resync(
 static void dccg32_get_pixel_rate_div(
struct dccg *dccg,
uint32_t otg_inst,
-   uint32_t *k1,
-   uint32_t *k2)
+   enum pixel_rate_div *k1,
+   enum pixel_rate_div *k2)
 {
   

[PATCH 28/46] drm/amd/display: Enable RCO for PHYSYMCLK in DCN35

2024-04-24 Thread Wayne Lin
From: Daniel Miess 

[Why & How]
Enable root clock optimization for PHYSYMCLK and only
disable it when it's actively being used

Reviewed-by: Charlene Liu 
Acked-by: Wayne Lin 
Signed-off-by: Daniel Miess 
---
 drivers/gpu/drm/amd/display/dc/dc.h   |  1 +
 .../gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c | 45 ---
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   | 32 +
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.h   |  2 +
 .../amd/display/dc/hwss/dcn35/dcn35_init.c|  1 +
 .../amd/display/dc/hwss/dcn351/dcn351_init.c  |  1 +
 .../display/dc/hwss/hw_sequencer_private.h|  4 ++
 7 files changed, 41 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 3048d5a0e87d..dd8940c2a4bf 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -724,6 +724,7 @@ enum pg_hw_pipe_resources {
PG_OPTC,
PG_DPSTREAM,
PG_HDMISTREAM,
+   PG_PHYSYMCLK,
PG_HW_PIPE_RESOURCES_NUM_ELEMENT
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c 
b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
index 4b282b7e0996..795320a25fd2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
@@ -461,32 +461,22 @@ static void dccg35_set_physymclk_root_clock_gating(
case 0:
REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0);
-// REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
-// PHYA_REFCLK_ROOT_GATE_DISABLE, enable ? 1 : 0);
break;
case 1:
REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0);
-// REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
-// PHYB_REFCLK_ROOT_GATE_DISABLE, enable ? 1 : 0);
break;
case 2:
REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0);
-// REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
-// PHYC_REFCLK_ROOT_GATE_DISABLE, enable ? 1 : 0);
break;
case 3:
REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0);
-// REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
-// PHYD_REFCLK_ROOT_GATE_DISABLE, enable ? 1 : 0);
break;
case 4:
REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0);
-// REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
-// PHYE_REFCLK_ROOT_GATE_DISABLE, enable ? 1 : 0);
break;
default:
BREAK_TO_DEBUGGER();
@@ -509,16 +499,10 @@ static void dccg35_set_physymclk(
REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
PHYASYMCLK_EN, 1,
PHYASYMCLK_SRC_SEL, clk_src);
-// if 
(dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
-// REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
-// PHYA_REFCLK_ROOT_GATE_DISABLE, 
0);
} else {
REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
PHYASYMCLK_EN, 0,
PHYASYMCLK_SRC_SEL, 0);
-// if 
(dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
-// REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
-// PHYA_REFCLK_ROOT_GATE_DISABLE, 
1);
}
break;
case 1:
@@ -526,16 +510,10 @@ static void dccg35_set_physymclk(
REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
PHYBSYMCLK_EN, 1,
PHYBSYMCLK_SRC_SEL, clk_src);
-// if 
(dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
-// REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
-// PHYB_REFCLK_ROOT_GATE_DISABLE, 
0);
} else {
REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
PHYBSYMCLK_EN, 0,
PHYBSYMCLK_SRC_SEL, 0);
-// if 
(dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
-// REG_UPDATE(DCCG_GATE_DISABLE_CNTL4,
-// PHYB_REFCLK_ROOT_GATE_DISABLE, 
1);
}
break;
case 2:
@@ -543,16 +521,10 @@ static void 

[PATCH 27/46] drm/amd/display: Add trigger FIFO resync path for DCN35

2024-04-24 Thread Wayne Lin
From: Nicholas Kazlauskas 

[Why]
FIFO error can occur if we don't trigger a DISPCLK change after
touching K1/K2 dividers. For 4k144 eDP + hotplug of USB-C DP display
we see FIFO underflow.

[How]
We have the path to trigger the resync as the workaround in
DCN314/DCN32, it just needs to be ported over to DCN35.

Reviewed-by: Charlene Liu 
Acked-by: Wayne Lin 
Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c  | 10 ++
 drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c 
b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
index 4c53e339e325..4b282b7e0996 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
@@ -41,6 +41,15 @@
 #define DC_LOGGER \
dccg->ctx->logger
 
+static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg)
+{
+   struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+   uint32_t dispclk_rdivider_value = 0;
+
+   REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, 
_rdivider_value);
+   REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, 
dispclk_rdivider_value);
+}
+
 static void dcn35_set_dppclk_enable(struct dccg *dccg,
 uint32_t dpp_inst, uint32_t enable)
 {
@@ -1056,6 +1065,7 @@ static const struct dccg_funcs dccg35_funcs = {
.enable_dsc = dccg35_enable_dscclk,
.set_pixel_rate_div = dccg35_set_pixel_rate_div,
.get_pixel_rate_div = dccg35_get_pixel_rate_div,
+   .trigger_dio_fifo_resync = dccg35_trigger_dio_fifo_resync,
.set_valid_pixel_rate = dccg35_set_valid_pixel_rate,
.enable_symclk_se = dccg35_enable_symclk_se,
.disable_symclk_se = dccg35_disable_symclk_se,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
index 7ed5de5c5ec1..0e87f3503265 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
@@ -31,6 +31,7 @@
 #include "dcn30/dcn30_hwseq.h"
 #include "dcn301/dcn301_hwseq.h"
 #include "dcn31/dcn31_hwseq.h"
+#include "dcn314/dcn314_hwseq.h"
 #include "dcn32/dcn32_hwseq.h"
 #include "dcn35/dcn35_hwseq.h"
 
@@ -158,6 +159,7 @@ static const struct hwseq_private_funcs dcn35_private_funcs 
= {
.setup_hpo_hw_control = dcn35_setup_hpo_hw_control,
.calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values,
.set_pixels_per_cycle = dcn32_set_pixels_per_cycle,
+   .resync_fifo_dccg_dio = dcn314_resync_fifo_dccg_dio,
.is_dp_dig_pixel_rate_div_policy = 
dcn32_is_dp_dig_pixel_rate_div_policy,
.calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider,
.dsc_pg_control = dcn35_dsc_pg_control,
-- 
2.37.3



[PATCH 26/46] drm/amd/display: Re-enable IPS2 for static screen

2024-04-24 Thread Wayne Lin
From: Roman Li 

[Why]
IPS stability was fixed in bios.

[How]
Set disable_ips init flag to DMUB_IPS_ENABLE.

Reviewed-by: Nicholas Kazlauskas 
Acked-by: Wayne Lin 
Signed-off-by: Roman Li 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 961b5984afa0..3ece11883941 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1740,7 +1740,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (amdgpu_dc_debug_mask & DC_DISABLE_IPS)
init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL;
else
-   init_data.flags.disable_ips = 
DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+   init_data.flags.disable_ips = DMUB_IPS_ENABLE;
 
init_data.flags.disable_ips_in_vpb = 0;
 
-- 
2.37.3



[PATCH 25/46] drm/amd/display: take ODM slice count into account when deciding DSC slice

2024-04-24 Thread Wayne Lin
From: Wenjing Liu 

[why]
DSC slice must be divisible by ODM slice count.

[how]
If the DSC slice count is not a multiple of the ODM slice count, increase the DSC
slice count until it is. Otherwise fail to compute the DSC configuration.

Reviewed-by: Chaitanya Dhere 
Acked-by: Wayne Lin 
Signed-off-by: Wenjing Liu 
---
 drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 30 -
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c 
b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
index 7c2d74f4efd8..db795b1a94f0 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
@@ -922,14 +922,30 @@ static bool setup_dsc_config(
else
is_dsc_possible = false;
}
-   // When we force 2:1 ODM, we can't have 1 slice to divide amongst 2 
separate DSC instances
-   // need to enforce at minimum 2 horizontal slices
-   if (options->dsc_force_odm_hslice_override) {
-   num_slices_h = fit_num_slices_up(dsc_common_caps.slice_caps, 2);
-   if (num_slices_h == 0)
-   is_dsc_possible = false;
+   // When we force ODM, num dsc h slices must be divisible by num odm h 
slices
+   switch (options->dsc_force_odm_hslice_override) {
+   case 0:
+   case 1:
+   break;
+   case 2:
+   if (num_slices_h < 2)
+   num_slices_h = 
fit_num_slices_up(dsc_common_caps.slice_caps, 2);
+   break;
+   case 3:
+   if (dsc_common_caps.slice_caps.bits.NUM_SLICES_12)
+   num_slices_h = 12;
+   else
+   num_slices_h = 0;
+   break;
+   case 4:
+   if (num_slices_h < 4)
+   num_slices_h = 
fit_num_slices_up(dsc_common_caps.slice_caps, 4);
+   break;
+   default:
+   break;
}
-
+   if (num_slices_h == 0)
+   is_dsc_possible = false;
if (!is_dsc_possible)
goto done;
 
-- 
2.37.3



[PATCH 24/46] drm/amd/display: Fix DC mode screen flickering on DCN321

2024-04-24 Thread Wayne Lin
From: Leo Ma 

[Why && How]
Screen flickering was seen on a 4K@60 eDP with a high-refresh-rate external
monitor when booting up in DC mode. DC Mode Capping is disabled,
which caused the wrong UCLK to be used.

Reviewed-by: Alvin Lee 
Acked-by: Wayne Lin 
Signed-off-by: Leo Ma 
---
 .../amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index b9e1f3e0b31d..ff5fdc7b1198 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -712,8 +712,12 @@ static void dcn32_update_clocks(struct clk_mgr 
*clk_mgr_base,
 * since we calculate mode support 
based on softmax being the max UCLK
 * frequency.
 */
-   dcn32_smu_set_hard_min_by_freq(clk_mgr, 
PPCLK_UCLK,
-   
dc->clk_mgr->bw_params->dc_mode_softmax_memclk);
+   if 
(dc->debug.disable_dc_mode_overwrite) {
+   
dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, 
dc->clk_mgr->bw_params->max_memclk_mhz);
+   
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, 
dc->clk_mgr->bw_params->max_memclk_mhz);
+   } else
+   
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK,
+   
dc->clk_mgr->bw_params->dc_mode_softmax_memclk);
} else {
dcn32_smu_set_hard_min_by_freq(clk_mgr, 
PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz);
}
@@ -746,8 +750,13 @@ static void dcn32_update_clocks(struct clk_mgr 
*clk_mgr_base,
/* set UCLK to requested value if P-State switching is 
supported, or to re-enable P-State switching */
if (clk_mgr_base->clks.p_state_change_support &&
(update_uclk || 
!clk_mgr_base->clks.prev_p_state_change_support) &&
-   
!dc->work_arounds.clock_update_disable_mask.uclk)
+   
!dc->work_arounds.clock_update_disable_mask.uclk) {
+   if (dc->clk_mgr->dc_mode_softmax_enabled && 
dc->debug.disable_dc_mode_overwrite)
+   dcn30_smu_set_hard_max_by_freq(clk_mgr, 
PPCLK_UCLK,
+   
max((int)dc->clk_mgr->bw_params->dc_mode_softmax_memclk, 
khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)));
+
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, 
khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz));
+   }
 
if (clk_mgr_base->clks.num_ways != new_clocks->num_ways &&
clk_mgr_base->clks.num_ways > 
new_clocks->num_ways) {
-- 
2.37.3



[PATCH 23/46] drm/amd/display: Defer handling mst up request in resume

2024-04-24 Thread Wayne Lin
From: Wayne Lin 

[Why]
Like commit ec5fa9fcdeca ("drm/amd/display: Adjust the MST resume flow"), we
want to avoid handling mst topology changes before restoring the old state.
If we enable DP_UP_REQ_EN before calling drm_atomic_helper_resume(), have
changce to handle CSN event first and fire hotplug event before restoring the
cached state.

[How]
Disable mst branch sending up request event before we restoring the cached 
state.
DP_UP_REQ_EN will be set later when we call drm_dp_mst_topology_mgr_resume().

Cc: Mario Limonciello 
Cc: Alex Deucher 
Cc: sta...@vger.kernel.org

Reviewed-by: Hersen Wu 
Signed-off-by: Wayne Lin 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 9d36dba914e9..961b5984afa0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2429,7 +2429,6 @@ static void resume_mst_branch_status(struct 
drm_dp_mst_topology_mgr *mgr)
 
ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
 DP_MST_EN |
-DP_UP_REQ_EN |
 DP_UPSTREAM_IS_SRC);
if (ret < 0) {
drm_dbg_kms(mgr->dev, "mst write failed - undocked during 
suspend?\n");
-- 
2.37.3



[PATCH 22/46] drm/amd/display: Restrict multi-disp support for in-game FAMS

2024-04-24 Thread Wayne Lin
From: Iswara Nagulendran 

[HOW]
In multi-monitor cases the VBLANK stretch that is required to align both
monitors may be so large that it may create issues for gaming performance.

Use debug value to restrict in-game FAMS support for multi-disp use case.

Reviewed-by: Harry Vanzylldejong 
Acked-by: Wayne Lin 
Signed-off-by: Iswara Nagulendran 
---
 drivers/gpu/drm/amd/display/dc/dc.h   | 8 +++-
 .../gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 4 +++-
 .../drm/amd/display/dc/resource/dcn30/dcn30_resource.c| 2 +-
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index a3ebe4f00779..3048d5a0e87d 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -499,6 +499,12 @@ enum dcc_option {
DCC_HALF_REQ_DISALBE = 2,
 };
 
+enum in_game_fams_config {
+   INGAME_FAMS_SINGLE_DISP_ENABLE, // enable in-game fams
+   INGAME_FAMS_DISABLE, // disable in-game fams
+   INGAME_FAMS_MULTI_DISP_ENABLE, //enable in-game fams for multi-display
+};
+
 /**
  * enum pipe_split_policy - Pipe split strategy supported by DCN
  *
@@ -951,7 +957,7 @@ struct dc_debug_options {
/* Enable dmub aux for legacy ddc */
bool enable_dmub_aux_for_legacy_ddc;
bool disable_fams;
-   bool disable_fams_gaming;
+   enum in_game_fams_config disable_fams_gaming;
/* FEC/PSR1 sequence enable delay in 100us */
uint8_t fec_enable_delay_in100us;
bool enable_driver_sequence_debug;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index 6472da2c361e..a8c36eda1d09 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -580,7 +580,9 @@ struct dc_stream_state 
*dcn32_can_support_mclk_switch_using_fw_based_vblank_stre
if (!fpo_candidate_stream->allow_freesync)
return NULL;
 
-   if (fpo_candidate_stream->vrr_active_variable && 
dc->debug.disable_fams_gaming)
+   if (fpo_candidate_stream->vrr_active_variable &&
+   ((dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE) ||
+   (context->stream_count > 1 && !(dc->debug.disable_fams_gaming == 
INGAME_FAMS_MULTI_DISP_ENABLE
return NULL;
 
return fpo_candidate_stream;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
index fa1305f04341..1ce727351c39 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
@@ -1996,7 +1996,7 @@ bool 
dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc,
if (!context->streams[0]->allow_freesync)
return false;
 
-   if (context->streams[0]->vrr_active_variable && 
dc->debug.disable_fams_gaming)
+   if (context->streams[0]->vrr_active_variable && 
(dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE))
return false;
 
context->streams[0]->fpo_in_use = true;
-- 
2.37.3



[PATCH 21/46] drm/amd/display: Refactor for Replay Link off frame count

2024-04-24 Thread Wayne Lin
From: Dennis Chan 

[why]
To refine the link off frame count reporting in the diagnostic tool,
the driver now shows the link off frame count number instead of showing
the link off frame count level.

Reviewed-by: ChunTao Tso 
Reviewed-by: Robin Chen 
Acked-by: Wayne Lin 
Signed-off-by: Dennis Chan 
---
 drivers/gpu/drm/amd/display/dc/dc_types.h | 2 +-
 drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 8 ++--
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h 
b/drivers/gpu/drm/amd/display/dc/dc_types.h
index d79de4780151..cee012587e6e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -1092,7 +1092,7 @@ struct replay_settings {
/* Coasting vtotal table */
uint32_t coasting_vtotal_table[PR_COASTING_TYPE_NUM];
/* Maximum link off frame count */
-   enum replay_link_off_frame_count_level link_off_frame_count_level;
+   uint32_t link_off_frame_count;
/* Replay pseudo vtotal for abm + ips on full screen video which can 
improve ips residency */
uint16_t abm_with_ips_on_full_screen_video_pseudo_vtotal;
/* Replay last pseudo vtotal set to DMUB */
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c 
b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index 2a3698fd2dc2..530379508a69 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -994,16 +994,12 @@ void calculate_replay_link_off_frame_count(struct dc_link 
*link,
max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line;
pixel_deviation_per_line = 
link->dpcd_caps.pr_info.pixel_deviation_per_line;
 
-   if (htotal != 0 && vtotal != 0)
+   if (htotal != 0 && vtotal != 0 && pixel_deviation_per_line != 0)
max_link_off_frame_count = htotal * max_deviation_line / 
(pixel_deviation_per_line * vtotal);
else
ASSERT(0);
 
-   link->replay_settings.link_off_frame_count_level =
-   max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_BEST ? 
PR_LINK_OFF_FRAME_COUNT_BEST :
-   max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_GOOD ? 
PR_LINK_OFF_FRAME_COUNT_GOOD :
-   PR_LINK_OFF_FRAME_COUNT_FAIL;
-
+   link->replay_settings.link_off_frame_count = max_link_off_frame_count;
 }
 
 bool fill_custom_backlight_caps(unsigned int config_no, struct 
dm_acpi_atif_backlight_caps *caps)
-- 
2.37.3



[PATCH 20/46] drm/amd/display: Handle the case which quad_part is equal 0

2024-04-24 Thread Wayne Lin
From: Rodrigo Siqueira 

Add code to handle the case when quad_part is 0 in gpu_addr_to_uma().

Acked-by: Wayne Lin 
Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
index d5769f38874f..7f7b6bf76a8d 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
@@ -95,8 +95,11 @@ static bool gpu_addr_to_uma(struct dce_hwseq *hwseq,
} else if (hwseq->fb_offset.quad_part <= addr->quad_part &&
addr->quad_part <= hwseq->uma_top.quad_part) {
is_in_uma = true;
+   } else if (addr->quad_part == 0) {
+   is_in_uma = false;
} else {
is_in_uma = false;
+   BREAK_TO_DEBUGGER();
}
return is_in_uma;
 }
-- 
2.37.3



[PATCH 19/46] drm/amd/display: Add log_color_state callback to multiple DCNs

2024-04-24 Thread Wayne Lin
From: Rodrigo Siqueira 

Set up to enable log color state for multiple DCNs.

Acked-by: Wayne Lin 
Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c   | 1 +
 drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c   | 1 +
 drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c | 1 +
 drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c   | 1 +
 drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c | 1 +
 5 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c
index 18249c6b6d81..3dfac372d165 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c
@@ -68,6 +68,7 @@ static const struct hw_sequencer_funcs dcn21_funcs = {
.set_avmute = dce110_set_avmute,
.log_hw_state = dcn10_log_hw_state,
.get_hw_state = dcn10_get_hw_state,
+   .log_color_state = dcn20_log_color_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
.edp_backlight_control = dce110_edp_backlight_control,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
index ef913445a795..4b32497c09d0 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
@@ -68,6 +68,7 @@ static const struct hw_sequencer_funcs dcn30_funcs = {
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
+   .log_color_state = dcn30_log_color_state,
.get_hw_state = dcn10_get_hw_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
index 0e5c037e82a6..97e33eb7ac5a 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
@@ -69,6 +69,7 @@ static const struct hw_sequencer_funcs dcn301_funcs = {
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
+   .log_color_state = dcn30_log_color_state,
.get_hw_state = dcn10_get_hw_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
index c06cc2c5da92..9cb7afe0e731 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
@@ -71,6 +71,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
+   .log_color_state = dcn30_log_color_state,
.get_hw_state = dcn10_get_hw_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
index 934203ef52bb..f9120b1c1c1f 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
@@ -74,6 +74,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
.get_hw_state = dcn10_get_hw_state,
+   .log_color_state = dcn30_log_color_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
.edp_backlight_control = dce110_edp_backlight_control,
-- 
2.37.3



[PATCH 18/46] drm/amd/display: Remove legacy code in DC

2024-04-24 Thread Wayne Lin
From: Rodrigo Siqueira 

This commit just removes some trivial legacy code from some of the DC
files.

Acked-by: Wayne Lin 
Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 12 
 drivers/gpu/drm/amd/display/dc/hwss/Makefile |  6 --
 .../gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h  |  2 +-
 .../gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c |  3 ---
 drivers/gpu/drm/amd/display/dc/os_types.h|  2 --
 5 files changed, 1 insertion(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index bfc042209007..17a21bcbde17 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -762,23 +762,11 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_param
break;
}
}
-   // Ported from DCN315
-   if (clk_table->num_entries == 1) {
-   /*smu gives one DPM level, let's take the highest one*/
-   closest_clk_lvl = dcn3_16_soc.num_states - 1;
-   }
 
s[i].state = i;
 
/* Clocks dependent on voltage level. */
s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
-   if (clk_table->num_entries == 1 &&
-   s[i].dcfclk_mhz <
-   dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
-   /*SMU fix not released yet*/
-   s[i].dcfclk_mhz =
-   
dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
-   }
s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
s[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz *
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/Makefile 
b/drivers/gpu/drm/amd/display/dc/hwss/Makefile
index ba55050be161..40ecebea1ba0 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/hwss/Makefile
@@ -110,10 +110,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN21)
 
 ###
 
-###
-
-###
-
 HWSS_DCN30 = dcn30_hwseq.o dcn30_init.o
 
 AMD_DAL_HWSS_DCN30 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn30/,$(HWSS_DCN30))
@@ -188,8 +184,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN351)
 
 ###
 
-###
-
 HWSS_DCN401 = dcn401_hwseq.o dcn401_init.o
 
 AMD_DAL_HWSS_DCN401 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn401/,$(HWSS_DCN401))
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h
index 76b16839486a..6a153e7ce910 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h
@@ -27,7 +27,7 @@
 #define __DC_HWSS_DCN30_H__
 
 #include "hw_sequencer_private.h"
-#include "dcn20/dcn20_hwseq.h"
+
 struct dc;
 
 void dcn30_init_hw(struct dc *dc);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
index 6477009ce065..0e5c037e82a6 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
@@ -53,9 +53,6 @@ static const struct hw_sequencer_funcs dcn301_funcs = {
.enable_stream = dcn20_enable_stream,
.disable_stream = dce110_disable_stream,
.unblank_stream = dcn20_unblank_stream,
-#ifdef FREESYNC_POWER_OPTIMIZE
-   .are_streams_coarse_grain_aligned = 
dcn20_are_streams_coarse_grain_aligned,
-#endif
.blank_stream = dce110_blank_stream,
.enable_audio_stream = dce110_enable_audio_stream,
.disable_audio_stream = dce110_disable_audio_stream,
diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h 
b/drivers/gpu/drm/amd/display/dc/os_types.h
index 6c4578d347af..f2ba76c1e0c0 100644
--- a/drivers/gpu/drm/amd/display/dc/os_types.h
+++ b/drivers/gpu/drm/amd/display/dc/os_types.h
@@ -29,8 +29,6 @@
 
 #include 
 #include 
-#include 
-#include 
 #include 
 #include 
 
-- 
2.37.3



[PATCH 17/46] drm/amd/display: Update some of the dcn303 parameters

2024-04-24 Thread Wayne Lin
From: Rodrigo Siqueira 

Adjust to update some of the dcn303 parameters.

Acked-by: Wayne Lin 
Signed-off-by: Rodrigo Siqueira 
---
 .../amd/display/dc/resource/dcn303/dcn303_resource.c  | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
index 25cd6236b054..d2bc66904217 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
@@ -97,8 +97,9 @@ static const struct dc_debug_options debug_defaults_drv = {
.underflow_assert_delay_us = 0x,
.dwb_fi_phase = -1, // -1 = disable,
.dmub_command_table = true,
+   .use_max_lb = true,
.exit_idle_opt_for_cursor_updates = true,
-   .disable_idle_power_optimizations = false,
+   .enable_legacy_fast_update = false,
.using_dml2 = false,
 };
 
@@ -145,9 +146,9 @@ static const struct dc_plane_cap plane_cap = {
.fp16 = 16000
},
.max_downscale_factor = {
-   .argb = 600,
-   .nv12 = 600,
-   .fp16 = 600
+   .argb = 167,
+   .nv12 = 167,
+   .fp16 = 167
},
16,
16
@@ -1171,6 +1172,8 @@ static bool dcn303_resource_construct(
dc->caps.cursor_cache_size =
dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8;
dc->caps.max_slave_planes = 1;
+   dc->caps.max_slave_yuv_planes = 1;
+   dc->caps.max_slave_rgb_planes = 1;
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.extended_aux_timeout_support = true;
-- 
2.37.3



[PATCH 16/46] drm/amd/display: Enable legacy fast update for dcn301

2024-04-24 Thread Wayne Lin
From: Rodrigo Siqueira 

Set up to enable legacy fast update.

Acked-by: Wayne Lin 
Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
index 346cec70de96..7d04739c3ba1 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
@@ -702,6 +702,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.dmub_command_table = true,
.use_max_lb = false,
.exit_idle_opt_for_cursor_updates = true,
+   .enable_legacy_fast_update = true,
.using_dml2 = false,
 };
 
-- 
2.37.3



[PATCH 15/46] drm/amd/display: Adjust functions prefix for some of the dcn301 fpu functions

2024-04-24 Thread Wayne Lin
From: Rodrigo Siqueira 

Add the dcn301_fpu prefix to some of the FPU functions, with the required
adjustments.

Acked-by: Wayne Lin 
Signed-off-by: Rodrigo Siqueira 
---
 .../amd/display/dc/dml/dcn301/dcn301_fpu.c|  4 ++--
 .../amd/display/dc/dml/dcn301/dcn301_fpu.h|  7 +++
 .../dc/resource/dcn301/dcn301_resource.c  | 19 +--
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 6ce90678b33c..0c0b2d67c9cd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -320,7 +320,7 @@ static void calculate_wm_set_for_vlevel(int vlevel,
 
 }
 
-void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params 
*bw_params)
+void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params 
*bw_params)
 {
struct _vcs_dpi_voltage_scaling_st *s = 
dc->scratch.update_bw_bounding_box.clock_limits;
struct dcn301_resource_pool *pool = TO_DCN301_RES_POOL(dc->res_pool);
@@ -409,7 +409,7 @@ void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info 
bb_info)
dcn3_01_soc.sr_exit_time_us = 
bb_info.dram_sr_exit_latency_100ns * 10;
 }
 
-void dcn301_calculate_wm_and_dlg_fp(struct dc *dc,
+void dcn301_fpu_calculate_wm_and_dlg(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
index 774b0fdfc80b..3e103e23dc6f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
@@ -26,15 +26,14 @@
 #ifndef __DCN301_FPU_H__
 #define __DCN301_FPU_H__
 
-void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params 
*bw_params);
+void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
+void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params 
*bw_params);
 
 void dcn301_fpu_set_wm_ranges(int i,
struct pp_smu_wm_range_sets *ranges,
struct _vcs_dpi_soc_bounding_box_st *loaded_bb);
 
-void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
-
-void dcn301_calculate_wm_and_dlg_fp(struct dc *dc,
+void dcn301_fpu_calculate_wm_and_dlg(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
index 7538b548c572..346cec70de96 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
@@ -1363,14 +1363,21 @@ static void set_wm_ranges(
pp_smu->nv_funcs.set_wm_ranges(_smu->nv_funcs.pp_smu, );
 }
 
-static void dcn301_calculate_wm_and_dlg(
-   struct dc *dc, struct dc_state *context,
-   display_e2e_pipe_params_st *pipes,
-   int pipe_cnt,
-   int vlevel)
+static void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params 
*bw_params)
 {
DC_FP_START();
-   dcn301_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel);
+   dcn301_fpu_update_bw_bounding_box(dc, bw_params);
+   DC_FP_END();
+}
+
+static void dcn301_calculate_wm_and_dlg(struct dc *dc,
+   struct dc_state *context,
+   display_e2e_pipe_params_st *pipes,
+   int pipe_cnt,
+   int vlevel_req)
+{
+   DC_FP_START();
+   dcn301_fpu_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, 
vlevel_req);
DC_FP_END();
 }
 
-- 
2.37.3



  1   2   >