[PATCH] drm/amdgpu: Skip specific mmhub and sdma registers accessing under sriov

2023-01-10 Thread Yifan Zha
[Why]
SDMA0_CNTL and MMHUB system aperture related registers are blocked by L1 Policy.
Therefore, they cannot be accessed by the VF, and the access attempts are logged as violations.

[How]
For MMHUB registers, they will be programmed by the PF, so the VF will skip
programming them in mmhub_v3_0.
For SDMA0_CNTL, which is a PF-only register, the VF does not need to program
it in sdma_v6_0.

Signed-off-by: Yifan Zha 
---
 drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c | 34 -
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c  | 10 +---
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
index e9dcd6fcde7f..ae9cd1a4cfee 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
@@ -169,23 +169,23 @@ static void mmhub_v3_0_init_system_aperture_regs(struct 
amdgpu_device *adev)
uint64_t value;
uint32_t tmp;
 
-   if (!amdgpu_sriov_vf(adev)) {
-   /*
-* the new L1 policy will block SRIOV guest from writing
-* these regs, and they will be programed at host.
-* so skip programing these regs.
-*/
-   /* Disable AGP. */
-   WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
-   WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, 0);
-   WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, 0x00FF);
-
-   /* Program the system aperture low logical page number. */
-   WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
-adev->gmc.vram_start >> 18);
-   WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-adev->gmc.vram_end >> 18);
-   }
+   if (amdgpu_sriov_vf(adev))
+   return;
+
+   /*
+* the new L1 policy will block SRIOV guest from writing
+* these regs, and they will be programed at host.
+* so skip programing these regs.
+*/
+   /* Disable AGP. */
+   WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+   WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, 0);
+   WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, 0x00FF);
+   /* Program the system aperture low logical page number. */
+   WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+adev->gmc.vram_start >> 18);
+   WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+adev->gmc.vram_end >> 18);
 
/* Set default page address. */
value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start +
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index bf1fa5e8d2f9..6fe292a2486b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1403,10 +1403,12 @@ static int sdma_v6_0_set_trap_irq_state(struct 
amdgpu_device *adev,
 
u32 reg_offset = sdma_v6_0_get_reg_offset(adev, type, regSDMA0_CNTL);
 
-   sdma_cntl = RREG32(reg_offset);
-   sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
-  state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
-   WREG32(reg_offset, sdma_cntl);
+   if (!amdgpu_sriov_vf(adev)) {
+   sdma_cntl = RREG32(reg_offset);
+   sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+   state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+   WREG32(reg_offset, sdma_cntl);
+   }
 
return 0;
 }
-- 
2.25.1



[PATCH v3 4/4] drm/i915: Clean up page shift operation

2023-01-10 Thread Somalapuram Amaranath
Remove page shift operations as ttm_resource moved
from num_pages to size_t size in bytes.

Signed-off-by: Somalapuram Amaranath 
---
 drivers/gpu/drm/i915/i915_scatterlist.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c 
b/drivers/gpu/drm/i915/i915_scatterlist.c
index 114e5e39aa72..bd7aaf7738f4 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.c
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -94,7 +94,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct 
drm_mm_node *node,
if (!rsgt)
return ERR_PTR(-ENOMEM);
 
-   i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT);
+   i915_refct_sgt_init(rsgt, node->size);
st = &rsgt->table;
if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages),
   GFP_KERNEL)) {
@@ -105,8 +105,8 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct 
drm_mm_node *node,
sg = st->sgl;
st->nents = 0;
prev_end = (resource_size_t)-1;
-   block_size = node->size << PAGE_SHIFT;
-   offset = node->start << PAGE_SHIFT;
+   block_size = node->size;
+   offset = node->start;
 
while (block_size) {
u64 len;
-- 
2.32.0



[PATCH v3 3/4] drm/amdgpu: Clean up page shift operation and GWS and OA

2023-01-10 Thread Somalapuram Amaranath
Remove page shift operations as ttm_resource moved
from num_pages to size_t size in bytes.

Signed-off-by: Somalapuram Amaranath 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  4 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 12 ++--
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 974e85d8b6cc..19ad365dc159 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -541,12 +541,10 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
/* GWS and OA don't need any alignment. */
page_align = bp->byte_align;
-   size <<= PAGE_SHIFT;
-
} else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) {
/* Both size and alignment must be a multiple of 4. */
page_align = ALIGN(bp->byte_align, 4);
-   size = ALIGN(size, 4) << PAGE_SHIFT;
+   size = ALIGN(size, 4);
} else {
/* Memory should be aligned at least to a page size. */
page_align = ALIGN(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 5c4f93ee0c57..f92b61350efe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -91,11 +91,11 @@ static inline void amdgpu_res_first(struct ttm_resource 
*res,
break;
case TTM_PL_TT:
node = to_ttm_range_mgr_node(res)->mm_nodes;
-   while (start >= node->size << PAGE_SHIFT)
-   start -= node++->size << PAGE_SHIFT;
+   while (start >= node->size)
+   start -= node++->size;
 
-   cur->start = (node->start << PAGE_SHIFT) + start;
-   cur->size = min((node->size << PAGE_SHIFT) - start, size);
+   cur->start = (node->start) + start;
+   cur->size = min(node->size - start, size);
cur->remaining = size;
cur->node = node;
break;
@@ -155,8 +155,8 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor 
*cur, uint64_t size)
node = cur->node;
 
cur->node = ++node;
-   cur->start = node->start << PAGE_SHIFT;
-   cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+   cur->start = node->start;
+   cur->size = min(node->size, cur->remaining);
break;
default:
return;
-- 
2.32.0



[PATCH v3 1/4] drm/ttm: Clean up page shift operation

2023-01-10 Thread Somalapuram Amaranath
Remove page shift operations as ttm_resource moved
from num_pages to size_t size in bytes.
v1 -> v2: fix missing page shift to fpfn and lpfn
v2 -> v3: separate patches based on driver module

Signed-off-by: Somalapuram Amaranath 
---
 drivers/gpu/drm/ttm/ttm_range_manager.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c 
b/drivers/gpu/drm/ttm/ttm_range_manager.c
index ae11d07eb63a..3703cbc6d368 100644
--- a/drivers/gpu/drm/ttm/ttm_range_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_range_manager.c
@@ -83,9 +83,10 @@ static int ttm_range_man_alloc(struct ttm_resource_manager 
*man,
 
spin_lock(&rman->lock);
ret = drm_mm_insert_node_in_range(mm, &node->mm_nodes[0],
- PFN_UP(node->base.size),
+ node->base.size,
  bo->page_alignment, 0,
- place->fpfn, lpfn, mode);
+ place->fpfn << PAGE_SHIFT,
+ lpfn << PAGE_SHIFT, mode);
spin_unlock(&rman->lock);
 
if (unlikely(ret)) {
@@ -119,11 +120,10 @@ static bool ttm_range_man_intersects(struct 
ttm_resource_manager *man,
 size_t size)
 {
struct drm_mm_node *node = &to_ttm_range_mgr_node(res)->mm_nodes[0];
-   u32 num_pages = PFN_UP(size);
 
/* Don't evict BOs outside of the requested placement range */
-   if (place->fpfn >= (node->start + num_pages) ||
-   (place->lpfn && place->lpfn <= node->start))
+   if ((place->fpfn << PAGE_SHIFT) >= (node->start + size) ||
+   (place->lpfn && (place->lpfn << PAGE_SHIFT) <= node->start))
return false;
 
return true;
@@ -135,10 +135,9 @@ static bool ttm_range_man_compatible(struct 
ttm_resource_manager *man,
 size_t size)
 {
struct drm_mm_node *node = &to_ttm_range_mgr_node(res)->mm_nodes[0];
-   u32 num_pages = PFN_UP(size);
 
if (node->start < place->fpfn ||
-   (place->lpfn && (node->start + num_pages) > place->lpfn))
+   (place->lpfn && (node->start + size) > place->lpfn << PAGE_SHIFT))
return false;
 
return true;
-- 
2.32.0



[PATCH v3 2/4] drm/gem: Remove BUG_ON in drm_gem_private_object_init

2023-01-10 Thread Somalapuram Amaranath
ttm_resource allocate size in bytes to support less than page size

Signed-off-by: Somalapuram Amaranath 
---
 drivers/gpu/drm/drm_gem.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 59a0bb5ebd85..ee8b5c2b6c60 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -152,8 +152,6 @@ EXPORT_SYMBOL(drm_gem_object_init);
 void drm_gem_private_object_init(struct drm_device *dev,
 struct drm_gem_object *obj, size_t size)
 {
-   BUG_ON((size & (PAGE_SIZE - 1)) != 0);
-
obj->dev = dev;
obj->filp = NULL;
 
-- 
2.32.0



Re: [PATCH v4] drm/amdkfd: Page aligned memory reserve size

2023-01-10 Thread Felix Kuehling

Am 2023-01-10 um 17:41 schrieb Philip Yang:

Use page aligned size to reserve memory usage because page aligned TTM
BO size is used to unreserve memory usage, otherwise no page aligned
size causes memory usage accounting unbalanced.

Change vram_used definition type to int64_t to be able to trigger
WARN_ONCE(adev && adev->kfd.vram_used < 0, "..."), to help debug the
accounting issue with warning and backtrace.

Signed-off-by: Philip Yang 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 +++-
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  9 +++--
  3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fb41869e357a..333780491867 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
  
  struct amdgpu_kfd_dev {

struct kfd_dev *dev;
-   uint64_t vram_used;
+   int64_t vram_used;
uint64_t vram_used_aligned;
bool init_complete;
struct work_struct reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index cd5de5e08d2f..6f236ded5f12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1598,6 +1598,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_bo *bo;
struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
+   uint64_t aligned_size;
u64 alloc_flags;
int ret;
  
@@ -1653,22 +1654,23 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(

 * the memory.
 */
if ((*mem)->aql_queue)
-   size = size >> 1;
+   size >>= 1;
+   aligned_size = PAGE_ALIGN(size);
  
  	(*mem)->alloc_flags = flags;
  
  	amdgpu_sync_create(&(*mem)->sync);
  
-	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);

+   ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
if (ret) {
pr_debug("Insufficient memory\n");
goto err_reserve_limit;
}
  
  	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",

-   va, size, domain_string(alloc_domain));
+   va, (*mem)->aql_queue ? size << 1 : size, 
domain_string(alloc_domain));
  
-	ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,

+   ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, 
alloc_flags,
   bo_type, NULL, &gobj);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
@@ -1725,7 +1727,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
  err_bo_create:
-   amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
+   amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
  err_reserve_limit:
mutex_destroy(&(*mem)->lock);
if (gobj)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6d291aa6386b..f79b8e964140 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1127,8 +1127,13 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
}
  
  	/* Update the VRAM usage count */

-   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-   WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   uint64_t size = args->size;
+
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
+   size >>= 1;
+   WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+   }
  
  	mutex_unlock(&p->mutex);
  


RE: [PATCH] drm/amdgpu/vcn4: add missing encoder cap

2023-01-10 Thread Liu, Leo
[AMD Official Use Only - General]

Reviewed-by: Leo Liu 

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: January 10, 2023 5:48 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH] drm/amdgpu/vcn4: add missing encoder cap

VCN4.x supports AV1 encode.

Fixes: 9ac0edaa0f8323 ("drm/amdgpu: add vcn_4_0_0 video codec query")
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/soc21.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c 
b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 5562670b7b52..bea6b499568a 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -52,6 +52,7 @@ static const struct amdgpu_video_codec_info 
vcn_4_0_0_video_codecs_encode_array[
 {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 
2304, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 
0)},
+   {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
 };

 static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode =
--
2.39.0



[PATCH] drm/amd/display: Fix COLOR_SPACE_YCBCR2020_TYPE matrix

2023-01-10 Thread Joshua Ashton
The YCC conversion matrix for RGB -> COLOR_SPACE_YCBCR2020_TYPE is
missing the values for the fourth column of the matrix.

The fourth column of the matrix is essentially just a value that is
added given that the color is 3 components in size.
These values are needed to bias the chroma from the [-1, 1] -> [0, 1]
range.

This fixes color being very green when using Gamescope HDR on HDMI
output which prefers YCC 4:4:4.

Signed-off-by: Joshua Ashton 
---
 drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 471078fc3900..652270a0b498 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -90,8 +90,8 @@ static const struct out_csc_color_matrix_type 
output_csc_matrix[] = {
{ 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3,
0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} },
{ COLOR_SPACE_YCBCR2020_TYPE,
-   { 0x1000, 0xF149, 0xFEB7, 0x, 0x0868, 0x15B2,
-   0x01E6, 0x, 0xFB88, 0xF478, 0x1000, 0x} 
},
+   { 0x1000, 0xF149, 0xFEB7, 0x1004, 0x0868, 0x15B2,
+   0x01E6, 0x201, 0xFB88, 0xF478, 0x1000, 0x1004} 
},
{ COLOR_SPACE_YCBCR709_BLACK_TYPE,
{ 0x, 0x, 0x, 0x1000, 0x, 0x,
0x, 0x0200, 0x, 0x, 0x, 0x1000} 
},
-- 
2.39.0



[PATCH] drm/amdgpu/vcn4: add missing encoder cap

2023-01-10 Thread Alex Deucher
VCN4.x supports AV1 encode.

Fixes: 9ac0edaa0f8323 ("drm/amdgpu: add vcn_4_0_0 video codec query")
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/soc21.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c 
b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 5562670b7b52..bea6b499568a 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -52,6 +52,7 @@ static const struct amdgpu_video_codec_info 
vcn_4_0_0_video_codecs_encode_array[
 {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 
2304, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 
0)},
+   {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
 };
 
 static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode =
-- 
2.39.0



[PATCH v4] drm/amdkfd: Page aligned memory reserve size

2023-01-10 Thread Philip Yang
Use page aligned size to reserve memory usage because page aligned TTM
BO size is used to unreserve memory usage, otherwise no page aligned
size causes memory usage accounting unbalanced.

Change vram_used definition type to int64_t to be able to trigger
WARN_ONCE(adev && adev->kfd.vram_used < 0, "..."), to help debug the
accounting issue with warning and backtrace.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  9 +++--
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fb41869e357a..333780491867 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
 
 struct amdgpu_kfd_dev {
struct kfd_dev *dev;
-   uint64_t vram_used;
+   int64_t vram_used;
uint64_t vram_used_aligned;
bool init_complete;
struct work_struct reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index cd5de5e08d2f..6f236ded5f12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1598,6 +1598,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_bo *bo;
struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
+   uint64_t aligned_size;
u64 alloc_flags;
int ret;
 
@@ -1653,22 +1654,23 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 * the memory.
 */
if ((*mem)->aql_queue)
-   size = size >> 1;
+   size >>= 1;
+   aligned_size = PAGE_ALIGN(size);
 
(*mem)->alloc_flags = flags;
 
amdgpu_sync_create(&(*mem)->sync);
 
-   ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
+   ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
if (ret) {
pr_debug("Insufficient memory\n");
goto err_reserve_limit;
}
 
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
-   va, size, domain_string(alloc_domain));
+   va, (*mem)->aql_queue ? size << 1 : size, 
domain_string(alloc_domain));
 
-   ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
+   ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, 
alloc_flags,
   bo_type, NULL, &gobj);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
@@ -1725,7 +1727,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
 err_bo_create:
-   amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
+   amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
 err_reserve_limit:
mutex_destroy(&(*mem)->lock);
if (gobj)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6d291aa6386b..f79b8e964140 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1127,8 +1127,13 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
}
 
/* Update the VRAM usage count */
-   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-   WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   uint64_t size = args->size;
+
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
+   size >>= 1;
+   WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+   }
 
mutex_unlock(&p->mutex);
 
-- 
2.35.1



Re: [PATCH] drm/amdkfd: Page aligned memory reserve size

2023-01-10 Thread Philip Yang



On 2023-01-10 15:49, Felix Kuehling wrote:

Am 2023-01-10 um 15:44 schrieb Philip Yang:


On 2023-01-10 13:33, Felix Kuehling wrote:

Am 2023-01-10 um 12:11 schrieb Philip Yang:

Use page aligned size to reserve memory usage because page aligned TTM
BO size is used to unreserve memory usage, otherwise no page aligned
size causes memory usage accounting unbalanced.

Change vram_used definition type to int64_t to be able to trigger
WARN_ONCE(adev && adev->kfd.vram_used < 0, "..."), to help debug the
accounting issue with warning and backtrace.

Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   | 2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +--
  3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index fb41869e357a..333780491867 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
    struct amdgpu_kfd_dev {
  struct kfd_dev *dev;
-    uint64_t vram_used;
+    int64_t vram_used;
  uint64_t vram_used_aligned;
  bool init_complete;
  struct work_struct reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index 2a118669d0e3..7efee672bc41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1598,6 +1598,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
  struct amdgpu_bo *bo;
  struct drm_gem_object *gobj = NULL;
  u32 domain, alloc_domain;
+    uint64_t aligned_size;
  u64 alloc_flags;
  int ret;
  @@ -1653,13 +1654,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
   * the memory.
   */
  if ((*mem)->aql_queue)
-    size = size >> 1;
+    size >>= 1;
+
+    aligned_size = PAGE_ALIGN(size);


Why do you need a new variable for this? Can't you just update size 
to be page-aligned here? Is the unaligned size still needed anywhere?
amdgpu_gem_object_create ->...-> amdgpu_bo_create needs the original 
size for domain GWS etc, as the size is used as number of pages, not 
bytes.


I don't think GWS is ever allocated through this code path. This type 
of memory is not exposed in the KFD ioctl API. KFD allocates a GWS BO 
using amdgpu_amdkfd_alloc_gws.


yes, as size is used in pr_debug, it is better to show the original size 
in debug log to match application, also notice the pr_debug size should 
base on aql_queue flag, will send v4 patch with the fix.


Regards,

Philip



Regards,
  Felix






    (*mem)->alloc_flags = flags;
    amdgpu_sync_create(&(*mem)->sync);
  -    ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
+    ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
  if (ret) {
  pr_debug("Insufficient memory\n");
  goto err_reserve_limit;
@@ -1725,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
  /* Don't unreserve system mem limit twice */
  goto err_reserve_limit;
  err_bo_create:
-    amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
+    amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
  err_reserve_limit:
  mutex_destroy(&(*mem)->lock);
  if (gobj)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

index 6d291aa6386b..e11451100a20 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1127,8 +1127,11 @@ static int 
kfd_ioctl_alloc_memory_of_gpu(struct file *filep,

  }
    /* Update the VRAM usage count */
-    if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-    WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+    if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+    if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
+    args->size >>= 1;


This will return the updated size to user mode. That's probably not 
what you want. It may be harmless, but technically it breaks the 
ABI. It would be better to use a local variable for the updated size.


Submit v3 patch to fix this.

Thanks,

Philip



Regards,
  Felix


+ WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + 
PAGE_ALIGN(args->size));

+    }
    mutex_unlock(&p->mutex);


Re: [regression, bisected, pci/iommu] Bug 216865 - Black screen when amdgpu started during 6.2-rc1 boot with AMD IOMMU enabled

2023-01-10 Thread Matt Fagnani

Christian,

I'm attaching the output of sudo lspci -. I'm not sure what $bus_id 
is in this case. I guess it might be 00 in 00:00.0. I attached the dmesg 
from previous boots with 6.2-rc1 at 
https://bugzilla.kernel.org/show_bug.cgi?id=216865#c2 as I mentioned at 
https://lore.kernel.org/all/52583644-d875-a454-7288-8b00ea056...@bell.net/ 
and 6.2-rc2 + Vasant's patch with rd.driver.blacklist=amdgpu on the 
kernel command line at 
https://lore.kernel.org/all/ff26929d-9fb0-3c85-2594-dc2937c1b...@bell.net/ 
I'm using the Radeon R5 integrated GPU which is called Wani in lspci and 
Carrizo in dmesg. The CPU is AMD A10-9620P which is Bristol Ridge or 
Excavator+ according to 
https://en.wikipedia.org/wiki/List_of_AMD_accelerated_processing_units 
I'm using the internal Elan touchscreen in the laptop. I'm not using the 
HDMI port for an external monitor or audio which I think is called 
Kabini HDMI/DP Audio in lspci


Thanks,

Matt

On 1/10/23 08:56, Christian König wrote:

Am 10.01.23 um 14:51 schrieb Jason Gunthorpe:

On Tue, Jan 10, 2023 at 02:45:30PM +0100, Christian König wrote:

Since this is a device integrated in the CPU it could be that the 
ACS/ATS

functionalities are controlled by the BIOS and can be enabled/disabled
there. But this should always enable/disable both.

This sounds like a GPU driver bug then, it should tolerate PASID being
unavailable because of BIOS issues/whatever and not black screen on
boot?


Yeah, potentially. Could I get a full "sudo lspci - -s $bus_id" + 
dmesg of that device?


Thanks,
Christian.



Jason
00:00.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Family 15h (Models 
60h-6fh) Processor Root Complex
Subsystem: Hewlett-Packard Company Device 8332
Control: I/O- Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx-
Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- TAbort- SERR- 
Capabilities: [64] MSI: Enable+ Count=1/4 Maskable- 64bit+
Address: fee04004  Data: 0021
Capabilities: [74] HyperTransport: MSI Mapping Enable+ Fixed+

00:01.0 VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI] Wani 
[Radeon R5/R6/R7 Graphics] (rev ca) (prog-if 00 [VGA controller])
DeviceName: ATI EG BROADWAY
Subsystem: Hewlett-Packard Company Device 8332
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- 
Capabilities: [50] Power Management version 3
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA 
PME(D0-,D1+,D2+,D3hot+,D3cold-)
Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [58] Express (v2) Root Complex Integrated Endpoint, MSI 00
DevCap: MaxPayload 256 bytes, PhantFunc 0
ExtTag+ RBE+ FLReset-
DevCtl: CorrErr- NonFatalErr- FatalErr- UnsupReq-
RlxdOrd+ ExtTag+ PhantFunc- AuxPwr- NoSnoop+
MaxPayload 256 bytes, MaxReadReq 512 bytes
DevSta: CorrErr- NonFatalErr- FatalErr- UnsupReq- AuxPwr- 
TransPend-
DevCap2: Completion Timeout: Not Supported, TimeoutDis- 
NROPrPrP- LTR-
 10BitTagComp- 10BitTagReq- OBFF Not Supported, ExtFmt+ 
EETLPPrefix+, MaxEETLPPrefixes 1
 EmergencyPowerReduction Not Supported, 
EmergencyPowerReductionInit-
 FRS-
 AtomicOpsCap: 32bit- 64bit- 128bitCAS-
DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis- LTR- 
10BitTagReq- OBFF Disabled,
 AtomicOpsCtl: ReqEn-
Capabilities: [a0] MSI: Enable+ Count=1/1 Maskable- 64bit+
Address: fee0  Data: 
Capabilities: [100 v1] Vendor Specific Information: ID=0001 Rev=1 
Len=010 
Capabilities: [270 v1] Secondary PCI Express
LnkCtl3: LnkEquIntrruptEn- PerformEqu-
LaneErrStat: 0
Capabilities: [2b0 v1] Address Translation Service (ATS)
ATSCap: Invalidate Queue Depth: 00
ATSCtl: Enable+, Smallest Translation Unit: 00
Capabilities: [2c0 v1] Page Request Interface (PRI)
PRICtl: Enable+ Reset-
PRISta: RF- UPRGI- Stopped+
Page Request Capacity: 0020, Page Request Allocation: 
0020
Capabilities: [2d0 v1] Process Address Space ID (PASID)
PASIDCap: Exec- Priv-, Max PASID Width: 10
PASIDCtl: Enable+ Exec- Priv-
Kernel driver in use: amdgpu
Kernel modules: amdgpu

00:01.1 Audio device: Advanced Micro Devices, Inc. [AMD/ATI] Kabini HDMI/DP 
Audio
Subsystem: Hewlett-Packard Company Device 8332
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SE

Re: [PATCH] drm/amdkfd: Page aligned memory reserve size

2023-01-10 Thread Felix Kuehling

Am 2023-01-10 um 15:44 schrieb Philip Yang:


On 2023-01-10 13:33, Felix Kuehling wrote:

Am 2023-01-10 um 12:11 schrieb Philip Yang:

Use page aligned size to reserve memory usage because page aligned TTM
BO size is used to unreserve memory usage, otherwise no page aligned
size causes memory usage accounting unbalanced.

Change vram_used definition type to int64_t to be able to trigger
WARN_ONCE(adev && adev->kfd.vram_used < 0, "..."), to help debug the
accounting issue with warning and backtrace.

Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   | 2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +--
  3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index fb41869e357a..333780491867 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
    struct amdgpu_kfd_dev {
  struct kfd_dev *dev;
-    uint64_t vram_used;
+    int64_t vram_used;
  uint64_t vram_used_aligned;
  bool init_complete;
  struct work_struct reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index 2a118669d0e3..7efee672bc41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1598,6 +1598,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
  struct amdgpu_bo *bo;
  struct drm_gem_object *gobj = NULL;
  u32 domain, alloc_domain;
+    uint64_t aligned_size;
  u64 alloc_flags;
  int ret;
  @@ -1653,13 +1654,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
   * the memory.
   */
  if ((*mem)->aql_queue)
-    size = size >> 1;
+    size >>= 1;
+
+    aligned_size = PAGE_ALIGN(size);


Why do you need a new variable for this? Can't you just update size 
to be page-aligned here? Is the unaligned size still needed anywhere?
amdgpu_gem_object_create ->...-> amdgpu_bo_create needs the original 
size for domain GWS etc, as the size is used as number of pages, not 
bytes.


I don't think GWS is ever allocated through this code path. This type of 
memory is not exposed in the KFD ioctl API. KFD allocates a GWS BO using 
amdgpu_amdkfd_alloc_gws.


Regards,
  Felix






    (*mem)->alloc_flags = flags;
    amdgpu_sync_create(&(*mem)->sync);
  -    ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
+    ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
  if (ret) {
  pr_debug("Insufficient memory\n");
  goto err_reserve_limit;
@@ -1725,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
  /* Don't unreserve system mem limit twice */
  goto err_reserve_limit;
  err_bo_create:
-    amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
+    amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
  err_reserve_limit:
  mutex_destroy(&(*mem)->lock);
  if (gobj)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

index 6d291aa6386b..e11451100a20 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1127,8 +1127,11 @@ static int 
kfd_ioctl_alloc_memory_of_gpu(struct file *filep,

  }
    /* Update the VRAM usage count */
-    if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-    WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+    if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+    if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
+    args->size >>= 1;


This will return the updated size to user mode. That's probably not 
what you want. It may be harmless, but technically it breaks the ABI. 
It would be better to use a local variable for the updated size.


Submit v3 patch to fix this.

Thanks,

Philip



Regards,
  Felix


+    WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + 
PAGE_ALIGN(args->size));

+    }
    mutex_unlock(&p->mutex);


[PATCH v3] drm/amdkfd: Page aligned memory reserve size

2023-01-10 Thread Philip Yang
Use page aligned size to reserve memory usage because page aligned TTM
BO size is used to unreserve memory usage, otherwise no page aligned
size causes memory usage accounting unbalanced.

Change vram_used definition type to int64_t to be able to trigger
WARN_ONCE(adev && adev->kfd.vram_used < 0, "..."), to help debug the
accounting issue with warning and backtrace.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 9 +++--
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fb41869e357a..333780491867 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
 
 struct amdgpu_kfd_dev {
struct kfd_dev *dev;
-   uint64_t vram_used;
+   int64_t vram_used;
uint64_t vram_used_aligned;
bool init_complete;
struct work_struct reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index cd5de5e08d2f..08e2c1dc92d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1598,6 +1598,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_bo *bo;
struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
+   uint64_t aligned_size;
u64 alloc_flags;
int ret;
 
@@ -1653,13 +1654,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 * the memory.
 */
if ((*mem)->aql_queue)
-   size = size >> 1;
+   size >>= 1;
+
+   aligned_size = PAGE_ALIGN(size);
 
(*mem)->alloc_flags = flags;
 
amdgpu_sync_create(&(*mem)->sync);
 
-   ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
+   ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
if (ret) {
pr_debug("Insufficient memory\n");
goto err_reserve_limit;
@@ -1725,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
 err_bo_create:
-   amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
+   amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
 err_reserve_limit:
mutex_destroy(&(*mem)->lock);
if (gobj)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6d291aa6386b..f79b8e964140 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1127,8 +1127,13 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
}
 
/* Update the VRAM usage count */
-   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-   WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   uint64_t size = args->size;
+
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
+   size >>= 1;
+   WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+   }
 
mutex_unlock(&p->mutex);
 
-- 
2.35.1



Re: [PATCH] drm/amdkfd: Page aligned memory reserve size

2023-01-10 Thread Philip Yang



On 2023-01-10 13:33, Felix Kuehling wrote:

Am 2023-01-10 um 12:11 schrieb Philip Yang:

Use page aligned size to reserve memory usage because page aligned TTM
BO size is used to unreserve memory usage, otherwise no page aligned
size causes memory usage accounting unbalanced.

Change vram_used definition type to int64_t to be able to trigger
WARN_ONCE(adev && adev->kfd.vram_used < 0, "..."), to help debug the
accounting issue with warning and backtrace.

Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   | 2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +--
  3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index fb41869e357a..333780491867 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
    struct amdgpu_kfd_dev {
  struct kfd_dev *dev;
-    uint64_t vram_used;
+    int64_t vram_used;
  uint64_t vram_used_aligned;
  bool init_complete;
  struct work_struct reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index 2a118669d0e3..7efee672bc41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1598,6 +1598,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
  struct amdgpu_bo *bo;
  struct drm_gem_object *gobj = NULL;
  u32 domain, alloc_domain;
+    uint64_t aligned_size;
  u64 alloc_flags;
  int ret;
  @@ -1653,13 +1654,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
   * the memory.
   */
  if ((*mem)->aql_queue)
-    size = size >> 1;
+    size >>= 1;
+
+    aligned_size = PAGE_ALIGN(size);


Why do you need a new variable for this? Can't you just update size to 
be page-aligned here? Is the unaligned size still needed anywhere?
amdgpu_gem_object_create ->...-> amdgpu_bo_create needs the original 
size for domain GWS etc, as the size is used as number of pages, not bytes.




    (*mem)->alloc_flags = flags;
    amdgpu_sync_create(&(*mem)->sync);
  -    ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
+    ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
  if (ret) {
  pr_debug("Insufficient memory\n");
  goto err_reserve_limit;
@@ -1725,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
  /* Don't unreserve system mem limit twice */
  goto err_reserve_limit;
  err_bo_create:
-    amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
+    amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
  err_reserve_limit:
  mutex_destroy(&(*mem)->lock);
  if (gobj)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

index 6d291aa6386b..e11451100a20 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1127,8 +1127,11 @@ static int 
kfd_ioctl_alloc_memory_of_gpu(struct file *filep,

  }
    /* Update the VRAM usage count */
-    if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-    WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+    if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+    if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
+    args->size >>= 1;


This will return the updated size to user mode. That's probably not 
what you want. It may be harmless, but technically it breaks the ABI. 
It would be better to use a local variable for the updated size.


Submit v3 patch to fix this.

Thanks,

Philip



Regards,
  Felix


+    WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + 
PAGE_ALIGN(args->size));

+    }
    mutex_unlock(&p->mutex);


[PATCH v2] drm/amd/display: Calculate output_color_space after pixel encoding adjustment

2023-01-10 Thread Joshua Ashton
Code in get_output_color_space depends on knowing the pixel encoding to
determine whether to pick between eg. COLOR_SPACE_SRGB or
COLOR_SPACE_YCBCR709 for transparent RGB -> YCbCr 4:4:4 in the driver.

v2: Fixed patch being accidentally based on a personal feature branch, oops!

Signed-off-by: Joshua Ashton 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b4d60eedbcbf..9da71ee8fcc4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5341,8 +5341,6 @@ static void fill_stream_properties_from_drm_display_mode(
 
timing_out->aspect_ratio = get_aspect_ratio(mode_in);
 
-   stream->output_color_space = get_output_color_space(timing_out);
-
stream->out_transfer_func->type = TF_TYPE_PREDEFINED;
stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) {
@@ -5353,6 +5351,8 @@ static void fill_stream_properties_from_drm_display_mode(
adjust_colour_depth_from_display_info(timing_out, info);
}
}
+
+   stream->output_color_space = get_output_color_space(timing_out);
 }
 
 static void fill_audio_info(struct audio_info *audio_info,
-- 
2.39.0



[PATCH] drm/amd/display: Calculate output_color_space after pixel encoding adjustment

2023-01-10 Thread Joshua Ashton
Code in get_output_color_space depends on knowing the pixel encoding to make 
determinations about whether to pick between eg. COLOR_SPACE_SRGB or 
COLOR_SPACE_YCBCR709 for transparent RGB -> YCbCr 4:4:4 in the driver.

Signed-off-by: Joshua Ashton 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 599adaab6c30..eb440b1bf1ac 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5342,8 +5342,6 @@ static void fill_stream_properties_from_drm_display_mode(
 
timing_out->aspect_ratio = get_aspect_ratio(mode_in);
 
-   stream->output_color_space = get_output_color_space(timing_out, 
connector_state);
-
stream->out_transfer_func->type = TF_TYPE_PREDEFINED;
stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) {
@@ -5354,6 +5352,8 @@ static void fill_stream_properties_from_drm_display_mode(
adjust_colour_depth_from_display_info(timing_out, info);
}
}
+
+   stream->output_color_space = get_output_color_space(timing_out, 
connector_state);
 }
 
 static void fill_audio_info(struct audio_info *audio_info,
-- 
2.39.0



Re: [PATCH] drm/amd: Only load TA microcode for psp v12_0 once

2023-01-10 Thread Alex Deucher
On Tue, Jan 10, 2023 at 2:40 PM Limonciello, Mario
 wrote:
>
> [AMD Official Use Only - General]
>
>
>
> > -Original Message-
> > From: Alex Deucher 
> > Sent: Tuesday, January 10, 2023 13:29
> > To: Limonciello, Mario 
> > Cc: amd-gfx@lists.freedesktop.org
> > Subject: Re: [PATCH] drm/amd: Only load TA microcode for psp v12_0 once
> >
> > On Tue, Jan 10, 2023 at 2:16 PM Mario Limonciello
> >  wrote:
> > >
> > > During rebase from patch series accidentally ended up with two calls
> > > to load TA microcode for psp v12_0. Only one is needed, so remove the
> > > second.
> > >
> > > Fixes: f1efed401badb ("drm/amd: Parse both v1 and v2 TA microcode
> > headers using same function")
> > > Signed-off-by: Mario Limonciello 
> >
> > Reviewed-by: Alex Deucher 
> >
> > I'll squash this into f1efed401badb.
>
> You mean when you send it up for drm-next?  At least for amd-staging-drm-next 
> it
> > should probably be its own separate commit though still right?

Correct.

Alex


>
> >
> > Alex
> >
> > > ---
> > >  drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 3 ---
> > >  1 file changed, 3 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
> > b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
> > > index e82a0c2bf1faa..fcd708eae75cc 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
> > > @@ -55,9 +55,6 @@ static int psp_v12_0_init_microcode(struct
> > psp_context *psp)
> > > amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix,
> > sizeof(ucode_prefix));
> > >
> > > err = psp_init_asd_microcode(psp, ucode_prefix);
> > > -   if (err)
> > > -   return err;
> > > -   err = psp_init_ta_microcode(psp, ucode_prefix);
> > > if (err)
> > > return err;
> > >
> > > --
> > > 2.25.1
> > >


RE: [PATCH] drm/amd: Only load TA microcode for psp v12_0 once

2023-01-10 Thread Limonciello, Mario
[AMD Official Use Only - General]



> -Original Message-
> From: Alex Deucher 
> Sent: Tuesday, January 10, 2023 13:29
> To: Limonciello, Mario 
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amd: Only load TA microcode for psp v12_0 once
> 
> On Tue, Jan 10, 2023 at 2:16 PM Mario Limonciello
>  wrote:
> >
> > During rebase from patch series accidentally ended up with two calls
> > to load TA microcode for psp v12_0. Only one is needed, so remove the
> > second.
> >
> > Fixes: f1efed401badb ("drm/amd: Parse both v1 and v2 TA microcode
> headers using same function")
> > Signed-off-by: Mario Limonciello 
> 
> Reviewed-by: Alex Deucher 
> 
> I'll squash this into f1efed401badb.

You mean when you send it up for drm-next?  At least for amd-staging-drm-next it
should probably be its own separate commit though still right?

> 
> Alex
> 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 3 ---
> >  1 file changed, 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
> > index e82a0c2bf1faa..fcd708eae75cc 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
> > @@ -55,9 +55,6 @@ static int psp_v12_0_init_microcode(struct
> psp_context *psp)
> > amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix,
> sizeof(ucode_prefix));
> >
> > err = psp_init_asd_microcode(psp, ucode_prefix);
> > -   if (err)
> > -   return err;
> > -   err = psp_init_ta_microcode(psp, ucode_prefix);
> > if (err)
> > return err;
> >
> > --
> > 2.25.1
> >


Re: [PATCH] drm/amd: Only load TA microcode for psp v12_0 once

2023-01-10 Thread Alex Deucher
On Tue, Jan 10, 2023 at 2:16 PM Mario Limonciello
 wrote:
>
> During rebase from patch series accidentally ended up with two calls
> to load TA microcode for psp v12_0. Only one is needed, so remove the
> second.
>
> Fixes: f1efed401badb ("drm/amd: Parse both v1 and v2 TA microcode headers 
> using same function")
> Signed-off-by: Mario Limonciello 

Reviewed-by: Alex Deucher 

I'll squash this into f1efed401badb.

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 3 ---
>  1 file changed, 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c 
> b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
> index e82a0c2bf1faa..fcd708eae75cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
> @@ -55,9 +55,6 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
> amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, 
> sizeof(ucode_prefix));
>
> err = psp_init_asd_microcode(psp, ucode_prefix);
> -   if (err)
> -   return err;
> -   err = psp_init_ta_microcode(psp, ucode_prefix);
> if (err)
> return err;
>
> --
> 2.25.1
>


[PATCH] drm/amd: Only load TA microcode for psp v12_0 once

2023-01-10 Thread Mario Limonciello
During rebase from patch series accidentally ended up with two calls
to load TA microcode for psp v12_0. Only one is needed, so remove the
second.

Fixes: f1efed401badb ("drm/amd: Parse both v1 and v2 TA microcode headers using 
same function")
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index e82a0c2bf1faa..fcd708eae75cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -55,9 +55,6 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
err = psp_init_asd_microcode(psp, ucode_prefix);
-   if (err)
-   return err;
-   err = psp_init_ta_microcode(psp, ucode_prefix);
if (err)
return err;
 
-- 
2.25.1



[PATCH 6.0 108/148] drm/amdgpu: Fix size validation for non-exclusive domains (v4)

2023-01-10 Thread Greg Kroah-Hartman
From: Luben Tuikov 

[ Upstream commit 7554886daa31eacc8e7fac9e15bbce67d10b8f1f ]

Fix amdgpu_bo_validate_size() to check whether the TTM domain manager for the
requested memory exists, else we get a kernel oops when dereferencing "man".

v2: Make the patch standalone, i.e. not dependent on local patches.
v3: Preserve old behaviour and just check that the manager pointer is not
NULL.
v4: Complain if GTT domain requested and it is uninitialized--most likely a
bug.

Cc: Alex Deucher 
Cc: Christian König 
Cc: AMD Graphics 
Signed-off-by: Luben Tuikov 
Reviewed-by: Christian König 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index bfe0fc258fc1..60ab2d952d5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -446,27 +446,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device 
*adev,
 
/*
 * If GTT is part of requested domains the check must succeed to
-* allow fall back to GTT
+* allow fall back to GTT.
 */
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
 
-   if (size < man->size)
+   if (man && size < man->size)
return true;
-   else
-   goto fail;
-   }
-
-   if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+   else if (!man)
+   WARN_ON_ONCE("GTT domain requested but GTT mem manager 
uninitialized");
+   goto fail;
+   } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
 
-   if (size < man->size)
+   if (man && size < man->size)
return true;
-   else
-   goto fail;
+   goto fail;
}
 
-
/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
return true;
 
-- 
2.35.1





[PATCH 6.1 113/159] drm/amdgpu: Fix size validation for non-exclusive domains (v4)

2023-01-10 Thread Greg Kroah-Hartman
From: Luben Tuikov 

[ Upstream commit 7554886daa31eacc8e7fac9e15bbce67d10b8f1f ]

Fix amdgpu_bo_validate_size() to check whether the TTM domain manager for the
requested memory exists, else we get a kernel oops when dereferencing "man".

v2: Make the patch standalone, i.e. not dependent on local patches.
v3: Preserve old behaviour and just check that the manager pointer is not
NULL.
v4: Complain if GTT domain requested and it is uninitialized--most likely a
bug.

Cc: Alex Deucher 
Cc: Christian König 
Cc: AMD Graphics 
Signed-off-by: Luben Tuikov 
Reviewed-by: Christian König 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 3df13d841e4d..3be3cba3a16d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -446,27 +446,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device 
*adev,
 
/*
 * If GTT is part of requested domains the check must succeed to
-* allow fall back to GTT
+* allow fall back to GTT.
 */
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
 
-   if (size < man->size)
+   if (man && size < man->size)
return true;
-   else
-   goto fail;
-   }
-
-   if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+   else if (!man)
+   WARN_ON_ONCE("GTT domain requested but GTT mem manager 
uninitialized");
+   goto fail;
+   } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
 
-   if (size < man->size)
+   if (man && size < man->size)
return true;
-   else
-   goto fail;
+   goto fail;
}
 
-
/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
return true;
 
-- 
2.35.1





Re: [PATCH] drm/amdkfd: Page aligned memory reserve size

2023-01-10 Thread Felix Kuehling

Am 2023-01-10 um 12:11 schrieb Philip Yang:

Use page aligned size to reserve memory usage because page aligned TTM
BO size is used to unreserve memory usage, otherwise no page aligned
size causes memory usage accounting unbalanced.

Change vram_used definition type to int64_t to be able to trigger
WARN_ONCE(adev && adev->kfd.vram_used < 0, "..."), to help debug the
accounting issue with warning and backtrace.

Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   | 2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +--
  3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fb41869e357a..333780491867 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
  
  struct amdgpu_kfd_dev {

struct kfd_dev *dev;
-   uint64_t vram_used;
+   int64_t vram_used;
uint64_t vram_used_aligned;
bool init_complete;
struct work_struct reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2a118669d0e3..7efee672bc41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1598,6 +1598,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_bo *bo;
struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
+   uint64_t aligned_size;
u64 alloc_flags;
int ret;
  
@@ -1653,13 +1654,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(

 * the memory.
 */
if ((*mem)->aql_queue)
-   size = size >> 1;
+   size >>= 1;
+
+   aligned_size = PAGE_ALIGN(size);


Why do you need a new variable for this? Can't you just update size to 
be page-aligned here? Is the unaligned size still needed anywhere?



  
  	(*mem)->alloc_flags = flags;
  
  	amdgpu_sync_create(&(*mem)->sync);
  
-	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);

+   ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
if (ret) {
pr_debug("Insufficient memory\n");
goto err_reserve_limit;
@@ -1725,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
  err_bo_create:
-   amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
+   amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
  err_reserve_limit:
mutex_destroy(&(*mem)->lock);
if (gobj)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6d291aa6386b..e11451100a20 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1127,8 +1127,11 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
}
  
  	/* Update the VRAM usage count */

-   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-   WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
+   args->size >>= 1;


This will return the updated size to user mode. That's probably not what 
you want. It may be harmless, but technically it breaks the ABI. It 
would be better to use a local variable for the updated size.


Regards,
  Felix



+   WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + 
PAGE_ALIGN(args->size));
+   }
  
  	mutex_unlock(&p->mutex);
  


[PATCH] drm/amdkfd: Page aligned memory reserve size

2023-01-10 Thread Philip Yang
Use page aligned size to reserve memory usage because page aligned TTM
BO size is used to unreserve memory usage, otherwise no page aligned
size causes memory usage accounting unbalanced.

Change vram_used definition type to int64_t to be able to trigger
WARN_ONCE(adev && adev->kfd.vram_used < 0, "..."), to help debug the
accounting issue with warning and backtrace.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fb41869e357a..333780491867 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
 
 struct amdgpu_kfd_dev {
struct kfd_dev *dev;
-   uint64_t vram_used;
+   int64_t vram_used;
uint64_t vram_used_aligned;
bool init_complete;
struct work_struct reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2a118669d0e3..7efee672bc41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1598,6 +1598,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_bo *bo;
struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
+   uint64_t aligned_size;
u64 alloc_flags;
int ret;
 
@@ -1653,13 +1654,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 * the memory.
 */
if ((*mem)->aql_queue)
-   size = size >> 1;
+   size >>= 1;
+
+   aligned_size = PAGE_ALIGN(size);
 
(*mem)->alloc_flags = flags;
 
amdgpu_sync_create(&(*mem)->sync);
 
-   ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
+   ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
if (ret) {
pr_debug("Insufficient memory\n");
goto err_reserve_limit;
@@ -1725,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
 err_bo_create:
-   amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
+   amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
 err_reserve_limit:
mutex_destroy(&(*mem)->lock);
if (gobj)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6d291aa6386b..e11451100a20 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1127,8 +1127,11 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
}
 
/* Update the VRAM usage count */
-   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-   WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
+   args->size >>= 1;
+   WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + 
PAGE_ALIGN(args->size));
+   }
 
mutex_unlock(&p->mutex);
 
-- 
2.35.1



[PATCH 36/37] drm/amd/display: hdcp not enabled on connector 0

2023-01-10 Thread Rodrigo Siqueira
From: hersen wu 

[Why]
incorrect skip when drm_connector.index = 0 within
event_property_validate and update

[How] handle hdcp validate and update for connector 0

Reviewed-by: Bhawanpreet Lakha 
Acked-by: Rodrigo Siqueira 
Signed-off-by: hersen wu 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 0301faaf5d48..8e572f07ec47 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -314,9 +314,6 @@ static void event_property_update(struct work_struct *work)
if (!aconnector)
continue;
 
-   if (!aconnector->base.index)
-   continue;
-
connector = &aconnector->base;
 
/* check if display connected */
@@ -390,13 +387,9 @@ static void event_property_validate(struct work_struct 
*work)
 conn_index++) {
aconnector = hdcp_work->aconnector[conn_index];
 
-
if (!aconnector)
continue;
 
-   if (!aconnector->base.index)
-   continue;
-
/* check if display connected */
if (aconnector->base.status != connector_status_connected)
continue;
-- 
2.39.0



[PATCH 37/37] drm/amd/display: 3.2.218

2023-01-10 Thread Rodrigo Siqueira
From: Aric Cyr 

This version brings along following fixes:

- Revert patches that caused regressions associated with audio and an
  old change that checks the DCN version.
- Refactor DDC and HDP.
- Move DPIA and DPCD logic to new files.
- Updates to DMUB.
- Optimization and bug fixes for SUBVP/DRR.
- Drop legacy code.

Reviewed-by: Rodrigo Siqueira 
Signed-off-by: Aric Cyr 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 53f449fa2b71..22e754ad22c8 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.217"
+#define DC_VER "3.2.218"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.39.0



[PATCH 35/37] drm/amd/display: phase2 enable mst hdcp multiple displays

2023-01-10 Thread Rodrigo Siqueira
From: hersen wu 

[why]
For MST topology with 1 physical link and multiple connectors (>=2),
e.g. daisy chained MST + SST, or 1-to-multi MST hub, if userspace
set to enable the HDCP simultaneously on all connected outputs, the
commit tail iteratively call the hdcp_update_display() for each
display (connector). However, the hdcp workqueue data structure for
each link has only one DM connector and encryption status members,
which means the work queue of property_validate/update() would only
be triggered for the last connector within this physical link, and
therefore the HDCP property value of other connectors would stay on
DESIRED instead of switching to ENABLED, which is NOT as expected.

[how]
Use array of AMDGPU_DM_MAX_DISPLAY_INDEX for both aconnector and
encryption status in hdcp workqueue data structure for each physical
link. For property validate/update work queue, we iterates over the
array and do similar operation/check for each connected display.

Signed-off-by: hersen wu 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c| 160 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.h|   5 +-
 2 files changed, 122 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index a7fd98f57f94..0301faaf5d48 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -170,9 +170,10 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work,
struct mod_hdcp_display *display = &hdcp_work[link_index].display;
struct mod_hdcp_link *link = &hdcp_work[link_index].link;
struct mod_hdcp_display_query query;
+   unsigned int conn_index = aconnector->base.index;
 
mutex_lock(&hdcp_w->mutex);
-   hdcp_w->aconnector = aconnector;
+   hdcp_w->aconnector[conn_index] = aconnector;
 
query.display = NULL;
mod_hdcp_query_display(&hdcp_w->hdcp, aconnector->base.index, &query);
@@ -204,7 +205,7 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work,
  
msecs_to_jiffies(DRM_HDCP_CHECK_PERIOD_MS));
} else {
display->adjust.disable = 
MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
-   hdcp_w->encryption_status = 
MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+   hdcp_w->encryption_status[conn_index] = 
MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
cancel_delayed_work(&hdcp_w->property_validate_dwork);
}
 
@@ -223,9 +224,10 @@ static void hdcp_remove_display(struct hdcp_workqueue 
*hdcp_work,
 {
struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index];
struct drm_connector_state *conn_state = aconnector->base.state;
+   unsigned int conn_index = aconnector->base.index;
 
mutex_lock(&hdcp_w->mutex);
-   hdcp_w->aconnector = aconnector;
+   hdcp_w->aconnector[conn_index] = aconnector;
 
/* the removal of display will invoke auth reset -> hdcp destroy and
 * we'd expect the Content Protection (CP) property changed back to
@@ -247,13 +249,18 @@ static void hdcp_remove_display(struct hdcp_workqueue 
*hdcp_work,
 void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int 
link_index)
 {
struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index];
+   unsigned int conn_index;
 
mutex_lock(&hdcp_w->mutex);
 
mod_hdcp_reset_connection(&hdcp_w->hdcp,  &hdcp_w->output);
 
cancel_delayed_work(&hdcp_w->property_validate_dwork);
-   hdcp_w->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+
+   for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; 
conn_index++) {
+   hdcp_w->encryption_status[conn_index] =
+   MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+   }
 
process_output(hdcp_w);
 
@@ -290,49 +297,83 @@ static void event_callback(struct work_struct *work)
 
 
 }
+
 static void event_property_update(struct work_struct *work)
 {
-
struct hdcp_workqueue *hdcp_work = container_of(work, struct 
hdcp_workqueue, property_update_work);
-   struct amdgpu_dm_connector *aconnector = hdcp_work->aconnector;
-   struct drm_device *dev = hdcp_work->aconnector->base.dev;
+   struct amdgpu_dm_connector *aconnector = NULL;
+   struct drm_device *dev;
long ret;
+   unsigned int conn_index;
+   struct drm_connector *connector;
+   struct drm_connector_state *conn_state;
 
-   drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
-   mutex_lock(&hdcp_work->mutex);
+   for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; 
conn_index++) {
+   aconnector = hdcp_work->aconnector[conn_index];
 
+   if (!aconnector)
+   continue;
 
-   if (aconnector->base.state && aconnector->base.state->commit) {
-   

[PATCH 32/37] drm/amd/display: Request min clocks after disabling pipes on init

2023-01-10 Thread Rodrigo Siqueira
From: Alvin Lee 

[Description]
- Request min clocks after disabling pipes on init
- This optimizes for power savings during init sequence
- Also handles the case where we boot up with no display connected

Reviewed-by: Jun Lei 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Alvin Lee 
---
 .../drm/amd/display/dc/dcn32/dcn32_hwseq.c| 21 +++
 1 file changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index 478412220360..766002619a9c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -685,6 +685,25 @@ void dcn32_program_mall_pipe_config(struct dc *dc, struct 
dc_state *context)
}
 }
 
+static void dcn32_initialize_min_clocks(struct dc *dc)
+{
+   struct dc_clocks *clocks = &dc->current_state->bw_ctx.bw.dcn.clk;
+
+   clocks->dcfclk_khz = 
dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz * 1000;
+   clocks->socclk_khz = 
dc->clk_mgr->bw_params->clk_table.entries[0].socclk_mhz * 1000;
+   clocks->dramclk_khz = 
dc->clk_mgr->bw_params->clk_table.entries[0].memclk_mhz * 1000;
+   clocks->dppclk_khz = 
dc->clk_mgr->bw_params->clk_table.entries[0].dppclk_mhz * 1000;
+   clocks->dispclk_khz = 
dc->clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz * 1000;
+   clocks->ref_dtbclk_khz = 
dc->clk_mgr->bw_params->clk_table.entries[0].dtbclk_mhz * 1000;
+   clocks->fclk_p_state_change_support = true;
+   clocks->p_state_change_support = true;
+
+   dc->clk_mgr->funcs->update_clocks(
+   dc->clk_mgr,
+   dc->current_state,
+   true);
+}
+
 void dcn32_init_hw(struct dc *dc)
 {
struct abm **abms = dc->res_pool->multiple_abms;
@@ -779,6 +798,8 @@ void dcn32_init_hw(struct dc *dc)
if (dc->res_pool->hubbub->funcs->allow_self_refresh_control)

dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,

!dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
+
+   dcn32_initialize_min_clocks(dc);
}
 
/* In headless boot cases, DIG may be turned
-- 
2.39.0



[PATCH 34/37] drm/amd/display: Account for MPO planes in dcn32 mall alloc calculations

2023-01-10 Thread Rodrigo Siqueira
From: Dillon Varone 

[WHY?]
We cannot consider only the MALL required from top pipes, because of the
MPO case.

[HOW?]
Only count a pipe if it fits the following criteria:
1) does not have a top pipe (is the topmost pipe for that plane)
2) it does have a top pipe, but that pipe is associated with a different
   plane

Reviewed-by: Nevenko Stupar 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Dillon Varone 
---
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 5b928f3b719d..7feb8759e475 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -1356,9 +1356,10 @@ static void dcn32_calculate_dlg_params(struct dc *dc, 
struct dc_state *context,
context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes = 
get_surface_size_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 
/* MALL Allocation Sizes */
-   /* count from active, top pipes only */
+   /* count from active, top pipes per plane only */
if (context->res_ctx.pipe_ctx[i].stream && 
context->res_ctx.pipe_ctx[i].plane_state &&
-   context->res_ctx.pipe_ctx[i].top_pipe == NULL &&
+   (context->res_ctx.pipe_ctx[i].top_pipe == NULL 
||
+   context->res_ctx.pipe_ctx[i].plane_state != 
context->res_ctx.pipe_ctx[i].top_pipe->plane_state) &&
context->res_ctx.pipe_ctx[i].prev_odm_pipe == 
NULL) {
/* SS: all active surfaces stored in MALL */
if 
(context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type != SUBVP_PHANTOM) 
{
-- 
2.39.0



[PATCH 33/37] drm/amd/display: Allow subvp on vactive pipes that are 2560x1440@60

2023-01-10 Thread Rodrigo Siqueira
From: Alvin Lee 

Enable subvp on specifically 1440p@60hz displays even though it can
switch in vactive.

Reviewed-by: Jun Lei 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Alvin Lee 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.h |  2 ++
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 31 ++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
index 40cda0f4c12c..b07d3b0e6a5c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
@@ -144,6 +144,8 @@ void dcn32_restore_mall_state(struct dc *dc,
struct dc_state *context,
struct mall_temp_config *temp_config);
 
+bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe);
+
 /* definitions for run time init of reg offsets */
 
 /* CLK SRC */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 2e22600ad5df..5b928f3b719d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -694,7 +694,9 @@ static bool dcn32_assign_subvp_pipe(struct dc *dc,
 */
if (pipe->plane_state && !pipe->top_pipe && 
!dcn32_is_center_timing(pipe) &&
pipe->stream->mall_stream_config.type == 
SUBVP_NONE && refresh_rate < 120 && !pipe->plane_state->address.tmz_surface &&
-   
vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]]
 <= 0) {
+   
(vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]]
 <= 0 ||
+   
(vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]]
 > 0 &&
+   
dcn32_allow_subvp_with_active_margin(pipe {
while (pipe) {
num_pipes++;
pipe = pipe->bottom_pipe;
@@ -2675,3 +2677,30 @@ void 
dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
 }
+
+bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe)
+{
+   bool allow = false;
+   uint32_t refresh_rate = 0;
+
+   /* Allow subvp on displays that have active margin for 2560x1440@60hz 
displays
+* only for now. There must be no scaling as well.
+*
+* For now we only enable on 2560x1440@60hz displays to enable 4K60 + 
1440p60 configs
+* for p-state switching.
+*/
+   if (pipe->stream && pipe->plane_state) {
+   refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 +
+   pipe->stream->timing.v_total * 
pipe->stream->timing.h_total - 1)
+   / 
(double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total);
+   if (pipe->stream->timing.v_addressable == 1440 &&
+   pipe->stream->timing.h_addressable == 2560 &&
+   refresh_rate >= 55 && refresh_rate <= 65 &&
+   pipe->plane_state->src_rect.height == 1440 &&
+   pipe->plane_state->src_rect.width == 2560 &&
+   pipe->plane_state->dst_rect.height == 1440 &&
+   pipe->plane_state->dst_rect.width == 2560)
+   allow = true;
+   }
+   return allow;
+}
-- 
2.39.0



[PATCH 29/37] drm/amd/display: conditional remove disable dig_fifo when blank

2023-01-10 Thread Rodrigo Siqueira
From: Charlene Liu 

Keep dig_fifo enabled for test pattern generation.

Reviewed-by: Chris Park 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Charlene Liu 
---
 drivers/gpu/drm/amd/display/dc/dc.h| 1 +
 .../gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c  | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 72963617553e..53f449fa2b71 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -873,6 +873,7 @@ struct dc_debug_options {
unsigned int dsc_delay_factor_wa_x1000;
unsigned int min_prefetch_in_strobe_ns;
bool disable_unbounded_requesting;
+   bool dig_fifo_off_in_blank;
 };
 
 struct gpu_info_soc_bounding_box_v1_0;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c
index 9c9875368bea..67f4589f3e23 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c
@@ -281,7 +281,8 @@ static void enc314_stream_encoder_dp_blank(
enc1_stream_encoder_dp_blank(link, enc);
 
/* Disable FIFO after the DP vid stream is disabled to avoid 
corruption. */
-   enc314_disable_fifo(enc);
+   if (enc->ctx->dc->debug.dig_fifo_off_in_blank)
+   enc314_disable_fifo(enc);
 }
 
 static void enc314_stream_encoder_dp_unblank(
-- 
2.39.0



[PATCH 31/37] drm/amd/display: fix mapping to non-allocated address

2023-01-10 Thread Rodrigo Siqueira
From: Brandon Syu 

[Why]
There is an issue mapping a non-allocated location of memory.
It would read gpio registers from out-of-bounds array indices.

[How]
This patch corrects the array bounds to the proper number of entries.

Reviewed-by: Martin Leung 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Brandon Syu 
---
 .../gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c   | 6 --
 .../gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.c   | 6 --
 .../gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c   | 6 --
 drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h | 7 +++
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c 
b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c
index 9b63c6c0cc84..e0bd0c722e00 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c
@@ -138,7 +138,8 @@ static const struct ddc_sh_mask ddc_shift[] = {
DDC_MASK_SH_LIST_DCN2(__SHIFT, 3),
DDC_MASK_SH_LIST_DCN2(__SHIFT, 4),
DDC_MASK_SH_LIST_DCN2(__SHIFT, 5),
-   DDC_MASK_SH_LIST_DCN2(__SHIFT, 6)
+   DDC_MASK_SH_LIST_DCN2(__SHIFT, 6),
+   DDC_MASK_SH_LIST_DCN2_VGA(__SHIFT)
 };
 
 static const struct ddc_sh_mask ddc_mask[] = {
@@ -147,7 +148,8 @@ static const struct ddc_sh_mask ddc_mask[] = {
DDC_MASK_SH_LIST_DCN2(_MASK, 3),
DDC_MASK_SH_LIST_DCN2(_MASK, 4),
DDC_MASK_SH_LIST_DCN2(_MASK, 5),
-   DDC_MASK_SH_LIST_DCN2(_MASK, 6)
+   DDC_MASK_SH_LIST_DCN2(_MASK, 6),
+   DDC_MASK_SH_LIST_DCN2_VGA(_MASK)
 };
 
 #include "../generic_regs.h"
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.c 
b/drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.c
index 687d4f128480..36a5736c58c9 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.c
@@ -145,7 +145,8 @@ static const struct ddc_sh_mask ddc_shift[] = {
DDC_MASK_SH_LIST_DCN2(__SHIFT, 3),
DDC_MASK_SH_LIST_DCN2(__SHIFT, 4),
DDC_MASK_SH_LIST_DCN2(__SHIFT, 5),
-   DDC_MASK_SH_LIST_DCN2(__SHIFT, 6)
+   DDC_MASK_SH_LIST_DCN2(__SHIFT, 6),
+   DDC_MASK_SH_LIST_DCN2_VGA(__SHIFT)
 };
 
 static const struct ddc_sh_mask ddc_mask[] = {
@@ -154,7 +155,8 @@ static const struct ddc_sh_mask ddc_mask[] = {
DDC_MASK_SH_LIST_DCN2(_MASK, 3),
DDC_MASK_SH_LIST_DCN2(_MASK, 4),
DDC_MASK_SH_LIST_DCN2(_MASK, 5),
-   DDC_MASK_SH_LIST_DCN2(_MASK, 6)
+   DDC_MASK_SH_LIST_DCN2(_MASK, 6),
+   DDC_MASK_SH_LIST_DCN2_VGA(_MASK)
 };
 
 #include "../generic_regs.h"
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c 
b/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c
index 9fd8b269dd79..985f10b39750 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c
@@ -149,7 +149,8 @@ static const struct ddc_sh_mask ddc_shift[] = {
DDC_MASK_SH_LIST_DCN2(__SHIFT, 3),
DDC_MASK_SH_LIST_DCN2(__SHIFT, 4),
DDC_MASK_SH_LIST_DCN2(__SHIFT, 5),
-   DDC_MASK_SH_LIST_DCN2(__SHIFT, 6)
+   DDC_MASK_SH_LIST_DCN2(__SHIFT, 6),
+   DDC_MASK_SH_LIST_DCN2_VGA(__SHIFT)
 };
 
 static const struct ddc_sh_mask ddc_mask[] = {
@@ -158,7 +159,8 @@ static const struct ddc_sh_mask ddc_mask[] = {
DDC_MASK_SH_LIST_DCN2(_MASK, 3),
DDC_MASK_SH_LIST_DCN2(_MASK, 4),
DDC_MASK_SH_LIST_DCN2(_MASK, 5),
-   DDC_MASK_SH_LIST_DCN2(_MASK, 6)
+   DDC_MASK_SH_LIST_DCN2(_MASK, 6),
+   DDC_MASK_SH_LIST_DCN2_VGA(_MASK)
 };
 
 #include "../generic_regs.h"
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h 
b/drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h
index 308a543178a5..59884ef651b3 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h
+++ b/drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h
@@ -113,6 +113,13 @@
(PHY_AUX_CNTL__AUX## cd ##_PAD_RXSEL## mask_sh),\
(DC_GPIO_AUX_CTRL_5__DDC_PAD## cd ##_I2CMODE## mask_sh)}
 
+#define DDC_MASK_SH_LIST_DCN2_VGA(mask_sh) \
+   {DDC_MASK_SH_LIST_COMMON(mask_sh),\
+   0,\
+   0,\
+   0,\
+   0}
+
 struct ddc_registers {
struct gpio_registers gpio;
uint32_t ddc_setup;
-- 
2.39.0



[PATCH 30/37] drm/amd/display: Skip backlight control delay on external powered links

2023-01-10 Thread Rodrigo Siqueira
From: Tony Tascioglu 

[Why]
When an eDP panel is powered externally from a different GPU, we can avoid
waiting for hardware sequencing delays when switching the backlight on/off
as the display backlight is no longer powered by the original source.

[How]
This commit extends the 'link_powered_externally' variable to allow
bypassing hardware delays for additional backlight commands and force the
backlight on/off when a link is powered by another GPU.

Reviewed-by: Felipe Clark 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Tony Tascioglu 
---
 drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c   |  4 ++--
 drivers/gpu/drm/amd/display/dc/bios/command_table2.c | 12 ++--
 drivers/gpu/drm/amd/display/dc/bios/command_table2.h |  2 +-
 drivers/gpu/drm/amd/display/dc/dc_bios_types.h   |  2 +-
 .../drm/amd/display/dc/dce110/dce110_hw_sequencer.c  |  9 ++---
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h  |  2 +-
 6 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c 
b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 9f11dcf67c28..2c278b626256 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1698,14 +1698,14 @@ static enum bp_result bios_parser_enable_lvtma_control(
struct dc_bios *dcb,
uint8_t uc_pwr_on,
uint8_t panel_instance,
-   uint8_t bypass_powerdown_wait)
+   uint8_t bypass_panel_control_wait)
 {
struct bios_parser *bp = BP_FROM_DCB(dcb);
 
if (!bp->cmd_tbl.enable_lvtma_control)
return BP_RESULT_FAILURE;
 
-   return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, panel_instance, 
bypass_powerdown_wait);
+   return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, panel_instance, 
bypass_panel_control_wait);
 }
 
 static bool bios_parser_is_accelerated_mode(
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c 
b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index 91adebc5c5b7..1ef9e4053bb7 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -987,7 +987,7 @@ static enum bp_result enable_lvtma_control(
struct bios_parser *bp,
uint8_t uc_pwr_on,
uint8_t panel_instance,
-   uint8_t bypass_powerdown_wait);
+   uint8_t bypass_panel_control_wait);
 
 static void init_enable_lvtma_control(struct bios_parser *bp)
 {
@@ -1000,7 +1000,7 @@ static void enable_lvtma_control_dmcub(
struct dc_dmub_srv *dmcub,
uint8_t uc_pwr_on,
uint8_t panel_instance,
-   uint8_t bypass_powerdown_wait)
+   uint8_t bypass_panel_control_wait)
 {
 
union dmub_rb_cmd cmd;
@@ -1014,8 +1014,8 @@ static void enable_lvtma_control_dmcub(
uc_pwr_on;
cmd.lvtma_control.data.panel_inst =
panel_instance;
-   cmd.lvtma_control.data.bypass_powerdown_wait =
-   bypass_powerdown_wait;
+   cmd.lvtma_control.data.bypass_panel_control_wait =
+   bypass_panel_control_wait;
dc_dmub_srv_cmd_queue(dmcub, &cmd);
dc_dmub_srv_cmd_execute(dmcub);
dc_dmub_srv_wait_idle(dmcub);
@@ -1026,7 +1026,7 @@ static enum bp_result enable_lvtma_control(
struct bios_parser *bp,
uint8_t uc_pwr_on,
uint8_t panel_instance,
-   uint8_t bypass_powerdown_wait)
+   uint8_t bypass_panel_control_wait)
 {
enum bp_result result = BP_RESULT_FAILURE;
 
@@ -1035,7 +1035,7 @@ static enum bp_result enable_lvtma_control(
enable_lvtma_control_dmcub(bp->base.ctx->dmub_srv,
uc_pwr_on,
panel_instance,
-   bypass_powerdown_wait);
+   bypass_panel_control_wait);
return BP_RESULT_OK;
}
return result;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h 
b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
index acb7cc69f699..b6d09bf6cf72 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
@@ -97,7 +97,7 @@ struct cmd_tbl {
enum bp_result (*enable_lvtma_control)(struct bios_parser *bp,
uint8_t uc_pwr_on,
uint8_t panel_instance,
-   uint8_t bypass_powerdown_wait);
+   uint8_t bypass_panel_control_wait);
 };
 
 void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h 
b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
index 07d996c992ed..be9aa1a71847 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
@@ -141,7 +141,7 @@ struct dc_vbios_funcs {
struct dc_bios *bi

[PATCH 28/37] drm/amd/display: Remove DISPCLK dentist programming for dcn32

2023-01-10 Thread Rodrigo Siqueira
From: Dillon Varone 

[WHY?]
For dcn32, SMU handles DISPCLK dentist programming.

[HOW?]
Only use calls to SMU to set DISPCLK.

Reviewed-by: Aric Cyr 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Dillon Varone 
---
 .../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  | 96 ++-
 1 file changed, 91 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 791a788c52f2..352c977d1495 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -256,6 +256,94 @@ static void dcn32_update_dppclk_dispclk_freq(struct 
clk_mgr_internal *clk_mgr, s
}
 }
 
+static void dcn32_update_clocks_update_dentist(
+   struct clk_mgr_internal *clk_mgr,
+   struct dc_state *context,
+   uint32_t old_dispclk_khz)
+{
+   uint32_t new_disp_divider = 0;
+   uint32_t old_disp_divider = 0;
+   uint32_t new_dispclk_wdivider = 0;
+   uint32_t old_dispclk_wdivider = 0;
+   uint32_t i;
+
+   if (old_dispclk_khz == 0 || clk_mgr->base.clks.dispclk_khz == 0)
+   return;
+
+   new_disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+   * clk_mgr->base.dentist_vco_freq_khz / 
clk_mgr->base.clks.dispclk_khz;
+   old_disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+   * clk_mgr->base.dentist_vco_freq_khz / old_dispclk_khz;
+
+   new_dispclk_wdivider = dentist_get_did_from_divider(new_disp_divider);
+   old_dispclk_wdivider = dentist_get_did_from_divider(old_disp_divider);
+
+   /* When changing divider to or from 127, some extra programming is 
required to prevent corruption */
+   if (old_dispclk_wdivider == 127 && new_dispclk_wdivider != 127) {
+   for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; 
i++) {
+   struct pipe_ctx *pipe_ctx = 
&context->res_ctx.pipe_ctx[i];
+   uint32_t fifo_level;
+   struct dccg *dccg = 
clk_mgr->base.ctx->dc->res_pool->dccg;
+   struct stream_encoder *stream_enc = 
pipe_ctx->stream_res.stream_enc;
+   int32_t N;
+   int32_t j;
+
+   if (!pipe_ctx->stream)
+   continue;
+   /* Virtual encoders don't have this function */
+   if (!stream_enc->funcs->get_fifo_cal_average_level)
+   continue;
+   fifo_level = 
stream_enc->funcs->get_fifo_cal_average_level(
+   stream_enc);
+   N = fifo_level / 4;
+   dccg->funcs->set_fifo_errdet_ovr_en(
+   dccg,
+   true);
+   for (j = 0; j < N - 4; j++)
+   dccg->funcs->otg_drop_pixel(
+   dccg,
+   pipe_ctx->stream_res.tg->inst);
+   dccg->funcs->set_fifo_errdet_ovr_en(
+   dccg,
+   false);
+   }
+   } else if (new_dispclk_wdivider == 127 && old_dispclk_wdivider != 127) {
+   /* request clock with 126 divider first */
+   uint32_t temp_disp_divider = dentist_get_divider_from_did(126);
+   uint32_t temp_dispclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR 
* clk_mgr->base.dentist_vco_freq_khz) / temp_disp_divider;
+
+   if (clk_mgr->smu_present)
+   dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DISPCLK, 
khz_to_mhz_ceil(temp_dispclk_khz));
+
+   for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; 
i++) {
+   struct pipe_ctx *pipe_ctx = 
&context->res_ctx.pipe_ctx[i];
+   struct dccg *dccg = 
clk_mgr->base.ctx->dc->res_pool->dccg;
+   struct stream_encoder *stream_enc = 
pipe_ctx->stream_res.stream_enc;
+   uint32_t fifo_level;
+   int32_t N;
+   int32_t j;
+
+   if (!pipe_ctx->stream)
+   continue;
+   /* Virtual encoders don't have this function */
+   if (!stream_enc->funcs->get_fifo_cal_average_level)
+   continue;
+   fifo_level = 
stream_enc->funcs->get_fifo_cal_average_level(
+   stream_enc);
+   N = fifo_level / 4;
+   dccg->funcs->set_fifo_errdet_ovr_en(dccg, true);
+   for (j = 0; j < 12 - N; j++)
+   dccg->funcs-

[PATCH 27/37] drm/amd/display: Add extra mblk for DCC

2023-01-10 Thread Rodrigo Siqueira
From: Saaem Rizvi 

[Why]
DCC meta was found to be detached from the usable pixel data. Due to
this, the DCC meta and the end of the fetched part of the frame may not
be on the same mblk. Furthermore, if the meta is not aligned to the mblk
size, then we require an extra mblk in MALL to account for this.

[How]
Always add an additional mblk when DCC is enabled for detachment and
misalignment.

Reviewed-by: Dillon Varone 
Reviewed-by: Jun Lei 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Saaem Rizvi 
---
 .../display/dc/dml/dcn32/display_mode_vba_util_32.c| 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index a089be99b2d0..0932f49cd819 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -1833,15 +1833,15 @@ void dml32_CalculateSurfaceSizeInMall(
}
if (DCCEnable[k] == true) {
SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
-   
dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),
+   
(dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),

dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *

Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
- 
dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
* 
dml_min(dml_ceil(SurfaceHeightY[k], 8 *

Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
ViewportHeightY[k] + 8 
* Read256BytesBlockHeightY[k] - 1, 8 *
-   
Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
-   * 
Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
+   
Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 *
+   
Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
if (Read256BytesBlockWidthC[k] > 0) {
SurfaceSizeInMALL[k] = 
SurfaceSizeInMALL[k] +

dml_min(dml_ceil(DCCMetaPitchC[k], 8 *
@@ -1877,12 +1877,12 @@ void dml32_CalculateSurfaceSizeInMall(
}
if (DCCEnable[k] == true) {
SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
-   
dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *
+   
(dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *

Read256BytesBlockWidthY[k] - 1), 8 *

Read256BytesBlockWidthY[k]) *

dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *

Read256BytesBlockHeightY[k] - 1), 8 *
-   
Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
+   
Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);
 
if (Read256BytesBlockWidthC[k] > 0) {
SurfaceSizeInMALL[k] = 
SurfaceSizeInMALL[k] +
-- 
2.39.0



[PATCH 25/37] drm/amd/display: Remove unused code

2023-01-10 Thread Rodrigo Siqueira
Remove some code that is never used from freesync file.

Signed-off-by: Rodrigo Siqueira 
---
 .../amd/display/modules/freesync/freesync.c   | 44 ---
 1 file changed, 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c 
b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index c2e00f7b8381..e61cfce9f77c 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -898,35 +898,9 @@ static void build_vrr_infopacket_v2(enum signal_type 
signal,
 
infopacket->valid = true;
 }
-#ifndef TRIM_FSFT
-static void build_vrr_infopacket_fast_transport_data(
-   bool ftActive,
-   unsigned int ftOutputRate,
-   struct dc_info_packet *infopacket)
-{
-   /* PB9 : bit7 - fast transport Active*/
-   unsigned char activeBit = (ftActive) ? 1 << 7 : 0;
-
-   infopacket->sb[1] &= ~activeBit;  //clear bit
-   infopacket->sb[1] |=  activeBit;  //set bit
-
-   /* PB13 : Target Output Pixel Rate [kHz] - bits 7:0  */
-   infopacket->sb[13] = ftOutputRate & 0xFF;
-
-   /* PB14 : Target Output Pixel Rate [kHz] - bits 15:8  */
-   infopacket->sb[14] = (ftOutputRate >> 8) & 0xFF;
-
-   /* PB15 : Target Output Pixel Rate [kHz] - bits 23:16  */
-   infopacket->sb[15] = (ftOutputRate >> 16) & 0xFF;
-
-}
-#endif
 
 static void build_vrr_infopacket_v3(enum signal_type signal,
const struct mod_vrr_params *vrr,
-#ifndef TRIM_FSFT
-   bool ftActive, unsigned int ftOutputRate,
-#endif
enum color_transfer_func app_tf,
struct dc_info_packet *infopacket)
 {
@@ -937,13 +911,6 @@ static void build_vrr_infopacket_v3(enum signal_type 
signal,
 
build_vrr_infopacket_fs2_data(app_tf, infopacket);
 
-#ifndef TRIM_FSFT
-   build_vrr_infopacket_fast_transport_data(
-   ftActive,
-   ftOutputRate,
-   infopacket);
-#endif
-
build_vrr_infopacket_checksum(&payload_size, infopacket);
 
infopacket->valid = true;
@@ -985,18 +952,7 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync 
*mod_freesync,
 
switch (packet_type) {
case PACKET_TYPE_FS_V3:
-#ifndef TRIM_FSFT
-   // always populate with pixel rate.
-   build_vrr_infopacket_v3(
-   stream->signal, vrr,
-   stream->timing.flags.FAST_TRANSPORT,
-   (stream->timing.flags.FAST_TRANSPORT) ?
-   
stream->timing.fast_transport_output_rate_100hz :
-   stream->timing.pix_clk_100hz,
-   app_tf, infopacket);
-#else
build_vrr_infopacket_v3(stream->signal, vrr, app_tf, 
infopacket);
-#endif
break;
case PACKET_TYPE_FS_V2:
build_vrr_infopacket_v2(stream->signal, vrr, app_tf, 
infopacket, stream->freesync_on_desktop);
-- 
2.39.0



[PATCH 26/37] drm/amd/display: set active bit for desktop with VSDBv3

2023-01-10 Thread Rodrigo Siqueira
From: Dillon Varone 

When using freesync on desktop, we need to set the freesync active bit
in the AMD VSDBv3 infopacket.

Reviewed-by: Martin Leung 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Dillon Varone 
---
 .../amd/display/modules/freesync/freesync.c   | 20 +--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c 
b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index e61cfce9f77c..315da61ee897 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -616,7 +616,8 @@ static void build_vrr_infopacket_data_v1(const struct 
mod_vrr_params *vrr,
 }
 
 static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
-   struct dc_info_packet *infopacket)
+   struct dc_info_packet *infopacket,
+   bool freesync_on_desktop)
 {
unsigned int min_refresh;
unsigned int max_refresh;
@@ -649,9 +650,15 @@ static void build_vrr_infopacket_data_v3(const struct 
mod_vrr_params *vrr,
infopacket->sb[6] |= 0x02;
 
/* PB6 = [Bit 2 = FreeSync Active] */
-   if (vrr->state == VRR_STATE_ACTIVE_VARIABLE ||
+   if (freesync_on_desktop) {
+   if (vrr->state != VRR_STATE_DISABLED &&
+   vrr->state != VRR_STATE_UNSUPPORTED)
+   infopacket->sb[6] |= 0x04;
+   } else {
+   if (vrr->state == VRR_STATE_ACTIVE_VARIABLE ||
vrr->state == VRR_STATE_ACTIVE_FIXED)
-   infopacket->sb[6] |= 0x04;
+   infopacket->sb[6] |= 0x04;
+   }
 
min_refresh = (vrr->min_refresh_in_uhz + 50) / 100;
max_refresh = (vrr->max_refresh_in_uhz + 50) / 100;
@@ -902,12 +909,13 @@ static void build_vrr_infopacket_v2(enum signal_type 
signal,
 static void build_vrr_infopacket_v3(enum signal_type signal,
const struct mod_vrr_params *vrr,
enum color_transfer_func app_tf,
-   struct dc_info_packet *infopacket)
+   struct dc_info_packet *infopacket,
+   bool freesync_on_desktop)
 {
unsigned int payload_size = 0;
 
build_vrr_infopacket_header_v3(signal, infopacket, &payload_size);
-   build_vrr_infopacket_data_v3(vrr, infopacket);
+   build_vrr_infopacket_data_v3(vrr, infopacket, freesync_on_desktop);
 
build_vrr_infopacket_fs2_data(app_tf, infopacket);
 
@@ -952,7 +960,7 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync 
*mod_freesync,
 
switch (packet_type) {
case PACKET_TYPE_FS_V3:
-   build_vrr_infopacket_v3(stream->signal, vrr, app_tf, 
infopacket);
+   build_vrr_infopacket_v3(stream->signal, vrr, app_tf, 
infopacket, stream->freesync_on_desktop);
break;
case PACKET_TYPE_FS_V2:
build_vrr_infopacket_v2(stream->signal, vrr, app_tf, 
infopacket, stream->freesync_on_desktop);
-- 
2.39.0



[PATCH 24/37] drm/amd/display: Optimize link power-down when link powered externally

2023-01-10 Thread Rodrigo Siqueira
From: Tony Tascioglu 

[Why]
When an eDP panel is powered externally by a different GPU, we don't need
to wait for hardware sequencing delays when powering down a link, as the
display is not dependent on the GPU being powered down.

[How]
This commit adds a variable 'link_powered_externally' to indicate when a
link is being powered by another GPU.

Reviewed-by: Felipe Clark 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Tony Tascioglu 
---
 drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c |  5 +++--
 .../gpu/drm/amd/display/dc/bios/command_table2.c   | 14 ++
 .../gpu/drm/amd/display/dc/bios/command_table2.h   |  3 ++-
 drivers/gpu/drm/amd/display/dc/dc_bios_types.h |  3 ++-
 drivers/gpu/drm/amd/display/dc/dc_link.h   |  2 ++
 .../amd/display/dc/dce110/dce110_hw_sequencer.c| 14 --
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h|  3 ++-
 7 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c 
b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 8ca50c088858..9f11dcf67c28 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1697,14 +1697,15 @@ static enum bp_result 
bios_parser_enable_disp_power_gating(
 static enum bp_result bios_parser_enable_lvtma_control(
struct dc_bios *dcb,
uint8_t uc_pwr_on,
-   uint8_t panel_instance)
+   uint8_t panel_instance,
+   uint8_t bypass_powerdown_wait)
 {
struct bios_parser *bp = BP_FROM_DCB(dcb);
 
if (!bp->cmd_tbl.enable_lvtma_control)
return BP_RESULT_FAILURE;
 
-   return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, panel_instance);
+   return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, panel_instance, 
bypass_powerdown_wait);
 }
 
 static bool bios_parser_is_accelerated_mode(
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c 
b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index f52f7ff7ead4..91adebc5c5b7 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -986,7 +986,8 @@ static unsigned int get_smu_clock_info_v3_1(struct 
bios_parser *bp, uint8_t id)
 static enum bp_result enable_lvtma_control(
struct bios_parser *bp,
uint8_t uc_pwr_on,
-   uint8_t panel_instance);
+   uint8_t panel_instance,
+   uint8_t bypass_powerdown_wait);
 
 static void init_enable_lvtma_control(struct bios_parser *bp)
 {
@@ -998,7 +999,8 @@ static void init_enable_lvtma_control(struct bios_parser 
*bp)
 static void enable_lvtma_control_dmcub(
struct dc_dmub_srv *dmcub,
uint8_t uc_pwr_on,
-   uint8_t panel_instance)
+   uint8_t panel_instance,
+   uint8_t bypass_powerdown_wait)
 {
 
union dmub_rb_cmd cmd;
@@ -1012,6 +1014,8 @@ static void enable_lvtma_control_dmcub(
uc_pwr_on;
cmd.lvtma_control.data.panel_inst =
panel_instance;
+   cmd.lvtma_control.data.bypass_powerdown_wait =
+   bypass_powerdown_wait;
dc_dmub_srv_cmd_queue(dmcub, &cmd);
dc_dmub_srv_cmd_execute(dmcub);
dc_dmub_srv_wait_idle(dmcub);
@@ -1021,7 +1025,8 @@ static void enable_lvtma_control_dmcub(
 static enum bp_result enable_lvtma_control(
struct bios_parser *bp,
uint8_t uc_pwr_on,
-   uint8_t panel_instance)
+   uint8_t panel_instance,
+   uint8_t bypass_powerdown_wait)
 {
enum bp_result result = BP_RESULT_FAILURE;
 
@@ -1029,7 +1034,8 @@ static enum bp_result enable_lvtma_control(
bp->base.ctx->dc->debug.dmub_command_table) {
enable_lvtma_control_dmcub(bp->base.ctx->dmub_srv,
uc_pwr_on,
-   panel_instance);
+   panel_instance,
+   bypass_powerdown_wait);
return BP_RESULT_OK;
}
return result;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h 
b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
index be060b4b87db..acb7cc69f699 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
@@ -96,7 +96,8 @@ struct cmd_tbl {
struct bios_parser *bp, uint8_t id);
enum bp_result (*enable_lvtma_control)(struct bios_parser *bp,
uint8_t uc_pwr_on,
-   uint8_t panel_instance);
+   uint8_t panel_instance,
+   uint8_t bypass_powerdown_wait);
 };
 
 void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h 
b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
index 260ac4458870..07d996c992ed 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
+++ b/drivers/

[PATCH 23/37] drm/amd/display: fix an error check condition for synced pipes

2023-01-10 Thread Rodrigo Siqueira
From: Aurabindo Pillai 

Checking for disabled master pipe on a timing synchronized pipe is
incorrect in the case of ODM combine. This case is acceptable as long as
the disabled master pipe is part of the ODM tree. Skip printing error
message if this condition holds true.

Reviewed-by: Dillon Varone 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 91d56a38a829..a5b5f8592c1b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -3821,9 +3821,20 @@ void check_syncd_pipes_for_disabled_master_pipe(struct 
dc *dc,
pipe_ctx_check = &context->res_ctx.pipe_ctx[i];
 
if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_check) == 
disabled_master_pipe_idx) &&
-   IS_PIPE_SYNCD_VALID(pipe_ctx_check) && (i != 
disabled_master_pipe_idx))
+   IS_PIPE_SYNCD_VALID(pipe_ctx_check) && (i != 
disabled_master_pipe_idx)) {
+   struct pipe_ctx *first_pipe = pipe_ctx_check;
+
+   while (first_pipe->prev_odm_pipe)
+   first_pipe = first_pipe->prev_odm_pipe;
+   /* When ODM combine is enabled, this case is expected. 
If the disabled pipe
+* is part of the ODM tree, then we should not print an 
error.
+* */
+   if (first_pipe->pipe_idx == disabled_master_pipe_idx)
+   continue;
+
DC_ERR("DC: Failure: pipe_idx[%d] syncd with disabled 
master pipe_idx[%d]\n",
-   i, disabled_master_pipe_idx);
+  i, disabled_master_pipe_idx);
+   }
}
 }
 
-- 
2.39.0



[PATCH 20/37] drm/amd/display: move dp phy related logic to link_dp_phy

2023-01-10 Thread Rodrigo Siqueira
From: Wenjing Liu 

Reviewed-by: Wesley Chalmers 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Wenjing Liu 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c |   3 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c |   3 +-
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  | 127 +--
 drivers/gpu/drm/amd/display/dc/dc_link.h  |   2 +
 .../display/dc/dce110/dce110_hw_sequencer.h   |   2 +-
 .../gpu/drm/amd/display/dc/inc/dc_link_dp.h   |  19 ---
 drivers/gpu/drm/amd/display/dc/link/Makefile  |   2 +-
 .../gpu/drm/amd/display/dc/link/link_dp_phy.c | 144 ++
 .../gpu/drm/amd/display/dc/link/link_dp_phy.h |  51 +++
 .../amd/display/dc/link/link_dp_training.c|   1 +
 .../dc/link/link_dp_training_128b_132b.c  |   1 +
 .../display/dc/link/link_dp_training_8b_10b.c |   1 +
 .../dc/link/link_dp_training_auxless.c|   2 +-
 .../link_dp_training_fixed_vs_pe_retimer.c|   1 +
 14 files changed, 210 insertions(+), 149 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/link/link_dp_phy.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/link/link_dp_phy.h

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index ae54a9719910..704860e6ba84 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -35,6 +35,7 @@
 #include "resource.h"
 #include "dsc.h"
 #include "dc_link_dp.h"
+#include "dc_link.h"
 #include "link_hwss.h"
 #include "dc/dc_dmub_srv.h"
 
@@ -3395,7 +3396,7 @@ static int trigger_hpd_mst_set(void *data, u64 val)
continue;
 
link = aconnector->dc_link;
-   dp_receiver_power_ctrl(link, false);
+   dc_link_dp_receiver_power_ctrl(link, false);

drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_port->mst_mgr, false);
link->mst_stream_alloc_table.stream_count = 0;
memset(link->mst_stream_alloc_table.stream_allocations, 
0,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 6a2e0d867853..957e1bc8981b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -55,6 +55,7 @@
 #include "link/link_dp_trace.h"
 #include "link/link_hpd.h"
 #include "link/link_dp_training.h"
+#include "link/link_dp_phy.h"
 
 #include "dc/dcn30/dcn30_vpg.h"
 
@@ -2174,7 +2175,7 @@ void dc_link_blank_dp_stream(struct dc_link *link, bool 
hw_init)
}
 
if ((!link->wa_flags.dp_keep_receiver_powered) || hw_init)
-   dp_receiver_power_ctrl(link, false);
+   dc_link_dp_receiver_power_ctrl(link, false);
}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index baf2e35d0ac6..cef89519c12e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -45,6 +45,7 @@
 #include "link/link_dp_training_fixed_vs_pe_retimer.h"
 #include "link/link_dp_training_dpia.h"
 #include "link/link_dp_training_auxless.h"
+#include "link/link_dp_phy.h"
 #include "resource.h"
 #define DC_LOGGER \
link->ctx->logger
@@ -139,21 +140,6 @@ uint8_t dp_convert_to_count(uint8_t lttpr_repeater_count)
return 0; // invalid value
 }
 
-void dc_link_dp_set_drive_settings(
-   struct dc_link *link,
-   const struct link_resource *link_res,
-   struct link_training_settings *lt_settings)
-{
-   /* program ASIC PHY settings*/
-   dp_set_hw_lane_settings(link, link_res, lt_settings, DPRX);
-
-   dp_hw_to_dpcd_lane_settings(lt_settings,
-   lt_settings->hw_lane_settings, 
lt_settings->dpcd_lane_settings);
-
-   /* Notify DP sink the PHY settings from source */
-   dpcd_set_lane_settings(link, lt_settings, DPRX);
-}
-
 static enum clock_source_id get_clock_source_id(struct dc_link *link)
 {
enum clock_source_id dp_cs_id = CLOCK_SOURCE_ID_UNDEFINED;
@@ -2195,7 +2181,7 @@ static void dp_wa_power_up_0010FA(struct dc_link *link, 
uint8_t *dpcd_data,
 
if (!link->dpcd_caps.dpcd_rev.raw) {
do {
-   dp_receiver_power_ctrl(link, true);
+   dc_link_dp_receiver_power_ctrl(link, true);
core_link_read_dpcd(link, DP_DPCD_REV,
dpcd_data, length);
link->dpcd_caps.dpcd_rev.raw = dpcd_data[
@@ -4106,20 +4092,6 @@ void dc_link_clear_dprx_states(struct dc_link *link)
memset(&link->dprx_states, 0, sizeof(link->dprx_states));
 }
 
-void dp_receiver_power_ctrl(struct dc_link *link, bool on)
-{
-   uint8_t state;
-
-   state = on ? DP_POWER_STATE_D0 : DP_PO

[PATCH 22/37] Revert "drm/amd/display: Demote Error Level When ODM Transition Supported"

2023-01-10 Thread Rodrigo Siqueira
From: Aurabindo Pillai 

This reverts commit 08f4ff51e7667a473ae3d5435942a9dfda5953c4.

Reverting to put in a better solution that does not involve checking the
DCN version.

Reviewed-by: Dillon Varone 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index efbfb880f390..91d56a38a829 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -3811,8 +3811,6 @@ void check_syncd_pipes_for_disabled_master_pipe(struct dc 
*dc,
int i;
struct pipe_ctx *pipe_ctx, *pipe_ctx_check;
 
-   DC_LOGGER_INIT(dc->ctx->logger);
-
pipe_ctx = &context->res_ctx.pipe_ctx[disabled_master_pipe_idx];
if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx) != disabled_master_pipe_idx) ||
!IS_PIPE_SYNCD_VALID(pipe_ctx))
@@ -3823,16 +3821,9 @@ void check_syncd_pipes_for_disabled_master_pipe(struct 
dc *dc,
pipe_ctx_check = &context->res_ctx.pipe_ctx[i];
 
if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_check) == 
disabled_master_pipe_idx) &&
-   IS_PIPE_SYNCD_VALID(pipe_ctx_check) && (i != 
disabled_master_pipe_idx)) {
-   /* On dcn32, this error isn't fatal since hw supports 
odm transition in fast update*/
-   if (dc->ctx->dce_version == DCN_VERSION_3_2 ||
-   dc->ctx->dce_version == 
DCN_VERSION_3_21)
-   DC_LOG_DEBUG("DC: pipe_idx[%d] syncd with 
disabled master pipe_idx[%d]\n",
-   i, disabled_master_pipe_idx);
-   else
-   DC_ERR("DC: Failure: pipe_idx[%d] syncd with 
disabled master pipe_idx[%d]\n",
-   i, disabled_master_pipe_idx);
-   }
+   IS_PIPE_SYNCD_VALID(pipe_ctx_check) && (i != 
disabled_master_pipe_idx))
+   DC_ERR("DC: Failure: pipe_idx[%d] syncd with disabled 
master pipe_idx[%d]\n",
+   i, disabled_master_pipe_idx);
}
 }
 
-- 
2.39.0



[PATCH 18/37] drm/amd/display: Remove SubVp support if src/dst rect does not equal stream timing

2023-01-10 Thread Rodrigo Siqueira
From: Saaem Rizvi 

The current implementation of SubVP does not support cases where the stream
timing matches neither the destination rect nor the source rect.

Will need to further debug to see how we can support these cases.

Reviewed-by: Alvin Lee 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Saaem Rizvi 
---
 .../gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index 50f20549c951..0fc79d75ce76 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -240,6 +240,14 @@ bool dcn32_is_center_timing(struct pipe_ctx *pipe)
is_center_timing = true;
}
}
+
+   if (pipe->plane_state) {
+   if (pipe->stream->timing.v_addressable != 
pipe->plane_state->dst_rect.height &&
+   pipe->stream->timing.v_addressable != 
pipe->plane_state->src_rect.height) {
+   is_center_timing = true;
+   }
+   }
+
return is_center_timing;
 }
 
-- 
2.39.0



[PATCH 16/37] drm/amd/display: cleanup function args in dml

2023-01-10 Thread Rodrigo Siqueira
From: Dillon Varone 

Remove array size on array passed to CalculateDETSwathFillLatencyHiding.

Reviewed-by: Alvin Lee 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Dillon Varone 
---
 .../gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c | 2 +-
 .../gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index ba23b199afa6..a089be99b2d0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -6254,7 +6254,7 @@ bool dml32_CalculateDETSwathFillLatencyHiding(unsigned 
int NumberOfActiveSurface
double  PixelClock[],
double  VRatioY[],
double  VRatioC[],
-   enum dm_use_mall_for_pstate_change_mode 
UsesMALLForPStateChange[DC__NUM_DPP__MAX])
+   enum dm_use_mall_for_pstate_change_mode 
UsesMALLForPStateChange[])
 {
int k;
double SwathSizeAllSurfaces = 0;
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 5c7196d1ddef..d41c4d8b0c7a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -1160,6 +1160,6 @@ bool dml32_CalculateDETSwathFillLatencyHiding(unsigned 
int NumberOfActiveSurface
double  PixelClock[],
double  VRatioY[],
double  VRatioC[],
-   enum dm_use_mall_for_pstate_change_mode 
UsesMALLForPStateChange[DC__NUM_DPP__MAX]);
+   enum dm_use_mall_for_pstate_change_mode 
UsesMALLForPStateChange[]);
 
 #endif
-- 
2.39.0



[PATCH 17/37] drm/amd/display: Change i2c speed for hdcp

2023-01-10 Thread Rodrigo Siqueira
From: Bhawanpreet Lakha 

[why]
HDCP 1.4 failed on SL8800 SW w/a test driver use.

[how]
Slow down the HW i2c speed when HW i2c is in use.

Reviewed-by: Aurabindo Pillai 
Reviewed-by: Rodrigo Siqueira 
Signed-off-by: Bhawanpreet Lakha 
---
 drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
index b4d5076e124c..dc0b49506275 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
@@ -1776,7 +1776,7 @@ static bool dcn316_resource_construct(
pool->base.mpcc_count = pool->base.res_cap->num_timing_generator;
dc->caps.max_downscale_ratio = 600;
dc->caps.i2c_speed_in_khz = 100;
-   dc->caps.i2c_speed_in_khz_hdcp = 100;
+   dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.5 w/a applied by default*/
dc->caps.max_cursor_size = 256;
dc->caps.min_horizontal_blanking_period = 80;
dc->caps.dmdata_alloc_size = 2048;
-- 
2.39.0



[PATCH 15/37] drm/amd/display: Use DML for MALL SS and Subvp allocation calculations

2023-01-10 Thread Rodrigo Siqueira
From: Dillon Varone 

MALL SS and Subvp use the same calculations for determining the size of
the required allocation for a given surface, which is already done in
DML. Add an interface to extract this information from VBA variables and
use in their respective helper functions. Also refactor existing code to
remove stale workarounds.

Reviewed-by: Alvin Lee 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Dillon Varone 
---
 .../drm/amd/display/dc/dcn32/dcn32_hwseq.c| 146 ++-
 .../drm/amd/display/dc/dcn32/dcn32_resource.h |  13 +-
 .../display/dc/dcn32/dcn32_resource_helpers.c | 175 --
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  |  29 ++-
 .../drm/amd/display/dc/dml/display_mode_vba.c |   1 +
 .../drm/amd/display/dc/dml/display_mode_vba.h |   1 +
 .../gpu/drm/amd/display/dc/inc/core_types.h   |   6 +-
 7 files changed, 134 insertions(+), 237 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index f2cffb96ebf1..07362c66f023 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -207,151 +207,31 @@ static bool dcn32_check_no_memory_request_for_cab(struct 
dc *dc)
  */
 static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state 
*ctx)
 {
-   int i, j;
-   struct dc_stream_state *stream = NULL;
-   struct dc_plane_state *plane = NULL;
-   uint32_t cursor_size = 0;
-   uint32_t total_lines = 0;
-   uint32_t lines_per_way = 0;
+   int i;
uint8_t num_ways = 0;
-   uint8_t bytes_per_pixel = 0;
-   uint8_t cursor_bpp = 0;
-   uint16_t mblk_width = 0;
-   uint16_t mblk_height = 0;
-   uint16_t mall_alloc_width_blk_aligned = 0;
-   uint16_t mall_alloc_height_blk_aligned = 0;
-   uint16_t num_mblks = 0;
-   uint32_t bytes_in_mall = 0;
-   uint32_t cache_lines_used = 0;
-   uint32_t cache_lines_per_plane = 0;
-
-   for (i = 0; i < dc->res_pool->pipe_count; i++) {
-   struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
-
-   /* If PSR is supported on an eDP panel that's connected, but 
that panel is
-* not in PSR at the time of trying to enter MALL SS, we have 
to include it
-* in the static screen CAB calculation
-*/
-   if (!pipe->stream || !pipe->plane_state ||
-   (pipe->stream->link->psr_settings.psr_version 
!= DC_PSR_VERSION_UNSUPPORTED &&
-   
pipe->stream->link->psr_settings.psr_allow_active) ||
-   pipe->stream->mall_stream_config.type == 
SUBVP_PHANTOM)
-   continue;
-
-   bytes_per_pixel = pipe->plane_state->format >= 
SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
-   mblk_width = DCN3_2_MBLK_WIDTH;
-   mblk_height = bytes_per_pixel == 4 ? DCN3_2_MBLK_HEIGHT_4BPE : 
DCN3_2_MBLK_HEIGHT_8BPE;
-
-   /* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width 
+ blk_width - 1, blk_width) -
-* FLOOR(vp_x_start, blk_width)
-*
-* mall_alloc_width_blk_aligned_l/c = 
full_vp_width_blk_aligned_l/c
-*/
-   mall_alloc_width_blk_aligned = 
((pipe->plane_res.scl_data.viewport.x +
-   pipe->plane_res.scl_data.viewport.width + 
mblk_width - 1) / mblk_width * mblk_width) -
-   
(pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
-
-   /* full_vp_height_blk_aligned = FLOOR(vp_y_start + 
full_vp_height + blk_height - 1, blk_height) -
-* FLOOR(vp_y_start, blk_height)
-*
-* mall_alloc_height_blk_aligned_l/c = 
full_vp_height_blk_aligned_l/c
-*/
-   mall_alloc_height_blk_aligned = 
((pipe->plane_res.scl_data.viewport.y +
-   pipe->plane_res.scl_data.viewport.height + 
mblk_height - 1) / mblk_height * mblk_height) -
-   
(pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height);
-
-   num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / 
mblk_width) *
-   ((mall_alloc_height_blk_aligned + mblk_height - 
1) / mblk_height);
-
-   /*For DCC:
-* meta_num_mblk = 
CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1)
-*/
-   if (pipe->plane_state->dcc.enable)
-   num_mblks += (pipe->plane_state->dcc.meta_pitch * 
pipe->plane_res.scl_data.viewport.height * bytes_per_pixel +
-   (256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 
1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);
+   uint32_t mall_ss_size_bytes = 0;
 
-   bytes_in_mall

[PATCH 14/37] drm/amd/display: Account for Subvp Phantoms in DML MALL surface calculations

2023-01-10 Thread Rodrigo Siqueira
From: Dillon Varone 

DML does not explicitly consider support for space in MALL required for
subvp phantom pipes. This adds a check to make sure portion of phantom
surface can fit in MALL.

Reviewed-by: Alvin Lee 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Dillon Varone 
---
 .../display/dc/dml/dcn32/display_mode_vba_32.c|  2 ++
 .../dc/dml/dcn32/display_mode_vba_util_32.c   | 15 +++
 .../dc/dml/dcn32/display_mode_vba_util_32.h   |  1 +
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 83765008dd5f..6c5ab5c26b38 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -387,6 +387,7 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
mode_lib->vba.NumberOfActiveSurfaces,
mode_lib->vba.MALLAllocatedForDCNFinal,
mode_lib->vba.UseMALLForStaticScreen,
+   mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.DCCEnable,
mode_lib->vba.ViewportStationary,
mode_lib->vba.ViewportXStartY,
@@ -2628,6 +2629,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
mode_lib->vba.NumberOfActiveSurfaces,
mode_lib->vba.MALLAllocatedForDCNFinal,
mode_lib->vba.UseMALLForStaticScreen,
+   mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.DCCEnable,
mode_lib->vba.ViewportStationary,
mode_lib->vba.ViewportXStartY,
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index 4279cd481de1..ba23b199afa6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -1772,6 +1772,7 @@ void dml32_CalculateSurfaceSizeInMall(
unsigned int NumberOfActiveSurfaces,
unsigned int MALLAllocatedForDCN,
enum dm_use_mall_for_static_screen_mode 
UseMALLForStaticScreen[],
+   enum dm_use_mall_for_pstate_change_mode 
UsesMALLForPStateChange[],
bool DCCEnable[],
bool ViewportStationary[],
unsigned int ViewportXStartY[],
@@ -1803,8 +1804,10 @@ void dml32_CalculateSurfaceSizeInMall(
unsigned intSurfaceSizeInMALL[],
bool *ExceededMALLSize)
 {
-   unsigned int TotalSurfaceSizeInMALL  = 0;
unsigned int k;
+   unsigned int TotalSurfaceSizeInMALLForSS = 0;
+   unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
+   unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 
1024;
 
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if (ViewportStationary[k]) {
@@ -1896,10 +1899,14 @@ void dml32_CalculateSurfaceSizeInMall(
}
 
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
-   if (UseMALLForStaticScreen[k] == 
dm_use_mall_static_screen_enable)
-   TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + 
SurfaceSizeInMALL[k];
+   /* SS and Subvp counted separate as they are never used at the 
same time */
+   if (UsesMALLForPStateChange[k] == 
dm_use_mall_pstate_change_phantom_pipe)
+   TotalSurfaceSizeInMALLForSubVP = 
TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
+   else if (UseMALLForStaticScreen[k] == 
dm_use_mall_static_screen_enable)
+   TotalSurfaceSizeInMALLForSS = 
TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
}
-   *ExceededMALLSize =  (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 
1024 * 1024);
+   *ExceededMALLSize =  (TotalSurfaceSizeInMALLForSS > 
MALLAllocatedForDCNInBytes) ||
+   
(TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
 } // CalculateSurfaceSizeInMall
 
 void dml32_CalculateVMRowAndSwath(
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 83edfcee8851..5c7196d1ddef 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -334,6 +334,7 @@ void dml32_CalculateSurfaceSizeInMall(
unsigned int NumberOfActiveSurfaces,
unsigned int MALLAllocatedForDCN,
enum dm_use_mall_for_static_screen_mo

[PATCH 12/37] drm/amd/display: Optimize subvp and drr validation

2023-01-10 Thread Rodrigo Siqueira
From: Dillon Varone 

Two issues existed:
1) Configs that support DRR but have it disabled will incorrectly fail
subvp+vblank validation. Use the subvp+vblank path for this case.
2) Configs that support DRR and have it enabled can use a higher voltage level
than required if they also support subvp+vblank. Use the lowest supported
voltage level for this case.

Reviewed-by: Alvin Lee 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Dillon Varone 
---
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 42 ---
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 025139a22e84..0c7ddd6f05b5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -977,13 +977,12 @@ static bool subvp_vblank_schedulable(struct dc *dc, 
struct dc_state *context)
if (!subvp_pipe && pipe->stream->mall_stream_config.type == 
SUBVP_MAIN)
subvp_pipe = pipe;
}
-   // Use ignore_msa_timing_param flag to identify as DRR
-   if (found && 
context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) {
-   // SUBVP + DRR case -- don't enable SubVP + DRR for HDMI VRR 
cases
-   if 
(context->res_ctx.pipe_ctx[vblank_index].stream->allow_freesync)
-   schedulable = subvp_drr_schedulable(dc, context, 
&context->res_ctx.pipe_ctx[vblank_index]);
-   else
-   schedulable = false;
+   // Use ignore_msa_timing_param and VRR active, or Freesync flag to 
identify as DRR On
+   if (found && 
context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param &&
+   
(context->res_ctx.pipe_ctx[vblank_index].stream->allow_freesync ||
+   
context->res_ctx.pipe_ctx[vblank_index].stream->vrr_active_variable)) {
+   // SUBVP + DRR case -- only allowed if run through DRR 
validation path
+   schedulable = false;
} else if (found) {
main_timing = &subvp_pipe->stream->timing;
phantom_timing = 
&subvp_pipe->stream->mall_stream_config.paired_stream->timing;
@@ -1087,12 +1086,12 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 {
struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
unsigned int dc_pipe_idx = 0;
+   int i = 0;
bool found_supported_config = false;
struct pipe_ctx *pipe = NULL;
uint32_t non_subvp_pipes = 0;
bool drr_pipe_found = false;
uint32_t drr_pipe_index = 0;
-   uint32_t i = 0;
 
dc_assert_fp_enabled();
 
@@ -1186,11 +1185,11 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,

vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != 
dm_dram_clock_change_unsupported
&& subvp_validate_static_schedulability(dc, 
context, *vlevel)) {
found_supported_config = true;
-   } else if (*vlevel < context->bw_ctx.dml.soc.num_states 
&&
-   
vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == 
dm_dram_clock_change_unsupported) {
-   /* Case where 1 SubVP is added, and DML reports 
MCLK unsupported. This handles
-* the case for SubVP + DRR, where the DRR 
display does not support MCLK switch
-* at it's native refresh rate / timing.
+   } else if (*vlevel < 
context->bw_ctx.dml.soc.num_states) {
+   /* Case where 1 SubVP is added, and DML reports 
MCLK unsupported or DRR is allowed.
+* This handles the case for SubVP + DRR, where 
the DRR display does not support MCLK
+* switch at it's native refresh rate / timing, 
or DRR is allowed for the non-subvp
+* display.
 */
for (i = 0; i < dc->res_pool->pipe_count; i++) {
pipe = &context->res_ctx.pipe_ctx[i];
@@ -1207,6 +1206,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
// If there is only 1 remaining non SubVP pipe 
that is DRR, check static
// schedulability for SubVP + DRR.
if (non_subvp_pipes == 1 && drr_pipe_found) {
+   /* find lowest vlevel that supports the 
config */
+   for (i = *vlevel; i >= 0; i--) {
+   if 
(vba->ModeSupport[i][vba->maxMpcComb]) {
+   *vlevel = i;
+  

[PATCH 13/37] drm/amd/display: Account for DCC Meta pitch in DML MALL surface calculations

2023-01-10 Thread Rodrigo Siqueira
From: Dillon Varone 

DML incorrectly uses surface width for determining DCC meta size in MALL
allocation calculations.  Meta pitch should be used instead.

Reviewed-by: Alvin Lee 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Dillon Varone 
---
 .../drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c |  4 
 .../display/dc/dml/dcn32/display_mode_vba_util_32.c| 10 ++
 .../display/dc/dml/dcn32/display_mode_vba_util_32.h|  2 ++
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 4b8f5fa0f0ad..83765008dd5f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -411,6 +411,8 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->BlockWidthC,
v->BlockHeightY,
v->BlockHeightC,
+   mode_lib->vba.DCCMetaPitchY,
+   mode_lib->vba.DCCMetaPitchC,
 
/* Output */
v->SurfaceSizeInMALL,
@@ -2650,6 +2652,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
mode_lib->vba.MacroTileWidthC,
mode_lib->vba.MacroTileHeightY,
mode_lib->vba.MacroTileHeightC,
+   mode_lib->vba.DCCMetaPitchY,
+   mode_lib->vba.DCCMetaPitchC,
 
/* Output */
mode_lib->vba.SurfaceSizeInMALL,
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index b53feeaf5cf1..4279cd481de1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -1796,6 +1796,8 @@ void dml32_CalculateSurfaceSizeInMall(
unsigned int ReadBlockWidthC[],
unsigned int ReadBlockHeightY[],
unsigned int ReadBlockHeightC[],
+   unsigned int DCCMetaPitchY[],
+   unsigned int DCCMetaPitchC[],
 
/* Output */
unsigned intSurfaceSizeInMALL[],
@@ -1828,7 +1830,7 @@ void dml32_CalculateSurfaceSizeInMall(
}
if (DCCEnable[k] == true) {
SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
-   
dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
+   
dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),

dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *

Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
- 
dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
@@ -1839,7 +1841,7 @@ void dml32_CalculateSurfaceSizeInMall(
* 
Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
if (Read256BytesBlockWidthC[k] > 0) {
SurfaceSizeInMALL[k] = 
SurfaceSizeInMALL[k] +
-   
dml_min(dml_ceil(SurfaceWidthC[k], 8 *
+   
dml_min(dml_ceil(DCCMetaPitchC[k], 8 *

Read256BytesBlockWidthC[k]),

dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
* 
Read256BytesBlockWidthC[k] - 1, 8 *
@@ -1872,7 +1874,7 @@ void dml32_CalculateSurfaceSizeInMall(
}
if (DCCEnable[k] == true) {
SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
-   
dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
+   
dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *

Read256BytesBlockWidthY[k] - 1), 8 *

Read256BytesBlockWidthY[k]) *

dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
@@ -1881,7 +1883,7 @@ void dml32_CalculateSurfaceSizeInMall(
 
if (Read256BytesBlockWidthC[k] >

[PATCH 11/37] drm/amd/display: add hubbub_init related

2023-01-10 Thread Rodrigo Siqueira
From: Charlene Liu 

Required by display init, also update get_dig_mode

Reviewed-by: Hansen Dsouza 
Reviewed-by: Duncan Ma 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Charlene Liu 
---
 .../drm/amd/display/dc/dcn10/dcn10_hubbub.h   | 12 +++-
 .../drm/amd/display/dc/dcn31/dcn31_hubbub.c   | 18 
 .../drm/amd/display/dc/dcn31/dcn31_hubbub.h   | 10 ++-
 .../drm/amd/display/dc/dcn32/dcn32_hubbub.c   | 29 +++
 .../drm/amd/display/dc/dcn32/dcn32_hubbub.h   | 17 +--
 .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c |  6 +++-
 .../gpu/drm/amd/display/dc/inc/hw/dchubbub.h  |  1 +
 7 files changed, 88 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
index ba1c0621f0f8..e8752077571a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
@@ -172,6 +172,10 @@ struct dcn_hubbub_registers {
uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C;
uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D;
uint32_t SDPIF_REQUEST_RATE_LIMIT;
+   uint32_t DCHUBBUB_SDPIF_CFG0;
+   uint32_t DCHUBBUB_SDPIF_CFG1;
+   uint32_t DCHUBBUB_CLOCK_CNTL;
+   uint32_t DCHUBBUB_MEM_PWR_MODE_CTRL;
 };
 
 #define HUBBUB_REG_FIELD_LIST_DCN32(type) \
@@ -362,7 +366,13 @@ struct dcn_hubbub_registers {
type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C;\
type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D;\
type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D;\
-   type SDPIF_REQUEST_RATE_LIMIT
+   type SDPIF_REQUEST_RATE_LIMIT;\
+   type DISPCLK_R_DCHUBBUB_GATE_DIS;\
+   type DCFCLK_R_DCHUBBUB_GATE_DIS;\
+   type SDPIF_MAX_NUM_OUTSTANDING;\
+   type DCHUBBUB_ARB_MAX_REQ_OUTSTAND;\
+   type SDPIF_PORT_CONTROL;\
+   type DET_MEM_PWR_LS_MODE
 
 
 struct dcn_hubbub_shift {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
index 6360dc9502e7..7e7cd5b64e6a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
@@ -1008,6 +1008,24 @@ static bool 
hubbub31_verify_allow_pstate_change_high(struct hubbub *hubbub)
return false;
 }
 
+void hubbub31_init(struct hubbub *hubbub)
+{
+   struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+
+   /*Enable clock gate*/
+   if (hubbub->ctx->dc->debug.disable_clock_gate) {
+   /*done in hwseq*/
+   /*REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);*/
+   REG_UPDATE_2(DCHUBBUB_CLOCK_CNTL,
+   DISPCLK_R_DCHUBBUB_GATE_DIS, 0,
+   DCFCLK_R_DCHUBBUB_GATE_DIS, 0);
+   }
+
+   /*
+   only the DCN will determine when to connect the SDP port
+   */
+   REG_UPDATE(DCHUBBUB_SDPIF_CFG0, SDPIF_PORT_CONTROL, 1);
+}
 static const struct hubbub_funcs hubbub31_funcs = {
.update_dchub = hubbub2_update_dchub,
.init_dchub_sys_ctx = hubbub31_init_dchub_sys_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h
index 70c60de448ac..e015e5a6c866 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h
@@ -42,6 +42,10 @@
SR(DCHUBBUB_COMPBUF_CTRL),\
SR(COMPBUF_RESERVED_SPACE),\
SR(DCHUBBUB_DEBUG_CTRL_0),\
+   SR(DCHUBBUB_CLOCK_CNTL),\
+   SR(DCHUBBUB_SDPIF_CFG0),\
+   SR(DCHUBBUB_SDPIF_CFG1),\
+   SR(DCHUBBUB_MEM_PWR_MODE_CTRL),\
SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A),\
SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A),\
SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B),\
@@ -120,7 +124,11 @@
HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_VMID, mask_sh), \
HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_TABLE_LEVEL, mask_sh), \
HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_PIPE, mask_sh), \
-   HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh)
+   HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh),\
+   HUBBUB_SF(DCHUBBUB_CLOCK_CNTL, DISPCLK_R_DCHUBBUB_GATE_DIS, mask_sh),\
+   HUBBUB_SF(DCHUBBUB_CLOCK_CNTL, DCFCLK_R_DCHUBBUB_GATE_DIS, mask_sh),\
+   HUBBUB_SF(DCHUBBUB_SDPIF_CFG0, SDPIF_PORT_CONTROL, mask_sh),\
+   HUBBUB_SF(DCHUBBUB_MEM_PWR_MODE_CTRL, DET_MEM_PWR_LS_MODE, mask_sh)
 
 int hubbub31_init_dchub_sys_ctx(struct hubbub *hubbub,
struct dcn_hubbub_phys_addr_config *pa_config);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c
index 9501403a48a9..eb08ccc38e79 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c
+++ b

[PATCH 10/37] drm/amd/display: Update dmub header to match DMUB

2023-01-10 Thread Rodrigo Siqueira
From: Mustapha Ghaddar 

[WHY]
The last PR missed the name of a struct to match in DMUB.

[HOW]
Update the logic in the dmub_cmd.h header.

Reviewed-by: Meenakshikumar Somasundaram 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Mustapha Ghaddar 
---
 drivers/gpu/drm/amd/display/dmub/dmub_srv.h   |  16 +-
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   | 165 +-
 .../drm/amd/display/dmub/src/dmub_srv_stat.c  |  20 +--
 3 files changed, 109 insertions(+), 92 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h 
b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index c8274967de94..a391b939d709 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -130,6 +130,17 @@ enum dmub_notification_type {
DMUB_NOTIFICATION_MAX
 };
 
+/**
+ * DPIA NOTIFICATION Response Type
+ */
+enum dpia_notify_bw_alloc_status {
+
+   DPIA_BW_REQ_FAILED = 0,
+   DPIA_BW_REQ_SUCCESS,
+   DPIA_EST_BW_CHANGED,
+   DPIA_BW_ALLOC_CAPS_CHANGED
+};
+
 /**
  * struct dmub_region - dmub hw memory region
  * @base: base address for region, must be 256 byte aligned
@@ -465,7 +476,10 @@ struct dmub_notification {
struct aux_reply_data aux_reply;
enum dp_hpd_status hpd_status;
enum set_config_status sc_status;
-   struct dpia_notification_reply_data bw_alloc_reply;
+   /**
+* DPIA notification command.
+*/
+   struct dmub_rb_cmd_dpia_notification dpia_notification;
};
 };
 
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h 
b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 4dcd82d19ccf..328978ec6814 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -770,7 +770,10 @@ enum dmub_out_cmd_type {
 * Command type used for SET_CONFIG Reply notification
 */
DMUB_OUT_CMD__SET_CONFIG_REPLY = 3,
-   DMUB_OUT_CMD__DPIA_NOTIFICATION = 5
+   /**
+* Command type used for USB4 DPIA notification
+*/
+   DMUB_OUT_CMD__DPIA_NOTIFICATION = 5,
 };
 
 /* DMUB_CMD__DPIA command sub-types. */
@@ -780,6 +783,11 @@ enum dmub_cmd_dpia_type {
DMUB_CMD__DPIA_MST_ALLOC_SLOTS = 2,
 };
 
+/* DMUB_OUT_CMD__DPIA_NOTIFICATION command types. */
+enum dmub_cmd_dpia_notification_type {
+   DPIA_NOTIFY__BW_ALLOCATION = 0,
+};
+
 #pragma pack(push, 1)
 
 /**
@@ -1517,84 +1525,6 @@ struct dp_hpd_data {
uint8_t pad;
 };
 
-/**
- * DPIA NOTIFICATION Response Type
- */
-enum dpia_notify_bw_alloc_status {
-
-   DPIA_BW_REQ_FAILED = 0,
-   DPIA_BW_REQ_SUCCESS,
-   DPIA_EST_BW_CHANGED,
-   DPIA_BW_ALLOC_CAPS_CHANGED
-};
-
-/* DMUB_OUT_CMD__DPIA_NOTIFY Reply command - OutBox Cmd */
-/**
- * Data passed to driver from FW in a DMUB_OUT_CMD__DPIA_NOTIFY command.
- */
-struct dpia_notification_reply_data {
-   uint8_t allocated_bw;
-   uint8_t estimated_bw;
-};
-
-struct dpia_notification_common {
-   bool shared;
-};
-
-struct dpia_bw_allocation_notify_data {
-   union {
-   struct {
-   uint16_t cm_bw_alloc_support: 1;/**< USB4 CM BW 
Allocation mode support */
-   uint16_t bw_request_failed: 1;  /**< 
BW_Request_Failed */
-   uint16_t bw_request_succeeded: 1;   /**< 
BW_Request_Succeeded */
-   uint16_t est_bw_changed: 1; /**< 
Estimated_BW changed */
-   uint16_t bw_alloc_cap_changed: 1;   /**< 
BW_Allocation_Capabiity_Changed */
-   uint16_t reserved: 11;
-   } bits;
-   uint16_t flags;
-   };
-   uint8_t cm_id;  /**< CM ID */
-   uint8_t group_id;   /**< Group ID */
-   uint8_t granularity;/**< BW Allocation Granularity */
-   uint8_t estimated_bw;   /**< Estimated_BW */
-   uint8_t allocated_bw;   /**< Allocated_BW */
-   uint8_t reserved;
-};
-
-union dpia_notification_data {
-   struct dpia_notification_common common_data;
-   struct dpia_bw_allocation_notify_data dpia_bw_alloc;/**< Used for 
DPIA BW Allocation mode notification */
-};
-
-enum dmub_cmd_dpia_notification_type {
-   DPIA_NOTIFY__BW_ALLOCATION = 0,
-};
-
-struct dpia_notification_header {
-   uint8_t instance;   
/**< DPIA Instance */
-   uint8_t reserved[3];
-   enum dmub_cmd_dpia_notification_type type;  /**< DPIA notification 
type */
-};
-
-struct dpia_notification_payload {
-   struct dpia_notification_header  header;
-   union dpia_notification_data  data;   /**< DPIA notification data */
-};
-
-/**
- * Definition of a DMUB_OUT_CMD__DPIA_NOTIFY command.
- */
-struct dmub_rb_cmd_dpia_notification {
-   /**
-* Command header.
-*/
-   struct dmub_cmd_header

[PATCH 07/37] drm/amd/display: refactor ddc logic from dc_link_ddc to link_ddc

2023-01-10 Thread Rodrigo Siqueira
From: Wenjing Liu 

[why]
1. Move dd_link_ddc functions to link_ddc.
2. Move link ddc functions declaration exposed in dc to link.h
3. Move link ddc functions declaration exposed in dm to dc_link.h
4. Remove i2caux_interface.h file

Reviewed-by: Jun Lei 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Wenjing Liu 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   1 -
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   |   2 -
 drivers/gpu/drm/amd/display/dc/Makefile   |   4 +-
 .../gpu/drm/amd/display/dc/bios/bios_parser.c |   1 -
 .../drm/amd/display/dc/bios/bios_parser2.c|   1 -
 drivers/gpu/drm/amd/display/dc/core/dc.c  |   5 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 200 -
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  |  10 +-
 drivers/gpu/drm/amd/display/dc/dc_ddc_types.h |  28 ++
 .../gpu/drm/amd/display/dc/dc_hdmi_types.h| 114 +
 drivers/gpu/drm/amd/display/dc/dc_link.h  |  10 +
 drivers/gpu/drm/amd/display/dc/dc_types.h |   1 +
 drivers/gpu/drm/amd/display/dc/dce/dce_aux.h  |   2 +-
 .../drm/amd/display/dc/dce/dce_link_encoder.c |   1 -
 .../amd/display/dc/dcn10/dcn10_link_encoder.c |   1 -
 .../amd/display/dc/dcn20/dcn20_link_encoder.c |   1 -
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |   6 +-
 .../display/dc/dcn201/dcn201_link_encoder.c   |   1 -
 .../amd/display/dc/dcn21/dcn21_link_encoder.c |   1 -
 .../display/dc/dcn30/dcn30_dio_link_encoder.c |   1 -
 .../drm/amd/display/dc/dcn30/dcn30_resource.c |   6 +-
 .../dc/dcn301/dcn301_dio_link_encoder.c   |   1 -
 .../amd/display/dc/dcn302/dcn302_resource.c   |  16 +
 .../amd/display/dc/dcn303/dcn303_resource.c   |   6 +-
 .../display/dc/dcn31/dcn31_dio_link_encoder.c |   1 -
 .../display/dc/dcn32/dcn32_dio_link_encoder.c |   1 -
 .../drm/amd/display/dc/dcn32/dcn32_resource.c |   6 +-
 .../dc/dcn321/dcn321_dio_link_encoder.c   |   1 -
 .../amd/display/dc/dcn321/dcn321_resource.c   |   6 +-
 .../gpu/drm/amd/display/dc/hdcp/hdcp_msg.c|   3 +-
 .../gpu/drm/amd/display/dc/inc/dc_link_ddc.h  | 133 --
 .../drm/amd/display/dc/inc/hw/aux_engine.h|   8 +-
 drivers/gpu/drm/amd/display/dc/inc/link.h |  37 ++
 drivers/gpu/drm/amd/display/dc/link/Makefile  |   2 +-
 .../{core/dc_link_ddc.c => link/link_ddc.c}   | 409 +++---
 .../i2caux_interface.h => dc/link/link_ddc.h} |  69 +--
 36 files changed, 512 insertions(+), 584 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/dc_hdmi_types.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h
 rename drivers/gpu/drm/amd/display/dc/{core/dc_link_ddc.c => link/link_ddc.c} 
(57%)
 rename drivers/gpu/drm/amd/display/{include/i2caux_interface.h => 
dc/link/link_ddc.h} (52%)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b4d60eedbcbf..28f03c558af1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -66,7 +66,6 @@
 
 #include "ivsrcid/ivsrcid_vislands30.h"
 
-#include "i2caux_interface.h"
 #include 
 #include 
 #include 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 41f35d75d0a8..bb7c5d7c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -39,12 +39,10 @@
 #include "dc.h"
 #include "dm_helpers.h"
 
-#include "dc_link_ddc.h"
 #include "dc_link_dp.h"
 #include "ddc_service_types.h"
 #include "dpcd_defs.h"
 
-#include "i2caux_interface.h"
 #include "dmub_cmd.h"
 #if defined(CONFIG_DEBUG_FS)
 #include "amdgpu_dm_debugfs.h"
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile 
b/drivers/gpu/drm/amd/display/dc/Makefile
index b9effadfc4bb..4438f3c16636 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -64,8 +64,8 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix 
$(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI
 
 include $(AMD_DC)
 
-DISPLAY_CORE = dc.o  dc_stat.o dc_link.o dc_resource.o dc_hw_sequencer.o 
dc_sink.o \
-dc_surface.o dc_link_dp.o dc_link_ddc.o dc_debug.o dc_stream.o \
+DISPLAY_CORE = dc.o dc_stat.o dc_link.o dc_resource.o dc_hw_sequencer.o 
dc_sink.o \
+dc_surface.o dc_link_dp.o dc_debug.o dc_stream.o \
 dc_link_enc_cfg.o dc_link_dpia.o dc_link_dpcd.o
 
 DISPLAY_CORE += dc_vm_helper.o
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c 
b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index a1a00f432168..27af9d3c2b73 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -33,7 +33,6 @@
 #include "include/gpio_service_interface.h"
 #include "include/grph_object_ctrl_defs.h"
 #include "include/bios_parser_interface.h"
-#include "include/i2caux_interface.h"
 #include "include/logger_interface.h"
 
 #include "command_table.h"
diff --git a/drivers/gpu/drm/amd/displ

[PATCH 09/37] drm/amd/display: move dc_link_dpia logic to link_dp_dpia

2023-01-10 Thread Rodrigo Siqueira
From: Wenjing Liu 

Reviewed-by: George Shen 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Wenjing Liu 
---
 drivers/gpu/drm/amd/display/dc/Makefile   | 2 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c  | 2 +-
 drivers/gpu/drm/amd/display/dc/link/Makefile  | 2 +-
 .../display/dc/{core/dc_link_dpia.c => link/link_dp_dpia.c}   | 3 ++-
 .../display/dc/{inc/dc_link_dpia.h => link/link_dp_dpia.h}| 4 +---
 6 files changed, 7 insertions(+), 8 deletions(-)
 rename drivers/gpu/drm/amd/display/dc/{core/dc_link_dpia.c => 
link/link_dp_dpia.c} (99%)
 rename drivers/gpu/drm/amd/display/dc/{inc/dc_link_dpia.h => 
link/link_dp_dpia.h} (98%)

diff --git a/drivers/gpu/drm/amd/display/dc/Makefile 
b/drivers/gpu/drm/amd/display/dc/Makefile
index c5b7bcba5a3d..98c508313350 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -66,7 +66,7 @@ include $(AMD_DC)
 
 DISPLAY_CORE = dc.o dc_stat.o dc_link.o dc_resource.o dc_hw_sequencer.o 
dc_sink.o \
 dc_surface.o dc_link_dp.o dc_debug.o dc_stream.o \
-dc_link_enc_cfg.o dc_link_dpia.o
+dc_link_enc_cfg.o
 
 DISPLAY_CORE += dc_vm_helper.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index b20dde4b05d5..cb3a57190cb7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -33,7 +33,7 @@
 #include "gpio_service_interface.h"
 #include "core_status.h"
 #include "dc_link_dp.h"
-#include "dc_link_dpia.h"
+#include "link/link_dp_dpia.h"
 #include "link/link_ddc.h"
 #include "link_hwss.h"
 #include "link.h"
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 9f2f10a957f6..9edfcdf3db3b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -37,7 +37,7 @@
 #include "dpcd_defs.h"
 #include "dc_dmub_srv.h"
 #include "dce/dmub_hw_lock_mgr.h"
-#include "inc/dc_link_dpia.h"
+#include "link/link_dp_dpia.h"
 #include "inc/link_enc_cfg.h"
 #include "link/link_dp_trace.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/link/Makefile 
b/drivers/gpu/drm/amd/display/dc/link/Makefile
index b905c53e738b..69333ac9bab0 100644
--- a/drivers/gpu/drm/amd/display/dc/link/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/link/Makefile
@@ -24,7 +24,7 @@
 # PHY, HPD, DDC and etc).
 
 LINK = link_hwss_dio.o link_hwss_dpia.o link_hwss_hpo_dp.o link_dp_trace.o \
-link_hpd.o link_ddc.o link_dpcd.o
+link_hpd.o link_ddc.o link_dpcd.o link_dp_dpia.o
 
 AMD_DAL_LINK = $(addprefix $(AMDDALPATH)/dc/link/,$(LINK))
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c 
b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
rename to drivers/gpu/drm/amd/display/dc/link/link_dp_dpia.c
index e1db05966d83..47ad2cae483b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia.c
@@ -25,11 +25,12 @@
  */
 
 #include "dc.h"
-#include "dc_link_dpia.h"
 #include "inc/core_status.h"
 #include "dc_link.h"
 #include "dc_link_dp.h"
 #include "dpcd_defs.h"
+
+#include "link_dp_dpia.h"
 #include "link_hwss.h"
 #include "dm_helpers.h"
 #include "dmub/inc/dmub_cmd.h"
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h 
b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia.h
similarity index 98%
rename from drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h
rename to drivers/gpu/drm/amd/display/dc/link/link_dp_dpia.h
index 39c1d1d07357..1e97e4264dde 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia.h
@@ -27,11 +27,9 @@
 #ifndef __DC_LINK_DPIA_H__
 #define __DC_LINK_DPIA_H__
 
+#include "link.h"
 /* This module implements functionality for training DPIA links. */
 
-struct dc_link;
-struct dc_link_settings;
-
 /* The approximate time (us) it takes to transmit 9 USB4 DP clock sync 
packets. */
 #define DPIA_CLK_SYNC_DELAY 16000
 
-- 
2.39.0



[PATCH 08/37] drm/amd/display: move dpcd logic from dc_link_dpcd to link_dpcd

2023-01-10 Thread Rodrigo Siqueira
From: Wenjing Liu 

[why]
Moving dpcd logic from dc_link_dpcd to link_dpcd as part of link file
restructure

Reviewed-by: George Shen 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Wenjing Liu 
---
 drivers/gpu/drm/amd/display/dc/Makefile |  2 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c   |  2 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c|  2 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c  |  2 +-
 .../drm/amd/display/dc/dce110/dce110_hw_sequencer.c |  1 -
 .../drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c   |  1 -
 .../drm/amd/display/dc/dcn10/dcn10_stream_encoder.c |  2 +-
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c  |  1 -
 .../drm/amd/display/dc/dcn20/dcn20_stream_encoder.c |  2 +-
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c  |  1 -
 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c  |  1 -
 .../display/dc/dcn314/dcn314_dio_stream_encoder.c   |  2 +-
 .../gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c|  1 -
 .../amd/display/dc/dcn32/dcn32_dio_stream_encoder.c |  2 +-
 drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c  |  2 +-
 drivers/gpu/drm/amd/display/dc/link/Makefile|  2 +-
 .../dc/{core/dc_link_dpcd.c => link/link_dpcd.c}| 13 -
 .../drm/amd/display/dc/{inc => link}/link_dpcd.h|  4 +---
 18 files changed, 19 insertions(+), 24 deletions(-)
 rename drivers/gpu/drm/amd/display/dc/{core/dc_link_dpcd.c => 
link/link_dpcd.c} (97%)
 rename drivers/gpu/drm/amd/display/dc/{inc => link}/link_dpcd.h (95%)

diff --git a/drivers/gpu/drm/amd/display/dc/Makefile 
b/drivers/gpu/drm/amd/display/dc/Makefile
index 4438f3c16636..c5b7bcba5a3d 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -66,7 +66,7 @@ include $(AMD_DC)
 
 DISPLAY_CORE = dc.o dc_stat.o dc_link.o dc_resource.o dc_hw_sequencer.o 
dc_sink.o \
 dc_surface.o dc_link_dp.o dc_debug.o dc_stream.o \
-dc_link_enc_cfg.o dc_link_dpia.o dc_link_dpcd.o
+dc_link_enc_cfg.o dc_link_dpia.o
 
 DISPLAY_CORE += dc_vm_helper.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index b5572f5202ca..b20dde4b05d5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -51,7 +51,7 @@
 #include "dmub/dmub_srv.h"
 #include "inc/hw/panel_cntl.h"
 #include "inc/link_enc_cfg.h"
-#include "inc/link_dpcd.h"
+#include "link/link_dpcd.h"
 #include "link/link_dp_trace.h"
 #include "link/link_hpd.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 815652da4126..9f2f10a957f6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -50,7 +50,7 @@ static const uint8_t DP_VGA_LVDS_CONVERTER_ID_3[] = "dnomlA";
link->ctx->logger
 #define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */
 
-#include "link_dpcd.h"
+#include "link/link_dpcd.h"
 
 #ifndef MAX
 #define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
index d130d58ac08e..e1db05966d83 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
@@ -33,7 +33,7 @@
 #include "link_hwss.h"
 #include "dm_helpers.h"
 #include "dmub/inc/dmub_cmd.h"
-#include "inc/link_dpcd.h"
+#include "link/link_dpcd.h"
 #include "dc_dmub_srv.h"
 
 #define DC_LOGGER \
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 358431f0d98a..833a1c37cbe4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -55,7 +55,6 @@
 #include "audio.h"
 #include "reg_helper.h"
 #include "panel_cntl.h"
-#include "inc/link_dpcd.h"
 #include "dpcd_defs.h"
 /* include DCE11 register header files */
 #include "dce/dce_11_0_d.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index fe2023f18b7d..c1d4e66b413f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -57,7 +57,6 @@
 #include "dc_trace.h"
 #include "dce/dmub_outbox.h"
 #include "inc/dc_link_dp.h"
-#include "inc/link_dpcd.h"
 
 #define DC_LOGGER_INIT(logger)
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
index 484e7cdf00b8..1527c3b4fb19 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -28,7 +28,7 @@
 #include "dcn10_stream_encoder.h"
 #include "reg_helper.h"
 #include "hw_shared.h"
-#include "inc/link_dpcd.h"
+#include "dc_link_dp.h"
 #include "dpcd_defs.

[PATCH 06/37] drm/amd/display: refactor hpd logic from dc_link to link_hpd

2023-01-10 Thread Rodrigo Siqueira
From: Wenjing Liu 

[why]
Factor out hpd handling logic from generic dc link file.

Reviewed-by: Jun Lei 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Wenjing Liu 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 224 +---
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  |  16 --
 drivers/gpu/drm/amd/display/dc/dc_link.h  |  22 +-
 .../display/dc/dce110/dce110_hw_sequencer.c   |   3 +-
 drivers/gpu/drm/amd/display/dc/inc/link.h |  47 
 drivers/gpu/drm/amd/display/dc/link/Makefile  |   3 +-
 .../gpu/drm/amd/display/dc/link/link_hpd.c| 240 ++
 .../gpu/drm/amd/display/dc/link/link_hpd.h|  47 
 8 files changed, 357 insertions(+), 245 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/inc/link.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/link/link_hpd.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/link/link_hpd.h

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index ee20b4d3afd4..13e33f581e73 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -36,6 +36,7 @@
 #include "dc_link_dpia.h"
 #include "dc_link_ddc.h"
 #include "link_hwss.h"
+#include "link.h"
 #include "opp.h"
 
 #include "link_encoder.h"
@@ -52,6 +53,7 @@
 #include "inc/link_enc_cfg.h"
 #include "inc/link_dpcd.h"
 #include "link/link_dp_trace.h"
+#include "link/link_hpd.h"
 
 #include "dc/dcn30/dcn30_vpg.h"
 
@@ -102,108 +104,6 @@ static void dc_link_destruct(struct dc_link *link)
dc_sink_release(link->remote_sinks[i]);
 }
 
-struct gpio *get_hpd_gpio(struct dc_bios *dcb,
- struct graphics_object_id link_id,
- struct gpio_service *gpio_service)
-{
-   enum bp_result bp_result;
-   struct graphics_object_hpd_info hpd_info;
-   struct gpio_pin_info pin_info;
-
-   if (dcb->funcs->get_hpd_info(dcb, link_id, &hpd_info) != BP_RESULT_OK)
-   return NULL;
-
-   bp_result = dcb->funcs->get_gpio_pin_info(dcb,
-   hpd_info.hpd_int_gpio_uid, &pin_info);
-
-   if (bp_result != BP_RESULT_OK) {
-   ASSERT(bp_result == BP_RESULT_NORECORD);
-   return NULL;
-   }
-
-   return dal_gpio_service_create_irq(gpio_service,
-  pin_info.offset,
-  pin_info.mask);
-}
-
-/*
- *  Function: program_hpd_filter
- *
- *  @brief
- * Programs HPD filter on associated HPD line
- *
- *  @param [in] delay_on_connect_in_ms: Connect filter timeout
- *  @param [in] delay_on_disconnect_in_ms: Disconnect filter timeout
- *
- *  @return
- * true on success, false otherwise
- */
-static bool program_hpd_filter(const struct dc_link *link)
-{
-   bool result = false;
-   struct gpio *hpd;
-   int delay_on_connect_in_ms = 0;
-   int delay_on_disconnect_in_ms = 0;
-
-   if (link->is_hpd_filter_disabled)
-   return false;
-   /* Verify feature is supported */
-   switch (link->connector_signal) {
-   case SIGNAL_TYPE_DVI_SINGLE_LINK:
-   case SIGNAL_TYPE_DVI_DUAL_LINK:
-   case SIGNAL_TYPE_HDMI_TYPE_A:
-   /* Program hpd filter */
-   delay_on_connect_in_ms = 500;
-   delay_on_disconnect_in_ms = 100;
-   break;
-   case SIGNAL_TYPE_DISPLAY_PORT:
-   case SIGNAL_TYPE_DISPLAY_PORT_MST:
-   /* Program hpd filter to allow DP signal to settle */
-   /* 500: not able to detect MST <-> SST switch as HPD is low for
-* only 100ms on DELL U2413
-* 0: some passive dongle still show aux mode instead of i2c
-* 20-50: not enough to hide bouncing HPD with passive dongle.
-* also see intermittent i2c read issues.
-*/
-   delay_on_connect_in_ms = 80;
-   delay_on_disconnect_in_ms = 0;
-   break;
-   case SIGNAL_TYPE_LVDS:
-   case SIGNAL_TYPE_EDP:
-   default:
-   /* Don't program hpd filter */
-   return false;
-   }
-
-   /* Obtain HPD handle */
-   hpd = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
-  link->ctx->gpio_service);
-
-   if (!hpd)
-   return result;
-
-   /* Setup HPD filtering */
-   if (dal_gpio_open(hpd, GPIO_MODE_INTERRUPT) == GPIO_RESULT_OK) {
-   struct gpio_hpd_config config;
-
-   config.delay_on_connect = delay_on_connect_in_ms;
-   config.delay_on_disconnect = delay_on_disconnect_in_ms;
-
-   dal_irq_setup_hpd_filter(hpd, &config);
-
-   dal_gpio_close(hpd);
-
-   result = true;
-   } else {
-   ASSERT_CRITICAL(false);
-   }
-
-   /* Release HPD handle */
-   dal_gpio_destroy_irq(&hpd);
-
-   return result;
-}
-
 bool dc_l

[PATCH 04/37] drm/amd/display: Fix DPIA link encoder assignment issue

2023-01-10 Thread Rodrigo Siqueira
From: Cruise Hung 

[Why]
The DPIA link encoder (DIG) was not released when a Display XDR 6K
monitor was disconnected. That's because the DPIA link encoder logic
checked the BW when it removed the link encoder for it. And the timing
BW was less than DP link BW.  So, it failed to release the link encoder.
And that caused the DIG to be occupied and unusable.  The reason it
checked the BW is it wants to identify whether this link was using DIG
or HPO. It skips if it's not DIG.

[How]
Do not check the BW when removing the link encoder: since it is
already in the assignment table, it must be the DIG.

Reviewed-by: Jun Lei 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Cruise Hung 
---
 .../drm/amd/display/dc/core/dc_link_enc_cfg.c | 57 ++-
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
index 614f022d1cff..049e755792e6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
@@ -305,15 +305,17 @@ void link_enc_cfg_link_encs_assign(
for (i = 0; i < stream_count; i++) {
struct dc_stream_state *stream = streams[i];
 
+   /* skip it if the link is mappable endpoint. */
+   if (stream->link->is_dig_mapping_flexible)
+   continue;
+
/* Skip stream if not supported by DIG link encoder. */
if (!is_dig_link_enc_stream(stream))
continue;
 
/* Physical endpoints have a fixed mapping to DIG link 
encoders. */
-   if (!stream->link->is_dig_mapping_flexible) {
-   eng_id = stream->link->eng_id;
-   add_link_enc_assignment(state, stream, eng_id);
-   }
+   eng_id = stream->link->eng_id;
+   add_link_enc_assignment(state, stream, eng_id);
}
 
/* (b) Retain previous assignments for mappable endpoints if encoders 
still available. */
@@ -325,11 +327,12 @@ void link_enc_cfg_link_encs_assign(
for (i = 0; i < stream_count; i++) {
struct dc_stream_state *stream = state->streams[i];
 
-   /* Skip stream if not supported by DIG link encoder. */
-   if (!is_dig_link_enc_stream(stream))
+   /* Skip it if the link is NOT mappable endpoint. */
+   if (!stream->link->is_dig_mapping_flexible)
continue;
 
-   if (!stream->link->is_dig_mapping_flexible)
+   /* Skip stream if not supported by DIG link encoder. */
+   if (!is_dig_link_enc_stream(stream))
continue;
 
for (j = 0; j < prev_state->stream_count; j++) {
@@ -338,6 +341,7 @@ void link_enc_cfg_link_encs_assign(
if (stream == prev_stream && stream->link == 
prev_stream->link &&

prev_state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[j].valid) {
eng_id = 
prev_state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[j].eng_id;
+
if (is_avail_link_enc(state, eng_id, 
stream))
add_link_enc_assignment(state, 
stream, eng_id);
}
@@ -350,6 +354,15 @@ void link_enc_cfg_link_encs_assign(
 
for (i = 0; i < stream_count; i++) {
struct dc_stream_state *stream = streams[i];
+   struct link_encoder *link_enc = NULL;
+
+   /* Skip it if the link is NOT mappable endpoint. */
+   if (!stream->link->is_dig_mapping_flexible)
+   continue;
+
+   /* Skip if encoder assignment retained in step (b) above. */
+   if (stream->link_enc)
+   continue;
 
/* Skip stream if not supported by DIG link encoder. */
if (!is_dig_link_enc_stream(stream)) {
@@ -358,24 +371,18 @@ void link_enc_cfg_link_encs_assign(
}
 
/* Mappable endpoints have a flexible mapping to DIG link 
encoders. */
-   if (stream->link->is_dig_mapping_flexible) {
-   struct link_encoder *link_enc = NULL;
 
-   /* Skip if encoder assignment retained in step (b) 
above. */
-   if (stream->link_enc)
-   continue;
+   /* For MST, multiple streams will share the same link / display
+* endpoint. These streams should use the same link encoder
+* assigned to that endpoint.
+*/
+   link_enc = get_link_enc_used_by_link(state, stream->link);
+   if

[PATCH 05/37] drm/amd/display: Implement FIFO enable sequence on DCN32

2023-01-10 Thread Rodrigo Siqueira
From: Dillon Varone 

[WHY?]
FIFO enable sequence is incomplete as it is currently implemented in FW,
and requires a reset to prevent the FIFO from being enabled in an invalid
state. This cannot be done until DIG FE is connected to the BE.

[HOW?]
Add FIFO enable sequence in driver for dcn32 with reset after DIG FE is
connected to BE.

Reviewed-by: Alvin Lee 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Dillon Varone 
---
 .../dc/dcn32/dcn32_dio_stream_encoder.c   | 28 +++
 1 file changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
index d19fc93dbc75..7d09c62a405a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
@@ -421,6 +421,33 @@ static void enc32_set_dig_input_mode(struct stream_encoder 
*enc, unsigned int pi
REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, 
pix_per_container == 2 ? 0x1 : 0x0);
 }
 
+static void enc32_reset_fifo(struct stream_encoder *enc, bool reset)
+{
+   struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+   uint32_t reset_val = reset ? 1 : 0;
+   uint32_t is_symclk_on;
+
+   REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, reset_val);
+   REG_GET(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, &is_symclk_on);
+
+   if (is_symclk_on)
+   REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, reset_val, 10, 
5000);
+   else
+   udelay(10);
+}
+
+static void enc32_enable_fifo(struct stream_encoder *enc)
+{
+   struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+   REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+
+   enc32_reset_fifo(enc, true);
+   enc32_reset_fifo(enc, false);
+
+   REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1);
+}
+
 static const struct stream_encoder_funcs dcn32_str_enc_funcs = {
.dp_set_odm_combine =
enc32_dp_set_odm_combine,
@@ -466,6 +493,7 @@ static const struct stream_encoder_funcs 
dcn32_str_enc_funcs = {
.hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute,
 
.set_input_mode = enc32_set_dig_input_mode,
+   .enable_fifo = enc32_enable_fifo,
 };
 
 void dcn32_dio_stream_encoder_construct(
-- 
2.39.0



[PATCH 02/37] Revert "drm/amd/display: Speed up DML fast_validate path"

2023-01-10 Thread Rodrigo Siqueira
From: Martin Leung 

This reverts commit ef4e2703d6c81eb08e1cd29d14c819791e6898a6.

It caused corruption in some media players.

Reviewed-by: Aric Cyr 
Reviewed-by: Nevenko Stupar 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Martin Leung 
---
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  |  2 -
 .../dc/dml/dcn32/display_mode_vba_32.c| 37 +--
 .../drm/amd/display/dc/dml/display_mode_lib.h |  1 -
 3 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index e7459fd50bf9..025139a22e84 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -1564,7 +1564,6 @@ bool dcn32_internal_validate_bw(struct dc *dc,

context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
dm_prefetch_support_fclk_and_stutter;
 
-   context->bw_ctx.dml.validate_max_state = fast_validate;
vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, 
pipe_cnt);
 
/* Last attempt with Prefetch mode 2 
(dm_prefetch_support_stutter == 3) */
@@ -1573,7 +1572,6 @@ bool dcn32_internal_validate_bw(struct dc *dc,
dm_prefetch_support_stutter;
vlevel = dml_get_voltage_level(&context->bw_ctx.dml, 
pipes, pipe_cnt);
}
-   context->bw_ctx.dml.validate_max_state = false;
 
if (vlevel < context->bw_ctx.dml.soc.num_states) {
memset(split, 0, sizeof(split));
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index bc22078751f8..4b8f5fa0f0ad 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -1707,7 +1707,7 @@ static void mode_support_configuration(struct vba_vars_st 
*v,
 void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib 
*mode_lib)
 {
struct vba_vars_st *v = &mode_lib->vba;
-   int i, j, start_state;
+   int i, j;
unsigned int k, m;
unsigned int MaximumMPCCombine;
unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth;
@@ -1720,10 +1720,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
 #endif
 
/*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
-   if (mode_lib->validate_max_state)
-   start_state = v->soc.num_states - 1;
-   else
-   start_state = 0;
+
/*Scale Ratio, taps Support Check*/
 
mode_lib->vba.ScaleRatioAndTapsSupport = true;
@@ -2012,7 +2009,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
mode_lib->vba.MPCCombineMethodIncompatible = 
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage
&& 
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible;
 
-   for (i = start_state; i < v->soc.num_states; i++) {
+   for (i = 0; i < v->soc.num_states; i++) {
for (j = 0; j < 2; j++) {
mode_lib->vba.TotalNumberOfActiveDPP[i][j] = 0;
mode_lib->vba.TotalAvailablePipesSupport[i][j] = true;
@@ -2289,7 +2286,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
}
}
 
-   for (i = start_state; i < v->soc.num_states; ++i) {
+   for (i = 0; i < v->soc.num_states; ++i) {
mode_lib->vba.ExceededMultistreamSlots[i] = false;
for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
if (mode_lib->vba.OutputMultistreamEn[k] == true && 
mode_lib->vba.OutputMultistreamId[k] == k) {
@@ -2389,7 +2386,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
}
}
 
-   for (i = start_state; i < v->soc.num_states; ++i) {
+   for (i = 0; i < v->soc.num_states; ++i) {
mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] = false;
for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
if (mode_lib->vba.BlendingAndTiming[k] == k
@@ -2406,7 +2403,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
}
}
 
-   for (i = start_state; i < v->soc.num_states; ++i) {
+   for (i = 0; i < v->soc.num_states; ++i) {
mode_lib->vba.ODMCombine2To1SupportCheckOK[i] = true;
mode_lib->vba.ODMCombine4To1SupportCheckOK[i] = true;
for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
@@ -2424,7 +2421,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(

[PATCH 03/37] drm/amd/display: fix multi edp panel instancing

2023-01-10 Thread Rodrigo Siqueira
From: Dmytro Laktyushkin 

A previous fix attempted to correct mismatch between DM display
targets and dc panel instancing by only counting connected panels.
This behaviour breaks a feature, thus this is an alternative solution
that allows mapping display targets to dc links during mod_power_create.

Reviewed-by: Nicholas Kazlauskas 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Dmytro Laktyushkin 
---
 drivers/gpu/drm/amd/display/dc/dc_link.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h 
b/drivers/gpu/drm/amd/display/dc/dc_link.h
index 3b9315a38b30..1226ecb625b4 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -338,14 +338,13 @@ static inline bool dc_get_edp_link_panel_inst(const 
struct dc *dc,
int edp_num, i;
 
*inst_out = 0;
-   if (link->connector_signal != SIGNAL_TYPE_EDP || !link->local_sink)
+   if (link->connector_signal != SIGNAL_TYPE_EDP)
return false;
get_edp_links(dc, edp_links, &edp_num);
for (i = 0; i < edp_num; i++) {
if (link == edp_links[i])
break;
-   if (edp_links[i]->local_sink)
-   (*inst_out)++;
+   (*inst_out)++;
}
return true;
 }
-- 
2.39.0



[PATCH 00/37] DC Patches Jan 10, 2023

2023-01-10 Thread Rodrigo Siqueira
This DC patchset brings improvements in multiple areas. In summary, we
highlight the following areas:

- Revert patches that caused regressions associated with audio and an
  old change that checks the DCN version.
- Refactor DDC and HDP.
- Move DPIA and DPCD logic to new files.
- Updates to DMUB.
- Optimization and bug fixes for SUBVP/DRR.
- Drop legacy code.

Cc: Daniel Wheeler 

Thanks
Siqueira

Alvin Lee (2):
  drm/amd/display: Request min clocks after disabling pipes on init
  drm/amd/display: Allow subvp on vactive pipes that are 2560x1440@60

Aric Cyr (1):
  drm/amd/display: 3.2.218

Aurabindo Pillai (2):
  Revert "drm/amd/display: Demote Error Level When ODM Transition
Supported"
  drm/amd/display: fix an error check condition for synced pipes

Bhawanpreet Lakha (1):
  drm/amd/display: Change i2c speed for hdcp

Brandon Syu (1):
  drm/amd/display: fix mapping to non-allocated address

Charlene Liu (2):
  drm/amd/display: add hubbub_init related
  drm/amd/display: conditional remove disable dig_fifo when blank

Cruise Hung (1):
  drm/amd/display: Fix DPIA link encoder assignment issue

Dillon Varone (9):
  drm/amd/display: Implement FIFO enable sequence on DCN32
  drm/amd/display: Optimize subvp and drr validation
  drm/amd/display: Account for DCC Meta pitch in DML MALL surface
calculations
  drm/amd/display: Account for Subvp Phantoms in DML MALL surface
calculations
  drm/amd/display: Use DML for MALL SS and Subvp allocation calculations
  drm/amd/display: cleanup function args in dml
  drm/amd/display: set active bit for desktop with VSDBv3
  drm/amd/display: Remove DISPCLK dentist programming for dcn32
  drm/amd/display: Account for MPO planes in dcn32 mall alloc
calculations

Dmytro Laktyushkin (1):
  drm/amd/display: fix multi edp panel instancing

Martin Leung (1):
  Revert "drm/amd/display: Speed up DML fast_validate path"

Mustapha Ghaddar (2):
  drm/amd/display: Update BW alloc after new DMUB logic
  drm/amd/display: Update dmub header to match DMUB

Rodrigo Siqueira (1):
  drm/amd/display: Remove unused code

Saaem Rizvi (2):
  drm/amd/display: Remove SubVp support if src/dst rect does not equal
stream timing
  drm/amd/display: Add extra mblk for DCC

Tony Tascioglu (2):
  drm/amd/display: Optimize link power-down when link powered externally
  drm/amd/display: Skip backlight control delay on external powered
links

Wenjing Liu (7):
  drm/amd/display: refactor hpd logic from dc_link to link_hpd
  drm/amd/display: refactor ddc logic from dc_link_ddc to link_ddc
  drm/amd/display: move dpcd logic from dc_link_dpcd to link_dpcd
  drm/amd/display: move dc_link_dpia logic to link_dp_dpia
  drm/amd/display: move dp link training logic to link_dp_training
  drm/amd/display: move dp phy related logic to link_dp_phy
  drm/amd/display: move dp capability related logic to
link_dp_capability

hersen wu (2):
  drm/amd/display: phase2 enable mst hdcp multiple displays
  drm/amd/display: hdcp not enabled on connector 0

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |1 -
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c |3 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c|  153 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.h|5 +-
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   |2 -
 drivers/gpu/drm/amd/display/dc/Makefile   |6 +-
 .../gpu/drm/amd/display/dc/bios/bios_parser.c |1 -
 .../drm/amd/display/dc/bios/bios_parser2.c|6 +-
 .../drm/amd/display/dc/bios/command_table2.c  |   14 +-
 .../drm/amd/display/dc/bios/command_table2.h  |3 +-
 .../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  |   97 +-
 drivers/gpu/drm/amd/display/dc/core/dc.c  |5 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c |  542 +-
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  | 6682 ++---
 .../drm/amd/display/dc/core/dc_link_enc_cfg.c |   60 +-
 .../gpu/drm/amd/display/dc/core/dc_resource.c |   35 +-
 drivers/gpu/drm/amd/display/dc/dc.h   |3 +-
 .../gpu/drm/amd/display/dc/dc_bios_types.h|3 +-
 drivers/gpu/drm/amd/display/dc/dc_ddc_types.h |   28 +
 drivers/gpu/drm/amd/display/dc/dc_dp_types.h  |3 +
 .../gpu/drm/amd/display/dc/dc_hdmi_types.h|  114 +
 drivers/gpu/drm/amd/display/dc/dc_link.h  |   69 +-
 drivers/gpu/drm/amd/display/dc/dc_types.h |1 +
 drivers/gpu/drm/amd/display/dc/dce/dce_aux.h  |2 +-
 .../drm/amd/display/dc/dce/dce_link_encoder.c |1 -
 .../display/dc/dce110/dce110_hw_sequencer.c   |   41 +-
 .../display/dc/dce110/dce110_hw_sequencer.h   |2 +-
 .../drm/amd/display/dc/dcn10/dcn10_hubbub.h   |   12 +-
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c |4 +-
 .../amd/display/dc/dcn10/dcn10_link_encoder.c |1 -
 .../display/dc/dcn10/dcn10_stream_encoder.c   |2 +-
 .../drm/amd/display/dc/dcn20/dcn20_hwseq.c|   10 +-
 .../amd/display/dc/dcn20/dcn20_link_encoder.c |1 -
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |6 +-
 .../display/dc/dcn20/dcn20_stream_encoder.c   |

[PATCH 01/37] drm/amd/display: Update BW alloc after new DMUB logic

2023-01-10 Thread Rodrigo Siqueira
From: Mustapha Ghaddar 

[WHY]
After introducing new DPIA NOTIFICATION we will need
to update the existing BW allocation logic

[HOW]
Updated the BW alloc source and header files

Reviewed-by: Meenakshikumar Somasundaram 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Mustapha Ghaddar 
---
 drivers/gpu/drm/amd/display/dc/dc_link.h  |  2 +-
 .../drm/amd/display/dc/link/link_dp_dpia_bw.h | 47 +++
 .../drm/amd/display/dmub/src/dmub_srv_stat.c  | 18 +++
 3 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h 
b/drivers/gpu/drm/amd/display/dc/dc_link.h
index 8565bbb75177..3b9315a38b30 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -158,11 +158,11 @@ struct dc_panel_config {
 struct dc_dpia_bw_alloc {
int sink_verified_bw;  // The Verified BW that sink can allocated and 
use that has been verified already
int sink_allocated_bw; // The Actual Allocated BW that sink currently 
allocated
-   int padding_bw;// The Padding "Un-used" BW allocated by CM for 
padding reasons
int sink_max_bw;   // The Max BW that sink can require/support
int estimated_bw;  // The estimated available BW for this DPIA
int bw_granularity;// BW Granularity
bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for 
all 3:  DP-Tx & Dpia & CM
+   bool response_ready;   // Response ready from the CM side
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h 
b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h
index 669e995f825f..58eb7b581093 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h
@@ -26,13 +26,13 @@
 #ifndef DC_INC_LINK_DP_DPIA_BW_H_
 #define DC_INC_LINK_DP_DPIA_BW_H_
 
-// XXX: TODO: Re-add for Phase 2
-/* Number of Host Routers per motherboard is 2 and 2 DPIA per host router */
-#define MAX_HR_NUM 2
-
-struct dc_host_router_bw_alloc {
-   int max_bw[MAX_HR_NUM]; // The Max BW that each Host Router 
has available to be shared btw DPIAs
-   int total_estimated_bw[MAX_HR_NUM]; // The Total Verified and available 
BW that Host Router has
+/*
+ * Host Router BW type
+ */
+enum bw_type {
+   HOST_ROUTER_BW_ESTIMATED,
+   HOST_ROUTER_BW_ALLOCATED,
+   HOST_ROUTER_BW_INVALID,
 };
 
 /*
@@ -61,9 +61,40 @@ void set_usb4_req_bw_req(struct dc_link *link, int req_bw);
  * find out the result of allocating on CM and update structs accordingly
  *
  * @link: pointer to the dc_link struct instance
+ * @bw: Allocated or Estimated BW depending on the result
+ * @result: Response type
+ *
+ * return: none
+ */
+void get_usb4_req_bw_resp(struct dc_link *link, uint8_t bw, uint8_t result);
+
+/*
+ * Return the response_ready flag from dc_link struct
+ *
+ * @link: pointer to the dc_link struct instance
+ *
+ * return: response_ready flag from dc_link struct
+ */
+bool get_cm_response_ready_flag(struct dc_link *link);
+
+/*
+ * Get the Max Available BW or Max Estimated BW for each Host Router
+ *
+ * @link: pointer to the dc_link struct instance
+ * @type: ESTIMATD BW or MAX AVAILABLE BW
+ *
+ * return: response_ready flag from dc_link struct
+ */
+int get_host_router_total_bw(struct dc_link *link, uint8_t type);
+
+/*
+ * Cleanup function for when the dpia is unplugged to reset struct
+ * and perform any required clean up
+ *
+ * @link: pointer to the dc_link struct instance
  *
  * return: none
  */
-void get_usb4_req_bw_resp(struct dc_link *link);
+bool dpia_bw_alloc_unplug(struct dc_link *link);
 
 #endif /* DC_INC_LINK_DP_DPIA_BW_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c 
b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c
index 55a534ec0794..4948f9724db2 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c
@@ -98,19 +98,19 @@ enum dmub_status dmub_srv_stat_get_notification(struct 
dmub_srv *dmub,
 
if (cmd.dpia_notify.payload.header.type == 
DPIA_NOTIFY__BW_ALLOCATION) {
 
-   if 
(cmd.dpia_notify.payload.data.dpia_bw_alloc.bits.bw_request_failed) {
+   notify->bw_alloc_reply.estimated_bw =
+   
cmd.dpia_notify.payload.data.dpia_bw_alloc.estimated_bw;
+   notify->bw_alloc_reply.allocated_bw =
+   
cmd.dpia_notify.payload.data.dpia_bw_alloc.allocated_bw;
+
+   if 
(cmd.dpia_notify.payload.data.dpia_bw_alloc.bits.bw_request_failed)
notify->result = DPIA_BW_REQ_FAILED;
-   } else if 
(cmd.dpia_notify.payload.data.dpia_bw_alloc.bits.bw_request_succeeded) {
+   else if 
(cmd.dpia_notify.payload.data.dpia_bw_alloc.bits.bw_request_succeeded)
n

Re: [PATCH 2/2] drm/amdgpu: add AMDGPU_INFO_VM_STAT to return GPU VM

2023-01-10 Thread Marek Olšák
On Tue, Jan 10, 2023 at 11:23 AM Christian König <
ckoenig.leichtzumer...@gmail.com> wrote:

> Am 10.01.23 um 16:28 schrieb Marek Olšák:
>
> On Wed, Jan 4, 2023 at 9:51 AM Christian König <
> ckoenig.leichtzumer...@gmail.com> wrote:
>
>> Am 04.01.23 um 00:08 schrieb Marek Olšák:
>>
>> I see about the access now, but did you even look at the patch?
>>
>>
>> I did look at the patch, but I haven't fully understood yet what you are
>> trying to do here.
>>
>
> First and foremost, it returns the evicted size of VRAM and visible VRAM,
> and returns visible VRAM usage. It should be obvious which stat includes
> the size of another.
>
>
>> Because what the patch does isn't even exposed to common drm code, such
>> as the preferred domain and visible VRAM placement, so it can't be in
>> fdinfo right now.
>>
>> Or do you even know what fdinfo contains? Because it contains nothing
>> useful. It only has VRAM and GTT usage, which we already have in the INFO
>> ioctl, so it has nothing that we need. We mainly need the eviction
>> information and visible VRAM information now. Everything else is a bonus.
>>
>>
>> Well the main question is what are you trying to get from that
>> information? The eviction list for example is completely meaningless to
>> userspace, that stuff is only temporary and will be cleared on the next CS
>> again.
>>
>
> I don't know what you mean. The returned eviction stats look correct and
> are stable (they don't change much). You can suggest changes if you think
> some numbers are not reported correctly.
>
>
>>
>> What we could expose is the VRAM over-commit value, e.g. how much BOs
>> which where supposed to be in VRAM are in GTT now. I think that's what you
>> are looking for here, right?
>>
>
> The VRAM overcommit value is "evicted_vram".
>
>
>>
>> Also, it's undesirable to open and parse a text file if we can just call
>> an ioctl.
>>
>>
>> Well I see the reasoning for that, but I also see why other drivers do a
>> lot of the stuff we have as IOCTL as separate files in sysfs, fdinfo or
>> debugfs.
>>
>> Especially repeating all the static information which were already
>> available under sysfs in the INFO IOCTL was a design mistake as far as I
>> can see. Just compare what AMDGPU and the KFD code is doing to what for
>> example i915 is doing.
>>
>> Same for things like debug information about a process. The fdinfo stuff
>> can be queried from external tools (gdb, gputop, umr etc...) as well which
>> makes that interface more preferred.
>>
>
> Nothing uses fdinfo in Mesa. No driver uses sysfs in Mesa except drm
> shims, noop drivers, and Intel for perf metrics. sysfs itself is an
> unusable mess for the PCIe query and is missing information.
>
> I'm not against exposing more stuff through sysfs and fdinfo for tools,
> but I don't see any reason why drivers should use it (other than for
> slowing down queries and initialization).
>
>
> That's what I'm asking: Is this for some tool or to make some driver
> decision based on it?
>
> If you just want the numbers for over displaying then I think it would be
> better to put this into fdinfo together with the other existing stuff there.
>

> If you want to make allocation decisions based on this then we should have
> that as IOCTL or even better as mmap() page between kernel and userspace.
> But in this case I would also calculation the numbers completely different
> as well.
>
> See we have at least the following things in the kernel:
> 1. The eviction list in the VM.
> Those are the BOs which are currently evicted and tried to moved back
> in on the next CS.
>
> 2. The VRAM over commit value.
> In other words how much more VRAM than available has the application
> tried to allocate?
>
> 3. The visible VRAM usage by this application.
>
> The end goal is that the eviction list will go away, e.g. we will always
> have stable allocations based on allocations of other applications and not
> constantly swap things in and out.
>
> When you now expose the eviction list to userspace we will be stuck with
> this interface forever.
>

It's for the GALLIUM HUD.

The only missing thing is the size of all evicted VRAM allocations, and the
size of all evicted visible VRAM allocations.

1. No list is exposed. Only sums of buffer sizes are exposed. Also, the
eviction list has no meaning here. All lists are treated equally, and
mem_type is compared with preferred_domains to determine where buffers are
and where they should be.

2. I'm not interested in the overcommit value. I'm only interested in
knowing the number of bytes of evicted VRAM right now. It can be as
variable as the CPU load, but in practice it shouldn't be because PCIe
doesn't have the bandwidth to move things quickly.

3. Yes, that's true.

Marek


Re: [PATCH] drm/amdkfd: Page aligned VRAM reserve size

2023-01-10 Thread Philip Yang



On 2023-01-09 22:14, Felix Kuehling wrote:

Am 2023-01-09 um 19:01 schrieb Philip Yang:

Use page aligned size to reserve VRAM usage because page aligned TTM BO
size is used to unreserve VRAM usage; otherwise this causes unbalanced
vram_used accounting.

Change vram_used definition type to int64_t to be able to trigger
WARN_ONCE(adev && adev->kfd.vram_used < 0, "..."), to help debug the
accounting issue with a warning and backtrace.

Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   | 2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index fb41869e357a..333780491867 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
    struct amdgpu_kfd_dev {
  struct kfd_dev *dev;
-    uint64_t vram_used;
+    int64_t vram_used;
  uint64_t vram_used_aligned;
  bool init_complete;
  struct work_struct reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index 2a118669d0e3..f23d94e57762 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -151,7 +151,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,

   * to avoid fragmentation caused by 4K allocations in the tail
   * 2M BO chunk.
   */
-    vram_needed = size;
+    vram_needed = PAGE_ALIGN(size);


This only solves part of the problem. size is used in other places in 
this function that should all use the page-aligned size. I think we 
should do the page-alignment at a much higher level, in 
kfd_ioctl_alloc_memory_of_gpu. That way all the kernel code can safely 
assume that buffer sizes are page aligned, and we avoid future surprises.


yes, the error handling unreserve should use aligned_size too, and size 
is also used as number of pages in amdgpu_bo_create for DOMAIN_GWS etc, 
we can not pass aligned size at higher level, I will send v2 patch for 
review.


Regards,

Philip



Regards,
  Felix



  } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
  system_mem_needed = size;
  } else if (!(alloc_flag &


Re: [PATCH 2/2] drm/amdgpu: add AMDGPU_INFO_VM_STAT to return GPU VM

2023-01-10 Thread Christian König

Am 10.01.23 um 16:28 schrieb Marek Olšák:
On Wed, Jan 4, 2023 at 9:51 AM Christian König 
 wrote:


Am 04.01.23 um 00:08 schrieb Marek Olšák:

I see about the access now, but did you even look at the patch?


I did look at the patch, but I haven't fully understood yet what
you are trying to do here.


First and foremost, it returns the evicted size of VRAM and visible 
VRAM, and returns visible VRAM usage. It should be obvious which stat 
includes the size of another.




Because what the patch does isn't even exposed to common drm
code, such as the preferred domain and visible VRAM placement, so
it can't be in fdinfo right now.

Or do you even know what fdinfo contains? Because it contains
nothing useful. It only has VRAM and GTT usage, which we already
have in the INFO ioctl, so it has nothing that we need. We mainly
need the eviction information and visible VRAM information now.
Everything else is a bonus.


Well the main question is what are you trying to get from that
information? The eviction list for example is completely
meaningless to userspace, that stuff is only temporary and will be
cleared on the next CS again.


I don't know what you mean. The returned eviction stats look correct 
and are stable (they don't change much). You can suggest changes if 
you think some numbers are not reported correctly.



What we could expose is the VRAM over-commit value, e.g. how much
BOs which where supposed to be in VRAM are in GTT now. I think
that's what you are looking for here, right?


The VRAM overcommit value is "evicted_vram".



Also, it's undesirable to open and parse a text file if we can
just call an ioctl.


Well I see the reasoning for that, but I also see why other
drivers do a lot of the stuff we have as IOCTL as separate files
in sysfs, fdinfo or debugfs.

Especially repeating all the static information which were already
available under sysfs in the INFO IOCTL was a design mistake as
far as I can see. Just compare what AMDGPU and the KFD code is
doing to what for example i915 is doing.

Same for things like debug information about a process. The fdinfo
stuff can be queried from external tools (gdb, gputop, umr etc...)
as well which makes that interface more preferred.


Nothing uses fdinfo in Mesa. No driver uses sysfs in Mesa except drm 
shims, noop drivers, and Intel for perf metrics. sysfs itself is an 
unusable mess for the PCIe query and is missing information.


I'm not against exposing more stuff through sysfs and fdinfo for 
tools, but I don't see any reason why drivers should use it (other 
than for slowing down queries and initialization).


That's what I'm asking: Is this for some tool or to make some driver 
decision based on it?


If you just want the numbers for over displaying then I think it would 
be better to put this into fdinfo together with the other existing stuff 
there.


If you want to make allocation decisions based on this then we should 
have that as IOCTL or even better as mmap() page between kernel and 
userspace. But in this case I would also calculation the numbers 
completely different as well.


See we have at least the following things in the kernel:
1. The eviction list in the VM.
    Those are the BOs which are currently evicted and tried to moved 
back in on the next CS.


2. The VRAM over commit value.
    In other words how much more VRAM than available has the 
application tried to allocate?


3. The visible VRAM usage by this application.

The end goal is that the eviction list will go away, e.g. we will always 
have stable allocations based on allocations of other applications and 
not constantly swap things in and out.


When you now expose the eviction list to userspace we will be stuck with 
this interface forever.


Christian.



Marek


Re: [PATCH 0/9] Reduce stack size for DCN31/314

2023-01-10 Thread Geert Uytterhoeven
Hi Rodrigo,

On Tue, Jan 10, 2023 at 4:44 PM Rodrigo Siqueira
 wrote:
> Geert Uytterhoeven reported compiler issues in some specific gcc
> versions that point to a large stack size for DCN31/314. Unfortunately,
> I could not reproduce the problem with the latest version of GCC/Clang;
> but I still believe this is a valid issue. Therefore, this patchset
> attempts to address those issues by moving many arrays of doubles from
> the local declaration to some specific structs. The first patch in this
> series introduces the new struct, and the subsequent patches move arrays
> of doubles from the local function in favor of using those values from
> the struct.
>
> Geert Uytterhoeven, since I don't have a simple way to reproduce this
> issue, could you run the CI in this series to check if this patchset
> solves the issues?

As I am just analyzing the build logs of the kisskb build service,
I cannot trigger builds at will.

> Link: 
> https://lore.kernel.org/all/20221227082932.798359-1-ge...@linux-m68k.org/

The issues with DCN31/314 were seen with arm64-gcc5, when
building an arm64 allmodconfig kernel:
http://kisskb.ellerman.id.au/kisskb/buildresult/14858073/

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH] drm/amd/display: Fix set scaling doesn's work

2023-01-10 Thread Rodrigo Siqueira Jordao




On 11/22/22 06:20, hongao wrote:

[Why]
Setting scaling does not correctly update CRTC state. As a result
dc stream state's src (composition area) && dest (addressable area)
was not calculated as expected. This causes set scaling to not work.

[How]
Correctly update CRTC state when setting scaling property.

Signed-off-by: hongao 

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 3e1ecca72430..a88a6f758748 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9386,8 +9386,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
goto fail;
}
  
-		if (dm_old_con_state->abm_level !=

-   dm_new_con_state->abm_level)
+   if (dm_old_con_state->abm_level != dm_new_con_state->abm_level 
||
+   dm_old_con_state->scaling != dm_new_con_state->scaling)
new_crtc_state->connectors_changed = true;
}
  


Hi,

This change lgtm, and I also run it in our CI, and from IGT perspective, 
we are good.


Harry, do you have any comment about this change?

Thanks
Siqueira


Re: [PATCH] drm/amd/display: No need for Null pointer check before kfree

2023-01-10 Thread Rodrigo Siqueira Jordao




On 12/27/22 13:39, Deepak R Varma wrote:

kfree() & vfree() internally perform a NULL check on the pointer handed
to them and take no action if it indeed is NULL. Hence there is no need
for a pre-check of the memory pointer before handing it to
kfree()/vfree().

Issue reported by ifnullfree.cocci Coccinelle semantic patch script.

Signed-off-by: Deepak R Varma 
---
  drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 3 +--
  drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 3 +--
  2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
index 3ce0ee0d012f..694a9d3d92ae 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
@@ -577,8 +577,7 @@ void dcn3_clk_mgr_construct(

  void dcn3_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr)
  {
-   if (clk_mgr->base.bw_params)
-   kfree(clk_mgr->base.bw_params);
+   kfree(clk_mgr->base.bw_params);

if (clk_mgr->wm_range_table)
dm_helpers_free_gpu_mem(clk_mgr->base.ctx, 
DC_MEM_ALLOC_TYPE_GART,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 200fcec19186..ba9814f88f48 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -783,8 +783,7 @@ void dcn32_clk_mgr_construct(

  void dcn32_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr)
  {
-   if (clk_mgr->base.bw_params)
-   kfree(clk_mgr->base.bw_params);
+   kfree(clk_mgr->base.bw_params);

if (clk_mgr->wm_range_table)
dm_helpers_free_gpu_mem(clk_mgr->base.ctx, 
DC_MEM_ALLOC_TYPE_GART,
--
2.34.1





Hi,

Reviewed-by: Rodrigo Siqueira 

And applied to amd-staging-drm-next.

Thanks
Siqueira



Re: [PATCH] drm: Alloc high address for drm buddy topdown flag

2023-01-10 Thread Matthew Auld

On 10/01/2023 12:02, Matthew Auld wrote:

On 07/01/2023 15:15, Arunpravin Paneer Selvam wrote:

As we are observing low numbers in viewperf graphics benchmark, we
are strictly not allowing the top down flag enabled allocations
to steal the memory space from cpu visible region.

The approach is, we are sorting each order list entries in
ascending order and compare the last entry of each order
list in the freelist and return the max block.


Did you also run the selftests? Does everything still pass and complete 
in a reasonable amount of time?




This patch improves the viewperf 3D benchmark scores.

Signed-off-by: Arunpravin Paneer Selvam 
---
  drivers/gpu/drm/drm_buddy.c | 81 -
  1 file changed, 54 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..50916b2f2fc5 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm,
  kmem_cache_free(slab_blocks, block);
  }
+static void list_insert_sorted(struct drm_buddy *mm,
+   struct drm_buddy_block *block)
+{
+    struct drm_buddy_block *node;
+    struct list_head *head;
+
+    head = &mm->free_list[drm_buddy_block_order(block)];
+    if (list_empty(head)) {
+    list_add(&block->link, head);
+    return;
+    }
+
+    list_for_each_entry(node, head, link)
+    if (drm_buddy_block_offset(block) < 
drm_buddy_block_offset(node))

+    break;
+
+    __list_add(&block->link, node->link.prev, &node->link);
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
  block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm,
  block->header &= ~DRM_BUDDY_HEADER_STATE;
  block->header |= DRM_BUDDY_FREE;
-    list_add(&block->link,
- &mm->free_list[drm_buddy_block_order(block)]);
+    list_insert_sorted(mm, block);


One advantage of not sorting is when splitting down a large block. 
Previously the most-recently-split would be at the start of the list for 
the next order down, where potentially the next allocation could use it. 
So perhaps less fragmentation if it's all part of one BO. Otherwise I 
don't see any other downsides, other than the extra overhead of sorting.



  }
  static void mark_split(struct drm_buddy_block *block)
@@ -387,20 +405,26 @@ alloc_range_bias(struct drm_buddy *mm,
  }
  static struct drm_buddy_block *
-get_maxblock(struct list_head *head)
+get_maxblock(struct drm_buddy *mm, unsigned int order)
  {
  struct drm_buddy_block *max_block = NULL, *node;
+    unsigned int i;
-    max_block = list_first_entry_or_null(head,
- struct drm_buddy_block,
- link);
-    if (!max_block)
-    return NULL;
+    for (i = order; i <= mm->max_order; ++i) {
+    if (!list_empty(&mm->free_list[i])) {
+    node = list_last_entry(&mm->free_list[i],
+   struct drm_buddy_block,
+   link);
+    if (!max_block) {
+    max_block = node;
+    continue;
+    }
-    list_for_each_entry(node, head, link) {
-    if (drm_buddy_block_offset(node) >
-    drm_buddy_block_offset(max_block))
-    max_block = node;
+    if (drm_buddy_block_offset(node) >
+    drm_buddy_block_offset(max_block)) {


Formatting doesn't look right here.

Going to test this today with some workloads with small-bar and i915 
just to see if this improves/impacts anything for us.


No surprises, and as advertised seems to lead to reduced utilisation of 
the mappable part for buffers that don't explicitly need it (TOPDOWN). 
Assuming the selftests are still happy,

Reviewed-by: Matthew Auld 




+    max_block = node;
+    }
+    }
  }
  return max_block;
@@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm,
  unsigned long flags)
  {
  struct drm_buddy_block *block = NULL;
-    unsigned int i;
+    unsigned int tmp;
  int err;
-    for (i = order; i <= mm->max_order; ++i) {
-    if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
-    block = get_maxblock(&mm->free_list[i]);
-    if (block)
-    break;
-    } else {
-    block = list_first_entry_or_null(&mm->free_list[i],
- struct drm_buddy_block,
- link);
-    if (block)
-    break;
+    if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
+    block = get_maxblock(mm, order);
+    if (block)
+    /* Store the obtained block order */
+    tmp = drm_buddy_block_order(block);
+    } else {
+    for (tmp = order; tmp <= mm->max_order; ++tmp) {
+    if (!list_empty(&mm->free_list[tmp])) {
+    block = list_last_entry(&mm->free_list[tmp],
+  

[PATCH 9/9] drm/amd/display: Use DynamicMetadataVMExtraLatency from struct

2023-01-10 Thread Rodrigo Siqueira
It was reported that on kernel v6.2-rc1, we have the following stack
size issue:

[...]/display/dc/dml/dcn314/display_mode_vba_314.c: In function
'UseMinimumDCFCLK':
[...]/display/dc/dml/dcn314/display_mode_vba_314.c:7127:1: error: the
frame size of 2208 bytes is larger than 2048 bytes
[-Werror=frame-larger-than=]

This commit replaces the local DynamicMetadataVMExtraLatency with the
same version in the UseMinimumDCFCLK_vars struct.

Cc: Alex Deucher 
Cc: Aurabindo Pillai 
Cc: Hamza Mahfooz 
Cc: Roman Li 
Cc: Geert Uytterhoeven 
Link: https://lore.kernel.org/all/20221227082932.798359-1-ge...@linux-m68k.org/
Reported-by: Geert Uytterhoeven 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index 0811b18c22c3..b5519e9d1a1e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -6976,7 +6976,6 @@ static void UseMinimumDCFCLK(
NormalEfficiency = 
v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
for (i = 0; i < v->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
-   double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
double MinimumTWait;
double NonDPTEBandwidth;
double DPTEBandwidth;
@@ -7035,14 +7034,14 @@ static void UseMinimumDCFCLK(

v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k] = 
dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] 
/ v->PixelClock[k];
ExpectedPrefetchBWAcceleration = 
(v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
/ (v->ReadBandwidthLuma[k] + 
v->ReadBandwidthChroma[k]);
-   DynamicMetadataVMExtraLatency[k] =
+   
v->UseMinimumDCFCLK_stack_reduction.DynamicMetadataVMExtraLatency[k] =
(v->GPUVMEnable == true && 
v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?

v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? 
v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
PrefetchTime = (v->MaximumVStartup[i][j][k] - 
1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
- v->UrgLatency[i]
* 
((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : 
v->GPUVMMaxPageTableLevels - 2)

* (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
-   - 
DynamicMetadataVMExtraLatency[k];
+   - 
v->UseMinimumDCFCLK_stack_reduction.DynamicMetadataVMExtraLatency[k];
 
if (PrefetchTime > 0) {
double ExpectedVRatioPrefetch;
@@ -7085,7 +7084,7 @@ static void UseMinimumDCFCLK(
&dummy2,
&dummy3);
AllowedTimeForUrgentExtraLatency = 
v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - 
TSetupPipe - TdmbfPipe - TdmecPipe
-   - TdmsksPipe - 
DynamicMetadataVMExtraLatency[k];
+   - TdmsksPipe - 
v->UseMinimumDCFCLK_stack_reduction.DynamicMetadataVMExtraLatency[k];
if (AllowedTimeForUrgentExtraLatency > 
0) {

v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
dml_max(

v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k],
@@ -7107,7 +7106,7 @@ static void UseMinimumDCFCLK(
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
double MaximumTvmPlus2Tr0PlusTsw;
 
-   MaximumTvmPlus2Tr0PlusTsw = 
(v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - 
MinimumTWait - DynamicMetadataVMExtraLatency[k];
+   MaximumTvmPlus2Tr0PlusTsw = 
(v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - 
MinimumTWait - 
v->UseMinimumDCFCLK_stac

[PATCH 7/9] drm/amd/display: Use PrefetchPixelLinesTime from struct instead of local variable

2023-01-10 Thread Rodrigo Siqueira
It was reported that on kernel v6.2-rc1, we have the following stack
size issue:

[...]/display/dc/dml/dcn314/display_mode_vba_314.c: In function
'UseMinimumDCFCLK':
[...]/display/dc/dml/dcn314/display_mode_vba_314.c:7127:1: error: the
frame size of 2208 bytes is larger than 2048 bytes
[-Werror=frame-larger-than=]

This commit replaces the local PrefetchPixelLinesTime with the same
version in the UseMinimumDCFCLK_vars struct.

Cc: Alex Deucher 
Cc: Aurabindo Pillai 
Cc: Hamza Mahfooz 
Cc: Roman Li 
Cc: Geert Uytterhoeven 
Link: https://lore.kernel.org/all/20221227082932.798359-1-ge...@linux-m68k.org/
Reported-by: Geert Uytterhoeven 
Signed-off-by: Rodrigo Siqueira 
---
 .../amd/display/dc/dml/dcn314/display_mode_vba_314.c  | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index 8de1f7840b5a..f51885d3dd2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -6976,7 +6976,6 @@ static void UseMinimumDCFCLK(
NormalEfficiency = 
v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
for (i = 0; i < v->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
-   double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
double 
DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
double MinimumTWait;
@@ -7034,7 +7033,7 @@ static void UseMinimumDCFCLK(
+ 
v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / 
v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
+ 2 * 
v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / 
v->ReturnBusWidth
+ 2 * v->MetaRowBytes[i][j][k] 
/ NormalEfficiency / v->ReturnBusWidth + 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k];
-   PrefetchPixelLinesTime[k] = 
dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] 
/ v->PixelClock[k];
+   
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k] = 
dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] 
/ v->PixelClock[k];
ExpectedPrefetchBWAcceleration = 
(v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
/ (v->ReadBandwidthLuma[k] + 
v->ReadBandwidthChroma[k]);
DynamicMetadataVMExtraLatency[k] =
@@ -7049,9 +7048,9 @@ static void UseMinimumDCFCLK(
if (PrefetchTime > 0) {
double ExpectedVRatioPrefetch;
 
-   ExpectedVRatioPrefetch = 
PrefetchPixelLinesTime[k]
+   ExpectedVRatioPrefetch = 
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k]
/ (PrefetchTime * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] / 
DCFCLKCyclesRequiredInPrefetch);
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] / 
PrefetchPixelLinesTime[k]
+   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] / 
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k]
* dml_max(1.0, 
ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * 
ExpectedPrefetchBWAcceleration;
if (v->HostVMEnable == true || 
v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {

DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
DCFCLKRequiredForPeakBandwidthPerPlane[k]
@@ -7110,12 +7109,12 @@ static void UseMinimumDCFCLK(
double MaximumTvmPlus2Tr0PlusTsw;
 
MaximumTvmPlus2Tr0PlusTsw = 
(v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - 
MinimumTWait - DynamicMetadataVMExtraLatency[k];
-   if (MaximumTvmPlus2Tr0PlusTsw <= 
MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
+   if (MaximumTvmPlus2Tr0PlusTsw <= 
MinimumTvmPlus2Tr0 + 
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k] / 4) {
  

[PATCH 5/9] drm/amd/display: Use matrix of double from struct instead of local variable

2023-01-10 Thread Rodrigo Siqueira
It was reported that on kernel v6.2-rc1, we have the following stack
size issue:

[...]/display/dc/dml/dcn314/display_mode_vba_314.c: In function
'UseMinimumDCFCLK':
[...]/display/dc/dml/dcn314/display_mode_vba_314.c:7127:1: error: the
frame size of 2208 bytes is larger than 2048 bytes
[-Werror=frame-larger-than=]

Remove TotalMaxPrefetchFlipDPTERowBandwidth from UseMinimumDCFCLK
(DCN314), and use TotalMaxPrefetchFlipDPTERowBandwidth from struct.

Cc: Alex Deucher 
Cc: Aurabindo Pillai 
Cc: Hamza Mahfooz 
Cc: Roman Li 
Cc: Geert Uytterhoeven 
Link: https://lore.kernel.org/all/20221227082932.798359-1-ge...@linux-m68k.org/
Reported-by: Geert Uytterhoeven 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c   | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index 950669f2c10d..2ea89a26c6e8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -6972,7 +6972,6 @@ static void UseMinimumDCFCLK(
struct vba_vars_st *v = &mode_lib->vba;
int dummy1, i, j, k;
double NormalEfficiency,  dummy2, dummy3;
-   double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
 
NormalEfficiency = 
v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
for (i = 0; i < v->soc.num_states; ++i) {
@@ -6991,9 +6990,9 @@ static void UseMinimumDCFCLK(
int NoOfDPPState[DC__NUM_DPP__MAX];
double MinimumTvmPlus2Tr0;
 
-   TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
+   
v->UseMinimumDCFCLK_stack_reduction.TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 
= 0;
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
-   TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 
TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
+   
v->UseMinimumDCFCLK_stack_reduction.TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 
= v->UseMinimumDCFCLK_stack_reduction.TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
+ v->NoOfDPP[i][j][k] * 
v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
}
 
@@ -7003,7 +7002,7 @@ static void UseMinimumDCFCLK(
MinimumTWait = CalculateTWait(MaxPrefetchMode, 
v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] 
+ v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
DPTEBandwidth = (v->HostVMEnable == true || 
v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
-   
TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
+   
v->UseMinimumDCFCLK_stack_reduction.TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 
: v->TotalDPTERowBandwidth[i][j];
DCFCLKRequiredForAverageBandwidth = dml_max3(
v->ProjectedDCFCLKDeepSleep[i][j],
(NonDPTEBandwidth + 
v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
-- 
2.39.0



[PATCH 6/9] drm/amd/display: Use PixelDCFCLKCyclesRequiredInPrefetch from struct

2023-01-10 Thread Rodrigo Siqueira
It was reported that on kernel v6.2-rc1, we have the following stack
size issue:

[...]/display/dc/dml/dcn314/display_mode_vba_314.c: In function
'UseMinimumDCFCLK':
[...]/display/dc/dml/dcn314/display_mode_vba_314.c:7127:1: error: the
frame size of 2208 bytes is larger than 2048 bytes
[-Werror=frame-larger-than=]

This commit replaces the local PixelDCFCLKCyclesRequiredInPrefetch with
the same version in the UseMinimumDCFCLK_vars struct.

Cc: Alex Deucher 
Cc: Aurabindo Pillai 
Cc: Hamza Mahfooz 
Cc: Roman Li 
Cc: Geert Uytterhoeven 
Link: https://lore.kernel.org/all/20221227082932.798359-1-ge...@linux-m68k.org/
Reported-by: Geert Uytterhoeven 
Signed-off-by: Rodrigo Siqueira 
---
 .../amd/display/dc/dml/dcn314/display_mode_vba_314.c  | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index 2ea89a26c6e8..8de1f7840b5a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -6976,7 +6976,6 @@ static void UseMinimumDCFCLK(
NormalEfficiency = 
v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
for (i = 0; i < v->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
-   double 
PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
double 
DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
@@ -7029,12 +7028,12 @@ static void UseMinimumDCFCLK(
double ExpectedPrefetchBWAcceleration;
double PrefetchTime;
 
-   PixelDCFCLKCyclesRequiredInPrefetch[k] = 
(v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * 
v->BytePerPixelY[k]
+   
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] = 
(v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * 
v->BytePerPixelY[k]
+ v->PrefetchLinesC[i][j][k] * 
v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / 
NormalEfficiency / v->ReturnBusWidth;
DCFCLKCyclesRequiredInPrefetch = 2 * 
ExtraLatencyCycles / NoOfDPPState[k]
+ 
v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / 
v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
+ 2 * 
v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / 
v->ReturnBusWidth
-   + 2 * v->MetaRowBytes[i][j][k] 
/ NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
+   + 2 * v->MetaRowBytes[i][j][k] 
/ NormalEfficiency / v->ReturnBusWidth + 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k];
PrefetchPixelLinesTime[k] = 
dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] 
/ v->PixelClock[k];
ExpectedPrefetchBWAcceleration = 
(v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
/ (v->ReadBandwidthLuma[k] + 
v->ReadBandwidthChroma[k]);
@@ -7051,8 +7050,8 @@ static void UseMinimumDCFCLK(
double ExpectedVRatioPrefetch;
 
ExpectedVRatioPrefetch = 
PrefetchPixelLinesTime[k]
-   / (PrefetchTime * 
PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * 
PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
+   / (PrefetchTime * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] / 
DCFCLKCyclesRequiredInPrefetch);
+   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] / 
PrefetchPixelLinesTime[k]
* dml_max(1.0, 
ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * 
ExpectedPrefetchBWAcceleration;
if (v->HostVMEnable == true || 
v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {

DCFCLKRequiredForPeakBandwidthPerPlane[k] = 

[PATCH 8/9] drm/amd/display: Use DCFCLKRequiredForPeakBandwidthPerPlane from struct

2023-01-10 Thread Rodrigo Siqueira
It was reported that on kernel v6.2-rc1, we have the following stack
size issue:

[...]/display/dc/dml/dcn314/display_mode_vba_314.c: In function
'UseMinimumDCFCLK':
[...]/display/dc/dml/dcn314/display_mode_vba_314.c:7127:1: error: the
frame size of 2208 bytes is larger than 2048 bytes
[-Werror=frame-larger-than=]

This commit replaces the local DCFCLKRequiredForPeakBandwidthPerPlane
with the same version in the UseMinimumDCFCLK_vars struct.

Cc: Alex Deucher 
Cc: Aurabindo Pillai 
Cc: Hamza Mahfooz 
Cc: Roman Li 
Cc: Geert Uytterhoeven 
Link: https://lore.kernel.org/all/20221227082932.798359-1-ge...@linux-m68k.org/
Reported-by: Geert Uytterhoeven 
Signed-off-by: Rodrigo Siqueira 
---
 .../display/dc/dml/dcn314/display_mode_vba_314.c  | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index f51885d3dd2d..0811b18c22c3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -6976,7 +6976,6 @@ static void UseMinimumDCFCLK(
NormalEfficiency = 
v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
for (i = 0; i < v->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
-   double 
DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
double MinimumTWait;
double NonDPTEBandwidth;
@@ -7050,14 +7049,14 @@ static void UseMinimumDCFCLK(
 
ExpectedVRatioPrefetch = 
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k]
/ (PrefetchTime * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] / 
DCFCLKCyclesRequiredInPrefetch);
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] / 
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k]
+   
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
NoOfDPPState[k] * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] / 
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k]
* dml_max(1.0, 
ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * 
ExpectedPrefetchBWAcceleration;
if (v->HostVMEnable == true || 
v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
DCFCLKRequiredForPeakBandwidthPerPlane[k]
+   
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k]
+ 
NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / 
v->ReturnBusWidth;
}
} else {
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
+   
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
v->DCFCLKPerState[i];
}
if (v->DynamicMetadataEnable[k] == true) {
double TSetupPipe;
@@ -7088,17 +7087,17 @@ static void UseMinimumDCFCLK(
AllowedTimeForUrgentExtraLatency = 
v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - 
TSetupPipe - TdmbfPipe - TdmecPipe
- TdmsksPipe - 
DynamicMetadataVMExtraLatency[k];
if (AllowedTimeForUrgentExtraLatency > 
0) {
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k],
+   
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
dml_max(
+   
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k],

ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
} else {
-

[PATCH 4/9] drm/amd/display: Move DynamicMetadataVMExtraLatency to UseMinimumDCFCLK_vars

2023-01-10 Thread Rodrigo Siqueira
It was reported that on kernel v6.2-rc1, we have the following stack
size issue:

make[3]: *** [/kisskb/src/scripts/Makefile.build:504: drivers/media]
Error 2
[...]/display/dc/dml/dcn31/display_mode_vba_31.c: In function
'UseMinimumDCFCLK':
[...]/display/dc/dml/dcn31/display_mode_vba_31.c:7082:1: error: the
frame size of 2224 bytes is larger than 2048 bytes
[-Werror=frame-larger-than=]

This commit moves the array of doubles DynamicMetadataVMExtraLatency to
a separate struct (UseMinimumDCFCLK_vars) to reduce the stack size.

Cc: Alex Deucher 
Cc: Aurabindo Pillai 
Cc: Hamza Mahfooz 
Cc: Roman Li 
Cc: Geert Uytterhoeven 
Link: https://lore.kernel.org/all/20221227082932.798359-1-ge...@linux-m68k.org/
Reported-by: Geert Uytterhoeven 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c   | 9 -
 drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h| 1 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 8175f3603f00..904703353958 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -6932,7 +6932,6 @@ static void UseMinimumDCFCLK(
NormalEfficiency = 
v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
for (i = 0; i < v->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
-   double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
double MinimumTWait;
double NonDPTEBandwidth;
double DPTEBandwidth;
@@ -6992,14 +6991,14 @@ static void UseMinimumDCFCLK(

v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k] = 
dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] 
/ v->PixelClock[k];
ExpectedPrefetchBWAcceleration = 
(v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
/ (v->ReadBandwidthLuma[k] + 
v->ReadBandwidthChroma[k]);
-   DynamicMetadataVMExtraLatency[k] =
+   
v->UseMinimumDCFCLK_stack_reduction.DynamicMetadataVMExtraLatency[k] =
(v->GPUVMEnable == true && 
v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?

v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? 
v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
PrefetchTime = (v->MaximumVStartup[i][j][k] - 
1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
- v->UrgLatency[i]
* 
((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : 
v->GPUVMMaxPageTableLevels - 2)

* (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
-   - 
DynamicMetadataVMExtraLatency[k];
+   - 
v->UseMinimumDCFCLK_stack_reduction.DynamicMetadataVMExtraLatency[k];
 
if (PrefetchTime > 0) {
double ExpectedVRatioPrefetch;
@@ -7041,7 +7040,7 @@ static void UseMinimumDCFCLK(
&dummy2,
&dummy3);
AllowedTimeForUrgentExtraLatency = 
v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - 
TSetupPipe - TdmbfPipe - TdmecPipe
-   - TdmsksPipe - 
DynamicMetadataVMExtraLatency[k];
+   - TdmsksPipe - 
v->UseMinimumDCFCLK_stack_reduction.DynamicMetadataVMExtraLatency[k];
if (AllowedTimeForUrgentExtraLatency > 
0) {

v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
dml_max(

v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k],
@@ -7062,7 +7061,7 @@ static void UseMinimumDCFCLK(
0);
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
double MaximumTvmPlus2Tr0PlusTsw;
-   MaximumTvmPlus2Tr0PlusTsw = 
(v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - 
MinimumT

[PATCH 2/9] drm/amd/display: Move two arrays of doubles to UseMinimumDCFCLK_vars

2023-01-10 Thread Rodrigo Siqueira
It was reported that on kernel v6.2-rc1, we have the following stack
size issue:

make[3]: *** [/kisskb/src/scripts/Makefile.build:504: drivers/media]
Error 2
[...]/display/dc/dml/dcn31/display_mode_vba_31.c: In function
'UseMinimumDCFCLK':
[...]/display/dc/dml/dcn31/display_mode_vba_31.c:7082:1: error: the
frame size of 2224 bytes is larger than 2048 bytes
[-Werror=frame-larger-than=]

This commit moves two arrays of doubles from UseMinimumDCFCLK to
UseMinimumDCFCLK_vars and makes the necessary changes to access those
values from the struct.

Cc: Alex Deucher 
Cc: Aurabindo Pillai 
Cc: Hamza Mahfooz 
Cc: Roman Li 
Cc: Geert Uytterhoeven 
Link: https://lore.kernel.org/all/20221227082932.798359-1-ge...@linux-m68k.org/
Reported-by: Geert Uytterhoeven 
Signed-off-by: Rodrigo Siqueira 
---
 .../dc/dml/dcn31/display_mode_vba_31.c| 20 +--
 .../drm/amd/display/dc/dml/display_mode_vba.h |  2 ++
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 00d3c57f0d98..28dcd46a28c0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -6932,8 +6932,6 @@ static void UseMinimumDCFCLK(
NormalEfficiency = 
v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
for (i = 0; i < v->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
-   double 
PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
-   double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
double 
DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
double MinimumTWait;
@@ -6986,13 +6984,13 @@ static void UseMinimumDCFCLK(
double ExpectedPrefetchBWAcceleration;
double PrefetchTime;
 
-   PixelDCFCLKCyclesRequiredInPrefetch[k] = 
(v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * 
v->BytePerPixelY[k]
+   
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] = 
(v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * 
v->BytePerPixelY[k]
+ v->PrefetchLinesC[i][j][k] * 
v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / 
NormalEfficiency / v->ReturnBusWidth;
DCFCLKCyclesRequiredInPrefetch = 2 * 
ExtraLatencyCycles / NoOfDPPState[k]
+ 
v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / 
v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
+ 2 * 
v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / 
v->ReturnBusWidth
-   + 2 * v->MetaRowBytes[i][j][k] 
/ NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
-   PrefetchPixelLinesTime[k] = 
dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] 
/ v->PixelClock[k];
+   + 2 * v->MetaRowBytes[i][j][k] 
/ NormalEfficiency / v->ReturnBusWidth + 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k];
+   
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k] = 
dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] 
/ v->PixelClock[k];
ExpectedPrefetchBWAcceleration = 
(v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
/ (v->ReadBandwidthLuma[k] + 
v->ReadBandwidthChroma[k]);
DynamicMetadataVMExtraLatency[k] =
@@ -7006,9 +7004,9 @@ static void UseMinimumDCFCLK(
 
if (PrefetchTime > 0) {
double ExpectedVRatioPrefetch;
-   ExpectedVRatioPrefetch = 
PrefetchPixelLinesTime[k]
-   / (PrefetchTime * 
PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * 
PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
+   ExpectedVRatioPrefetch = 
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k]
+   / (PrefetchTime * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k

[PATCH 3/9] drm/amd/display: Move DCFCLKRequiredForPeakBandwidthPerPlane to UseMinimumDCFCLK_vars

2023-01-10 Thread Rodrigo Siqueira
It was reported that on kernel v6.2-rc1, we have the following stack
size issue:

make[3]: *** [/kisskb/src/scripts/Makefile.build:504: drivers/media]
Error 2
[...]/display/dc/dml/dcn31/display_mode_vba_31.c: In function
'UseMinimumDCFCLK':
[...]/display/dc/dml/dcn31/display_mode_vba_31.c:7082:1: error: the
frame size of 2224 bytes is larger than 2048 bytes
[-Werror=frame-larger-than=]

This commit moves DCFCLKRequiredForPeakBandwidthPerPlane out of
UseMinimumDCFCLK to reduce the stack size.

Cc: Alex Deucher 
Cc: Aurabindo Pillai 
Cc: Hamza Mahfooz 
Cc: Roman Li 
Cc: Geert Uytterhoeven 
Link: https://lore.kernel.org/all/20221227082932.798359-1-ge...@linux-m68k.org/
Reported-by: Geert Uytterhoeven 
Signed-off-by: Rodrigo Siqueira 
---
 .../display/dc/dml/dcn31/display_mode_vba_31.c| 15 +++
 .../gpu/drm/amd/display/dc/dml/display_mode_vba.h |  1 +
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 28dcd46a28c0..8175f3603f00 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -6932,7 +6932,6 @@ static void UseMinimumDCFCLK(
NormalEfficiency = 
v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
for (i = 0; i < v->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
-   double 
DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
double MinimumTWait;
double NonDPTEBandwidth;
@@ -7006,14 +7005,14 @@ static void UseMinimumDCFCLK(
double ExpectedVRatioPrefetch;
ExpectedVRatioPrefetch = 
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k]
/ (PrefetchTime * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] / 
DCFCLKCyclesRequiredInPrefetch);
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] / 
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k]
+   
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
NoOfDPPState[k] * 
v->UseMinimumDCFCLK_stack_reduction.PixelDCFCLKCyclesRequiredInPrefetch[k] / 
v->UseMinimumDCFCLK_stack_reduction.PrefetchPixelLinesTime[k]
* dml_max(1.0, 
ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * 
ExpectedPrefetchBWAcceleration;
if (v->HostVMEnable == true || 
v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
DCFCLKRequiredForPeakBandwidthPerPlane[k]
+   
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k]
+ 
NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / 
v->ReturnBusWidth;
}
} else {
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
+   
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
v->DCFCLKPerState[i];
}
if (v->DynamicMetadataEnable[k] == true) {
double TSetupPipe;
@@ -7044,17 +7043,17 @@ static void UseMinimumDCFCLK(
AllowedTimeForUrgentExtraLatency = 
v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - 
TSetupPipe - TdmbfPipe - TdmecPipe
- TdmsksPipe - 
DynamicMetadataVMExtraLatency[k];
if (AllowedTimeForUrgentExtraLatency > 
0) {
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
-   
DCFCLKRequiredForPeakBandwidthPerPlane[k],
+   
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
dml_max(
+   
v->UseMinimumDCFCLK_stack_reduction.DCFCLKRequiredForPeakBandwidthPerPlane[k],
   

[PATCH 1/9] drm/amd/display: Introduce UseMinimumDCFCLK_vars to reduce stack size in DML

2023-01-10 Thread Rodrigo Siqueira
It was reported that on kernel v6.2-rc1, we have the following stack
size issue:

make[3]: *** [/kisskb/src/scripts/Makefile.build:504: drivers/media]
Error 2
[...]/display/dc/dml/dcn31/display_mode_vba_31.c: In function
'UseMinimumDCFCLK':
[...]/display/dc/dml/dcn31/display_mode_vba_31.c:7082:1: error: the
frame size of 2224 bytes is larger than 2048 bytes
[-Werror=frame-larger-than=]

This commit introduces a new struct that will accommodate some internal
variables under the UseMinimumDCFCLK function. This change is the first
of a series of patches that gradually move each of the arrays of double
to the new struct to reduce the stack size for DCN31 and DCN314.

Cc: Alex Deucher 
Cc: Aurabindo Pillai 
Cc: Hamza Mahfooz 
Cc: Roman Li 
Cc: Geert Uytterhoeven 
Link: https://lore.kernel.org/all/20221227082932.798359-1-ge...@linux-m68k.org/
Reported-by: Geert Uytterhoeven 
Signed-off-by: Rodrigo Siqueira 
---
 .../gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c | 7 +++
 drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h  | 5 +
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index ec351c8418cb..00d3c57f0d98 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -6928,7 +6928,6 @@ static void UseMinimumDCFCLK(
struct vba_vars_st *v = &mode_lib->vba;
int dummy1, i, j, k;
double NormalEfficiency,  dummy2, dummy3;
-   double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
 
NormalEfficiency = 
v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
for (i = 0; i < v->soc.num_states; ++i) {
@@ -6947,9 +6946,9 @@ static void UseMinimumDCFCLK(
int NoOfDPPState[DC__NUM_DPP__MAX];
double MinimumTvmPlus2Tr0;
 
-   TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
+   
v->UseMinimumDCFCLK_stack_reduction.TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 
= 0;
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
-   TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 
TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
+   
v->UseMinimumDCFCLK_stack_reduction.TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 
= v->UseMinimumDCFCLK_stack_reduction.TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
+ v->NoOfDPP[i][j][k] * 
v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
}
 
@@ -6960,7 +6959,7 @@ static void UseMinimumDCFCLK(
MinimumTWait = CalculateTWait(MaxPrefetchMode, 
v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] 
+ v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
DPTEBandwidth = (v->HostVMEnable == true || 
v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
-   
TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
+   
v->UseMinimumDCFCLK_stack_reduction.TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 
: v->TotalDPTERowBandwidth[i][j];
DCFCLKRequiredForAverageBandwidth = dml_max3(
v->ProjectedDCFCLKDeepSleep[i][j],
(NonDPTEBandwidth + 
v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 81e53e67cd0b..660c22a19c8d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -249,6 +249,10 @@ struct dml32_ModeSupportAndSystemConfigurationFull {
bool dummy_boolean[2];
 };
 
+struct UseMinimumDCFCLK_vars {
+   double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
+};
+
 struct dummy_vars {
struct 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation

DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation;
@@ -1236,6 +1240,7 @@ struct vba_vars_st {
bool VActiveBandwithSupport[DC__VOLTAGE_STATES][2];
bool NotEnoughDETSwathFillLatencyHidingPerState[DC__VOLTAGE_STATES][2];
struct dummy_vars dummy_vars;
+   struct UseMinimumDCFCLK_vars UseMinimumDCFCLK_stack_reduction;
 };
 
 bool CalculateMinAndMaxPrefetchMode(
-- 
2.39.0



[PATCH 0/9] Reduce stack size for DCN31/314

2023-01-10 Thread Rodrigo Siqueira
Geert Uytterhoeven reported compiler issues in some specific gcc
versions that point to a large stack size for DCN31/314. Unfortunately,
I could not reproduce the problem with the latest version of GCC/Clang;
but I still believe this is a valid issue. Therefore, this patchset
attempts to address those issues by moving many arrays of doubles from
the local declaration to some specific structs. The first patch in this
series introduces the new struct, and the subsequent patches move arrays
of doubles from the local function in favor of using those values from
the struct.

Geert Uytterhoeven, since I don't have a simple way to reproduce this
issue, could you run the CI in this series to check if this patchset
solves the issues?

Cc: Alex Deucher 
Cc: Aurabindo Pillai 
Cc: Hamza Mahfooz 
Cc: Roman Li 
Cc: Harry Wentland 
Cc: Geert Uytterhoeven 
Link: https://lore.kernel.org/all/20221227082932.798359-1-ge...@linux-m68k.org/

Thanks
Siqueira

Rodrigo Siqueira (9):
  drm/amd/display: Introduce UseMinimumDCFCLK_vars to reduce stack size
in DML
  drm/amd/display: Move two arrays of doubles to UseMinimumDCFCLK_vars
  drm/amd/display: Move DCFCLKRequiredForPeakBandwidthPerPlane to
UseMinimumDCFCLK_vars
  drm/amd/display: Move DynamicMetadataVMExtraLatency to
UseMinimumDCFCLK_vars
  drm/amd/display: Use matrix of double from struct instead of local
variable
  drm/amd/display: Use PixelDCFCLKCyclesRequiredInPrefetch from struct
  drm/amd/display: Use PrefetchPixelLinesTime from struct instead of
local variable
  drm/amd/display: Use DCFCLKRequiredForPeakBandwidthPerPlane from
struct
  drm/amd/display: Use DynamicMetadataVMExtraLatency from struct

 .../dc/dml/dcn31/display_mode_vba_31.c| 49 +--
 .../dc/dml/dcn314/display_mode_vba_314.c  | 49 +--
 .../drm/amd/display/dc/dml/display_mode_vba.h |  9 
 3 files changed, 53 insertions(+), 54 deletions(-)

-- 
2.39.0



Re: [PATCH] drm/amdkfd: Fix NULL pointer error for GC 11.0.1 on mGPU

2023-01-10 Thread Felix Kuehling

Am 2023-01-05 um 14:28 schrieb Eric Huang:

The pointer bo->kfd_bo is NULL for a queue's write pointer BO
when creating a queue on mGPU. Avoiding the use of this pointer
fixes the error.

Signed-off-by: Eric Huang 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9885735f1a30..d4c29e9edf34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2179,7 +2179,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device 
*adev, struct amdgpu_b
}
  
  	amdgpu_amdkfd_remove_eviction_fence(

-   bo, bo->kfd_bo->process_info->eviction_fence);
+   bo, bo->vm_bo->vm->process_info->eviction_fence);
  
  	amdgpu_bo_unreserve(bo);
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

index 6013f498ea1e..55c2dc48e567 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -231,7 +231,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
  
  	if (q->wptr_bo) {

-   wptr_addr_off = (uint64_t)q->properties.write_ptr - 
(uint64_t)q->wptr_bo->kfd_bo->va;
+   wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE 
- 1);
queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start 
<< PAGE_SHIFT) + wptr_addr_off;
}
  


Re: [PATCH 2/2] drm/amdgpu: add AMDGPU_INFO_VM_STAT to return GPU VM

2023-01-10 Thread Marek Olšák
On Wed, Jan 4, 2023 at 9:51 AM Christian König <
ckoenig.leichtzumer...@gmail.com> wrote:

> Am 04.01.23 um 00:08 schrieb Marek Olšák:
>
> I see about the access now, but did you even look at the patch?
>
>
> I did look at the patch, but I haven't fully understood yet what you are
> trying to do here.
>

First and foremost, it returns the evicted size of VRAM and visible VRAM,
and returns visible VRAM usage. It should be obvious which stat includes
the size of another.


> Because what the patch does isn't even exposed to common drm code, such as
> the preferred domain and visible VRAM placement, so it can't be in fdinfo
> right now.
>
> Or do you even know what fdinfo contains? Because it contains nothing
> useful. It only has VRAM and GTT usage, which we already have in the INFO
> ioctl, so it has nothing that we need. We mainly need the eviction
> information and visible VRAM information now. Everything else is a bonus.
>
>
> Well the main question is what are you trying to get from that
> information? The eviction list for example is completely meaningless to
> userspace, that stuff is only temporary and will be cleared on the next CS
> again.
>

I don't know what you mean. The returned eviction stats look correct and
are stable (they don't change much). You can suggest changes if you think
some numbers are not reported correctly.


>
> What we could expose is the VRAM over-commit value, e.g. how much BOs
> which where supposed to be in VRAM are in GTT now. I think that's what you
> are looking for here, right?
>

The VRAM overcommit value is "evicted_vram".


>
> Also, it's undesirable to open and parse a text file if we can just call
> an ioctl.
>
>
> Well I see the reasoning for that, but I also see why other drivers do a
> lot of the stuff we have as IOCTL as separate files in sysfs, fdinfo or
> debugfs.
>
> Especially repeating all the static information which were already
> available under sysfs in the INFO IOCTL was a design mistake as far as I
> can see. Just compare what AMDGPU and the KFD code is doing to what for
> example i915 is doing.
>
> Same for things like debug information about a process. The fdinfo stuff
> can be queried from external tools (gdb, gputop, umr etc...) as well which
> makes that interface more preferred.
>

Nothing uses fdinfo in Mesa. No driver uses sysfs in Mesa except drm shims,
noop drivers, and Intel for perf metrics. sysfs itself is an unusable mess
for the PCIe query and is missing information.

I'm not against exposing more stuff through sysfs and fdinfo for tools, but
I don't see any reason why drivers should use it (other than for slowing
down queries and initialization).

Marek


Re: [PATCH] drm: Alloc high address for drm buddy topdown flag

2023-01-10 Thread Arunpravin Paneer Selvam

Hi Matthew,

On 1/10/2023 5:32 PM, Matthew Auld wrote:

On 07/01/2023 15:15, Arunpravin Paneer Selvam wrote:

As we are observing low numbers in viewperf graphics benchmark, we
are strictly not allowing the top down flag enabled allocations
to steal the memory space from cpu visible region.

The approach is, we are sorting each order list entries in
ascending order and compare the last entry of each order
list in the freelist and return the max block.


Did you also run the selftests? Does everything still pass and 
complete in a reasonable amount of time?

I will try giving a run


This patch improves the viewperf 3D benchmark scores.

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/drm_buddy.c | 81 -
  1 file changed, 54 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..50916b2f2fc5 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm,
  kmem_cache_free(slab_blocks, block);
  }
  +static void list_insert_sorted(struct drm_buddy *mm,
+   struct drm_buddy_block *block)
+{
+    struct drm_buddy_block *node;
+    struct list_head *head;
+
+    head = &mm->free_list[drm_buddy_block_order(block)];
+    if (list_empty(head)) {
+    list_add(&block->link, head);
+    return;
+    }
+
+    list_for_each_entry(node, head, link)
+    if (drm_buddy_block_offset(block) < 
drm_buddy_block_offset(node))

+    break;
+
+    __list_add(&block->link, node->link.prev, &node->link);
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
  block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm,
  block->header &= ~DRM_BUDDY_HEADER_STATE;
  block->header |= DRM_BUDDY_FREE;
  -    list_add(&block->link,
- &mm->free_list[drm_buddy_block_order(block)]);
+    list_insert_sorted(mm, block);


One advantage of not sorting is when splitting down a large block. 
Previously the most-recently-split would be at the start of the list 
for the next order down, where potentially the next allocation could 
use it. So perhaps less fragmentation if it's all part of one BO. 
Otherwise I don't see any other downsides, other than the extra 
overhead of sorting.


Allocating from the freelist traverses from the right side (i.e., the
topmost address range). For TOPDOWN flag allocations we split the topmost
large block only once, and the subsequent TOPDOWN low-order allocations
get their blocks from that same already-split large block.
For the normal allocations, I modified to retrieve the blocks in each 
order list from the last entry which has the high probability of getting 
top most blocks as we have sorted the blocks in each order list.
Thus the bottom most large blocks are not frequently used, hence we have 
more space for the visible region on dGPU.


For APU which has small sized VRAM space, the allocations are now 
ordered and we don't allocate randomly from freelist solving 
fragmentation issues.

  }
    static void mark_split(struct drm_buddy_block *block)
@@ -387,20 +405,26 @@ alloc_range_bias(struct drm_buddy *mm,
  }
    static struct drm_buddy_block *
-get_maxblock(struct list_head *head)
+get_maxblock(struct drm_buddy *mm, unsigned int order)
  {
  struct drm_buddy_block *max_block = NULL, *node;
+    unsigned int i;
  -    max_block = list_first_entry_or_null(head,
- struct drm_buddy_block,
- link);
-    if (!max_block)
-    return NULL;
+    for (i = order; i <= mm->max_order; ++i) {
+    if (!list_empty(&mm->free_list[i])) {
+    node = list_last_entry(&mm->free_list[i],
+   struct drm_buddy_block,
+   link);
+    if (!max_block) {
+    max_block = node;
+    continue;
+    }
  -    list_for_each_entry(node, head, link) {
-    if (drm_buddy_block_offset(node) >
-    drm_buddy_block_offset(max_block))
-    max_block = node;
+    if (drm_buddy_block_offset(node) >
+    drm_buddy_block_offset(max_block)) {


Formatting doesn't look right here.

I will check.


Going to test this today with some workloads with small-bar and i915 
just to see if this improves/impacts anything for us.



+    max_block = node;
+    }
+    }
  }
    return max_block;
@@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm,
  unsigned long flags)
  {
  struct drm_buddy_block *block = NULL;
-    unsigned int i;
+    unsigned int tmp;
  int err;
  -    for (i = order; i <= mm->max_order; ++i) {
-    if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
-    block = get_maxblock(&mm->free_list[i]);
-    if (block)
-    break;
-    } else {
-    blo

Re: [regression, bisected, pci/iommu] Bug 216865 - Black screen when amdgpu started during 6.2-rc1 boot with AMD IOMMU enabled

2023-01-10 Thread Jason Gunthorpe
On Tue, Jan 10, 2023 at 10:05:44AM -0500, Felix Kuehling wrote:
> Am 2023-01-10 um 08:45 schrieb Christian König:
> > And I'm like 99% sure that Kabini/Wani should be identical to that.
> 
> Kabini is not supported by KFD. There should be no calls to amd_iommu_...
> functions on Kabini, at least not from kfd_iommu.c. And I'm not aware of any
> other callers in amdgpu.ko.

The backtrace from the system says otherwise..

>> [   13.515710]  amd_iommu_attach_device+0x2e0/0x300
>> [   13.515719]  __iommu_attach_device+0x1b/0x90
>> [   13.515727]  iommu_attach_group+0x65/0xa0
>> [   13.515735]  amd_iommu_init_device+0x16b/0x250 [iommu_v2]
>> [   13.515747]  kfd_iommu_resume+0x4c/0x1a0 [amdgpu]
>> [   13.517094]  kgd2kfd_resume_iommu+0x12/0x30 [amdgpu]
>> [   13.518419]  kgd2kfd_device_init.cold+0x346/0x49a [amdgpu]
>> [   13.519699]  amdgpu_amdkfd_device_init+0x142/0x1d0 [amdgpu]
>> [   13.520877]  amdgpu_device_init.cold+0x19f5/0x1e21 [amdgpu]
>> [   13.522118]  ? _raw_spin_lock_irqsave+0x23/0x50
>> [   13.522126]  amdgpu_driver_load_kms+0x15/0x110 [amdgpu]
>> [   13.523386]  amdgpu_pci_probe+0x161/0x370 [amdgpu]
>> [   13.524516]  local_pci_probe+0x41/0x80
>> [   13.524525]  pci_device_probe+0xb3/0x220
>> [   13.524533]  really_probe+0xde/0x380
>> [   13.524540]  ? pm_runtime_barrier+0x50/0x90
>> [   13.524546]  __driver_probe_device+0x78/0x170
>> [   13.524555]  driver_probe_device+0x1f/0x90
>> [   13.524560]  __driver_attach+0xce/0x1c0
>> [   13.524565]  ? __pfx___driver_attach+0x10/0x10
>> [   13.524570]  bus_for_each_dev+0x73/0xa0
>> [   13.524575]  bus_add_driver+0x1ae/0x200
>> [   13.524580]  driver_register+0x89/0xe0
>> [   13.524586]  ? __pfx_init_module+0x10/0x10 [amdgpu]
>> [   13.525819]  do_one_initcall+0x59/0x230
>> [   13.525828]  do_init_module+0x4a/0x200
>> [   13.525834]  __do_sys_init_module+0x157/0x180
>> [   13.525839]  do_syscall_64+0x5b/0x80
>> [   13.525845]  ? handle_mm_fault+0xff/0x2f0
>> [   13.525850]  ? do_user_addr_fault+0x1ef/0x690
>> [   13.525856]  ? exc_page_fault+0x70/0x170
>> [   13.525860]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
>> [   13.525867] RIP: 0033:0x7fabd66cde4e

https://lore.kernel.org/all/157c4ca4-370a-5d7e-fe32-c64d934f6...@amd.com/

Jason


Re: [regression, bisected, pci/iommu] Bug 216865 - Black screen when amdgpu started during 6.2-rc1 boot with AMD IOMMU enabled

2023-01-10 Thread Felix Kuehling

Am 2023-01-10 um 10:19 schrieb Jason Gunthorpe:

On Tue, Jan 10, 2023 at 10:05:44AM -0500, Felix Kuehling wrote:

Am 2023-01-10 um 08:45 schrieb Christian König:

And I'm like 99% sure that Kabini/Wani should be identical to that.

Kabini is not supported by KFD. There should be no calls to amd_iommu_...
functions on Kabini, at least not from kfd_iommu.c. And I'm not aware of any
other callers in amdgpu.ko.

The backtrace from the system says otherwise..


That log is for Carrizo, not Kabini:

[   13.143970] [drm] initializing kernel modesetting (CARRIZO 
0x1002:0x9874 >> 0x103C:0x8332 0xCA).

Carrizo is supported by KFD, and it does support ATS/PRI.

Regards,
  Felix





[   13.515710]  amd_iommu_attach_device+0x2e0/0x300
[   13.515719]  __iommu_attach_device+0x1b/0x90
[   13.515727]  iommu_attach_group+0x65/0xa0
[   13.515735]  amd_iommu_init_device+0x16b/0x250 [iommu_v2]
[   13.515747]  kfd_iommu_resume+0x4c/0x1a0 [amdgpu]
[   13.517094]  kgd2kfd_resume_iommu+0x12/0x30 [amdgpu]
[   13.518419]  kgd2kfd_device_init.cold+0x346/0x49a [amdgpu]
[   13.519699]  amdgpu_amdkfd_device_init+0x142/0x1d0 [amdgpu]
[   13.520877]  amdgpu_device_init.cold+0x19f5/0x1e21 [amdgpu]
[   13.522118]  ? _raw_spin_lock_irqsave+0x23/0x50
[   13.522126]  amdgpu_driver_load_kms+0x15/0x110 [amdgpu]
[   13.523386]  amdgpu_pci_probe+0x161/0x370 [amdgpu]
[   13.524516]  local_pci_probe+0x41/0x80
[   13.524525]  pci_device_probe+0xb3/0x220
[   13.524533]  really_probe+0xde/0x380
[   13.524540]  ? pm_runtime_barrier+0x50/0x90
[   13.524546]  __driver_probe_device+0x78/0x170
[   13.524555]  driver_probe_device+0x1f/0x90
[   13.524560]  __driver_attach+0xce/0x1c0
[   13.524565]  ? __pfx___driver_attach+0x10/0x10
[   13.524570]  bus_for_each_dev+0x73/0xa0
[   13.524575]  bus_add_driver+0x1ae/0x200
[   13.524580]  driver_register+0x89/0xe0
[   13.524586]  ? __pfx_init_module+0x10/0x10 [amdgpu]
[   13.525819]  do_one_initcall+0x59/0x230
[   13.525828]  do_init_module+0x4a/0x200
[   13.525834]  __do_sys_init_module+0x157/0x180
[   13.525839]  do_syscall_64+0x5b/0x80
[   13.525845]  ? handle_mm_fault+0xff/0x2f0
[   13.525850]  ? do_user_addr_fault+0x1ef/0x690
[   13.525856]  ? exc_page_fault+0x70/0x170
[   13.525860]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
[   13.525867] RIP: 0033:0x7fabd66cde4e

https://lore.kernel.org/all/157c4ca4-370a-5d7e-fe32-c64d934f6...@amd.com/

Jason


Re: [PATCH] drm/amdkfd: Fix NULL pointer error for GC 11.0.1 on mGPU

2023-01-10 Thread Eric Huang

Ping.

On 2023-01-05 14:28, Eric Huang wrote:

The pointer bo->kfd_bo is NULL for a queue's write pointer BO
when creating a queue on mGPU. Avoiding the use of this pointer
fixes the error.

Signed-off-by: Eric Huang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9885735f1a30..d4c29e9edf34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2179,7 +2179,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device 
*adev, struct amdgpu_b
}
  
  	amdgpu_amdkfd_remove_eviction_fence(

-   bo, bo->kfd_bo->process_info->eviction_fence);
+   bo, bo->vm_bo->vm->process_info->eviction_fence);
  
  	amdgpu_bo_unreserve(bo);
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

index 6013f498ea1e..55c2dc48e567 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -231,7 +231,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
  
  	if (q->wptr_bo) {

-   wptr_addr_off = (uint64_t)q->properties.write_ptr - 
(uint64_t)q->wptr_bo->kfd_bo->va;
+   wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE 
- 1);
queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start 
<< PAGE_SHIFT) + wptr_addr_off;
}
  




Re: [regression, bisected, pci/iommu] Bug 216865 - Black screen when amdgpu started during 6.2-rc1 boot with AMD IOMMU enabled

2023-01-10 Thread Felix Kuehling

Am 2023-01-10 um 08:45 schrieb Christian König:
And I'm like 99% sure that Kabini/Wani should be identical to that. 


Kabini is not supported by KFD. There should be no calls to 
amd_iommu_... functions on Kabini, at least not from kfd_iommu.c. And 
I'm not aware of any other callers in amdgpu.ko.


Regards,
  Felix




Re: [PATCH 8/8] drm/amd/pm: drop the support for manual fan speed setting on SMU13.0.7

2023-01-10 Thread Matt Coffin

On 1/9/23 23:48, Quan, Evan wrote:

[AMD Official Use Only - General]

We need these to address the fan speed setting failure reported for the new 
SMU13 asics.
My opinion shouldn't matter much given sparseness of activity, but, 
despite his... short tonality, I agree with Lijo's assessment there.


As someone less familiar with the code base, the use of "multiple 
sources of truth" contributes to making it harder to understand and ramp 
up with.


As for the sysfs fan control issue itself, this won't really "fix" the 
issue, but rather just remove write permissions from the hwmon files (if 
my testing+understanding is right), so it wouldn't seem to be a 
hyper-critical deliverable to me as a random outsider looking in 
(despite being effected by the issue personally). Even with that 
interface removed, there isn't another way to control the "auto" fans, 
as the FW reports it doesn't support that capability in the OD table, 
and ignores anything set therein for that purpose. Hopefully that's 
temporary until FW gets fixed?


I also think the behavior of the other proposed solution (removal of 
interface functions from the ppt_funcs), is objectively a better 
experience w/r/t outcome. If the functions are NULL, then the hwmon 
device files go away *completely*, instead of just being masked from 
write permission, which would make the message clearer to the end user 
that it's not an error, but an actual lack of functionality.


Cheers, sorry I couldn't help, but wanted to at least have something 
come of the last few days I spent implementing OD on SMU13 for nothing!


~Matt


Re: [regression, bisected, pci/iommu] Bug 216865 - Black screen when amdgpu started during 6.2-rc1 boot with AMD IOMMU enabled

2023-01-10 Thread Christian König

Am 10.01.23 um 14:51 schrieb Jason Gunthorpe:

On Tue, Jan 10, 2023 at 02:45:30PM +0100, Christian König wrote:


Since this is a device integrated in the CPU it could be that the ACS/ATS
functionalities are controlled by the BIOS and can be enabled/disabled
there. But this should always enable/disable both.

This sounds like a GPU driver bug then, it should tolerate PASID being
unavailable because of BIOS issues/whatever and not black screen on
boot?


Yeah, potentially. Could I get a full "sudo lspci -vvv -s $bus_id" + 
dmesg of that device?


Thanks,
Christian.



Jason




Re: [regression, bisected, pci/iommu] Bug 216865 - Black screen when amdgpu started during 6.2-rc1 boot with AMD IOMMU enabled

2023-01-10 Thread Jason Gunthorpe
On Tue, Jan 10, 2023 at 01:48:39PM +0800, Baolu Lu wrote:
> On 2023/1/6 22:14, Jason Gunthorpe wrote:
> > On Thu, Jan 05, 2023 at 03:57:28PM +0530, Vasant Hegde wrote:
> > > Matt,
> > > 
> > > On 1/5/2023 6:39 AM, Matt Fagnani wrote:
> > > > I built 6.2-rc2 with the patch applied. The same black screen problem 
> > > > happened
> > > > with 6.2-rc2 with the patch. I tried to use early kdump with 6.2-rc2 
> > > > with the
> > > > patch twice by panicking the kernel with sysrq+alt+c after the black 
> > > > screen
> > > > happened. The system rebooted after about 10-20 seconds both times, but 
> > > > no kdump
> > > > and dmesg files were saved in /var/crash. I'm attaching the lspci -vvv 
> > > > output as
> > > > requested.
> > > > 
> > > Thanks for testing. As mentioned earlier I was not expecting this patch 
> > > to fix
> > > the black screen issue. It should fix kernel warnings and IOMMU page fault
> > > related call traces. By any chance do you have the kernel boot logs?
> > > 
> > > 
> > > @Baolu,
> > >Looking into lspci output, it doesn't list ACS feature for Graphics 
> > > card. So
> > > with your fix it didn't enable PASID and hence it failed to boot.
> > The ACS checks being done are feature of the path not the end point or
> > root port.
> > 
> > If we are expecting ACS on the end port then it is just a bug in how
> > the test was written.. The test should be a NOP because there are no
> > switches in this topology.
> > 
> > Looking at it, this seems to just be because pci_enable_pasid is
> > calling pci_acs_path_enabled wrong, the only other user is here:
> > 
> > for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
> > if (!bus->self)
> > continue;
> > 
> > if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
> > break;
> > 
> > pdev = bus->self;
> > 
> > group = iommu_group_get(&pdev->dev);
> > if (group)
> > return group;
> > }
> > 
> > And notice it is calling it on pdev->bus not on pdev itself which
> > naturally excludes the end point from the ACS validation.
> > 
> > So try something like:
> > 
> > if (!pci_acs_path_enabled(pdev->bus->self, NULL, PCI_ACS_RR | 
> > PCI_ACS_UF))
> > 
> > (and probably need to check for null ?)
> 
> Hi Matt,
> 
> Do you mind helping to test below change? No other change needed.
> 
> diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
> index f9cc2e10b676..48f34cc996e4 100644
> --- a/drivers/pci/ats.c
> +++ b/drivers/pci/ats.c
> @@ -382,8 +382,15 @@ int pci_enable_pasid(struct pci_dev *pdev, int
> features)
> if (!pasid)
> return -EINVAL;
> 
> -   if (!pci_acs_path_enabled(pdev, NULL, PCI_ACS_RR | PCI_ACS_UF))
> -   return -EINVAL;
> +   if (pdev->multifunction) {
> +   if (!pci_acs_path_enabled(pdev, NULL, PCI_ACS_RR |
> PCI_ACS_UF))
> +   return -EINVAL;

The AMD device is multi-function according to the lspci, and we
already know that 'pci_acs_path_enabled' will fail on it because that
is the problem..

Actually, I remember it is supposed to be like this:

 https://lore.kernel.org/linux-iommu/ygpb6cxmtduhi...@8bytes.org/

The GPU and sound device are considered non-isolated by the group
code, presumably because of the missing ACS caps.

So, if I remember the issue, PCIe says that MemWr/Rd are routed
according to their address and ignore the PASID header.

A multifunction device is permitted to loop back DMAs one function
issues that match a MMIO BAR of another function. eg the GPU could DMA
to an MMIO address that overlaps the sound device and the function
will deliver the MMIO to the sound device not the host bridge even
though it is PASID tagged.

This is what get_pci_function_alias_group() is looking for.

Multifunction devices that do not do that are supposed to set the ACS
RR|UF bits and get_pci_function_alias_group()/etc are supposed to
succeed.

Thus - the PCI information is telling us that the AMD GPU device does
not support PASID because it may be looping back the MMIO to the other
functions on the device and thus creating an unacceptable hole in the
PASID address space.

So - we need AMD to comment on which of these describes their GPU device:

 1) Is the issue that the PCI Caps are incorrect on this device and
 there is no loopback? Thus we should fix it with a quirk to correct
 the caps which will naturally split the iommu group too.

 2) Is the device broken and loops back PASID DMAs and we are
 legimate and correct in blocking PASID? So far AMD just got lucky
 that no user had a SVA that overlaps with MMIO? Seems unlikely

 3) Is the device odd in that it doesn't loop back PASID tagged DMAs,
 but does loop untagged? I would say this is non-compliant and PCI
 provides no way to describe this. But we should again quirk it to
 allow the PASID to be enabled but keep the group separated.

Alex/Christian/Pan - can you ple

Re: [regression, bisected, pci/iommu] Bug 216865 - Black screen when amdgpu started during 6.2-rc1 boot with AMD IOMMU enabled

2023-01-10 Thread Jason Gunthorpe
On Tue, Jan 10, 2023 at 02:45:30PM +0100, Christian König wrote:

> Since this is a device integrated in the CPU it could be that the ACS/ATS
> functionalities are controlled by the BIOS and can be enabled/disabled
> there. But this should always enable/disable both.

This sounds like a GPU driver bug then, it should tolerate PASID being
unavailable because of BIOS issues/whatever and not black screen on
boot?

Jason


Re: [regression, bisected, pci/iommu] Bug 216865 - Black screen when amdgpu started during 6.2-rc1 boot with AMD IOMMU enabled

2023-01-10 Thread Christian König

Am 10.01.23 um 14:25 schrieb Jason Gunthorpe:

On Tue, Jan 10, 2023 at 01:48:39PM +0800, Baolu Lu wrote:

On 2023/1/6 22:14, Jason Gunthorpe wrote:

On Thu, Jan 05, 2023 at 03:57:28PM +0530, Vasant Hegde wrote:

Matt,

On 1/5/2023 6:39 AM, Matt Fagnani wrote:

I built 6.2-rc2 with the patch applied. The same black screen problem happened
with 6.2-rc2 with the patch. I tried to use early kdump with 6.2-rc2 with the
patch twice by panicking the kernel with sysrq+alt+c after the black screen
happened. The system rebooted after about 10-20 seconds both times, but no kdump
and dmesg files were saved in /var/crash. I'm attaching the lspci -vvv output as
requested.


Thanks for testing. As mentioned earlier I was not expecting this patch to fix
the black screen issue. It should fix kernel warnings and IOMMU page fault
related call traces. By any chance do you have the kernel boot logs?


@Baolu,
Looking into lspci output, it doesn't list ACS feature for Graphics card. So
with your fix it didn't enable PASID and hence it failed to boot.

The ACS checks being done are feature of the path not the end point or
root port.

If we are expecting ACS on the end port then it is just a bug in how
the test was written.. The test should be a NOP because there are no
switches in this topology.

Looking at it, this seems to just be because pci_enable_pasid is
calling pci_acs_path_enabled wrong, the only other user is here:

for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
if (!bus->self)
continue;

if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
break;

pdev = bus->self;

group = iommu_group_get(&pdev->dev);
if (group)
return group;
}

And notice it is calling it on pdev->bus not on pdev itself which
naturally excludes the end point from the ACS validation.

So try something like:

if (!pci_acs_path_enabled(pdev->bus->self, NULL, PCI_ACS_RR | 
PCI_ACS_UF))

(and probably need to check for null ?)

Hi Matt,

Do you mind helping to test below change? No other change needed.

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index f9cc2e10b676..48f34cc996e4 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -382,8 +382,15 @@ int pci_enable_pasid(struct pci_dev *pdev, int
features)
 if (!pasid)
 return -EINVAL;

-   if (!pci_acs_path_enabled(pdev, NULL, PCI_ACS_RR | PCI_ACS_UF))
-   return -EINVAL;
+   if (pdev->multifunction) {
+   if (!pci_acs_path_enabled(pdev, NULL, PCI_ACS_RR |
PCI_ACS_UF))
+   return -EINVAL;

The AMD device is multi-function according to the lspci, and we
already know that 'pci_acs_path_enabled' will fail on it because that
is the problem..

Actually, I remember it is supposed to be like this:

  
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Flinux-iommu%2FYgpb6CxmTdUHiN50%408bytes.org%2F&data=05%7C01%7Cchristian.koenig%40amd.com%7Cb45e8c5a24394d66ae2908daf30e3802%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638089539666187724%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=vf9QsDFqp9s1NUxuP5iMsQJn1R0K9tVRTImTR6uZWAE%3D&reserved=0

The GPU and sound device are considered non-isolated by the group
code, presumably because of the missing ACS caps.

So, if I remember the issue, PCIe says that MemWr/Rd are routed
according to their address and ignore the PASID header.

A multifunction device is permitted to loop back DMAs one function
issues that match a MMIO BAR of another function. eg the GPU could DMA
to an MMIO address that overlaps the sound device and the function
will deliver the MMIO to the sound device not the host bridge even
though it is PASID tagged.

This is what get_pci_function_alias_group() is looking for.

Multifunction devices that do not do that are supposed to set the ACS
RR|UF bits and get_pci_function_alias_group()/etc are supposed to
succeed.

Thus - the PCI information is telling us that the AMD GPU device does
not support PASID because it may be looping back the MMIO to the other
functions on the device and thus creating an unacceptable hole in the
PASID address space.

So - we need AMD to comment on which of these describes their GPU device:

  1) Is the issue that the PCI Caps are incorrect on this device and
  there is no loopback? Thus we should fix it with a quirk to correct
  the caps which will naturally split the iommu group too.

  2) Is the device broken and loops back PASID DMAs and we are
  legitimate and correct in blocking PASID? So far AMD just got lucky
  that no user had a SVA that overlaps with MMIO? Seems unlikely

  3) Is the device odd in that it doesn't loop back PASID tagged DMAs,
  but does loop untagged? I would say this is non-compliant and PCI
  provides no way to describe this. But we should again quirk it to
  allow the PASID to be enabled but keep the group separated.

Re: [PATCH] drm: Alloc high address for drm buddy topdown flag

2023-01-10 Thread Matthew Auld

On 07/01/2023 15:15, Arunpravin Paneer Selvam wrote:

As we are observing low numbers in viewperf graphics benchmark, we
are strictly not allowing the top down flag enabled allocations
to steal the memory space from cpu visible region.

The approach is, we are sorting each order list entries in
ascending order and compare the last entry of each order
list in the freelist and return the max block.


Did you also run the selftests? Does everything still pass and complete 
in a reasonable amount of time?




This patch improves the viewperf 3D benchmark scores.

Signed-off-by: Arunpravin Paneer Selvam 
---
  drivers/gpu/drm/drm_buddy.c | 81 -
  1 file changed, 54 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..50916b2f2fc5 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
  }
  
+static void list_insert_sorted(struct drm_buddy *mm,

+  struct drm_buddy_block *block)
+{
+   struct drm_buddy_block *node;
+   struct list_head *head;
+
+   head = &mm->free_list[drm_buddy_block_order(block)];
+   if (list_empty(head)) {
+   list_add(&block->link, head);
+   return;
+   }
+
+   list_for_each_entry(node, head, link)
+   if (drm_buddy_block_offset(block) < 
drm_buddy_block_offset(node))
+   break;
+
+   __list_add(&block->link, node->link.prev, &node->link);
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm,
block->header &= ~DRM_BUDDY_HEADER_STATE;
block->header |= DRM_BUDDY_FREE;
  
-	list_add(&block->link,

-&mm->free_list[drm_buddy_block_order(block)]);
+   list_insert_sorted(mm, block);


One advantage of not sorting is when splitting down a large block. 
Previously the most-recently-split would be at the start of the list for 
the next order down, where potentially the next allocation could use it. 
So perhaps less fragmentation if it's all part of one BO. Otherwise I 
don't see any other downsides, other than the extra overhead of sorting.



  }
  
  static void mark_split(struct drm_buddy_block *block)

@@ -387,20 +405,26 @@ alloc_range_bias(struct drm_buddy *mm,
  }
  
  static struct drm_buddy_block *

-get_maxblock(struct list_head *head)
+get_maxblock(struct drm_buddy *mm, unsigned int order)
  {
struct drm_buddy_block *max_block = NULL, *node;
+   unsigned int i;
  
-	max_block = list_first_entry_or_null(head,

-struct drm_buddy_block,
-link);
-   if (!max_block)
-   return NULL;
+   for (i = order; i <= mm->max_order; ++i) {
+   if (!list_empty(&mm->free_list[i])) {
+   node = list_last_entry(&mm->free_list[i],
+  struct drm_buddy_block,
+  link);
+   if (!max_block) {
+   max_block = node;
+   continue;
+   }
  
-	list_for_each_entry(node, head, link) {

-   if (drm_buddy_block_offset(node) >
-   drm_buddy_block_offset(max_block))
-   max_block = node;
+   if (drm_buddy_block_offset(node) >
+   drm_buddy_block_offset(max_block)) {


Formatting doesn't look right here.

Going to test this today with some workloads with small-bar and i915 
just to see if this improves/impacts anything for us.



+   max_block = node;
+   }
+   }
}
  
  	return max_block;

@@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm,
unsigned long flags)
  {
struct drm_buddy_block *block = NULL;
-   unsigned int i;
+   unsigned int tmp;
int err;
  
-	for (i = order; i <= mm->max_order; ++i) {

-   if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
-   block = get_maxblock(&mm->free_list[i]);
-   if (block)
-   break;
-   } else {
-   block = list_first_entry_or_null(&mm->free_list[i],
-struct drm_buddy_block,
-link);
-   if (block)
-   break;
+   if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
+   block = get_maxblock(mm, order);
+   if (block)
+   /* Store the obt

Re: [PATCH v3] drm: Only select I2C_ALGOBIT for drivers that actually need it

2023-01-10 Thread Javier Martinez Canillas
On 12/19/22 09:49, Javier Martinez Canillas wrote:
> Hello Uwe,
> 
> On 12/19/22 09:36, Uwe Kleine-König wrote:
>> While working on a drm driver that doesn't need the i2c algobit stuff I
>> noticed that DRM selects this code even though only 8 drivers actually use
>> it. While also only some drivers use i2c, keep the select for I2C for the
>> next cleanup patch. Still prepare this already by also selecting I2C for
>> the individual drivers.
>>
>> Signed-off-by: Uwe Kleine-König 
>> ---
> 
> Thanks for sending a v3 of this.
> 
> Reviewed-by: Javier Martinez Canillas 
> 

I've pushed this to drm-misc (dri-misc-next) now. Thanks!

-- 
Best regards,

Javier Martinez Canillas
Core Platforms
Red Hat