Re: [PATCH v2 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-10 Thread Lang Yu
On 06/10/2022, Graham Sider wrote:
> Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
> GART for usermode queues in order to support oversubscription. In the
> case that work is submitted to an unmapped queue, MES must have a GART
> wptr address to determine whether the queue should be mapped.
> 
> This change is accompanied with changes in MES and is applicable for
> MES_VERSION >= 3.
> 
> v2: Update MES_VERSION check from 2 to 3.
> 
> Signed-off-by: Graham Sider 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 39 
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 45 ++-
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
>  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
>  .../amd/amdkfd/kfd_process_queue_manager.c| 19 +---
>  7 files changed, 110 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 429b16ba10bf..dba26d1e3be9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -301,6 +301,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct 
> amdgpu_device *adev,
>   struct kgd_mem *mem, void **kptr, uint64_t *size);
>  void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
>   struct kgd_mem *mem);
> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
> amdgpu_bo *bo);
>  
>  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
>   struct dma_fence **ef);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index efab923056f4..2d452655eb04 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -2030,6 +2030,45 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
>   return ret;
>  }
>  
> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
> amdgpu_bo *bo)
> +{
> + int ret;
> +
> + ret = amdgpu_bo_reserve(bo, true);
> + if (ret) {
> + pr_err("Failed to reserve bo. ret %d\n", ret);
> + goto err_reserve_bo_failed;
> + }
> +
> + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
> + if (ret) {
> + pr_err("Failed to pin bo. ret %d\n", ret);
> + goto err_pin_bo_failed;
> + }
> +
> + ret = amdgpu_ttm_alloc_gart(&bo->tbo);
> + if (ret) {
> + pr_err("Failed to bind bo to GART. ret %d\n", ret);
> + goto err_map_bo_gart_failed;
> + }
> +
> + amdgpu_amdkfd_remove_eviction_fence(
> + bo, bo->kfd_bo->process_info->eviction_fence);
> + list_del_init(&bo->kfd_bo->validate_list.head);
> +
> + amdgpu_bo_unreserve(bo);
> +
> + return 0;
> +
> +err_map_bo_gart_failed:
> + amdgpu_bo_unpin(bo);
> +err_pin_bo_failed:
> + amdgpu_bo_unreserve(bo);
> +err_reserve_bo_failed:
> +
> + return ret;
> +}
> +
>  int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
>   struct kgd_mem *mem, void **kptr, uint64_t *size)
>  {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index e9766e165c38..58d5ebed1b32 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -290,6 +290,11 @@ static int kfd_ioctl_create_queue(struct file *filep, 
> struct kfd_process *p,
>   struct queue_properties q_properties;
>   uint32_t doorbell_offset_in_process = 0;
>  
> + struct amdgpu_bo_va_mapping *wptr_mapping;
> + struct interval_tree_node *wptr_node;
> + struct amdgpu_vm *wptr_vm;
> + struct amdgpu_bo *wptr_bo = NULL;
> +
> + memset(&q_properties, 0, sizeof(struct queue_properties));
>  
>   pr_debug("Creating queue ioctl\n");
> @@ -316,12 +321,44 @@ static int kfd_ioctl_create_queue(struct file *filep, 
> struct kfd_process *p,
>   goto err_bind_process;
>   }
>  
> + /* Starting with GFX11, MES requires wptr BOs to be GTT 
> allocated/mapped to
> +  * GART for usermode queues in order to support oversubscription. In the
> +  * case that work is submitted to an unmapped queue, MES must have a 
> GART
> +  * wptr address to determine whether the queue should be mapped.
> +  */
> + if (dev->shared_resources.enable_mes && (dev->adev->mes.sched_version & 
> 0xff) >= 3) {
> + wptr_vm = drm_priv_to_vm(pdd->drm_priv);
> + err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
> + if (err)
> + goto err_wptr_bo_reserve;
> +
> + wptr_node = interval_tree_iter_first(&wptr_vm->va,
> + args->write_pointer_address >> 

[PATCH 1/1] drm/amdkfd: Remove queue sysfs and doorbell after unmapping

2022-06-10 Thread Philip Yang
If destroying/unmapping queue failed, application may destroy queue
again, cause below kernel warning backtrace.

For outstanding queues, either applications forget to destroy or failed
to destroy, kfd_process_notifier_release will remove queue sysfs
objects, kfd_process_wq_release will free queue doorbell.

 refcount_t: underflow; use-after-free.
 WARNING: CPU: 7 PID: 3053 at lib/refcount.c:28
  Call Trace:
   kobject_put+0xd6/0x1a0
   kfd_procfs_del_queue+0x27/0x30 [amdgpu]
   pqm_destroy_queue+0xeb/0x240 [amdgpu]
   kfd_ioctl_destroy_queue+0x32/0x70 [amdgpu]
   kfd_ioctl+0x27d/0x500 [amdgpu]
   do_syscall_64+0x35/0x80

 WARNING: CPU: 2 PID: 3053 at 
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_device_queue_manager.c:400
  Call Trace:
   deallocate_doorbell.isra.0+0x39/0x40 [amdgpu]
   destroy_queue_cpsch+0xb3/0x270 [amdgpu]
   pqm_destroy_queue+0x108/0x240 [amdgpu]
   kfd_ioctl_destroy_queue+0x32/0x70 [amdgpu]
   kfd_ioctl+0x27d/0x500 [amdgpu]

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e1797657b04c..1c519514ca1a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1876,8 +1876,6 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
 
-   deallocate_doorbell(qpd, q);
-
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
deallocate_sdma_queue(dqm, q);
@@ -1898,6 +1896,8 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
}
}
 
+   deallocate_doorbell(qpd, q);
+
/*
 * Unconditionally decrement this counter, regardless of the queue's
 * type
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index dc00484ff484..99f2a6412201 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -419,7 +419,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, 
unsigned int qid)
}
 
if (pqn->q) {
-   kfd_procfs_del_queue(pqn->q);
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
@@ -439,6 +438,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, 
unsigned int qid)
if (dev->shared_resources.enable_mes)
amdgpu_amdkfd_free_gtt_mem(dev->adev,
   pqn->q->gang_ctx_bo);
+   kfd_procfs_del_queue(pqn->q);
uninit_queue(pqn->q);
}
 
-- 
2.35.1



Re: [PATCH v2 1/3] drm/amdgpu: Fetch MES scheduler/KIQ versions

2022-06-10 Thread Felix Kuehling

Am 2022-06-10 um 13:13 schrieb Graham Sider:

Store MES scheduler and MES KIQ version numbers in amdgpu_mes for GFX11.

Signed-off-by: Graham Sider 


Acked-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  3 +++
  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 12 
  2 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 92ddee5e33db..aa06c8396ee0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -64,6 +64,9 @@ struct amdgpu_mes {
  
  	spinlock_t  queue_id_lock;
  
+	uint32_t			sched_version;

+   uint32_tkiq_version;
+
uint32_ttotal_max_queue;
uint32_tdoorbell_id_offset;
uint32_tmax_doorbell_slices;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index e4eb87689f7f..2a9ef308e71c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -890,6 +890,18 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev,
mes_v11_0_queue_init_register(ring);
}
  
+	/* get MES scheduler/KIQ versions */

+   mutex_lock(&adev->srbm_mutex);
+   soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+   if (pipe == AMDGPU_MES_SCHED_PIPE)
+   adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+   else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+   adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+
+   soc21_grbm_select(adev, 0, 0, 0, 0);
+   mutex_unlock(&adev->srbm_mutex);
+
return 0;
  }
  


Re: [PATCH v2 3/3] drm/amdgpu: Update mes_v11_api_def.h

2022-06-10 Thread Felix Kuehling

Am 2022-06-10 um 13:13 schrieb Graham Sider:

Update MES API to support oversubscription without aggregated doorbell
for usermode queues.

Signed-off-by: Graham Sider 


Acked-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   | 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   | 1 +
  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c| 3 +++
  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 ++
  drivers/gpu/drm/amd/include/mes_v11_api_def.h | 4 +++-
  5 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 2e86baa32c55..3d9a81a8fa1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -681,6 +681,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int 
gang_id,
queue_input.wptr_addr = qprops->wptr_gpu_addr;
queue_input.queue_type = qprops->queue_type;
queue_input.paging = qprops->paging;
+   queue_input.oversubscription_no_aggregated_en = 0;
  
  	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);

if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index aa06c8396ee0..26765a9946a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -207,6 +207,7 @@ struct mes_add_queue_input {
uint32_tdebug_vmid;
uint64_ttba_addr;
uint64_ttma_addr;
+   uint64_toversubscription_no_aggregated_en;
  };
  
  struct mes_remove_queue_input {

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 2a9ef308e71c..95a1394d3943 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -163,6 +163,8 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gws_size = input->gws_size;
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
+   mes_add_queue_pkt.oversubscription_no_aggregated_en =
+   input->oversubscription_no_aggregated_en;
  
  	mes_add_queue_pkt.api_status.api_completion_fence_addr =

mes->ring.fence_drv.gpu_addr;
@@ -341,6 +343,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes 
*mes)
mes_set_hw_res_pkt.disable_reset = 1;
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+   mes_set_hw_res_pkt.oversubscription_timer = 50;
  
  	mes_set_hw_res_pkt.api_status.api_completion_fence_addr =

mes->ring.fence_drv.gpu_addr;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d8de2fbdfc7d..762bc6059387 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -235,6 +235,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
} else
queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
  
+	queue_input.oversubscription_no_aggregated_en = 1;

+
queue_input.paging = false;
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h 
b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
index f9d02d7bdf77..95f0246eb045 100644
--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
@@ -226,6 +226,7 @@ union MESAPI_SET_HW_RESOURCES {
};
uint32_tuint32_t_all;
};
+   uint32_toversubscription_timer;
};
  
  	uint32_t	max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];

@@ -265,7 +266,8 @@ union MESAPI__ADD_QUEUE {
uint32_t is_gang_suspended  : 1;
uint32_t is_tmz_queue   : 1;
uint32_t map_kiq_utility_queue  : 1;
-   uint32_t reserved   : 23;
+   uint32_t oversubscription_no_aggregated_en : 1;
+   uint32_t reserved   : 22;
};
struct MES_API_STATUS   api_status;
uint64_ttma_addr;


Re: [PATCH v2 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-10 Thread Felix Kuehling

Am 2022-06-10 um 13:13 schrieb Graham Sider:

Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
GART for usermode queues in order to support oversubscription. In the
case that work is submitted to an unmapped queue, MES must have a GART
wptr address to determine whether the queue should be mapped.

This change is accompanied with changes in MES and is applicable for
MES_VERSION >= 3.

v2: Update MES_VERSION check from 2 to 3.

Signed-off-by: Graham Sider 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 39 
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 45 ++-
  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
  .../amd/amdkfd/kfd_process_queue_manager.c| 19 +---
  7 files changed, 110 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 429b16ba10bf..dba26d1e3be9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -301,6 +301,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct 
amdgpu_device *adev,
struct kgd_mem *mem, void **kptr, uint64_t *size);
  void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem);
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
amdgpu_bo *bo);
  
  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,

struct dma_fence **ef);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index efab923056f4..2d452655eb04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2030,6 +2030,45 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
return ret;
  }
  
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)


I think this function should take a reference count on the bo to ensure 
the pinned BO is not freed prematurely (even if broken user mode frees 
the BO before destroying the queue). Add a comment that the correct way 
to release the reference and unpin/unmap the BO is to call 
amdgpu_amdkfd_free_gtt_mem. See another comment below about moving the 
amdgpu_bo_ref here.




+{
+   int ret;
+
+   ret = amdgpu_bo_reserve(bo, true);
+   if (ret) {
+   pr_err("Failed to reserve bo. ret %d\n", ret);
+   goto err_reserve_bo_failed;
+   }
+
+   ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+   if (ret) {
+   pr_err("Failed to pin bo. ret %d\n", ret);
+   goto err_pin_bo_failed;
+   }
+
+   ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+   if (ret) {
+   pr_err("Failed to bind bo to GART. ret %d\n", ret);
+   goto err_map_bo_gart_failed;
+   }
+
+   amdgpu_amdkfd_remove_eviction_fence(
+   bo, bo->kfd_bo->process_info->eviction_fence);
+   list_del_init(&bo->kfd_bo->validate_list.head);


Please see Lang, Yu's patch "drm/amdkfd: add pinned BOs to kfd_bo_list". 
We realized that pinned BOs still need to be on the validate list. So 
please remove the list_del_init here.




+
+   amdgpu_bo_unreserve(bo);
+
+   return 0;
+
+err_map_bo_gart_failed:
+   amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+   amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+   return ret;
+}
+
  int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem, void **kptr, uint64_t *size)
  {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e9766e165c38..58d5ebed1b32 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -290,6 +290,11 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
struct queue_properties q_properties;
uint32_t doorbell_offset_in_process = 0;
  
+	struct amdgpu_bo_va_mapping *wptr_mapping;

+   struct interval_tree_node *wptr_node;
+   struct amdgpu_vm *wptr_vm;


It's good practice to minimize the scope of local variables. The above 
three variables are only used inside the "if" below. So they should be 
declared there.




+   struct amdgpu_bo *wptr_bo = NULL;
+
	memset(&q_properties, 0, sizeof(struct queue_properties));
  
  	pr_debug("Creating queue ioctl\n");

@@ -316,12 +321,44 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
goto err_bind_process;
}
  
+	/* Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to

+* GART for usermode queues in order to support oversubscription. 

Re: [PATCH v3] drm/amdkfd: Add available memory ioctl

2022-06-10 Thread Felix Kuehling

Am 2022-06-09 um 19:32 schrieb David Yat Sin:

From: Daniel Phillips 

Add a new KFD ioctl to return the largest possible memory size that
can be allocated as a buffer object using
kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same
accept/reject criteria as that function so that allocating a new
buffer object of the size returned by this new ioctl is guaranteed to
succeed, barring races with other allocating tasks.

This IOCTL will be used by libhsakmt:
https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html

Signed-off-by: Daniel Phillips 
Signed-off-by: David Yat Sin 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 38 +--
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 34 +
  include/uapi/linux/kfd_ioctl.h| 14 ++-
  4 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index ffb2b7d9b9a5..648c031942e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -268,6 +268,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct 
amdgpu_device *adev,
  void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
void *drm_priv);
  uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
  int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a1de900ba677..afd6e6923189 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -38,6 +38,12 @@
   */
  #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
  
+/*

+ * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K 
allocations in the tail 2MB
+ * BO chunk
+ */
+#define VRAM_ALLOCATION_ALIGN (1 << 21)
+
  /* Impose limit on how much memory KFD can use */
  static struct {
uint64_t max_system_mem_limit;
@@ -108,7 +114,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
   * compromise that should work in most cases without reserving too
   * much memory for page tables unnecessarily (factor 16K, >> 14).
   */
-#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), 
AMDGPU_VM_RESERVED_VRAM)
  
  static size_t amdgpu_amdkfd_acc_size(uint64_t size)

  {
@@ -148,7 +154,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
system_mem_needed = acc_size;
ttm_mem_needed = acc_size;
-   vram_needed = size;
+
+   /*
+* Conservatively round up the allocation requirement to 2 MB
+* to avoid fragmentation caused by 4K allocations in the tail
+* 2M BO chunk.
+*/
+   vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
system_mem_needed = acc_size + size;
ttm_mem_needed = acc_size;
@@ -173,7 +185,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
 kfd_mem_limit.max_ttm_mem_limit) ||
(adev->kfd.vram_used + vram_needed >
-adev->gmc.real_vram_size - reserved_for_pt)) {
+adev->gmc.real_vram_size -
+atomic64_read(&adev->vram_pin_size) -
+reserved_for_pt)) {
ret = -ENOMEM;
goto release;
}
@@ -205,7 +219,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
kfd_mem_limit.system_mem_used -= acc_size;
kfd_mem_limit.ttm_mem_used -= acc_size;
-   adev->kfd.vram_used -= size;
+   adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
kfd_mem_limit.ttm_mem_used -= acc_size;
@@ -1668,6 +1682,22 @@ int amdgpu_amdkfd_criu_resume(void *p)
return ret;
  }
  
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)

+{
+   uint64_t reserved_for_pt =
+   ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+   size_t available;
+
+   spin_lock(&kfd_mem_limit.mem_limit_lock);
+   available = adev->gmc.real_vram_size
+   - adev->kfd.vram_used
+ 

Re: [PATCH v2] drm/amdkfd: fix warning when CONFIG_HSA_AMD_P2P is not set

2022-06-10 Thread Felix Kuehling

Am 2022-06-10 um 11:46 schrieb Alex Deucher:

drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1542:11:
warning: variable 'i' set but not used [-Wunused-but-set-variable]

Reported-by: kernel test robot 
Signed-off-by: Alex Deucher 


Thank you for taking care of this.

Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 3e240b22ec91..411447357196 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1539,7 +1539,10 @@ static int kfd_dev_create_p2p_links(void)
  {
struct kfd_topology_device *dev;
struct kfd_topology_device *new_dev;
-   uint32_t i, k;
+#if defined(CONFIG_HSA_AMD_P2P)
+   uint32_t i;
+#endif
+   uint32_t k;
int ret = 0;
  
  	k = 0;

@@ -1553,7 +1556,6 @@ static int kfd_dev_create_p2p_links(void)
return 0;
  
  	k--;

-   i = 0;
  
  	/* create in-direct links */

ret = kfd_create_indirect_link_prop(new_dev, k);
@@ -1562,6 +1564,7 @@ static int kfd_dev_create_p2p_links(void)
  
  	/* create p2p links */

  #if defined(CONFIG_HSA_AMD_P2P)
+   i = 0;
+   list_for_each_entry(dev, &topology_device_list, list) {
if (dev == new_dev)
break;


Re: [PATCH] drm/amdgpu: Fix error handling in amdgpu_amdkfd_gpuvm_free_memory_of_gpu

2022-06-10 Thread Felix Kuehling

Am 2022-06-10 um 00:04 schrieb Ramesh Errabolu:

Following error conditions are fixed:
   Unpin MMIO and DOORBELL BOs only after map count goes to zero
   Remove BO from validate list of a KFD process in a safe manner
   Print a warning message if unreserving GPUVMs encounters an error

Signed-off-by: Ramesh Errabolu 
---
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 42 +++
  1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a1de900ba677..ee48e6591f99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1013,14 +1013,22 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem 
*mem,
mutex_unlock(_info->lock);
  }
  
-static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,

+/**
+ * remove_kgd_mem_from_validate_list() - Remove BO from process's validate 
list,
+ * in an idempotent manner, so that restore worker can't access it anymore
+ * @mem: BO's membership handle in validate list
+ * @process_info: KFD process handle to which BO belongs
+ *
+ * Return: void


I don't think you need to state a void return explicitly. [+David], 
since you were looking into KFD documentation and kernel-doc comments 
lately, do you have any feedback on the kernel-doc syntax?


Other than that, this patch is

Reviewed-by: Felix Kuehling 



+ */
+static void remove_kgd_mem_from_validate_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info)
  {
struct ttm_validate_buffer *bo_list_entry;
  
  	bo_list_entry = &mem->validate_list;

	mutex_lock(&process_info->lock);
-   list_del(&bo_list_entry->head);
+   list_del_init(&bo_list_entry->head);
	mutex_unlock(&process_info->lock);
  }
  
@@ -1796,7 +1804,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
  
  allocate_init_user_pages_failed:

  err_pin_bo:
-   remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+   remove_kgd_mem_from_validate_list(*mem, avm->process_info);
	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
  err_node_allow:
/* Don't unreserve system mem limit twice */
@@ -1825,20 +1833,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
unsigned long bo_size = mem->bo->tbo.base.size;
struct kfd_mem_attachment *entry, *tmp;
struct bo_vm_reservation_context ctx;
-   struct ttm_validate_buffer *bo_list_entry;
unsigned int mapped_to_gpu_memory;
int ret;
bool is_imported = false;
  
  	mutex_lock(&mem->lock);
  
-	/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */

-   if (mem->alloc_flags &
-   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
-KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
-   amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
-   }
-
mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
is_imported = mem->is_imported;
	mutex_unlock(&mem->lock);
@@ -1853,10 +1853,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
  
  	/* Make sure restore workers don't access the BO any more */

-   bo_list_entry = &mem->validate_list;
-   mutex_lock(&process_info->lock);
-   list_del(&bo_list_entry->head);
-   mutex_unlock(&process_info->lock);
+   remove_kgd_mem_from_validate_list(mem, process_info);
  
  	/* No more MMU notifiers */

amdgpu_mn_unregister(mem->bo);
@@ -1878,7 +1875,18 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
	list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
kfd_mem_detach(entry);
  
+	/* Return success even in case of error */

	ret = unreserve_bo_and_vms(&ctx, false, false);
+   if (unlikely(ret)) {
+   WARN_ONCE(ret, "Error in unreserving BO and associated VMs");
+   ret = 0;
+   }
+
+   /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
+   if (mem->alloc_flags &
+   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
+   amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
  
  	/* Free the sync object */

	amdgpu_sync_free(&mem->sync);
@@ -2814,7 +2822,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void 
*gws, struct kgd_mem **mem
  bo_reservation_failure:
mutex_unlock(&(*mem)->process_info->lock);
amdgpu_sync_free(&(*mem)->sync);
-   remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
+   remove_kgd_mem_from_validate_list(*mem, process_info);
	amdgpu_bo_unref(&gws_bo);
mutex_destroy(&(*mem)->lock);
kfree(*mem);
@@ -2832,7 +2840,7 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, 
void *mem)
/* Remove BO from process's validate list so restore worker won't touch
 * it anymore
 */
-   remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
+   remove_kgd_mem_from_validate_list(kgd_mem, process_info);
  
  	ret = amdgpu_bo_reserve(gws_bo, false);


[PATCH 23/23] drm/amd/display: Blank for uclk OC in dm instead of dc

2022-06-10 Thread Hamza Mahfooz
From: Joshua Aberback 

[Why]
All displays need to be blanked during the uclk OC interface so that we can
guarantee pstate switching support. If the display config doesn't support
pstate switching, only using core_link_disable_stream will not enable it
as the front-end is untouched. We need to go through the full plane removal
sequence to properly program the pipe to allow pstate switching.

[How]
 - guard clk_mgr functions with non-NULL checks

Acked-by: Alan Liu 
Signed-off-by: Joshua Aberback 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 34 +---
 drivers/gpu/drm/amd/display/dc/dc.h  | 10 ++-
 2 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 258322c39e9a..48a14a5bda56 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3666,37 +3666,27 @@ void dc_allow_idle_optimizations(struct dc *dc, bool 
allow)
dc->idle_optimizations_allowed = allow;
 }
 
-/*
- * blank all streams, and set min and max memory clock to
- * lowest and highest DPM level, respectively
- */
+/* set min and max memory clock to lowest and highest DPM level, respectively 
*/
 void dc_unlock_memory_clock_frequency(struct dc *dc)
 {
-   unsigned int i;
-
-   for (i = 0; i < MAX_PIPES; i++)
-   if (dc->current_state->res_ctx.pipe_ctx[i].plane_state)
-   
core_link_disable_stream(&dc->current_state->res_ctx.pipe_ctx[i]);
+   if (dc->clk_mgr->funcs->set_hard_min_memclk)
+   dc->clk_mgr->funcs->set_hard_min_memclk(dc->clk_mgr, false);
 
-   dc->clk_mgr->funcs->set_hard_min_memclk(dc->clk_mgr, false);
-   dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
+   if (dc->clk_mgr->funcs->set_hard_max_memclk)
+   dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
 }
 
-/*
- * set min memory clock to the min required for current mode,
- * max to maxDPM, and unblank streams
- */
+/* set min memory clock to the min required for current mode, max to maxDPM */
 void dc_lock_memory_clock_frequency(struct dc *dc)
 {
-   unsigned int i;
+   if (dc->clk_mgr->funcs->get_memclk_states_from_smu)
+   dc->clk_mgr->funcs->get_memclk_states_from_smu(dc->clk_mgr);
 
-   dc->clk_mgr->funcs->get_memclk_states_from_smu(dc->clk_mgr);
-   dc->clk_mgr->funcs->set_hard_min_memclk(dc->clk_mgr, true);
-   dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
+   if (dc->clk_mgr->funcs->set_hard_min_memclk)
+   dc->clk_mgr->funcs->set_hard_min_memclk(dc->clk_mgr, true);
 
-   for (i = 0; i < MAX_PIPES; i++)
-   if (dc->current_state->res_ctx.pipe_ctx[i].plane_state)
-   core_link_enable_stream(dc->current_state, 
&dc->current_state->res_ctx.pipe_ctx[i]);
+   if (dc->clk_mgr->funcs->set_hard_max_memclk)
+   dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
 }
 
 static void blank_and_force_memclk(struct dc *dc, bool apply, unsigned int 
memclk_mhz)
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 0549fa2c572a..ba57e03d3d9e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1437,16 +1437,10 @@ bool dc_is_plane_eligible_for_idle_optimizations(struct 
dc *dc, struct dc_plane_
 
 void dc_allow_idle_optimizations(struct dc *dc, bool allow);
 
-/*
- * blank all streams, and set min and max memory clock to
- * lowest and highest DPM level, respectively
- */
+/* set min and max memory clock to lowest and highest DPM level, respectively 
*/
 void dc_unlock_memory_clock_frequency(struct dc *dc);
 
-/*
- * set min memory clock to the min required for current mode,
- * max to maxDPM, and unblank streams
- */
+/* set min memory clock to the min required for current mode, max to maxDPM */
 void dc_lock_memory_clock_frequency(struct dc *dc);
 
 /* set soft max for memclk, to be used for AC/DC switching clock limitations */
-- 
2.36.1



[PATCH 20/23] drm/amd/display: Disables dynamic memory clock switching in games

2022-06-10 Thread Hamza Mahfooz
From: Harry VanZyllDeJong 

[WHY]
Game performance may be affected if dynamic memory clock switching
is enabled while playing games.

[HOW]
Propagate the vrr active state to dirty bit so that on mode set it
disables dynamic memory clock switching.

Acked-by: Alan Liu 
Signed-off-by: Harry VanZyllDeJong 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c| 3 +++
 drivers/gpu/drm/amd/display/dc/dc_stream.h  | 1 +
 drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 2 +-
 drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h  | 3 +--
 4 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 7c2b65226131..49339c5c7230 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2652,6 +2652,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->allow_freesync)
stream->allow_freesync = *update->allow_freesync;
 
+   if (update->vrr_active_variable)
+   stream->vrr_active_variable = *update->vrr_active_variable;
+
if (update->crtc_timing_adjust)
stream->adjust = *update->crtc_timing_adjust;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h 
b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index ae9382ce82d3..5a894c19b0ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -304,6 +304,7 @@ struct dc_stream_update {
bool *dpms_off;
bool integer_scaling_update;
bool *allow_freesync;
+   bool *vrr_active_variable;
 
struct colorspace_transform *gamut_remap;
enum dc_color_space *output_color_space;
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c 
b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index aa121d45d9b8..0686223034de 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -1374,7 +1374,7 @@ unsigned long long 
mod_freesync_calc_field_rate_from_timing(
return field_rate_in_uhz;
 }
 
-bool mod_freesync_get_freesync_enabled(struct mod_vrr_params *pVrr, struct 
dc_stream_state *const pStream)
+bool mod_freesync_get_freesync_enabled(struct mod_vrr_params *pVrr)
 {
return (pVrr->state != VRR_STATE_UNSUPPORTED) && (pVrr->state != 
VRR_STATE_DISABLED);
 }
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h 
b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
index 62e326dd29a8..afe1f6cce528 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
@@ -195,7 +195,6 @@ unsigned int mod_freesync_calc_v_total_from_refresh(
unsigned int refresh_in_uhz);
 
 // Returns true when FreeSync is supported and enabled (even if it is inactive)
-bool mod_freesync_get_freesync_enabled(struct mod_vrr_params *pVrr,
-   struct dc_stream_state *const pStream);
+bool mod_freesync_get_freesync_enabled(struct mod_vrr_params *pVrr);
 
 #endif
-- 
2.36.1



[PATCH 22/23] drm/amd/display: Add null check to dc_submit_i2c_oem

2022-06-10 Thread Hamza Mahfooz
From: Martin Leung 

[why]
dc_submit_i2c_oem could be called with ddc null

[how]
add null check and fail the call instead

Acked-by: Alan Liu 
Signed-off-by: Martin Leung 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 49339c5c7230..258322c39e9a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3477,10 +3477,13 @@ bool dc_submit_i2c_oem(
struct i2c_command *cmd)
 {
struct ddc_service *ddc = dc->res_pool->oem_device;
-   return dce_i2c_submit_command(
-   dc->res_pool,
-   ddc->ddc_pin,
-   cmd);
+   if (ddc)
+   return dce_i2c_submit_command(
+   dc->res_pool,
+   ddc->ddc_pin,
+   cmd);
+
+   return false;
 }
 
 static bool link_add_remote_sink_helper(struct dc_link *dc_link, struct 
dc_sink *sink)
-- 
2.36.1



[PATCH 21/23] drm/amd/display: FVA timing adjustment

2022-06-10 Thread Hamza Mahfooz
From: Charlene Liu 

[why]
need to add timing adjustment for fva.

[how]
add hook to optc and hwseq.

Acked-by: Alan Liu 
Signed-off-by: Charlene Liu 
---
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index b1671b00ce40..e1a9a45b03b6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -165,6 +165,7 @@ void optc1_program_timing(
optc1->vupdate_width = vupdate_width;
patched_crtc_timing = *dc_crtc_timing;
apply_front_porch_workaround(_crtc_timing);
+   optc1->orginal_patched_timing = patched_crtc_timing;
 
/* Load horizontal timing */
 
-- 
2.36.1



[PATCH 19/23] drm/amd/display: Pass vrr mode to dcn

2022-06-10 Thread Hamza Mahfooz
From: Felipe 

[WHY]
New features will require knowing the vrr mode for their enablement.

[HOW]
Pass the state via a member of dc_stream.

Acked-by: Alan Liu 
Signed-off-by: Felipe Clark 
---
 drivers/gpu/drm/amd/display/dc/dc_stream.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h 
b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index c76fac3c153d..ae9382ce82d3 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -208,6 +208,7 @@ struct dc_stream_state {
bool ignore_msa_timing_param;
 
bool allow_freesync;
+   bool vrr_active_variable;
bool freesync_on_desktop;
 
bool converter_disable_audio;
-- 
2.36.1



[PATCH 17/23] drm/amd/display: Copy hfvsif_infopacket when stream update

2022-06-10 Thread Hamza Mahfooz
From: Nicholas Kazlauskas 

[Why & How]
The hfvsif_infopacket was missed when copying stream updates.
Check and copy it.

Acked-by: Alan Liu 
Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 7d71fd61c0a5..400b37e393b7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2658,6 +2658,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->dpms_off)
stream->dpms_off = *update->dpms_off;
 
+   if (update->hfvsif_infopacket)
+   stream->hfvsif_infopacket = *update->hfvsif_infopacket;
+
if (update->vsc_infopacket)
stream->vsc_infopacket = *update->vsc_infopacket;
 
-- 
2.36.1



[PATCH 18/23] drm/amd/display: Adding VTEM to dc

2022-06-10 Thread Hamza Mahfooz
From: Ahmad Othman 

[Why]
Video Timing Extended Metadata packet (VTEM) is required for features
like VRR and FVA

[How]
Adding support for VTEM transmission to stream encoders in DCN20 and DCN30
as part of FVA support

Acked-by: Alan Liu 
Signed-off-by: Ahmad Othman 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c   |  6 +-
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c  | 14 +-
 drivers/gpu/drm/amd/display/dc/dc_stream.h |  2 ++
 .../amd/display/dc/dcn20/dcn20_stream_encoder.c|  1 +
 .../display/dc/dcn30/dcn30_dio_stream_encoder.c|  1 +
 .../gpu/drm/amd/display/dc/inc/hw/stream_encoder.h |  1 +
 6 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 400b37e393b7..7c2b65226131 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2661,6 +2661,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->hfvsif_infopacket)
stream->hfvsif_infopacket = *update->hfvsif_infopacket;
 
+   if (update->vtem_infopacket)
+   stream->vtem_infopacket = *update->vtem_infopacket;
+
if (update->vsc_infopacket)
stream->vsc_infopacket = *update->vsc_infopacket;
 
@@ -2736,7 +2739,8 @@ static void commit_planes_do_stream_update(struct dc *dc,
stream_update->vrr_infopacket ||
stream_update->vsc_infopacket ||
stream_update->vsp_infopacket ||
-   stream_update->hfvsif_infopacket) {
+   stream_update->hfvsif_infopacket ||
+   stream_update->vtem_infopacket) {
resource_build_info_frame(pipe_ctx);
dc->hwss.update_info_frame(pipe_ctx);
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 5749db88b7c3..60b780385bbd 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -2816,6 +2816,17 @@ static void set_hfvs_info_packet(
*info_packet = stream->hfvsif_infopacket;
 }
 
+
+static void set_vtem_info_packet(
+   struct dc_info_packet *info_packet,
+   struct dc_stream_state *stream)
+{
+   if (!stream->vtem_infopacket.valid)
+   return;
+
+   *info_packet = stream->vtem_infopacket;
+}
+
 void dc_resource_state_destruct(struct dc_state *context)
 {
int i, j;
@@ -2896,7 +2907,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
info->hdrsmd.valid = false;
info->vsc.valid = false;
info->hfvsif.valid = false;
-
+   info->vtem.valid = false;
signal = pipe_ctx->stream->signal;
 
/* HDMi and DP have different info packets*/
@@ -2905,6 +2916,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
 
set_vendor_info_packet(>vendor, pipe_ctx->stream);
set_hfvs_info_packet(>hfvsif, pipe_ctx->stream);
+   set_vtem_info_packet(>vtem, pipe_ctx->stream);
 
set_spd_info_packet(>spd, pipe_ctx->stream);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h 
b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 376dddf54ec1..c76fac3c153d 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -181,6 +181,7 @@ struct dc_stream_state {
struct dc_info_packet vsc_infopacket;
struct dc_info_packet vsp_infopacket;
struct dc_info_packet hfvsif_infopacket;
+   struct dc_info_packet vtem_infopacket;
uint8_t dsc_packed_pps[128];
struct rect src; /* composition area */
struct rect dst; /* stream addressable area */
@@ -298,6 +299,7 @@ struct dc_stream_update {
struct dc_info_packet *vsc_infopacket;
struct dc_info_packet *vsp_infopacket;
struct dc_info_packet *hfvsif_infopacket;
+   struct dc_info_packet *vtem_infopacket;
bool *dpms_off;
bool integer_scaling_update;
bool *allow_freesync;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
index aab25ca8343a..e8f5c01688ec 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
@@ -159,6 +159,7 @@ static void enc2_stream_encoder_update_hdmi_info_packets(
enc2_update_hdmi_info_packet(enc1, 3, _frame->vendor);
enc2_update_hdmi_info_packet(enc1, 4, _frame->spd);
enc2_update_hdmi_info_packet(enc1, 5, _frame->hdrsmd);
+   enc2_update_hdmi_info_packet(enc1, 6, _frame->vtem);
 }
 
 static void 

[PATCH 16/23] drm/amd/display: Add support for HF-VSIF

2022-06-10 Thread Hamza Mahfooz
From: Ahmad Othman 

[Why]
- Currently there is no support for HF-VSIF
- The current support of VSIF is limited to H14b infoframe

[How]
- refactor VSIF
- Added new builder for HF-VSIF
- Added the HF-VSIF packet to DisplayTarget
- Updates DC to apply HF-VSIF updates when updating streams

Acked-by: Alan Liu 
Signed-off-by: Ahmad Othman 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c  |  3 ++-
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 11 +++
 drivers/gpu/drm/amd/display/dc/dc_stream.h|  2 ++
 .../drm/amd/display/dc/dcn10/dcn10_stream_encoder.c   |  8 +---
 4 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index cfa6c2d1fc69..7d71fd61c0a5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2732,7 +2732,8 @@ static void commit_planes_do_stream_update(struct dc *dc,
if ((stream_update->hdr_static_metadata && 
!stream->use_dynamic_meta) ||
stream_update->vrr_infopacket ||
stream_update->vsc_infopacket ||
-   stream_update->vsp_infopacket) {
+   stream_update->vsp_infopacket ||
+   stream_update->hfvsif_infopacket) {
resource_build_info_frame(pipe_ctx);
dc->hwss.update_info_frame(pipe_ctx);
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 21d217e84192..5749db88b7c3 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -2806,6 +2806,15 @@ static void set_vsc_info_packet(
 
*info_packet = stream->vsc_infopacket;
 }
+static void set_hfvs_info_packet(
+   struct dc_info_packet *info_packet,
+   struct dc_stream_state *stream)
+{
+   if (!stream->hfvsif_infopacket.valid)
+   return;
+
+   *info_packet = stream->hfvsif_infopacket;
+}
 
 void dc_resource_state_destruct(struct dc_state *context)
 {
@@ -2886,6 +2895,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
info->spd.valid = false;
info->hdrsmd.valid = false;
info->vsc.valid = false;
+   info->hfvsif.valid = false;
 
signal = pipe_ctx->stream->signal;
 
@@ -2894,6 +2904,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
set_avi_info_frame(>avi, pipe_ctx);
 
set_vendor_info_packet(>vendor, pipe_ctx->stream);
+   set_hfvs_info_packet(>hfvsif, pipe_ctx->stream);
 
set_spd_info_packet(>spd, pipe_ctx->stream);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h 
b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 68cf06a5a3e3..376dddf54ec1 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -180,6 +180,7 @@ struct dc_stream_state {
struct dc_info_packet vrr_infopacket;
struct dc_info_packet vsc_infopacket;
struct dc_info_packet vsp_infopacket;
+   struct dc_info_packet hfvsif_infopacket;
uint8_t dsc_packed_pps[128];
struct rect src; /* composition area */
struct rect dst; /* stream addressable area */
@@ -296,6 +297,7 @@ struct dc_stream_update {
struct dc_info_packet *vrr_infopacket;
struct dc_info_packet *vsc_infopacket;
struct dc_info_packet *vsp_infopacket;
+   struct dc_info_packet *hfvsif_infopacket;
bool *dpms_off;
bool integer_scaling_update;
bool *allow_freesync;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
index 7608187751c8..92f474e6a96b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -675,11 +675,13 @@ static void enc1_stream_encoder_update_hdmi_info_packets(
/* for bring up, disable dp double  TODO */
REG_UPDATE(HDMI_DB_CONTROL, HDMI_DB_DISABLE, 1);
 
+   /*Always add mandatory packets first followed by optional ones*/
enc1_update_hdmi_info_packet(enc1, 0, _frame->avi);
-   enc1_update_hdmi_info_packet(enc1, 1, _frame->vendor);
+   enc1_update_hdmi_info_packet(enc1, 1, _frame->hfvsif);
enc1_update_hdmi_info_packet(enc1, 2, _frame->gamut);
-   enc1_update_hdmi_info_packet(enc1, 3, _frame->spd);
-   enc1_update_hdmi_info_packet(enc1, 4, _frame->hdrsmd);
+   enc1_update_hdmi_info_packet(enc1, 3, _frame->vendor);
+   enc1_update_hdmi_info_packet(enc1, 4, _frame->spd);
+   enc1_update_hdmi_info_packet(enc1, 5, _frame->hdrsmd);
 }
 
 static void 

[PATCH 15/23] drm/amd/display: Firmware assisted MCLK switch and FS

2022-06-10 Thread Hamza Mahfooz
From: Felipe 

[WHY]
Memory clock switching has great potential for power savings.

[HOW]
The driver code was modified to notify the DMCUB firmware that it should
stretch the vertical blank of frames when a memory clock switch is about
to start so that no blackouts happen on the screen due to unavailability
of the frame buffer.
The driver logic that determines when such a firmware-assisted strategy
can be initiated is also implemented; it consists of checking the
prerequisites of the feature.

Acked-by: Alan Liu 
Signed-off-by: Felipe Clark 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c  |  8 --
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  |  2 ++
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  |  5 +++-
 drivers/gpu/drm/amd/display/dc/dc_stream.h|  3 +-
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c |  1 -
 .../drm/amd/display/dc/dcn20/dcn20_hwseq.c|  1 -
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 28 +++
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.h| 11 ++--
 .../gpu/drm/amd/display/dc/dcn30/dcn30_init.c |  2 +-
 .../gpu/drm/amd/display/dc/dcn30/dcn30_optc.c |  1 +
 .../drm/amd/display/dc/dcn30/dcn30_resource.c |  1 +
 .../amd/display/dc/inc/hw_sequencer_private.h |  1 +
 .../amd/display/modules/freesync/freesync.c   |  5 
 .../amd/display/modules/inc/mod_freesync.h|  4 +++
 14 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 645ec5bc3a7d..cfa6c2d1fc69 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -397,7 +397,6 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
struct dc_crtc_timing_adjust *adjust)
 {
int i;
-   bool ret = false;
 
stream->adjust.v_total_max = adjust->v_total_max;
stream->adjust.v_total_mid = adjust->v_total_mid;
@@ -412,10 +411,10 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
1,
*adjust);
 
-   ret = true;
+   return true;
}
}
-   return ret;
+   return false;
 }
 
 /**
@@ -2650,6 +2649,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->vrr_infopacket)
stream->vrr_infopacket = *update->vrr_infopacket;
 
+   if (update->allow_freesync)
+   stream->allow_freesync = *update->allow_freesync;
+
if (update->crtc_timing_adjust)
stream->adjust = *update->crtc_timing_adjust;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c 
b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 11597bca966a..548c91ad1b82 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -27,6 +27,8 @@
 #include "dc_dmub_srv.h"
 #include "../dmub/dmub_srv.h"
 #include "dm_helpers.h"
+#include "dc_hw_types.h"
+#include "core_types.h"
 
 #define CTX dc_dmub_srv->ctx
 #define DC_LOGGER CTX->logger
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h 
b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index 50e44b53f14c..52758ff1e405 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -31,6 +31,10 @@
 
 struct dmub_srv;
 struct dc;
+struct pipe_ctx;
+struct dc_crtc_timing_adjust;
+struct dc_crtc_timing;
+struct dc_state;
 
 struct dc_reg_helper_state {
bool gather_in_progress;
@@ -69,7 +73,6 @@ bool dc_dmub_srv_get_dmub_outbox0_msg(const struct dc *dc, 
struct dmcub_trace_bu
 void dc_dmub_trace_event_control(struct dc *dc, bool enable);
 
 void dc_dmub_srv_query_caps_cmd(struct dmub_srv *dmub);
-
 void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dmub_srv);
 void dc_dmub_srv_wait_for_inbox0_ack(struct dc_dmub_srv *dmub_srv);
 void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union 
dmub_inbox0_data_register data);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h 
b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index f8f66790d09b..68cf06a5a3e3 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -205,6 +205,7 @@ struct dc_stream_state {
bool use_vsc_sdp_for_colorimetry;
bool ignore_msa_timing_param;
 
+   bool allow_freesync;
bool freesync_on_desktop;
 
bool converter_disable_audio;
@@ -295,9 +296,9 @@ struct dc_stream_update {
struct dc_info_packet *vrr_infopacket;
struct dc_info_packet *vsc_infopacket;
struct dc_info_packet *vsp_infopacket;
-
bool *dpms_off;
bool integer_scaling_update;
+   bool *allow_freesync;
 
struct colorspace_transform *gamut_remap;
enum dc_color_space *output_color_space;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 

[PATCH 14/23] drm/amd/display: DAL ACR, dc part, fix missing dcn30

2022-06-10 Thread Hamza Mahfooz
From: Ian Chen 

[Why]
- The dcn30 function was missing
- Fix a divide by 0 when ACR triggers

[How]
- Add IS_SMU_TIMEOUT() to dcn30_smu_send_msg_with_param
- Add zero check in dcn20_update_clocks_update_dentist

Acked-by: Alan Liu 
Signed-off-by: Ian Chen 
---
 .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c  | 22 +--
 .../dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c  | 11 +-
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index fb82e9f9738e..0d30d1d9d67e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -126,16 +126,24 @@ void dcn20_update_clocks_update_dpp_dto(struct 
clk_mgr_internal *clk_mgr,
 
 void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, 
struct dc_state *context)
 {
-   int dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-   * clk_mgr->base.dentist_vco_freq_khz / 
clk_mgr->base.clks.dppclk_khz;
-   int disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
-   * clk_mgr->base.dentist_vco_freq_khz / 
clk_mgr->base.clks.dispclk_khz;
-
-   uint32_t dppclk_wdivider = dentist_get_did_from_divider(dpp_divider);
-   uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider);
+   int dpp_divider = 0;
+   int disp_divider = 0;
+   uint32_t dppclk_wdivider = 0;
+   uint32_t dispclk_wdivider = 0;
uint32_t current_dispclk_wdivider;
uint32_t i;
 
+   if (clk_mgr->base.clks.dppclk_khz == 0 || 
clk_mgr->base.clks.dispclk_khz == 0)
+   return;
+
+   dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+   * clk_mgr->base.dentist_vco_freq_khz / 
clk_mgr->base.clks.dppclk_khz;
+   disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+   * clk_mgr->base.dentist_vco_freq_khz / 
clk_mgr->base.clks.dispclk_khz;
+
+   dppclk_wdivider = dentist_get_did_from_divider(dpp_divider);
+   dispclk_wdivider = dentist_get_did_from_divider(disp_divider);
+
REG_GET(DENTIST_DISPCLK_CNTL,
DENTIST_DISPCLK_WDIVIDER, _dispclk_wdivider);
 
diff --git 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
index bfc960579760..1fbf1c105dc1 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
@@ -28,6 +28,8 @@
 
 #include "clk_mgr_internal.h"
 #include "reg_helper.h"
+#include "dm_helpers.h"
+
 #include "dalsmc.h"
 #include "dcn30_smu11_driver_if.h"
 
@@ -74,6 +76,7 @@ static uint32_t dcn30_smu_wait_for_response(struct 
clk_mgr_internal *clk_mgr, un
 
 static bool dcn30_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, 
uint32_t msg_id, uint32_t param_in, uint32_t *param_out)
 {
+   uint32_t result;
/* Wait for response register to be ready */
dcn30_smu_wait_for_response(clk_mgr, 10, 20);
 
@@ -86,8 +89,14 @@ static bool dcn30_smu_send_msg_with_param(struct 
clk_mgr_internal *clk_mgr, uint
/* Trigger the message transaction by writing the message ID */
REG_WRITE(DAL_MSG_REG, msg_id);
 
+   result = dcn30_smu_wait_for_response(clk_mgr, 10, 20);
+
+   if (IS_SMU_TIMEOUT(result)) {
+   dm_helpers_smu_timeout(CTX, msg_id, param_in, 10 * 20);
+   }
+
/* Wait for response */
-   if (dcn30_smu_wait_for_response(clk_mgr, 10, 20) == 
DALSMC_Result_OK) {
+   if (result == DALSMC_Result_OK) {
if (param_out)
*param_out = REG_READ(DAL_ARG_REG);
 
-- 
2.36.1



[PATCH 13/23] drm/amd/display: Fix comments

2022-06-10 Thread Hamza Mahfooz
From: Samson Tam 

[Why & how]
Fix format and typo of comments.

Acked-by: Alan Liu 
Signed-off-by: Samson Tam 
---
 .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c  |  1 -
 drivers/gpu/drm/amd/display/dc/core/dc_link.c |  8 ++--
 .../dc/gpio/dcn20/hw_translate_dcn20.c| 17 ++---
 .../dc/gpio/dcn21/hw_translate_dcn21.c| 17 ++---
 .../dc/gpio/dcn30/hw_translate_dcn30.c| 19 +++
 5 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index cac80ba69072..fb82e9f9738e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -436,7 +436,6 @@ void dcn2_read_clocks_from_hw_dentist(struct clk_mgr 
*clk_mgr_base)
clk_mgr_base->clks.dppclk_khz = 
(DENTIST_DIVIDER_RANGE_SCALE_FACTOR
* clk_mgr->base.dentist_vco_freq_khz) / 
dpp_divider;
}
-
 }
 
 void dcn2_get_clock(struct clk_mgr *clk_mgr,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 7884530cc02b..199868925fe4 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -1379,7 +1379,9 @@ bool dc_link_get_hpd_state(struct dc_link *dc_link)
 static enum hpd_source_id get_hpd_line(struct dc_link *link)
 {
struct gpio *hpd;
-   enum hpd_source_id hpd_id = HPD_SOURCEID_UNKNOWN;
+   enum hpd_source_id hpd_id;
+
+   hpd_id = HPD_SOURCEID_UNKNOWN;
 
hpd = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
   link->ctx->gpio_service);
@@ -1418,7 +1420,9 @@ static enum hpd_source_id get_hpd_line(struct dc_link 
*link)
 static enum channel_id get_ddc_line(struct dc_link *link)
 {
struct ddc *ddc;
-   enum channel_id channel = CHANNEL_ID_UNKNOWN;
+   enum channel_id channel;
+
+   channel = CHANNEL_ID_UNKNOWN;
 
ddc = dal_ddc_service_get_ddc_pin(link->ddc);
 
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.c 
b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.c
index 52ba62b3b5e4..3005ee7751a0 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.c
@@ -150,7 +150,8 @@ static bool offset_to_id(
/* DDC */
/* we don't care about the GPIO_ID for DDC
 * in DdcHandle it will use GPIO_ID_DDC_DATA/GPIO_ID_DDC_CLOCK
-* directly in the create method */
+* directly in the create method
+*/
case REG(DC_GPIO_DDC1_A):
*en = GPIO_DDC_LINE_DDC1;
return true;
@@ -173,14 +174,16 @@ static bool offset_to_id(
*en = GPIO_DDC_LINE_DDC_VGA;
return true;
 
-// case REG(DC_GPIO_I2CPAD_A): not exit
-// case REG(DC_GPIO_PWRSEQ_A):
-// case REG(DC_GPIO_PAD_STRENGTH_1):
-// case REG(DC_GPIO_PAD_STRENGTH_2):
-// case REG(DC_GPIO_DEBUG):
+/*
+ * case REG(DC_GPIO_I2CPAD_A): not exit
+ * case REG(DC_GPIO_PWRSEQ_A):
+ * case REG(DC_GPIO_PAD_STRENGTH_1):
+ * case REG(DC_GPIO_PAD_STRENGTH_2):
+ * case REG(DC_GPIO_DEBUG):
+ */
/* UNEXPECTED */
default:
-// case REG(DC_GPIO_SYNCA_A): not exist
+/* case REG(DC_GPIO_SYNCA_A): not exist */
ASSERT_CRITICAL(false);
return false;
}
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c 
b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c
index 291966efe63d..d734e3a134d1 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c
@@ -153,7 +153,8 @@ static bool offset_to_id(
/* DDC */
/* we don't care about the GPIO_ID for DDC
 * in DdcHandle it will use GPIO_ID_DDC_DATA/GPIO_ID_DDC_CLOCK
-* directly in the create method */
+* directly in the create method
+*/
case REG(DC_GPIO_DDC1_A):
*en = GPIO_DDC_LINE_DDC1;
return true;
@@ -173,14 +174,16 @@ static bool offset_to_id(
*en = GPIO_DDC_LINE_DDC_VGA;
return true;
 
-// case REG(DC_GPIO_I2CPAD_A): not exit
-// case REG(DC_GPIO_PWRSEQ_A):
-// case REG(DC_GPIO_PAD_STRENGTH_1):
-// case REG(DC_GPIO_PAD_STRENGTH_2):
-// case REG(DC_GPIO_DEBUG):
+/*
+ * case REG(DC_GPIO_I2CPAD_A): not exit
+ * case REG(DC_GPIO_PWRSEQ_A):
+ * case REG(DC_GPIO_PAD_STRENGTH_1):
+ * case REG(DC_GPIO_PAD_STRENGTH_2):
+ * case REG(DC_GPIO_DEBUG):
+ */
/* UNEXPECTED */
default:
-// case REG(DC_GPIO_SYNCA_A): not exist
+/* case REG(DC_GPIO_SYNCA_A): not exista */
 #ifdef 

[PATCH 12/23] drm/amd/display: Drop unused privacy_mask setters and getters

2022-06-10 Thread Hamza Mahfooz
From: Oliver Logush 

[Why and How]
dwbc_funcs.set/get_privacy_mask isn't being used anymore, drop it

Reviewed-by: Charlene Liu 
Acked-by: Alan Liu 
Signed-off-by: Oliver Logush 
---
 drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h 
b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
index fd6572ba3fb2..b982be64c792 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
@@ -219,12 +219,6 @@ struct dwbc_funcs {
struct dwbc *dwbc,
const struct dc_transfer_func *in_transfer_func_dwb_ogam);
 
-   void (*get_privacy_mask)(
-   struct dwbc *dwbc, uint32_t *mask_id);
-
-   void (*set_privacy_mask)(
-   struct dwbc *dwbc, uint32_t mask_id);
-
//TODO: merge with output_transfer_func?
bool (*dwb_ogam_set_input_transfer_func)(
struct dwbc *dwbc,
-- 
2.36.1



[PATCH 10/23] drm/amd/display: update topology_update_input_v3 struct

2022-06-10 Thread Hamza Mahfooz
From: Qingqing Zhuo 

[Why]
DIO parameters were missing in the topology_update_input_v3 struct.

[How]
Add DIO parameters to the v3 struct and update the relevant functions accordingly.

Reviewed-by: Bhawanpreet Lakha 
Acked-by: Hamza Mahfooz 
Signed-off-by: Qingqing Zhuo 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c|  2 ++
 drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c   |  4 
 drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h   | 11 +++
 3 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index bf0d50277f8f..c76b628e6791 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -476,6 +476,7 @@ static void update_config(void *handle, struct 
cp_psp_stream_config *config)
link->ddc_line = aconnector->dc_link->ddc_hw_inst + 1;
display->stream_enc_idx = config->stream_enc_idx;
link->link_enc_idx = config->link_enc_idx;
+   link->dio_output_id = config->dio_output_idx;
link->phy_idx = config->phy_idx;
if (sink)
link_is_hdcp14 = dc_link_is_hdcp14(aconnector->dc_link, 
sink->sink_signal);
@@ -483,6 +484,7 @@ static void update_config(void *handle, struct 
cp_psp_stream_config *config)
link->dp.rev = aconnector->dc_link->dpcd_caps.dpcd_rev.raw;
link->dp.assr_enabled = config->assr_enabled;
link->dp.mst_enabled = config->mst_enabled;
+   link->dp.usb4_enabled = config->usb4_enabled;
display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
link->adjust.auth_delay = 3;
link->adjust.hdcp1.disable = 0;
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c 
b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
index be61975f1470..ee67a35c2a8e 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -202,6 +202,10 @@ static enum mod_hdcp_status add_display_to_topology_v3(
dtm_cmd->dtm_status = TA_DTM_STATUS__GENERIC_FAILURE;
dtm_cmd->dtm_in_message.topology_update_v3.phy_id = link->phy_idx;
dtm_cmd->dtm_in_message.topology_update_v3.link_hdcp_cap = 
link->hdcp_supported_informational;
+   dtm_cmd->dtm_in_message.topology_update_v3.dio_output_type = 
link->dp.usb4_enabled ?
+   TA_DTM_DIO_OUTPUT_TYPE__DPIA :
+   TA_DTM_DIO_OUTPUT_TYPE__DIRECT;
+   dtm_cmd->dtm_in_message.topology_update_v3.dio_output_id = 
link->dio_output_id;
 
psp_dtm_invoke(psp, dtm_cmd->cmd_id);
mutex_unlock(>dtm_context.mutex);
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h 
b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
index 2937b4b61461..5b71bc96b98c 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
@@ -94,6 +94,15 @@ enum ta_dtm_encoder_type {
TA_DTM_ENCODER_TYPE__DIG= 0x10
 };
 
+/* @enum ta_dtm_dio_output_type
+ * This enum defines software value for dio_output_type
+ */
+typedef enum {
+TA_DTM_DIO_OUTPUT_TYPE__INVALID,
+TA_DTM_DIO_OUTPUT_TYPE__DIRECT,
+TA_DTM_DIO_OUTPUT_TYPE__DPIA
+} ta_dtm_dio_output_type;
+
 struct ta_dtm_topology_update_input_v3 {
/* display handle is unique across the driver and is used to identify a 
display */
/* for all security interfaces which reference displays such as HDCP */
@@ -111,6 +120,8 @@ struct ta_dtm_topology_update_input_v3 {
enum ta_dtm_encoder_type encoder_type;
uint32_t phy_id;
uint32_t link_hdcp_cap;
+   ta_dtm_dio_output_type dio_output_type;
+   uint32_t dio_output_id;
 };
 
 struct ta_dtm_topology_assr_enable {
-- 
2.36.1



[PATCH 11/23] drm/amd/display: 3.2.190

2022-06-10 Thread Hamza Mahfooz
From: Aric Cyr 

This version brings along the following:
- DP fixes
- Reduced frame size in the bounding boxes of a number of ASICs.
- Exiting idle optimizations on mouse updates

Acked-by: Hamza Mahfooz 
Signed-off-by: Aric Cyr 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 7191fc48c2e7..0549fa2c572a 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.189"
+#define DC_VER "3.2.190"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.36.1



[PATCH 09/23] drm/amd/display: Add debug option for exiting idle optimizations on cursor updates

2022-06-10 Thread Hamza Mahfooz
From: "Lee, Alvin" 

[Description]
- Have option to exit idle opt on cursor updates
for debug and optimizations purposes

Reviewed-by: Samson Tam 
Acked-by: Hamza Mahfooz 
Signed-off-by: Alvin Lee 
---
 drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 5 +++--
 drivers/gpu/drm/amd/display/dc/dc.h | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index de8b214132a2..167bb3310877 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -389,7 +389,7 @@ bool dc_stream_set_cursor_position(
struct dc_stream_state *stream,
const struct dc_cursor_position *position)
 {
-   struct dc  *dc;
+   struct dc  *dc = stream->ctx->dc;
bool reset_idle_optimizations = false;
 
if (NULL == stream) {
@@ -406,7 +406,8 @@ bool dc_stream_set_cursor_position(
dc_z10_restore(dc);
 
/* disable idle optimizations if enabling cursor */
-   if (dc->idle_optimizations_allowed && !stream->cursor_position.enable 
&& position->enable) {
+   if (dc->idle_optimizations_allowed && (!stream->cursor_position.enable 
|| dc->debug.exit_idle_opt_for_cursor_updates)
+   && position->enable) {
dc_allow_idle_optimizations(dc, false);
reset_idle_optimizations = true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 76db013aac6e..7191fc48c2e7 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -748,6 +748,7 @@ struct dc_debug_options {
uint8_t psr_power_use_phy_fsm;
enum dml_hostvm_override_opts dml_hostvm_override;
bool use_legacy_soc_bb_mechanism;
+   bool exit_idle_opt_for_cursor_updates;
 };
 
 struct gpu_info_soc_bounding_box_v1_0;
-- 
2.36.1



[PATCH 07/23] drm/amd/display: Reduce frame size in the bounding box for DCN21

2022-06-10 Thread Hamza Mahfooz
From: Rodrigo Siqueira 

GCC throws warnings for the function dcn21_update_bw_bounding_box and
dcn316_update_bw_bounding_box due to its frame size that looks like
this:

 error: the frame size of 1936 bytes is larger than 1024 bytes 
[-Werror=frame-larger-than=]

To fix this issue, I dropped an intermediate variable.

Reviewed-by: Dmytro Laktyushkin 
Acked-by: Hamza Mahfooz 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 29 +--
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index d9cc178f6980..c2fec0d85da4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -2004,7 +2004,6 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params
 {
struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool);
struct clk_limit_table *clk_table = _params->clk_table;
-   struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
unsigned int i, closest_clk_lvl = 0, k = 0;
int j;
 
@@ -2017,7 +2016,7 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params
ASSERT(clk_table->num_entries);
/* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over 
null states later */
for (i = 0; i < dcn2_1_soc.num_states + 1; i++) {
-   clock_limits[i] = dcn2_1_soc.clock_limits[i];
+   dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i];
}
 
for (i = 0; i < clk_table->num_entries; i++) {
@@ -2033,24 +2032,22 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params
if (i == 1)
k++;
 
-   clock_limits[k].state = k;
-   clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
-   clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
-   clock_limits[k].socclk_mhz = clk_table->entries[i].socclk_mhz;
-   clock_limits[k].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2;
+   dcn2_1_soc.clock_limits[k].state = k;
+   dcn2_1_soc.clock_limits[k].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
+   dcn2_1_soc.clock_limits[k].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
+   dcn2_1_soc.clock_limits[k].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
+   dcn2_1_soc.clock_limits[k].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2;
 
-   clock_limits[k].dispclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-   clock_limits[k].dppclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-   clock_limits[k].dram_bw_per_chan_gbps = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
-   clock_limits[k].dscclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
-   clock_limits[k].dtbclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
-   clock_limits[k].phyclk_d18_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
-   clock_limits[k].phyclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+   dcn2_1_soc.clock_limits[k].dispclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+   dcn2_1_soc.clock_limits[k].dppclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+   dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+   dcn2_1_soc.clock_limits[k].dscclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+   dcn2_1_soc.clock_limits[k].dtbclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+   dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+   dcn2_1_soc.clock_limits[k].phyclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
 
k++;
}
-   for (i = 0; i < clk_table->num_entries + 1; i++)
-   dcn2_1_soc.clock_limits[i] = clock_limits[i];
if (clk_table->num_entries) {
dcn2_1_soc.num_states = clk_table->num_entries + 1;
/* fill in min DF PState */
-- 
2.36.1



[PATCH 08/23] drm/amd/display: dsc validate fail not pass to atomic check

2022-06-10 Thread Hamza Mahfooz
From: hersen wu 

[Why] when a 4k@144hz dp display is connected to a dp1.4 dsc mst hub, the
requested bandwidth exceeds the caps of the dsc hub. but the dsc bw validation
functions, increase_dsc_bpp, try_disable_dsc, pre_validate_dsc,
compute_mst_dsc_configs_for_state, do not return false to
atomic check. this causes user mode to initiate a mode set to the kernel,
which then causes a kernel assert and a system hang.

[How] dsc bandwidth valid functions return pass or fail to atomic
check.

Reviewed-by: Wayne Lin 
Reviewed-by: Rodrigo Siqueira 
Acked-by: Hamza Mahfooz 
Signed-off-by: hersen wu 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  6 ++-
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 43 +--
 .../display/amdgpu_dm/amdgpu_dm_mst_types.h   |  2 +-
 3 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 06da4f2ed7ad..b048e40f23bf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -11209,7 +11209,10 @@ static int amdgpu_dm_atomic_check(struct drm_device 
*dev,
}
}
}
-   pre_validate_dsc(state, _state, vars);
+   if (!pre_validate_dsc(state, _state, vars)) {
+   ret = -EINVAL;
+   goto fail;
+   }
}
 #endif
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, 
new_crtc_state, i) {
@@ -11455,6 +11458,7 @@ static int amdgpu_dm_atomic_check(struct drm_device 
*dev,
 #if defined(CONFIG_DRM_AMD_DC_DCN)
if (!compute_mst_dsc_configs_for_state(state, 
dm_state->context, vars)) {
DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() 
failed\n");
+   ret = -EINVAL;
goto fail;
}
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 78df51b8693e..bdfe5a9a08dd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -670,7 +670,7 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params 
param, int pbn)
return dsc_config.bits_per_pixel;
 }
 
-static void increase_dsc_bpp(struct drm_atomic_state *state,
+static bool increase_dsc_bpp(struct drm_atomic_state *state,
 struct dc_link *dc_link,
 struct dsc_mst_fairness_params *params,
 struct dsc_mst_fairness_vars *vars,
@@ -730,7 +730,7 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
  
params[next_index].port,
  vars[next_index].pbn,
  pbn_per_timeslot) < 0)
-   return;
+   return false;
if (!drm_dp_mst_atomic_check(state)) {
vars[next_index].bpp_x16 = 
bpp_x16_from_pbn(params[next_index], vars[next_index].pbn);
} else {
@@ -740,7 +740,7 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
  
params[next_index].port,
  
vars[next_index].pbn,
  
pbn_per_timeslot) < 0)
-   return;
+   return false;
}
} else {
vars[next_index].pbn += initial_slack[next_index];
@@ -749,7 +749,7 @@ static void increase_dsc_bpp(struct drm_atomic_state *state,
  
params[next_index].port,
  vars[next_index].pbn,
  pbn_per_timeslot) < 0)
-   return;
+   return false;
if (!drm_dp_mst_atomic_check(state)) {
vars[next_index].bpp_x16 = 
params[next_index].bw_range.max_target_bpp_x16;
} else {
@@ -759,16 +759,17 @@ static void increase_dsc_bpp(struct drm_atomic_state 
*state,
  
params[next_index].port,
  
vars[next_index].pbn,
  
pbn_per_timeslot) < 0)
-   return;
+   return false;
}
 

[PATCH 05/23] drm/amd/display: Reduce frame size in the bounding box for DCN301

2022-06-10 Thread Hamza Mahfooz
From: Rodrigo Siqueira 

GCC throws warnings for the function dcn301_fpu_update_bw_bounding_box
due to its frame size that looks like this:

 error: the frame size of 1936 bytes is larger than 1024 bytes 
[-Werror=frame-larger-than=]

To fix this issue, I dropped an intermediate variable.

Reviewed-by: Dmytro Laktyushkin 
Acked-by: Hamza Mahfooz 
Signed-off-by: Rodrigo Siqueira 
---
 .../amd/display/dc/dml/dcn301/dcn301_fpu.c| 30 ---
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 0a7a33864973..62cf283d9f41 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -249,7 +249,6 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_param
 {
struct dcn301_resource_pool *pool = TO_DCN301_RES_POOL(dc->res_pool);
struct clk_limit_table *clk_table = _params->clk_table;
-   struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
unsigned int i, closest_clk_lvl;
int j;
 
@@ -271,24 +270,21 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_param
}
}
 
-   clock_limits[i].state = i;
-   clock_limits[i].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
-   clock_limits[i].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
-   clock_limits[i].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
-   clock_limits[i].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2;
-
-   clock_limits[i].dispclk_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-   clock_limits[i].dppclk_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-   clock_limits[i].dram_bw_per_chan_gbps = 
dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
-   clock_limits[i].dscclk_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
-   clock_limits[i].dtbclk_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
-   clock_limits[i].phyclk_d18_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
-   clock_limits[i].phyclk_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+   dcn3_01_soc.clock_limits[i].state = i;
+   dcn3_01_soc.clock_limits[i].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
+   dcn3_01_soc.clock_limits[i].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
+   dcn3_01_soc.clock_limits[i].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
+   dcn3_01_soc.clock_limits[i].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2;
+
+   dcn3_01_soc.clock_limits[i].dispclk_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+   dcn3_01_soc.clock_limits[i].dppclk_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+   dcn3_01_soc.clock_limits[i].dram_bw_per_chan_gbps = 
dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+   dcn3_01_soc.clock_limits[i].dscclk_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+   dcn3_01_soc.clock_limits[i].dtbclk_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+   dcn3_01_soc.clock_limits[i].phyclk_d18_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+   dcn3_01_soc.clock_limits[i].phyclk_mhz = 
dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
}
 
-   for (i = 0; i < clk_table->num_entries; i++)
-   dcn3_01_soc.clock_limits[i] = clock_limits[i];
-
if (clk_table->num_entries) {
dcn3_01_soc.num_states = clk_table->num_entries;
/* duplicate last level */
-- 
2.36.1



[PATCH 06/23] drm/amd/display: Reduce frame size in the bounding box for DCN31/316

2022-06-10 Thread Hamza Mahfooz
From: Rodrigo Siqueira 

GCC throws warnings for the function dcn31_update_bw_bounding_box and
dcn316_update_bw_bounding_box due to its frame size that looks like
this:

 error: the frame size of 1936 bytes is larger than 1024 bytes 
[-Werror=frame-larger-than=]

To fix this issue, I dropped an intermediate variable.

Reviewed-by: Dmytro Laktyushkin 
Acked-by: Hamza Mahfooz 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 64 +--
 1 file changed, 29 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index 1b02f0ebe957..858c5cd141b8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -575,7 +575,6 @@ void dcn31_calculate_wm_and_dlg_fp(
 void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params 
*bw_params)
 {
struct clk_limit_table *clk_table = _params->clk_table;
-   struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
unsigned int i, closest_clk_lvl;
int j;
 
@@ -608,29 +607,27 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params
}
}
 
-   clock_limits[i].state = i;
+   dcn3_1_soc.clock_limits[i].state = i;
 
/* Clocks dependent on voltage level. */
-   clock_limits[i].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
-   clock_limits[i].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
-   clock_limits[i].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
-   clock_limits[i].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+   dcn3_1_soc.clock_limits[i].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
+   dcn3_1_soc.clock_limits[i].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
+   dcn3_1_soc.clock_limits[i].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
+   dcn3_1_soc.clock_limits[i].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
 
/* Clocks independent of voltage level. */
-   clock_limits[i].dispclk_mhz = max_dispclk_mhz ? 
max_dispclk_mhz :
+   dcn3_1_soc.clock_limits[i].dispclk_mhz = 
max_dispclk_mhz ? max_dispclk_mhz :

dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
 
-   clock_limits[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz :
+   dcn3_1_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz 
? max_dppclk_mhz :

dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
 
-   clock_limits[i].dram_bw_per_chan_gbps = 
dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
-   clock_limits[i].dscclk_mhz = 
dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
-   clock_limits[i].dtbclk_mhz = 
dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
-   clock_limits[i].phyclk_d18_mhz = 
dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
-   clock_limits[i].phyclk_mhz = 
dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+   dcn3_1_soc.clock_limits[i].dram_bw_per_chan_gbps = 
dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+   dcn3_1_soc.clock_limits[i].dscclk_mhz = 
dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+   dcn3_1_soc.clock_limits[i].dtbclk_mhz = 
dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+   dcn3_1_soc.clock_limits[i].phyclk_d18_mhz = 
dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+   dcn3_1_soc.clock_limits[i].phyclk_mhz = 
dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
}
-   for (i = 0; i < clk_table->num_entries; i++)
-   dcn3_1_soc.clock_limits[i] = clock_limits[i];
if (clk_table->num_entries) {
dcn3_1_soc.num_states = clk_table->num_entries;
}
@@ -702,7 +699,6 @@ void dcn315_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_param
 void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params 
*bw_params)
 {
struct clk_limit_table *clk_table = _params->clk_table;
-   struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
unsigned int i, closest_clk_lvl;
int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
int j;
@@ -740,34 +736,32 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_param

[PATCH 04/23] drm/amd/display: Reduce frame size in the bounding box for DCN20

2022-06-10 Thread Hamza Mahfooz
From: Rodrigo Siqueira 

GCC throws warnings for the function dcn20_update_bounding_box due to its
frame size that looks like this:

 error: the frame size of 1936 bytes is larger than 1024 bytes 
[-Werror=frame-larger-than=]

This commit fixes this issue by eliminating an intermediary variable
that creates a large array.

Reviewed-by: Dmytro Laktyushkin 
Acked-by: Hamza Mahfooz 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 38 +--
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index eeec40f6fd0a..d9cc178f6980 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -1456,21 +1456,20 @@ void dcn20_calculate_wm(
 void dcn20_update_bounding_box(struct dc *dc, struct 
_vcs_dpi_soc_bounding_box_st *bb,
struct pp_smu_nv_clock_table *max_clocks, unsigned int 
*uclk_states, unsigned int num_states)
 {
-   struct _vcs_dpi_voltage_scaling_st 
calculated_states[DC__VOLTAGE_STATES];
-   int i;
int num_calculated_states = 0;
int min_dcfclk = 0;
+   int i;
 
dc_assert_fp_enabled();
 
if (num_states == 0)
return;
 
-   memset(calculated_states, 0, sizeof(calculated_states));
+   memset(bb->clock_limits, 0, sizeof(bb->clock_limits));
 
-   if (dc->bb_overrides.min_dcfclk_mhz > 0)
+   if (dc->bb_overrides.min_dcfclk_mhz > 0) {
min_dcfclk = dc->bb_overrides.min_dcfclk_mhz;
-   else {
+   } else {
if (ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev))
min_dcfclk = 310;
else
@@ -1481,36 +1480,35 @@ void dcn20_update_bounding_box(struct dc *dc, struct 
_vcs_dpi_soc_bounding_box_s
 
for (i = 0; i < num_states; i++) {
int min_fclk_required_by_uclk;
-   calculated_states[i].state = i;
-   calculated_states[i].dram_speed_mts = uclk_states[i] * 16 / 
1000;
+   bb->clock_limits[i].state = i;
+   bb->clock_limits[i].dram_speed_mts = uclk_states[i] * 16 / 1000;
 
// FCLK:UCLK ratio is 1.08
min_fclk_required_by_uclk = div_u64(((unsigned long 
long)uclk_states[i]) * 1080,
100);
 
-   calculated_states[i].fabricclk_mhz = (min_fclk_required_by_uclk 
< min_dcfclk) ?
+   bb->clock_limits[i].fabricclk_mhz = (min_fclk_required_by_uclk 
< min_dcfclk) ?
min_dcfclk : min_fclk_required_by_uclk;
 
-   calculated_states[i].socclk_mhz = 
(calculated_states[i].fabricclk_mhz > max_clocks->socClockInKhz / 1000) ?
-   max_clocks->socClockInKhz / 1000 : 
calculated_states[i].fabricclk_mhz;
+   bb->clock_limits[i].socclk_mhz = 
(bb->clock_limits[i].fabricclk_mhz > max_clocks->socClockInKhz / 1000) ?
+   max_clocks->socClockInKhz / 1000 : 
bb->clock_limits[i].fabricclk_mhz;
 
-   calculated_states[i].dcfclk_mhz = 
(calculated_states[i].fabricclk_mhz > max_clocks->dcfClockInKhz / 1000) ?
-   max_clocks->dcfClockInKhz / 1000 : 
calculated_states[i].fabricclk_mhz;
+   bb->clock_limits[i].dcfclk_mhz = 
(bb->clock_limits[i].fabricclk_mhz > max_clocks->dcfClockInKhz / 1000) ?
+   max_clocks->dcfClockInKhz / 1000 : 
bb->clock_limits[i].fabricclk_mhz;
 
-   calculated_states[i].dispclk_mhz = 
max_clocks->displayClockInKhz / 1000;
-   calculated_states[i].dppclk_mhz = max_clocks->displayClockInKhz 
/ 1000;
-   calculated_states[i].dscclk_mhz = max_clocks->displayClockInKhz 
/ (1000 * 3);
+   bb->clock_limits[i].dispclk_mhz = max_clocks->displayClockInKhz 
/ 1000;
+   bb->clock_limits[i].dppclk_mhz = max_clocks->displayClockInKhz 
/ 1000;
+   bb->clock_limits[i].dscclk_mhz = max_clocks->displayClockInKhz 
/ (1000 * 3);
 
-   calculated_states[i].phyclk_mhz = max_clocks->phyClockInKhz / 
1000;
+   bb->clock_limits[i].phyclk_mhz = max_clocks->phyClockInKhz / 
1000;
 
num_calculated_states++;
}
 
-   calculated_states[num_calculated_states - 1].socclk_mhz = 
max_clocks->socClockInKhz / 1000;
-   calculated_states[num_calculated_states - 1].fabricclk_mhz = 
max_clocks->socClockInKhz / 1000;
-   calculated_states[num_calculated_states - 1].dcfclk_mhz = 
max_clocks->dcfClockInKhz / 1000;
+   bb->clock_limits[num_calculated_states - 1].socclk_mhz = 
max_clocks->socClockInKhz / 1000;
+   bb->clock_limits[num_calculated_states - 1].fabricclk_mhz = 
max_clocks->socClockInKhz / 1000;
+   bb->clock_limits[num_calculated_states - 1].dcfclk_mhz = 
max_clocks->dcfClockInKhz / 

[PATCH 03/23] drm/amd/display: Fix monitor flash issue

2022-06-10 Thread Hamza Mahfooz
From: "Shah, Dharati" 

[Why & How]
For some specific monitors, when connected on boot or hot plug,
monitor flash for 1/2 seconds can happen during first HDCP query
operation. Adding some delay in the init sequence for these monitors
fixes the issue, so it is implemented as monitor specific patch.

Co-authored-by: Shah Dharati 
Reviewed-by: Hansen Dsouza 
Reviewed-by: Nicholas Kazlauskas 
Acked-by: Hamza Mahfooz 
Signed-off-by: Shah Dharati 
---
 drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c 
b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
index 1f4095b26409..c5f6c11de7e5 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
@@ -524,7 +524,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct 
mod_hdcp *hdcp,
set_watchdog_in_ms(hdcp, 3000, output);
set_state_id(hdcp, output, D2_A6_WAIT_FOR_RX_ID_LIST);
} else {
-   callback_in_ms(0, output);
+   callback_in_ms(1, output);
set_state_id(hdcp, output, D2_SEND_CONTENT_STREAM_TYPE);
}
break;
-- 
2.36.1



[PATCH 00/23] DC Patches June 06, 2022

2022-06-10 Thread Hamza Mahfooz
This DC patchset brings improvements in multiple areas. In summary, we
have:

* DP fixes
* Reduced frame size in the bounding boxes of a number of ASICs.
* Exiting idle optimizations
* General cleanup
* Power management optimizations
* HF-VSIF support
* VTEM support
* FVA timing improvements

Ahmad Othman (2):
  drm/amd/display: Add support for HF-VSIF
  drm/amd/display: Adding VTEM to dc

Aric Cyr (1):
  drm/amd/display: 3.2.190

Charlene Liu (1):
  drm/amd/display: FVA timing adjustment

Felipe (2):
  drm/amd/display: Firmware assisted MCLK switch and FS
  drm/amd/display: Pass vrr mode to dcn

Hamza Mahfooz (1):
  drm/amd/display: fix build when CONFIG_DRM_AMD_DC_DCN is not defined

Harry VanZyllDeJong (1):
  drm/amd/display: Disables dynamic memory clock switching in games

Ian Chen (1):
  drm/amd/display: DAL ACR, dc part, fix missing dcn30

Joshua Aberback (1):
  drm/amd/display: Blank for uclk OC in dm instead of dc

Lee, Alvin (1):
  drm/amd/display: Add debug option for exiting idle optimizations on
cursor updates

Martin Leung (1):
  drm/amd/display: Add null check to dc_submit_i2c_oem

Nicholas Kazlauskas (1):
  drm/amd/display: Copy hfvsif_infopacket when stream update

Oliver Logush (1):
  drm/amd/display: Drop unused privacy_mask setters and getters

Qingqing Zhuo (1):
  drm/amd/display: update topology_update_input_v3 struct

Rodrigo Siqueira (4):
  drm/amd/display: Reduce frame size in the bounding box for DCN20
  drm/amd/display: Reduce frame size in the bounding box for DCN301
  drm/amd/display: Reduce frame size in the bounding box for DCN31/316
  drm/amd/display: Reduce frame size in the bounding box for DCN21

Samson Tam (1):
  drm/amd/display: Fix comments

Shah, Dharati (1):
  drm/amd/display: Fix monitor flash issue

Wenjing Liu (1):
  drm/amd/display: lower lane count first when CR done partially fails
in EQ

hersen wu (1):
  drm/amd/display: dsc validate fail not pass to atomic check

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  6 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c|  2 +
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 43 
 .../display/amdgpu_dm/amdgpu_dm_mst_types.h   |  2 +-
 .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c  | 23 ---
 .../dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c  | 11 ++-
 drivers/gpu/drm/amd/display/dc/core/dc.c  | 66 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c |  8 ++-
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  | 13 ++--
 .../gpu/drm/amd/display/dc/core/dc_resource.c | 27 +++-
 .../gpu/drm/amd/display/dc/core/dc_stream.c   |  5 +-
 drivers/gpu/drm/amd/display/dc/dc.h   | 13 ++--
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  |  2 +
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  |  5 +-
 drivers/gpu/drm/amd/display/dc/dc_stream.h|  9 ++-
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c |  1 -
 .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c |  1 +
 .../display/dc/dcn10/dcn10_stream_encoder.c   |  8 ++-
 .../drm/amd/display/dc/dcn20/dcn20_hwseq.c|  1 -
 .../display/dc/dcn20/dcn20_stream_encoder.c   |  1 +
 .../dc/dcn30/dcn30_dio_stream_encoder.c   |  1 +
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 28 +---
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.h| 11 ++-
 .../gpu/drm/amd/display/dc/dcn30/dcn30_init.c |  2 +-
 .../gpu/drm/amd/display/dc/dcn30/dcn30_optc.c |  1 +
 .../drm/amd/display/dc/dcn30/dcn30_resource.c |  1 +
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 67 +--
 .../amd/display/dc/dml/dcn301/dcn301_fpu.c| 30 -
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 64 --
 .../dc/gpio/dcn20/hw_translate_dcn20.c| 17 +++--
 .../dc/gpio/dcn21/hw_translate_dcn21.c| 17 +++--
 .../dc/gpio/dcn30/hw_translate_dcn30.c| 19 +++---
 drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h   |  6 --
 .../amd/display/dc/inc/hw/stream_encoder.h|  1 +
 .../amd/display/dc/inc/hw_sequencer_private.h |  1 +
 .../amd/display/include/link_service_types.h  |  2 +
 .../amd/display/modules/freesync/freesync.c   |  5 ++
 .../display/modules/hdcp/hdcp2_transition.c   |  2 +-
 .../drm/amd/display/modules/hdcp/hdcp_psp.c   |  4 ++
 .../drm/amd/display/modules/hdcp/hdcp_psp.h   | 11 +++
 .../amd/display/modules/inc/mod_freesync.h|  3 +
 41 files changed, 326 insertions(+), 214 deletions(-)

-- 
2.36.1



[PATCH 01/23] drm/amd/display: fix build when CONFIG_DRM_AMD_DC_DCN is not defined

2022-06-10 Thread Hamza Mahfooz
Fixes:
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_resource.c: In function 
‘dc_remove_stream_from_ctx’:
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_resource.c:2010:3: error: 
implicit declaration of function ‘remove_hpo_dp_link_enc_from_ctx’; did you 
mean ‘add_hpo_dp_link_enc_to_ctx’? [-Werror=implicit-function-declaration]
 2010 |   remove_hpo_dp_link_enc_from_ctx(_ctx->res_ctx, del_pipe, 
del_pipe->stream);
  |   ^~~
  |   add_hpo_dp_link_enc_to_ctx

Fixes: b109b1468223 ("drm/amdgpu/display: Protect some functions with 
CONFIG_DRM_AMD_DC_DCN")
Signed-off-by: Hamza Mahfooz 
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 7357efb8b439..21d217e84192 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -2007,7 +2007,9 @@ enum dc_status dc_remove_stream_from_ctx(
_ctx->res_ctx, dc->res_pool,
del_pipe->stream_res.hpo_dp_stream_enc,
false);
+#if defined(CONFIG_DRM_AMD_DC_DCN)
remove_hpo_dp_link_enc_from_ctx(_ctx->res_ctx, del_pipe, 
del_pipe->stream);
+#endif
}
 
if (del_pipe->stream_res.audio)
-- 
2.36.1



[PATCH 02/23] drm/amd/display: lower lane count first when CR done partially fails in EQ

2022-06-10 Thread Hamza Mahfooz
From: Wenjing Liu 

[why]
According to DP specs, in EQ DONE phase of link training, we
should lower lane count when at least one CR DONE bit is set to 1, while
lower link rate when all CR DONE bits are 0s. However in our code, we will
treat both cases as latter. This is not exactly correct based on the specs
expectation.

[how]
Check lane0 CR DONE bit when it is still set but CR DONE fails,
we treat it as a partial CR DONE failure in EQ DONE phase, we
will follow the same fallback flow as when EQ DONE fails in EQ
DONE phase.

Reviewed-by: George Shen 
Acked-by: Hamza Mahfooz 
Signed-off-by: Wenjing Liu 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c| 13 +++--
 .../drm/amd/display/include/link_service_types.h|  2 ++
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index c1207049dbc5..f9c10d044da6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -1326,7 +1326,9 @@ static enum link_training_result 
perform_channel_equalization_sequence(
 
/* 5. check CR done*/
if (!dp_is_cr_done(lane_count, dpcd_lane_status))
-   return LINK_TRAINING_EQ_FAIL_CR;
+   return dpcd_lane_status[0].bits.CR_DONE_0 ?
+   LINK_TRAINING_EQ_FAIL_CR_PARTIAL :
+   LINK_TRAINING_EQ_FAIL_CR;
 
/* 6. check CHEQ done*/
if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) &&
@@ -1882,6 +1884,9 @@ static void print_status_message(
case LINK_TRAINING_EQ_FAIL_CR:
lt_result = "CR failed in EQ";
break;
+   case LINK_TRAINING_EQ_FAIL_CR_PARTIAL:
+   lt_result = "CR failed in EQ partially";
+   break;
case LINK_TRAINING_EQ_FAIL_EQ:
lt_result = "EQ failed";
break;
@@ -3612,11 +3617,6 @@ static bool decide_fallback_link_setting(
struct dc_link_settings *cur,
enum link_training_result training_result)
 {
-   if (!cur)
-   return false;
-   if (!max)
-   return false;
-
if (dp_get_link_encoding_format(max) == DP_128b_132b_ENCODING ||
link->dc->debug.force_dp2_lt_fallback_method)
return decide_fallback_link_setting_max_bw_policy(link, max, 
cur,
@@ -3646,6 +3646,7 @@ static bool decide_fallback_link_setting(
break;
}
case LINK_TRAINING_EQ_FAIL_EQ:
+   case LINK_TRAINING_EQ_FAIL_CR_PARTIAL:
{
if (!reached_minimum_lane_count(cur->lane_count)) {
cur->lane_count = reduce_lane_count(cur->lane_count);
diff --git a/drivers/gpu/drm/amd/display/include/link_service_types.h 
b/drivers/gpu/drm/amd/display/include/link_service_types.h
index 447a56286dd0..23f7d7354aaa 100644
--- a/drivers/gpu/drm/amd/display/include/link_service_types.h
+++ b/drivers/gpu/drm/amd/display/include/link_service_types.h
@@ -67,6 +67,8 @@ enum link_training_result {
LINK_TRAINING_CR_FAIL_LANE23,
/* CR DONE bit is cleared during EQ step */
LINK_TRAINING_EQ_FAIL_CR,
+   /* CR DONE bit is cleared but LANE0_CR_DONE is set during EQ step */
+   LINK_TRAINING_EQ_FAIL_CR_PARTIAL,
/* other failure during EQ step */
LINK_TRAINING_EQ_FAIL_EQ,
LINK_TRAINING_LQA_FAIL,
-- 
2.36.1



[linux-next:master] BUILD REGRESSION 6d0c806803170f120f8cb97b321de7bd89d3a791

2022-06-10 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 6d0c806803170f120f8cb97b321de7bd89d3a791  Add linux-next specific 
files for 20220610

Error/Warning reports:

https://lore.kernel.org/llvm/202205220805.wrw5woek-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

arch/riscv/include/asm/pgtable-64.h:215:2: error: invalid input constraint '0' 
in asm
arch/riscv/include/asm/pgtable.h:257:2: error: invalid input constraint '0' in 
asm
drivers/gpu/drm/amd/amdgpu/../display/include/ddc_service_types.h:130:17: 
warning: 'DP_SINK_BRANCH_DEV_NAME_7580' defined but not used 
[-Wunused-const-variable=]
drivers/misc/cardreader/rts5261.c:406:13: warning: variable 'setting_reg2' is 
used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
drivers/staging/rtl8723bs/hal/hal_btcoex.c:1182:30: warning: variable 
'pHalData' set but not used [-Wunused-but-set-variable]

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-allyesconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- arc-allyesconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- arm-allmodconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- arm-allyesconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- arm64-allyesconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- i386-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-include-ddc_service_types.h:warning:DP_SINK_BRANCH_DEV_NAME_7580-defined-but-not-used
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- ia64-allmodconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- m68k-allmodconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- m68k-allyesconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- mips-allmodconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- mips-allyesconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- nios2-allyesconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- parisc-allyesconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- powerpc-allmodconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- powerpc-allyesconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- riscv-allmodconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- riscv-allyesconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- sh-allmodconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- sparc-allyesconfig
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
|-- x86_64-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-include-ddc_service_types.h:warning:DP_SINK_BRANCH_DEV_NAME_7580-defined-but-not-used
|   `-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used
`-- xtensa-allyesconfig
`-- 
drivers-staging-rtl8723bs-hal-hal_btcoex.c:warning:variable-pHalData-set-but-not-used

clang_recent_errors
|-- i386-randconfig-a013
|   `-- 
drivers-misc-cardreader-rts5261.c:warning:variable-setting_reg2-is-used-uninitialized-whenever-if-condition-is-false
`-- riscv-randconfig-r013-20220610
|-- arch-riscv-include-asm-pgtable-.h:error:invalid-input-constraint-in-asm
`-- arch-riscv-include-asm-pgtable.h:error:invalid-input-constraint-in-asm

elapsed time: 873m

configs tested: 61
configs skipped: 3

gcc tested configs:
arm defconfig
arm  allmodconfig
arm  allyesconfig
arm64   defconfig
arm64allyesconfig
um i386_defconfig
um   x86_64_defconfig
ia64defconfig
ia64 allmodconfig
ia64 allyesconfig
m68kdefconfig
m68k allyesconfig
m68k allmodconfig
nios2   defconfig
arc  allyesconfig
alpha   defconfig
cskydefconfig
nios2

DRM/KMS PWL API Thoughts and Questions

2022-06-10 Thread Harry Wentland
(I'm sending this as an email as lowest common denominator but feel an issue on 
the color-and-hdr repo would be a better interface for productive discussion. 
Please pop over to https://gitlab.freedesktop.org/pq/color-and-hdr/-/issues/10 
if you agree. Hopefully we can drive the discussion there but if there is a 
strong preference for email that works as well. :) )

I've wanted to start a thread to discuss the use of PWL APIs that were 
introduced by Uma a year ago and for which Bhanuprakash provided IGT tests. I 
have come to like the API but as we're getting closer to a real-world use of it 
I have a few questions and comments. As with a lot of complex APIs the devil is 
in the details. Some of those details are currently underspecified, or 
underdocumented and it's important that we all interpret the API the same way.

**The API**

The original patches posted by Uma:
https://patchwork.freedesktop.org/series/90822/
https://patchwork.freedesktop.org/series/90826/

The IGT tests for PWL API:
https://patchwork.freedesktop.org/series/96895/

I've rebased the kernel patches on a slightly more recent kernel, along with an 
AMD implementation:
https://gitlab.freedesktop.org/hwentland/linux/-/tree/color-and-hdr

I've also rebased them on an IGT tree, but that's not too up-to-date:
https://gitlab.freedesktop.org/hwentland/igt-gpu-tools/-/tree/color-and-hdr


**Why do I like the API?**

In order to allow HW composition of HDR planes in linear space we need the 
ability to program at least a per-CRTC regamma (inv_EOTF) to go from linear to 
wire format post-blending. Since userspace might want to apply corrections on 
top of a simple transfer function (such as PQ, BT.709, etc.) it would like a 
way to set a custom LUT.

The existing CRTC gamma LUT defines equally spaced entries. As Pekka shows in 
[1] equally-spaced LUTs have unacceptable error for regamma/inv_EOTF. Hence we 
need finer granularity of our entries near zero while coarse granularity works 
fine toward the brighter values.

[1] https://gitlab.freedesktop.org/pq/color-and-hdr/-/merge_requests/9

HW (at least AMD and Intel HW) implements this ability as segmented piece-wise 
linear LUTs. These define segments of equally spaced entries. These segments 
are constrained by the HW implementation. I like how the PWL API allows 
different drivers to specify the constraints imposed by different HW while 
allowing userspace a generic way of parsing the PWL. This also avoids complex 
calculations in the kernel driver, which might be required for other APIs one 
could envision. If anyone likes I can elaborate on some ideas for an alternate 
API, though all of them will require non-trivial transformations by the kernel 
driver in order to program them to HW.


**Nitpicks**

The API defines everything inside the segments, including flags and values that 
apply to the entire PWL, such as DRM_MODE_LUT_GAMMA, 
DRM_MODE_LUT_REFLECT_NEGATIVE, input_bpc, and output_bpc. If these don't stay 
constant for segments it might complicate the interpretation of segments. I 
suggest we consider these as effectively applying to the entire PWL. We could 
encode them in an overall drm_color_lut struct that includes an array of 
drm_color_lut_range but that's probably not necessary, hence why I called this 
out as a nitpick. I would just like us to be aware of this ambiguity and
document that these values apply to the entire PWL.


**How to read the PWL**

Let me first give a summary for how this LUT is used in userspace. If you're 
familiar with this please review and comment if I got things wrong. As I 
mentioned, a lot of this is underspecified at the moment so you're reading my 
interpretation.

You can see this behavior in plane_degamma_test [2] in the kms_color.c IGT test 
suite. I suggest the plane_degamma_test here instead of the
test_pipe_gamma test as the latter still has Intelisms (assumptions around
Intel driver/HW behavior) and will not work for other drivers.

Iterate over all enums in PLANE_DEGAMMA_MODE and find a suitable one. How do we 
find the suitable one? More on that below.

Once we have the right PLANE_DEGAMMA_MODE we read the blob for the blob ID 
associated with the PLANE_DEGAMMA_MODE enum. We interpret the blob as an array 
of drm_color_lut_range. See get_segment_data [3].

We can think of our LUT/PWL as f(x) = y. For a traditional equally spaced LUT 
with 1024 entries x would be 0, 1, 2, ..., 1023. For a PWL LUT we need to parse 
the segment data provided in drm_color_lut_range.

Let's look at the 2nd-last entry of the nonlinear_pwl definition for the AMD 
driver [4] (I've corrected it here and dropped the DRM_MODE_LUT_REUSE_LAST but
it's still incorrect in the link) and simplify it to 4 entries for sake of
readability:

{
.flags = (DRM_MODE_LUT_GAMMA | DRM_MODE_LUT_REFLECT_NEGATIVE | 
DRM_MODE_LUT_INTERPOLATE | DRM_MODE_LUT_NON_DECREASING),
.count = 4,
.input_bpc = 13, .output_bpc = 18,
.start = 1 << 12, .end = 

Re: [PATCH] drm/amd/display: dml: move some variables to heap

2022-06-10 Thread Deucher, Alexander
[Public]

Acked-by: Alex Deucher 

From: amd-gfx  on behalf of Aurabindo 
Pillai 
Sent: Friday, June 10, 2022 1:26 PM
To: amd-gfx@lists.freedesktop.org ; Deucher, 
Alexander 
Cc: Siqueira, Rodrigo ; Wentland, Harry 

Subject: [PATCH] drm/amd/display: dml: move some variables to heap

[Why]
To reduce stack usage, move some variables into heap in the DML function
dml32_ModeSupportAndSystemConfigurationFull()

Fixes: d03037269bf2d ("drm/amd/display: DML changes for DCN32/321")
Signed-off-by: Aurabindo Pillai 
---
 .../display/dc/dml/dcn32/display_mode_vba_32.c  | 17 +++--
 .../drm/amd/display/dc/dml/display_mode_vba.h   |  3 +++
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 5828e60f291d..b9f5bfa67791 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -1675,9 +1675,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
 bool FullFrameMALLPStateMethod;
 bool SubViewportMALLPStateMethod;
 bool PhantomPipeMALLPStateMethod;
-   double MaxTotalVActiveRDBandwidth;
-   double DSTYAfterScaler[DC__NUM_DPP__MAX];
-   double DSTXAfterScaler[DC__NUM_DPP__MAX];
 unsigned int MaximumMPCCombine;

 #ifdef __DML_VBA_DEBUG__
@@ -3095,10 +3092,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
 }

 //Vertical Active BW support check
-   MaxTotalVActiveRDBandwidth = 0;
+   
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth
 = 0;

 for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
-   MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + 
mode_lib->vba.ReadBandwidthLuma[k]
+   
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth
 += mode_lib->vba.ReadBandwidthLuma[k]
 + mode_lib->vba.ReadBandwidthChroma[k];
 }

@@ -3115,7 +3112,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
 * mode_lib->vba.DRAMChannelWidth
 * (i < 2 ? 
mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE
 : 
mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation)
 / 100);

-   if (MaxTotalVActiveRDBandwidth
+   if 
(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth
 <= 
mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
 
mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] = true;
 } else {
@@ -3281,8 +3278,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
 
mode_lib->vba.SwathHeightCThisState[k], mode_lib->vba.TWait,

 /* Output */
-   [k],
-   [k],
+   
>dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k],
+   
>dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler[k],
 
_lib->vba.LineTimesForPrefetch[k],
 
_lib->vba.PrefetchBW[k],
 
_lib->vba.LinesForMetaPTE[k],
@@ -3579,8 +3576,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
 mode_lib->vba.NoOfDPPThisState,
 
mode_lib->vba.BytePerPixelInDETY,
 
mode_lib->vba.BytePerPixelInDETC,
-   DSTXAfterScaler,
-   DSTYAfterScaler,
+   
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler,
+   
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler,
 mode_lib->vba.WritebackEnable,
 
mode_lib->vba.WritebackPixelFormat,
 
mode_lib->vba.WritebackDestinationWidth,
diff --git 

[PATCH] drm/amd/display: dml: move some variables to heap

2022-06-10 Thread Aurabindo Pillai
[Why]
To reduce stack usage, move some variables into heap in the DML function
dml32_ModeSupportAndSystemConfigurationFull()

Fixes: d03037269bf2d ("drm/amd/display: DML changes for DCN32/321")
Signed-off-by: Aurabindo Pillai 
---
 .../display/dc/dml/dcn32/display_mode_vba_32.c  | 17 +++--
 .../drm/amd/display/dc/dml/display_mode_vba.h   |  3 +++
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 5828e60f291d..b9f5bfa67791 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -1675,9 +1675,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
bool FullFrameMALLPStateMethod;
bool SubViewportMALLPStateMethod;
bool PhantomPipeMALLPStateMethod;
-   double MaxTotalVActiveRDBandwidth;
-   double DSTYAfterScaler[DC__NUM_DPP__MAX];
-   double DSTXAfterScaler[DC__NUM_DPP__MAX];
unsigned int MaximumMPCCombine;
 
 #ifdef __DML_VBA_DEBUG__
@@ -3095,10 +3092,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
}
 
//Vertical Active BW support check
-   MaxTotalVActiveRDBandwidth = 0;
+   
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth
 = 0;
 
for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
-   MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + 
mode_lib->vba.ReadBandwidthLuma[k]
+   
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth
 += mode_lib->vba.ReadBandwidthLuma[k]
+ mode_lib->vba.ReadBandwidthChroma[k];
}
 
@@ -3115,7 +3112,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
* mode_lib->vba.DRAMChannelWidth
* (i < 2 ? 
mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE
 : 
mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation)
 / 100);
 
-   if (MaxTotalVActiveRDBandwidth
+   if 
(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth
<= 
mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][j]) {

mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] = true;
} else {
@@ -3281,8 +3278,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l

mode_lib->vba.SwathHeightCThisState[k], mode_lib->vba.TWait,
 
/* Output */
-   [k],
-   [k],
+   
>dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k],
+   
>dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler[k],

_lib->vba.LineTimesForPrefetch[k],

_lib->vba.PrefetchBW[k],

_lib->vba.LinesForMetaPTE[k],
@@ -3579,8 +3576,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
mode_lib->vba.NoOfDPPThisState,

mode_lib->vba.BytePerPixelInDETY,

mode_lib->vba.BytePerPixelInDETC,
-   DSTXAfterScaler,
-   DSTYAfterScaler,
+   
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler,
+   
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler,
mode_lib->vba.WritebackEnable,

mode_lib->vba.WritebackPixelFormat,

mode_lib->vba.WritebackDestinationWidth,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 43e327080552..9ad49ad38814 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -204,6 +204,9 @@ struct dml32_ModeSupportAndSystemConfigurationFull {

[PATCH v2 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-10 Thread Graham Sider
Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
GART for usermode queues in order to support oversubscription. In the
case that work is submitted to an unmapped queue, MES must have a GART
wptr address to determine whether the queue should be mapped.

This change is accompanied with changes in MES and is applicable for
MES_VERSION >= 3.

v2: Update MES_VERSION check from 2 to 3.

Signed-off-by: Graham Sider 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 39 
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 45 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
 .../amd/amdkfd/kfd_process_queue_manager.c| 19 +---
 7 files changed, 110 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 429b16ba10bf..dba26d1e3be9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -301,6 +301,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct 
amdgpu_device *adev,
struct kgd_mem *mem, void **kptr, uint64_t *size);
 void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem);
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
amdgpu_bo *bo);
 
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index efab923056f4..2d452655eb04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2030,6 +2030,45 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
return ret;
 }
 
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
amdgpu_bo *bo)
+{
+   int ret;
+
+   ret = amdgpu_bo_reserve(bo, true);
+   if (ret) {
+   pr_err("Failed to reserve bo. ret %d\n", ret);
+   goto err_reserve_bo_failed;
+   }
+
+   ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+   if (ret) {
+   pr_err("Failed to pin bo. ret %d\n", ret);
+   goto err_pin_bo_failed;
+   }
+
+   ret = amdgpu_ttm_alloc_gart(>tbo);
+   if (ret) {
+   pr_err("Failed to bind bo to GART. ret %d\n", ret);
+   goto err_map_bo_gart_failed;
+   }
+
+   amdgpu_amdkfd_remove_eviction_fence(
+   bo, bo->kfd_bo->process_info->eviction_fence);
+   list_del_init(>kfd_bo->validate_list.head);
+
+   amdgpu_bo_unreserve(bo);
+
+   return 0;
+
+err_map_bo_gart_failed:
+   amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+   amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+   return ret;
+}
+
 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem, void **kptr, uint64_t *size)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e9766e165c38..58d5ebed1b32 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -290,6 +290,11 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
struct queue_properties q_properties;
uint32_t doorbell_offset_in_process = 0;
 
+   struct amdgpu_bo_va_mapping *wptr_mapping;
+   struct interval_tree_node *wptr_node;
+   struct amdgpu_vm *wptr_vm;
+   struct amdgpu_bo *wptr_bo = NULL;
+
memset(_properties, 0, sizeof(struct queue_properties));
 
pr_debug("Creating queue ioctl\n");
@@ -316,12 +321,44 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
goto err_bind_process;
}
 
+   /* Starting with GFX11, MES requires wptr BOs to be GTT 
allocated/mapped to
+* GART for usermode queues in order to support oversubscription. In the
+* case that work is submitted to an unmapped queue, MES must have a 
GART
+* wptr address to determine whether the queue should be mapped.
+*/
+   if (dev->shared_resources.enable_mes && (dev->adev->mes.sched_version & 
0xff) >= 3) {
+   wptr_vm = drm_priv_to_vm(pdd->drm_priv);
+   err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+   if (err)
+   goto err_wptr_bo_reserve;
+
+   wptr_node = interval_tree_iter_first(_vm->va,
+   args->write_pointer_address >> PAGE_SHIFT,
+   args->write_pointer_address >> PAGE_SHIFT);
+   if (!wptr_node) {
+   pr_err("Failed to lookup wptr bo\n");
+   

[PATCH v2 1/3] drm/amdgpu: Fetch MES scheduler/KIQ versions

2022-06-10 Thread Graham Sider
Store MES scheduler and MES KIQ version numbers in amdgpu_mes for GFX11.

Signed-off-by: Graham Sider 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  3 +++
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 12 
 2 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 92ddee5e33db..aa06c8396ee0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -64,6 +64,9 @@ struct amdgpu_mes {
 
spinlock_t  queue_id_lock;
 
+   uint32_tsched_version;
+   uint32_tkiq_version;
+
uint32_ttotal_max_queue;
uint32_tdoorbell_id_offset;
uint32_tmax_doorbell_slices;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index e4eb87689f7f..2a9ef308e71c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -890,6 +890,18 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev,
mes_v11_0_queue_init_register(ring);
}
 
+   /* get MES scheduler/KIQ versions */
+   mutex_lock(>srbm_mutex);
+   soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+   if (pipe == AMDGPU_MES_SCHED_PIPE)
+   adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+   else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+   adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+
+   soc21_grbm_select(adev, 0, 0, 0, 0);
+   mutex_unlock(>srbm_mutex);
+
return 0;
 }
 
-- 
2.25.1



[PATCH v2 3/3] drm/amdgpu: Update mes_v11_api_def.h

2022-06-10 Thread Graham Sider
Update MES API to support oversubscription without aggregated doorbell
for usermode queues.

Signed-off-by: Graham Sider 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   | 1 +
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c| 3 +++
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 ++
 drivers/gpu/drm/amd/include/mes_v11_api_def.h | 4 +++-
 5 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 2e86baa32c55..3d9a81a8fa1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -681,6 +681,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int 
gang_id,
queue_input.wptr_addr = qprops->wptr_gpu_addr;
queue_input.queue_type = qprops->queue_type;
queue_input.paging = qprops->paging;
+   queue_input.oversubscription_no_aggregated_en = 0;
 
r = adev->mes.funcs->add_hw_queue(>mes, _input);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index aa06c8396ee0..26765a9946a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -207,6 +207,7 @@ struct mes_add_queue_input {
uint32_tdebug_vmid;
uint64_ttba_addr;
uint64_ttma_addr;
+   uint64_toversubscription_no_aggregated_en;
 };
 
 struct mes_remove_queue_input {
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 2a9ef308e71c..95a1394d3943 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -163,6 +163,8 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gws_size = input->gws_size;
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
+   mes_add_queue_pkt.oversubscription_no_aggregated_en =
+   input->oversubscription_no_aggregated_en;
 
mes_add_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
@@ -341,6 +343,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes 
*mes)
mes_set_hw_res_pkt.disable_reset = 1;
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+   mes_set_hw_res_pkt.oversubscription_timer = 50;
 
mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d8de2fbdfc7d..762bc6059387 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -235,6 +235,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
} else
queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
 
+   queue_input.oversubscription_no_aggregated_en = 1;
+
queue_input.paging = false;
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h 
b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
index f9d02d7bdf77..95f0246eb045 100644
--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
@@ -226,6 +226,7 @@ union MESAPI_SET_HW_RESOURCES {
};
uint32_tuint32_t_all;
};
+   uint32_toversubscription_timer;
};
 
uint32_tmax_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -265,7 +266,8 @@ union MESAPI__ADD_QUEUE {
uint32_t is_gang_suspended  : 1;
uint32_t is_tmz_queue   : 1;
uint32_t map_kiq_utility_queue  : 1;
-   uint32_t reserved   : 23;
+   uint32_t oversubscription_no_aggregated_en : 1;
+   uint32_t reserved   : 22;
};
struct MES_API_STATUS   api_status;
uint64_ttma_addr;
-- 
2.25.1



Re: [PATCH 2/2] drm/amd/pm: enable MACO support for SMU 13.0.0

2022-06-10 Thread Alex Deucher
On Fri, Jun 10, 2022 at 2:26 AM Evan Quan  wrote:
>
> Enable BAMACO reset support for SMU 13.0.0.
>
> Signed-off-by: Evan Quan 
> Change-Id: I67696671c216790a0b6d5b84f7d4b430d734ac3c
> --
> v1->v2:
>   - maximum code sharing around smu13.0.0 and smu13.0.7
> ---
>  .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c|  3 +-
>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  |  3 ++
>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 52 +--
>  3 files changed, 7 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> index fba0b87d01fb..f18f9605e586 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> @@ -2271,7 +2271,8 @@ int smu_v13_0_baco_set_state(struct smu_context *smu,
> if (state == SMU_BACO_STATE_ENTER) {
> ret = smu_cmn_send_smc_msg_with_param(smu,
>   SMU_MSG_EnterBaco,
> - 0,
> + smu_baco->maco_support ?
> + BACO_SEQ_BAMACO : 
> BACO_SEQ_BACO,
>   NULL);
> } else {
> ret = smu_cmn_send_smc_msg(smu,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> index 495713e4ebd4..6fb2b072a730 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> @@ -325,6 +325,9 @@ static int smu_v13_0_0_check_powerplay_table(struct 
> smu_context *smu)
> powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO)
> smu_baco->platform_support = true;

Not really related to this patch, but is
SMU_13_0_0_PP_PLATFORM_CAP_MACO really used to determine with the
platform supports BACO in general?  Is this right?  I don't see how we
would ever end up using plain BACO.

This patch is:
Reviewed-by: Alex Deucher 


>
> +   if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO)
> +   smu_baco->maco_support = true;
> +
> table_context->thermal_controller_type =
> powerplay_table->thermal_controller_type;
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> index 693bb6bda350..96ae5ff8e19e 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> @@ -1551,54 +1551,6 @@ static int smu_v13_0_7_set_power_profile_mode(struct 
> smu_context *smu, long *inp
> return ret;
>  }
>
> -static int smu_v13_0_7_baco_set_state(struct smu_context *smu,
> -enum smu_baco_state state)
> -{
> -   struct smu_baco_context *smu_baco = &smu->smu_baco;
> -   struct amdgpu_device *adev = smu->adev;
> -   bool is_maco_support = smu_baco->maco_support;
> -   int ret;
> -
> -   if (smu_v13_0_baco_get_state(smu) == state)
> -   return 0;
> -
> -   if (state == SMU_BACO_STATE_ENTER) {
> -   ret = smu_cmn_send_smc_msg_with_param(smu,
> - SMU_MSG_EnterBaco,
> - (is_maco_support ? 2 : 
> 0),
> - NULL);
> -   } else {
> -   ret = smu_cmn_send_smc_msg(smu,
> -  SMU_MSG_ExitBaco,
> -  NULL);
> -   if (ret)
> -   return ret;
> -
> -   /* clear vbios scratch 6 and 7 for coming asic reinit */
> -   WREG32(adev->bios_scratch_reg_offset + 6, 0);
> -   WREG32(adev->bios_scratch_reg_offset + 7, 0);
> -   }
> -
> -   if (!ret)
> -   smu_baco->state = state;
> -
> -   return ret;
> -}
> -
> -static int smu_v13_0_7_baco_enter(struct smu_context *smu)
> -{
> -   int ret = 0;
> -
> -   ret = smu_v13_0_7_baco_set_state(smu,
> -  SMU_BACO_STATE_ENTER);
> -   if (ret)
> -   return ret;
> -
> -   msleep(10);
> -
> -   return ret;
> -}
> -
>  static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
> .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
> .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
> @@ -1653,8 +1605,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs 
> = {
> .set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
> .baco_is_support = smu_v13_0_baco_is_support,
> .baco_get_state = smu_v13_0_baco_get_state,
> -   .baco_set_state = smu_v13_0_7_baco_set_state,
> -  

Re: [PATCH 1/2] drm/amd/pm: enable UCLK DS feature for SMU 13.0.0

2022-06-10 Thread Alex Deucher
On Fri, Jun 10, 2022 at 2:26 AM Evan Quan  wrote:
>
> The feature is ready with latest PMFW and IFWI.
>
> Signed-off-by: Evan Quan 

Reviewed-by: Alex Deucher 

> Change-Id: I0b1ea6a32bc092eec4b95118a1442597688ee8d0
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> index fda89e309b07..495713e4ebd4 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> @@ -306,6 +306,8 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context 
> *smu,
>
> *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_CTF_BIT);
>
> +   *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_UCLK_BIT);
> +
> return 0;
>  }
>
> --
> 2.29.0
>


Re: [PATCH] drm/amdgpu: Fix GTT size reporting in amdgpu_ioctl

2022-06-10 Thread Alex Deucher
Applied.  Thanks!

Alex

On Fri, Jun 10, 2022 at 10:01 AM Michel Dänzer  wrote:
>
> From: Michel Dänzer 
>
> The commit below changed the TTM manager size unit from pages to
> bytes, but failed to adjust the corresponding calculations in
> amdgpu_ioctl.
>
> Fixes: dfa714b88eb0 ("drm/amdgpu: remove GTT accounting v2")
> Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1930
> Bug: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6642
> Signed-off-by: Michel Dänzer 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 801f6fa692e9..6de63ea6687e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -642,7 +642,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
> struct drm_file *filp)
> atomic64_read(&adev->visible_pin_size),
> vram_gtt.vram_size);
> vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, 
> TTM_PL_TT)->size;
> -   vram_gtt.gtt_size *= PAGE_SIZE;
> vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
> return copy_to_user(out, _gtt,
> min((size_t)size, sizeof(vram_gtt))) ? 
> -EFAULT : 0;
> @@ -675,7 +674,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
> struct drm_file *filp)
> mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
>
> mem.gtt.total_heap_size = gtt_man->size;
> -   mem.gtt.total_heap_size *= PAGE_SIZE;
> mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
> atomic64_read(&adev->gart_pin_size);
> mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
> --
> 2.36.1
>


[PATCH v2] drm/amdkfd: fix warning when CONFIG_HSA_AMD_P2P is not set

2022-06-10 Thread Alex Deucher
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1542:11:
warning: variable 'i' set but not used [-Wunused-but-set-variable]

Reported-by: kernel test robot 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 3e240b22ec91..411447357196 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1539,7 +1539,10 @@ static int kfd_dev_create_p2p_links(void)
 {
struct kfd_topology_device *dev;
struct kfd_topology_device *new_dev;
-   uint32_t i, k;
+#if defined(CONFIG_HSA_AMD_P2P)
+   uint32_t i;
+#endif
+   uint32_t k;
int ret = 0;
 
k = 0;
@@ -1553,7 +1556,6 @@ static int kfd_dev_create_p2p_links(void)
return 0;
 
k--;
-   i = 0;
 
/* create in-direct links */
ret = kfd_create_indirect_link_prop(new_dev, k);
@@ -1562,6 +1564,7 @@ static int kfd_dev_create_p2p_links(void)
 
/* create p2p links */
 #if defined(CONFIG_HSA_AMD_P2P)
+   i = 0;
list_for_each_entry(dev, _device_list, list) {
if (dev == new_dev)
break;
-- 
2.35.3



Re: [PATCH] drm/amdkfd: fix warning when CONFIG_HSA_AMD_P2P is not set

2022-06-10 Thread Alex Deucher
On Fri, Jun 10, 2022 at 11:43 AM Alex Deucher  wrote:
>
> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1542:11:
> warning: variable 'i' set but not used [-Wunused-but-set-variable]
>
> Reported-by: kernel test robot 
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 4 
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 7 +--
>  2 files changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index aebc384531ac..88a0e3ac0f33 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -773,6 +773,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
>   */
>  uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr)
>  {
> +   dev_info(adev->dev, "amdgpu_gmc_vram_mc2pa: 0x%0llx, 0x%0llx, 
> 0x%0llx\n",
> +mc_addr,
> +adev->gmc.vram_start,
> +adev->vm_manager.vram_base_offset);
> return mc_addr - adev->gmc.vram_start + 
> adev->vm_manager.vram_base_offset;
>  }

Ignore this hunk.  Didn't mean to commit this.

Alex

>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 3e240b22ec91..411447357196 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -1539,7 +1539,10 @@ static int kfd_dev_create_p2p_links(void)
>  {
> struct kfd_topology_device *dev;
> struct kfd_topology_device *new_dev;
> -   uint32_t i, k;
> +#if defined(CONFIG_HSA_AMD_P2P)
> +   uint32_t i;
> +#endif
> +   uint32_t k;
> int ret = 0;
>
> k = 0;
> @@ -1553,7 +1556,6 @@ static int kfd_dev_create_p2p_links(void)
> return 0;
>
> k--;
> -   i = 0;
>
> /* create in-direct links */
> ret = kfd_create_indirect_link_prop(new_dev, k);
> @@ -1562,6 +1564,7 @@ static int kfd_dev_create_p2p_links(void)
>
> /* create p2p links */
>  #if defined(CONFIG_HSA_AMD_P2P)
> +   i = 0;
> list_for_each_entry(dev, _device_list, list) {
> if (dev == new_dev)
> break;
> --
> 2.35.3
>


[PATCH] drm/amdgpu/display: make FP handling in Makefiles consistent

2022-06-10 Thread Alex Deucher
Use the same pattern as the DML Makefile and while we are here
add a missing x86 guard around the msse flags for DCN3.2.x.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile | 3 +--
 drivers/gpu/drm/amd/display/dc/dcn201/Makefile  | 1 -
 drivers/gpu/drm/amd/display/dc/dcn30/Makefile   | 6 ++
 drivers/gpu/drm/amd/display/dc/dcn302/Makefile  | 8 +++-
 drivers/gpu/drm/amd/display/dc/dcn32/Makefile   | 5 +++--
 drivers/gpu/drm/amd/display/dc/dcn321/Makefile  | 5 +++--
 6 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
index c935c10b5f4f..7b505e1e9308 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -180,7 +180,7 @@ CLK_MGR_DCN32 = dcn32_clk_mgr.o dcn32_clk_mgr_smu_msg.o
 AMD_DAL_CLK_MGR_DCN32 = $(addprefix 
$(AMDDALPATH)/dc/clk_mgr/dcn32/,$(CLK_MGR_DCN32))
 
 ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -msse
+CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -msse
 endif
 
 ifdef CONFIG_PPC64
@@ -191,7 +191,6 @@ ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
 endif
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float
 endif
 
 ifdef CONFIG_X86
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
index f68038ceb1b1..96cbd4ccd344 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
@@ -18,7 +18,6 @@ ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
 endif
-CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o += -mhard-float
 endif
 
 ifdef CONFIG_X86
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
index dfd77b3cc84d..c20331eb62e0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
@@ -32,8 +32,8 @@ DCN30 = dcn30_init.o dcn30_hubbub.o dcn30_hubp.o dcn30_dpp.o 
dcn30_optc.o \
 
 
 ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -msse
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -msse
+CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -msse
+CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse
 endif
 
 ifdef CONFIG_PPC64
@@ -45,8 +45,6 @@ ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
 endif
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -mhard-float
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -mhard-float
 endif
 
 ifdef CONFIG_X86
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
index f9561d7f97a1..e4b69ad0dde5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
@@ -8,7 +8,7 @@
 DCN3_02 = dcn302_init.o dcn302_hwseq.o dcn302_resource.o
 
 ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -msse
+CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -msse
 endif
 
 ifdef CONFIG_PPC64
@@ -16,6 +16,12 @@ CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := 
-mhard-float -maltivec
 endif
 
 ifdef CONFIG_X86
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
+
 ifdef IS_OLD_GCC
 # Stack alignment mismatch, proceed with caution.
 # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
index 3d09db3070f4..34f2e37b6704 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
@@ -16,7 +16,7 @@ DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_hwseq.o 
dcn32_init.o \
dcn32_mpc.o
 
 ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource.o := -msse
+CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource.o := -mhard-float -msse
 endif
 
 ifdef CONFIG_PPC64
@@ -27,9 +27,9 @@ ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
 endif
-CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource.o += -mhard-float
 endif
 
+ifdef CONFIG_X86
 ifdef IS_OLD_GCC
 # Stack alignment mismatch, proceed with caution.
 # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
@@ -38,6 +38,7 @@ CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource.o += 
-mpreferred-stack-boundary=4
 else
 CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource.o += -msse2
 endif
+endif
 
 AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
index 5896ca303e39..e554fd6c16f2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
+++ 

[PATCH] drm/amdkfd: fix warning when CONFIG_HSA_AMD_P2P is not set

2022-06-10 Thread Alex Deucher
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1542:11:
warning: variable 'i' set but not used [-Wunused-but-set-variable]

Reported-by: kernel test robot 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 4 
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 7 +--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index aebc384531ac..88a0e3ac0f33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -773,6 +773,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
  */
 uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr)
 {
+   dev_info(adev->dev, "amdgpu_gmc_vram_mc2pa: 0x%0llx, 0x%0llx, 
0x%0llx\n",
+mc_addr,
+adev->gmc.vram_start,
+adev->vm_manager.vram_base_offset);
return mc_addr - adev->gmc.vram_start + 
adev->vm_manager.vram_base_offset;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 3e240b22ec91..411447357196 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1539,7 +1539,10 @@ static int kfd_dev_create_p2p_links(void)
 {
struct kfd_topology_device *dev;
struct kfd_topology_device *new_dev;
-   uint32_t i, k;
+#if defined(CONFIG_HSA_AMD_P2P)
+   uint32_t i;
+#endif
+   uint32_t k;
int ret = 0;
 
k = 0;
@@ -1553,7 +1556,6 @@ static int kfd_dev_create_p2p_links(void)
return 0;
 
k--;
-   i = 0;
 
/* create in-direct links */
ret = kfd_create_indirect_link_prop(new_dev, k);
@@ -1562,6 +1564,7 @@ static int kfd_dev_create_p2p_links(void)
 
/* create p2p links */
 #if defined(CONFIG_HSA_AMD_P2P)
+   i = 0;
list_for_each_entry(dev, _device_list, list) {
if (dev == new_dev)
break;
-- 
2.35.3



Re: [PATCH v6 00/22] Add generic memory shrinker to VirtIO-GPU and Panfrost DRM drivers

2022-06-10 Thread Dmitry Osipenko
On 5/27/22 02:50, Dmitry Osipenko wrote:
> Hello,
> 
> This patchset introduces memory shrinker for the VirtIO-GPU DRM driver
> and adds memory purging and eviction support to VirtIO-GPU driver.
> 
> The new dma-buf locking convention is introduced here as well.
> 
> During OOM, the shrinker will release BOs that are marked as "not needed"
> by userspace using the new madvise IOCTL, it will also evict idling BOs
> to SWAP. The userspace in this case is the Mesa VirGL driver, it will mark
> the cached BOs as "not needed", allowing kernel driver to release memory
> of the cached shmem BOs on lowmem situations, preventing OOM kills.
> 
> The Panfrost driver is switched to use generic memory shrinker.
> 
> This patchset includes improvements and fixes for various things that
> I found while was working on the shrinker.
> 
> The Mesa and IGT patches will be kept on hold until this kernel series
> will be approved and merged.
> 
> This patchset was tested using Qemu and crosvm, including both cases of
> IOMMU off/on.
> 
> Mesa: https://gitlab.freedesktop.org/digetx/mesa/-/commits/virgl-madvise
> IGT:  
> https://gitlab.freedesktop.org/digetx/igt-gpu-tools/-/commits/virtio-madvise
>   
> https://gitlab.freedesktop.org/digetx/igt-gpu-tools/-/commits/panfrost-madvise
> 
> Changelog:
> 
> v6: - Added new VirtIO-related fix patch that previously was sent separately
>   and didn't get much attention:
> 
> drm/gem: Properly annotate WW context on drm_gem_lock_reservations() 
> error
> 
> - Added new patch that fixes mapping of imported dma-bufs for
>   Tegra DRM and other affected drivers. It's also handy to have it
>   for switching to the new dma-buf locking convention scheme:
> 
> drm/gem: Move mapping of imported dma-bufs to drm_gem_mmap_obj()
> 
> - Added new patch that fixes shrinker list corruption for stable Panfrost
>   driver:
> 
> drm/panfrost: Fix shrinker list corruption by madvise IOCTL
> 
> - Added new minor patch-fix for drm-shmem:
> 
> drm/shmem-helper: Add missing vunmap on error
> 
> - Added fixes tag to the "Put mapping ..." patch like was suggested by
>   Steven Price.
> 
> - Added new VirtIO-GPU driver improvement patch:
> 
> drm/virtio: Return proper error codes instead of -1
> 
> - Reworked shrinker patches like was suggested by Daniel Vetter:
> 
> - Introduced the new locking convention for dma-bufs. Tested on
>   VirtIO-GPU, Panfrost, Lima, Tegra and Intel selftests.
> 
> - Dropped separate purge() callback. Now single evict() does
>   everything.
> 
> - Dropped swap_in() callback from drm-shmem objects. DRM drivers
>   now could and should restore only the required mappings.
> 
> - Dropped dynamic counting of evictable pages. This simplifies
>   code in exchange to *potentially* burning more CPU time on OOM.
> 
> v5: - Added new for-stable patch "drm/panfrost: Put mapping instead of
>   shmem obj on panfrost_mmu_map_fault_addr() error" that corrects GEM's
>   refcounting in case of error.
> 
> - The drm_gem_shmem_v[un]map() now takes a separate vmap_lock for
>   imported GEMs to avoid recursive locking of DMA reservations.
>   This addresses v4 comment from Thomas Zimmermann about the potential
>   deadlocking of vmapping.
> 
> - Added ack from Thomas Zimmermann to "drm/shmem-helper: Correct
>   doc-comment of drm_gem_shmem_get_sg_table()" patch.
> 
> - Dropped explicit shmem states from the generic shrinker patch as
>   was requested by Thomas Zimmermann.
> 
> - Improved variable names and comments of the generic shrinker code.
> 
> - Extended drm_gem_shmem_print_info() with the shrinker-state info in
>   the "drm/virtio: Support memory shrinking" patch.
> 
> - Moved evict()/swap_in()/purge() callbacks from drm_gem_object_funcs
>   to drm_gem_shmem_object in the generic shrinker patch, for more
>   consistency.
> 
> - Corrected bisectability of the patches that was broken in v4
>   by accident.
> 
> - The virtio_gpu_plane_prepare_fb() now uses drm_gem_shmem_pin() instead
>   of drm_gem_shmem_set_unpurgeable_and_unevictable() and does it only for
>   shmem BOs in the "drm/virtio: Support memory shrinking" patch.
> 
> - Made more functions private to drm_gem_shmem_helper.c as was requested
>   by Thomas Zimmermann. This minimizes number of the public shmem helpers.
> 
> v4: - Corrected minor W=1 warnings reported by kernel test robot for v3.
> 
> - Renamed DRM_GEM_SHMEM_PAGES_STATE_ACTIVE/INACTIVE to PINNED/UNPINNED,
>   for more clarity.
> 
> v3: - Hardened shrinker's count() with usage of READ_ONCE() since we don't
>   use atomic type for counting and technically compiler is free to
>   re-fetch counter's variable.
> 
> - "Correct drm_gem_shmem_get_sg_table() error handling" now uses
>   PTR_ERR_OR_ZERO(), fixing typo that was 

[PATCH] drm/amdgpu: Fix GTT size reporting in amdgpu_ioctl

2022-06-10 Thread Michel Dänzer
From: Michel Dänzer 

The commit below changed the TTM manager size unit from pages to
bytes, but failed to adjust the corresponding calculations in
amdgpu_ioctl.

Fixes: dfa714b88eb0 ("drm/amdgpu: remove GTT accounting v2")
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1930
Bug: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6642
Signed-off-by: Michel Dänzer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 801f6fa692e9..6de63ea6687e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -642,7 +642,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
atomic64_read(&adev->visible_pin_size),
vram_gtt.vram_size);
vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, 
TTM_PL_TT)->size;
-   vram_gtt.gtt_size *= PAGE_SIZE;
vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
return copy_to_user(out, _gtt,
min((size_t)size, sizeof(vram_gtt))) ? 
-EFAULT : 0;
@@ -675,7 +674,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
 
mem.gtt.total_heap_size = gtt_man->size;
-   mem.gtt.total_heap_size *= PAGE_SIZE;
mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
atomic64_read(&adev->gart_pin_size);
mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
-- 
2.36.1



Re: [PATCH] drm/amd/display: ignore modifiers when checking for format support

2022-06-10 Thread Pillai, Aurabindo
[AMD Official Use Only - General]

Thanks for noticing, will fix it in a separate patch since I already merged 
this.

--

Regards,
Jay

From: Chen, Guchun 
Sent: Thursday, June 9, 2022 9:28 PM
To: Pillai, Aurabindo ; Olsak, Marek 
; amd-gfx@lists.freedesktop.org 

Cc: Li, Sun peng (Leo) ; Siqueira, Rodrigo 
; Li, Roman ; Qiao, Ken 
; Pillai, Aurabindo ; Deucher, 
Alexander ; Wentland, Harry 
Subject: RE: [PATCH] drm/amd/display: ignore modifiers when checking for format 
support

+   return true;
+   break;

Possibly a coding style problem, 'break' after 'return' looks redundant.

Regards,
Guchun

-Original Message-
From: amd-gfx  On Behalf Of Aurabindo 
Pillai
Sent: Thursday, June 9, 2022 10:27 PM
To: Olsak, Marek ; amd-gfx@lists.freedesktop.org
Cc: Li, Sun peng (Leo) ; Siqueira, Rodrigo 
; Li, Roman ; Qiao, Ken 
; Pillai, Aurabindo ; Deucher, 
Alexander ; Wentland, Harry 
Subject: [PATCH] drm/amd/display: ignore modifiers when checking for format 
support

[Why]
There are cases where swizzle modes are set but modifiers aren't. For such a 
userspace, we need not check modifiers while checking compatibility in the drm 
hook for checking plane format.

Ignore checking modifiers but check the DCN generation for the supported 
swizzle mode.

Signed-off-by: Aurabindo Pillai 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 51 +--
 1 file changed, 46 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 2023baf41b7e..1322df491736 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4938,6 +4938,7 @@ static bool dm_plane_format_mod_supported(struct 
drm_plane *plane,  {
 struct amdgpu_device *adev = drm_to_adev(plane->dev);
 const struct drm_format_info *info = drm_format_info(format);
+   struct hw_asic_id asic_id = adev->dm.dc->ctx->asic_id;
 int i;

 enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) 
& 3; @@ -4955,13 +4956,53 @@ static bool dm_plane_format_mod_supported(struct 
drm_plane *plane,
 return true;
 }

-   /* Check that the modifier is on the list of the plane's supported 
modifiers. */
-   for (i = 0; i < plane->modifier_count; i++) {
-   if (modifier == plane->modifiers[i])
+   /* check if swizzle mode is supported by this version of DCN */
+   switch (asic_id.chip_family) {
+   case FAMILY_SI:
+   case FAMILY_CI:
+   case FAMILY_KV:
+   case FAMILY_CZ:
+   case FAMILY_VI:
+   /* AI and earlier asics does not have modifier support 
*/
+   return false;
+   break;
+   case FAMILY_AI:
+   case FAMILY_RV:
+   case FAMILY_NV:
+   case FAMILY_VGH:
+   case FAMILY_YELLOW_CARP:
+   case AMDGPU_FAMILY_GC_10_3_6:
+   case AMDGPU_FAMILY_GC_10_3_7:
+   switch (AMD_FMT_MOD_GET(TILE, modifier)) {
+   case AMD_FMT_MOD_TILE_GFX9_64K_R_X:
+   case AMD_FMT_MOD_TILE_GFX9_64K_D_X:
+   case AMD_FMT_MOD_TILE_GFX9_64K_S_X:
+   case AMD_FMT_MOD_TILE_GFX9_64K_D:
+   return true;
+   break;
+   default:
+   return false;
+   break;
+   }
+   break;
+   case AMDGPU_FAMILY_GC_11_0_0:
+   switch (AMD_FMT_MOD_GET(TILE, modifier)) {
+   case AMD_FMT_MOD_TILE_GFX11_256K_R_X:
+   case AMD_FMT_MOD_TILE_GFX9_64K_R_X:
+   case AMD_FMT_MOD_TILE_GFX9_64K_D_X:
+   case AMD_FMT_MOD_TILE_GFX9_64K_S_X:
+   case AMD_FMT_MOD_TILE_GFX9_64K_D:
+   return true;
+   break;
+   default:
+   return false;
+   break;
+   }
+   break;
+   default:
+   ASSERT(0); /* Unknown asic */
 break;
 }
-   if (i == plane->modifier_count)
-   return false;

 /*
  * For D swizzle the canonical modifier depends on the bpp, so check
--
2.36.1



Re: [PATCH 03/13] mm: shmem: provide oom badness for shmem files

2022-06-10 Thread Christian König

Am 10.06.22 um 13:44 schrieb Michal Hocko:

On Fri 10-06-22 12:58:53, Christian König wrote:
[SNIP]

I do realize this is a long term problem and there is a demand for some
solution at least. I am not sure how to deal with shared resources
myself. The best approximation I can come up with is to limit the scope
of the damage into a memcg context. One idea I was playing with (but
never convinced myself it is really a worth) is to allow a new mode of
the oom victim selection for the global oom event.

And just for the clarity. I have mentioned global oom event here but the
concept could be extended to per-memcg oom killer as well.


Then what exactly do you mean with "limiting the scope of the damage"? 
Cause that doesn't make sense without memcg.



It would be an opt in
and the victim would be selected from the biggest leaf memcg (or kill
the whole memcg if it has group_oom configured).

That would address at least some of the accounting issue because charges
are better tracked than per process memory consumption. It is a crude
and ugly hack and it doesn't solve the underlying problem as shared
resources are not guaranteed to be freed when processes die but maybe it
would be just slightly better than the existing scheme which is clearly
lacking behind existing userspace.

Well, what is so bad at the approach of giving each process holding a
reference to some shared memory it's equal amount of badness even when the
processes belong to different memory control groups?

I am not claiming this is wrong per se. It is just an approximation and
it can surely be wrong in some cases (e.g. in those workloads where the
share memory is mostly owned by one process while the shared content is
consumed by many).


Yeah, completely agree. Basically we can only do an educated guess.

Key point is that we should do the most educated guess we can and not 
just try to randomly kill something until we hit the right target. 
That's essentially what's happening today.



The primary question is whether it actually helps much or what kind of
scenarios it can help with and whether we can actually do better for
those.


Well, it does help massively with a standard Linux desktop and GPU 
workloads (e.g. games).


See what currently happens is that when games allocate for example 
textures the memory for that is not accounted against that game. Instead 
it's usually the display server (X or Wayland) which most of the shared 
resources accounts to because it needs to compose a desktop from it and 
usually also mmaps it for fallback CPU operations.


So what happens when a games over allocates texture resources is that 
your whole desktop restarts because the compositor is killed. This 
obviously also kills the game, but it would be much nice if we would be 
more selective here.


For hardware rendering DMA-buf and GPU drivers are used, but for the 
software fallback shmem files is what is used under the hood as far as I 
know. And the underlying problem is the same for both.



Also do not forget that shared file memory is not the only thing
to care about. What about the kernel memory used on behalf of processes?


Yeah, I'm aware of that as well. But at least inside the GPU drivers we 
try to keep that in a reasonable ratio.



Just consider the above mentioned memcg driven model. It doesn't really
require to chase specific files and do some arbitrary math to share the
responsibility. It has a clear accounting and responsibility model.


Ok, how does that work then?


It shares the same underlying problem that the oom killing is not
resource aware and therefore there is no guarantee that memory really
gets freed.  But it allows sane configurations where shared resources do
not cross memcg boundaries at least. With that in mind and oom_cgroup
semantic you can get at least some semi-sane guarantees. Is it
pefect? No, by any means. But I would expect it to be more predictable.

Maybe we can come up with a saner model, but just going with per file
stats sounds like a hard to predict and debug approach to me. OOM
killing is a very disruptive operation and having random tasks killed
just because they have mapped few pages from a shared resource sounds
like a terrible thing to debug and explain to users.


Well to be honest I think it's much saner than what we do today.

As I said you currently can get any Linux system down within seconds and 
that's basically a perfect deny of service attack.



If you really think that this would be a hard problem for upstreaming we
could as well keep the behavior for memcg as it is for now. We would just
need to adjust the paramters to oom_badness() a bit.

Say we ignore the memcg side of things for now. How does it help long
term? Special casing the global oom is not all that hard but any future
change would very likely be disruptive with some semantic implications
AFAICS.


What else can we do? I mean the desktop instability we are facing is 
really massive.


Regards,
Christian.


Re: [PATCH 03/13] mm: shmem: provide oom badness for shmem files

2022-06-10 Thread Christian König

Am 09.06.22 um 17:07 schrieb Michal Hocko:

On Thu 09-06-22 16:29:46, Christian König wrote:
[...]

Is that a show stopper? How should we address this?

This is a hard problem to deal with and I am not sure this simple
solution is really a good fit. Not only because of the memcg side of
things. I have my doubts that sparse files handling is ok as well.


Well I didn't claim that this would be easy, we just need to start 
somewhere.


Regarding the sparse file handling, how about using 
file->f_mapping->nrpages as badness for shmem files?


That should give us the real number of pages allocated through this 
shmem file and gracefully handles sparse files.



I do realize this is a long term problem and there is a demand for some
solution at least. I am not sure how to deal with shared resources
myself. The best approximation I can come up with is to limit the scope
of the damage into a memcg context. One idea I was playing with (but
never convinced myself it is really a worth) is to allow a new mode of
the oom victim selection for the global oom event. It would be an opt in
and the victim would be selected from the biggest leaf memcg (or kill
the whole memcg if it has group_oom configured).

That would address at least some of the accounting issue because charges
are better tracked than per process memory consumption. It is a crude
and ugly hack and it doesn't solve the underlying problem as shared
resources are not guaranteed to be freed when processes die but maybe it
would be just slightly better than the existing scheme which is clearly
lacking behind existing userspace.


Well, what is so bad at the approach of giving each process holding a 
reference to some shared memory it's equal amount of badness even when 
the processes belong to different memory control groups?


If you really think that this would be a hard problem for upstreaming we 
could as well keep the behavior for memcg as it is for now. We would 
just need to adjust the paramters to oom_badness() a bit.


Regards,
Christian.


[PATCH 1/2] drm/amd/pm: enable UCLK DS feature for SMU 13.0.0

2022-06-10 Thread Evan Quan
The feature is ready with latest PMFW and IFWI.

Signed-off-by: Evan Quan 
Change-Id: I0b1ea6a32bc092eec4b95118a1442597688ee8d0
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index fda89e309b07..495713e4ebd4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -306,6 +306,8 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context 
*smu,
 
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_CTF_BIT);
 
+   *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_UCLK_BIT);
+
return 0;
 }
 
-- 
2.29.0



[PATCH 2/2] drm/amd/pm: enable MACO support for SMU 13.0.0

2022-06-10 Thread Evan Quan
Enable BAMACO reset support for SMU 13.0.0.

Signed-off-by: Evan Quan 
Change-Id: I67696671c216790a0b6d5b84f7d4b430d734ac3c
--
v1->v2:
  - maximum code sharing around smu13.0.0 and smu13.0.7
---
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c|  3 +-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  |  3 ++
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 52 +--
 3 files changed, 7 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index fba0b87d01fb..f18f9605e586 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2271,7 +2271,8 @@ int smu_v13_0_baco_set_state(struct smu_context *smu,
if (state == SMU_BACO_STATE_ENTER) {
ret = smu_cmn_send_smc_msg_with_param(smu,
  SMU_MSG_EnterBaco,
- 0,
+ smu_baco->maco_support ?
+ BACO_SEQ_BAMACO : 
BACO_SEQ_BACO,
  NULL);
} else {
ret = smu_cmn_send_smc_msg(smu,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 495713e4ebd4..6fb2b072a730 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -325,6 +325,9 @@ static int smu_v13_0_0_check_powerplay_table(struct 
smu_context *smu)
powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO)
smu_baco->platform_support = true;
 
+   if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO)
+   smu_baco->maco_support = true;
+
table_context->thermal_controller_type =
powerplay_table->thermal_controller_type;
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 693bb6bda350..96ae5ff8e19e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -1551,54 +1551,6 @@ static int smu_v13_0_7_set_power_profile_mode(struct 
smu_context *smu, long *inp
return ret;
 }
 
-static int smu_v13_0_7_baco_set_state(struct smu_context *smu,
-enum smu_baco_state state)
-{
-   struct smu_baco_context *smu_baco = >smu_baco;
-   struct amdgpu_device *adev = smu->adev;
-   bool is_maco_support = smu_baco->maco_support;
-   int ret;
-
-   if (smu_v13_0_baco_get_state(smu) == state)
-   return 0;
-
-   if (state == SMU_BACO_STATE_ENTER) {
-   ret = smu_cmn_send_smc_msg_with_param(smu,
- SMU_MSG_EnterBaco,
- (is_maco_support ? 2 : 0),
- NULL);
-   } else {
-   ret = smu_cmn_send_smc_msg(smu,
-  SMU_MSG_ExitBaco,
-  NULL);
-   if (ret)
-   return ret;
-
-   /* clear vbios scratch 6 and 7 for coming asic reinit */
-   WREG32(adev->bios_scratch_reg_offset + 6, 0);
-   WREG32(adev->bios_scratch_reg_offset + 7, 0);
-   }
-
-   if (!ret)
-   smu_baco->state = state;
-
-   return ret;
-}
-
-static int smu_v13_0_7_baco_enter(struct smu_context *smu)
-{
-   int ret = 0;
-
-   ret = smu_v13_0_7_baco_set_state(smu,
-  SMU_BACO_STATE_ENTER);
-   if (ret)
-   return ret;
-
-   msleep(10);
-
-   return ret;
-}
-
 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -1653,8 +1605,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = 
{
.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
.baco_is_support = smu_v13_0_baco_is_support,
.baco_get_state = smu_v13_0_baco_get_state,
-   .baco_set_state = smu_v13_0_7_baco_set_state,
-   .baco_enter = smu_v13_0_7_baco_enter,
+   .baco_set_state = smu_v13_0_baco_set_state,
+   .baco_enter = smu_v13_0_baco_enter,
.baco_exit = smu_v13_0_baco_exit,
.set_mp1_state = smu_cmn_set_mp1_state,
 };
-- 
2.29.0