[PATCH] drm/amd/display: Fix up kdoc for 'optc35_set_odm_combine'

2023-08-28 Thread Srinivasan Shanmugam
Fixes the following W=1 kernel build warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn35/dcn35_optc.c:46: warning: This 
comment starts with '/**', but isn't a kernel-doc comment. Refer 
Documentation/doc-guide/kernel-doc.rst
* Enable CRTC

Cc: Qingqing Zhuo 
Cc: Rodrigo Siqueira 
Cc: Harry Wentland 
Cc: Aurabindo Pillai 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c 
b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
index 5f7adc83258b..294799d8c34e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
@@ -43,10 +43,15 @@
optc1->tg_shift->field_name, optc1->tg_mask->field_name
 
 /**
- * Enable CRTC
- * Enable CRTC - call ASIC Control Object to enable Timing generator.
+ * optc35_set_odm_combine() - Enable CRTC - call ASIC Control Object to enable 
Timing generator.
+ *
+ * @optc: timing_generator instance.
+ * @opp_id: OPP instance.
+ * @opp_cnt: OPP count.
+ * @timing: Timing parameters used to configure DCN blocks.
+ *
+ * Return: void.
  */
-
 static void optc35_set_odm_combine(struct timing_generator *optc, int *opp_id, 
int opp_cnt,
struct dc_crtc_timing *timing)
 {
-- 
2.25.1



Re: [PATCH] drm/amdkfd: Checkpoint and restore queues on GFX11

2023-08-28 Thread Alex Deucher
Acked-by: Alex Deucher 

On Fri, Aug 25, 2023 at 4:10 PM David Francis  wrote:
>
> The code in kfd_mqd_manager_v11.c to support criu dump and
> restore of queue state was missing.
>
> Added it; should be equivalent to kfd_mqd_manager_v10.c.
>
> CC: Felix Kuehling 
> Signed-off-by: David Francis 
> ---
>  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  | 41 +++
>  1 file changed, 41 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
> index 2319467d2d95..2a79d37da95d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
> @@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void 
> *mqd,
> return 0;
>  }
>
> +static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, 
> void *ctl_stack_dst)
> +{
> +   struct v11_compute_mqd *m;
> +
> +   m = get_mqd(mqd);
> +
> +   memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd));
> +}
> +
> +static void restore_mqd(struct mqd_manager *mm, void **mqd,
> +   struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
> +   struct queue_properties *qp,
> +   const void *mqd_src,
> +   const void *ctl_stack_src, const u32 ctl_stack_size)
> +{
> +   uint64_t addr;
> +   struct v11_compute_mqd *m;
> +
> +   m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
> +   addr = mqd_mem_obj->gpu_addr;
> +
> +   memcpy(m, mqd_src, sizeof(*m));
> +
> +   *mqd = m;
> +   if (gart_addr)
> +   *gart_addr = addr;
> +
> +   m->cp_hqd_pq_doorbell_control =
> +   qp->doorbell_off <<
> +   CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
> +   pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
> +   m->cp_hqd_pq_doorbell_control);
> +
> +   qp->is_active = 0;
> +}
> +
> +
>  static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
> struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
> struct queue_properties *q)
> @@ -457,6 +494,8 @@ struct mqd_manager *mqd_manager_init_v11(enum 
> KFD_MQD_TYPE type,
> mqd->is_occupied = kfd_is_occupied_cp;
> mqd->mqd_size = sizeof(struct v11_compute_mqd);
> mqd->get_wave_state = get_wave_state;
> +   mqd->checkpoint_mqd = checkpoint_mqd;
> +   mqd->restore_mqd = restore_mqd;
>  #if defined(CONFIG_DEBUG_FS)
> mqd->debugfs_show_mqd = debugfs_show_mqd;
>  #endif
> @@ -500,6 +539,8 @@ struct mqd_manager *mqd_manager_init_v11(enum 
> KFD_MQD_TYPE type,
> mqd->update_mqd = update_mqd_sdma;
> mqd->destroy_mqd = kfd_destroy_mqd_sdma;
> mqd->is_occupied = kfd_is_occupied_sdma;
> +   mqd->checkpoint_mqd = checkpoint_mqd;
> +   mqd->restore_mqd = restore_mqd;
> mqd->mqd_size = sizeof(struct v11_sdma_mqd);
>  #if defined(CONFIG_DEBUG_FS)
> mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
> --
> 2.34.1
>


Re: [PATCH] drm/amdgpu: access RLC_SPM_MC_CNTL through MMIO in SRIOV

2023-08-28 Thread Alex Deucher
Acked-by: Alex Deucher 

On Mon, Aug 28, 2023 at 2:50 AM ZhenGuo Yin  wrote:
>
> Register RLC_SPM_MC_CNTL is not blocked by L1 policy, VF can
> directly access it through MMIO.
>
> Signed-off-by: ZhenGuo Yin 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 10 ++
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 ++
>  2 files changed, 4 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 0aee9c8288a2..65619f73f717 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -7901,18 +7901,12 @@ static void gfx_v10_0_update_spm_vmid_internal(struct 
> amdgpu_device *adev,
>
> /* not for *_SOC15 */
> reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
> -   if (amdgpu_sriov_is_pp_one_vf(adev))
> -   data = RREG32_NO_KIQ(reg);
> -   else
> -   data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
> +   data = RREG32_NO_KIQ(reg);
>
> data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
> data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << 
> RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
>
> -   if (amdgpu_sriov_is_pp_one_vf(adev))
> -   WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
> -   else
> -   WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
> +   WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
>  }
>
>  static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned 
> int vmid)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index b0c32521efdc..7f8c5c6fd36e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -4989,18 +4989,12 @@ static void gfx_v11_0_update_spm_vmid(struct 
> amdgpu_device *adev, unsigned vmid)
> amdgpu_gfx_off_ctrl(adev, false);
>
> reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
> -   if (amdgpu_sriov_is_pp_one_vf(adev))
> -   data = RREG32_NO_KIQ(reg);
> -   else
> -   data = RREG32(reg);
> +   data = RREG32_NO_KIQ(reg);
>
> data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
> data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << 
> RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
>
> -   if (amdgpu_sriov_is_pp_one_vf(adev))
> -   WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
> -   else
> -   WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
> +   WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
>
> amdgpu_gfx_off_ctrl(adev, true);
>  }
> --
> 2.35.1
>


Re: [PATCH] drm/amdkfd: Use partial migrations in GPU page faults

2023-08-28 Thread Felix Kuehling



On 2023-08-28 16:57, Chen, Xiaogang wrote:


On 8/28/2023 2:06 PM, Felix Kuehling wrote:


On 2023-08-24 18:08, Xiaogang.Chen wrote:

From: Xiaogang Chen 

This patch implements partial migration in gpu page fault according 
to migration
granularity(default 2MB) and not split svm range in cpu page fault 
handling.
Now a svm range may have pages from both system ram and vram of one 
gpu.

These changes are expected to improve migration performance and reduce
mmu callback and TLB flush workloads.

Signed-off-by: xiaogang chen 
---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 153 
+++

  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  87 -
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   7 +-
  4 files changed, 162 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

index 7d82c7da223a..5a3aa80a1834 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

   * svm_migrate_ram_to_vram - migrate svm range from system to device
   * @prange: range structure
   * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
   * @mm: the process mm structure
   * @trigger: reason of migration
   *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

   */
  static int
  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+    unsigned long start_mgr, unsigned long last_mgr,
  struct mm_struct *mm, uint32_t trigger)
  {
  unsigned long addr, start, end;
@@ -498,9 +501,9 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

  unsigned long cpages = 0;
  long r = 0;
  -    if (prange->actual_loc == best_loc) {
-    pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
- prange->svms, prange->start, prange->last, best_loc);
+    if (!best_loc) {
+    pr_debug("request svms 0x%p [0x%lx 0x%lx] migrate to sys 
ram\n",

+ prange->svms, start_mgr, last_mgr);
  return 0;
  }
  @@ -513,8 +516,8 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

  pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
   prange->start, prange->last, best_loc);
  -    start = prange->start << PAGE_SHIFT;
-    end = (prange->last + 1) << PAGE_SHIFT;
+    start = start_mgr << PAGE_SHIFT;
+    end = (last_mgr + 1) << PAGE_SHIFT;
    r = svm_range_vram_node_new(node, prange, true);
  if (r) {
@@ -544,10 +547,12 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

    if (cpages) {
  prange->actual_loc = best_loc;
-    svm_range_free_dma_mappings(prange, true);
-    } else {
+    /* only free dma mapping in the migrated range */
+    svm_range_free_dma_mappings(prange, true,  start_mgr - 
prange->start,

+ last_mgr - start_mgr + 1);


This is wrong. If we only migrated some of the pages, we should not 
free the DMA mapping array at all. The array is needed as long as 
there are any valid DMA mappings in it.


yes, I realized it after submit. I can not free DMA mapping array at 
this stage.


The concern(also related to comments below) is I do not know how many 
pages in vram after partial migration. Originally I used bitmap to 
record that.  I used bitmap to record which pages were migrated at 
each migration function. Here I do not need to use the hmm function to get 
that info; inside each migration function we can know which pages got 
migrated, then update the bitmap accordingly.




I think the condition above with cpages should be updated. Instead of 
cpages, we need to keep track of a count of pages in VRAM in struct 
svm_range. See more below.


I think you want to add a new integer in svm_range to remember how many 
pages are on the vram side for each svm_range, instead of a bitmap. There 
is a problem I saw: when we need to split a prange (such as when the user 
uses the set_attr api), how do we know how many pages are in vram for each 
split prange?


Right, that's a bit problematic. But it should be a relatively rare 
corner case. It may be good enough to make a "pessimistic" assumption 
when splitting ranges that have some pages in VRAM, that everything is 
in VRAM. And update that to 0 after migrate_to_ram for the entire range, 
to allow the BO reference to be released.


So in the worst case, you keep your DMA addresses and BOs allocated 
slightly longer than necessary. If that doesn't work, I agree that we 
need a bitmap with one bit per 4KB page. But I hope that can be avoided.


That said, I'm not actually sure why we're freeing the DMA address array 
after migration to RAM at all. I think we still need it even 

Re: [PATCH] drm/amdkfd: Use partial migrations in GPU page faults

2023-08-28 Thread Chen, Xiaogang



On 8/28/2023 2:06 PM, Felix Kuehling wrote:


On 2023-08-24 18:08, Xiaogang.Chen wrote:

From: Xiaogang Chen 

This patch implements partial migration in gpu page fault according 
to migration
granularity(default 2MB) and not split svm range in cpu page fault 
handling.

Now a svm range may have pages from both system ram and vram of one gpu.
These changes are expected to improve migration performance and reduce
mmu callback and TLB flush workloads.

Signed-off-by: xiaogang chen 
---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 153 +++
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  87 -
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   7 +-
  4 files changed, 162 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

index 7d82c7da223a..5a3aa80a1834 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

   * svm_migrate_ram_to_vram - migrate svm range from system to device
   * @prange: range structure
   * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
   * @mm: the process mm structure
   * @trigger: reason of migration
   *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

   */
  static int
  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+    unsigned long start_mgr, unsigned long last_mgr,
  struct mm_struct *mm, uint32_t trigger)
  {
  unsigned long addr, start, end;
@@ -498,9 +501,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,

  unsigned long cpages = 0;
  long r = 0;
  -    if (prange->actual_loc == best_loc) {
-    pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
- prange->svms, prange->start, prange->last, best_loc);
+    if (!best_loc) {
+    pr_debug("request svms 0x%p [0x%lx 0x%lx] migrate to sys 
ram\n",

+ prange->svms, start_mgr, last_mgr);
  return 0;
  }
  @@ -513,8 +516,8 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

  pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
   prange->start, prange->last, best_loc);
  -    start = prange->start << PAGE_SHIFT;
-    end = (prange->last + 1) << PAGE_SHIFT;
+    start = start_mgr << PAGE_SHIFT;
+    end = (last_mgr + 1) << PAGE_SHIFT;
    r = svm_range_vram_node_new(node, prange, true);
  if (r) {
@@ -544,10 +547,12 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

    if (cpages) {
  prange->actual_loc = best_loc;
-    svm_range_free_dma_mappings(prange, true);
-    } else {
+    /* only free dma mapping in the migrated range */
+    svm_range_free_dma_mappings(prange, true,  start_mgr - 
prange->start,

+ last_mgr - start_mgr + 1);


This is wrong. If we only migrated some of the pages, we should not 
free the DMA mapping array at all. The array is needed as long as 
there are any valid DMA mappings in it.


yes, I realized it after submit. I can not free DMA mapping array at 
this stage.


The concern(also related to comments below) is I do not know how many 
pages in vram after partial migration. Originally I used bitmap to 
record that.  I used bitmap to record which pages were migrated at each 
migration function. Here I do not need to use the hmm function to get that 
info; inside each migration function we can know which pages got 
migrated, then update the bitmap accordingly.




I think the condition above with cpages should be updated. Instead of 
cpages, we need to keep track of a count of pages in VRAM in struct 
svm_range. See more below.


I think you want to add a new integer in svm_range to remember how many 
pages are on the vram side for each svm_range, instead of a bitmap. There is 
a problem I saw: when we need to split a prange (such as when the user uses 
the set_attr api), how do we know how many pages are in vram for each split 
prange?



+    } else if (!prange->actual_loc)
+    /* if all pages from prange are at sys ram */
  svm_range_vram_node_free(prange);
-    }
    return r < 0 ? r : 0;
  }
@@ -762,6 +767,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, 
struct svm_range *prange,

   * svm_migrate_vram_to_ram - migrate svm range from device to system
   * @prange: range structure
   * @mm: process mm, use current->mm if NULL
+ * @start_mgr: start page need be migrated to sys ram
+ * @last_mgr: last page need be migrated to sys ram
   * @trigger: reason of migration
   * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is 
CPU page fault callback

   *
@@ -771,7 +778,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, 

Re: [PATCH v3 0/7] GPU workload hints for better performance

2023-08-28 Thread Helen Koike




On 28/08/2023 17:14, Yadav, Arvind wrote:


On 8/28/2023 9:13 PM, Helen Mae Koike Fornazier wrote:
On Monday, August 28, 2023 09:26 -03, Arvind Yadav 
 wrote:



AMDGPU SOCs supports dynamic workload based power profiles, which can
provide fine-tuned performance for a particular type of workload.
This patch series adds an interface to set/reset these power profiles
based on the submitted job. The driver can dynamically switch
the power profiles based on submitted job. This can optimize the power
performance when the particular workload is on.

Hi Arvind,

Would you mind testing your patchset with drm-ci? There is an amdgpu
test there and I would love to get your feedback on the CI.

You basically just need to apply the ci patch which is available on
https://cgit.freedesktop.org/drm/drm/log/?h=topic/drm-ci

There are instructions in the docs, but in short: to configure it, you 
push

your branch to gitlab.freedesktop.org, go to the settings and change the
CI/CD configuration file from .gitlab-ci.yml to 
drivers/gpu/drm/ci/gitlab-ci.yml,

and you can trigger a pipeline on your branch to get tests running.

(by the time of this writing, gitlab.fdo is under maintenance but should
be up soonish).


Hi Helen,

I tried the steps as you mentioned, but it looks like something is 
missing and the build itself is failing.


https://gitlab.freedesktop.org/ArvindYadav/drm-next/-/commits/smu_workload


Thanks for your feedback!

You need to apply this patch 
https://gitlab.freedesktop.org/ArvindYadav/drm-next/-/commit/cc6dcff192d07f9fe82645fbc4213c97e872156b


This patch adds the file drivers/gpu/drm/ci/gitlab-ci.yml for you.

And you can drop the patch where gitlab added the ci template.

I replied here too 
https://gitlab.freedesktop.org/ArvindYadav/drm-next/-/commit/cc6dcff192d07f9fe82645fbc4213c97e872156b


Could you try again with that patch?

Thanks a lot!
Helen




Regards,
~Arvind


Thank you!
Helen


v2:
- Splitting workload_profile_set and workload_profile_put
   into two separate patches.
- Addressed review comment.
- Added new suspend function.
- Added patch to switches the GPU workload mode for KFD.

v3:
- Addressed all review comment.
- Changed the function name from *_set() to *_get().
- Now clearing all the profile in work handler.
- Added *_clear_all function to clear all the power profile.


Arvind Yadav (7):
   drm/amdgpu: Added init/fini functions for workload
   drm/amdgpu: Add new function to set GPU power profile
   drm/amdgpu: Add new function to put GPU power profile
   drm/amdgpu: Add suspend function to clear the GPU power profile.
   drm/amdgpu: Set/Reset GPU workload profile
   drm/amdgpu: switch workload context to/from compute
   Revert "drm/amd/amdgpu: switch on/off vcn power profile mode"

  drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   3 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    |   8 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |   6 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |   5 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  14 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 226 ++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  61 +
  8 files changed, 309 insertions(+), 16 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h

--
2.34.1



Re: [PATCH v3 0/7] GPU workload hints for better performance

2023-08-28 Thread Yadav, Arvind



On 8/28/2023 9:13 PM, Helen Mae Koike Fornazier wrote:

On Monday, August 28, 2023 09:26 -03, Arvind Yadav  wrote:


AMDGPU SOCs supports dynamic workload based power profiles, which can
provide fine-tuned performance for a particular type of workload.
This patch series adds an interface to set/reset these power profiles
based on the submitted job. The driver can dynamically switch
the power profiles based on submitted job. This can optimize the power
performance when the particular workload is on.

Hi Arvind,

Would you mind testing your patchset with drm-ci? There is an amdgpu
test there and I would love to get your feedback on the CI.

You basically just need to apply the ci patch which is available on
https://cgit.freedesktop.org/drm/drm/log/?h=topic/drm-ci

There are instructions in the docs, but in short: to configure it, you push
your branch to gitlab.freedesktop.org, go to the settings and change the
CI/CD configuration file from .gitlab-ci.yml to 
drivers/gpu/drm/ci/gitlab-ci.yml,
and you can trigger a pipeline on your branch to get tests running.

(by the time of this writing, gitlab.fdo is under maintenance but should
be up soonish).


Hi Helen,

I tried the steps as you mentioned, but it looks like something is 
missing and the build itself is failing.


https://gitlab.freedesktop.org/ArvindYadav/drm-next/-/commits/smu_workload

Regards,
~Arvind


Thank you!
Helen


v2:
- Splitting workload_profile_set and workload_profile_put
   into two separate patches.
- Addressed review comment.
- Added new suspend function.
- Added patch to switches the GPU workload mode for KFD.

v3:
- Addressed all review comment.
- Changed the function name from *_set() to *_get().
- Now clearing all the profile in work handler.
- Added *_clear_all function to clear all the power profile.


Arvind Yadav (7):
   drm/amdgpu: Added init/fini functions for workload
   drm/amdgpu: Add new function to set GPU power profile
   drm/amdgpu: Add new function to put GPU power profile
   drm/amdgpu: Add suspend function to clear the GPU power profile.
   drm/amdgpu: Set/Reset GPU workload profile
   drm/amdgpu: switch workload context to/from compute
   Revert "drm/amd/amdgpu: switch on/off vcn power profile mode"

  drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   3 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|   8 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   6 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |   5 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  14 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 226 ++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  61 +
  8 files changed, 309 insertions(+), 16 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h

--
2.34.1



Re: [PATCH] drm/amdkfd: Use partial migrations in GPU page faults

2023-08-28 Thread Felix Kuehling



On 2023-08-24 18:08, Xiaogang.Chen wrote:

From: Xiaogang Chen 

This patch implements partial migration in gpu page fault according to migration
granularity(default 2MB) and not split svm range in cpu page fault handling.
Now a svm range may have pages from both system ram and vram of one gpu.
These changes are expected to improve migration performance and reduce
mmu callback and TLB flush workloads.

Signed-off-by: xiaogang chen 
---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 153 +++
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  87 -
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   7 +-
  4 files changed, 162 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 7d82c7da223a..5a3aa80a1834 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
   * svm_migrate_ram_to_vram - migrate svm range from system to device
   * @prange: range structure
   * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
   * @mm: the process mm structure
   * @trigger: reason of migration
   *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
   */
  static int
  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
  {
unsigned long addr, start, end;
@@ -498,9 +501,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
unsigned long cpages = 0;
long r = 0;
  
-	if (prange->actual_loc == best_loc) {

-   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-prange->svms, prange->start, prange->last, best_loc);
+   if (!best_loc) {
+   pr_debug("request svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+prange->svms, start_mgr, last_mgr);
return 0;
}
  
@@ -513,8 +516,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,

pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
 prange->start, prange->last, best_loc);
  
-	start = prange->start << PAGE_SHIFT;

-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = start_mgr << PAGE_SHIFT;
+   end = (last_mgr + 1) << PAGE_SHIFT;
  
  	r = svm_range_vram_node_new(node, prange, true);

if (r) {
@@ -544,10 +547,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
  
  	if (cpages) {

prange->actual_loc = best_loc;
-   svm_range_free_dma_mappings(prange, true);
-   } else {
+   /* only free dma mapping in the migrated range */
+   svm_range_free_dma_mappings(prange, true,  start_mgr - 
prange->start,
+last_mgr - start_mgr + 1);


This is wrong. If we only migrated some of the pages, we should not free 
the DMA mapping array at all. The array is needed as long as there are 
any valid DMA mappings in it.


I think the condition above with cpages should be updated. Instead of 
cpages, we need to keep track of a count of pages in VRAM in struct 
svm_range. See more below.




+   } else if (!prange->actual_loc)
+   /* if all pages from prange are at sys ram */
svm_range_vram_node_free(prange);
-   }
  
  	return r < 0 ? r : 0;

  }
@@ -762,6 +767,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct 
svm_range *prange,
   * svm_migrate_vram_to_ram - migrate svm range from device to system
   * @prange: range structure
   * @mm: process mm, use current->mm if NULL
+ * @start_mgr: start page need be migrated to sys ram
+ * @last_mgr: last page need be migrated to sys ram
   * @trigger: reason of migration
   * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page 
fault callback
   *
@@ -771,7 +778,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct 
svm_range *prange,
   * 0 - OK, otherwise error code
   */
  int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
-   uint32_t trigger, struct page *fault_page)
+   unsigned long start_mgr, unsigned long 
last_mgr,
+   uint32_t trigger, struct page 
*fault_page)
  {
struct kfd_node *node;
struct vm_area_struct *vma;
@@ -781,23 +789,30 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, 
struct mm_struct *mm,
unsigned long upages = 0;
long r = 0;
  
+	/* this pragne has no any vram page to migrate to sys ram */

if 

Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs

2023-08-28 Thread Deucher, Alexander
[AMD Official Use Only - General]

Technically the AMD IOMMU uses direct mapping mode for any device which claims 
to support ATS in order to support the IOMMUv2 functionality, but that was also 
the case with Raven systems which were problematic when remapping mode was 
enabled.  That said, now that IOMMUv2 support has been removed, there's no 
reason to use direct mapping in the IOMMU driver so that may change.

Alex


From: Koenig, Christian 
Sent: Monday, August 28, 2023 7:30 AM
To: Zhang, Yifan ; Christian König 
; amd-gfx@lists.freedesktop.org 

Cc: Deucher, Alexander 
Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 
onwards APUs

Well, there seems to be a very basic misunderstood here: The IOMMU
isolation level is *not* ASIC dependent!

Try to set amd_iommu=force_isolation on the kernel command line.

This is a configuration option customers can use to harden their systems
and when this isn't properly tested we can't allow page tables in system
memory.

Regards,
Christian.

Am 28.08.23 um 13:23 schrieb Zhang, Yifan:
> [Public]
>
> Not yet. It will be only enabled for gfx10.3.3 and later APU initially, IOMMU 
> is pass through in these ASIC.
>
> -Original Message-
> From: Christian König 
> Sent: Monday, August 28, 2023 5:41 PM
> To: Zhang, Yifan ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Koenig, Christian 
> 
> Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for 
> gfx10 onwards APUs
>
> Is that now validated with IOMMU in non pass through mode?
>
> Christian.
>
> Am 28.08.23 um 10:58 schrieb Zhang, Yifan:
>> [AMD Official Use Only - General]
>>
>> Ping
>>
>> -Original Message-
>> From: Zhang, Yifan 
>> Sent: Friday, August 25, 2023 8:34 AM
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander ; Koenig, Christian 
>> ; Zhang, Yifan 
>> Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 
>> onwards APUs
>>
>> To decrease VRAM pressure for APUs, put page tables to GTT domain for gfx10 
>> and newer APUs.
>>
>> v2: only enable it for gfx10 and newer APUs (Alex, Christian)
>>
>> Signed-off-by: Yifan Zhang 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++---
>>1 file changed, 6 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> index 96d601e209b8..4603d87c61a0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> @@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, 
>> struct amdgpu_vm *vm,
>>   bp.size = amdgpu_vm_pt_size(adev, level);
>>   bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
>>
>> -   if (!adev->gmc.is_app_apu)
>> -   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
>> -   else
>> +   if (adev->gmc.is_app_apu ||
>> +   ((adev->flags & AMD_IS_APU) &&
>> +   (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3
>>   bp.domain = AMDGPU_GEM_DOMAIN_GTT;
>> +   else
>> +   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
>> +
>>
>>   bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
>>   bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>> --
>> 2.37.3
>>



Re: [PATCH] drm/amdkfd: Add missing gfx11 MQD manager callbacks

2023-08-28 Thread Felix Kuehling

On 2023-08-25 17:30, Harish Kasiviswanathan wrote:

From: Jay Cornwall 

mqd_stride function was introduced in commit 129c7b6a0217
("drm/amdkfd: Update MQD management on multi XCC setup")
but not assigned for gfx11. Fixes a NULL dereference in debugfs.

Signed-off-by: Jay Cornwall 
Signed-off-by: Harish Kasiviswanathan 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 2319467d2d95..0bbf0edbabd4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -457,6 +457,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE 
type,
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v11_compute_mqd);
mqd->get_wave_state = get_wave_state;
+   mqd->mqd_stride = kfd_mqd_stride;
  #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
  #endif
@@ -472,6 +473,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE 
type,
mqd->destroy_mqd = destroy_hiq_mqd;
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v11_compute_mqd);
+   mqd->mqd_stride = kfd_mqd_stride;
  #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
  #endif
@@ -501,6 +503,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE 
type,
mqd->destroy_mqd = kfd_destroy_mqd_sdma;
mqd->is_occupied = kfd_is_occupied_sdma;
mqd->mqd_size = sizeof(struct v11_sdma_mqd);
+   mqd->mqd_stride = kfd_mqd_stride;
  #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
  #endif


Re: [PATCH] drm/amdkfd: use mask to get v9 interrupt sq data bits correctly

2023-08-28 Thread Felix Kuehling



On 2023-08-28 11:35, Alex Sierra wrote:

Interrupt sq data bits were not taken properly from contextid0 and contextid1.
Use macro KFD_CONTEXT_ID_GET_SQ_INT_DATA instead.

Signed-off-by: Alex Sierra 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index f0731a6a5306..830396b1c3b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -384,7 +384,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
default:
break;
}
-   kfd_signal_event_interrupt(pasid, context_id0 & 
0xff, 24);
+   kfd_signal_event_interrupt(pasid, sq_int_data, 24);
} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),


Re: [PATCH v2] drm/amdkfd: Replace pr_err with dev_err

2023-08-28 Thread Felix Kuehling



On 2023-08-26 09:41, Asad Kamal wrote:

Replace pr_err with dev_err to show the bus-id of
failing device with kfd queue errors

Signed-off-by: Asad Kamal 
Reviewed-by: Lijo Lazar 


Reviewed-by: Felix Kuehling 



---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 116 +++---
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   2 +-
  2 files changed, 71 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b166f30f083e..cd6cfffd6436 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -232,8 +232,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
  
  	queue_type = convert_to_mes_queue_type(q->properties.type);

if (queue_type < 0) {
-   pr_err("Queue type not supported with MES, queue:%d\n",
-   q->properties.type);
+   dev_err(adev->dev, "Queue type not supported with MES, 
queue:%d\n",
+   q->properties.type);
return -EINVAL;
}
queue_input.queue_type = (uint32_t)queue_type;
@@ -244,9 +244,9 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
r = adev->mes.funcs->add_hw_queue(>mes, _input);
amdgpu_mes_unlock(>mes);
if (r) {
-   pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
+   dev_err(adev->dev, "failed to add hardware queue to MES, 
doorbell=0x%x\n",
q->properties.doorbell_off);
-   pr_err("MES might be in unrecoverable state, issue a GPU 
reset\n");
+   dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU 
reset\n");
kfd_hws_hang(dqm);
}
  
@@ -272,9 +272,9 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,

amdgpu_mes_unlock(>mes);
  
  	if (r) {

-   pr_err("failed to remove hardware queue from MES, 
doorbell=0x%x\n",
+   dev_err(adev->dev, "failed to remove hardware queue from MES, 
doorbell=0x%x\n",
q->properties.doorbell_off);
-   pr_err("MES might be in unrecoverable state, issue a GPU 
reset\n");
+   dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU 
reset\n");
kfd_hws_hang(dqm);
}
  
@@ -284,6 +284,7 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,

  static int remove_all_queues_mes(struct device_queue_manager *dqm)
  {
struct device_process_node *cur;
+   struct device *dev = dqm->dev->adev->dev;
struct qcm_process_device *qpd;
struct queue *q;
int retval = 0;
@@ -294,7 +295,7 @@ static int remove_all_queues_mes(struct 
device_queue_manager *dqm)
if (q->properties.is_active) {
retval = remove_queue_mes(dqm, q, qpd);
if (retval) {
-   pr_err("%s: Failed to remove queue %d for 
dev %d",
+   dev_err(dev, "%s: Failed to remove queue %d 
for dev %d",
__func__,
q->properties.queue_id,
dqm->dev->id);
@@ -443,6 +444,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
  {
+   struct device *dev = dqm->dev->adev->dev;
int allocated_vmid = -1, i;
  
  	for (i = dqm->dev->vm_info.first_vmid_kfd;

@@ -454,7 +456,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
}
  
  	if (allocated_vmid < 0) {

-   pr_err("no more vmid to allocate\n");
+   dev_err(dev, "no more vmid to allocate\n");
return -ENOSPC;
}
  
@@ -510,10 +512,12 @@ static void deallocate_vmid(struct device_queue_manager *dqm,

struct qcm_process_device *qpd,
struct queue *q)
  {
+   struct device *dev = dqm->dev->adev->dev;
+
/* On GFX v7, CP doesn't flush TC at dequeue */
if (q->device->adev->asic_type == CHIP_HAWAII)
if (flush_texture_cache_nocpsch(q->device, qpd))
-   pr_err("Failed to flush TC\n");
+   dev_err(dev, "Failed to flush TC\n");
  
  	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
  
@@ -708,7 +712,7 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process

pr_debug("Killing all process wavefronts\n");
  
  	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {

-   pr_err("no vmid pasid mapping supported \n");
+   

Re: [PATCH (set 1) 00/20] Rid W=1 warnings from GPU

2023-08-28 Thread Michel Dänzer
On 8/24/23 14:07, Lee Jones wrote:
> On Thu, 24 Aug 2023, Jani Nikula wrote:
>> On Thu, 24 Aug 2023, Lee Jones  wrote:
>>> This set is part of a larger effort attempting to clean-up W=1
>>> kernel builds, which are currently overwhelmingly riddled with
>>> niggly little warnings.
>>
>> The next question is, how do we keep it W=1 clean going forward?
> 
> My plan was to fix them all, then move each warning to W=0.
> 
> Arnd recently submitted a set doing just that for a bunch of them.
> 
> https://lore.kernel.org/all/20230811140327.3754597-1-a...@kernel.org/
> 
> I like to think a bunch of this is built on top of my previous efforts.
> 
> GPU is particularly tricky though - the warnings seem to come in faster
> than I can squash them.  Maybe the maintainers can find a way to test
> new patches on merge?

One approach for this which has proved effective in Mesa and other projects is 
to make warnings fatal in CI which must pass for any changes to be merged. 
There is ongoing work toward introducing this for the DRM subsystem, using 
gitlab.freedesktop.org CI.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



Re: [PATCH v3 0/7] GPU workload hints for better performance

2023-08-28 Thread Helen Mae Koike Fornazier
On Monday, August 28, 2023 09:26 -03, Arvind Yadav  wrote:

> AMDGPU SOCs supports dynamic workload based power profiles, which can
> provide fine-tuned performance for a particular type of workload.
> This patch series adds an interface to set/reset these power profiles
> based on the submitted job. The driver can dynamically switch
> the power profiles based on submitted job. This can optimize the power
> performance when the particular workload is on. 

Hi Arvind,

Would you mind to test your patchset with drm-ci ? There is a amdgpu
test there and I would love to get your feedback of the ci.

You basically just need to apply the ci patch which is available on
https://cgit.freedesktop.org/drm/drm/log/?h=topic/drm-ci

There are instruction on the docs, but in short: to configure it, you push
your branch to gitlab.freedesktop.org, go to the settings and change the
CI/CD configuration file from .gitlab-ci.yml to 
drivers/gpu/drm/ci/gitlab-ci.yml,
and you can trigger a pipeline on your branch to get tests running.

(by the time of this writing, gitlab.fdo is under maintenance but should
be up soonish).

Thank you!
Helen

> 
> v2:
> - Splitting workload_profile_set and workload_profile_put
>   into two separate patches.
> - Addressed review comment.
> - Added new suspend function.
> - Added patch to switches the GPU workload mode for KFD. 
> 
> v3:
> - Addressed all review comment.
> - Changed the function name from *_set() to *_get().
> - Now clearing all the profile in work handler.
> - Added *_clear_all function to clear all the power profile.
> 
> 
> Arvind Yadav (7):
>   drm/amdgpu: Added init/fini functions for workload
>   drm/amdgpu: Add new function to set GPU power profile
>   drm/amdgpu: Add new function to put GPU power profile
>   drm/amdgpu: Add suspend function to clear the GPU power profile.
>   drm/amdgpu: Set/Reset GPU workload profile
>   drm/amdgpu: switch workload context to/from compute
>   Revert "drm/amd/amdgpu: switch on/off vcn power profile mode"
> 
>  drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|   8 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   6 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |   5 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  14 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 226 ++
>  drivers/gpu/drm/amd/include/amdgpu_workload.h |  61 +
>  8 files changed, 309 insertions(+), 16 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
>  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h
> 
> -- 
> 2.34.1
>



[PATCH] drm/amdkfd: use mask to get v9 interrupt sq data bits correctly

2023-08-28 Thread Alex Sierra
Interrupt sq data bits were not taken properly from contextid0 and contextid1.
Use macro KFD_CONTEXT_ID_GET_SQ_INT_DATA instead.

Signed-off-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index f0731a6a5306..830396b1c3b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -384,7 +384,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
default:
break;
}
-   kfd_signal_event_interrupt(pasid, context_id0 & 
0xff, 24);
+   kfd_signal_event_interrupt(pasid, sq_int_data, 24);
} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
-- 
2.32.0



Re: [PATCH] drm/amdkfd: ratelimited SQ interrupt messages

2023-08-28 Thread Philip Yang

  


On 2023-08-10 12:13, Harish
  Kasiviswanathan wrote:


  No functional change. Use ratelimited version of pr_ to avoid
overflowing of dmesg buffer

Signed-off-by: Harish Kasiviswanathan 

Reviewed-by: Philip Yang 

  
---
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c  | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
index c7991e07b6be..a7697ec8188e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -268,7 +268,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
 		SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
 			switch (encoding) {
 			case SQ_INTERRUPT_WORD_ENCODING_AUTO:
-pr_debug(
+pr_debug_ratelimited(
 	"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n",
 	REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_AUTO_CTXID1,
 			SE_ID),
@@ -284,7 +284,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
 			THREAD_TRACE_UTC_ERROR));
 break;
 			case SQ_INTERRUPT_WORD_ENCODING_INST:
-pr_debug("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
 	REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
 			SE_ID),
 	REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
@@ -310,7 +310,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
 			case SQ_INTERRUPT_WORD_ENCODING_ERROR:
 sq_intr_err_type = REG_GET_FIELD(context_id0, KFD_CTXID0,
 ERR_TYPE);
-pr_warn("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
+pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
 	REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
 			SE_ID),
 	REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
index f933bd231fb9..2a65792fd116 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
@@ -150,7 +150,7 @@ enum SQ_INTERRUPT_ERROR_TYPE {
 
 static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1)
 {
-	pr_debug(
+	pr_debug_ratelimited(
 		"sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
 		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE),
 		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT),
@@ -165,7 +165,7 @@ static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1)
 
 static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1)
 {
-	pr_debug(
+	pr_debug_ratelimited(
 		"sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
 		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA),
 		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID),
@@ -177,7 +177,7 @@ static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1)
 
 static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1)
 {
-	pr_warn(
+	pr_warn_ratelimited(
 		"sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
 		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL),
 		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index f0731a6a5306..02695ccd22d6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -333,7 +333,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
 			encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING);
 			switch (encoding) {
 			case SQ_INTERRUPT_WORD_ENCODING_AUTO:
-pr_debug(
+pr_debug_ratelimited(
 	"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
 	REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID),
 	REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE),
@@ -347,7 +347,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
 	REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR));
 break;
 			case SQ_INTERRUPT_WORD_ENCODING_INST:
-	

Re: [PATCH v3 0/7] GPU workload hints for better performance

2023-08-28 Thread Lazar, Lijo
[AMD Official Use Only - General]

As mentioned with an older version of this series, this is an 'abuse' of power 
profile interface.

This series is oversimplifying what PMFW algorithms are supposed to be doing. 
Whatever this series is doing, FW can do it better.

To explain in simpler terms - it just tries to boost a profile based on ring 
type without even knowing how much activity a job can trigger on a 
particular ring. A job scheduled to a GFX ring doesn't deserve a profile boost 
unless it can create a certain level of activity. In CPU terms, a job scheduled 
to a processor doesn't mean it deserves a frequency boost of that CPU.  At 
minimum it depends on more details like whether that job is compute bound or
memory bound.

While FW algorithms are designed to do that, this series tries to trivialise 
all such things.

Unless you are able to show the tangible benefits in some terms like 
performance, power, or performance per watt,  I don't think this should be the 
default behaviour where driver tries to override FW just based on job 
submissions to rings.

Thanks,
Lijo

From: amd-gfx  on behalf of Arvind Yadav 

Sent: Monday, August 28, 2023 5:56:07 PM
To: Koenig, Christian ; Deucher, Alexander 
; Sharma, Shashank ; Pan, 
Xinhui ; airl...@gmail.com ; 
dan...@ffwll.ch ; Kuehling, Felix ; 
amd-gfx@lists.freedesktop.org 
Cc: Yadav, Arvind ; linux-ker...@vger.kernel.org 
; dri-de...@lists.freedesktop.org 

Subject: [PATCH v3 0/7] GPU workload hints for better performance

AMDGPU SOCs supports dynamic workload based power profiles, which can
provide fine-tuned performance for a particular type of workload.
This patch series adds an interface to set/reset these power profiles
based on the submitted job. The driver can dynamically switch
the power profiles based on submitted job. This can optimize the power
performance when the particular workload is on.

v2:
- Splitting workload_profile_set and workload_profile_put
  into two separate patches.
- Addressed review comment.
- Added new suspend function.
- Added patch to switches the GPU workload mode for KFD.

v3:
- Addressed all review comment.
- Changed the function name from *_set() to *_get().
- Now clearing all the profile in work handler.
- Added *_clear_all function to clear all the power profile.


Arvind Yadav (7):
  drm/amdgpu: Added init/fini functions for workload
  drm/amdgpu: Add new function to set GPU power profile
  drm/amdgpu: Add new function to put GPU power profile
  drm/amdgpu: Add suspend function to clear the GPU power profile.
  drm/amdgpu: Set/Reset GPU workload profile
  drm/amdgpu: switch workload context to/from compute
  Revert "drm/amd/amdgpu: switch on/off vcn power profile mode"

 drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|   8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |   5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 226 ++
 drivers/gpu/drm/amd/include/amdgpu_workload.h |  61 +
 8 files changed, 309 insertions(+), 16 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h

--
2.34.1



RE: [PATCH] drm/amdkfd: Checkpoint and restore queues on GFX11

2023-08-28 Thread Kasiviswanathan, Harish
[AMD Official Use Only - General]

Reviewed-by: Harish Kasiviswanathan 

-Original Message-
From: amd-gfx  On Behalf Of David Francis
Sent: Friday, August 25, 2023 3:14 PM
To: amd-gfx@lists.freedesktop.org
Cc: Francis, David ; Kuehling, Felix 

Subject: [PATCH] drm/amdkfd: Checkpoint and restore queues on GFX11

The code in kfd_mqd_manager_v11.c to support criu dump and
restore of queue state was missing.

Added it; should be equivalent to kfd_mqd_manager_v10.c.

CC: Felix Kuehling 
Signed-off-by: David Francis 
---
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  | 41 +++
 1 file changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 2319467d2d95..2a79d37da95d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void 
*mqd,
return 0;
 }

+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, 
void *ctl_stack_dst)
+{
+   struct v11_compute_mqd *m;
+
+   m = get_mqd(mqd);
+
+   memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+   struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+   struct queue_properties *qp,
+   const void *mqd_src,
+   const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+   uint64_t addr;
+   struct v11_compute_mqd *m;
+
+   m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
+   addr = mqd_mem_obj->gpu_addr;
+
+   memcpy(m, mqd_src, sizeof(*m));
+
+   *mqd = m;
+   if (gart_addr)
+   *gart_addr = addr;
+
+   m->cp_hqd_pq_doorbell_control =
+   qp->doorbell_off <<
+   CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+   pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+   m->cp_hqd_pq_doorbell_control);
+
+   qp->is_active = 0;
+}
+
+
 static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -457,6 +494,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE 
type,
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v11_compute_mqd);
mqd->get_wave_state = get_wave_state;
+   mqd->checkpoint_mqd = checkpoint_mqd;
+   mqd->restore_mqd = restore_mqd;
 #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
@@ -500,6 +539,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE 
type,
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = kfd_destroy_mqd_sdma;
mqd->is_occupied = kfd_is_occupied_sdma;
+   mqd->checkpoint_mqd = checkpoint_mqd;
+   mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct v11_sdma_mqd);
 #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
--
2.34.1



Re: [PATCH v2 19/34] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane

2023-08-28 Thread Melissa Wen
On 08/28, Pekka Paalanen wrote:
> On Mon, 28 Aug 2023 09:45:44 +0100
> Joshua Ashton  wrote:
> 
> > Degamma has always been on the plane on AMD. CRTC DEGAMMA_LUT has actually
> > just been applying it to every plane pre-blend.
> 
> I've never seen that documented anywhere.
> 
> It has seemed obvious, that since we have KMS objects for planes and
> CRTCs, stuff on the CRTC does not do plane stuff before blending. That
> also has not been documented in the past, but it seemed the most
> logical choice.
> 
> Even today
> https://dri.freedesktop.org/docs/drm/gpu/drm-kms.html#color-management-properties
> make no mention of whether they apply before or after blending.

It's mentioned in the next section:
https://dri.freedesktop.org/docs/drm/gpu/amdgpu/display/display-manager.html#dc-color-capabilities-between-dcn-generations
In hindsight, maybe it isn't the best place...

> 
> > Degamma makes no sense after blending anyway.
> 
> If the goal is to allow blending in optical or other space, you are
> correct. However, APIs do not need to make sense to exist, like most of
> the options of "Colorspace" connector property.
> 
> I have always thought the CRTC DEGAMMA only exists to allow the CRTC
> CTM to work in linear or other space.
> 
> I have at times been puzzled by what the DEGAMMA and CTM are actually
> good for.
> 
> > The entire point is for it to happen before blending to blend in linear
> > space. Otherwise DEGAMMA_LUT and REGAMMA_LUT are the exact same thing...
> 
> The CRTC CTM is between CRTC DEGAMMA and CRTC GAMMA, meaning they are
> not interchangeable.
> 
> I have literally believed that DRM KMS UAPI simply does not support
> blending in optical space, unless your framebuffers are in optical
> which no-one does, until the color management properties are added to
> KMS planes. This never even seemed weird, because non-linear blending
> is so common.
> 
> So I have been misunderstanding the CRTC DEGAMMA property forever. Am I
> the only one? Do all drivers agree today at what point does CRTC
> DEGAMMA apply, before blending on all planes or after blending?
> 

I'd like to know current userspace cases on Linux of this CRTC DEGAMMA
LUT.

> Does anyone know of any doc about that?

From what I retrieved about the introduction of CRTC color props[1], it
seems the main concern at that point was getting a linear space for
CTM[2] and CRTC degamma property seems to have followed intel
requirements, but didn't find anything about the blending space.

AFAIU, we have just interpreted that all CRTC color properties for DRM
interface are after blending[3]. Can this be seen in another way?

[1] https://patchwork.freedesktop.org/series/2720/
[2] https://codereview.chromium.org/1182063002
[3] https://dri.freedesktop.org/docs/drm/_images/dcn3_cm_drm_current.svg

> 
> If drivers do not agree on the behaviour of a KMS property, then that
> property is useless for generic userspace.
> 
> 
> Thanks,
> pq
> 
> 
> > On Tuesday, 22 August 2023, Pekka Paalanen 
> > wrote:
> > > On Thu, 10 Aug 2023 15:02:59 -0100
> > > Melissa Wen  wrote:
> > >  
> > >> The next patch adds pre-blending degamma to AMD color mgmt pipeline, but
> > >> pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC
> > >> atomic degamma or implict degamma on legacy gamma. Detach degamma usage
> > >> regarging CRTC color properties to manage plane and CRTC color
> > >> correction combinations.
> > >>
> > >> Reviewed-by: Harry Wentland 
> > >> Signed-off-by: Melissa Wen 
> > >> ---
> > >>  .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 59 +--
> > >>  1 file changed, 41 insertions(+), 18 deletions(-)
> > >>
> > >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c  
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> > >> index 68e9f2c62f2e..74eb02655d96 100644
> > >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> > >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> > >> @@ -764,20 +764,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct  
> > dm_crtc_state *crtc)
> > >>   return 0;
> > >>  }
> > >>
> > >> -/**
> > >> - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC  
> > plane.
> > >> - * @crtc: amdgpu_dm crtc state
> > >> - * @dc_plane_state: target DC surface
> > >> - *
> > >> - * Update the underlying dc_stream_state's input transfer function  
> > (ITF) in
> > >> - * preparation for hardware commit. The transfer function used depends  
> > on
> > >> - * the preparation done on the stream for color management.
> > >> - *
> > >> - * Returns:
> > >> - * 0 on success. -ENOMEM if mem allocation fails.
> > >> - */
> > >> -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
> > >> -   struct dc_plane_state  
> > *dc_plane_state)
> > >> +static int
> > >> +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
> > >> +  struct dc_plane_state *dc_plane_state)
> > >>  {
> > >> 

Re: [PATCH] drm/amdkfd: Add missing gfx11 MQD manager callbacks

2023-08-28 Thread Deucher, Alexander
[AMD Official Use Only - General]

Acked-by: Alex Deucher 

From: amd-gfx  on behalf of Harish 
Kasiviswanathan 
Sent: Friday, August 25, 2023 5:30 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Cornwall, Jay ; Kasiviswanathan, Harish 

Subject: [PATCH] drm/amdkfd: Add missing gfx11 MQD manager callbacks

From: Jay Cornwall 

mqd_stride function was introduced in commit 129c7b6a0217
("drm/amdkfd: Update MQD management on multi XCC setup")
but not assigned for gfx11. Fixes a NULL dereference in debugfs.

Signed-off-by: Jay Cornwall 
Signed-off-by: Harish Kasiviswanathan 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 2319467d2d95..0bbf0edbabd4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -457,6 +457,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE 
type,
 mqd->is_occupied = kfd_is_occupied_cp;
 mqd->mqd_size = sizeof(struct v11_compute_mqd);
 mqd->get_wave_state = get_wave_state;
+   mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
@@ -472,6 +473,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE 
type,
 mqd->destroy_mqd = destroy_hiq_mqd;
 mqd->is_occupied = kfd_is_occupied_cp;
 mqd->mqd_size = sizeof(struct v11_compute_mqd);
+   mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
@@ -501,6 +503,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE 
type,
 mqd->destroy_mqd = kfd_destroy_mqd_sdma;
 mqd->is_occupied = kfd_is_occupied_sdma;
 mqd->mqd_size = sizeof(struct v11_sdma_mqd);
+   mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
--
2.34.1



Re: [PATCH v2 31/34] drm/amd/display: set stream gamut remap matrix to MPC for DCN301

2023-08-28 Thread Pekka Paalanen
On Fri, 25 Aug 2023 13:37:08 -0100
Melissa Wen  wrote:

> On 08/22, Pekka Paalanen wrote:
> > On Thu, 10 Aug 2023 15:03:11 -0100
> > Melissa Wen  wrote:
> >   
> > > dc->caps.color.mpc.gamut_remap says there is a post-blending color block
> > > for gamut remap matrix for DCN3 HW family and newer versions. However,
> > > those drivers still follow DCN10 programming that remap stream
> > > gamut_remap_matrix to DPP (pre-blending).  
> > 
> > That's ok only as long as CRTC degamma is pass-through. Blending itself
> > is a linear operation, so it doesn't matter if a matrix is applied to
> > the blending result or to all blending inputs. But you cannot move a
> > matrix operation to the other side of a non-linear operation, and you
> > cannot move a non-linear operation across blending.  
> 
> Oh, I'm not moving it, what I'm doing here is the opposite and fixing
> it. This patch puts each pre- and post-blending CTM in their right
> place, since we have the HW caps for it on DCN3+... Or are you just
> pointing out the implementation mistake on old driver versions?

It's just the old mistake.

I hope no-one complains, forcing you to revert this fix as a regression.


Thanks,
pq


> > > To enable pre-blending and post-blending gamut_remap matrix supports at
> > > the same time, set stream gamut_remap to MPC and plane gamut_remap to
> > > DPP for DCN301 that support both.
> > > 
> > > It was tested using IGT KMS color tests for DRM CRTC CTM property and it
> > > preserves test results.
> > > 
> > > Signed-off-by: Melissa Wen 
> > > ---
> > >  .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 37 +++
> > >  .../drm/amd/display/dc/dcn30/dcn30_hwseq.h|  3 ++
> > >  .../drm/amd/display/dc/dcn301/dcn301_init.c   |  2 +-
> > >  3 files changed, 41 insertions(+), 1 deletion(-)


pgp46GyGJt3wv.pgp
Description: OpenPGP digital signature


Re: [PATCH v2 19/34] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane

2023-08-28 Thread Pekka Paalanen
On Mon, 28 Aug 2023 09:45:44 +0100
Joshua Ashton  wrote:

> Degamma has always been on the plane on AMD. CRTC DEGAMMA_LUT has actually
> just been applying it to every plane pre-blend.

I've never seen that documented anywhere.

It has seemed obvious, that since we have KMS objects for planes and
CRTCs, stuff on the CRTC does not do plane stuff before blending. That
also has not been documented in the past, but it seemed the most
logical choice.

Even today
https://dri.freedesktop.org/docs/drm/gpu/drm-kms.html#color-management-properties
make no mention of whether they apply before or after blending.

> Degamma makes no sense after blending anyway.

If the goal is to allow blending in optical or other space, you are
correct. However, APIs do not need to make sense to exist, like most of
the options of "Colorspace" connector property.

I have always thought the CRTC DEGAMMA only exists to allow the CRTC
CTM to work in linear or other space.

I have at times been puzzled by what the DEGAMMA and CTM are actually
good for.

> The entire point is for it to happen before blending to blend in linear
> space. Otherwise DEGAMMA_LUT and REGAMMA_LUT are the exact same thing...

The CRTC CTM is between CRTC DEGAMMA and CRTC GAMMA, meaning they are
not interchangeable.

I have literally believed that DRM KMS UAPI simply does not support
blending in optical space, unless your framebuffers are in optical
which no-one does, until the color management properties are added to
KMS planes. This never even seemed weird, because non-linear blending
is so common.

So I have been misunderstanding the CRTC DEGAMMA property forever. Am I
the only one? Do all drivers agree today at what point does CRTC
DEGAMMA apply, before blending on all planes or after blending?

Does anyone know of any doc about that?

If drivers do not agree on the behaviour of a KMS property, then that
property is useless for generic userspace.


Thanks,
pq


> On Tuesday, 22 August 2023, Pekka Paalanen 
> wrote:
> > On Thu, 10 Aug 2023 15:02:59 -0100
> > Melissa Wen  wrote:
> >  
> >> The next patch adds pre-blending degamma to AMD color mgmt pipeline, but
> >> pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC
> >> atomic degamma or implict degamma on legacy gamma. Detach degamma usage
> >> regarging CRTC color properties to manage plane and CRTC color
> >> correction combinations.
> >>
> >> Reviewed-by: Harry Wentland 
> >> Signed-off-by: Melissa Wen 
> >> ---
> >>  .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 59 +--
> >>  1 file changed, 41 insertions(+), 18 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c  
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> >> index 68e9f2c62f2e..74eb02655d96 100644
> >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> >> @@ -764,20 +764,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct  
> dm_crtc_state *crtc)
> >>   return 0;
> >>  }
> >>
> >> -/**
> >> - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC  
> plane.
> >> - * @crtc: amdgpu_dm crtc state
> >> - * @dc_plane_state: target DC surface
> >> - *
> >> - * Update the underlying dc_stream_state's input transfer function  
> (ITF) in
> >> - * preparation for hardware commit. The transfer function used depends  
> on
> >> - * the preparation done on the stream for color management.
> >> - *
> >> - * Returns:
> >> - * 0 on success. -ENOMEM if mem allocation fails.
> >> - */
> >> -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
> >> -   struct dc_plane_state  
> *dc_plane_state)
> >> +static int
> >> +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
> >> +  struct dc_plane_state *dc_plane_state)
> >>  {
> >>   const struct drm_color_lut *degamma_lut;
> >>   enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
> >> @@ -800,8 +789,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct  
> dm_crtc_state *crtc,
> >>_size);
> >>   ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
> >>
> >> - dc_plane_state->in_transfer_func->type =
> >> - TF_TYPE_DISTRIBUTED_POINTS;
> >> + dc_plane_state->in_transfer_func->type =  
> TF_TYPE_DISTRIBUTED_POINTS;
> >>
> >>   /*
> >>* This case isn't fully correct, but also fairly
> >> @@ -837,7 +825,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct  
> dm_crtc_state *crtc,
> >>  degamma_lut, degamma_size);
> >>   if (r)
> >>   return r;
> >> - } else if (crtc->cm_is_degamma_srgb) {
> >> + } else {
> >>   /*
> >>* For legacy gamma support we need the regamma input
> >>* in linear space. Assume that the input is 

Re: [PATCH v2 19/34] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane

2023-08-28 Thread Pekka Paalanen
On Fri, 25 Aug 2023 13:29:44 -0100
Melissa Wen  wrote:

> On 08/22, Pekka Paalanen wrote:
> > On Thu, 10 Aug 2023 15:02:59 -0100
> > Melissa Wen  wrote:
> >   
> > > The next patch adds pre-blending degamma to AMD color mgmt pipeline, but
> > > pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC
> > > atomic degamma or implict degamma on legacy gamma. Detach degamma usage
> > > regarging CRTC color properties to manage plane and CRTC color
> > > correction combinations.
> > > 
> > > Reviewed-by: Harry Wentland 
> > > Signed-off-by: Melissa Wen 
> > > ---
> > >  .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 59 +--
> > >  1 file changed, 41 insertions(+), 18 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
> > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> > > index 68e9f2c62f2e..74eb02655d96 100644
> > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> > > @@ -764,20 +764,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct 
> > > dm_crtc_state *crtc)
> > >   return 0;
> > >  }
> > >  
> > > -/**
> > > - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC 
> > > plane.
> > > - * @crtc: amdgpu_dm crtc state
> > > - * @dc_plane_state: target DC surface
> > > - *
> > > - * Update the underlying dc_stream_state's input transfer function (ITF) 
> > > in
> > > - * preparation for hardware commit. The transfer function used depends on
> > > - * the preparation done on the stream for color management.
> > > - *
> > > - * Returns:
> > > - * 0 on success. -ENOMEM if mem allocation fails.
> > > - */
> > > -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
> > > -   struct dc_plane_state *dc_plane_state)
> > > +static int
> > > +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
> > > +  struct dc_plane_state *dc_plane_state)
> > >  {
> > >   const struct drm_color_lut *degamma_lut;
> > >   enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
> > > @@ -800,8 +789,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
> > > dm_crtc_state *crtc,
> > >_size);
> > >   ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
> > >  
> > > - dc_plane_state->in_transfer_func->type =
> > > - TF_TYPE_DISTRIBUTED_POINTS;
> > > + dc_plane_state->in_transfer_func->type = 
> > > TF_TYPE_DISTRIBUTED_POINTS;
> > >  
> > >   /*
> > >* This case isn't fully correct, but also fairly
> > > @@ -837,7 +825,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
> > > dm_crtc_state *crtc,
> > >  degamma_lut, degamma_size);
> > >   if (r)
> > >   return r;
> > > - } else if (crtc->cm_is_degamma_srgb) {
> > > + } else {
> > >   /*
> > >* For legacy gamma support we need the regamma input
> > >* in linear space. Assume that the input is sRGB.
> > > @@ -847,8 +835,43 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
> > > dm_crtc_state *crtc,
> > >  
> > >   if (tf != TRANSFER_FUNCTION_SRGB &&
> > >   !mod_color_calculate_degamma_params(NULL,
> > > - dc_plane_state->in_transfer_func, NULL, false))
> > > + 
> > > dc_plane_state->in_transfer_func,
> > > + NULL, false))
> > >   return -ENOMEM;
> > > + }
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +/**
> > > + * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC 
> > > plane.
> > > + * @crtc: amdgpu_dm crtc state
> > > + * @dc_plane_state: target DC surface
> > > + *
> > > + * Update the underlying dc_stream_state's input transfer function (ITF) 
> > > in
> > > + * preparation for hardware commit. The transfer function used depends on
> > > + * the preparation done on the stream for color management.
> > > + *
> > > + * Returns:
> > > + * 0 on success. -ENOMEM if mem allocation fails.
> > > + */
> > > +int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
> > > +   struct dc_plane_state *dc_plane_state)
> > > +{
> > > + bool has_crtc_cm_degamma;
> > > + int ret;
> > > +
> > > + has_crtc_cm_degamma = (crtc->cm_has_degamma || 
> > > crtc->cm_is_degamma_srgb);
> > > + if (has_crtc_cm_degamma){
> > > + /* AMD HW doesn't have post-blending degamma caps. When DRM
> > > +  * CRTC atomic degamma is set, we maps it to DPP degamma block
> > > +  * (pre-blending) or, on legacy gamma, we use DPP degamma to
> > > +  * linearize (implicit degamma) from sRGB/BT709 according to
> > > +  * the input space.  
> > 
> > Uhh, you can't just move degamma before blending if KMS userspace
> > wants it after blending. That would be 

RE: [PATCH 00/21] DC Patches August 23, 2023

2023-08-28 Thread Wheeler, Daniel
[Public]

Hi all,

This week this patchset was tested on the following systems:
* Lenovo ThinkBook T13s Gen4 with AMD Ryzen 5 6600U
* MSI Gaming X Trio RX 6800
* Gigabyte Gaming OC RX 7900 XTX

These systems were tested on the following display/connection types:
* eDP, (1080p 60hz [5650U]) (1920x1200 60hz [6600U]) (2560x1600 
120hz[6600U])
* VGA and DVI (1680x1050 60hz [DP to VGA/DVI, USB-C to VGA/DVI])
* DP/HDMI/USB-C (1440p 170hz, 4k 60hz, 4k 144hz, 4k 240hz [Includes 
USB-C to DP/HDMI adapters])
* Thunderbolt (LG Ultrafine 5k)
* MST (Startech MST14DP123DP [DP to 3x DP] and 2x 4k 60Hz displays)
* DSC (with Cable Matters 101075 [DP to 3x DP] with 3x 4k60 displays, 
and HP Hook G2 with 1 4k60 display)
* USB 4 (Kensington SD5700T and 1x 4k 60Hz display)
* PCON (Club3D CAC-1085 and 1x 4k 144Hz display [at 4k 120HZ, as that 
is the max the adapter supports])

The testing is a mix of automated and manual tests. Manual testing includes 
(but is not limited to):
* Changing display configurations and settings
* Benchmark testing
* Feature testing (Freesync, etc.)

Automated testing includes (but is not limited to):
* Script testing (scripts to automate some of the manual checks)
* IGT testing

The patchset consists of the amd-staging-drm-next branch (Head commit - 
1d35782cce09 drm/amdgpu: Fix the return for gpu mode1_reset) with new patches 
added on top of it.

Tested on Ubuntu 22.04.3, on Wayland and X11, using Gnome for both.

Tested-by: Daniel Wheeler 


Thank you,

Dan Wheeler
Sr. Technologist | AMD
SW Display
--
1 Commerce Valley Dr E, Thornhill, ON L3T 7X6
amd.com

-Original Message-
From: Mahfooz, Hamza 
Sent: Wednesday, August 23, 2023 11:58 AM
To: amd-gfx@lists.freedesktop.org
Cc: Wentland, Harry ; Li, Sun peng (Leo) 
; Lakha, Bhawanpreet ; Siqueira, 
Rodrigo ; Pillai, Aurabindo 
; Zhuo, Qingqing (Lillian) ; 
Li, Roman ; Lin, Wayne ; Wang, Chao-kai 
(Stylon) ; Chiu, Solomon ; Kotarac, 
Pavle ; Gutierrez, Agustin ; 
Zuo, Jerry ; Mahfooz, Hamza ; 
Wheeler, Daniel 
Subject: [PATCH 00/21] DC Patches August 23, 2023

This DC patch-set brings improvements in multiple areas. In summary, we
highlight:

* DCN315 fixes
* DCN31 fixes
* DPIA fixes
* Dump the pipe topology when it updates
* Misc code cleanups
* New debugfs interface to query the current ODM combine configuration
* ODM fixes
* Potential deadlock while waiting for MPC idle fix
* Support for windowed MPO ODM

Cc: Daniel Wheeler 

Aurabindo Pillai (2):
  drm/amd/display: Fix incorrect comment
  drm/amd/display: Add debugfs interface for ODM combine info

Charlene Liu (1):
  drm/amd/display: correct z8_watermark 16bit to 20bit mask

Dillon Varone (1):
  drm/amd/display: Skip dmub memory flush when not needed

Ethan Bitnun (1):
  drm/amd/display: Add support for 1080p SubVP to reduce idle power

Fudong Wang (1):
  drm/amd/display: Add smu write msg id fail retry process

Gabe Teeger (1):
  drm/amd/display: Remove wait while locked

Martin Leung (1):
  drm/amd/display: 3.2.249

Mustapha Ghaddar (1):
  drm/amd/display: Add DPIA Link Encoder Assignment Fix

Wenjing Liu (12):
  Partially revert "drm/amd/display: update add plane to context logic
with a new algorithm"
  drm/amd/display: update blank state on ODM changes
  drm/amd/display: always switch off ODM before committing more streams
  drm/amd/display: add comments to add plane functions
  drm/amd/display: rename function to add otg master for stream
  drm/amd/display: add new resource interface for acquiring sec opp
heads and release pipe
  drm/amd/display: add new resource interfaces to update odm mpc slice
count
  drm/amd/display: add more pipe resource interfaces
  drm/amd/display: use new pipe allocation interface in dcn32 fpu
  drm/amd/display: switch to new ODM policy for windowed MPO ODM support
  drm/amd/display: add pipe topology update log
  drm/amd/display: fix pipe topology logging error

 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c |   36 +-
 drivers/gpu/drm/amd/display/dc/Makefile   |1 +
 .../display/dc/clk_mgr/dcn315/dcn315_smu.c|   20 +-
 drivers/gpu/drm/amd/display/dc/core/dc.c  |   68 +-
 .../drm/amd/display/dc/core/dc_link_enc_cfg.c |   35 +-
 .../gpu/drm/amd/display/dc/core/dc_resource.c | 1473 +
 .../gpu/drm/amd/display/dc/core/dc_stream.c   |2 +-
 drivers/gpu/drm/amd/display/dc/dc.h   |3 +-
 .../drm/amd/display/dc/dcn20/dcn20_hwseq.c|   59 +-
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |   22 +-
 .../drm/amd/display/dc/dcn20/dcn20_resource.h |4 +-
 .../amd/display/dc/dcn201/dcn201_resource.c   |1 +
 .../drm/amd/display/dc/dcn21/dcn21_resource.c |1 +
 .../drm/amd/display/dc/dcn30/dcn30_resource.c |1 +
 .../amd/display/dc/dcn301/dcn301_resource.c   |

[PATCH v3 7/7] Revert "drm/amd/amdgpu: switch on/off vcn power profile mode"

2023-08-28 Thread Arvind Yadav
This reverts commit 5ce71f59bb9bd3d8a09b96afdbc92975cb6dc303.

Reason for revert: A new amdgpu_workload_profile* API has been added
to switch the profile mode on/off. These new APIs allow changing
the GPU power profile based on a submitted job.

Cc: Christian Koenig 
Cc: Alex Deucher 
Acked-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 14 ++
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 2d94f1b63bd6..70777fcfa626 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -363,7 +363,6 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct 
*work)
container_of(work, struct amdgpu_device, vcn.idle_work.work);
unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
unsigned int i, j;
-   int r = 0;
 
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
@@ -392,10 +391,6 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)
if (!fences && !atomic_read(>vcn.total_submission_cnt)) {
amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
   AMD_PG_STATE_GATE);
-   r = amdgpu_dpm_switch_power_profile(adev, 
PP_SMC_POWER_PROFILE_VIDEO,
-   false);
-   if (r)
-   dev_warn(adev->dev, "(%d) failed to disable video power 
profile mode\n", r);
} else {
schedule_delayed_work(>vcn.idle_work, VCN_IDLE_TIMEOUT);
}
@@ -404,16 +399,11 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)
 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
-   int r = 0;
 
atomic_inc(>vcn.total_submission_cnt);
 
-   if (!cancel_delayed_work_sync(>vcn.idle_work)) {
-   r = amdgpu_dpm_switch_power_profile(adev, 
PP_SMC_POWER_PROFILE_VIDEO,
-   true);
-   if (r)
-   dev_warn(adev->dev, "(%d) failed to switch to video 
power profile mode\n", r);
-   }
+   if (!cancel_delayed_work_sync(>vcn.idle_work))
+   amdgpu_gfx_off_ctrl(adev, false);
 
mutex_lock(>vcn.vcn_pg_lock);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
-- 
2.34.1



[PATCH v3 6/7] drm/amdgpu: switch workload context to/from compute

2023-08-28 Thread Arvind Yadav
This patch switches the GPU workload mode to/from
compute mode, while submitting compute workload.

v3:
- Addressed the review comment about changing the
  function name from *_set() to *_get().

Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Reviewed-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 0385f7f69278..fa939ac17120 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -713,9 +713,11 @@ void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device 
*adev, bool idle)
pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
amdgpu_gfx_off_ctrl(adev, idle);
}
-   amdgpu_dpm_switch_power_profile(adev,
-   PP_SMC_POWER_PROFILE_COMPUTE,
-   !idle);
+
+   if (idle)
+   amdgpu_workload_profile_put(adev, AMDGPU_RING_TYPE_COMPUTE);
+   else
+   amdgpu_workload_profile_get(adev, AMDGPU_RING_TYPE_COMPUTE);
 }
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
-- 
2.34.1



[PATCH v3 5/7] drm/amdgpu: Set/Reset GPU workload profile

2023-08-28 Thread Arvind Yadav
This patch is to switch the GPU workload profile based
on the submitted job. The workload profile is reset to
default when the job is done.

v3:
- Addressed the review comment about changing the function
  name from *_set() to *_get().

Cc: Christian Koenig 
Cc: Alex Deucher 
Reviewed-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index c3d9d75143f4..c5032762d497 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -176,6 +176,9 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
 static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 {
struct amdgpu_job *job = to_amdgpu_job(s_job);
+   struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
+
+   amdgpu_workload_profile_put(ring->adev, ring->funcs->type);
 
drm_sched_job_cleanup(s_job);
 
@@ -295,6 +298,8 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
 
+   amdgpu_workload_profile_get(adev, ring->funcs->type);
+
job->job_run_counter++;
amdgpu_job_free_resources(job);
 
-- 
2.34.1



[PATCH v3 4/7] drm/amdgpu: Add suspend function to clear the GPU power profile.

2023-08-28 Thread Arvind Yadav
This patch adds a suspend function that will clear the GPU
power profile before going into suspend state.

v2:
- Add the new suspend function based on review comment.

v3:
- Addressed the review comment.
- Now clearing all the profiles in the work handler.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 7 +++
 drivers/gpu/drm/amd/include/amdgpu_workload.h | 2 ++
 3 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index cd3bf641b630..3b70e657b439 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4212,6 +4212,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
 
amdgpu_ras_suspend(adev);
 
+   amdgpu_workload_profile_suspend(adev);
+
amdgpu_device_ip_suspend_phase1(adev);
 
if (!adev->in_s0ix)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
index fbe86ee5b8bf..0bab274a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -197,6 +197,13 @@ void amdgpu_workload_profile_get(struct amdgpu_device 
*adev,
mutex_unlock(>workload_lock);
 }
 
+void amdgpu_workload_profile_suspend(struct amdgpu_device *adev)
+{
+   struct amdgpu_smu_workload *workload = >smu_workload;
+
+   amdgpu_power_profile_clear_all(adev, workload);
+}
+
 void amdgpu_workload_profile_init(struct amdgpu_device *adev)
 {
adev->smu_workload.adev = adev;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h 
b/drivers/gpu/drm/amd/include/amdgpu_workload.h
index 596a962800e9..34b96a5e7b50 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_workload.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h
@@ -52,6 +52,8 @@ void amdgpu_workload_profile_put(struct amdgpu_device *adev,
 void amdgpu_workload_profile_get(struct amdgpu_device *adev,
 uint32_t ring_type);
 
+void amdgpu_workload_profile_suspend(struct amdgpu_device *adev);
+
 void amdgpu_workload_profile_init(struct amdgpu_device *adev);
 
 void amdgpu_workload_profile_fini(struct amdgpu_device *adev);
-- 
2.34.1



[PATCH v3 3/7] drm/amdgpu: Add new function to put GPU power profile

2023-08-28 Thread Arvind Yadav
This patch adds a function which will clear the GPU
power profile after job finished.

This is how it works:
- The scheduler will set the GPU power profile based on ring_type.
- The scheduler will clear the GPU power profile once the job is finished.
- Here, the *_workload_profile_set function will set the GPU
  power profile and the *_workload_profile_put function will
  schedule the smu_delayed_work task after 100ms delay. This
  smu_delayed_work task will clear a GPU power profile if any
  new jobs are not scheduled within 100 ms. But if any new job
  comes within 100ms then the *_workload_profile_set function
  will cancel this work and set the GPU power profile based on
  preferences.

v2:
- Splitting workload_profile_set and workload_profile_put
  into two separate patches.
- Addressed review comment.

v3:
- Addressed all the review comments.
- Now clearing all the profiles in the work handler.
- Added *_clear_all function to clear all the power profile.
- scheduling delay work to clear the power profile when refcount
  becomes zero.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 118 +-
 drivers/gpu/drm/amd/include/amdgpu_workload.h |   3 +
 2 files changed, 120 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
index 67eacaac6c9b..fbe86ee5b8bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -24,6 +24,9 @@
 
 #include "amdgpu.h"
 
+/* 100 millsecond timeout */
+#define SMU_IDLE_TIMEOUT   msecs_to_jiffies(100)
+
 static enum PP_SMC_POWER_PROFILE
 ring_to_power_profile(uint32_t ring_type)
 {
@@ -58,16 +61,111 @@ amdgpu_power_profile_set(struct amdgpu_device *adev,
return ret;
 }
 
+static int
+amdgpu_power_profile_clear(struct amdgpu_device *adev,
+  enum PP_SMC_POWER_PROFILE profile)
+{
+   int ret = amdgpu_dpm_switch_power_profile(adev, profile, false);
+
+   if (!ret) {
+   /* Clear the bit for the submitted workload profile */
+   clear_bit(profile, >smu_workload.submit_workload_status);
+   }
+
+   return ret;
+}
+
+static void
+amdgpu_power_profile_clear_all(struct amdgpu_device *adev,
+  struct amdgpu_smu_workload *workload)
+{
+   int ret;
+   int profile = PP_SMC_POWER_PROFILE_COMPUTE;
+
+   cancel_delayed_work_sync(>power_profile_work);
+   mutex_lock(>workload_lock);
+
+   /* Clear all the GPU power profile*/
+   for (; profile > 0; profile--) {
+   atomic_set(>power_profile_ref[profile], 0);
+   ret = amdgpu_power_profile_clear(adev, profile);
+   if (ret) {
+   DRM_WARN("Failed to clear workload %s,error = %d\n",
+amdgpu_workload_mode_name[profile], ret);
+   }
+   }
+
+   workload->submit_workload_status = 0;
+   mutex_unlock(>workload_lock);
+}
+
+static void
+amdgpu_power_profile_idle_work_handler(struct work_struct *work)
+{
+
+   struct amdgpu_smu_workload *workload = container_of(work,
+ struct 
amdgpu_smu_workload,
+ power_profile_work.work);
+   struct amdgpu_device *adev = workload->adev;
+   int ret;
+   int profile;
+
+   mutex_lock(>workload_lock);
+
+   /* Clear all the GPU power profile*/
+   for_each_set_bit(profile, >submit_workload_status,
+PP_SMC_POWER_PROFILE_CUSTOM) {
+   if (!atomic_read(>power_profile_ref[profile])) {
+   ret = amdgpu_power_profile_clear(adev, profile);
+   if (ret) {
+   DRM_WARN("Failed to clear workload %s,error = 
%d\n",
+amdgpu_workload_mode_name[profile], 
ret);
+   }
+   }
+   }
+
+   mutex_unlock(>workload_lock);
+}
+
+void amdgpu_workload_profile_put(struct amdgpu_device *adev,
+uint32_t ring_type)
+{
+   struct amdgpu_smu_workload *workload = >smu_workload;
+   enum PP_SMC_POWER_PROFILE profile = ring_to_power_profile(ring_type);
+   int refcount;
+
+   if (profile == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT)
+   return;
+
+   mutex_lock(>workload_lock);
+
+   refcount = atomic_read(>power_profile_ref[profile]);
+   if (!refcount) {
+   DRM_WARN("Power profile %s ref. count error\n",
+amdgpu_workload_mode_name[profile]);
+   } else {
+   if (refcount == 1)
+   schedule_delayed_work(>power_profile_work,
+ SMU_IDLE_TIMEOUT);
+
+   atomic_dec(>power_profile_ref[profile]);

[PATCH v3 2/7] drm/amdgpu: Add new function to set GPU power profile

2023-08-28 Thread Arvind Yadav
This patch adds a function which will change the GPU
power profile based on a submitted job. This can optimize
the power performance when the workload is on.

v2:
- Splitting workload_profile_set and workload_profile_put
  into two separate patches.
- Addressed review comment.

v3:
- Addressed all the review comments.
- Changing the function name from *_set() to *_get().
- Now setting a power profile when refcount is zero.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 59 +++
 drivers/gpu/drm/amd/include/amdgpu_workload.h |  3 +
 2 files changed, 62 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
index 32166f482f77..67eacaac6c9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -24,6 +24,65 @@
 
 #include "amdgpu.h"
 
+static enum PP_SMC_POWER_PROFILE
+ring_to_power_profile(uint32_t ring_type)
+{
+   switch (ring_type) {
+   case AMDGPU_RING_TYPE_GFX:
+   return PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+   case AMDGPU_RING_TYPE_COMPUTE:
+   return PP_SMC_POWER_PROFILE_COMPUTE;
+   case AMDGPU_RING_TYPE_UVD:
+   case AMDGPU_RING_TYPE_VCE:
+   case AMDGPU_RING_TYPE_UVD_ENC:
+   case AMDGPU_RING_TYPE_VCN_DEC:
+   case AMDGPU_RING_TYPE_VCN_ENC:
+   case AMDGPU_RING_TYPE_VCN_JPEG:
+   return PP_SMC_POWER_PROFILE_VIDEO;
+   default:
+   return PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+   }
+}
+
+static int
+amdgpu_power_profile_set(struct amdgpu_device *adev,
+enum PP_SMC_POWER_PROFILE profile)
+{
+   int ret = amdgpu_dpm_switch_power_profile(adev, profile, true);
+
+   if (!ret) {
+   /* Set the bit for the submitted workload profile */
+   set_bit(profile, >smu_workload.submit_workload_status);
+   }
+
+   return ret;
+}
+
+void amdgpu_workload_profile_get(struct amdgpu_device *adev,
+uint32_t ring_type)
+{
+   struct amdgpu_smu_workload *workload = >smu_workload;
+   enum PP_SMC_POWER_PROFILE profile = ring_to_power_profile(ring_type);
+   int ret, refcount;
+
+   if (profile == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT)
+   return;
+
+   mutex_lock(>workload_lock);
+
+   refcount = atomic_read(>power_profile_ref[profile]);
+   if (!refcount) {
+   ret = amdgpu_power_profile_set(adev, profile);
+   if (ret) {
+   DRM_WARN("Failed to set workload profile to %s, error = 
%d\n",
+amdgpu_workload_mode_name[profile], ret);
+   }
+   }
+
+   atomic_inc(>smu_workload.power_profile_ref[profile]);
+   mutex_unlock(>workload_lock);
+}
+
 void amdgpu_workload_profile_init(struct amdgpu_device *adev)
 {
adev->smu_workload.adev = adev;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h 
b/drivers/gpu/drm/amd/include/amdgpu_workload.h
index dc12448764a3..5fc0bc2a74a4 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_workload.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h
@@ -46,6 +46,9 @@ static const char * const amdgpu_workload_mode_name[] = {
"Window3D"
 };
 
+void amdgpu_workload_profile_get(struct amdgpu_device *adev,
+uint32_t ring_type);
+
 void amdgpu_workload_profile_init(struct amdgpu_device *adev);
 
 void amdgpu_workload_profile_fini(struct amdgpu_device *adev);
-- 
2.34.1



[PATCH v3 1/7] drm/amdgpu: Added init/fini functions for workload

2023-08-28 Thread Arvind Yadav
The 'struct amdgpu_smu_workload' initialization/cleanup
functions are added by this patch.

v2:
- Splitting big patch into separate patches.
- Added new fini function.

v3:
- Addressed review comment to change 'power_profile_work'
  instead of 'smu_delayed_work'.

Cc: Christian Koenig 
Cc: Alex Deucher 
Reviewed-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 44 +++
 drivers/gpu/drm/amd/include/amdgpu_workload.h | 53 +++
 5 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 415a7fa395c4..6a9e187d61e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -60,7 +60,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_ring_mux.o
+   amdgpu_ring_mux.o amdgpu_workload.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 02b827785e39..1939fa1af8a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -107,6 +107,7 @@
 #include "amdgpu_fdinfo.h"
 #include "amdgpu_mca.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_workload.h"
 
 #define MAX_GPU_INSTANCE   16
 
@@ -1050,6 +1051,8 @@ struct amdgpu_device {
 
booljob_hang;
booldc_enabled;
+
+   struct amdgpu_smu_workload  smu_workload;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5c7d40873ee2..cd3bf641b630 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2243,6 +2243,8 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
adev->cg_flags &= amdgpu_cg_mask;
adev->pg_flags &= amdgpu_pg_mask;
 
+   amdgpu_workload_profile_init(adev);
+
return 0;
 }
 
@@ -2890,6 +2892,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device 
*adev)
 {
int i, r;
 
+   amdgpu_workload_profile_fini(adev);
+
if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
amdgpu_virt_release_ras_err_handler_data(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
new file mode 100644
index ..32166f482f77
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+
+void amdgpu_workload_profile_init(struct amdgpu_device *adev)
+{
+   adev->smu_workload.adev = adev;
+   adev->smu_workload.submit_workload_status = 0;
+   adev->smu_workload.initialized = true;
+
+   mutex_init(&adev->smu_workload.workload_lock);
+}
+
+void amdgpu_workload_profile_fini(struct amdgpu_device *adev)
+{
+   if (!adev->smu_workload.initialized)
+   return;
+
+   adev->smu_workload.submit_workload_status = 0;
+   adev->smu_workload.initialized = false;
+   mutex_destroy(&adev->smu_workload.workload_lock);
+}
diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h 

[PATCH v3 0/7] GPU workload hints for better performance

2023-08-28 Thread Arvind Yadav
AMDGPU SOCs supports dynamic workload based power profiles, which can
provide fine-tuned performance for a particular type of workload.
This patch series adds an interface to set/reset these power profiles
based on the submitted job. The driver can dynamically switch
the power profiles based on submitted job. This can optimize the power
performance when the particular workload is on. 

v2:
- Splitting workload_profile_set and workload_profile_put
  into two separate patches.
- Addressed review comment.
- Added new suspend function.
- Added patch to switches the GPU workload mode for KFD. 

v3:
- Addressed all review comment.
- Changed the function name from *_set() to *_get().
- Now clearing all the profile in work handler.
- Added *_clear_all function to clear all the power profile.


Arvind Yadav (7):
  drm/amdgpu: Added init/fini functions for workload
  drm/amdgpu: Add new function to set GPU power profile
  drm/amdgpu: Add new function to put GPU power profile
  drm/amdgpu: Add suspend function to clear the GPU power profile.
  drm/amdgpu: Set/Reset GPU workload profile
  drm/amdgpu: switch workload context to/from compute
  Revert "drm/amd/amdgpu: switch on/off vcn power profile mode"

 drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|   8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |   5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 226 ++
 drivers/gpu/drm/amd/include/amdgpu_workload.h |  61 +
 8 files changed, 309 insertions(+), 16 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h

-- 
2.34.1



Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs

2023-08-28 Thread Christian König
Well, there seems to be a very basic misunderstood here: The IOMMU 
isolation level is *not* ASIC dependent!


Try to set amd_iommu=force_isolation on the kernel command line.

This is a configuration option customers can use to harden their systems 
and when this isn't properly tested we can't allow page tables in system 
memory.


Regards,
Christian.

Am 28.08.23 um 13:23 schrieb Zhang, Yifan:

[Public]

Not yet. It will be only enabled for gfx10.3.3 and later APU initially, IOMMU 
is pass through in these ASIC.

-Original Message-
From: Christian König 
Sent: Monday, August 28, 2023 5:41 PM
To: Zhang, Yifan ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Koenig, Christian 

Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 
onwards APUs

Is that now validated with IOMMU in non pass through mode?

Christian.

Am 28.08.23 um 10:58 schrieb Zhang, Yifan:

[AMD Official Use Only - General]

Ping

-Original Message-
From: Zhang, Yifan 
Sent: Friday, August 25, 2023 8:34 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Koenig, Christian 
; Zhang, Yifan 
Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 
onwards APUs

To decrease VRAM pressure for APUs, put page tables to GTT domain for gfx10 and 
newer APUs.

v2: only enable it for gfx10 and newer APUs (Alex, Christian)

Signed-off-by: Yifan Zhang 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++---
   1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 96d601e209b8..4603d87c61a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
  bp.size = amdgpu_vm_pt_size(adev, level);
  bp.byte_align = AMDGPU_GPU_PAGE_SIZE;

-   if (!adev->gmc.is_app_apu)
-   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
-   else
+   if (adev->gmc.is_app_apu ||
+   ((adev->flags & AMD_IS_APU) &&
+   (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3))))
  bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+   else
+   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+

  bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
  bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
--
2.37.3





RE: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs

2023-08-28 Thread Zhang, Yifan
[Public]

Not yet. It will be only enabled for gfx10.3.3 and later APU initially, IOMMU 
is pass through in these ASIC.

-Original Message-
From: Christian König 
Sent: Monday, August 28, 2023 5:41 PM
To: Zhang, Yifan ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Koenig, Christian 

Subject: Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 
onwards APUs

Is that now validated with IOMMU in non pass through mode?

Christian.

Am 28.08.23 um 10:58 schrieb Zhang, Yifan:
> [AMD Official Use Only - General]
>
> Ping
>
> -Original Message-
> From: Zhang, Yifan 
> Sent: Friday, August 25, 2023 8:34 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Koenig, Christian 
> ; Zhang, Yifan 
> Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 
> onwards APUs
>
> To decrease VRAM pressure for APUs, put page tables to GTT domain for gfx10 
> and newer APUs.
>
> v2: only enable it for gfx10 and newer APUs (Alex, Christian)
>
> Signed-off-by: Yifan Zhang 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++---
>   1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index 96d601e209b8..4603d87c61a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, 
> struct amdgpu_vm *vm,
>  bp.size = amdgpu_vm_pt_size(adev, level);
>  bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
>
> -   if (!adev->gmc.is_app_apu)
> -   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
> -   else
> +   if (adev->gmc.is_app_apu ||
> +   ((adev->flags & AMD_IS_APU) &&
> +   (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3))))
>  bp.domain = AMDGPU_GEM_DOMAIN_GTT;
> +   else
> +   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
> +
>
>  bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
>  bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> --
> 2.37.3
>



Re: [PATCH v2] drm/amd: Simplify the bo size check funciton

2023-08-28 Thread Christian König

Am 28.08.23 um 12:02 schrieb Ma Jun:

Simplify the code logic of size check function amdgpu_bo_validate_size

Signed-off-by: Ma Jun 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 29 +-
  1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 807ea74ece25..e603ca062fcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -480,7 +480,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 
*gpu_addr,
*cpu_addr = NULL;
  }
  
-/* Validate bo size is bit bigger then the request domain */

+/* Validate bo size is bit bigger than the request domain */
  static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
  unsigned long size, u32 domain)
  {
@@ -490,29 +490,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device 
*adev,
 * If GTT is part of requested domains the check must succeed to
 * allow fall back to GTT.
 */
-   if (domain & AMDGPU_GEM_DOMAIN_GTT) {
+   if (domain & AMDGPU_GEM_DOMAIN_GTT)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
-
-   if (man && size < man->size)
-   return true;
-   else if (!man)
-   WARN_ON_ONCE("GTT domain requested but GTT mem manager 
uninitialized");
-   goto fail;
-   } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+   else if (domain & AMDGPU_GEM_DOMAIN_VRAM)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+   else
+   return true;
  
-		if (man && size < man->size)

-   return true;
-   goto fail;
+   if (!man) {
+   if (domain & AMDGPU_GEM_DOMAIN_GTT)
+   WARN_ON_ONCE("GTT domain requested but GTT mem manager 
uninitialized");
+   return false;
}
  
  	/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */

-   return true;
+   if (size < man->size)
+   return true;
  
-fail:

-   if (man)
-   DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
- man->size);
+   DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, 
man->size);
return false;
  }
  




[PATCH v2] drm/amd: Simplify the bo size check funciton

2023-08-28 Thread Ma Jun
Simplify the code logic of size check function amdgpu_bo_validate_size

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 29 +-
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 807ea74ece25..e603ca062fcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -480,7 +480,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 
*gpu_addr,
*cpu_addr = NULL;
 }
 
-/* Validate bo size is bit bigger then the request domain */
+/* Validate bo size is bit bigger than the request domain */
 static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
  unsigned long size, u32 domain)
 {
@@ -490,29 +490,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device 
*adev,
 * If GTT is part of requested domains the check must succeed to
 * allow fall back to GTT.
 */
-   if (domain & AMDGPU_GEM_DOMAIN_GTT) {
+   if (domain & AMDGPU_GEM_DOMAIN_GTT)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
-
-   if (man && size < man->size)
-   return true;
-   else if (!man)
-   WARN_ON_ONCE("GTT domain requested but GTT mem manager 
uninitialized");
-   goto fail;
-   } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+   else if (domain & AMDGPU_GEM_DOMAIN_VRAM)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+   else
+   return true;
 
-   if (man && size < man->size)
-   return true;
-   goto fail;
+   if (!man) {
+   if (domain & AMDGPU_GEM_DOMAIN_GTT)
+   WARN_ON_ONCE("GTT domain requested but GTT mem manager 
uninitialized");
+   return false;
}
 
/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, 
_DOMAIN_DOORBELL */
-   return true;
+   if (size < man->size)
+   return true;
 
-fail:
-   if (man)
-   DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
- man->size);
+   DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, 
man->size);
return false;
 }
 
-- 
2.34.1



Re: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs

2023-08-28 Thread Christian König

Is that now validated with IOMMU in non pass through mode?

Christian.

Am 28.08.23 um 10:58 schrieb Zhang, Yifan:

[AMD Official Use Only - General]

Ping

-Original Message-
From: Zhang, Yifan 
Sent: Friday, August 25, 2023 8:34 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Koenig, Christian 
; Zhang, Yifan 
Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 
onwards APUs

To decrease VRAM pressure for APUs, put page tables to GTT domain for gfx10 and 
newer APUs.

v2: only enable it for gfx10 and newer APUs (Alex, Christian)

Signed-off-by: Yifan Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++---
  1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 96d601e209b8..4603d87c61a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
 bp.size = amdgpu_vm_pt_size(adev, level);
 bp.byte_align = AMDGPU_GPU_PAGE_SIZE;

-   if (!adev->gmc.is_app_apu)
-   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
-   else
+   if (adev->gmc.is_app_apu ||
+   ((adev->flags & AMD_IS_APU) &&
+   (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3))))
 bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+   else
+   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+

 bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
 bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
--
2.37.3





[PATCH v2] drm/amdgpu: access RLC_SPM_MC_CNTL through MMIO in SRIOV runtime

2023-08-28 Thread ZhenGuo Yin
Register RLC_SPM_MC_CNTL is not blocked by L1 policy, VF can
directly access it through MMIO during SRIOV runtime.

v2: use SOC15 interface to access registers

Signed-off-by: ZhenGuo Yin 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 13 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 13 +++--
 2 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 0aee9c8288a2..6ccde07ed63e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7897,22 +7897,15 @@ static int gfx_v10_0_update_gfx_clock_gating(struct 
amdgpu_device *adev,
 static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
   unsigned int vmid)
 {
-   u32 reg, data;
+   u32 data;
 
/* not for *_SOC15 */
-   reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
-   if (amdgpu_sriov_is_pp_one_vf(adev))
-   data = RREG32_NO_KIQ(reg);
-   else
-   data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
+   data = RREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL);
 
data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << 
RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
 
-   if (amdgpu_sriov_is_pp_one_vf(adev))
-   WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
-   else
-   WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+   WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
 }
 
 static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int 
vmid)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index b0c32521efdc..337ed771605f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4984,23 +4984,16 @@ static int gfx_v11_0_update_gfx_clock_gating(struct 
amdgpu_device *adev,
 
 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned 
vmid)
 {
-   u32 reg, data;
+   u32 data;
 
amdgpu_gfx_off_ctrl(adev, false);
 
-   reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
-   if (amdgpu_sriov_is_pp_one_vf(adev))
-   data = RREG32_NO_KIQ(reg);
-   else
-   data = RREG32(reg);
+   data = RREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL);
 
data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << 
RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
 
-   if (amdgpu_sriov_is_pp_one_vf(adev))
-   WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
-   else
-   WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+   WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
 
amdgpu_gfx_off_ctrl(adev, true);
 }
-- 
2.35.1



RE: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 onwards APUs

2023-08-28 Thread Zhang, Yifan
[AMD Official Use Only - General]

Ping

-Original Message-
From: Zhang, Yifan 
Sent: Friday, August 25, 2023 8:34 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Koenig, Christian 
; Zhang, Yifan 
Subject: [PATCH v3 2/2] drm/amdgpu: Put page tables to GTT memory for gfx10 
onwards APUs

To decrease VRAM pressure for APUs, put page tables to GTT domain for gfx10 and 
newer APUs.

v2: only enable it for gfx10 and newer APUs (Alex, Christian)

Signed-off-by: Yifan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 96d601e209b8..4603d87c61a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -515,10 +515,13 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
bp.size = amdgpu_vm_pt_size(adev, level);
bp.byte_align = AMDGPU_GPU_PAGE_SIZE;

-   if (!adev->gmc.is_app_apu)
-   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
-   else
+   if (adev->gmc.is_app_apu ||
+   ((adev->flags & AMD_IS_APU) &&
+   (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 3))))
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+   else
+   bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+

bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
--
2.37.3



Re: [PATCH v2 19/34] drm/amd/display: decouple steps for mapping CRTC degamma to DC plane

2023-08-28 Thread Joshua Ashton
Degamma has always been on the plane on AMD. CRTC DEGAMMA_LUT has actually
just been applying it to every plane pre-blend.

Degamma makes no sense after blending anyway.
The entire point is for it to happen before blending to blend in linear
space. Otherwise DEGAMMA_LUT and REGAMMA_LUT are the exact same thing...

- Joshie 

On Tuesday, 22 August 2023, Pekka Paalanen 
wrote:
> On Thu, 10 Aug 2023 15:02:59 -0100
> Melissa Wen  wrote:
>
>> The next patch adds pre-blending degamma to AMD color mgmt pipeline, but
>> pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC
>> atomic degamma or implict degamma on legacy gamma. Detach degamma usage
>> regarging CRTC color properties to manage plane and CRTC color
>> correction combinations.
>>
>> Reviewed-by: Harry Wentland 
>> Signed-off-by: Melissa Wen 
>> ---
>>  .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 59 +--
>>  1 file changed, 41 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
>> index 68e9f2c62f2e..74eb02655d96 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
>> @@ -764,20 +764,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct
dm_crtc_state *crtc)
>>   return 0;
>>  }
>>
>> -/**
>> - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC
plane.
>> - * @crtc: amdgpu_dm crtc state
>> - * @dc_plane_state: target DC surface
>> - *
>> - * Update the underlying dc_stream_state's input transfer function
(ITF) in
>> - * preparation for hardware commit. The transfer function used depends
on
>> - * the preparation done on the stream for color management.
>> - *
>> - * Returns:
>> - * 0 on success. -ENOMEM if mem allocation fails.
>> - */
>> -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
>> -   struct dc_plane_state
*dc_plane_state)
>> +static int
>> +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
>> +  struct dc_plane_state *dc_plane_state)
>>  {
>>   const struct drm_color_lut *degamma_lut;
>>   enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
>> @@ -800,8 +789,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct
dm_crtc_state *crtc,
>>_size);
>>   ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
>>
>> - dc_plane_state->in_transfer_func->type =
>> - TF_TYPE_DISTRIBUTED_POINTS;
>> + dc_plane_state->in_transfer_func->type =
TF_TYPE_DISTRIBUTED_POINTS;
>>
>>   /*
>>* This case isn't fully correct, but also fairly
>> @@ -837,7 +825,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct
dm_crtc_state *crtc,
>>  degamma_lut, degamma_size);
>>   if (r)
>>   return r;
>> - } else if (crtc->cm_is_degamma_srgb) {
>> + } else {
>>   /*
>>* For legacy gamma support we need the regamma input
>>* in linear space. Assume that the input is sRGB.
>> @@ -847,8 +835,43 @@ int amdgpu_dm_update_plane_color_mgmt(struct
dm_crtc_state *crtc,
>>
>>   if (tf != TRANSFER_FUNCTION_SRGB &&
>>   !mod_color_calculate_degamma_params(NULL,
>> - dc_plane_state->in_transfer_func, NULL, false))
>> +
 dc_plane_state->in_transfer_func,
>> + NULL, false))
>>   return -ENOMEM;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +/**
>> + * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC
plane.
>> + * @crtc: amdgpu_dm crtc state
>> + * @dc_plane_state: target DC surface
>> + *
>> + * Update the underlying dc_stream_state's input transfer function
(ITF) in
>> + * preparation for hardware commit. The transfer function used depends
on
>> + * the preparation done on the stream for color management.
>> + *
>> + * Returns:
>> + * 0 on success. -ENOMEM if mem allocation fails.
>> + */
>> +int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
>> +   struct dc_plane_state
*dc_plane_state)
>> +{
>> + bool has_crtc_cm_degamma;
>> + int ret;
>> +
>> + has_crtc_cm_degamma = (crtc->cm_has_degamma ||
crtc->cm_is_degamma_srgb);
>> + if (has_crtc_cm_degamma){
>> + /* AMD HW doesn't have post-blending degamma caps. When DRM
>> +  * CRTC atomic degamma is set, we maps it to DPP degamma
block
>> +  * (pre-blending) or, on legacy gamma, we use DPP degamma
to
>> +  * linearize (implicit degamma) from sRGB/BT709 according
to
>> +  * the input space.
>
> Uhh, you can't just move degamma before blending if KMS userspace
> wants it after blending. That would be incorrect behaviour. 

Re: [PATCH] drm/amdgpu: remove unused parameter in amdgpu_vmid_grab_idle

2023-08-28 Thread Christian König

Am 17.08.23 um 09:00 schrieb Yifan Zhang:

amdgpu_vm is not used in amdgpu_vmid_grab_idle.

Signed-off-by: Yifan Zhang 


Sorry for the delay, Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 ++
  1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index ff1ea99292fb..ddd0891da116 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -188,7 +188,6 @@ static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
  /**
   * amdgpu_vmid_grab_idle - grab idle VMID
   *
- * @vm: vm to allocate id for
   * @ring: ring we want to submit job to
   * @idle: resulting idle VMID
   * @fence: fence to wait for if no id could be grabbed
@@ -196,8 +195,7 @@ static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
   * Try to find an idle VMID, if none is idle add a fence to wait to the sync
   * object. Returns -ENOMEM when we are out of memory.
   */
-static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
-struct amdgpu_ring *ring,
+static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring,
 struct amdgpu_vmid **idle,
 struct dma_fence **fence)
  {
@@ -405,7 +403,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
int r = 0;
  
	mutex_lock(&id_mgr->lock);

-   r = amdgpu_vmid_grab_idle(vm, ring, &idle, fence);
+   r = amdgpu_vmid_grab_idle(ring, &idle, fence);
if (r || !idle)
goto error;
  




Re: [PATCH] drm/amd: Simplify the size check funciton

2023-08-28 Thread Ma, Jun



On 8/28/2023 2:00 PM, Christian König wrote:
> Am 28.08.23 um 07:09 schrieb Ma, Jun:
>> Hi Christian,
>>
>> On 8/25/2023 4:08 PM, Christian König wrote:
>>>
>>> Am 25.08.23 um 07:22 schrieb Ma Jun:
 Simplify the code logic of size check function amdgpu_bo_validate_size

 Signed-off-by: Ma Jun 
 ---
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 28 +-
1 file changed, 11 insertions(+), 17 deletions(-)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
 index 807ea74ece25..4c95db954a76 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
 @@ -480,7 +480,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 
 *gpu_addr,
*cpu_addr = NULL;
}

 -/* Validate bo size is bit bigger then the request domain */
 +/* Validate bo size is bit bigger than the request domain */
static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
  unsigned long size, u32 
 domain)
{
 @@ -490,29 +490,23 @@ static bool amdgpu_bo_validate_size(struct 
 amdgpu_device *adev,
 * If GTT is part of requested domains the check must succeed to
 * allow fall back to GTT.
 */
 -  if (domain & AMDGPU_GEM_DOMAIN_GTT) {
 +  if (domain & AMDGPU_GEM_DOMAIN_GTT)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
 -
 -  if (man && size < man->size)
 -  return true;
 -  else if (!man)
 -  WARN_ON_ONCE("GTT domain requested but GTT mem manager 
 uninitialized");
 -  goto fail;
 -  } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
 +  else if (domain & AMDGPU_GEM_DOMAIN_VRAM)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
 +  else
 +  return true;

 -  if (man && size < man->size)
 -  return true;
 -  goto fail;
 +  if (!man) {
 +  WARN_ON_ONCE("Mem mananger of mem domain %d is uninitialized", 
 domain);
 +  return false;
}
>>> That change here is not correct. It's perfectly valid for userspace to
>>> request VRAM even if VRAM isn't initialized.
>>>
>>> Only the GTT manager is mandatory. That's why the code previously looked
>>> like it does.
>>>
>> Thanks for your explanation.
>> How about changing the code to the following?
>>
>> +   if (!man) {
>> +   if (domain & AMDGPU_GEM_DOMAIN_GTT)
>> +   WARN_ON_ONCE("GTT domain requested but GTT mem 
>> manager uninitialized");
>> +   return false;
>>  }
> 
> Of hand that looks like it should work, but I need to see the full patch.
> 
Thanks, I'll send v2 with this change.

Regards,
Ma Jun
> Regards,
> Christian.
> 
>>
>> Regards,
>> Ma Jun
>>
>>> regards,
>>> Christian.
>>>

/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, 
 _DOMAIN_DOORBELL */
 -  return true;
 +  if (size < man->size)
 +  return true;

 -fail:
 -  if (man)
 -  DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
 -man->size);
 +  DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, 
 man->size);
return false;
}

> 


[PATCH] drm/amdgpu: access RLC_SPM_MC_CNTL through MMIO in SRIOV

2023-08-28 Thread ZhenGuo Yin
Register RLC_SPM_MC_CNTL is not blocked by L1 policy, VF can
directly access it through MMIO.

Signed-off-by: ZhenGuo Yin 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 10 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 ++
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 0aee9c8288a2..65619f73f717 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7901,18 +7901,12 @@ static void gfx_v10_0_update_spm_vmid_internal(struct 
amdgpu_device *adev,
 
/* not for *_SOC15 */
reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
-   if (amdgpu_sriov_is_pp_one_vf(adev))
-   data = RREG32_NO_KIQ(reg);
-   else
-   data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
+   data = RREG32_NO_KIQ(reg);
 
data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << 
RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
 
-   if (amdgpu_sriov_is_pp_one_vf(adev))
-   WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
-   else
-   WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+   WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
 }
 
 static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int 
vmid)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index b0c32521efdc..7f8c5c6fd36e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4989,18 +4989,12 @@ static void gfx_v11_0_update_spm_vmid(struct 
amdgpu_device *adev, unsigned vmid)
amdgpu_gfx_off_ctrl(adev, false);
 
reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
-   if (amdgpu_sriov_is_pp_one_vf(adev))
-   data = RREG32_NO_KIQ(reg);
-   else
-   data = RREG32(reg);
+   data = RREG32_NO_KIQ(reg);
 
data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << 
RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
 
-   if (amdgpu_sriov_is_pp_one_vf(adev))
-   WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
-   else
-   WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+   WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
 
amdgpu_gfx_off_ctrl(adev, true);
 }
-- 
2.35.1



Re: [PATCH] drm/amd: Simplify the size check funciton

2023-08-28 Thread Christian König

Am 28.08.23 um 07:09 schrieb Ma, Jun:

Hi Christian,

On 8/25/2023 4:08 PM, Christian König wrote:


Am 25.08.23 um 07:22 schrieb Ma Jun:

Simplify the code logic of size check function amdgpu_bo_validate_size

Signed-off-by: Ma Jun 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 28 +-
   1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 807ea74ece25..4c95db954a76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -480,7 +480,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 
*gpu_addr,
*cpu_addr = NULL;
   }
   
-/* Validate bo size is bit bigger then the request domain */

+/* Validate bo size is bit bigger than the request domain */
   static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
  unsigned long size, u32 domain)
   {
@@ -490,29 +490,23 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device 
*adev,
 * If GTT is part of requested domains the check must succeed to
 * allow fall back to GTT.
 */
-   if (domain & AMDGPU_GEM_DOMAIN_GTT) {
+   if (domain & AMDGPU_GEM_DOMAIN_GTT)
man = ttm_manager_type(>mman.bdev, TTM_PL_TT);
-
-   if (man && size < man->size)
-   return true;
-   else if (!man)
-   WARN_ON_ONCE("GTT domain requested but GTT mem manager 
uninitialized");
-   goto fail;
-   } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+   else if (domain & AMDGPU_GEM_DOMAIN_VRAM)
man = ttm_manager_type(>mman.bdev, TTM_PL_VRAM);
+   else
+   return true;
   
-		if (man && size < man->size)

-   return true;
-   goto fail;
+   if (!man) {
+   WARN_ON_ONCE("Mem manager of mem domain %d is uninitialized", 
domain);
+   return false;
}

That change here is not correct. It's perfectly valid for userspace to
request VRAM even if VRAM isn't initialized.

Only the GTT manager is mandatory. That's why the code previously looked
like it does.


Thanks for your explanation.
How about changing the code to the following?

+   if (!man) {
+   if (domain & AMDGPU_GEM_DOMAIN_GTT)
+   WARN_ON_ONCE("GTT domain requested but GTT mem manager 
uninitialized");
+   return false;
 }


Offhand that looks like it should work, but I need to see the full patch.

Regards,
Christian.



Regards,
Ma Jun


regards,
Christian.

   
   	/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */

-   return true;
+   if (size < man->size)
+   return true;
   
-fail:

-   if (man)
-   DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
- man->size);
+   DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, 
man->size);
return false;
   }