Re: [RFC PATCH 12/40] drm/amd/display: add plane HDR multiplier driver-private property

2023-05-09 Thread Harry Wentland



On 5/9/23 16:35, Joshua Ashton wrote:
> FWIW, we technically do use it right now, but it is always set to 1 in 
> S.31.32.
> 
> Before we used shaper + 3D LUT we did use it for scaling SDR content,
> but given we always have a shaper + 3D LUT it made sense for us to
> roll that into there.
> 

Ah, that's good. No problem then.

Harry

> On Tue, 9 May 2023 at 20:00, Harry Wentland  wrote:
>>
>> On 5/9/23 12:54, Joshua Ashton wrote:
>>> We currently do not have a use for this as we settled on per-plane 3D
>>> LUT + Shaper, but we might end up wanting to use in our scRGB stack
>>> someday so I would like to keep it.
>>>
>>
>> uAPI should always have a userspace that uses it. But if we go
>> and put it behind an #ifdef anyways I don't mind taking this
>> if we foresee use for it in the near future. A gamescope experiment
>> showing how this can be used to scale sRGB planes would be great.
>> I assume that's sort of how you intend to use it.
>>
>> Harry
>>
>>> On Tue, 9 May 2023 at 16:37, Melissa Wen  wrote:

 On 05/08, Harry Wentland wrote:
>
>
> On 4/23/23 10:10, Melissa Wen wrote:
>> From: Joshua Ashton 
>>
>> Multiplier to 'gain' the plane. When PQ is decoded using the fixed func
>> transfer function to the internal FP16 fb, 1.0 -> 80 nits (on AMD at
>> least) When sRGB is decoded, 1.0 -> 1.0.  Therefore, 1.0 multiplier = 80
nits for SDR content. So if you want 203 nits for SDR content, pass in
>> (203.0 / 80.0).
>>
>
> Is gamescope intending to use this?

 I don't think so. Again, I'll double check and drop it accordingly.

 Melissa

>
> Harry
>
>> Co-developed-by: Melissa Wen 
>> Signed-off-by: Melissa Wen 
>> Signed-off-by: Joshua Ashton 
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   |  6 +
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  4 +++
>>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 12 +
>>  .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 25 ++-
>>  4 files changed, 41 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>> index 24595906dab1..dd658f162f6f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>> @@ -1326,6 +1326,12 @@ amdgpu_display_create_color_properties(struct 
>> amdgpu_device *adev)
>> return -ENOMEM;
>> adev->mode_info.plane_degamma_tf_property = prop;
>>
>> +   prop = drm_property_create_range(adev_to_drm(adev),
>> +0, "AMD_PLANE_HDR_MULT", 0, 
>> UINT_MAX);
>> +   if (!prop)
>> +   return -ENOMEM;
>> +   adev->mode_info.plane_hdr_mult_property = prop;
>> +
>> return 0;
>>  }
>>  #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
>> index ab9ce6f26c90..65a9d62ffbe4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
>> @@ -387,6 +387,10 @@ struct amdgpu_mode_info {
>>  * linearize content with or without LUT.
>>  */
>> struct drm_property *plane_degamma_tf_property;
>> +   /**
>> +* @plane_hdr_mult_property:
>> +*/
>> +   struct drm_property *plane_hdr_mult_property;
>>  #endif
>>  };
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>> index 005632c1c9ec..bb7307b9cfd5 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>> @@ -51,6 +51,7 @@
>>
>>  #define AMDGPU_DMUB_NOTIFICATION_MAX 5
>>
>> +#define AMDGPU_HDR_MULT_DEFAULT (0x1LL)
>>  /*
>>  #include "include/amdgpu_dal_power_if.h"
>>  #include "amdgpu_dm_irq.h"
>> @@ -736,6 +737,17 @@ struct dm_plane_state {
>>  * linearize.
>>  */
>> enum drm_transfer_function degamma_tf;
>> +   /**
>> +* @hdr_mult:
>> +*
>> +* Multiplier to 'gain' the plane.  When PQ is decoded using the 
>> fixed
>> +* func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
>> +* AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
>> +* Therefore, 1.0 multiplier = 80 nits for SDR content.  So if you
>> +* want, 203 nits for SDR content, pass in (203.0 / 80.0).  Format is
>> +* S31.32 sign-magnitude.
>> +*/
>> +   __u64 hdr_mult;
>>  #endif
>>  };
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
>> index 5b458cc0781c..57169dae8b3d 100644
>>

[PATCH 4/6] drm/amdkfd: Move pgmap to amdgpu_kfd_dev structure

2023-05-09 Thread Alex Deucher
From: Philip Yang 

The VRAM pgmap resource is allocated every time compute partitions are
switched, because kfd_dev is re-initialized by post_partition_switch.
As a result, it causes memory region resource leaks and unbalanced
system memory usage accounting.

The pgmap resource should be allocated and registered only once when the
driver is loaded and freed when the driver is unloaded, so move it from
kfd_dev to amdgpu_kfd_dev.

Signed-off-by: Philip Yang 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c   | 8 
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h  | 3 ---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h   | 4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c  | 2 +-
 6 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 9cc28fe32cdb..844cc366ad7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include "amdgpu_sync.h"
@@ -101,6 +102,9 @@ struct amdgpu_kfd_dev {
uint64_t vram_used_aligned;
bool init_complete;
struct work_struct reset_work;
+
+   /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
+   struct dev_pagemap pgmap;
 };
 
 enum kgd_engine_type {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 42e599912e52..199d32c7c289 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -206,7 +206,7 @@ svm_migrate_copy_done(struct amdgpu_device *adev, struct 
dma_fence *mfence)
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
 {
-   return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
+   return (addr + adev->kfd.pgmap.range.start) >> PAGE_SHIFT;
 }
 
 static void
@@ -236,7 +236,7 @@ svm_migrate_addr(struct amdgpu_device *adev, struct page 
*page)
unsigned long addr;
 
addr = page_to_pfn(page) << PAGE_SHIFT;
-   return (addr - adev->kfd.dev->pgmap.range.start);
+   return (addr - adev->kfd.pgmap.range.start);
 }
 
 static struct page *
@@ -990,14 +990,14 @@ static const struct dev_pagemap_ops svm_migrate_pgmap_ops 
= {
 
 int svm_migrate_init(struct amdgpu_device *adev)
 {
-   struct kfd_dev *kfddev = adev->kfd.dev;
+   struct amdgpu_kfd_dev *kfddev = &adev->kfd;
struct dev_pagemap *pgmap;
struct resource *res = NULL;
unsigned long size;
void *r;
 
/* Page migration works on Vega10 or newer */
-   if (!KFD_IS_SOC15(kfddev))
+   if (!KFD_IS_SOC15(kfddev->dev))
return -EINVAL;
 
pgmap = &kfddev->pgmap;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 02a90fd7f646..214d950f948e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -378,9 +378,6 @@ struct kfd_dev {
 
int noretry;
 
-   /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
-   struct dev_pagemap pgmap;
-
struct kfd_node *nodes[MAX_KFD_NODES];
unsigned int num_nodes;
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 8a1075eed5b4..83f8e4e50315 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -173,7 +173,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct 
svm_range *prange,
 
addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
   bo_adev->vm_manager.vram_base_offset -
-  bo_adev->kfd.dev->pgmap.range.start;
+  bo_adev->kfd.pgmap.range.start;
addr[i] |= SVM_RANGE_VRAM_DOMAIN;
pr_debug_ratelimited("vram address: 0x%llx\n", addr[i]);
continue;
@@ -2826,7 +2826,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
bool migration = false;
int r = 0;
 
-   if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
+   if (!KFD_IS_SVM_API_SUPPORTED(adev)) {
pr_debug("device does not support SVM\n");
return -EFAULT;
}
@@ -3111,7 +3111,7 @@ int svm_range_list_init(struct kfd_process *p)
spin_lock_init(&svms->deferred_list_lock);
 
for (i = 0; i < p->n_pdds; i++)
-   if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->kfd))
+   if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
bitmap_set(svms->bitmap_supported, i, 1);
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 7

[PATCH 3/6] drm/amdgpu: Skip halting RLC on GFX v9.4.3

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

RLC-PMFW handshake happens periodically when GFXCLK DPM is enabled and
halting RLC may cause unexpected results. Avoid halting RLC from driver
side.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 23 +++
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 7ef2c9b515ef..6cde05421a10 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1256,21 +1256,20 @@ static int gfx_v9_4_3_xcc_rlc_resume(struct 
amdgpu_device *adev, int xcc_id)
 {
int r;
 
-   gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id);
-
-   /* disable CG */
-   WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0);
-
-   gfx_v9_4_3_xcc_init_pg(adev, xcc_id);
-
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+   gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id);
/* legacy rlc firmware loading */
r = gfx_v9_4_3_xcc_rlc_load_microcode(adev, xcc_id);
if (r)
return r;
+   gfx_v9_4_3_xcc_rlc_start(adev, xcc_id);
}
 
-   gfx_v9_4_3_xcc_rlc_start(adev, xcc_id);
+   amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+   /* disable CG */
+   WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0);
+   gfx_v9_4_3_xcc_init_pg(adev, xcc_id);
+   amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
 
return 0;
 }
@@ -1967,14 +1966,6 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device 
*adev, int xcc_id)
 
gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id);
gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id);
-
-   /* Skip suspend with A+A reset */
-   if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) {
-   dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n");
-   return;
-   }
-
-   gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id);
 }
 
 static int gfx_v9_4_3_hw_init(void *handle)
-- 
2.40.1



[PATCH 6/6] drm/amdgpu: For GFX 9.4.3 APU fix vram_usage value

2023-05-09 Thread Alex Deucher
From: Harish Kasiviswanathan 

For GFX 9.4.3 APP APU, VRAM is allocated in the GTT domain. While freeing
memory, check for the GTT domain instead of VRAM if it is an APP APU.

Signed-off-by: Harish Kasiviswanathan 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 40078c0a5585..ca0d326b43c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1885,11 +1885,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
 
/* Update the size of the BO being freed if it was allocated from
-* VRAM and is not imported.
+* VRAM and is not imported. For APP APU VRAM allocations are done
+* in GTT domain
 */
if (size) {
-   if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
-   (!is_imported))
+   if (!is_imported &&
+  (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
+  (adev->gmc.is_app_apu &&
+   mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
*size = bo_size;
else
*size = 0;
-- 
2.40.1



[PATCH 5/6] drm/amdgpu: Enable NPS4 CPX mode

2023-05-09 Thread Alex Deucher
From: Philip Yang 

CPX compute mode is valid mode for NPS4 memory partition mode.

Signed-off-by: Philip Yang 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
index 848049db00ab..97011e7e031d 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
@@ -281,9 +281,9 @@ static bool __aqua_vanjaram_is_valid_mode(struct 
amdgpu_xcp_mgr *xcp_mgr,
adev->gmc.num_mem_partitions == 4) &&
   (num_xccs_per_xcp >= 2);
case AMDGPU_CPX_PARTITION_MODE:
-   return (num_xcc > 1) &&
-  (adev->gmc.num_mem_partitions == 1 ||
-   adev->gmc.num_mem_partitions == num_xcc);
+   return ((num_xcc > 1) &&
+  (adev->gmc.num_mem_partitions == 1 || 
adev->gmc.num_mem_partitions == 4) &&
+  (num_xcc % adev->gmc.num_mem_partitions) == 0);
default:
return false;
}
-- 
2.40.1



[PATCH 2/6] drm/amdgpu: Fix register accesses in GFX v9.4.3

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Access registers with the right xcc id. Also, remove the unused logic as
PG is not used in GFX v9.4.3.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 16 +++-
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index d0ddcd751432..7ef2c9b515ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1004,16 +1004,6 @@ static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device 
*adev, int xcc_id)
 */
if (adev->gfx.rlc.is_rlc_v2_1)
gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id);
-
-   if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
- AMD_PG_SUPPORT_GFX_SMG |
- AMD_PG_SUPPORT_GFX_DMG |
- AMD_PG_SUPPORT_CP |
- AMD_PG_SUPPORT_GDS |
- AMD_PG_SUPPORT_RLC_SMU_HS)) {
-   WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_JUMP_TABLE_RESTORE,
-  adev->gfx.rlc.cp_table_gpu_addr >> 8);
-   }
 }
 
 static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int 
xcc_id)
@@ -1071,7 +1061,7 @@ static void gfx_v9_4_3_xcc_set_safe_mode(struct 
amdgpu_device *adev, int xcc_id)
 
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
-   if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, 0), 
regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
+   if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id), 
regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
@@ -1107,7 +1097,7 @@ static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct 
amdgpu_device *adev,
gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0x,
xcc_id);
for (k = 0; k < adev->usec_timeout; k++) {
-   if (RREG32_SOC15(GC, GET_INST(GC, 0), 
regRLC_SERDES_CU_MASTER_BUSY) == 0)
+   if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), 
regRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
udelay(1);
}
@@ -1131,7 +1121,7 @@ static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct 
amdgpu_device *adev,
RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
for (k = 0; k < adev->usec_timeout; k++) {
-   if ((RREG32_SOC15(GC, GET_INST(GC, 0), 
regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
+   if ((RREG32_SOC15(GC, GET_INST(GC, xcc_id), 
regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
break;
udelay(1);
}
-- 
2.40.1



[PATCH 1/6] drm/amdkfd: Increase queue number per process to 255 on GFX9.4.3

2023-05-09 Thread Alex Deucher
From: Mukul Joshi 

Increase the maximum number of queues that can be created per process
to 255 on GFX 9.4.3. There is no HWS limitation restricting the number
of queues that can be created.

Signed-off-by: Mukul Joshi 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index a3c23d07c7df..b100933340d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -242,6 +242,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,
enum kfd_queue_type type = properties->type;
unsigned int max_queues = 127; /* HWS limit */
 
+   /*
+* On GFX 9.4.3, increase the number of queues that
+* can be created to 255. No HWS limit on GFX 9.4.3.
+*/
+   if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))
+   max_queues = 255;
+
q = NULL;
kq = NULL;
 
-- 
2.40.1



[PATCH 6/9] drm/amdgpu: Add query_ras_error_count for jpeg v4_0_3

2023-05-09 Thread Alex Deucher
From: Hawking Zhang 

Add query_ras_error_count callback for jpeg v4_0_3.
It will be used to query and log jpeg error count.

Signed-off-by: Hawking Zhang 
Reviewed-by: Tao Zhou 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 64 
 1 file changed, 64 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index ea9cb098a144..5dedba91fa32 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -960,3 +960,67 @@ const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block 
= {
.rev = 3,
.funcs = &jpeg_v4_0_3_ip_funcs,
 };
+
+static const struct amdgpu_ras_err_status_reg_entry jpeg_v4_0_3_ue_reg_list[] 
= {
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0S, 
regVCN_UE_ERR_STATUS_HI_JPEG0S),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0S"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0D, 
regVCN_UE_ERR_STATUS_HI_JPEG0D),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0D"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1S, 
regVCN_UE_ERR_STATUS_HI_JPEG1S),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1S"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1D, 
regVCN_UE_ERR_STATUS_HI_JPEG1D),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1D"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2S, 
regVCN_UE_ERR_STATUS_HI_JPEG2S),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2S"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2D, 
regVCN_UE_ERR_STATUS_HI_JPEG2D),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2D"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3S, 
regVCN_UE_ERR_STATUS_HI_JPEG3S),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3S"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3D, 
regVCN_UE_ERR_STATUS_HI_JPEG3D),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3D"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4S, 
regVCN_UE_ERR_STATUS_HI_JPEG4S),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4S"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4D, 
regVCN_UE_ERR_STATUS_HI_JPEG4D),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4D"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5S, 
regVCN_UE_ERR_STATUS_HI_JPEG5S),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5S"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5D, 
regVCN_UE_ERR_STATUS_HI_JPEG5D),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5D"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6S, 
regVCN_UE_ERR_STATUS_HI_JPEG6S),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6S"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6D, 
regVCN_UE_ERR_STATUS_HI_JPEG6D),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6D"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7S, 
regVCN_UE_ERR_STATUS_HI_JPEG7S),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7S"},
+   {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7D, 
regVCN_UE_ERR_STATUS_HI_JPEG7D),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7D"},
+};
+
+static void jpeg_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
+  uint32_t jpeg_inst,
+  void *ras_err_status)
+{
+   struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+   /* jpeg v4_0_3 only support uncorrectable errors */
+   amdgpu_ras_inst_query_ras_error_count(adev,
+   jpeg_v4_0_3_ue_reg_list,
+   ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
+   NULL, 0, GET_INST(VCN, jpeg_inst),
+   AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+   &err_data->ue_count);
+}
+
+static void jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+   uint32_t i;
+
+   if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+   dev_warn(adev->dev, "JPEG RAS is not supported\n");
+   return;
+   }
+
+   for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
+   jpeg_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
+}
-- 
2.40.1



[PATCH 8/9] drm/amdgpu: Initialize jpeg v4_0_3 ras function

2023-05-09 Thread Alex Deucher
From: Hawking Zhang 

Initialize jpeg v4_0_3 ras function.

Signed-off-by: Hawking Zhang 
Reviewed-by: Tao Zhou 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 26 
 1 file changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 21226d6d26f8..ede15a3a4701 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -40,6 +40,7 @@ static void jpeg_v4_0_3_set_dec_ring_funcs(struct 
amdgpu_device *adev);
 static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
 static int jpeg_v4_0_3_set_powergating_state(void *handle,
enum amd_powergating_state state);
+static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
 
 static int amdgpu_ih_srcid_jpeg[] = {
VCN_4_0__SRCID__JPEG_DECODE,
@@ -67,6 +68,7 @@ static int jpeg_v4_0_3_early_init(void *handle)
 
jpeg_v4_0_3_set_dec_ring_funcs(adev);
jpeg_v4_0_3_set_irq_funcs(adev);
+   jpeg_v4_0_3_set_ras_funcs(adev);
 
return 0;
 }
@@ -126,6 +128,14 @@ static int jpeg_v4_0_3_sw_init(void *handle)
}
}
 
+   if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+   r = amdgpu_jpeg_ras_sw_init(adev);
+   if (r) {
+   dev_err(adev->dev, "Failed to initialize jpeg ras 
block!\n");
+   return r;
+   }
+   }
+
return 0;
 }
 
@@ -1046,3 +1056,19 @@ static void jpeg_v4_0_3_reset_ras_error_count(struct 
amdgpu_device *adev)
for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
jpeg_v4_0_3_inst_reset_ras_error_count(adev, i);
 }
+
+static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = {
+   .query_ras_error_count = jpeg_v4_0_3_query_ras_error_count,
+   .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count,
+};
+
+static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = {
+   .ras_block = {
+   .hw_ops = &jpeg_v4_0_3_ras_hw_ops,
+   },
+};
+
+static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
+{
+   adev->jpeg.ras = &jpeg_v4_0_3_ras;
+}
-- 
2.40.1



[PATCH 9/9] drm/amdgpu: Adjust the sequence to query ras error info

2023-05-09 Thread Alex Deucher
From: Hawking Zhang 

It turns out STATUS_VALID_FLAG needs to be checked
ahead of any other fields. ADDRESS_VALID_FLAG and
ERR_INFO_VALID_FLAG only manage the ADDRESS and ERR_INFO
fields respectively. The driver should continue to poll the
ERR_CNT field even if ERR_INFO_VALID_FLAG is not set.

Signed-off-by: Hawking Zhang 
Reviewed-by: Tao Zhou 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5ae89602a116..64f80e8cbd63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3164,7 +3164,8 @@ bool amdgpu_ras_inst_get_err_cnt_field(struct 
amdgpu_device *adev,
 
if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) &&
!REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, 
ERR_INFO_VALID_FLAG))
-   return false;
+   /* keep the check here in case we need to refer to the result 
later */
+   dev_dbg(adev->dev, "Invalid err_info field\n");
 
/* read err count */
*err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT);
@@ -3187,17 +3188,17 @@ void amdgpu_ras_inst_query_ras_error_count(struct 
amdgpu_device *adev,
uint32_t i, j;
 
for (i = 0; i < reg_list_size; i++) {
+   /* query memory_id from err_status_lo */
+   if (!amdgpu_ras_inst_get_memory_id_field(adev, ®_list[i],
+instance, &memory_id))
+   continue;
+
/* query err_cnt from err_status_hi */
if (!amdgpu_ras_inst_get_err_cnt_field(adev, ®_list[i],
   instance, &err_cnt) ||
!err_cnt)
continue;
 
-   /* query memory_id from err_status_lo */
-   if (!amdgpu_ras_inst_get_memory_id_field(adev, ®_list[i],
-instance, &memory_id))
-   continue;
-
*err_count += err_cnt;
 
/* log the errors */
-- 
2.40.1



[PATCH 5/9] drm/amdgpu: Re-enable VCN RAS if DPG is enabled

2023-05-09 Thread Alex Deucher
From: Hawking Zhang 

The VCN RAS enablement sequence needs to be added to the
DPG HW init sequence.

Signed-off-by: Hawking Zhang 
Reviewed-by: Tao Zhou 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 27 -
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 7558095ecf6a..c77ceaf53dcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -52,7 +52,8 @@ static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device 
*adev,
int inst_idx, struct dpg_pause_state *new_state);
 static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring);
 static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
-
+static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
+ int inst_idx, bool indirect);
 /**
  * vcn_v4_0_3_early_init - set function pointers
  *
@@ -769,6 +770,8 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device 
*adev, int inst_idx, b
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
 
+   vcn_v4_0_3_enable_ras(adev, inst_idx, indirect);
+
/* enable master interrupt */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, regUVD_MASTINT_EN),
@@ -1514,3 +1517,25 @@ static void vcn_v4_0_3_set_ras_funcs(struct 
amdgpu_device *adev)
 {
adev->vcn.ras = &vcn_v4_0_3_ras;
 }
+
+static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
+ int inst_idx, bool indirect)
+{
+   uint32_t tmp;
+
+   if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+   return;
+
+   tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+   WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
+ tmp, 0, indirect);
+
+   tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+   WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
+ tmp, 0, indirect);
+}
-- 
2.40.1



[PATCH 7/9] drm/amdgpu: Add reset_ras_error_count for jpeg v4_0_3

2023-05-09 Thread Alex Deucher
From: Hawking Zhang 

Add reset_ras_error_count callback for jpeg v4_0_3.
It will be used to reset jpeg ras error count.

Signed-off-by: Hawking Zhang 
Reviewed-by: Tao Zhou 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 5dedba91fa32..21226d6d26f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -1024,3 +1024,25 @@ static void jpeg_v4_0_3_query_ras_error_count(struct 
amdgpu_device *adev,
for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
jpeg_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
 }
+
+static void jpeg_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
+  uint32_t jpeg_inst)
+{
+   amdgpu_ras_inst_reset_ras_error_count(adev,
+   jpeg_v4_0_3_ue_reg_list,
+   ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
+   GET_INST(VCN, jpeg_inst));
+}
+
+static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
+{
+   uint32_t i;
+
+   if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+   dev_warn(adev->dev, "JPEG RAS is not supported\n");
+   return;
+   }
+
+   for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
+   jpeg_v4_0_3_inst_reset_ras_error_count(adev, i);
+}
-- 
2.40.1



[PATCH 1/9] drm/amdgpu: Add vcn/jpeg ras err status registers

2023-05-09 Thread Alex Deucher
From: Hawking Zhang 

Add new ras error status registers introduced in
vcn v4_0_3 to log vcn and jpeg ras error.

Signed-off-by: Hawking Zhang 
Reviewed-by: Tao Zhou 
Signed-off-by: Alex Deucher 
---
 .../include/asic_reg/vcn/vcn_4_0_3_offset.h   |  78 +++
 .../include/asic_reg/vcn/vcn_4_0_3_sh_mask.h  | 495 ++
 2 files changed, 573 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_3_offset.h 
b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_3_offset.h
index facad93cd06f..e9742d10de1c 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_3_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_3_offset.h
@@ -1500,6 +1500,84 @@
 #define regVCN_RAS_CNTL_MMSCH  
 0x0914
 #define regVCN_RAS_CNTL_MMSCH_BASE_IDX 
 1
 
+// addressBlock: aid_uvd0_vcn_edcc_dec
+// base address: 0x21d20
+#define regVCN_UE_ERR_STATUS_LO_VIDD   
 0x094c
+#define regVCN_UE_ERR_STATUS_LO_VIDD_BASE_IDX  
 1
+#define regVCN_UE_ERR_STATUS_HI_VIDD   
 0x094d
+#define regVCN_UE_ERR_STATUS_HI_VIDD_BASE_IDX  
 1
+#define regVCN_UE_ERR_STATUS_LO_VIDV   
 0x094e
+#define regVCN_UE_ERR_STATUS_LO_VIDV_BASE_IDX  
 1
+#define regVCN_UE_ERR_STATUS_HI_VIDV   
 0x094f
+#define regVCN_UE_ERR_STATUS_HI_VIDV_BASE_IDX  
 1
+#define regVCN_CE_ERR_STATUS_LO_MMSCHD 
 0x0950
+#define regVCN_CE_ERR_STATUS_LO_MMSCHD_BASE_IDX
 1
+#define regVCN_CE_ERR_STATUS_HI_MMSCHD 
 0x0951
+#define regVCN_CE_ERR_STATUS_HI_MMSCHD_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_LO_JPEG0S 
 0x0952
+#define regVCN_UE_ERR_STATUS_LO_JPEG0S_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_HI_JPEG0S 
 0x0953
+#define regVCN_UE_ERR_STATUS_HI_JPEG0S_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_LO_JPEG0D 
 0x0954
+#define regVCN_UE_ERR_STATUS_LO_JPEG0D_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_HI_JPEG0D 
 0x0955
+#define regVCN_UE_ERR_STATUS_HI_JPEG0D_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_LO_JPEG1S 
 0x0956
+#define regVCN_UE_ERR_STATUS_LO_JPEG1S_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_HI_JPEG1S 
 0x0957
+#define regVCN_UE_ERR_STATUS_HI_JPEG1S_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_LO_JPEG1D 
 0x0958
+#define regVCN_UE_ERR_STATUS_LO_JPEG1D_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_HI_JPEG1D 
 0x0959
+#define regVCN_UE_ERR_STATUS_HI_JPEG1D_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_LO_JPEG2S 
 0x095a
+#define regVCN_UE_ERR_STATUS_LO_JPEG2S_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_HI_JPEG2S 
 0x095b
+#define regVCN_UE_ERR_STATUS_HI_JPEG2S_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_LO_JPEG2D 
 0x095c
+#define regVCN_UE_ERR_STATUS_LO_JPEG2D_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_HI_JPEG2D 
 0x095d
+#define regVCN_UE_ERR_STATUS_HI_JPEG2D_BASE_IDX
 1
+#define regVCN_UE_ERR_STATUS_LO_JP

[PATCH 3/9] drm/amdgpu: Add reset_ras_error_count for vcn v4_0_3

2023-05-09 Thread Alex Deucher
From: Hawking Zhang 

Add reset_ras_error_count callback for vcn v4_0_3.
It will be used to reset vcn ras error count.

Signed-off-by: Hawking Zhang 
Reviewed-by: Tao Zhou 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index e5037d6f884b..e8933039bcd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1466,3 +1466,25 @@ static void vcn_v4_0_3_query_ras_error_count(struct 
amdgpu_device *adev,
for (i = 0; i < adev->vcn.num_vcn_inst; i++)
vcn_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
 }
+
+static void vcn_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t vcn_inst)
+{
+   amdgpu_ras_inst_reset_ras_error_count(adev,
+   vcn_v4_0_3_ue_reg_list,
+   ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
+   GET_INST(VCN, vcn_inst));
+}
+
+static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
+{
+   uint32_t i;
+
+   if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+   dev_warn(adev->dev, "VCN RAS is not supported\n");
+   return;
+   }
+
+   for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+   vcn_v4_0_3_inst_reset_ras_error_count(adev, i);
+}
-- 
2.40.1



[PATCH 2/9] drm/amdgpu: Add query_ras_error_count for vcn v4_0_3

2023-05-09 Thread Alex Deucher
From: Hawking Zhang 

Add query_ras_error_count callback for vcn v4_0_3.
It will be used to query and log vcn error count.

Signed-off-by: Hawking Zhang 
Reviewed-by: Tao Zhou 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 36 +
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 9d0c3dc76547..e5037d6f884b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1430,3 +1430,39 @@ const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block 
= {
.rev = 3,
.funcs = &vcn_v4_0_3_ip_funcs,
 };
+
+static const struct amdgpu_ras_err_status_reg_entry vcn_v4_0_3_ue_reg_list[] = 
{
+   {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDD, 
regVCN_UE_ERR_STATUS_HI_VIDD),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDD"},
+   {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDV, 
regVCN_UE_ERR_STATUS_HI_VIDV),
+   1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDV"},
+};
+
+static void vcn_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t vcn_inst,
+ void *ras_err_status)
+{
+   struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+   /* vcn v4_0_3 only support query uncorrectable errors */
+   amdgpu_ras_inst_query_ras_error_count(adev,
+   vcn_v4_0_3_ue_reg_list,
+   ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
+   NULL, 0, GET_INST(VCN, vcn_inst),
+   AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+   &err_data->ue_count);
+}
+
+static void vcn_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
+void *ras_err_status)
+{
+   uint32_t i;
+
+   if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+   dev_warn(adev->dev, "VCN RAS is not supported\n");
+   return;
+   }
+
+   for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+   vcn_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
+}
-- 
2.40.1



[PATCH 4/9] drm/amdgpu: Initialize vcn v4_0_3 ras function

2023-05-09 Thread Alex Deucher
From: Hawking Zhang 

Initialize vcn v4_0_3 ras function

Signed-off-by: Hawking Zhang 
Reviewed-by: Tao Zhou 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 26 +
 1 file changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index e8933039bcd6..7558095ecf6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -51,6 +51,7 @@ static int vcn_v4_0_3_set_powergating_state(void *handle,
 static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
 static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring);
+static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
 
 /**
  * vcn_v4_0_3_early_init - set function pointers
@@ -68,6 +69,7 @@ static int vcn_v4_0_3_early_init(void *handle)
 
vcn_v4_0_3_set_unified_ring_funcs(adev);
vcn_v4_0_3_set_irq_funcs(adev);
+   vcn_v4_0_3_set_ras_funcs(adev);
 
return amdgpu_vcn_early_init(adev);
 }
@@ -130,6 +132,14 @@ static int vcn_v4_0_3_sw_init(void *handle)
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
 
+   if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+   r = amdgpu_vcn_ras_sw_init(adev);
+   if (r) {
+   dev_err(adev->dev, "Failed to initialize vcn ras 
block!\n");
+   return r;
+   }
+   }
+
return 0;
 }
 
@@ -1488,3 +1498,19 @@ static void vcn_v4_0_3_reset_ras_error_count(struct 
amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_vcn_inst; i++)
vcn_v4_0_3_inst_reset_ras_error_count(adev, i);
 }
+
+static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = {
+   .query_ras_error_count = vcn_v4_0_3_query_ras_error_count,
+   .reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count,
+};
+
+static struct amdgpu_vcn_ras vcn_v4_0_3_ras = {
+   .ras_block = {
+   .hw_ops = &vcn_v4_0_3_ras_hw_ops,
+   },
+};
+
+static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
+{
+   adev->vcn.ras = &vcn_v4_0_3_ras;
+}
-- 
2.40.1



[PATCH 3/4] drm/amdgpu: Set memory partitions to 1 for SRIOV.

2023-05-09 Thread Alex Deucher
From: Gavin Wan 

For SRIOV, the memory partitions are set on the host driver. Each VF only
has one memory partition. We need to set the memory partitions to 1 on
the guest driver for SRIOV.

V2: squash in fix ("drm/amdgpu: Fix memory range info of GC 9.4.3 VFs")

Signed-off-by: Gavin Wan 
Acked-by: Zhigang Luo 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 394644d9d559..f000e0e89bd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1346,6 +1346,9 @@ gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, 
u32 *supp_modes)
 static enum amdgpu_memory_partition
 gmc_v9_0_query_memory_partition(struct amdgpu_device *adev)
 {
+   if (amdgpu_sriov_vf(adev))
+   return AMDGPU_NPS1_PARTITION_MODE;
+
return gmc_v9_0_get_memory_partition(adev, NULL);
 }
 
@@ -1897,7 +1900,10 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device 
*adev)
else
gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
 
-   valid = gmc_v9_0_validate_partition_info(adev);
+   if (amdgpu_sriov_vf(adev))
+   valid = true;
+   else
+   valid = gmc_v9_0_validate_partition_info(adev);
if (!valid) {
/* TODO: handle invalid case */
dev_WARN(adev->dev,
-- 
2.40.1



[PATCH 4/4] drm/amdgpu: Check if the pointer is NULL before using it.

2023-05-09 Thread Alex Deucher
From: Gavin Wan 

For SRIOV on some parts, the host driver does not post VBIOS. So the guest
cannot get bios information. Therefore, adev->virt.fw_reserve.p_pf2vf
and adev->mode_info.atom_context are NULL.

Signed-off-by: Gavin Wan 
Reviewed-by: Zhigang Luo 
Acked-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5d62e0d5cc11..c2136accd523 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3854,21 +3854,24 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
 
/* enable PCIE atomic ops */
-   if (amdgpu_sriov_vf(adev))
-   adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info 
*)
-   
adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
-   (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 
PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+   if (amdgpu_sriov_vf(adev)) {
+   if (adev->virt.fw_reserve.p_pf2vf)
+   adev->have_atomics_support = ((struct 
amd_sriov_msg_pf2vf_info *)
+ 
adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
+   (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 
PCI_EXP_DEVCAP2_ATOMIC_COMP64);
/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
 * internal path natively support atomics, set have_atomics_support to 
true.
 */
-   else if ((adev->flags & AMD_IS_APU) &&
-   (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)))
+   } else if ((adev->flags & AMD_IS_APU) &&
+  (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
adev->have_atomics_support = true;
-   else
+   } else {
adev->have_atomics_support =
!pci_enable_atomic_ops_to_root(adev->pdev,
  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+   }
+
if (!adev->have_atomics_support)
dev_info(adev->dev, "PCIE atomic ops is not supported\n");
 
@@ -3884,7 +3887,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
amdgpu_reset_init(adev);
 
/* detect if we are with an SRIOV vbios */
-   amdgpu_device_detect_sriov_bios(adev);
+   if (adev->bios)
+   amdgpu_device_detect_sriov_bios(adev);
 
/* check if we need to reset the asic
 *  E.g., driver was not cleanly unloaded previously, etc.
-- 
2.40.1



[PATCH 1/4] drm/amdgpu: Add PSP supporting PSP 13.0.6 SRIOV ucode init.

2023-05-09 Thread Alex Deucher
From: Gavin Wan 

Add PSP supporting PSP 13.0.6 SRIOV ucode init.

Signed-off-by: Gavin Wan 
Reviewed-by: Yang Wang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index d62746b596f5..ec79a5c2f500 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -146,6 +146,9 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 0):
adev->virt.autoload_ucode_id = 0;
break;
+   case IP_VERSION(13, 0, 6):
+   ret = psp_init_cap_microcode(psp, ucode_prefix);
+   break;
case IP_VERSION(13, 0, 10):
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
ret = psp_init_cap_microcode(psp, ucode_prefix);
-- 
2.40.1



[PATCH 2/4] drm/amdgpu: Skip using MC FB Offset when APU flag is set for SRIOV.

2023-05-09 Thread Alex Deucher
From: Gavin Wan 

The MC_VM_FB_OFFSET is PF only register. It cannot be read on VF.
So, the driver should not use MC_VM_FB_OFFSET address to set the
address of dev->gmc.aper_base.

Signed-off-by: Gavin Wan 
Reviewed-by: Zhigang Luo 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 393e90d8b27f..394644d9d559 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1644,7 +1644,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 */
 
/* check whether both host-gpu and gpu-gpu xgmi links exist */
-   if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
+   if ((!amdgpu_sriov_vf(adev) &&
+   (adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
(adev->gmc.xgmi.supported &&
 adev->gmc.xgmi.connected_to_cpu)) {
adev->gmc.aper_base =
-- 
2.40.1



[PATCH 2/3] drm/amdgpu: Return error on invalid compute mode

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Return error if an invalid compute partition mode is requested.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c | 8 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 +-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
index a165b51e9e58..848049db00ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
@@ -328,10 +328,14 @@ static int aqua_vanjaram_switch_partition_mode(struct 
amdgpu_xcp_mgr *xcp_mgr,
adev = xcp_mgr->adev;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
 
-   if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE)
+   if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) {
mode = __aqua_vanjaram_get_auto_mode(xcp_mgr);
-   else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode))
+   } else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) {
+   dev_err(adev->dev,
+   "Invalid compute partition mode requested, requested: 
%s, available memory partitions: %d",
+   amdgpu_gfx_compute_mode_desc(mode), 
adev->gmc.num_mem_partitions);
return -EINVAL;
+   }
 
if (adev->kfd.init_complete)
flags |= AMDGPU_XCP_OPS_KFD;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 81ab3cd2f229..d0ddcd751432 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1933,7 +1933,11 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device 
*adev)
if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
AMDGPU_XCP_FL_NONE) ==
AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
-   amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, 
amdgpu_user_partt_mode);
+   r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr,
+amdgpu_user_partt_mode);
+
+   if (r)
+   return r;
 
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
for (i = 0; i < num_xcc; i++) {
-- 
2.40.1



[PATCH 3/3] drm/amdgpu: Add PSP spatial partition interface

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Add PSP ring command interface for spatial partitioning.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 21 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h |  9 +
 3 files changed, 32 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index ea47012795e7..d62746b596f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -991,6 +991,27 @@ static int psp_rl_load(struct amdgpu_device *adev)
return ret;
 }
 
+int psp_spatial_partition(struct psp_context *psp, int mode)
+{
+   struct psp_gfx_cmd_resp *cmd;
+   int ret;
+
+   if (amdgpu_sriov_vf(psp->adev))
+   return 0;
+
+   cmd = acquire_psp_cmd_buf(psp);
+
+   cmd->cmd_id = GFX_CMD_ID_SRIOV_SPATIAL_PART;
+   cmd->cmd.cmd_spatial_part.mode = mode;
+
+   dev_info(psp->adev->dev, "Requesting %d paritions through PSP", mode);
+   ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+   release_psp_cmd_buf(psp);
+
+   return ret;
+}
+
 static int psp_asd_initialize(struct psp_context *psp)
 {
int ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index cf4f60c66122..0a409da749d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -519,6 +519,8 @@ int psp_load_fw_list(struct psp_context *psp,
 struct amdgpu_firmware_info **ucode_list, int ucode_count);
 void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t 
bin_size);
 
+int psp_spatial_partition(struct psp_context *psp, int mode);
+
 int is_psp_fw_valid(struct psp_bin_desc bin);
 
 int amdgpu_psp_sysfs_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h 
b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 22c775f39119..18917df785ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -102,6 +102,7 @@ enum psp_gfx_cmd_id
 GFX_CMD_ID_LOAD_TOC   = 0x0020,   /* Load TOC and obtain TMR 
size */
 GFX_CMD_ID_AUTOLOAD_RLC   = 0x0021,   /* Indicates all graphics fw 
loaded, start RLC autoload */
 GFX_CMD_ID_BOOT_CFG   = 0x0022,   /* Boot Config */
+GFX_CMD_ID_SRIOV_SPATIAL_PART = 0x0027,   /* Configure spatial 
partitioning mode */
 };
 
 /* PSP boot config sub-commands */
@@ -338,6 +339,13 @@ struct psp_gfx_cmd_boot_cfg
 uint32_tboot_config_valid;/* dynamic boot 
configuration valid bits bitmask */
 };
 
+struct psp_gfx_cmd_sriov_spatial_part {
+   uint32_t mode;
+   uint32_t override_ips;
+   uint32_t override_xcds_avail;
+   uint32_t override_this_aid;
+};
+
 /* All GFX ring buffer commands. */
 union psp_gfx_commands
 {
@@ -351,6 +359,7 @@ union psp_gfx_commands
 struct psp_gfx_cmd_setup_tmrcmd_setup_vmr;
 struct psp_gfx_cmd_load_toc cmd_load_toc;
 struct psp_gfx_cmd_boot_cfg boot_cfg;
+struct psp_gfx_cmd_sriov_spatial_part cmd_spatial_part;
 };
 
 struct psp_gfx_uresp_reserved
-- 
2.40.1



[PATCH 1/3] drm/amdgpu: Add compute mode descriptor function

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Keep a helper function to get description of compute partition mode.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 24 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 21 +
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 817e7b7d32b7..2ebf5c6f4ff7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1169,33 +1169,11 @@ static ssize_t 
amdgpu_gfx_get_current_compute_partition(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
int mode;
-   char *partition_mode;
 
mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
   AMDGPU_XCP_FL_NONE);
 
-   switch (mode) {
-   case AMDGPU_SPX_PARTITION_MODE:
-   partition_mode = "SPX";
-   break;
-   case AMDGPU_DPX_PARTITION_MODE:
-   partition_mode = "DPX";
-   break;
-   case AMDGPU_TPX_PARTITION_MODE:
-   partition_mode = "TPX";
-   break;
-   case AMDGPU_QPX_PARTITION_MODE:
-   partition_mode = "QPX";
-   break;
-   case AMDGPU_CPX_PARTITION_MODE:
-   partition_mode = "CPX";
-   break;
-   default:
-   partition_mode = "UNKNOWN";
-   break;
-   }
-
-   return sysfs_emit(buf, "%s\n", partition_mode);
+   return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
 }
 
 static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index e9c93f6e12b8..3d11b7a0bd75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -494,4 +494,25 @@ int amdgpu_gfx_poison_consumption_handler(struct 
amdgpu_device *adev,
 bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id);
 int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev);
 void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev);
+
+static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
+{
+   switch (mode) {
+   case AMDGPU_SPX_PARTITION_MODE:
+   return "SPX";
+   case AMDGPU_DPX_PARTITION_MODE:
+   return "DPX";
+   case AMDGPU_TPX_PARTITION_MODE:
+   return "TPX";
+   case AMDGPU_QPX_PARTITION_MODE:
+   return "QPX";
+   case AMDGPU_CPX_PARTITION_MODE:
+   return "CPX";
+   default:
+   return "UNKNOWN";
+   }
+
+   return "UNKNOWN";
+}
+
 #endif
-- 
2.40.1



[PATCH] drm/amdkfd: Remove skipping userptr buffer mapping when mmu notifier marks it as invalid

2023-05-09 Thread Alex Deucher
From: Xiaogang Chen 

The mmu notifier does not always hold mm->sem during its callback. That causes
a race condition between the kfd userptr buffer mapping and the mmu notifier,
which leads to a gpu shader or SDMA accessing a userptr buffer before it has been
mapped to the gpu VM. Always map the userptr buffer to avoid that, though it may
leave some userptr buffers mapped twice.

Suggested-by: Felix Kuehling 
Signed-off-by: Xiaogang Chen 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 10 --
 1 file changed, 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 58a774647573..40078c0a5585 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1942,16 +1942,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 */
mutex_lock(&mem->process_info->lock);
 
-   /* Lock notifier lock. If we find an invalid userptr BO, we can be
-* sure that the MMU notifier is no longer running
-* concurrently and the queues are actually stopped
-*/
-   if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
-   mutex_lock(&mem->process_info->notifier_lock);
-   is_invalid_userptr = !!mem->invalid;
-   mutex_unlock(&mem->process_info->notifier_lock);
-   }
-
mutex_lock(&mem->lock);
 
domain = mem->domain;
-- 
2.40.1



[PATCH] drm/amdgpu: Fix unmapping of aperture

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

When aperture size is zero, there is no mapping done.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 7 ---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d1dca02860b2..5d62e0d5cc11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4120,7 +4120,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device 
*adev)
adev->mman.aper_base_kaddr = NULL;
 
/* Memory manager related */
-   if (!adev->gmc.xgmi.connected_to_cpu) {
+   if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
arch_phys_wc_del(adev->gmc.vram_mtrr);
arch_io_free_memtype_wc(adev->gmc.aper_base, 
adev->gmc.aper_size);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index a5c4f98f8cd1..c6214db42bda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1080,8 +1080,7 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
amdgpu_ttm_fini(adev);
 
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
-
-   if (!adev->gmc.xgmi.connected_to_cpu) {
+   if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
arch_phys_wc_del(adev->gmc.vram_mtrr);
arch_io_free_memtype_wc(adev->gmc.aper_base, 
adev->gmc.aper_size);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7885162b7368..254927c596ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1848,12 +1848,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
 
-   else if (!adev->gmc.is_app_apu)
+   else if (adev->gmc.is_app_apu)
+   DRM_DEBUG_DRIVER(
+   "No need to ioremap when real vram size is 0\n");
+   else
 #endif
adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
-   else
-   DRM_DEBUG_DRIVER("No need to ioremap when real vram size is 
0\n");
 #endif
 
/*
-- 
2.40.1



[PATCH] drm/amdgpu: Fix xGMI access P2P mapping failure on GFXIP 9.4.3

2023-05-09 Thread Alex Deucher
From: Rajneesh Bhardwaj 

On GFXIP 9.4.3, we don't need to rely on xGMI hive info to determine P2P
access.

Reviewed-by: Felix Kuehling 
Acked-and-tested-by:  Mukul Joshi 
Signed-off-by: Rajneesh Bhardwaj 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index c6bf66c9377f..58a774647573 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -813,7 +813,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
 * if peer device has large BAR. In contrast, access over xGMI is
 * allowed for both small and large BAR configurations of peer device
 */
-   if ((adev != bo_adev) &&
+   if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
 (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
 (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
-- 
2.40.1



[PATCH 2/3] drm/amdgpu: Set TTM pools for memory partitions

2023-05-09 Thread Alex Deucher
From: Philip Yang 

For native mode only, create TTM pool for each memory partition to store
the NUMA node id, then the TTM pool will be selected using memory
partition id to allocate memory from the correct partition.

Acked-by: Christian König 
(rajneesh: changed need_swiotlb and need_dma32 to false for pool init)
Reviewed-by: Felix Kuehling 
Acked-and-tested-by:  Mukul Joshi 
Signed-off-by: Philip Yang 
Signed-off-by: Rajneesh Bhardwaj 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  1 +
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 1582ef092bf1..f4f92133f505 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -635,6 +635,7 @@ struct amdgpu_ttm_tt {
struct task_struct  *usertask;
uint32_tuserflags;
boolbound;
+   int32_t pool_id;
 };
 
 #define ttm_to_amdgpu_ttm_tt(ptr)  container_of(ptr, struct amdgpu_ttm_tt, 
ttm)
@@ -1063,6 +1064,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct 
ttm_buffer_object *bo,
return NULL;
}
gtt->gobj = &bo->base;
+   gtt->pool_id = NUMA_NO_NODE;
 
if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
caching = ttm_write_combined;
@@ -1089,6 +1091,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
 {
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+   struct ttm_pool *pool;
pgoff_t i;
int ret;
 
@@ -1103,7 +1106,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_device 
*bdev,
if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
return 0;
 
-   ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
+   if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+   pool = &adev->mman.ttm_pools[gtt->pool_id];
+   else
+   pool = &adev->mman.bdev.pool;
+   ret = ttm_pool_alloc(pool, ttm, ctx);
if (ret)
return ret;
 
@@ -1124,6 +1131,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device 
*bdev,
 {
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
struct amdgpu_device *adev;
+   struct ttm_pool *pool;
pgoff_t i;
 
amdgpu_ttm_backend_unbind(bdev, ttm);
@@ -1142,7 +1150,13 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device 
*bdev,
ttm->pages[i]->mapping = NULL;
 
adev = amdgpu_ttm_adev(bdev);
-   return ttm_pool_free(&adev->mman.bdev.pool, ttm);
+
+   if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+   pool = &adev->mman.ttm_pools[gtt->pool_id];
+   else
+   pool = &adev->mman.bdev.pool;
+
+   return ttm_pool_free(pool, ttm);
 }
 
 /**
@@ -1732,6 +1746,41 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device 
*adev)
return 0;
 }
 
+static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
+{
+   int i;
+
+   if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
+   return 0;
+
+   adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
+  sizeof(*adev->mman.ttm_pools),
+  GFP_KERNEL);
+   if (!adev->mman.ttm_pools)
+   return -ENOMEM;
+
+   for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
+   ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
+ adev->gmc.mem_partitions[i].numa.node,
+ false, false);
+   }
+   return 0;
+}
+
+static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
+{
+   int i;
+
+   if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
+   return;
+
+   for (i = 0; i < adev->gmc.num_mem_partitions; i++)
+   ttm_pool_fini(&adev->mman.ttm_pools[i]);
+
+   kfree(adev->mman.ttm_pools);
+   adev->mman.ttm_pools = NULL;
+}
+
 /*
  * amdgpu_ttm_init - Init the memory management (ttm) as well as various
  * gtt/vram related fields.
@@ -1758,6 +1807,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
return r;
}
+
+   r = amdgpu_ttm_pools_init(adev);
+   if (r) {
+   DRM_ERROR("failed to init ttm pools(%d).\n", r);
+   return r;
+   }
adev->mman.initialized = true;
 
/* Initialize VRAM pool with all of VRAM divided into pages */
@@ -1905,6 +1960,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
if (!adev->mman.initialized)
return;
 
+   amdgpu_ttm_pools_fini(adev);
+
amdgpu_ttm_training_reserve_vram_fini(adev);
/* return

[PATCH 3/3] drm/amdkfd: Native mode memory partition support

2023-05-09 Thread Alex Deucher
From: Rajneesh Bhardwaj 

For native mode, after amdgpu_bo is created on CPU domain, then call
amdgpu_ttm_tt_set_mem_pool to select the TTM pool using bo->mem_id.
ttm_bo_validate will allocate the memory to the correct memory partition
before mapping to GPUs.

Reviewed-by: Felix Kuehling 
Acked-and-tested-by:  Mukul Joshi 
Signed-off-by: Philip Yang 
Signed-off-by: Rajneesh Bhardwaj 
Signed-off-by: Alex Deucher 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c   |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 18 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h|  1 +
 3 files changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 0ebd39a41e74..c6bf66c9377f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1642,6 +1642,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
uint64_t aligned_size;
u64 alloc_flags;
int ret;
+   int mem_id = 0; /* Fixme : to be changed when mem_id support patch 
lands, until then NPS1, SPX only */
 
/*
 * Check on which domain to allocate BO
@@ -1749,6 +1750,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
((*mem)->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) {
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+   ret = amdgpu_ttm_tt_set_mem_pool(&bo->tbo, mem_id);
+   if (ret) {
+   pr_debug("failed to set ttm mem pool %d\n", ret);
+   goto err_set_mem_partition;
+   }
}
 
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
@@ -1777,6 +1783,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 allocate_init_user_pages_failed:
 err_pin_bo:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+err_set_mem_partition:
drm_vma_node_revoke(&gobj->vma_node, drm_priv);
 err_node_allow:
/* Don't unreserve system mem limit twice */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index f4f92133f505..7885162b7368 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1159,6 +1159,24 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device 
*bdev,
return ttm_pool_free(pool, ttm);
 }
 
+/**
+ * amdgpu_ttm_tt_set_mem_pool - Set the TTM memory pool for the TTM BO
+ * @tbo: The ttm_buffer_object that backs the VRAM bo
+ * @mem_id: to select the initialized ttm pool corresponding to the memory 
partition
+ */
+int amdgpu_ttm_tt_set_mem_pool(struct ttm_buffer_object *tbo, int mem_id)
+{
+   struct ttm_tt *ttm = tbo->ttm;
+   struct amdgpu_ttm_tt *gtt;
+
+   if (!ttm && !ttm_tt_is_populated(ttm))
+   return -EINVAL;
+
+   gtt = ttm_to_amdgpu_ttm_tt(ttm);
+   gtt->pool_id = mem_id;
+   return 0;
+}
+
 /**
  * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current
  * task
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 8ef048a0a33e..fe32de1bf4d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -192,6 +192,7 @@ bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
  unsigned long end, unsigned long *userptr);
+int amdgpu_ttm_tt_set_mem_pool(struct ttm_buffer_object *tbo, int mem_id);
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
   int *last_invalidated);
 bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
-- 
2.40.1



[PATCH 1/3] drm/ttm: export ttm_pool_fini for cleanup

2023-05-09 Thread Alex Deucher
From: Rajneesh Bhardwaj 

ttm_pool_init is exported and used outside of the ttm subsystem via the
amdgpu_ttm interface; similarly, export ttm_pool_fini for proper cleanup.

Reviewed-by: Felix Kuehling 
Signed-off-by: Rajneesh Bhardwaj 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/ttm/ttm_pool.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 1068a41cded1..d4f6cc262e9a 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -573,6 +573,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
 */
synchronize_shrinkers();
 }
+EXPORT_SYMBOL(ttm_pool_fini);
 
 /* As long as pages are available make sure to release at least one */
 static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
-- 
2.40.1



[PATCH 05/14] drm/amdgpu: Add API to get numa information of XCC

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Add interface to get numa information of ACPI XCC object. The interface
uses logical id to identify an XCC.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 14 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 32 +++-
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 65b3aeef37ed..4ecaff14f2b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1404,6 +1404,12 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock);
 
 /* amdgpu_acpi.c */
 
+struct amdgpu_numa_info {
+   uint64_t size;
+   int pxm;
+   int nid;
+};
+
 /* ATCS Device/Driver State */
 #define AMDGPU_ATCS_PSC_DEV_STATE_D0   0
 #define AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT   3
@@ -1423,6 +1429,8 @@ int amdgpu_acpi_smart_shift_update(struct drm_device 
*dev, enum amdgpu_ss ss_sta
 int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
 int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
 u64 *tmr_size);
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+struct amdgpu_numa_info *numa_info);
 
 void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
 bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
@@ -1435,6 +1443,12 @@ static inline int amdgpu_acpi_get_tmr_info(struct 
amdgpu_device *adev,
 {
return -EINVAL;
 }
+static inline int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev,
+  int xcc_id,
+  struct amdgpu_numa_info *numa_info)
+{
+   return -EINVAL;
+}
 static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
 static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { 
return false; }
 static inline void amdgpu_acpi_detect(void) { }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 26efca660f0d..7150c09933cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -54,12 +54,6 @@ static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 
0xa556, 0x44f2,
 
 #define AMD_XCC_MAX_HID 24
 
-struct amdgpu_numa_info {
-   uint64_t size;
-   int pxm;
-   int nid;
-};
-
 struct xarray numa_info_xa;
 
 /* Encapsulates the XCD acpi object information */
@@ -1156,6 +1150,32 @@ int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, 
u64 *tmr_offset,
return 0;
 }
 
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+struct amdgpu_numa_info *numa_info)
+{
+   struct amdgpu_acpi_dev_info *dev_info;
+   struct amdgpu_acpi_xcc_info *xcc_info;
+   u16 bdf;
+
+   if (!numa_info)
+   return -EINVAL;
+
+   bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn;
+   dev_info = amdgpu_acpi_get_dev(bdf);
+   if (!dev_info)
+   return -ENOENT;
+
+   list_for_each_entry(xcc_info, &dev_info->xcc_list, list) {
+   if (xcc_info->phy_id == xcc_id) {
+   memcpy(numa_info, xcc_info->numa_info,
+  sizeof(*numa_info));
+   return 0;
+   }
+   }
+
+   return -ENOENT;
+}
+
 /**
  * amdgpu_acpi_event - handle notify events
  *
-- 
2.40.1



[PATCH 10/14] drm/amdgpu: Move initialization of xcp before kfd

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

After partition switch, fill all relevant xcp information before kfd
starts initialization.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c  | 16 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h  |  1 +
 .../gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c  |  6 --
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
index e1d3727036a1..bca226cc4e0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -113,12 +113,17 @@ static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr 
*xcp_mgr, int xcp_id,
xcp->valid = true;
 }
 
-static int __amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps)
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
 {
struct amdgpu_xcp_ip ip;
uint8_t mem_id;
int i, j, ret;
 
+   if (!num_xcps || num_xcps > MAX_XCP)
+   return -EINVAL;
+
+   xcp_mgr->mode = mode;
+
for (i = 0; i < MAX_XCP; ++i)
xcp_mgr->xcp[i].valid = false;
 
@@ -181,13 +186,6 @@ int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr 
*xcp_mgr, int mode)
goto out;
}
 
-   if (!num_xcps || num_xcps > MAX_XCP) {
-   ret = -EINVAL;
-   goto out;
-   }
-
-   xcp_mgr->mode = mode;
-   __amdgpu_xcp_init(xcp_mgr, num_xcps);
 out:
mutex_unlock(&xcp_mgr->xcp_lock);
 
@@ -240,7 +238,7 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int 
init_mode,
mutex_init(&xcp_mgr->xcp_lock);
 
if (init_mode != AMDGPU_XCP_MODE_NONE)
-   __amdgpu_xcp_init(xcp_mgr, init_num_xcps);
+   amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode);
 
adev->xcp_mgr = xcp_mgr;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index 7e7e458d307e..e1319b887bf3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -105,6 +105,7 @@ int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int 
xcp_id);
 
 int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
int init_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs);
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode);
 int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
 int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode);
 int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
index aa1bb7883158..004400fb89b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
@@ -321,9 +321,11 @@ static int aqua_vanjaram_switch_partition_mode(struct 
amdgpu_xcp_mgr *xcp_mgr,
if (adev->nbio.funcs->set_compute_partition_mode)
adev->nbio.funcs->set_compute_partition_mode(adev, mode);
 
-   ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags);
-
+   /* Init info about new xcps */
*num_xcps = num_xcc / num_xcc_per_xcp;
+   amdgpu_xcp_init(xcp_mgr, *num_xcps, mode);
+
+   ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags);
 unlock:
if (flags & AMDGPU_XCP_OPS_KFD)
amdgpu_amdkfd_unlock_kfd(adev);
-- 
2.40.1



[PATCH 12/14] drm/amdkfd: Use xcc mask for identifying xcc

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Instead of the start xcc id and number of xccs per node, use the xcc mask,
which is the mask of logical ids of the xccs belonging to a partition.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  9 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 86 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 71 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  4 -
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  8 +-
 .../amd/amdkfd/kfd_process_queue_manager.c|  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  8 +-
 8 files changed, 95 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 647c3313c27e..b5497d2ee984 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -745,15 +745,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
node->vm_info.vmid_num_kfd = vmid_num_kfd;
node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx);
/* TODO : Check if error handling is needed */
-   if (node->xcp)
+   if (node->xcp) {
amdgpu_xcp_get_inst_details(node->xcp, AMDGPU_XCP_GFX,
&node->xcc_mask);
-   else
+   ++xcp_idx;
+   } else {
node->xcc_mask =
(1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1;
-
-   node->num_xcc_per_node = max(1U, 
kfd->adev->gfx.num_xcc_per_xcp);
-   node->start_xcc_id = node->num_xcc_per_node * i;
+   }
 
if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
partition_mode == AMDGPU_CPX_PARTITION_MODE &&
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 2b5c4b2dd242..493b4b66f180 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -136,16 +136,14 @@ static void init_sdma_bitmaps(struct device_queue_manager 
*dqm)
 void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
 {
-   int xcc = 0;
+   uint32_t xcc_mask = dqm->dev->xcc_mask;
+   int xcc_id;
 
-   for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
+   for_each_inst(xcc_id, xcc_mask)
dqm->dev->kfd2kgd->program_sh_mem_settings(
-   dqm->dev->adev, qpd->vmid,
-   qpd->sh_mem_config,
-   qpd->sh_mem_ape1_base,
-   qpd->sh_mem_ape1_limit,
-   qpd->sh_mem_bases,
-   dqm->dev->start_xcc_id + xcc);
+   dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
+   qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
+   qpd->sh_mem_bases, xcc_id);
 }
 
 static void kfd_hws_hang(struct device_queue_manager *dqm)
@@ -427,14 +425,14 @@ static void deallocate_doorbell(struct qcm_process_device 
*qpd,
 static void program_trap_handler_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
 {
-   int xcc = 0;
+   uint32_t xcc_mask = dqm->dev->xcc_mask;
+   int xcc_id;
 
if (dqm->dev->kfd2kgd->program_trap_handler_settings)
-   for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
+   for_each_inst(xcc_id, xcc_mask)
dqm->dev->kfd2kgd->program_trap_handler_settings(
-   dqm->dev->adev, qpd->vmid,
-   qpd->tba_addr, qpd->tma_addr,
-   dqm->dev->start_xcc_id + xcc);
+   dqm->dev->adev, qpd->vmid, qpd->tba_addr,
+   qpd->tma_addr, xcc_id);
 }
 
 static int allocate_vmid(struct device_queue_manager *dqm,
@@ -697,7 +695,8 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node 
*dev, struct kfd_process
struct kfd_process_device *pdd;
int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
-   int xcc = 0;
+   uint32_t xcc_mask = dev->xcc_mask;
+   int xcc_id;
 
reg_sq_cmd.u32All = 0;
reg_gfx_index.u32All = 0;
@@ -742,11 +741,10 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node 
*dev, struct kfd_process
reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;

[PATCH 13/14] drm/amdgpu: Check memory ranges for valid xcp mode

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Check the memory ranges available to the device also for deciding a
valid partition mode. Only select combinations are valid for a
particular mode.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Reviewed-by: Philip Yang 
Signed-off-by: Alex Deucher 
---
 .../drm/amd/amdgpu/aqua_vanjaram_reg_init.c   | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
index 004400fb89b0..7469de3fd6fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
@@ -238,21 +238,28 @@ int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr 
*xcp_mgr, int xcp_id,
 static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
  enum amdgpu_gfx_partition mode)
 {
+   struct amdgpu_device *adev = xcp_mgr->adev;
int num_xcc, num_xccs_per_xcp;
 
-   num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+   num_xcc = NUM_XCC(adev->gfx.xcc_mask);
switch (mode) {
case AMDGPU_SPX_PARTITION_MODE:
-   return num_xcc > 0;
+   return adev->gmc.num_mem_partitions == 1 && num_xcc > 0;
case AMDGPU_DPX_PARTITION_MODE:
-   return (num_xcc % 4) == 0;
+   return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0;
case AMDGPU_TPX_PARTITION_MODE:
-   return (num_xcc % 3) == 0;
+   return (adev->gmc.num_mem_partitions == 1 ||
+   adev->gmc.num_mem_partitions == 3) &&
+  ((num_xcc % 3) == 0);
case AMDGPU_QPX_PARTITION_MODE:
num_xccs_per_xcp = num_xcc / 4;
-   return (num_xccs_per_xcp >= 2);
+   return (adev->gmc.num_mem_partitions == 1 ||
+   adev->gmc.num_mem_partitions == 4) &&
+  (num_xccs_per_xcp >= 2);
case AMDGPU_CPX_PARTITION_MODE:
-   return (num_xcc > 1);
+   return (num_xcc > 1) &&
+  (adev->gmc.num_mem_partitions == 1 ||
+   adev->gmc.num_mem_partitions == num_xcc);
default:
return false;
}
-- 
2.40.1



[PATCH 09/14] drm/amdgpu: Fill xcp mem node in aquavanjaram

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Implement callbacks to fill memory node information in aquavanjaram.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 .../drm/amd/amdgpu/aqua_vanjaram_reg_init.c   | 61 ++-
 1 file changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
index 68d732dd9ecb..aa1bb7883158 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
@@ -331,6 +331,64 @@ static int aqua_vanjaram_switch_partition_mode(struct 
amdgpu_xcp_mgr *xcp_mgr,
return ret;
 }
 
+static int __aqua_vanjaram_get_xcp_mem_id(struct amdgpu_device *adev,
+ int xcc_id, uint8_t *mem_id)
+{
+   /* TODO: Check if any validation is required based on current
+* memory/spatial modes
+*/
+   *mem_id = xcc_id / adev->gfx.num_xcc_per_xcp;
+
+   return 0;
+}
+
+static int aqua_vanjaram_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr,
+   struct amdgpu_xcp *xcp, uint8_t *mem_id)
+{
+   struct amdgpu_numa_info numa_info;
+   struct amdgpu_device *adev;
+   uint32_t xcc_mask;
+   int r, i, xcc_id;
+
+   adev = xcp_mgr->adev;
+   /* TODO: BIOS is not returning the right info now
+* Check on this later
+*/
+   /*
+   if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+   mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+   */
+   if (adev->gmc.num_mem_partitions == 1) {
+   /* Only one range */
+   *mem_id = 0;
+   return 0;
+   }
+
+   r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &xcc_mask);
+   if (r || !xcc_mask)
+   return -EINVAL;
+
+   xcc_id = ffs(xcc_mask) - 1;
+   if (!adev->gmc.is_app_apu)
+   return __aqua_vanjaram_get_xcp_mem_id(adev, xcc_id, mem_id);
+
+   r = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+
+   if (r)
+   return r;
+
+   r = -EINVAL;
+   for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+   if (adev->gmc.mem_partitions[i].numa.node == numa_info.nid) {
+   *mem_id = i;
+   r = 0;
+   break;
+   }
+   }
+
+   return r;
+}
+
 int aqua_vanjaram_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int 
xcp_id,
 enum AMDGPU_XCP_IP_BLOCK ip_id,
 struct amdgpu_xcp_ip *ip)
@@ -344,7 +402,8 @@ int aqua_vanjaram_get_xcp_ip_details(struct amdgpu_xcp_mgr 
*xcp_mgr, int xcp_id,
 struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = {
.switch_partition_mode = &aqua_vanjaram_switch_partition_mode,
.query_partition_mode = &aqua_vanjaram_query_partition_mode,
-   .get_ip_details = &aqua_vanjaram_get_xcp_ip_details
+   .get_ip_details = &aqua_vanjaram_get_xcp_ip_details,
+   .get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id
 };
 
 static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
-- 
2.40.1



[PATCH 14/14] drm/amdgpu: Add auto mode for compute partition

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

When auto mode is specified, the driver will choose the right compute
partition mode.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Reviewed-by: Philip Yang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  8 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  2 ++
 .../drm/amd/amdgpu/aqua_vanjaram_reg_init.c   | 28 ++-
 4 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4ecaff14f2b3..9904ce78b8fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -243,7 +243,7 @@ extern int amdgpu_num_kcq;
 extern int amdgpu_vcnfw_log;
 extern int amdgpu_sg_display;
 
-extern uint amdgpu_user_partt_mode;
+extern int amdgpu_user_partt_mode;
 
 extern int gart_ram_alloc;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 39975724be73..e4d09bf0887d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -194,7 +194,7 @@ int amdgpu_smartshift_bias;
 int amdgpu_use_xgmi_p2p = 1;
 int amdgpu_vcnfw_log;
 int amdgpu_sg_display = -1; /* auto */
-uint amdgpu_user_partt_mode;
+int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -964,8 +964,10 @@ module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, 
int, 0444);
  * DOC: partition_mode (int)
  * Used to override the default SPX mode.
  */
-MODULE_PARM_DESC(user_partt_mode,
-   "specify partition mode to be used (0 = 
AMDGPU_SPX_PARTITION_MODE(default value), \
+MODULE_PARM_DESC(
+   user_partt_mode,
+   "specify partition mode to be used (-2 = 
AMDGPU_AUTO_COMPUTE_PARTITION_MODE(default value) \
+   0 = AMDGPU_SPX_PARTITION_MODE, \
1 = AMDGPU_DPX_PARTITION_MODE, \
2 = AMDGPU_TPX_PARTITION_MODE, \
3 = AMDGPU_QPX_PARTITION_MODE, \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 728977f8afe7..e9c93f6e12b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -62,6 +62,8 @@ enum amdgpu_gfx_partition {
AMDGPU_QPX_PARTITION_MODE = 3,
AMDGPU_CPX_PARTITION_MODE = 4,
AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE = -1,
+   /* Automatically choose the right mode */
+   AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2,
 };
 
 #define NUM_XCC(x) hweight16(x)
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
index 7469de3fd6fe..a165b51e9e58 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
@@ -235,6 +235,30 @@ int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr 
*xcp_mgr, int xcp_id,
return 0;
 }
 
+static enum amdgpu_gfx_partition
+__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+   struct amdgpu_device *adev = xcp_mgr->adev;
+   int num_xcc;
+
+   num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+   if (adev->gmc.num_mem_partitions == 1)
+   return AMDGPU_SPX_PARTITION_MODE;
+
+   if (adev->gmc.num_mem_partitions == num_xcc)
+   return AMDGPU_CPX_PARTITION_MODE;
+
+   if (adev->gmc.num_mem_partitions == num_xcc / 2)
+   return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE :
+   AMDGPU_QPX_PARTITION_MODE;
+
+   if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU))
+   return AMDGPU_DPX_PARTITION_MODE;
+
+   return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+}
+
 static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
  enum amdgpu_gfx_partition mode)
 {
@@ -304,7 +328,9 @@ static int aqua_vanjaram_switch_partition_mode(struct 
amdgpu_xcp_mgr *xcp_mgr,
adev = xcp_mgr->adev;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
 
-   if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode))
+   if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE)
+   mode = __aqua_vanjaram_get_auto_mode(xcp_mgr);
+   else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode))
return -EINVAL;
 
if (adev->kfd.init_complete)
-- 
2.40.1



[PATCH 07/14] drm/amdgpu: Initialize memory ranges for GC 9.4.3

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

GC 9.4.3 ASICs may have memory split into multiple partitions. Initialize
the memory partition information for each range. The information may be
in the form of a numa node id or a range of pages.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 172 ++
 1 file changed, 172 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 59412b263dee..393e90d8b27f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -79,6 +79,7 @@
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 
 0x05ea
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX
 2
 
+#define MAX_MEM_RANGES 8
 
 static const char *gfxhub_client_ids[] = {
"CB",
@@ -1742,6 +1743,169 @@ static void gmc_v9_0_save_registers(struct 
amdgpu_device *adev)
adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, 
mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
 }
 
+static bool gmc_v9_0_validate_partition_info(struct amdgpu_device *adev)
+{
+   enum amdgpu_memory_partition mode;
+   u32 supp_modes;
+   bool valid;
+
+   mode = gmc_v9_0_get_memory_partition(adev, &supp_modes);
+
+   /* Mode detected by hardware not present in supported modes */
+   if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) &&
+   !(BIT(mode - 1) & supp_modes))
+   return false;
+
+   switch (mode) {
+   case UNKNOWN_MEMORY_PARTITION_MODE:
+   case AMDGPU_NPS1_PARTITION_MODE:
+   valid = (adev->gmc.num_mem_partitions == 1);
+   break;
+   case AMDGPU_NPS2_PARTITION_MODE:
+   valid = (adev->gmc.num_mem_partitions == 2);
+   break;
+   case AMDGPU_NPS4_PARTITION_MODE:
+   valid = (adev->gmc.num_mem_partitions == 3 ||
+adev->gmc.num_mem_partitions == 4);
+   break;
+   default:
+   valid = false;
+   }
+
+   return valid;
+}
+
+static bool gmc_v9_0_is_node_present(int *node_ids, int num_ids, int nid)
+{
+   int i;
+
+   /* Check if node with id 'nid' is present in 'node_ids' array */
+   for (i = 0; i < num_ids; ++i)
+   if (node_ids[i] == nid)
+   return true;
+
+   return false;
+}
+
+static void
+gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+   int num_ranges = 0, ret, mem_groups;
+   struct amdgpu_numa_info numa_info;
+   int node_ids[MAX_MEM_RANGES];
+   int num_xcc, xcc_id;
+   uint32_t xcc_mask;
+
+   num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+   xcc_mask = (1U << num_xcc) - 1;
+   mem_groups = hweight32(adev->aid_mask);
+
+   for_each_inst(xcc_id, xcc_mask) {
+   ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+   if (ret)
+   continue;
+
+   if (numa_info.nid == NUMA_NO_NODE) {
+   mem_ranges[0].size = numa_info.size;
+   mem_ranges[0].numa.node = numa_info.nid;
+   num_ranges = 1;
+   break;
+   }
+
+   if (gmc_v9_0_is_node_present(node_ids, num_ranges,
+numa_info.nid))
+   continue;
+
+   node_ids[num_ranges] = numa_info.nid;
+   mem_ranges[num_ranges].numa.node = numa_info.nid;
+   mem_ranges[num_ranges].size = numa_info.size;
+   ++num_ranges;
+   }
+
+   adev->gmc.num_mem_partitions = num_ranges;
+
+   /* If there is only partition, don't use entire size */
+   if (adev->gmc.num_mem_partitions == 1)
+   mem_ranges[0].size =
+   (mem_ranges[0].size * (mem_groups - 1) / mem_groups);
+}
+
+static void
+gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
+   struct amdgpu_mem_partition_info *mem_ranges)
+{
+   enum amdgpu_memory_partition mode;
+   u32 start_addr = 0, size;
+   int i;
+
+   mode = gmc_v9_0_query_memory_partition(adev);
+
+   switch (mode) {
+   case UNKNOWN_MEMORY_PARTITION_MODE:
+   case AMDGPU_NPS1_PARTITION_MODE:
+   adev->gmc.num_mem_partitions = 1;
+   break;
+   case AMDGPU_NPS2_PARTITION_MODE:
+   adev->gmc.num_mem_partitions = 2;
+   break;
+   case AMDGPU_NPS4_PARTITION_MODE:
+   if (adev->flags & AMD_IS_APU)
+   adev->gmc.num_mem_partitions = 3;
+   else
+   adev->gmc.num_mem_partitions = 4;
+   break;
+   default:
+   adev->gmc.num_mem_partitions = 1;
+

[PATCH 11/14] drm/amdkfd: Add xcp reference to kfd node

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Fetch xcp information from xcp_mgr and also add xcc_mask to kfd node.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 19 +--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  3 +++
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 4293cbf9ceb0..647c3313c27e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -594,6 +594,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd;
unsigned int max_proc_per_quantum;
int num_xcd, partition_mode;
+   int xcp_idx;
 
kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
KGD_ENGINE_MEC1);
@@ -603,11 +604,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
KGD_ENGINE_SDMA1);
kfd->shared_resources = *gpu_resources;
 
-   num_xcd = NUM_XCC(kfd->adev->gfx.xcc_mask);
-   if (num_xcd == 0 || num_xcd == 1 || kfd->adev->gfx.num_xcc_per_xcp == 0)
-   kfd->num_nodes = 1;
-   else
-   kfd->num_nodes = num_xcd / kfd->adev->gfx.num_xcc_per_xcp;
+   kfd->num_nodes = amdgpu_xcp_get_num_xcp(kfd->adev->xcp_mgr);
+
if (kfd->num_nodes == 0) {
dev_err(kfd_device,
"KFD num nodes cannot be 0, GC inst: %d, 
num_xcc_in_node: %d\n",
@@ -735,7 +733,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->num_nodes);
 
/* Allocate the KFD nodes */
-   for (i = 0; i < kfd->num_nodes; i++) {
+   for (i = 0, xcp_idx = 0; i < kfd->num_nodes; i++) {
node = kzalloc(sizeof(struct kfd_node), GFP_KERNEL);
if (!node)
goto node_alloc_error;
@@ -745,6 +743,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
node->kfd = kfd;
node->kfd2kgd = kfd->kfd2kgd;
node->vm_info.vmid_num_kfd = vmid_num_kfd;
+   node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx);
+   /* TODO : Check if error handling is needed */
+   if (node->xcp)
+   amdgpu_xcp_get_inst_details(node->xcp, AMDGPU_XCP_GFX,
+   &node->xcc_mask);
+   else
+   node->xcc_mask =
+   (1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1;
+
node->num_xcc_per_node = max(1U, 
kfd->adev->gfx.num_xcc_per_xcp);
node->start_xcc_id = node->num_xcc_per_node * i;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6e1c15682c28..559ac5efdc26 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -278,6 +278,9 @@ struct kfd_node {
unsigned int start_xcc_id;  /* Starting XCC instance
 * number for the node
 */
+   uint32_t xcc_mask; /* Instance mask of XCCs present */
+   struct amdgpu_xcp *xcp;
+
/* Interrupts */
struct kfifo ih_fifo;
struct workqueue_struct *ih_wq;
-- 
2.40.1



[PATCH 08/14] drm/amdgpu: Add callback to fill xcp memory id

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Add callback in xcp interface to fill xcp memory id information. Memory
id is used to identify the range/partition of an XCP from the available
memory partitions in device. Also, fill the id information.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 12 
 drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h |  4 +++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
index 337d558a3145..e1d3727036a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -116,6 +116,7 @@ static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr 
*xcp_mgr, int xcp_id,
 static int __amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps)
 {
struct amdgpu_xcp_ip ip;
+   uint8_t mem_id;
int i, j, ret;
 
for (i = 0; i < MAX_XCP; ++i)
@@ -130,6 +131,17 @@ static int __amdgpu_xcp_init(struct amdgpu_xcp_mgr 
*xcp_mgr, int num_xcps)
 
__amdgpu_xcp_add_block(xcp_mgr, i, &ip);
}
+
+   xcp_mgr->xcp[i].id = i;
+
+   if (xcp_mgr->funcs->get_xcp_mem_id) {
+   ret = xcp_mgr->funcs->get_xcp_mem_id(
+   xcp_mgr, &xcp_mgr->xcp[i], &mem_id);
+   if (ret)
+   continue;
+   else
+   xcp_mgr->xcp[i].mem_id = mem_id;
+   }
}
 
xcp_mgr->num_xcps = num_xcps;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index 45d590d7fd95..7e7e458d307e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -68,7 +68,7 @@ struct amdgpu_xcp {
struct amdgpu_xcp_ip ip[AMDGPU_XCP_MAX_BLOCKS];
 
uint8_t id;
-   uint8_t mem_node;
+   uint8_t mem_id;
bool valid;
 };
 
@@ -89,6 +89,8 @@ struct amdgpu_xcp_mgr_funcs {
int (*get_ip_details)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
  enum AMDGPU_XCP_IP_BLOCK ip_id,
  struct amdgpu_xcp_ip *ip);
+   int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id);
 
int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
-- 
2.40.1



[PATCH 01/14] drm/amdgpu: Add utility functions for xcp

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Add utility functions to get details of xcp and iterate through
available xcps.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 12 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 31 +
 2 files changed, 43 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
index e8aa4d6c6b62..337d558a3145 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -256,3 +256,15 @@ int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr 
*xcp_mgr,
 
return id_mask;
 }
+
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+   enum AMDGPU_XCP_IP_BLOCK ip,
+   uint32_t *inst_mask)
+{
+   if (!xcp->valid || !inst_mask || !(xcp->ip[ip].valid))
+   return -EINVAL;
+
+   *inst_mask = xcp->ip[ip].inst_mask;
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index 1d3dc7d68f54..45d590d7fd95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -108,4 +108,35 @@ int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr 
*xcp_mgr, int mode);
 int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
 enum AMDGPU_XCP_IP_BLOCK ip, int instance);
 
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+   enum AMDGPU_XCP_IP_BLOCK ip,
+   uint32_t *inst_mask);
+
+static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+   if (!xcp_mgr)
+   return 1;
+   else
+   return xcp_mgr->num_xcps;
+}
+
+static inline struct amdgpu_xcp *
+amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
+{
+   if (!xcp_mgr)
+   return NULL;
+
+   while (*from < MAX_XCP) {
+   if (xcp_mgr->xcp[*from].valid)
+   return &xcp_mgr->xcp[*from];
+   ++(*from);
+   }
+
+   return NULL;
+}
+
+#define for_each_xcp(xcp_mgr, xcp, i)\
+   for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
+xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
+
 #endif
-- 
2.40.1



[PATCH 06/14] drm/amdgpu: Add memory partitions to gmc

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Some ASICs have the device memory divided into multiple partitions. The
partitions could be denoted by a numa node or by a range of pages.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 2bd3b9665ebf..43357d699e6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -178,6 +178,21 @@ struct amdgpu_xgmi {
struct amdgpu_xgmi_ras *ras;
 };
 
+struct amdgpu_mem_partition_info {
+   union {
+   struct {
+   uint32_t fpfn;
+   uint32_t lpfn;
+   } range;
+   struct {
+   int node;
+   } numa;
+   };
+   uint64_t size;
+};
+
+#define INVALID_PFN-1
+
 struct amdgpu_gmc {
/* FB's physical address in MMIO space (for CPU to
 * map FB). This is different compared to the agp/
@@ -266,6 +281,8 @@ struct amdgpu_gmc {
bool tmz_enabled;
bool is_app_apu;
 
+   struct amdgpu_mem_partition_info *mem_partitions;
+   uint8_t num_mem_partitions;
const struct amdgpu_gmc_funcs   *gmc_funcs;
 
struct amdgpu_xgmi xgmi;
-- 
2.40.1



[PATCH 04/14] drm/amdgpu: Store additional numa node information

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Use a struct to store additional numa node information including size
and base address. Add numa_info pointer to xcc object to point to the
relevant structure based on its proximity domain.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 79 ++--
 1 file changed, 75 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 52c16b041193..26efca660f0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -53,10 +54,18 @@ static const guid_t amd_xcc_dsm_guid = 
GUID_INIT(0x8267f5d5, 0xa556, 0x44f2,
 
 #define AMD_XCC_MAX_HID 24
 
+struct amdgpu_numa_info {
+   uint64_t size;
+   int pxm;
+   int nid;
+};
+
+struct xarray numa_info_xa;
+
 /* Encapsulates the XCD acpi object information */
 struct amdgpu_acpi_xcc_info {
struct list_head list;
-   int mem_node;
+   struct amdgpu_numa_info *numa_info;
uint8_t xcp_node;
uint8_t phy_id;
acpi_handle handle;
@@ -838,6 +847,52 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, 
enum amdgpu_ss ss_sta
return r;
 }
 
+static inline uint64_t amdgpu_acpi_get_numa_size(int nid)
+{
+   /* This is directly using si_meminfo_node implementation as the
+* function is not exported.
+*/
+   int zone_type;
+   uint64_t managed_pages = 0;
+
+   pg_data_t *pgdat = NODE_DATA(nid);
+
+   for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+   managed_pages +=
+   zone_managed_pages(&pgdat->node_zones[zone_type]);
+   return managed_pages * PAGE_SIZE;
+}
+
+static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm)
+{
+   struct amdgpu_numa_info *numa_info;
+   int nid;
+
+   numa_info = xa_load(&numa_info_xa, pxm);
+
+   if (!numa_info) {
+   struct sysinfo info;
+
+   numa_info = kzalloc(sizeof *numa_info, GFP_KERNEL);
+   if (!numa_info)
+   return NULL;
+
+   nid = pxm_to_node(pxm);
+   numa_info->pxm = pxm;
+   numa_info->nid = nid;
+
+   if (numa_info->nid == NUMA_NO_NODE) {
+   si_meminfo(&info);
+   numa_info->size = info.totalram * info.mem_unit;
+   } else {
+   numa_info->size = amdgpu_acpi_get_numa_size(nid);
+   }
+   xa_store(&numa_info_xa, numa_info->pxm, numa_info, GFP_KERNEL);
+   }
+
+   return numa_info;
+}
+
 /**
  * amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu
  * acpi device handle
@@ -850,18 +905,25 @@ int amdgpu_acpi_smart_shift_update(struct drm_device 
*dev, enum amdgpu_ss ss_sta
  *
  * Returns ACPI STATUS OK with Node ID on success or the corresponding failure 
reason
  */
-acpi_status amdgpu_acpi_get_node_id(acpi_handle handle, int *nid)
+acpi_status amdgpu_acpi_get_node_id(acpi_handle handle,
+   struct amdgpu_numa_info **numa_info)
 {
 #ifdef CONFIG_ACPI_NUMA
u64 pxm;
acpi_status status;
 
+   if (!numa_info)
+   return_ACPI_STATUS(AE_ERROR);
+
status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
 
if (ACPI_FAILURE(status))
return status;
 
-   *nid = pxm_to_node(pxm);
+   *numa_info = amdgpu_acpi_get_numa_info(pxm);
+
+   if (!*numa_info)
+   return_ACPI_STATUS(AE_ERROR);
 
return_ACPI_STATUS(AE_OK);
 #else
@@ -1001,7 +1063,8 @@ static int amdgpu_acpi_get_xcc_info(struct 
amdgpu_acpi_xcc_info *xcc_info,
ACPI_FREE(obj);
obj = NULL;
 
-   status = amdgpu_acpi_get_node_id(xcc_info->handle, &xcc_info->mem_node);
+   status =
+   amdgpu_acpi_get_node_id(xcc_info->handle, &xcc_info->numa_info);
 
/* TODO: check if this check is required */
if (ACPI_SUCCESS(status))
@@ -1023,6 +1086,7 @@ static int amdgpu_acpi_enumerate_xcc(void)
u16 bdf;
 
INIT_LIST_HEAD(&amdgpu_acpi_dev_list);
+   xa_init(&numa_info_xa);
 
for (id = 0; id < AMD_XCC_MAX_HID; id++) {
sprintf(hid, "%s%d", "AMD", AMD_XCC_HID_START + id);
@@ -1353,6 +1417,13 @@ void amdgpu_acpi_release(void)
 {
struct amdgpu_acpi_dev_info *dev_info, *dev_tmp;
struct amdgpu_acpi_xcc_info *xcc_info, *xcc_tmp;
+   struct amdgpu_numa_info *numa_info;
+   unsigned long index;
+
+   xa_for_each(&numa_info_xa, index, numa_info) {
+   kfree(numa_info);
+   xa_erase(&numa_info_xa, index);
+   }
 
if (list_empty(&amdgpu_acpi_dev_list))
return;
-- 
2.40.1



[PATCH 02/14] drm/amdgpu: Move memory partition query to gmc

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

GMC block handles memory related information, it makes more sense to
keep memory partition functions in gmc block.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 11 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 44 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 16 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 10 --
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 18 ++
 6 files changed, 79 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a939ffe2969a..817e7b7d32b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1198,24 +1198,6 @@ static ssize_t 
amdgpu_gfx_get_current_compute_partition(struct device *dev,
return sysfs_emit(buf, "%s\n", partition_mode);
 }
 
-static ssize_t amdgpu_gfx_get_current_memory_partition(struct device *dev,
-   struct device_attribute *addr,
-   char *buf)
-{
-   struct drm_device *ddev = dev_get_drvdata(dev);
-   struct amdgpu_device *adev = drm_to_adev(ddev);
-   enum amdgpu_memory_partition mode;
-   static const char *partition_modes[] = {
-   "UNKNOWN", "NPS1", "NPS2", "NPS4", "NPS8"
-   };
-   BUILD_BUG_ON(ARRAY_SIZE(partition_modes) <= AMDGPU_NPS8_PARTITION_MODE);
-
-   mode = min((int)adev->gfx.funcs->query_mem_partition_mode(adev),
-   AMDGPU_NPS8_PARTITION_MODE);
-
-   return sysfs_emit(buf, "%s\n", partition_modes[mode]);
-}
-
 static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
struct device_attribute *addr,
const char *buf, size_t count)
@@ -1299,9 +1281,6 @@ static DEVICE_ATTR(current_compute_partition, S_IRUGO | 
S_IWUSR,
 static DEVICE_ATTR(available_compute_partition, S_IRUGO,
   amdgpu_gfx_get_available_compute_partition, NULL);
 
-static DEVICE_ATTR(current_memory_partition, S_IRUGO,
-  amdgpu_gfx_get_current_memory_partition, NULL);
-
 int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
 {
int r;
@@ -1311,19 +1290,12 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
return r;
 
r = device_create_file(adev->dev, 
&dev_attr_available_compute_partition);
-   if (r)
-   return r;
 
-   r = device_create_file(adev->dev, &dev_attr_current_memory_partition);
-   if (r)
-   return r;
-
-   return 0;
+   return r;
 }
 
 void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
 {
device_remove_file(adev->dev, &dev_attr_current_compute_partition);
device_remove_file(adev->dev, &dev_attr_available_compute_partition);
-   device_remove_file(adev->dev, &dev_attr_current_memory_partition);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 81b4c7e684af..728977f8afe7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -71,14 +71,6 @@ enum amdgpu_pkg_type {
AMDGPU_PKG_TYPE_UNKNOWN,
 };
 
-enum amdgpu_memory_partition {
-   UNKNOWN_MEMORY_PARTITION_MODE = 0,
-   AMDGPU_NPS1_PARTITION_MODE = 1,
-   AMDGPU_NPS2_PARTITION_MODE = 2,
-   AMDGPU_NPS4_PARTITION_MODE = 3,
-   AMDGPU_NPS8_PARTITION_MODE = 4,
-};
-
 struct amdgpu_mec {
struct amdgpu_bo*hpd_eop_obj;
u64 hpd_eop_gpu_addr;
@@ -276,8 +268,6 @@ struct amdgpu_gfx_funcs {
   struct amdgpu_gfx_shadow_info *shadow_info);
enum amdgpu_gfx_partition
(*query_partition_mode)(struct amdgpu_device *adev);
-   enum amdgpu_memory_partition
-   (*query_mem_partition_mode)(struct amdgpu_device *adev);
int (*switch_partition_mode)(struct amdgpu_device *adev,
 int num_xccs_per_xcp);
int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
@@ -414,7 +404,6 @@ struct amdgpu_gfx {
boolcp_gfx_shadow; /* for gfx11 */
 
uint16_txcc_mask;
-   enum amdgpu_memory_partitionmem_partition_mode;
uint32_tnum_xcc_per_xcp;
struct mutexpartition_mutex;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index efb08b4d093b..3f5dd9e32e08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -890,3 +890,47 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
 
return 0;
 }

[PATCH 03/14] drm/amdgpu: Get supported memory partition modes

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Expand the interface to get supported memory partition modes also along
with the current memory partition mode.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 11 +--
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c   |  9 -
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index eb25ac98903f..095aecfb201e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -97,7 +97,8 @@ struct amdgpu_nbio_funcs {
void (*clear_doorbell_interrupt)(struct amdgpu_device *adev);
u32 (*get_rom_offset)(struct amdgpu_device *adev);
int (*get_compute_partition_mode)(struct amdgpu_device *adev);
-   u32 (*get_memory_partition_mode)(struct amdgpu_device *adev);
+   u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
+u32 *supp_modes);
void (*set_compute_partition_mode)(struct amdgpu_device *adev,
   enum amdgpu_gfx_partition mode);
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 17459c4951fe..59412b263dee 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1331,16 +1331,23 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct 
amdgpu_device *adev)
 }
 
 static enum amdgpu_memory_partition
-gmc_v9_0_query_memory_partition(struct amdgpu_device *adev)
+gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes)
 {
enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE;
 
if (adev->nbio.funcs->get_memory_partition_mode)
-   mode = adev->nbio.funcs->get_memory_partition_mode(adev);
+   mode = adev->nbio.funcs->get_memory_partition_mode(adev,
+  supp_modes);
 
return mode;
 }
 
+static enum amdgpu_memory_partition
+gmc_v9_0_query_memory_partition(struct amdgpu_device *adev)
+{
+   return gmc_v9_0_get_memory_partition(adev, NULL);
+}
+
 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index fa4b423c399b..e1552d645308 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -408,12 +408,19 @@ static void nbio_v7_9_set_compute_partition_mode(struct 
amdgpu_device *adev,
WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_COMPUTE_STATUS, tmp);
 }
 
-static enum amdgpu_memory_partition nbio_v7_9_get_memory_partition_mode(struct 
amdgpu_device *adev)
+static enum amdgpu_memory_partition
+nbio_v7_9_get_memory_partition_mode(struct amdgpu_device *adev, u32 
*supp_modes)
 {
u32 tmp;
+
tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS);
tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS, NPS_MODE);
 
+   if (supp_modes) {
+   *supp_modes =
+   RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_CAP);
+   }
+
return ffs(tmp);
 }
 
-- 
2.40.1



[PATCH] drm/amdgpu: Use apt name for FW reserved region

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Use the generic term fw_reserved_memory for FW reserve region. This
region may also hold discovery TMR in addition to other reserve
regions. This region size could be larger than discovery tmr size, hence
don't change the discovery tmr size based on this.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  3 ++-
 2 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2bdd6bcad506..1582ef092bf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1648,14 +1648,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct 
amdgpu_device *adev)
return 0;
 }
 
-static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
+   uint32_t reserve_size)
 {
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
 
memset(ctx, 0, sizeof(*ctx));
 
ctx->c2p_train_data_offset =
-   ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - 
SZ_1M), SZ_1M);
+   ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
ctx->p2c_train_data_offset =
(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
ctx->train_data_size =
@@ -1673,9 +1674,10 @@ static void amdgpu_ttm_training_data_block_init(struct 
amdgpu_device *adev)
  */
 static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
 {
-   int ret;
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
bool mem_train_support = false;
+   uint32_t reserve_size = 0;
+   int ret;
 
if (!amdgpu_sriov_vf(adev)) {
if (amdgpu_atomfirmware_mem_training_supported(adev))
@@ -1691,14 +1693,15 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device 
*adev)
 * Otherwise, fallback to legacy approach to check and reserve tmr 
block for ip
 * discovery data and G6 memory training data respectively
 */
-   adev->mman.discovery_tmr_size =
-   amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
-   if (!adev->mman.discovery_tmr_size)
-   adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
+   if (adev->bios)
+   reserve_size =
+   amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
+   if (!reserve_size)
+   reserve_size = DISCOVERY_TMR_OFFSET;
 
if (mem_train_support) {
/* reserve vram for mem train according to TMR location */
-   amdgpu_ttm_training_data_block_init(adev);
+   amdgpu_ttm_training_data_block_init(adev, reserve_size);
ret = amdgpu_bo_create_kernel_at(adev,
 ctx->c2p_train_data_offset,
 ctx->train_data_size,
@@ -1713,15 +1716,13 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device 
*adev)
}
 
if (!adev->gmc.is_app_apu) {
-   ret = amdgpu_bo_create_kernel_at(adev,
-adev->gmc.real_vram_size -
-adev->mman.discovery_tmr_size,
-adev->mman.discovery_tmr_size,
-&adev->mman.discovery_memory,
-NULL);
+   ret = amdgpu_bo_create_kernel_at(
+   adev, adev->gmc.real_vram_size - reserve_size,
+   reserve_size, &adev->mman.fw_reserved_memory, NULL);
if (ret) {
DRM_ERROR("alloc tmr failed(%d)!\n", ret);
-   amdgpu_bo_free_kernel(&adev->mman.discovery_memory, 
NULL, NULL);
+   amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
+ NULL, NULL);
return ret;
}
} else {
@@ -1909,8 +1910,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
if (!adev->gmc.is_app_apu) {
amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, 
NULL);
amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, 
NULL);
-   /* return the IP Discovery TMR memory back to VRAM */
-   amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
+   /* return the FW reserved memory back to VRAM */
+   amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
if (adev->mman.stolen_reserved_size)

[PATCH] drm/amdgpu: Use GPU VA space for IH v4.4.2 in APU

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

For IH ring buffer and read/write pointers, use GPU VA space rather than
Guest PA on APU configs. Access through Guest PA doesn't work when IOMMU
is enabled. It is also beneficial in NUMA configs as it allocates from
the closest numa pool in a numa enabled system.

Signed-off-by: Lijo Lazar 
Reviewed-by: Harish Kasiviswanathan 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 536128447b71..17ccf02462ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -526,6 +526,7 @@ static int vega20_ih_early_init(void *handle)
 static int vega20_ih_sw_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   bool use_bus_addr = true;
int r;
 
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
@@ -533,14 +534,18 @@ static int vega20_ih_sw_init(void *handle)
if (r)
return r;
 
-   r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true);
+   if ((adev->flags & AMD_IS_APU) &&
+   (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2)))
+   use_bus_addr = false;
+
+   r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
if (r)
return r;
 
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
 
-   r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true);
+   r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, use_bus_addr);
if (r)
return r;
 
@@ -559,7 +564,7 @@ static int vega20_ih_sw_init(void *handle)
/* initialize ih control registers offset */
vega20_ih_init_register_offset(adev);
 
-   r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+   r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, 
use_bus_addr);
if (r)
return r;
 
-- 
2.40.1



[PATCH] drm/amdgpu: Simplify aquavanjram instance mapping

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Simplify so as to use the same sequence to assign logical to physical
ids for all IPs.

Signed-off-by: Lijo Lazar 
Acked-by: Leo Liu 
Tested-by: James Zhu 
Signed-off-by: Alex Deucher 
---
 .../drm/amd/amdgpu/aqua_vanjaram_reg_init.c   | 33 +--
 1 file changed, 8 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
index 51d3cb81e37a..68d732dd9ecb 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
@@ -101,32 +101,15 @@ static void aqua_vanjaram_populate_ip_map(struct 
amdgpu_device *adev,
 
 void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev)
 {
-   int xcc_mask, sdma_mask;
-   int l, i;
-
-   /* Map GC instances */
-   l = 0;
-   xcc_mask = adev->gfx.xcc_mask;
-   while (xcc_mask) {
-   i = ffs(xcc_mask) - 1;
-   adev->ip_map.dev_inst[GC_HWIP][l++] = i;
-   xcc_mask &= ~(1 << i);
-   }
-   for (; l < HWIP_MAX_INSTANCE; l++)
-   adev->ip_map.dev_inst[GC_HWIP][l] = -1;
-
-   l = 0;
-   sdma_mask = adev->sdma.sdma_mask;
-   while (sdma_mask) {
-   i = ffs(sdma_mask) - 1;
-   adev->ip_map.dev_inst[SDMA0_HWIP][l++] = i;
-   sdma_mask &= ~(1 << i);
-   }
-   for (; l < HWIP_MAX_INSTANCE; l++)
-   adev->ip_map.dev_inst[SDMA0_HWIP][l] = -1;
+   u32 ip_map[][2] = {
+   { GC_HWIP, adev->gfx.xcc_mask },
+   { SDMA0_HWIP, adev->sdma.sdma_mask },
+   { VCN_HWIP, adev->vcn.inst_mask },
+   };
+   int i;
 
-   /* This covers both VCN and JPEG, JPEG is only alias of VCN */
-   aqua_vanjaram_populate_ip_map(adev, VCN_HWIP, adev->vcn.inst_mask);
+   for (i = 0; i < ARRAY_SIZE(ip_map); ++i)
+   aqua_vanjaram_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]);
 
adev->ip_map.logical_to_dev_inst = aqua_vanjaram_logical_to_dev_inst;
 }
-- 
2.40.1



[PATCH 12/12] drm/amdgpu/vcn: Use buffer object's deletion logic

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

VCN DPG buffer object is initialized to NULL. If allocated, buffer object
deletion logic will take care of NULL check and delete accordingly. This
is useful for cases where indirect sram flag could be manipulated later
after buffer allocation.

Signed-off-by: Lijo Lazar 
Reviewed-by: Christian König 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 57dabfe1a1be..06ec2dc55857 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -235,11 +235,11 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << j))
continue;
 
-   if (adev->vcn.indirect_sram) {
-   amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
- 
&adev->vcn.inst[j].dpg_sram_gpu_addr,
- (void 
**)&adev->vcn.inst[j].dpg_sram_cpu_addr);
-   }
+   amdgpu_bo_free_kernel(
+   &adev->vcn.inst[j].dpg_sram_bo,
+   &adev->vcn.inst[j].dpg_sram_gpu_addr,
+   (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
+
kvfree(adev->vcn.inst[j].saved_bo);
 
amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
-- 
2.40.1



[PATCH 10/12] drm/amdgpu: Add a read after write DB_CTRL for vcn_v4_0_3

2023-05-09 Thread Alex Deucher
From: Sonny Jiang 

To make sure VCN DB_CTRL is delivered before doorbell write.

Signed-off-by: Sonny Jiang 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index b0e28d611f2d..9d0c3dc76547 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -195,6 +195,11 @@ static int vcn_v4_0_3_hw_init(void *handle)
ring->doorbell_index
<< 
VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
+
+   /* Read DB_CTRL to flush the write DB_CTRL command. */
+   RREG32_SOC15(
+   VCN, GET_INST(VCN, ring->me),
+   regVCN_RB1_DB_CTRL);
}
 
r = amdgpu_ring_test_helper(ring);
-- 
2.40.1



[PATCH 07/12] drm/amdgpu: use physical AID index for ring name

2023-05-09 Thread Alex Deucher
From: James Zhu 

Use physical AID index for VCN/JPEG ring name instead of
logical AID index.

Signed-off-by: James Zhu 
Reviewed-by: Sonny Jiang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index c0e90e27f24b..ea9cb098a144 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -110,7 +110,7 @@ static int jpeg_v4_0_3_sw_init(void *handle)
ring->doorbell_index =
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
1 + j + 9 * jpeg_inst;
-   sprintf(ring->name, "jpeg_dec_%d.%d", i, j);
+   sprintf(ring->name, "jpeg_dec_%d.%d", 
adev->jpeg.inst[i].aid_id, j);
r = amdgpu_ring_init(adev, ring, 512, 
&adev->jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 759f64a4acf4..b0e28d611f2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -112,7 +112,7 @@ static int vcn_v4_0_3_sw_init(void *handle)
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
9 * vcn_inst;
ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
-   sprintf(ring->name, "vcn_unified_%d", i);
+   sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
 AMDGPU_RING_PRIO_DEFAULT,
 &adev->vcn.inst[i].sched_score);
-- 
2.40.1



[PATCH 08/12] drm/amdgpu: A workaround for JPEG_v4_0_3 ring test fail

2023-05-09 Thread Alex Deucher
From: Sonny Jiang 

The jpeg_v4_0_3 jpeg_pitch register uses UVD_JRBC_SCRATCH0. It needs to
move WREG() to after jpeg_start.
Switch to a posted register write when doing the ring test to make sure
the register write lands before we test the result.

Signed-off-by: Sonny Jiang 
Reviewed-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 388466a5f730..9a1db2bd03e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -124,11 +124,14 @@ int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring 
*ring)
if (amdgpu_sriov_vf(adev))
return 0;
 
-   WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 
0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
 
+   WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 
0xCAFEDEAD);
+   /* Add a read register to make sure the write register is executed. */
+   RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+
amdgpu_ring_write(ring, 
PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
-- 
2.40.1



[PATCH 11/12] drm/amdgpu: Use a different value than 0xDEADBEEF for jpeg ring test

2023-05-09 Thread Alex Deucher
From: Sonny Jiang 

0xDEADBEEF is a standard anti-hang value. Using it may cause a
false positive in the ring test.

Signed-off-by: Sonny Jiang 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 9a1db2bd03e7..8c479669c459 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -133,12 +133,12 @@ int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring 
*ring)
RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
 
amdgpu_ring_write(ring, 
PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0));
-   amdgpu_ring_write(ring, 0xDEADBEEF);
+   amdgpu_ring_write(ring, 0xABADCAFE);
amdgpu_ring_commit(ring);
 
for (i = 0; i < adev->usec_timeout; i++) {
tmp = 
RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
-   if (tmp == 0xDEADBEEF)
+   if (tmp == 0xABADCAFE)
break;
udelay(1);
}
-- 
2.40.1



[PATCH 05/12] drm/amdgpu: Fix harvest reporting of VCN

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Use VCN instance mask to check if an instance is harvested or not.

Signed-off-by: Lijo Lazar 
Reviewed-by: James Zhu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index ae5852f80549..caae6bf2fd30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -313,6 +313,7 @@ static void amdgpu_discovery_harvest_config_quirk(struct 
amdgpu_device *adev)
case 0xCF:
case 0xDF:
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+   adev->vcn.inst_mask &= ~AMDGPU_VCN_HARVEST_VCN1;
break;
default:
break;
@@ -899,7 +900,7 @@ static uint8_t amdgpu_discovery_get_harvest_info(struct 
amdgpu_device *adev,
/* Until a uniform way is figured, get mask based on hwid */
switch (hw_id) {
case VCN_HWID:
-   harvest = (1 << inst) & adev->vcn.harvest_config;
+   harvest = ((1 << inst) & adev->vcn.inst_mask) == 0;
break;
case DMU_HWID:
if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)
-- 
2.40.1



[PATCH 09/12] drm/amdgpu: fixes a JPEG get write/read pointer bug

2023-05-09 Thread Alex Deucher
From: Sonny Jiang 

Parentheses are needed around the macro parameters.

Signed-off-by: Sonny Jiang 
Reviewed-by: David (Ming Qiang) Wu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/soc15_common.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 1c9e924b5f8c..3730c5ec202f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -71,7 +71,8 @@
 AMDGPU_REGS_NO_KIQ, ip##_HWIP)
 
 #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \
-
__RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) 
+ offset, 0, ip##_HWIP)
+
__RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + 
(reg)) + \
+(offset), 0, ip##_HWIP)
 
 #define WREG32_SOC15(ip, inst, reg, value) \
 
__WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), 
\
-- 
2.40.1



[PATCH 06/12] drm/amdgpu/vcn: use dummy register selects AID for VCN_RAM ucode

2023-05-09 Thread Alex Deucher
From: James Zhu 

Use dummy register 0xDEADBEEF to select the AID for the PSP VCN_RAM ucode.

Signed-off-by: James Zhu 
Reviewed-by: Sonny Jiang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 49b07843efd1..759f64a4acf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -681,9 +681,15 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device 
*adev, int inst_idx, b
tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
 
-   if (indirect)
+   if (indirect) {
+   DRM_DEV_DEBUG(adev->dev, "VCN %d start: on AID %d",
+   inst_idx, adev->vcn.inst[inst_idx].aid_id);
adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
(uint32_t 
*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+   /* Use dummy register 0xDEADBEEF passing AID selection to PSP 
FW */
+   WREG32_SOC15_DPG_MODE(inst_idx, 0xDEADBEEF,
+   adev->vcn.inst[inst_idx].aid_id, 0, true);
+   }
 
/* enable clock gating */
vcn_v4_0_3_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
-- 
2.40.1



[PATCH 04/12] drm/amdgpu: Use logical ids for VCN/JPEG v4.0.3

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Address VCN/JPEG instances using logical ids. Whenever register access is
required, get the physical instance using GET_INST.

Signed-off-by: Lijo Lazar 
Acked-by: Leo Liu 
Tested-by: James Zhu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h   |  29 +-
 .../drm/amd/amdgpu/aqua_vanjaram_reg_init.c   |  14 +-
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c  | 251 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c   | 431 ++
 4 files changed, 408 insertions(+), 317 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 47463ef10fce..1eb9ccd1d83d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -141,18 +141,23 @@
RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA);
\
})
 
-#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect)  
\
-   do {
\
-   if (!indirect) {
\
-   WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); 
\
-   WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL,  
\
-   (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT |
\
-mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT |   
\
-offset << 
UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT));   \
-   } else {
\
-   *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = 
offset;\
-   *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; 
\
-   }   
\
+#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect)  
   \
+   do {
  \
+   if (!indirect) {
  \
+   WREG32_SOC15(VCN, GET_INST(VCN, inst_idx),  
  \
+mmUVD_DPG_LMA_DATA, value);
  \
+   WREG32_SOC15(   
  \
+   VCN, GET_INST(VCN, inst_idx),   
  \
+   mmUVD_DPG_LMA_CTL,  
  \
+   (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT |
  \
+mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT |   
  \
+offset << 
UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+   } else {
  \
+   *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ =
  \
+   offset; 
  \
+   *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ =
  \
+   value;  
  \
+   }   
  \
} while (0)
 
 #define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2)
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
index 90fe77db9bee..51d3cb81e37a 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
@@ -70,6 +70,8 @@ static int8_t aqua_vanjaram_logical_to_dev_inst(struct 
amdgpu_device *adev,
switch (block) {
case GC_HWIP:
case SDMA0_HWIP:
+   /* Both JPEG and VCN as JPEG is only alias of VCN */
+   case VCN_HWIP:
dev_inst = adev->ip_map.dev_inst[block][inst];
break;
default:
@@ -379,7 +381,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device 
*adev)
 int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
 {
u32 mask, inst_mask = adev->sdma.sdma_mask;
-   int ret, i, num_inst;
+   int ret, i;
 
/* generally 1 AID supports 4 instances */
adev->sdma.num_inst_per_aid = 4;
@@ -394,11 +396,15 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device 
*adev)
adev->aid_mask |= (1 << i);
}
 
-   num_inst = hweight32(adev->aid_mask);
+   /* Harvest config is not used for aqua vanjaram. VCN and JPEGs will be
+* addressed based on logical instance ids.
+ 

[PATCH 02/12] drm/amdgpu: Add instance mask for VCN and JPEG

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Keep an instance mask formed by physical instance numbers for VCN and JPEG
IPs. Populate the mask from discovery table information.

Signed-off-by: Lijo Lazar 
Acked-by: Leo Liu 
Tested-by: James Zhu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 30 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h  |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h   |  1 +
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index d81b2e1e8aee..ae5852f80549 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -549,10 +549,19 @@ static void 
amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
switch (le16_to_cpu(ip->hw_id)) {
case VCN_HWID:
(*vcn_harvest_count)++;
-   if (ip->instance_number == 0)
+   if (ip->instance_number == 0) {
adev->vcn.harvest_config |= 
AMDGPU_VCN_HARVEST_VCN0;
-   else
+   adev->vcn.inst_mask &=
+   
~AMDGPU_VCN_HARVEST_VCN0;
+   adev->jpeg.inst_mask &=
+   
~AMDGPU_VCN_HARVEST_VCN0;
+   } else {
adev->vcn.harvest_config |= 
AMDGPU_VCN_HARVEST_VCN1;
+   adev->vcn.inst_mask &=
+   
~AMDGPU_VCN_HARVEST_VCN1;
+   adev->jpeg.inst_mask &=
+   
~AMDGPU_VCN_HARVEST_VCN1;
+   }
break;
case DMU_HWID:
adev->harvest_ip_mask |= 
AMD_HARVEST_IP_DMU_MASK;
@@ -601,6 +610,11 @@ static void 
amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
(1 << harvest_info->list[i].number_instance);
adev->jpeg.harvest_config |=
(1 << harvest_info->list[i].number_instance);
+
+   adev->vcn.inst_mask &=
+   ~(1U << harvest_info->list[i].number_instance);
+   adev->jpeg.inst_mask &=
+   ~(1U << harvest_info->list[i].number_instance);
break;
case DMU_HWID:
adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
@@ -1188,6 +1202,8 @@ static int amdgpu_discovery_reg_base_init(struct 
amdgpu_device *adev)
 
adev->gfx.xcc_mask = 0;
adev->sdma.sdma_mask = 0;
+   adev->vcn.inst_mask = 0;
+   adev->jpeg.inst_mask = 0;
bhdr = (struct binary_header *)adev->mman.discovery_bin;
ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
@@ -1235,12 +1251,18 @@ static int amdgpu_discovery_reg_base_init(struct 
amdgpu_device *adev)
adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
ip->revision & 0xc0;
ip->revision &= ~0xc0;
-   if (adev->vcn.num_vcn_inst < 
AMDGPU_MAX_VCN_INSTANCES)
+   if (adev->vcn.num_vcn_inst <
+   AMDGPU_MAX_VCN_INSTANCES) {
adev->vcn.num_vcn_inst++;
-   else
+   adev->vcn.inst_mask |=
+   (1U << ip->instance_number);
+   adev->jpeg.inst_mask |=
+   (1U << ip->instance_number);
+   } else {
dev_err(adev->dev, "Too many VCN 
instances: %d vs %d\n",
adev->vcn.num_vcn_inst + 1,
AMDGPU_MAX_VCN_INSTANCES);
+   }
}
if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index bb700a2b97c2..90516f623f56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amd

[PATCH 03/12] drm/amdgpu: Add VCN logical to physical id mapping

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Add mappings for logical to physical id for VCN/JPEG 4.0.3

v2: make local function static (Alex)

Signed-off-by: Lijo Lazar 
Acked-by: Leo Liu 
Tested-by: James Zhu 
Signed-off-by: Alex Deucher 
---
 .../drm/amd/amdgpu/aqua_vanjaram_reg_init.c| 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
index fdc728f678d7..90fe77db9bee 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
@@ -82,6 +82,21 @@ static int8_t aqua_vanjaram_logical_to_dev_inst(struct 
amdgpu_device *adev,
return dev_inst;
 }
 
+static void aqua_vanjaram_populate_ip_map(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type ip_block,
+ uint32_t inst_mask)
+{
+   int l = 0, i;
+
+   while (inst_mask) {
+   i = ffs(inst_mask) - 1;
+   adev->ip_map.dev_inst[ip_block][l++] = i;
+   inst_mask &= ~(1 << i);
+   }
+   for (; l < HWIP_MAX_INSTANCE; l++)
+   adev->ip_map.dev_inst[ip_block][l] = -1;
+}
+
 void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev)
 {
int xcc_mask, sdma_mask;
@@ -108,6 +123,9 @@ void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev)
for (; l < HWIP_MAX_INSTANCE; l++)
adev->ip_map.dev_inst[SDMA0_HWIP][l] = -1;
 
+   /* This covers both VCN and JPEG, JPEG is only alias of VCN */
+   aqua_vanjaram_populate_ip_map(adev, VCN_HWIP, adev->vcn.inst_mask);
+
adev->ip_map.logical_to_dev_inst = aqua_vanjaram_logical_to_dev_inst;
 }
 
-- 
2.40.1



[PATCH 01/12] drm/amdgpu: Load vcn_v4_0_3 ucode during early_init

2023-05-09 Thread Alex Deucher
From: Sonny Jiang 

VCN loading ucode is moved to early_init with using 'amdgpu_ucode_*'
helpers.

Reviewed-by: Leo Liu 
Signed-off-by: Sonny Jiang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 746df23b2eaa..308dfe80a87c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -69,7 +69,7 @@ static int vcn_v4_0_3_early_init(void *handle)
vcn_v4_0_3_set_unified_ring_funcs(adev);
vcn_v4_0_3_set_irq_funcs(adev);
 
-   return 0;
+   return amdgpu_vcn_early_init(adev);
 }
 
 /**
-- 
2.40.1



[PATCH 6/7] drm/amdgpu: Fix discovery sys node harvest info

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Initialize sysfs nodes after harvest information is fetched and fetch the
correct harvest info based on each IP instance.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 41 +--
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 01b21988c1ae..d81b2e1e8aee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -877,6 +877,36 @@ static void ip_disc_release(struct kobject *kobj)
kfree(ip_top);
 }
 
+static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
+uint16_t hw_id, uint8_t inst)
+{
+   uint8_t harvest = 0;
+
+   /* Until a uniform way is figured, get mask based on hwid */
+   switch (hw_id) {
+   case VCN_HWID:
+   harvest = (1 << inst) & adev->vcn.harvest_config;
+   break;
+   case DMU_HWID:
+   if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)
+   harvest = 0x1;
+   break;
+   case UMC_HWID:
+   /* TODO: It needs another parsing; for now, ignore.*/
+   break;
+   case GC_HWID:
+   harvest = ((1 << inst) & adev->gfx.xcc_mask) == 0;
+   break;
+   case SDMA0_HWID:
+   harvest = ((1 << inst) & adev->sdma.sdma_mask) == 0;
+   break;
+   default:
+   break;
+   }
+
+   return harvest;
+}
+
 static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
  struct ip_die_entry *ip_die_entry,
  const size_t _ip_offset, const int 
num_ips,
@@ -949,7 +979,10 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device 
*adev,
ip_hw_instance->major = ip->major;
ip_hw_instance->minor = ip->minor;
ip_hw_instance->revision = ip->revision;
-   ip_hw_instance->harvest = ip->variant;
+   ip_hw_instance->harvest =
+   amdgpu_discovery_get_harvest_info(
+   adev, ip_hw_instance->hw_id,
+   ip_hw_instance->num_instance);
ip_hw_instance->num_base_addresses = 
ip->num_base_address;
 
for (kk = 0; kk < ip_hw_instance->num_base_addresses; 
kk++) {
@@ -1035,6 +1068,9 @@ static int amdgpu_discovery_sysfs_init(struct 
amdgpu_device *adev)
struct kset *die_kset;
int res, ii;
 
+   if (!adev->mman.discovery_bin)
+   return -EINVAL;
+
adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL);
if (!adev->ip_top)
return -ENOMEM;
@@ -1282,8 +1318,6 @@ static int amdgpu_discovery_reg_base_init(struct 
amdgpu_device *adev)
}
}
 
-   amdgpu_discovery_sysfs_init(adev);
-
return 0;
 }
 
@@ -2224,6 +2258,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device 
*adev)
}
 
amdgpu_discovery_init_soc_config(adev);
+   amdgpu_discovery_sysfs_init(adev);
 
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
-- 
2.40.1



[PATCH 1/7] drm/amdgpu: Add parsing of acpi xcc objects

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Add parsing of ACPI xcc objects and fill in relevant info from them by
invoking the DSM methods.

Signed-off-by: Lijo Lazar 
Reviewed-and-tested-by: Rajneesh Bhardwaj 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 294 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |   1 +
 3 files changed, 297 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 743fc5f137b0..196c2a8a6b48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1425,11 +1425,13 @@ int amdgpu_acpi_pcie_notify_device_ready(struct 
amdgpu_device *adev);
 void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
 bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
 void amdgpu_acpi_detect(void);
+void amdgpu_acpi_release(void);
 #else
 static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
 static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
 static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { 
return false; }
 static inline void amdgpu_acpi_detect(void) { }
+static inline void amdgpu_acpi_release(void) { }
 static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return 
false; }
 static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
  u8 dev_state, bool drv_state) 
{ return 0; }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index edaf3ded4a04..d9cc07e01293 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -38,6 +38,43 @@
 #include "amd_acpi.h"
 #include "atom.h"
 
+/* Declare GUID for AMD _DSM method for XCCs */
+static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 0xa556, 0x44f2,
+0xb8, 0xb4, 0x45, 0x56, 0x2e,
+0x8c, 0x5b, 0xec);
+
+#define AMD_XCC_HID_START 3000
+#define AMD_XCC_DSM_GET_NUM_FUNCS 0
+#define AMD_XCC_DSM_GET_SUPP_MODE 1
+#define AMD_XCC_DSM_GET_XCP_MODE 2
+#define AMD_XCC_DSM_GET_VF_XCC_MAPPING 4
+#define AMD_XCC_DSM_GET_TMR_INFO 5
+#define AMD_XCC_DSM_NUM_FUNCS 5
+
+#define AMD_XCC_MAX_HID 24
+
+/* Encapsulates the XCD acpi object information */
+struct amdgpu_acpi_xcc_info {
+   struct list_head list;
+   int mem_node;
+   uint8_t xcp_node;
+   uint8_t phy_id;
+   acpi_handle handle;
+};
+
+struct amdgpu_acpi_dev_info {
+   struct list_head list;
+   struct list_head xcc_list;
+   uint16_t bdf;
+   uint16_t supp_xcp_mode;
+   uint16_t xcp_mode;
+   uint16_t mem_mode;
+   uint64_t tmr_base;
+   uint64_t tmr_size;
+};
+
+struct list_head amdgpu_acpi_dev_list;
+
 struct amdgpu_atif_notification_cfg {
bool enabled;
int command_code;
@@ -801,6 +838,240 @@ int amdgpu_acpi_smart_shift_update(struct drm_device 
*dev, enum amdgpu_ss ss_sta
return r;
 }
 
+/**
+ * amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu
+ * acpi device handle
+ *
+ * @handle: acpi handle
+ * @nid: NUMA Node id returned by the platform firmware
+ *
+ * Queries the ACPI interface to fetch the corresponding NUMA Node ID for a
+ * given amdgpu acpi device.
+ *
+ * Returns ACPI STATUS OK with Node ID on success or the corresponding failure 
reason
+ */
+acpi_status amdgpu_acpi_get_node_id(acpi_handle handle, int *nid)
+{
+#ifdef CONFIG_ACPI_NUMA
+   u64 pxm;
+   acpi_status status;
+
+   status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
+
+   if (ACPI_FAILURE(status))
+   return status;
+
+   *nid = pxm_to_node(pxm);
+
+   return_ACPI_STATUS(AE_OK);
+#else
+   return_ACPI_STATUS(AE_NOT_EXIST);
+#endif
+}
+
+struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf)
+{
+   struct amdgpu_acpi_dev_info *acpi_dev;
+
+   if (list_empty(&amdgpu_acpi_dev_list))
+   return NULL;
+
+   list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list)
+   if (acpi_dev->bdf == bdf)
+   return acpi_dev;
+
+   return NULL;
+}
+
+static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
+   struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf)
+{
+   struct amdgpu_acpi_dev_info *tmp;
+   union acpi_object *obj;
+   int ret = -ENOENT;
+
+   *dev_info = NULL;
+   tmp = kzalloc(sizeof(struct amdgpu_acpi_dev_info), GFP_KERNEL);
+   if (!tmp)
+   return -ENOMEM;
+
+   INIT_LIST_HEAD(&tmp->xcc_list);
+   INIT_LIST_HEAD(&tmp->list);
+   tmp->bdf = bdf;
+
+   obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_SUPP_MODE, NULL,
+   

[PATCH 7/7] drm/amdgpu: preserve the num_links in case of reflection

2023-05-09 Thread Alex Deucher
From: Shiwu Zhang 

For topology reflection, each socket to every other socket has exactly
the same topology info as the other way around. So it is safe
to keep the reflected num_links value, which would otherwise be overridden
by the link info output of the GET_PEER_LINKS command.

Signed-off-by: Shiwu Zhang 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 4395c53d09d8..ea47012795e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1402,7 +1402,8 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
topology->nodes[i].num_links = get_extended_data ?
topology->nodes[i].num_links +

link_info_output->nodes[i].num_links :
-   link_info_output->nodes[i].num_links;
+   ((requires_reflection && 
topology->nodes[i].num_links) ? topology->nodes[i].num_links :
+link_info_output->nodes[i].num_links);
 
/* reflect the topology information for 
bi-directionality */
if (requires_reflection && topology->nodes[i].num_hops)
-- 
2.40.1



[PATCH 5/7] drm/amdkfd: Flush TLB after unmapping for GFX v9.4.3

2023-05-09 Thread Alex Deucher
From: Philip Yang 

kfd_flush_tlb_after_unmap should return true for GFX v9.4.3, to do TLB
heavyweight flush after unmapping from GPU to guarantee that the GPU
will not access pages after they have been unmapped. This also helps
improve the mapping to GPU performance.

Without this, KFD accidentally flushes the TLB after mapping to GPU because the
vm update sequence number is increased by previous unmapping.

Signed-off-by: Philip Yang 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index fb3cf2c51da8..6e1c15682c28 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1405,9 +1405,9 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum 
TLB_FLUSH_TYPE type);
 
 static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
 {
-   return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
-  (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) &&
-  dev->adev->sdma.instance[0].fw_version >= 18) ||
+   return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+  KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
+  (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && 
dev->sdma_fw_version >= 18) ||
   KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
 }
 
-- 
2.40.1



[PATCH 4/7] drm/amdgpu: Add fallback path for discovery info

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

If SOC doesn't expose dedicated vram, discovery region may be
available through system memory. Rename the existing interface to
generic read_binary_from_mem and add a fallback path to read from system
memory.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 21 ++-
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 246070938c41..01b21988c1ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -226,13 +226,21 @@ static int 
amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev,
return -ENOENT;
 }
 
-static void amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, 
uint8_t *binary)
+static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
+uint8_t *binary)
 {
uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
-   uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+   int ret = 0;
 
-   amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
- adev->mman.discovery_tmr_size, false);
+   if (vram_size) {
+   uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+   amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
+ adev->mman.discovery_tmr_size, false);
+   } else {
+   ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
+   }
+
+   return ret;
 }
 
 static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, 
uint8_t *binary)
@@ -338,7 +346,10 @@ static int amdgpu_discovery_init(struct amdgpu_device 
*adev)
}
 
} else {
-   amdgpu_discovery_read_binary_from_vram(adev, 
adev->mman.discovery_bin);
+   r = amdgpu_discovery_read_binary_from_mem(
+   adev, adev->mman.discovery_bin);
+   if (r)
+   goto out;
}
 
/* check the ip discovery binary signature */
-- 
2.40.1



[PATCH 3/7] drm/amdgpu: Read discovery info from system memory

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

On certain ASICs, discovery info is available at reserved region in system
memory. The location is available through ACPI interface. Add API to read
discovery info from there.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 6701f17a4db6..246070938c41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -203,6 +203,29 @@ static int hw_id_map[MAX_HWIP] = {
[PCIE_HWIP] = PCIE_HWID,
 };
 
+static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device 
*adev, uint8_t *binary)
+{
+   u64 tmr_offset, tmr_size, pos;
+   void *discv_regn;
+   int ret;
+
+   ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size);
+   if (ret)
+   return ret;
+
+   pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
+
+   /* This region is read-only and reserved from system use */
+   discv_regn = memremap(pos, adev->mman.discovery_tmr_size, MEMREMAP_WC);
+   if (discv_regn) {
+   memcpy(binary, discv_regn, adev->mman.discovery_tmr_size);
+   memunmap(discv_regn);
+   return 0;
+   }
+
+   return -ENOENT;
+}
+
 static void amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, 
uint8_t *binary)
 {
uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
-- 
2.40.1



[PATCH 2/7] drm/amdgpu: Add API to get tmr info from acpi

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

In certain configs, TMR information is available from ACPI. Add API to
fetch the information.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 20 
 2 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 196c2a8a6b48..65b3aeef37ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1421,6 +1421,8 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device 
*adev,
u8 dev_state, bool drv_state);
 int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss 
ss_state);
 int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+u64 *tmr_size);
 
 void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
 bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
@@ -1428,6 +1430,11 @@ void amdgpu_acpi_detect(void);
 void amdgpu_acpi_release(void);
 #else
 static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
+static inline int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev,
+  u64 *tmr_offset, u64 *tmr_size)
+{
+   return -EINVAL;
+}
 static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
 static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { 
return false; }
 static inline void amdgpu_acpi_detect(void) { }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index d9cc07e01293..52c16b041193 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1072,6 +1072,26 @@ static int amdgpu_acpi_enumerate_xcc(void)
return 0;
 }
 
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+u64 *tmr_size)
+{
+   struct amdgpu_acpi_dev_info *dev_info;
+   u16 bdf;
+
+   if (!tmr_offset || !tmr_size)
+   return -EINVAL;
+
+   bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn;
+   dev_info = amdgpu_acpi_get_dev(bdf);
+   if (!dev_info)
+   return -ENOENT;
+
+   *tmr_offset = dev_info->tmr_base;
+   *tmr_size = dev_info->tmr_size;
+
+   return 0;
+}
+
 /**
  * amdgpu_acpi_event - handle notify events
  *
-- 
2.40.1



[PATCH 01/12] drm/amd/pm: Update pmfw header files for SMU v13.0.6

2023-05-09 Thread Alex Deucher
From: Asad kamal 

Update driver metrics table for SMU v13.0.6 to be
compatible with PMFW v85.47 version

Signed-off-by: Asad kamal 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h  | 4 ++--
 drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 6 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h
index be596777cd2c..370c6125d718 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h
@@ -26,7 +26,7 @@
 // *** IMPORTANT ***
 // PMFW TEAM: Always increment the interface version if
 // anything is changed in this file
-#define SMU13_0_6_DRIVER_IF_VERSION 0x08042022
+#define SMU13_0_6_DRIVER_IF_VERSION 0x08042023
 
 //I2C Interface
 #define NUM_I2C_CONTROLLERS8
@@ -106,7 +106,7 @@ typedef enum {
 } UCLK_DPM_MODE_e;
 
 typedef struct {
-  //0-26 SOC, 27-29 SOCIO
+  //0-23 SOC, 24-26 SOCIO, 27-29 SOC
   uint16_t avgPsmCount[30];
   uint16_t minPsmCount[30];
   floatavgPsmVoltage[30];
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
index bdccbb4a6276..3fe403615d86 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
@@ -123,7 +123,7 @@ typedef enum {
   VOLTAGE_GUARDBAND_COUNT
 } GFX_GUARDBAND_e;
 
-#define SMU_METRICS_TABLE_VERSION 0x1
+#define SMU_METRICS_TABLE_VERSION 0x3
 
 typedef struct {
   uint32_t AccumulationCounter;
@@ -198,6 +198,10 @@ typedef struct {
   uint32_t SocketThmResidencyAcc;
   uint32_t VrThmResidencyAcc;
   uint32_t HbmThmResidencyAcc;
+  uint32_t spare;
+
+  // New Items at end to maintain driver compatibility
+  uint32_t GfxclkFrequency[8];
 } MetricsTable_t;
 
 #define SMU_VF_METRICS_TABLE_VERSION 0x1
-- 
2.40.1



[PATCH 4/4] drm/amdkfd: Enable SVM on Native mode

2023-05-09 Thread Alex Deucher
From: Mukul Joshi 

This patch enables SVM capability on GFX9.4.3 when
run in Native mode. It also sets best_prefetch and
best_restore locations to CPU as there is no VRAM.

Signed-off-by: Mukul Joshi 
Acked-by: Rajneesh Bhardwaj 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index c87b12f1c957..8a1075eed5b4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2542,6 +2542,9 @@ svm_range_best_restore_location(struct svm_range *prange,
return -1;
}
 
+   if (node->adev->gmc.is_app_apu)
+   return 0;
+
if (prange->preferred_loc == gpuid ||
prange->preferred_loc == KFD_IOCTL_SVM_LOCATION_SYSMEM) {
return prange->preferred_loc;
@@ -3255,6 +3258,11 @@ svm_range_best_prefetch_location(struct svm_range 
*prange)
goto out;
}
 
+   if (bo_node->adev->gmc.is_app_apu) {
+   best_loc = 0;
+   goto out;
+   }
+
if (p->xnack_enabled)
bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
else
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 5116786718b6..7515ddade3ae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -200,7 +200,8 @@ void svm_range_list_lock_and_flush_work(struct 
svm_range_list *svms, struct mm_s
 /* SVM API and HMM page migration work together, device memory type
  * is initialized to not 0 when page migration register device memory.
  */
-#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
+#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0 ||\
+  (dev)->adev->gmc.is_app_apu)
 
 void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
 
-- 
2.40.1



[PATCH 3/4] drm/amdgpu: Add FGCG for GFX v9.4.3

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

It is not fine-grained; it behaves similarly to MGCG.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 082c1e9308d0..122ba1a505c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1156,8 +1156,9 @@ static int soc15_common_early_init(void *handle)
adev->cg_flags =
AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_SDMA_MGCG |
-   AMD_CG_SUPPORT_IH_CG | AMD_CG_SUPPORT_VCN_MGCG |
-   AMD_CG_SUPPORT_JPEG_MGCG;
+   AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_REPEATER_FGCG |
+   AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG |
+   AMD_CG_SUPPORT_IH_CG;
adev->pg_flags =
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
-- 
2.40.1



[PATCH 2/4] drm/amdgpu: Use transient mode during xcp switch

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

During partition switch, keep the state as transient mode. Fetch the
latest state if switch fails.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 18 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h |  1 +
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
index 5b999e5334bb..e8aa4d6c6b62 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -139,7 +139,7 @@ static int __amdgpu_xcp_init(struct amdgpu_xcp_mgr 
*xcp_mgr, int num_xcps)
 
 int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
 {
-   int ret, num_xcps = 0;
+   int ret, curr_mode, num_xcps = 0;
 
if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE)
return -EINVAL;
@@ -152,10 +152,22 @@ int amdgpu_xcp_switch_partition_mode(struct 
amdgpu_xcp_mgr *xcp_mgr, int mode)
 
mutex_lock(&xcp_mgr->xcp_lock);
 
+   curr_mode = xcp_mgr->mode;
+   /* State set to transient mode */
+   xcp_mgr->mode = AMDGPU_XCP_MODE_TRANS;
+
ret = xcp_mgr->funcs->switch_partition_mode(xcp_mgr, mode, &num_xcps);
 
-   if (ret)
+   if (ret) {
+   /* Failed, get whatever mode it's at now */
+   if (xcp_mgr->funcs->query_partition_mode)
+   xcp_mgr->mode = amdgpu_xcp_query_partition_mode(
+   xcp_mgr, AMDGPU_XCP_FL_LOCKED);
+   else
+   xcp_mgr->mode = curr_mode;
+
goto out;
+   }
 
if (!num_xcps || num_xcps > MAX_XCP) {
ret = -EINVAL;
@@ -183,7 +195,7 @@ int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr 
*xcp_mgr, u32 flags)
if (!(flags & AMDGPU_XCP_FL_LOCKED))
mutex_lock(&xcp_mgr->xcp_lock);
mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
-   if (mode != xcp_mgr->mode)
+   if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode)
dev_WARN(
xcp_mgr->adev->dev,
"Cached partition mode %d not matching with device mode 
%d",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index 9fa6f0ea2061..1d3dc7d68f54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -29,6 +29,7 @@
 #define MAX_XCP 8
 
 #define AMDGPU_XCP_MODE_NONE -1
+#define AMDGPU_XCP_MODE_TRANS -2
 
 #define AMDGPU_XCP_FL_NONE 0
 #define AMDGPU_XCP_FL_LOCKED (1 << 0)
-- 
2.40.1



[PATCH 1/4] drm/amdgpu: Add flags for partition mode query

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

It's not required to take lock on all cases while querying partition
mode. Querying partition mode during KFD init process doesn't need to
take a lock. Init process after a switch will already be happening under
lock. Control the behaviour by adding flags to xcp_query_partition_mode.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 8 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 5 -
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 +-
 5 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index dc0e5d18a0cc..a939ffe2969a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1171,7 +1171,8 @@ static ssize_t 
amdgpu_gfx_get_current_compute_partition(struct device *dev,
int mode;
char *partition_mode;
 
-   mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr);
+   mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+  AMDGPU_XCP_FL_NONE);
 
switch (mode) {
case AMDGPU_SPX_PARTITION_MODE:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
index f59bc450cabe..5b999e5334bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -170,7 +170,7 @@ int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr 
*xcp_mgr, int mode)
return ret;
 }
 
-int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
 {
int mode;
 
@@ -180,7 +180,8 @@ int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr 
*xcp_mgr)
if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode)
return xcp_mgr->mode;
 
-   mutex_lock(&xcp_mgr->xcp_lock);
+   if (!(flags & AMDGPU_XCP_FL_LOCKED))
+   mutex_lock(&xcp_mgr->xcp_lock);
mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
if (mode != xcp_mgr->mode)
dev_WARN(
@@ -188,7 +189,8 @@ int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr 
*xcp_mgr)
"Cached partition mode %d not matching with device mode 
%d",
xcp_mgr->mode, mode);
 
-   mutex_unlock(&xcp_mgr->xcp_lock);
+   if (!(flags & AMDGPU_XCP_FL_LOCKED))
+   mutex_unlock(&xcp_mgr->xcp_lock);
 
return mode;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index f0b973c6092f..9fa6f0ea2061 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -30,6 +30,9 @@
 
 #define AMDGPU_XCP_MODE_NONE -1
 
+#define AMDGPU_XCP_FL_NONE 0
+#define AMDGPU_XCP_FL_LOCKED (1 << 0)
+
 enum AMDGPU_XCP_IP_BLOCK {
AMDGPU_XCP_GFXHUB,
AMDGPU_XCP_GFX,
@@ -99,7 +102,7 @@ int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int 
xcp_id);
 
 int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
int init_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs);
-int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr);
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
 int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode);
 int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
 enum AMDGPU_XCP_IP_BLOCK ip, int instance);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 42877c4505f1..69867294117e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1940,7 +1940,9 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device 
*adev)
 {
int r, i, num_xcc;
 
-   if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr) == 
AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+   if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+   AMDGPU_XCP_FL_NONE) ==
+   AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, 
amdgpu_user_partt_mode);
 
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index d7cffd91f1d7..4293cbf9ceb0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -645,7 +645,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 * If the VMID range changes for GFX9.4.3, then this code MUST be
 * revisited.
 */
-   partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr);
+   partition_mode

[PATCH 12/12] drm/amd/pm: fix wrong smu socclk value

2023-05-09 Thread Alex Deucher
From: Yang Wang 

fix typo about smu socclk value.

Signed-off-by: Yang Wang 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 4b808c0addc2..a712b2bf2d25 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -387,7 +387,7 @@ static int smu_v13_0_6_get_dpm_ultimate_freq(struct 
smu_context *smu,
break;
case SMU_SOCCLK:
if (pptable->Init)
-   clock_limit = pptable->UclkFrequencyTable[0];
+   clock_limit = pptable->SocclkFrequencyTable[0];
break;
case SMU_FCLK:
if (pptable->Init)
-- 
2.40.1



[PATCH 11/12] drm/amdgpu: Add mode-2 reset in SMU v13.0.6

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Modifications to mode-2 reset flow for SMU v13.0.6 ASICs.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Reviewed-by: Asad Kamal 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c   |  9 +
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 16 +++-
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 9fbfd0811d06..082c1e9308d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -556,6 +556,15 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
if (connected_to_cpu)
return AMD_RESET_METHOD_MODE2;
break;
+   case IP_VERSION(13, 0, 6):
+   /* Use gpu_recovery param to target a reset method.
+* Enable triggering of GPU reset only if specified
+* by module parameter.
+*/
+   if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5)
+   return AMD_RESET_METHOD_MODE2;
+   else
+   return AMD_RESET_METHOD_NONE;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 6dcafd04c98d..4b808c0addc2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2024,27 +2024,27 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct 
smu_context *smu, void **table
 
 static int smu_v13_0_6_mode2_reset(struct smu_context *smu)
 {
-   u32 smu_version;
int ret = 0, index;
struct amdgpu_device *adev = smu->adev;
int timeout = 10;
 
-   smu_cmn_get_smc_version(smu, NULL, &smu_version);
-
index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG,
   SMU_MSG_GfxDeviceDriverReset);
 
mutex_lock(&smu->message_lock);
+
ret = smu_cmn_send_msg_without_waiting(smu, (uint16_t)index,
   SMU_RESET_MODE_2);
+
/* This is similar to FLR, wait till max FLR timeout */
msleep(100);
+
dev_dbg(smu->adev->dev, "restore config space...\n");
/* Restore the config space saved during init */
amdgpu_device_load_pci_state(adev->pdev);
 
dev_dbg(smu->adev->dev, "wait for reset ack\n");
-   while (ret == -ETIME && timeout) {
+   do {
ret = smu_cmn_wait_for_response(smu);
/* Wait a bit more time for getting ACK */
if (ret == -ETIME) {
@@ -2053,16 +2053,14 @@ static int smu_v13_0_6_mode2_reset(struct smu_context 
*smu)
continue;
}
 
-   if (ret != 1) {
+   if (ret) {
dev_err(adev->dev,
-   "failed to send mode2 message \tparam: 0x%08x 
response %#x\n",
+   "failed to send mode2 message \tparam: 0x%08x 
error code %d\n",
SMU_RESET_MODE_2, ret);
goto out;
}
-   }
+   } while (ret == -ETIME && timeout);
 
-   if (ret == 1)
-   ret = 0;
 out:
mutex_unlock(&smu->message_lock);
 
-- 
2.40.1



[PATCH 07/12] drm/amd/pm: Update hw mon attributes for GC version 9.4.3

2023-05-09 Thread Alex Deucher
From: Asad Kamal 

Update hw mon attributes for GC Version 9.4.3 to valid ones
on APU and Non APU systems

v2: Group checks along existing one
Added power limit & mclock for gc version 9.4.3

Signed-off-by: Asad Kamal 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 36 +-
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 58c2246918fd..385d83eb8706 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3362,7 +3362,8 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
return 0;
 
/* Skip crit temp on APU */
-   if ((adev->flags & AMD_IS_APU) && (adev->family >= AMDGPU_FAMILY_CZ) &&
+   if ((((adev->flags & AMD_IS_APU) && (adev->family >= AMDGPU_FAMILY_CZ)) ||
+   (gc_ver == IP_VERSION(9, 4, 3))) &&
(attr == &sensor_dev_attr_temp1_crit.dev_attr.attr ||
 attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr))
return 0;
@@ -3395,9 +3396,10 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
  attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't 
manage state */
effective_mode &= ~S_IWUSR;
 
-   /* In the case of APUs, this is only implemented on Vangogh */
+   /* not implemented yet for APUs other than GC 10.3.1 (vangogh) and 
9.4.3 */
if (((adev->family == AMDGPU_FAMILY_SI) ||
-((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1 
&&
+((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1)) &&
+ (gc_ver != IP_VERSION(9, 4, 3 &&
(attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
 attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr ||
 attr == &sensor_dev_attr_power1_cap.dev_attr.attr ||
@@ -3426,25 +3428,39 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
return 0;
 
if ((adev->family == AMDGPU_FAMILY_SI ||/* not implemented yet 
*/
-adev->family == AMDGPU_FAMILY_KV) &&   /* not implemented yet 
*/
+adev->family == AMDGPU_FAMILY_KV ||/* not implemented yet 
*/
+(gc_ver == IP_VERSION(9, 4, 3))) &&
(attr == &sensor_dev_attr_in0_input.dev_attr.attr ||
 attr == &sensor_dev_attr_in0_label.dev_attr.attr))
return 0;
 
-   /* only APUs have vddnb */
-   if (!(adev->flags & AMD_IS_APU) &&
+   /* only APUs other than gc 9,4,3 have vddnb */
+   if ((!(adev->flags & AMD_IS_APU) || (gc_ver == IP_VERSION(9, 4, 3))) &&
(attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
 attr == &sensor_dev_attr_in1_label.dev_attr.attr))
return 0;
 
-   /* no mclk on APUs */
-   if ((adev->flags & AMD_IS_APU) &&
+   /* no mclk on APUs other than gc 9,4,3*/
+   if (((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(9, 4, 3))) &&
(attr == &sensor_dev_attr_freq2_input.dev_attr.attr ||
 attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
return 0;
 
-   /* only SOC15 dGPUs support hotspot and mem temperatures */
if (((adev->flags & AMD_IS_APU) || gc_ver < IP_VERSION(9, 0, 0)) &&
+   (gc_ver != IP_VERSION(9, 4, 3)) &&
+   (attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
+attr == &sensor_dev_attr_temp2_label.dev_attr.attr))
+   return 0;
+
+   /* Only hotspot temperature for gc 9,4,3*/
+   if ((gc_ver == IP_VERSION(9, 4, 3)) &&
+   (attr == &sensor_dev_attr_temp1_input.dev_attr.attr ||
+attr == &sensor_dev_attr_temp1_label.dev_attr.attr))
+   return 0;
+
+   /* only SOC15 dGPUs support hotspot and mem temperatures */
+   if (((adev->flags & AMD_IS_APU) || gc_ver < IP_VERSION(9, 0, 0) ||
+   (gc_ver == IP_VERSION(9, 4, 3))) &&
(attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
 attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
 attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
@@ -3452,9 +3468,7 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
 attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
 attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
 attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
-attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
 attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
-attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
 attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
return 0;
 
-- 
2.40.1



[PATCH 10/12] drm/amd/pm: Notify PMFW about driver unload cases

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

On SMU v13.0.6 APUs, FW will need to take some actions if driver is going
to halt RLC. Notify PMFW that driver is not going to manage device so
that FW takes care of the required actions.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Reviewed-by: Asad Kamal 
Signed-off-by: Alex Deucher 
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 25 +--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 7474d3ffab93..6dcafd04c98d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -122,6 +122,7 @@ static const struct cmn2asic_msg_mapping 
smu_v13_0_6_message_map[SMU_MSG_MAX_COU
MSG_MAP(GetMaxGfxclkFrequency,   
PPSMC_MSG_GetMaxGfxDpmFreq,0),
MSG_MAP(SetSoftMinGfxclk,
PPSMC_MSG_SetSoftMinGfxClk,0),
MSG_MAP(SetSoftMaxGfxClk,
PPSMC_MSG_SetSoftMaxGfxClk,0),
+   MSG_MAP(PrepareMp1ForUnload, 
PPSMC_MSG_PrepareForDriverUnload,  0),
 };
 
 static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
@@ -1385,14 +1386,34 @@ int smu_v13_0_6_register_irq_handler(struct smu_context 
*smu)
return ret;
 }
 
+static int smu_v13_0_6_notify_unload(struct smu_context *smu)
+{
+   uint32_t smu_version;
+
+   smu_cmn_get_smc_version(smu, NULL, &smu_version);
+   if (smu_version <= 0x553500)
+   return 0;
+
+   dev_dbg(smu->adev->dev, "Notify PMFW about driver unload");
+   /* Ignore return, just intimate FW that driver is not going to be there 
*/
+   smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
+
+   return 0;
+}
+
 static int smu_v13_0_6_system_features_control(struct smu_context *smu,
   bool enable)
 {
+   struct amdgpu_device *adev = smu->adev;
int ret;
 
-   /* Nothing to be done for APU */
-   if (smu->adev->flags & AMD_IS_APU)
+   /* On APUs, notify FW that the device is no longer driver managed */
+   if (adev->flags & AMD_IS_APU) {
+   if (!enable)
+   smu_v13_0_6_notify_unload(smu);
+
return 0;
+   }
 
ret = smu_v13_0_system_features_control(smu, enable);
 
-- 
2.40.1



[PATCH 09/12] drm/amd/pm: Update PMFW headers for version 85.54

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

It adds message support for FW notification on driver unload.

Signed-off-by: Lijo Lazar 
Reviewed-by: Le Ma 
Reviewed-by: Asad Kamal 
Signed-off-by: Alex Deucher 
---
 .../inc/pmfw_if/smu13_driver_if_v13_0_6.h  | 18 --
 .../pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h   |  3 ++-
 2 files changed, 2 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h
index 8b7fa0fa59c3..de84fff39799 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h
@@ -132,22 +132,4 @@ typedef struct {
 #define THROTTLER_TEMP_AID_BIT 8
 #define THROTTLER_VRHOT_BIT9
 
-
-// These defines are used with the following messages:
-// SMC_MSG_TransferTableDram2Smu
-// SMC_MSG_TransferTableSmu2Dram
-// #define TABLE_PPTABLE 0
-// #define TABLE_AVFS_PSM_DEBUG  1
-// #define TABLE_AVFS_FUSE_OVERRIDE  2
-// #define TABLE_PMSTATUSLOG 3
-// #define TABLE_SMU_METRICS 4
-// #define TABLE_DRIVER_SMU_CONFIG   5
-// #define TABLE_I2C_COMMANDS6
-// #define TABLE_COUNT   7
-
-// // Table transfer status
-// #define TABLE_TRANSFER_OK 0x0
-// #define TABLE_TRANSFER_FAILED 0xFF
-// #define TABLE_TRANSFER_PENDING0xAB
-
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
index b838e8db395a..ae4f44c4b877 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
@@ -82,7 +82,8 @@
 #define PPSMC_MSG_SetSoftMaxGfxClk  0x31
 #define PPSMC_MSG_GetMinGfxDpmFreq  0x32
 #define PPSMC_MSG_GetMaxGfxDpmFreq  0x33
-#define PPSMC_Message_Count 0x34
+#define PPSMC_MSG_PrepareForDriverUnload0x34
+#define PPSMC_Message_Count 0x35
 
 //PPSMC Reset Types for driver msg argument
 #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET0x1
-- 
2.40.1



[PATCH 05/12] drm/amd/pm: Keep interface version in PMFW header

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

Use the interface version directly from PMFW interface header file rather
than keeping another definition in common smu13 file.

Signed-off-by: Lijo Lazar 
Reviewed-by: Asad kamal 
Signed-off-by: Alex Deucher 
---
 .../inc/pmfw_if/smu13_driver_if_aldebaran.h   |  2 +
 .../inc/pmfw_if/smu13_driver_if_v13_0_0.h |  2 +
 .../inc/pmfw_if/smu13_driver_if_v13_0_4.h |  2 +-
 .../inc/pmfw_if/smu13_driver_if_v13_0_5.h |  2 +-
 .../inc/pmfw_if/smu13_driver_if_v13_0_7.h |  2 +-
 .../inc/pmfw_if/smu13_driver_if_yellow_carp.h |  2 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  | 11 --
 .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c|  1 +
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 39 +--
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  |  1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c  |  1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c  |  1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  |  1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  |  1 +
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c  |  1 +
 15 files changed, 17 insertions(+), 52 deletions(-)

diff --git 
a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h
index 90200f31ff52..cddf45eebee8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h
@@ -24,6 +24,8 @@
 #ifndef SMU13_DRIVER_IF_ALDEBARAN_H
 #define SMU13_DRIVER_IF_ALDEBARAN_H
 
+#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
+
 #define NUM_VCLK_DPM_LEVELS   8
 #define NUM_DCLK_DPM_LEVELS   8
 #define NUM_SOCCLK_DPM_LEVELS 8
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index b686fb68a6e7..fe995651c6f5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -24,6 +24,8 @@
 #ifndef SMU13_DRIVER_IF_V13_0_0_H
 #define SMU13_DRIVER_IF_V13_0_0_H
 
+#define SMU13_0_0_DRIVER_IF_VERSION 0x32
+
 //Increment this version if SkuTable_t or BoardTable_t change
 #define PPTABLE_VERSION 0x26
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
index 2162ecd1057d..fee9293b3f97 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
@@ -27,7 +27,7 @@
 // *** IMPORTANT ***
 // SMU TEAM: Always increment the interface version if
 // any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 8
+#define SMU13_0_4_DRIVER_IF_VERSION 8
 
 typedef struct {
   int32_t value;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h
index aa971412b434..7589faa0232d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h
@@ -23,7 +23,7 @@
 #ifndef __SMU13_DRIVER_IF_V13_0_5_H__
 #define __SMU13_DRIVER_IF_V13_0_5_H__
 
-#define PMFW_DRIVER_IF_VERSION 4
+#define SMU13_0_5_DRIVER_IF_VERSION 4
 
 // Throttler Status Bitmask
 #define THROTTLER_STATUS_BIT_SPL0
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index 4c46a0392451..44e879c51cae 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -25,7 +25,7 @@
 
 // *** IMPORTANT ***
 // PMFW TEAM: Always increment the interface version on any change to this file
-#define SMU13_DRIVER_IF_VERSION  0x35
+#define SMU13_0_7_DRIVER_IF_VERSION  0x35
 
 //Increment this version if SkuTable_t or BoardTable_t change
 #define PPTABLE_VERSION 0x27
diff --git 
a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_yellow_carp.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_yellow_carp.h
index 25540cb28208..7417634827ad 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_yellow_carp.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_yellow_carp.h
@@ -26,7 +26,7 @@
 // *** IMPORTANT ***
 // SMU TEAM: Always increment the interface version if
 // any structure is changed in this file
-#define SMU13_DRIVER_IF_VERSION 4
+#define SMU13_YELLOW_CARP_DRIVER_IF_VERSION 4
 
 typedef struct {
   int32_t value;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index df3baaab0037..3ae8d5d252a3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -25,17 +25,6 @@
 
 #include "amdgpu_smu.h"
 
-#define SMU13

[PATCH 08/12] drm/amd/pm: Expose mem temperature for GC version 9.4.3

2023-05-09 Thread Alex Deucher
From: Asad Kamal 

Add mem temperature as part of hw mon attributes for GC version 9.4.3

Signed-off-by: Asad Kamal 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 385d83eb8706..40100b77b2d9 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3449,10 +3449,12 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
if (((adev->flags & AMD_IS_APU) || gc_ver < IP_VERSION(9, 0, 0)) &&
(gc_ver != IP_VERSION(9, 4, 3)) &&
(attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
-attr == &sensor_dev_attr_temp2_label.dev_attr.attr))
+attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
+attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
+attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
return 0;
 
-   /* Only hotspot temperature for gc 9,4,3*/
+   /* hotspot temperature for gc 9,4,3*/
if ((gc_ver == IP_VERSION(9, 4, 3)) &&
(attr == &sensor_dev_attr_temp1_input.dev_attr.attr ||
 attr == &sensor_dev_attr_temp1_label.dev_attr.attr))
@@ -3467,9 +3469,7 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
 attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
 attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
 attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
-attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
-attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
-attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
+attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr))
return 0;
 
/* only Vangogh has fast PPT limit and power labels */
-- 
2.40.1



[PATCH 03/12] drm/amd/pm: Update pmfw header files for SMU v13.0.6

2023-05-09 Thread Alex Deucher
From: Asad kamal 

Update driver interface for SMU v13.0.6 to be
compatible with PMFW v85.48 version

Signed-off-by: Asad kamal 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h   | 12 
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c |  1 -
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h
index 370c6125d718..8b7fa0fa59c3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h
@@ -121,6 +121,18 @@ typedef struct {
   floatminPsmVoltage[30];
 } AvfsDebugTableXcd_t;
 
+// Defines used for IH-based thermal interrupts to GFX driver - A/X only
+#define IH_INTERRUPT_ID_TO_DRIVER   0xFE
+#define IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING  0x7
+
+//thermal over-temp mask defines
+#define THROTTLER_TEMP_CCD_BIT 5
+#define THROTTLER_TEMP_XCD_BIT 6
+#define THROTTLER_TEMP_HBM_BIT 7
+#define THROTTLER_TEMP_AID_BIT 8
+#define THROTTLER_VRHOT_BIT9
+
+
 // These defines are used with the following messages:
 // SMC_MSG_TransferTableDram2Smu
 // SMC_MSG_TransferTableSmu2Dram
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index d0c49e8883e7..b08608caafd0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -175,7 +175,6 @@ static const struct cmn2asic_mapping 
smu_v13_0_6_table_map[SMU_TABLE_COUNT] = {
 #define THROTTLER_PPT_BIT 1
 #define THROTTLER_TEMP_SOC_BIT 2
 #define THROTTLER_TEMP_VR_GFX_BIT 3
-#define THROTTLER_TEMP_HBM_BIT 4
 
 static const uint8_t smu_v13_0_6_throttler_map[] = {
[THROTTLER_PPT_BIT] = (SMU_THROTTLER_PPT0_BIT),
-- 
2.40.1



[PATCH 06/12] drm/amd/pm: Initialize power limit for SMU v13.0.6

2023-05-09 Thread Alex Deucher
From: Lijo Lazar 

PMFW will initialize the power limit values even if PPT throttler
feature is disabled. Fetch the limit value from FW.

Signed-off-by: Lijo Lazar 
Reviewed-by: Asad Kamal 
Signed-off-by: Alex Deucher 
---
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 15 ---
 1 file changed, 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index b46e0414be60..7474d3ffab93 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1256,21 +1256,6 @@ static int smu_v13_0_6_get_power_limit(struct 
smu_context *smu,
uint32_t power_limit = 0;
int ret;
 
-   if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) {
-   if (current_power_limit)
-   *current_power_limit = 0;
-   if (default_power_limit)
-   *default_power_limit = 0;
-   if (max_power_limit)
-   *max_power_limit = 0;
-
-   dev_warn(
-   smu->adev->dev,
-   "PPT feature is not enabled, power values can't be 
fetched.");
-
-   return 0;
-   }
-
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetPptLimit, &power_limit);
 
if (ret) {
-- 
2.40.1



[PATCH 02/12] drm/amd/pm: Update gfx clock frequency for SMU v13.0.6

2023-05-09 Thread Alex Deucher
From: Asad kamal 

Update gfx clock frequency from metric table for SMU v13.0.6

Signed-off-by: Asad kamal 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 8969b3ff5c8f..d0c49e8883e7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -660,7 +660,10 @@ static int smu_v13_0_6_get_smu_metrics_data(struct 
smu_context *smu,
 {
struct smu_table_context *smu_table = &smu->smu_table;
MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table;
+   struct amdgpu_device *adev = smu->adev;
+   uint32_t smu_version;
int ret = 0;
+   int xcc_id;
 
ret = smu_v13_0_6_get_metrics_table(smu, NULL, false);
if (ret)
@@ -670,7 +673,13 @@ static int smu_v13_0_6_get_smu_metrics_data(struct 
smu_context *smu,
switch (member) {
case METRICS_CURR_GFXCLK:
case METRICS_AVERAGE_GFXCLK:
-   *value = 0;
+   smu_cmn_get_smc_version(smu, NULL, &smu_version);
+   if (smu_version >= 0x552F00) {
+   xcc_id = GET_INST(GC, 0);
+   *value = 
SMUQ10_TO_UINT(metrics->GfxclkFrequency[xcc_id]);
+   } else {
+   *value = 0;
+   }
break;
case METRICS_CURR_SOCCLK:
case METRICS_AVERAGE_SOCCLK:
-- 
2.40.1



[PATCH 04/12] drm/amd/pm: Add ih for SMU v13.0.6 thermal throttling

2023-05-09 Thread Alex Deucher
From: Asad kamal 

Add interrupt handler for thermal throttler events from
PMFW on SMUv13.0.6

Signed-off-by: Asad kamal 
Acked-by: Evan Quan 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 107 +-
 1 file changed, 104 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index b08608caafd0..43a855de7e9e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1297,6 +1297,109 @@ static int smu_v13_0_6_set_power_limit(struct 
smu_context *smu,
return smu_v13_0_set_power_limit(smu, limit_type, limit);
 }
 
+static int smu_v13_0_6_irq_process(struct amdgpu_device *adev,
+  struct amdgpu_irq_src *source,
+  struct amdgpu_iv_entry *entry)
+{
+   struct smu_context *smu = adev->powerplay.pp_handle;
+   uint32_t client_id = entry->client_id;
+   uint32_t src_id = entry->src_id;
+   /*
+* ctxid is used to distinguish different
+* events for SMCToHost interrupt
+*/
+   uint32_t ctxid = entry->src_data[0];
+   uint32_t data;
+
+   if (client_id == SOC15_IH_CLIENTID_MP1) {
+   if (src_id == IH_INTERRUPT_ID_TO_DRIVER) {
+   /* ACK SMUToHost interrupt */
+   data = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL);
+   data = REG_SET_FIELD(data, MP1_SMN_IH_SW_INT_CTRL, 
INT_ACK, 1);
+   WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, data);
+
+   switch (ctxid) {
+   case IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING:
+   /*
+* Increment the throttle interrupt counter
+*/
+   atomic64_inc(&smu->throttle_int_counter);
+
+   if 
(!atomic_read(&adev->throttling_logging_enabled))
+   return 0;
+
+   if (__ratelimit(&adev->throttling_logging_rs))
+   
schedule_work(&smu->throttling_logging_work);
+
+   break;
+   }
+   }
+   }
+
+   return 0;
+}
+
+int smu_v13_0_6_set_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+  unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+   uint32_t val = 0;
+
+   switch (state) {
+   case AMDGPU_IRQ_STATE_DISABLE:
+   /* For MP1 SW irqs */
+   val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL);
+   val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT_CTRL, INT_MASK, 1);
+   WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, val);
+
+   break;
+   case AMDGPU_IRQ_STATE_ENABLE:
+   /* For MP1 SW irqs */
+   val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT);
+   val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, ID, 0xFE);
+   val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, VALID, 0);
+   WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT, val);
+
+   val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL);
+   val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT_CTRL, INT_MASK, 0);
+   WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, val);
+
+   break;
+   default:
+   break;
+   }
+
+   return 0;
+}
+
+static const struct amdgpu_irq_src_funcs smu_v13_0_6_irq_funcs =
+{
+   .set = smu_v13_0_6_set_irq_state,
+   .process = smu_v13_0_6_irq_process,
+};
+
+int smu_v13_0_6_register_irq_handler(struct smu_context *smu)
+{
+   struct amdgpu_device *adev = smu->adev;
+   struct amdgpu_irq_src *irq_src = &smu->irq_source;
+   int ret = 0;
+
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
+   irq_src->num_types = 1;
+   irq_src->funcs = &smu_v13_0_6_irq_funcs;
+
+   ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_MP1,
+   IH_INTERRUPT_ID_TO_DRIVER,
+   irq_src);
+   if (ret)
+   return ret;
+
+   return ret;
+}
+
 static int smu_v13_0_6_system_features_control(struct smu_context *smu,
   bool enable)
 {
@@ -2042,11 +2145,9 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs 
= {
.feature_is_enabled = smu_cmn_feature_is_enabled,
.set_power_limit = smu_v13_0_6_set_power_limit,
.set_xgmi_pstate = smu_v13_0_set_xgmi_pstate,
-   /* TODO: Thermal limits unknown, skip these for now
-   .register_irq_handler = smu_v13_0_register_irq_handler,

[PATCH] drm/amdgpu: remove irq_get return value check in gfx_ras_late_init

2023-05-09 Thread Alex Deucher
From: Tao Zhou 

Not all ASICs support GFX CP ECC irq.

Signed-off-by: Tao Zhou 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 5218d4837656..dc0e5d18a0cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -786,17 +786,13 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, 
struct ras_common_if *r
if (r)
return r;
 
-   r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
-   if (r)
-   goto late_fini;
+   /* Not all ASICs support the irq, no need to check return value 
*/
+   amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
} else {
amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
}
 
return 0;
-late_fini:
-   amdgpu_ras_block_late_fini(adev, ras_block);
-   return r;
 }
 
 int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
-- 
2.40.1



[PATCH] drm/amdgpu: change the print level to warn for ip block disabled

2023-05-09 Thread Alex Deucher
From: Le Ma 

Avoid misleading users, as it's not a real error.

Signed-off-by: Le Ma 
Reviewed-by: Asad Kamal 
Reviewed-by: Amber Lin 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f95591b90f0b..d1dca02860b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2291,7 +2291,7 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
total = true;
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
-   DRM_ERROR("disabled ip block: %d <%s>\n",
+   DRM_WARN("disabled ip block: %d <%s>\n",
  i, adev->ip_blocks[i].version->funcs->name);
adev->ip_blocks[i].status.valid = false;
} else {
-- 
2.40.1



[PATCH] drm/amdgpu: fix sdma instance

2023-05-09 Thread Alex Deucher
From: "Stanley.Yang" 

Change the logical instance to the device instance
when querying RAS info.

Signed-off-by: Stanley.Yang 
Reviewed-by: Hawking Zhang 
Reviewed-by: Tao Zhou 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 570ea68c521f..bf47eb33c12e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -2123,6 +2123,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct 
amdgpu_device *adev,
   void *ras_err_status)
 {
struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+   uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst);
 
/* sdma v4_4_2 doesn't support query ce counts */
amdgpu_ras_inst_query_ras_error_count(adev,
@@ -2130,7 +2131,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct 
amdgpu_device *adev,
ARRAY_SIZE(sdma_v4_2_2_ue_reg_list),
sdma_v4_4_2_ras_memory_list,
ARRAY_SIZE(sdma_v4_4_2_ras_memory_list),
-   sdma_inst,
+   sdma_dev_inst,
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
&err_data->ue_count);
 }
@@ -2153,10 +2154,12 @@ static void sdma_v4_4_2_query_ras_error_count(struct 
amdgpu_device *adev,
 static void sdma_v4_4_2_inst_reset_ras_error_count(struct amdgpu_device *adev,
   uint32_t sdma_inst)
 {
+   uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst);
+
amdgpu_ras_inst_reset_ras_error_count(adev,
sdma_v4_2_2_ue_reg_list,
ARRAY_SIZE(sdma_v4_2_2_ue_reg_list),
-   sdma_inst);
+   sdma_dev_inst);
 }
 
 static void sdma_v4_4_2_reset_ras_error_count(struct amdgpu_device *adev)
-- 
2.40.1



[PATCH] drm/amdgpu: increase AMDGPU_MAX_RINGS

2023-05-09 Thread Alex Deucher
From: Le Ma 

On newer GPUs, the number of kernel rings is increased.

Signed-off-by: Le Ma 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1ac8a82b9b09..5192e3577e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -36,7 +36,7 @@ struct amdgpu_job;
 struct amdgpu_vm;
 
 /* max number of rings */
-#define AMDGPU_MAX_RINGS   102
+#define AMDGPU_MAX_RINGS   124
 #define AMDGPU_MAX_HWIP_RINGS  64
 #define AMDGPU_MAX_GFX_RINGS   2
 #define AMDGPU_MAX_SW_GFX_RINGS 2
-- 
2.40.1



[PATCH] drm/amdgpu: Increase Max GPU instance to 64

2023-05-09 Thread Alex Deucher
From: Mukul Joshi 

Increase Max GPU instances to 64 to handle multi-socket
system with GFX 9.4.3 asic.

Signed-off-by: Mukul Joshi 
Acked-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 85812d72ae7c..743fc5f137b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -109,7 +109,7 @@
 #include "amdgpu_mca.h"
 #include "amdgpu_ras.h"
 
-#define MAX_GPU_INSTANCE   16
+#define MAX_GPU_INSTANCE   64
 
 struct amdgpu_gpu_instance
 {
-- 
2.40.1



[PATCH 2/3] drm/amdgpu: Implement new dummy vram manager

2023-05-09 Thread Alex Deucher
From: Rajneesh Bhardwaj 

This adds a dummy vram manager to support ASICs that do not have a
dedicated or carved-out vram domain.

Reviewed-by: Felix Kuehling 
Signed-off-by: Rajneesh Bhardwaj 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 67 ++--
 1 file changed, 60 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 43d6a9d6a538..89d35d194f2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -370,6 +370,45 @@ int amdgpu_vram_mgr_query_page_status(struct 
amdgpu_vram_mgr *mgr,
return ret;
 }
 
+static void amdgpu_dummy_vram_mgr_debug(struct ttm_resource_manager *man,
+ struct drm_printer *printer)
+{
+   DRM_DEBUG_DRIVER("Dummy vram mgr debug\n");
+}
+
+static bool amdgpu_dummy_vram_mgr_compatible(struct ttm_resource_manager *man,
+  struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   DRM_DEBUG_DRIVER("Dummy vram mgr compatible\n");
+   return false;
+}
+
+static bool amdgpu_dummy_vram_mgr_intersects(struct ttm_resource_manager *man,
+  struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   DRM_DEBUG_DRIVER("Dummy vram mgr intersects\n");
+   return true;
+}
+
+static void amdgpu_dummy_vram_mgr_del(struct ttm_resource_manager *man,
+   struct ttm_resource *res)
+{
+   DRM_DEBUG_DRIVER("Dummy vram mgr deleted\n");
+}
+
+static int amdgpu_dummy_vram_mgr_new(struct ttm_resource_manager *man,
+  struct ttm_buffer_object *tbo,
+  const struct ttm_place *place,
+  struct ttm_resource **res)
+{
+   DRM_DEBUG_DRIVER("Dummy vram mgr new\n");
+   return -ENOSPC;
+}
+
 /**
  * amdgpu_vram_mgr_new - allocate new ranges
  *
@@ -817,6 +856,14 @@ static void amdgpu_vram_mgr_debug(struct 
ttm_resource_manager *man,
mutex_unlock(&mgr->lock);
 }
 
+static const struct ttm_resource_manager_func amdgpu_dummy_vram_mgr_func = {
+   .alloc  = amdgpu_dummy_vram_mgr_new,
+   .free   = amdgpu_dummy_vram_mgr_del,
+   .intersects = amdgpu_dummy_vram_mgr_intersects,
+   .compatible = amdgpu_dummy_vram_mgr_compatible,
+   .debug  = amdgpu_dummy_vram_mgr_debug
+};
+
 static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
.alloc  = amdgpu_vram_mgr_new,
.free   = amdgpu_vram_mgr_del,
@@ -841,17 +888,22 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
ttm_resource_manager_init(man, &adev->mman.bdev,
  adev->gmc.real_vram_size);
 
-   man->func = &amdgpu_vram_mgr_func;
-
-   err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
-   if (err)
-   return err;
-
mutex_init(&mgr->lock);
INIT_LIST_HEAD(&mgr->reservations_pending);
INIT_LIST_HEAD(&mgr->reserved_pages);
mgr->default_page_size = PAGE_SIZE;
 
+   if (!adev->gmc.is_app_apu) {
+   man->func = &amdgpu_vram_mgr_func;
+
+   err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+   if (err)
+   return err;
+   } else {
+   man->func = &amdgpu_dummy_vram_mgr_func;
+   DRM_INFO("Setup dummy vram mgr\n");
+   }
+
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
ttm_resource_manager_set_used(man, true);
return 0;
@@ -886,7 +938,8 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
drm_buddy_free_list(&mgr->mm, &rsv->allocated);
kfree(rsv);
}
-   drm_buddy_fini(&mgr->mm);
+   if (!adev->gmc.is_app_apu)
+   drm_buddy_fini(&mgr->mm);
mutex_unlock(&mgr->lock);
 
ttm_resource_manager_cleanup(man);
-- 
2.40.1



[PATCH 1/3] drm/amdgpu: Handle VRAM dependencies on GFXIP9.4.3

2023-05-09 Thread Alex Deucher
From: Rajneesh Bhardwaj 

[For 1P NPS1 mode driver bringup]

Changes required to initialize the amdgpu driver with frontdoor firmware
loading and discovery=2 with the native mode SBIOS that enables CPU GPU
unified interleaved memory.

sudo modprobe amdgpu discovery=2

Once PSP TMR region is reported via the ACPI interface, the dependency
on the ip_discovery.bin will be removed.

Choice of where to allocate driver table is given to each IP version. In
general, both GTT and VRAM domains will be considered. If one of the
tables has a strict restriction for VRAM domain, then only VRAM domain
is considered.

Reviewed-by: Felix Kuehling 
(lijo: Modified the handling for SMU Tables)
Signed-off-by: Lijo Lazar 
Signed-off-by: Rajneesh Bhardwaj 
Signed-off-by: Alex Deucher 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 89 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |  7 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c   |  3 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  9 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  6 ++
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c |  5 ++
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 ++-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  |  6 +-
 11 files changed, 99 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9abf940dbac1..bb7e9ab27a4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2291,8 +2291,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct 
amdgpu_device *adev,
(*mem)->dmabuf = dma_buf;
(*mem)->bo = bo;
(*mem)->va = va;
-   (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+   (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && 
!adev->gmc.is_app_apu ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 7c9b788ae0a9..a5c4f98f8cd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1044,7 +1044,7 @@ static const char *amdgpu_vram_names[] = {
 int amdgpu_bo_init(struct amdgpu_device *adev)
 {
/* On A+A platform, VRAM can be mapped as WB */
-   if (!adev->gmc.xgmi.connected_to_cpu) {
+   if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
/* reserve PAT memory space to WC for VRAM */
int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base,
adev->gmc.aper_size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 863fa331e6ff..4395c53d09d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -476,7 +476,8 @@ static int psp_sw_init(void *handle)
return ret;
 
ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
  &psp->fence_buf_bo,
  &psp->fence_buf_mc_addr,
  &psp->fence_buf);
@@ -484,7 +485,8 @@ static int psp_sw_init(void *handle)
goto failed1;
 
ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
  &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
  (void **)&psp->cmd_buf_mem);
if (ret)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4fb93f367b99..2bdd6bcad506 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1712,15 +1712,20 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device 
*adev)
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
}
 
-   ret = amdgpu_bo_create_kernel_at(adev,
-adev->gmc.real_vram_size - 
adev->mman.discovery_tmr_size,
-adev->mman.discovery_tmr_size,
-&adev->mman.discovery_memory,
-  

[PATCH 3/3] drm/amdgpu: Create VRAM BOs on GTT for GFXIP9.4.3

2023-05-09 Thread Alex Deucher
From: Rajneesh Bhardwaj 

On GFXIP9.4.3 APP APU where there is no dedicated VRAM domain handle
VRAM BO allocation requests on CPU domain and validate them on GTT.

Support for handling multi-socket and multi-numa partitions within a
socket will be added by future patches, this enables 1P NPS1 asic
bringup configuration.

Reviewed-by: Felix Kuehling 
Signed-off-by: Rajneesh Bhardwaj 
Signed-off-by: Alex Deucher 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c   | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index bb7e9ab27a4c..0ebd39a41e74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1648,9 +1648,16 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 */
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
-   alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
-   alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
+
+   if (adev->gmc.is_app_apu) {
+   domain = AMDGPU_GEM_DOMAIN_GTT;
+   alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
+   alloc_flags = 0;
+   } else {
+   alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) 
?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+   }
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
@@ -1737,6 +1744,13 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
(*mem)->domain = domain;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
+
+   if (adev->gmc.is_app_apu &&
+   ((*mem)->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) {
+   bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+   bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+   }
+
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
 
if (user_addr) {
-- 
2.40.1



Re: [PATCH] drm/sched: Check scheduler work queue before calling timeout handling

2023-05-09 Thread Alex Deucher
+ dri-devel

On Tue, May 9, 2023 at 5:43 PM  wrote:
>
> From: Vitaly Prosyak 
>
> During an IGT GPU reset test we see again oops despite of
> commit 0c8c901aaaebc9bf8bf189ffc116e678f7a2dc16
> drm/sched: Check scheduler ready before calling timeout handling.
>
> It uses ready condition whether to call drm_sched_fault which unwind
> the TDR leads to GPU reset.
> However it looks the ready condition is overloaded with other meanings,
> for example, for the following stack is related GPU reset :
>
> 0  gfx_v9_0_cp_gfx_start
> 1  gfx_v9_0_cp_gfx_resume
> 2  gfx_v9_0_cp_resume
> 3  gfx_v9_0_hw_init
> 4  gfx_v9_0_resume
> 5  amdgpu_device_ip_resume_phase2
>
> does the following:
> /* start the ring */
> gfx_v9_0_cp_gfx_start(adev);
> ring->sched.ready = true;
>
> The same approach is for other ASICs as well :
> gfx_v8_0_cp_gfx_resume
> gfx_v10_0_kiq_resume, etc...
>
> As a result, our GPU reset test causes GPU fault which calls unconditionally 
> gfx_v9_0_fault
> and then drm_sched_fault. However now it depends on whether the interrupt 
> service routine
> drm_sched_fault is executed after gfx_v9_0_cp_gfx_start is completed which 
> sets the ready
> field of the scheduler to true even  for not initialized schedulers and 
> causes oops vs
> no fault or when ISR  drm_sched_fault is completed prior  
> gfx_v9_0_cp_gfx_start and
> NULL pointer dereference does not occur.
>
> Use the field timeout_wq  to prevent oops for uninitialized schedulers.
> The field could be initialized by the work queue of resetting the domain.
>
> Signed-off-by: Vitaly Prosyak 
> ---
>  drivers/gpu/drm/scheduler/sched_main.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
> b/drivers/gpu/drm/scheduler/sched_main.c
> index 649fac2e1ccb..670b7997f389 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -308,7 +308,7 @@ static void drm_sched_start_timeout(struct 
> drm_gpu_scheduler *sched)
>   */
>  void drm_sched_fault(struct drm_gpu_scheduler *sched)
>  {
> -   if (sched->ready)
> +   if (sched->timeout_wq)
> mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
>  }
>  EXPORT_SYMBOL(drm_sched_fault);
> --
> 2.25.1
>


[PATCH] drm/sched: Check scheduler work queue before calling timeout handling

2023-05-09 Thread vitaly.prosyak
From: Vitaly Prosyak 

During an IGT GPU reset test we again see an oops despite
commit 0c8c901aaaebc9bf8bf189ffc116e678f7a2dc16
drm/sched: Check scheduler ready before calling timeout handling.

It uses the ready condition to decide whether to call drm_sched_fault,
which unwinds the TDR and leads to a GPU reset.
However it looks the ready condition is overloaded with other meanings,
for example, for the following stack is related GPU reset :

0  gfx_v9_0_cp_gfx_start
1  gfx_v9_0_cp_gfx_resume
2  gfx_v9_0_cp_resume
3  gfx_v9_0_hw_init
4  gfx_v9_0_resume
5  amdgpu_device_ip_resume_phase2

does the following:
/* start the ring */
gfx_v9_0_cp_gfx_start(adev);
ring->sched.ready = true;

The same approach is for other ASICs as well :
gfx_v8_0_cp_gfx_resume
gfx_v10_0_kiq_resume, etc...

As a result, our GPU reset test causes GPU fault which calls unconditionally 
gfx_v9_0_fault
and then drm_sched_fault. However now it depends on whether the interrupt 
service routine
drm_sched_fault is executed after gfx_v9_0_cp_gfx_start is completed which sets 
the ready
field of the scheduler to true even  for not initialized schedulers and causes 
oops vs
no fault or when ISR  drm_sched_fault is completed prior  gfx_v9_0_cp_gfx_start 
and
NULL pointer dereference does not occur.

Use the timeout_wq field to prevent an oops for uninitialized schedulers.
The field can be initialized by the work queue used for resetting the domain.

Signed-off-by: Vitaly Prosyak 
---
 drivers/gpu/drm/scheduler/sched_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 649fac2e1ccb..670b7997f389 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -308,7 +308,7 @@ static void drm_sched_start_timeout(struct 
drm_gpu_scheduler *sched)
  */
 void drm_sched_fault(struct drm_gpu_scheduler *sched)
 {
-   if (sched->ready)
+   if (sched->timeout_wq)
mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
 }
 EXPORT_SYMBOL(drm_sched_fault);
-- 
2.25.1



Re: [RFC PATCH 12/40] drm/amd/display: add plane HDR multiplier driver-private property

2023-05-09 Thread Joshua Ashton
FWIW, we technically do use it right now, but it is always set to 1 in S.31.32.

Before we used shaper + 3D LUT we did use it for scaling SDR content,
but given we always have a shaper + 3D LUT it made sense for us to
roll that into there.

On Tue, 9 May 2023 at 20:00, Harry Wentland  wrote:
>
> On 5/9/23 12:54, Joshua Ashton wrote:
> > We currently do not have a use for this as we settled on per-plane 3D
> > LUT + Shaper, but we might end up wanting to use in our scRGB stack
> > someday so I would like to keep it.
> >
>
> uAPI should always have a userspace that uses it. But if we go
> and put it behind an #ifdef anyways I don't mind taking this
> if we foresee use for it in the near future. A gamescope experiment
> showing how this can be used to scale sRGB planes would be great.
> I assume that's sort of how you intend to use it.
>
> Harry
>
> > On Tue, 9 May 2023 at 16:37, Melissa Wen  wrote:
> >>
> >> On 05/08, Harry Wentland wrote:
> >>>
> >>>
> >>> On 4/23/23 10:10, Melissa Wen wrote:
>  From: Joshua Ashton 
> 
>  Multiplier to 'gain' the plane. When PQ is decoded using the fixed func
>  transfer function to the internal FP16 fb, 1.0 -> 80 nits (on AMD at
>  least) When sRGB is decoded, 1.0 -> 1.0.  Therefore, 1.0 multiplier = 80
>  nits for SDR content. So if you want, 203 nits for SDR content, pass in
>  (203.0 / 80.0).
> 
> >>>
> >>> Is gamescope intending to use this?
> >>
> >> I don't think so. Again, I'll double check and drop it accordingly.
> >>
> >> Melissa
> >>
> >>>
> >>> Harry
> >>>
>  Co-developed-by: Melissa Wen 
>  Signed-off-by: Melissa Wen 
>  Signed-off-by: Joshua Ashton 
>  ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   |  6 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  4 +++
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 12 +
>   .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 25 ++-
>   4 files changed, 41 insertions(+), 6 deletions(-)
> 
>  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
>  b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>  index 24595906dab1..dd658f162f6f 100644
>  --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>  +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>  @@ -1326,6 +1326,12 @@ amdgpu_display_create_color_properties(struct 
>  amdgpu_device *adev)
>  return -ENOMEM;
>  adev->mode_info.plane_degamma_tf_property = prop;
> 
>  +   prop = drm_property_create_range(adev_to_drm(adev),
>  +0, "AMD_PLANE_HDR_MULT", 0, 
>  UINT_MAX);
>  +   if (!prop)
>  +   return -ENOMEM;
>  +   adev->mode_info.plane_hdr_mult_property = prop;
>  +
>  return 0;
>   }
>   #endif
>  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
>  b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
>  index ab9ce6f26c90..65a9d62ffbe4 100644
>  --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
>  +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
>  @@ -387,6 +387,10 @@ struct amdgpu_mode_info {
>   * linearize content with or without LUT.
>   */
>  struct drm_property *plane_degamma_tf_property;
>  +   /**
>  +* @plane_hdr_mult_property:
>  +*/
>  +   struct drm_property *plane_hdr_mult_property;
>   #endif
>   };
> 
>  diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
>  b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>  index 005632c1c9ec..bb7307b9cfd5 100644
>  --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>  +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>  @@ -51,6 +51,7 @@
> 
>   #define AMDGPU_DMUB_NOTIFICATION_MAX 5
> 
>  +#define AMDGPU_HDR_MULT_DEFAULT (0x1LL)
>   /*
>   #include "include/amdgpu_dal_power_if.h"
>   #include "amdgpu_dm_irq.h"
>  @@ -736,6 +737,17 @@ struct dm_plane_state {
>   * linearize.
>   */
>  enum drm_transfer_function degamma_tf;
>  +   /**
>  +* @hdr_mult:
>  +*
>  +* Multiplier to 'gain' the plane.  When PQ is decoded using the 
>  fixed
>  +* func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
>  +* AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
>  +* Therefore, 1.0 multiplier = 80 nits for SDR content.  So if you
>  +* want, 203 nits for SDR content, pass in (203.0 / 80.0).  Format is
>  +* S31.32 sign-magnitude.
>  +*/
>  +   __u64 hdr_mult;
>   #endif
>   };
> 
>  diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
>  b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
>  index 5b458cc0781c..57169dae8b3d 100644
>  --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
>  +++ b/drivers/gpu/drm/amd/display/amdgpu_d

Re: [RFC PATCH 12/40] drm/amd/display: add plane HDR multiplier driver-private property

2023-05-09 Thread Harry Wentland
On 5/9/23 12:54, Joshua Ashton wrote:
> We currently do not have a use for this as we settled on per-plane 3D
> LUT + Shaper, but we might end up wanting to use in our scRGB stack
> someday so I would like to keep it.
> 

uAPI should always have a userspace that uses it. But if we go
and put it behind an #ifdef anyways I don't mind taking this
if we foresee use for it in the near future. A gamescope experiment
showing how this can be used to scale sRGB planes would be great.
I assume that's sort of how you intend to use it.

Harry

> On Tue, 9 May 2023 at 16:37, Melissa Wen  wrote:
>>
>> On 05/08, Harry Wentland wrote:
>>>
>>>
>>> On 4/23/23 10:10, Melissa Wen wrote:
 From: Joshua Ashton 

 Multiplier to 'gain' the plane. When PQ is decoded using the fixed func
 transfer function to the internal FP16 fb, 1.0 -> 80 nits (on AMD at
 least) When sRGB is decoded, 1.0 -> 1.0.  Therefore, 1.0 multiplier = 80
 nits for SDR content. So if you want, 203 nits for SDR content, pass in
 (203.0 / 80.0).

>>>
>>> Is gamescope intending to use this?
>>
>> I don't think so. Again, I'll double check and drop it accordingly.
>>
>> Melissa
>>
>>>
>>> Harry
>>>
 Co-developed-by: Melissa Wen 
 Signed-off-by: Melissa Wen 
 Signed-off-by: Joshua Ashton 
 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   |  6 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  4 +++
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 12 +
  .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 25 ++-
  4 files changed, 41 insertions(+), 6 deletions(-)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
 index 24595906dab1..dd658f162f6f 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
 @@ -1326,6 +1326,12 @@ amdgpu_display_create_color_properties(struct 
 amdgpu_device *adev)
 return -ENOMEM;
 adev->mode_info.plane_degamma_tf_property = prop;

 +   prop = drm_property_create_range(adev_to_drm(adev),
 +0, "AMD_PLANE_HDR_MULT", 0, UINT_MAX);
 +   if (!prop)
 +   return -ENOMEM;
 +   adev->mode_info.plane_hdr_mult_property = prop;
 +
 return 0;
  }
  #endif
 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
 index ab9ce6f26c90..65a9d62ffbe4 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
 @@ -387,6 +387,10 @@ struct amdgpu_mode_info {
  * linearize content with or without LUT.
  */
 struct drm_property *plane_degamma_tf_property;
 +   /**
 +* @plane_hdr_mult_property:
 +*/
 +   struct drm_property *plane_hdr_mult_property;
  #endif
  };

 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
 b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
 index 005632c1c9ec..bb7307b9cfd5 100644
 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
 +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
 @@ -51,6 +51,7 @@

  #define AMDGPU_DMUB_NOTIFICATION_MAX 5

 +#define AMDGPU_HDR_MULT_DEFAULT (0x1LL)
  /*
  #include "include/amdgpu_dal_power_if.h"
  #include "amdgpu_dm_irq.h"
 @@ -736,6 +737,17 @@ struct dm_plane_state {
  * linearize.
  */
 enum drm_transfer_function degamma_tf;
 +   /**
 +* @hdr_mult:
 +*
 +* Multiplier to 'gain' the plane.  When PQ is decoded using the fixed
 +* func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
 +* AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
 +* Therefore, 1.0 multiplier = 80 nits for SDR content.  So if you
 +* want, 203 nits for SDR content, pass in (203.0 / 80.0).  Format is
 +* S31.32 sign-magnitude.
 +*/
 +   __u64 hdr_mult;
  #endif
  };

 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
 b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
 index 5b458cc0781c..57169dae8b3d 100644
 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
 +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
 @@ -1321,8 +1321,10 @@ static void dm_drm_plane_reset(struct drm_plane 
 *plane)
 __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);

  #ifdef CONFIG_STEAM_DECK
 -   if (amdgpu_state)
 +   if (amdgpu_state) {
 amdgpu_state->degamma_tf = DRM_TRANSFER_FUNCTION_DEFAULT;
 +   amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
 +   }
  #endif
  }

 @@ -1424,11 +1426,11 @@ static void dm_drm_plane_destroy_state(struct 
 drm_p

[PATCH 2/2] amdgpu: validate drm_amdgpu_gem_va against overflows

2023-05-09 Thread Chia-I Wu
The existing validations are incorrect and insufficient.  This is
motivated by OOB access in amdgpu_vm_update_range when
offset_in_bo+map_size overflows.

Fixes: 9f7eb5367d00 ("drm/amdgpu: actually use the VM map parameters")
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 7 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 6 ++
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 071f6565cf971..36d5adfdf0f69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -688,8 +688,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
 
-   if (args->map_size == 0) {
-   dev_dbg(dev->dev, "invalid map_size 0x%LX\n", args->map_size);
+   if (args->map_size == 0 ||
+   args->va_address + args->map_size < args->va_address ||
+   args->offset_in_bo + args->map_size < args->offset_in_bo) {
+   dev_dbg(dev->dev, "invalid map_size 0x%LX (va_address 0x%LX, 
offset_in_bo 0x%LX)\n",
+   args->map_size, args->va_address, args->offset_in_bo);
return -EINVAL;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index fa5819d581655..cd0a0f06e11ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1437,8 +1437,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
-   if (saddr >= eaddr ||
-   (bo && offset + size > amdgpu_bo_size(bo)) ||
+   if ((bo && offset + size > amdgpu_bo_size(bo)) ||
(eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
return -EINVAL;
 
@@ -1498,8 +1497,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
-   if (saddr >= eaddr ||
-   (bo && offset + size > amdgpu_bo_size(bo)) ||
+   if ((bo && offset + size > amdgpu_bo_size(bo)) ||
(eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
return -EINVAL;
 
-- 
2.40.1.521.gf1e218fcd8-goog



[PATCH 1/2] amdgpu: validate drm_amdgpu_gem_va addrs for all ops

2023-05-09 Thread Chia-I Wu
Extend the address and size validations to AMDGPU_VA_OP_UNMAP and
AMDGPU_VA_OP_CLEAR by moving the validations to amdgpu_gem_va_ioctl.

Internal users of amdgpu_vm_bo_map are no longer validated but they
should be fine.

Userspace (radeonsi and radv) seems fine as well.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 12 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 10 --
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d8e683688daab..071f6565cf971 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -681,6 +681,18 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
uint64_t vm_size;
int r = 0;
 
+   if (args->va_address & ~PAGE_MASK || args->offset_in_bo & ~PAGE_MASK ||
+   args->map_size & ~PAGE_MASK) {
+   dev_dbg(dev->dev, "unaligned va_address 0x%LX, offset_in_bo 
0x%LX, or map_size 0x%LX\n",
+   args->va_address, args->offset_in_bo, args->map_size);
+   return -EINVAL;
+   }
+
+   if (args->map_size == 0) {
+   dev_dbg(dev->dev, "invalid map_size 0x%LX\n", args->map_size);
+   return -EINVAL;
+   }
+
if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
dev_dbg(dev->dev,
"va_address 0x%LX is in reserved area 0x%LX\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b9441ab457ea7..fa5819d581655 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1435,11 +1435,6 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_vm *vm = bo_va->base.vm;
uint64_t eaddr;
 
-   /* validate the parameters */
-   if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
-   size == 0 || size & ~PAGE_MASK)
-   return -EINVAL;
-
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
if (saddr >= eaddr ||
@@ -1501,11 +1496,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
uint64_t eaddr;
int r;
 
-   /* validate the parameters */
-   if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
-   size == 0 || size & ~PAGE_MASK)
-   return -EINVAL;
-
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
if (saddr >= eaddr ||
-- 
2.40.1.521.gf1e218fcd8-goog



Re: [RFC PATCH 00/40] drm/amd/display: add AMD driver-specific properties for color mgmt

2023-05-09 Thread Harry Wentland



On 5/9/23 12:52, Melissa Wen wrote:
> On 05/08, Harry Wentland wrote:
>>
>>
>> On 4/23/23 10:10, Melissa Wen wrote:
>>> Hi all,
>>>
>>> Joshua Ashton and I (with the great collaboration of Harry Wentland -
>>> thanks) have been working on KMS color pipeline enhancement for Steam
>>> Deck/SteamOS by exposing the large set of color caps available in AMD
>>> display HW.
>>>
>>
>> Thank you for your work on this.
>>
>>> This patchset results from this full-stack work, including pre-blending
>>> and post-blending new color properties. The first two patches fix
>>> quantization issues on shaper LUT programming. Just after, we have one
>>> patch that adds a config option to restrict AMD color feature usage. The
>>> following 13 patches implement AMD driver-private color properties
>>> (pending detachment of property counter and plane color_mgmt_changed
>>> from DRM). Finally, the last 24 patches rework the AMD display manager
>>> and color management to support the properties exposed.
>>>
>>> In short, for pre-blending, we added the following:
>>> - plane degamma LUT and predefined transfer function;
>>> - plane HDR multiplier
>>> - plane shaper LUT/transfer function;
>>> - plane 3D LUT; and finally,
>>> - plane blend LUT/transfer function, just before blending.
>>>
>>> After blending, we already have DRM CRTC degamma/gamma LUTs and CTM,
>>> therefore, we extend CRTC color pipeline with the following:
>>> - CRTC shaper LUT/transfer function;
>>> - CRTC 3D LUT; and
>>> - CRTC gamma transfer function.
>>>
>>> You can already find the AMD color capabilities and color management
>>> pipeline documented here:
>>> https://dri.freedesktop.org/docs/drm/gpu/amdgpu/display/display-manager.html#color-management-properties
>>>
>>> In previous iterations, we tried to provide a generic solution for
>>> post-blending shaper and 3D LUT [1][2][3], and also Alex Hung worked on
>>> a pre-blending 3D LUT solution[4] extending plane color mgmt proposal
>>> from Uma Shankar [5]. However, we identified during our work [6] that
>>> AMD provides many other valuable capabilities that we don't find in
>>> other vendors, so we started to work on AMD driver-private color
>>> properties that better describe its color pipeline, enabling us to
>>> expose full AMD color capabilities on Deck HW.
>>>
>>> Our primary purpose is to avoid usage limitations of color calibration
>>> features provided by HW just because we don't have an interface for
>>> that. At the same time, a generic solution doesn't fit well since some
>>> of these capabilities seem AMD HW specific, such as hardcoded
>>> curve/predefined transfer function and shaper 1D LUTs sandwiching 3D
>>> LUT.
>>>
>>> So far, we keep these properties' usage under an AMD display config
>>> option (STEAM_DECK). However, we are fine with having them fully
>>> available to other DCN HW generations. In the current proposal, we are
>>> already checking ASICs before exposing a color feature. We can work on
>>> 3D LUT resource acquisition details to fit them to DCN 3+ families that
>>> support them. Indeed, before moving to these config boundaries, we
>>> started working on an open solution for any AMD HW [7].
>>>
>>
>> The problem with a CONFIG_XYZ option is that it becomes uAPI and can't be
>> removed. I feel we have a good proposal going for the generic solution.
>> Would it work for you if we don't make this a CONFIG_ option? What I mean
>> is using
>>
>> #define AMD_PRIVATE_COLOR
>>
>> around the interface bits, which are only compiled when building with
>> -DAMD_PRIVATE_COLOR
> 
> I think we can go with this approach for the properties already in use
> by Gamescope/SteamOS.
>>
>> That way we have the option to rip the driver-private stuff out later
>> while still allowing for experimentation now.
>>
>> Or, alternatively, we can merge everything but the stuff currently
>> guarded by CONFIG_STEAM_DECK, so that custom kernels can enable this
>> functionality by simply merging one patch that includes all the
>> CONFIG_STEAM_DECK stuff.
> 
> An then we can drop the interface of things that Gamescope is not
> managing, but keep those things already programmed on DM color for any
> future usage. What do you think?
> 

Sure.

Harry

> Melissa
> 
>>
>> This will allow us to merge the vast majority of the code without
>> having to maintain it in downstream repo.
>>
>>> The userspace case here is Gamescope which is the compositor for
>>> SteamOS. It's already using all of this functionality (although with a
>>> VALVE1_ prefix instead of AMD) to implement its color management
>>> pipeline right now:
>>> https://github.com/ValveSoftware/gamescope
>>>
>>> We are planning on shipping our color management support with gamut
>>> mapping, HDR, SDR on HDR, HDR on SDR, and much more in Steam OS 3.5. A
>>> brief overview of our color pipeline can be found here:
>>> https://github.com/ValveSoftware/gamescope/blob/master/src/docs/Steam%20Deck%20Display%20Pipeline.png
>>>
>>> We have also had some other userspace inte

Re: [RFC PATCH 03/40] drm/amd/display: introduce Steam Deck color features to AMD display driver

2023-05-09 Thread Harry Wentland



On 5/9/23 12:23, Melissa Wen wrote:
> On 05/08, Harry Wentland wrote:
>> On 4/23/23 10:10, Melissa Wen wrote:
>>> We are enabling a large set of color calibration features to enhance KMS
>>> color mgmt but these properties are specific to AMD display HW, and
>>> cannot be provided by other vendors. Therefore, set a config option to
>>> enable AMD driver-private properties used on Steam Deck color mgmt
>>> pipeline.
>>>
>>> Co-developed-by: Joshua Ashton 
>>> Signed-off-by: Joshua Ashton 
>>> Signed-off-by: Melissa Wen 
>>> ---
>>>  drivers/gpu/drm/amd/display/Kconfig | 6 ++
>>>  1 file changed, 6 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/display/Kconfig 
>>> b/drivers/gpu/drm/amd/display/Kconfig
>>> index 06b438217c61..c45a8deb1098 100644
>>> --- a/drivers/gpu/drm/amd/display/Kconfig
>>> +++ b/drivers/gpu/drm/amd/display/Kconfig
>>> @@ -53,5 +53,11 @@ config DRM_AMD_SECURE_DISPLAY
>>>  of crc of specific region via debugfs.
>>>  Cooperate with specific DMCU FW.
>>>  
>>> +config STEAM_DECK
>>> +   bool "Enable color calibration features for Steam Deck"
>>> +   depends on DRM_AMD_DC
>>> +   help
>>> + Choose this option if you want to use AMDGPU features for broader
>>> + color management support on Steam Deck.
>>>  
>>
>> If we can drop this (i.e. don't offer a CONFIG_ option to allow enablement of
>> the uAPI, but build with -DCONFIG_STEAM_DECK) it would go a long way to keep
>> us from requiring to support this forever.
> 
> I see, I'll follow this path. Still on that, I've changed
> CONFIG_STEAM_DECK (too generic) to CONFIG_DRM_AMD_COLOR_STEAMDECK.

I'm not sure I like the steamdeck name in there. There's nothing
inherently in this API that's only for the steamdeck.

Harry

> Does it sound better?
> 
> Thanks,
> 
> Melissa
> 
>>
>> Harry
>>
>>>  endmenu
>>
>>



Re: [RFC PATCH 03/40] drm/amd/display: introduce Steam Deck color features to AMD display driver

2023-05-09 Thread Harry Wentland



On 5/9/23 12:52, Joshua Ashton wrote:
> I think the idea is that we wouldn't have a config option so it
> doesn't inherently become linux kernel uAPI?
> 
> Then we can just build our SteamOS kernels with that definiton set.
> 

That's the idea. Would that work for you?

Harry

> On Tue, 9 May 2023 at 16:26, Melissa Wen  wrote:
>>
>> On 05/08, Harry Wentland wrote:
>>> On 4/23/23 10:10, Melissa Wen wrote:
 We are enabling a large set of color calibration features to enhance KMS
 color mgmt but these properties are specific to AMD display HW, and
 cannot be provided by other vendors. Therefore, set a config option to
 enable AMD driver-private properties used on Steam Deck color mgmt
 pipeline.

 Co-developed-by: Joshua Ashton 
 Signed-off-by: Joshua Ashton 
 Signed-off-by: Melissa Wen 
 ---
  drivers/gpu/drm/amd/display/Kconfig | 6 ++
  1 file changed, 6 insertions(+)

 diff --git a/drivers/gpu/drm/amd/display/Kconfig 
 b/drivers/gpu/drm/amd/display/Kconfig
 index 06b438217c61..c45a8deb1098 100644
 --- a/drivers/gpu/drm/amd/display/Kconfig
 +++ b/drivers/gpu/drm/amd/display/Kconfig
 @@ -53,5 +53,11 @@ config DRM_AMD_SECURE_DISPLAY
  of crc of specific region via debugfs.
  Cooperate with specific DMCU FW.

 +config STEAM_DECK
 +   bool "Enable color calibration features for Steam Deck"
 +   depends on DRM_AMD_DC
 +   help
 + Choose this option if you want to use AMDGPU features for broader
 + color management support on Steam Deck.

>>>
>>> If we can drop this (i.e. don't offer a CONFIG_ option to allow enablement 
>>> of
>>> the uAPI, but build with -DCONFIG_STEAM_DECK) it would go a long way to keep
>>> us from requiring to support this forever.
>>
>> I see, I'll follow this path. Still on that, I've changed
>> CONFIG_STEAM_DECK (too generic) to CONFIG_DRM_AMD_COLOR_STEAMDECK.
>> Does it sound better?
>>
>> Thanks,
>>
>> Melissa
>>
>>>
>>> Harry
>>>
  endmenu
>>>
>>>



Re: [PATCH 2/2] drm/amdgpu/gfx11: Adjust gfxoff before powergating on gfx11 as well

2023-05-09 Thread Guilherme G. Piccoli
On 09/05/2023 13:49, Bas Nieuwenhuizen wrote:
> From: "Guilherme G. Piccoli" 
> 
> (Bas: speculative change to mirror gfx10/gfx9)
> 
> Signed-off-by: Guilherme G. Piccoli 
> Cc: Alex Deucher 
> ---

Thanks a lot for both patches, Bas! As for this second one, although I
attached it on GitLab, you should also sign it off, since you're the one
sending it to the ML (I guess heh)

BTW, I couldn't test this one since I don't have GFX11 HW, so appreciate
if anyone from AMD (or any community member) could give it a try...

Cheers,


Guilherme


Re: [RFC PATCH 07/40] drm/amd/display: add CRTC gamma TF to driver-private props

2023-05-09 Thread Joshua Ashton
I am okay with us dropping the shaper + 3D LUT from crtc. It has
problems anyway wrt. atomicity.

On Tue, 9 May 2023 at 16:34, Melissa Wen  wrote:
>
> On 05/08, Harry Wentland wrote:
> >
> >
> > On 4/23/23 10:10, Melissa Wen wrote:
> > > From: Joshua Ashton 
> > >
> > > Add predefined transfer function property to DRM CRTC gamma to convert
> > > to wire encoding with or without gamma LUT.
> > >
> >
> > Are all these new CRTC properties used by gamescope? I would be reluctant
> > to merge them if they're currently not needed.
>
> The regamma TF yes. The shaper and 3D LUT not yet.
>
> I'll double check with Joshie and drop from the series what we don't
> have a short-term perspective of usage.
>
> >
> > > Co-developed-by: Melissa Wen 
> > > Signed-off-by: Melissa Wen 
> > > Signed-off-by: Joshua Ashton 
> > > ---
> > >  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   | 22 ++
> > >  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  4 
> > >  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 23 +++
> > >  .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c| 13 +++
> > >  4 files changed, 62 insertions(+)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> > > index 2abe5fe87c10..1913903cab88 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> > > @@ -1248,6 +1248,19 @@ amdgpu_display_user_framebuffer_create(struct 
> > > drm_device *dev,
> > >  }
> > >
> > >  #ifdef CONFIG_STEAM_DECK
> > > +static const struct drm_prop_enum_list drm_transfer_function_enum_list[] 
> > > = {
> > > +   { DRM_TRANSFER_FUNCTION_DEFAULT, "Default" },
> > > +   { DRM_TRANSFER_FUNCTION_SRGB, "sRGB" },
> > > +   { DRM_TRANSFER_FUNCTION_BT709, "BT.709" },
> > > +   { DRM_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" },
> > > +   { DRM_TRANSFER_FUNCTION_LINEAR, "Linear" },
> > > +   { DRM_TRANSFER_FUNCTION_UNITY, "Unity" },
> > > +   { DRM_TRANSFER_FUNCTION_HLG, "HLG (Hybrid Log Gamma)" },
> > > +   { DRM_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" },
> > > +   { DRM_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" },
> > > +   { DRM_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" },
> > > +};
> > > +
> >
> > Would it be better to prefix things with AMD_/amd_ to avoid confusion? On 
> > the other
> > hand, these will likely just move into DRM core once we get the generic 
> > color uAPI.
> >
> > Harry
> >
> > >  static int
> > >  amdgpu_display_create_color_properties(struct amdgpu_device *adev)
> > >  {
> > > @@ -1281,6 +1294,15 @@ amdgpu_display_create_color_properties(struct 
> > > amdgpu_device *adev)
> > > return -ENOMEM;
> > > adev->mode_info.lut3d_size_property = prop;
> > >
> > > +   prop = drm_property_create_enum(adev_to_drm(adev),
> > > +   DRM_MODE_PROP_ENUM,
> > > +   "GAMMA_TF",
> > > +   drm_transfer_function_enum_list,
> > > +   
> > > ARRAY_SIZE(drm_transfer_function_enum_list));
> > > +   if (!prop)
> > > +   return -ENOMEM;
> > > +   adev->mode_info.gamma_tf_property = prop;
> > > +
> > > return 0;
> > >  }
> > >  #endif
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
> > > index 205fa4f5bea7..76337e18c728 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
> > > @@ -368,6 +368,10 @@ struct amdgpu_mode_info {
> > >  * LUT as supported by the driver (read-only).
> > >  */
> > > struct drm_property *lut3d_size_property;
> > > +   /**
> > > +* @gamma_tf_property: Transfer function for CRTC regamma.
> > > +*/
> > > +   struct drm_property *gamma_tf_property;
> > >  #endif
> > >  };
> > >
> > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
> > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> > > index 09c3e1858b56..1e90a2dd445e 100644
> > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> > > @@ -699,6 +699,23 @@ static inline void amdgpu_dm_set_mst_status(uint8_t 
> > > *status,
> > >
> > >  extern const struct amdgpu_ip_block_version dm_ip_block;
> > >
> > > +#ifdef CONFIG_STEAM_DECK
> > > +enum drm_transfer_function {
> > > +   DRM_TRANSFER_FUNCTION_DEFAULT,
> > > +
> > > +   DRM_TRANSFER_FUNCTION_SRGB,
> > > +   DRM_TRANSFER_FUNCTION_BT709,
> > > +   DRM_TRANSFER_FUNCTION_PQ,
> > > +   DRM_TRANSFER_FUNCTION_LINEAR,
> > > +   DRM_TRANSFER_FUNCTION_UNITY,
> > > +   DRM_TRANSFER_FUNCTION_HLG,
> > > +   DRM_TRANSFER_FUNCTION_GAMMA22,
> > > +   DRM_TRANSFER_FUNCTION_GAMMA24,
> > > +   DRM_TRANSFER_FUNCTION_GAMMA26,
> > > +   DRM_TRANSFER_FUNCTION_MAX,
> > > +};
> > > +#endif
> > > +
> > >  struct dm_plane_state {
> > > struct drm_plane_state base;
> > > struct dc_plane_state *d

  1   2   >