[PATCH] drm/amdkfd: Use gpu_offset for user queue's wptr

2023-09-14 Thread YuBiao Wang
Directly use tbo's start address will miss the domain start offset. Need
to use gpu_offset instead.

Signed-off-by: YuBiao Wang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 77159b03a422..36e7171ad9a7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -216,7 +216,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
 
if (q->wptr_bo) {
wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE 
- 1);
-   queue_input.wptr_mc_addr = 
((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
+   queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + 
wptr_addr_off;
}
 
queue_input.is_kfd_process = 1;
-- 
2.34.1



[PATCH] drm/amdgpu: use error code EOPNOTSUPP instead of ENOTSUPP

2023-09-14 Thread Yang Wang
WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP

$ find drivers/gpu/drm/amd -type f \
  -exec sed -i 's/\-ENOTSUPP/\-EOPNOTSUPP/g' {} \;

Signed-off-by: Yang Wang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c   |  2 +-
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c  |  2 +-
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c|  6 +++---
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c|  4 ++--
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c   | 18 +-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c   | 18 +-
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c |  4 ++--
 10 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e055e06d020c..a7f0b6ccd137 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1268,7 +1268,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device 
*adev)
r = pci_resize_resource(adev->pdev, 0, rbar_size);
if (r == -ENOSPC)
DRM_INFO("Not enough PCI address space for a large BAR.");
-   else if (r && r != -ENOTSUPP)
+   else if (r && r != -EOPNOTSUPP)
DRM_ERROR("Problem resizing BAR0 (%d).", r);
 
pci_assign_unassigned_bus_resources(adev->pdev->bus);
@@ -5747,7 +5747,7 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
if (!amdgpu_device_supports_baco(dev))
-   return -ENOTSUPP;
+   return -EOPNOTSUPP;
 
if (ras && adev->ras_enabled &&
adev->nbio.funcs->enable_doorbell_interrupt)
@@ -5763,7 +5763,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
int ret = 0;
 
if (!amdgpu_device_supports_baco(dev))
-   return -ENOTSUPP;
+   return -EOPNOTSUPP;
 
ret = amdgpu_dpm_baco_exit(adev);
if (ret)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e3471293846f..ffd7035603cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2126,7 +2126,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
((flags & AMD_ASIC_MASK) == CHIP_RAVEN)) {
dev_info(&pdev->dev,
 "SME is not compatible with RAVEN\n");
-   return -ENOTSUPP;
+   return -EOPNOTSUPP;
}
 
 #ifdef CONFIG_DRM_AMDGPU_SI
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 42fc0cc13fdd..e31b7f9fcddc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -867,7 +867,7 @@ static int gfx_v11_0_get_gfx_shadow_info(struct 
amdgpu_device *adev,
return 0;
} else {
memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
-   return -ENOTSUPP;
+   return -EOPNOTSUPP;
}
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 65d2b9ae16bb..120b815a3f27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -262,7 +262,7 @@ static int set_queue_properties_from_user(struct 
queue_properties *q_properties,
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
else
-   return -ENOTSUPP;
+   return -EOPNOTSUPP;
 
if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
q_properties->format = KFD_QUEUE_FORMAT_AQL;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index e6f1620acdd4..2c88dbe2d718 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -147,7 +147,7 @@ int smu_get_dpm_freq_range(struct smu_context *smu,
   uint32_t *min,
   uint32_t *max)
 {
-   int ret = -ENOTSUPP;
+   int ret = -EOPNOTSUPP;
 
if (!min && !max)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 650482cedd1f..f48ec6c62307 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -2591,7 +2591,7 @@ static int navi10_od_edit_dpm_table(struct smu_context 
*smu, enum PP_OD_DPM_TABL
case PP_OD_EDIT_SCLK_VDDC_TABLE:
if (!navi10_od_feature_is_supported(od_settings, 
SMU_11_0_ODCAP_GFXCLK_LIMITS)) {
dev_warn(smu->adev->dev, "GFXCLK_LIMITS 

Re: [PATCH] drm/amd/display: fix the ability to use lower resolution modes on eDP

2023-09-14 Thread Hamza Mahfooz



On 9/14/23 17:04, Hamza Mahfooz wrote:


On 9/14/23 16:40, Harry Wentland wrote:

On 2023-09-14 13:53, Hamza Mahfooz wrote:

On eDP we can receive invalid modes from dm_update_crtc_state() for
entirely new streams for which drm_mode_set_crtcinfo() shouldn't be
called on. So, instead of calling drm_mode_set_crtcinfo() from within
create_stream_for_sink() we can instead call it from
amdgpu_dm_connector_mode_valid(). Since, we are guaranteed to only call
drm_mode_set_crtcinfo() for valid modes from that function (invalid
modes are rejected by that callback) and that is the only user
of create_validate_stream_for_sink() that we need to call
drm_mode_set_crtcinfo() for (as before commit cb841d27b876
("drm/amd/display: Always pass connector_state to stream validation"),
that is the only place where create_validate_stream_for_sink()'s
dm_state was NULL).



I don't seem to see how a NULL dm_state in
create_validate_stream_for_sink() (or create_stream_for_sink() for that
matter) has an impact on the drm_mode_set_crtcinfo() call. That one 
depends

on !old_stream and recalculate_timing.


If we look back to commit 4a2df0d1f28e ("drm/amd/display: Fixed
non-native modes not lighting up") it seems like the intent was to only
have drm_mode_set_crtcinfo() called for
amdgpu_dm_connector_mode_valid(). Since, even if we go that far back
create_stream_for_sink()'s dm_state was only NULL when it was called
from amdgpu_dm_connector_mode_valid().



It does look like &mode is an empty mode if we can't find a 
preferred_mode,

though. Not sure if that can cause an issue.


I don't think it should be an issue, since before commit 4a2df0d1f28e
("drm/amd/display: Fixed non-native modes not lighting up") we always


I meant to refer to commit bd49f19039c1 ("drm/amd/display: Always set
crtcinfo from create_stream_for_sink") here.

called drm_mode_set_crtcinfo() in the aforementioned case (and only for 
that case).




Harry


Cc: sta...@vger.kernel.org
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2693
Fixes: cb841d27b876 ("drm/amd/display: Always pass connector_state to 
stream validation")

Signed-off-by: Hamza Mahfooz 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

index 933c9b5d5252..beef4fef7338 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6128,8 +6128,6 @@ create_stream_for_sink(struct 
amdgpu_dm_connector *aconnector,

  if (recalculate_timing)
  drm_mode_set_crtcinfo(&saved_mode, 0);
-    else if (!old_stream)
-    drm_mode_set_crtcinfo(&mode, 0);
  /*
   * If scaling is enabled and refresh rate didn't change
@@ -6691,6 +6689,8 @@ enum drm_mode_status 
amdgpu_dm_connector_mode_valid(struct drm_connector *connec

  goto fail;
  }
+    drm_mode_set_crtcinfo(mode, 0);
+
  stream = create_validate_stream_for_sink(aconnector, mode,
   to_dm_connector_state(connector->state),
   NULL);



--
Hamza



Re: [PATCH] drm/amd/display: fix the ability to use lower resolution modes on eDP

2023-09-14 Thread Hamza Mahfooz



On 9/14/23 16:40, Harry Wentland wrote:

On 2023-09-14 13:53, Hamza Mahfooz wrote:

On eDP we can receive invalid modes from dm_update_crtc_state() for
entirely new streams for which drm_mode_set_crtcinfo() shouldn't be
called on. So, instead of calling drm_mode_set_crtcinfo() from within
create_stream_for_sink() we can instead call it from
amdgpu_dm_connector_mode_valid(). Since, we are guaranteed to only call
drm_mode_set_crtcinfo() for valid modes from that function (invalid
modes are rejected by that callback) and that is the only user
of create_validate_stream_for_sink() that we need to call
drm_mode_set_crtcinfo() for (as before commit cb841d27b876
("drm/amd/display: Always pass connector_state to stream validation"),
that is the only place where create_validate_stream_for_sink()'s
dm_state was NULL).



I don't seem to see how a NULL dm_state in
create_validate_stream_for_sink() (or create_stream_for_sink() for that
matter) has an impact on the drm_mode_set_crtcinfo() call. That one depends
on !old_stream and recalculate_timing.


If we look back to commit 4a2df0d1f28e ("drm/amd/display: Fixed
non-native modes not lighting up") it seems like the intent was to only
have drm_mode_set_crtcinfo() called for
amdgpu_dm_connector_mode_valid(). Since, even if we go that far back
create_stream_for_sink()'s dm_state was only NULL when it was called
from amdgpu_dm_connector_mode_valid().



It does look like &mode is an empty mode if we can't find a preferred_mode,
though. Not sure if that can cause an issue.


I don't think it should be an issue, since before commit 4a2df0d1f28e
("drm/amd/display: Fixed non-native modes not lighting up") we always
called drm_mode_set_crtcinfo() in the aforementioned case (and only for 
that case).




Harry


Cc: sta...@vger.kernel.org
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2693
Fixes: cb841d27b876 ("drm/amd/display: Always pass connector_state to stream 
validation")
Signed-off-by: Hamza Mahfooz 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 933c9b5d5252..beef4fef7338 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6128,8 +6128,6 @@ create_stream_for_sink(struct amdgpu_dm_connector 
*aconnector,
  
  	if (recalculate_timing)

drm_mode_set_crtcinfo(&saved_mode, 0);
-   else if (!old_stream)
-   drm_mode_set_crtcinfo(&mode, 0);
  
  	/*

 * If scaling is enabled and refresh rate didn't change
@@ -6691,6 +6689,8 @@ enum drm_mode_status 
amdgpu_dm_connector_mode_valid(struct drm_connector *connec
goto fail;
}
  
+	drm_mode_set_crtcinfo(mode, 0);

+
stream = create_validate_stream_for_sink(aconnector, mode,
 
to_dm_connector_state(connector->state),
 NULL);



--
Hamza



Re: [PATCH] drm/amd/display: Handle NULL dccg in dce110_disable_stream

2023-09-14 Thread Alex Deucher
On Mon, Sep 11, 2023 at 10:36 AM Michel Dänzer  wrote:
>
> From: Michel Dänzer 
>
> It can be NULL e.g. with Raven.
>
> Fixes: 927e784c180c ("drm/amd/display: Add symclk enable/disable during 
> stream enable/disable")
> Signed-off-by: Michel Dänzer 

Looks like this was fixed in:
https://gitlab.freedesktop.org/agd5f/linux/-/commit/7f7925e258288cfcfa2b0e0631fcd91a39744f94
but needs to be applied to 6.6.  I'll queue it up for 6.6.

Alex

> ---
>  drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
> b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
> index ad967b58d7be..a9dca69ee8f6 100644
> --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
> +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
> @@ -1179,9 +1179,11 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
> dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
> dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
> dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, 
> dp_hpo_inst);
> -   } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST 
> && dccg->funcs->disable_symclk_se)
> +   } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST &&
> +  dccg && dccg->funcs->disable_symclk_se) {
> dccg->funcs->disable_symclk_se(dccg, 
> stream_enc->stream_enc_inst,
> -   link_enc->transmitter - TRANSMITTER_UNIPHY_A);
> +  link_enc->transmitter - 
> TRANSMITTER_UNIPHY_A);
> +   }
>
> if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
> /* TODO: This looks like a bug to me as we are disabling HPO 
> IO when
> --
> 2.40.1
>


Re: [PATCH] fix a memory leak in amdgpu_ras_feature_enable

2023-09-14 Thread Alex Deucher
Applied.  Thanks!

On Thu, Sep 14, 2023 at 5:53 AM Zhang, Hawking  wrote:
>
> [AMD Official Use Only - General]
>
> Reviewed-by: Hawking Zhang 
>
> Regards,
> Hawking
> -Original Message-
> From: Cong Liu 
> Sent: Thursday, September 14, 2023 17:46
> To: Deucher, Alexander ; Koenig, Christian 
> ; Pan, Xinhui ; David Airlie 
> ; Daniel Vetter ; Yang, Stanley 
> ; Zhang, Hawking 
> Cc: Cong Liu ; amd-gfx@lists.freedesktop.org; 
> dri-de...@lists.freedesktop.org; linux-ker...@vger.kernel.org
> Subject: [PATCH] fix a memory leak in amdgpu_ras_feature_enable
>
> This patch fixes a memory leak in the amdgpu_ras_feature_enable() function.
> The leak occurs when the function sends a command to the firmware to enable 
> or disable a RAS feature for a GFX block. If the command fails, the kfree() 
> function is not called to free the info memory.
>
> Fixes: bf7aa8bea9cb ("drm/amdgpu: Free ras cmd input buffer properly")
> Signed-off-by: Cong Liu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 8eb6f6943778..b4a8ea946410 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -802,6 +802,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
> enable ? "enable":"disable",
> get_ras_block_str(head),
> amdgpu_ras_is_poison_mode_supported(adev), 
> ret);
> +   kfree(info);
> return ret;
> }
>
> --
> 2.34.1
>


Re: [PATCH] drm/amd/display: fix the ability to use lower resolution modes on eDP

2023-09-14 Thread Harry Wentland
On 2023-09-14 13:53, Hamza Mahfooz wrote:
> On eDP we can receive invalid modes from dm_update_crtc_state() for
> entirely new streams for which drm_mode_set_crtcinfo() shouldn't be
> called on. So, instead of calling drm_mode_set_crtcinfo() from within
> create_stream_for_sink() we can instead call it from
> amdgpu_dm_connector_mode_valid(). Since, we are guaranteed to only call
> drm_mode_set_crtcinfo() for valid modes from that function (invalid
> modes are rejected by that callback) and that is the only user
> of create_validate_stream_for_sink() that we need to call
> drm_mode_set_crtcinfo() for (as before commit cb841d27b876
> ("drm/amd/display: Always pass connector_state to stream validation"),
> that is the only place where create_validate_stream_for_sink()'s
> dm_state was NULL).
> 

I don't seem to see how a NULL dm_state in
create_validate_stream_for_sink() (or create_stream_for_sink() for that
matter) has an impact on the drm_mode_set_crtcinfo() call. That one depends
on !old_stream and recalculate_timing.

It does look like &mode is an empty mode if we can't find a preferred_mode,
though. Not sure if that can cause an issue.

Harry

> Cc: sta...@vger.kernel.org
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2693
> Fixes: cb841d27b876 ("drm/amd/display: Always pass connector_state to stream 
> validation")
> Signed-off-by: Hamza Mahfooz 
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 933c9b5d5252..beef4fef7338 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -6128,8 +6128,6 @@ create_stream_for_sink(struct amdgpu_dm_connector 
> *aconnector,
>  
>   if (recalculate_timing)
>   drm_mode_set_crtcinfo(&saved_mode, 0);
> - else if (!old_stream)
> - drm_mode_set_crtcinfo(&mode, 0);
>  
>   /*
>* If scaling is enabled and refresh rate didn't change
> @@ -6691,6 +6689,8 @@ enum drm_mode_status 
> amdgpu_dm_connector_mode_valid(struct drm_connector *connec
>   goto fail;
>   }
>  
> + drm_mode_set_crtcinfo(mode, 0);
> +
>   stream = create_validate_stream_for_sink(aconnector, mode,
>
> to_dm_connector_state(connector->state),
>NULL);



[PATCH 4/4] drm/amdgpu/gmc11: disable AGP on GC 11.5

2023-09-14 Thread Alex Deucher
AGP aperture is deprecated and no longer functional.

v2: fix typo (Alex)

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index e8dc002b5a79..e04ae66fef1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -742,6 +742,9 @@ static int gmc_v11_0_mc_init(struct amdgpu_device *adev)
 
/* force GART to avoid a hw issue in GC11 */
adev->gmc.gart_placement = AMDGPU_GART_PLACEMENT_HIGH;
+   /* AGP aperture is removed on GC11.5 */
+   if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 5, 0))
+   adev->gmc.no_agp = true;
 
gmc_v11_0_vram_gtt_location(adev, &adev->gmc);
 
-- 
2.41.0



[PATCH 3/4] drm/amdgpu/gmc: add a flag to disable AGP

2023-09-14 Thread Alex Deucher
Allows the driver to disable the AGP aperture when
it's not needed.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c41321c456fe..ed9be37eca66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -326,7 +326,7 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, 
struct amdgpu_gmc *mc)
const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
u64 size_af, size_bf;
 
-   if (amdgpu_sriov_vf(adev)) {
+   if (amdgpu_sriov_vf(adev) || mc->no_agp) {
mc->agp_start = 0x;
mc->agp_end = 0x0;
mc->agp_size = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index caa15639d3d5..65641d7da212 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -340,6 +340,7 @@ struct amdgpu_gmc {
 
u64 noretry_flags;
enum amdgpu_gart_placement gart_placement;
+   bool no_agp;
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
-- 
2.41.0



[PATCH 1/4] drm/amdgpu/gmc: add a way to force a particular placement for GART

2023-09-14 Thread Alex Deucher
We normally place GART based on the location of VRAM and the
available address space around that, but provide an option
to force a particular location for hardware that needs it.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 19 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  7 +++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c7793db6d098..c41321c456fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -286,11 +286,22 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, 
struct amdgpu_gmc *mc)
mc->gart_size = max(size_bf, size_af);
}
 
-   if ((size_bf >= mc->gart_size && size_bf < size_af) ||
-   (size_af < mc->gart_size))
-   mc->gart_start = 0;
-   else
+   switch (mc->gart_placement) {
+   case AMDGPU_GART_PLACEMENT_HIGH:
mc->gart_start = max_mc_address - mc->gart_size + 1;
+   break;
+   case AMDGPU_GART_PLACEMENT_LOW:
+   mc->gart_start = 0;
+   break;
+   case AMDGPU_GART_PLACEMENT_BEST_FIT:
+   default:
+   if ((size_bf >= mc->gart_size && size_bf < size_af) ||
+   (size_af < mc->gart_size))
+   mc->gart_start = 0;
+   else
+   mc->gart_start = max_mc_address - mc->gart_size + 1;
+   break;
+   }
 
mc->gart_start &= ~(four_gb - 1);
mc->gart_end = mc->gart_start + mc->gart_size - 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index fdc25cd559b6..caa15639d3d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -197,6 +197,12 @@ struct amdgpu_mem_partition_info {
 
 #define INVALID_PFN-1
 
+enum amdgpu_gart_placement {
+   AMDGPU_GART_PLACEMENT_BEST_FIT = 0,
+   AMDGPU_GART_PLACEMENT_HIGH,
+   AMDGPU_GART_PLACEMENT_LOW,
+};
+
 struct amdgpu_gmc {
/* FB's physical address in MMIO space (for CPU to
 * map FB). This is different compared to the agp/
@@ -333,6 +339,7 @@ struct amdgpu_gmc {
u64 MC_VM_MX_L1_TLB_CNTL;
 
u64 noretry_flags;
+   enum amdgpu_gart_placement gart_placement;
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
-- 
2.41.0



[PATCH 2/4] drm/amdgpu/gmc11: set gart placement GC11

2023-09-14 Thread Alex Deucher
Needed to avoid a hardware issue.

v2: force high for all GC11 parts for consistency (Alex)

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index e1f47f9c1881..e8dc002b5a79 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -740,6 +740,9 @@ static int gmc_v11_0_mc_init(struct amdgpu_device *adev)
else
adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
 
+   /* force GART to avoid a hw issue in GC11 */
+   adev->gmc.gart_placement = AMDGPU_GART_PLACEMENT_HIGH;
+
gmc_v11_0_vram_gtt_location(adev, &adev->gmc);
 
return 0;
-- 
2.41.0



Re: [PATCH v2] drm/amdkfd: Use partial migrations in GPU page faults

2023-09-14 Thread Felix Kuehling



On 2023-09-14 10:59, Chen, Xiaogang wrote:


On 9/13/2023 5:03 PM, Felix Kuehling wrote:

On 2023-09-11 10:04, Xiaogang.Chen wrote:

From: Xiaogang Chen

This patch implements partial migration in gpu page fault according 
to migration
granularity(default 2MB) and not split svm range in cpu page fault 
handling.
A svm range may include pages from both system ram and vram of one 
gpu now.

These chagnes are expected to improve migration performance and reduce
mmu callback and TLB flush workloads.

Signed-off-by: xiaogang chen
---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 151 
++-

  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  88 ++---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   7 +-
  4 files changed, 171 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

index 7d82c7da223a..653a2edbaba4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -445,7 +445,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

  pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",


Should this message also be updated: migrated -> collected?

yes, it was left as original version. cpages is number of collected 
pages from hmm. Will change to: pr_debug("partial migration, 
0x%lx/0x%llx pages collected\n",



   cpages, npages);
  else
-    pr_debug("0x%lx pages migrated\n", cpages);
+    pr_debug("0x%lx pages collected\n", cpages);
    r = svm_migrate_copy_to_vram(node, prange, , 
, scratch, ttm_res_offset);

  migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

   * svm_migrate_ram_to_vram - migrate svm range from system to device
   * @prange: range structure
   * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
   * @mm: the process mm structure
   * @trigger: reason of migration
   *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

   */
  static int
  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+    unsigned long start_mgr, unsigned long last_mgr,
  struct mm_struct *mm, uint32_t trigger)
  {
  unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

  unsigned long cpages = 0;
  long r = 0;
  -    if (prange->actual_loc == best_loc) {
-    pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
- prange->svms, prange->start, prange->last, best_loc);
+    if (!best_loc) {
+    pr_debug("request svms 0x%p [0x%lx 0x%lx] migrate to sys 
ram\n",


This message seems incorrect. This function migrates to VRAM.

I think this message is for input checking: when migrate from ram to 
varm and we see best_loc is sys ram this message indicates that the 
caller try to migration to sys ram, not vram, then do nothing. It does 
not indicate what this function does as pr_debug will add function 
name at beginning.


OK, makes sense, I wasn't reading it carefully. No need to add the 
function name to the message, because dyn-debug can add the file name, 
function name and line number by itself.






+ prange->svms, start_mgr, last_mgr);
  return 0;
  }
  +    if (start_mgr < prange->start || last_mgr > prange->last) {
+    pr_debug("migration range [0x%lx 0x%lx] out prange [0x%lx 
0x%lx]\n",

+ start_mgr, last_mgr, prange->start, prange->last);
+    return -EFAULT;
+    }
+
  node = svm_range_get_node_by_id(prange, best_loc);
  if (!node) {
  pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
  return -ENODEV;
  }
  -    pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
- prange->start, prange->last, best_loc);
+    pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+ prange->svms, start_mgr, last_mgr, prange->start, 
prange->last,

+ best_loc);
  -    start = prange->start << PAGE_SHIFT;
-    end = (prange->last + 1) << PAGE_SHIFT;
+    start = start_mgr << PAGE_SHIFT;
+    end = (last_mgr + 1) << PAGE_SHIFT;
    r = svm_range_vram_node_new(node, prange, true);
  if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

    if (cpages) {
  prange->actual_loc = best_loc;
-    svm_range_free_dma_mappings(prange, true);
-    } else {
+    prange->vram_pages = prange->vram_pages + cpages;
+    } else if (!prange->actual_loc) {
+    /* if no page migrated and all pages from prange are at
+ * sys ram drop svm_bo got from svm_range_vram_node_new
+ */
  svm_range_vram_node_free(prange);
  }
  @@ 

Re: [PATCH 4/4] drm/amdgpu/gmc11: disable AGP on GC 11.5

2023-09-14 Thread Alex Deucher
On Thu, Sep 14, 2023 at 2:31 PM Alex Deucher  wrote:
>
> AGP aperture is deprecated and no longer functional.
>
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index 6947b598e9b2..7ee91b66f761 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> @@ -745,6 +745,9 @@ static int gmc_v11_0_mc_init(struct amdgpu_device *adev)
> adev->gmc.gart_placement = AMDGPU_GART_PLACEMENT_LOW;
> else
> adev->gmc.gart_placement = AMDGPU_GART_PLACEMENT_HIGH;
> +   /* AGP aperture is removed on GC11.5 */
> +   if (amdgpu_ip_version(adev, GC_HWIP, -2) >= IP_VERSION(11, 5, 0))

Not sure what happened here.  I've replaced the -2 with 0 locally.

Alex

> +   adev->gmc.no_agp = true;
>
> gmc_v11_0_vram_gtt_location(adev, &adev->gmc);
>
> --
> 2.41.0
>


[PATCH 2/4] drm/amdgpu/gmc11: set gart placement GC11

2023-09-14 Thread Alex Deucher
Needed to avoid a hardware issue.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index e1f47f9c1881..6947b598e9b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -740,6 +740,12 @@ static int gmc_v11_0_mc_init(struct amdgpu_device *adev)
else
adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
 
+   /* force GART to avoid a hw issue in GC11 */
+   if (adev->flags & AMD_IS_APU)
+   adev->gmc.gart_placement = AMDGPU_GART_PLACEMENT_LOW;
+   else
+   adev->gmc.gart_placement = AMDGPU_GART_PLACEMENT_HIGH;
+
gmc_v11_0_vram_gtt_location(adev, &adev->gmc);
 
return 0;
-- 
2.41.0



[PATCH 3/4] drm/amdgpu/gmc: add a flag to disable AGP

2023-09-14 Thread Alex Deucher
Allows the driver to disable the AGP aperture when
it's not needed.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c41321c456fe..ed9be37eca66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -326,7 +326,7 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, 
struct amdgpu_gmc *mc)
const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
u64 size_af, size_bf;
 
-   if (amdgpu_sriov_vf(adev)) {
+   if (amdgpu_sriov_vf(adev) || mc->no_agp) {
mc->agp_start = 0x;
mc->agp_end = 0x0;
mc->agp_size = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index caa15639d3d5..65641d7da212 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -340,6 +340,7 @@ struct amdgpu_gmc {
 
u64 noretry_flags;
enum amdgpu_gart_placement gart_placement;
+   bool no_agp;
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
-- 
2.41.0



[PATCH 4/4] drm/amdgpu/gmc11: disable AGP on GC 11.5

2023-09-14 Thread Alex Deucher
AGP aperture is deprecated and no longer functional.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 6947b598e9b2..7ee91b66f761 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -745,6 +745,9 @@ static int gmc_v11_0_mc_init(struct amdgpu_device *adev)
adev->gmc.gart_placement = AMDGPU_GART_PLACEMENT_LOW;
else
adev->gmc.gart_placement = AMDGPU_GART_PLACEMENT_HIGH;
+   /* AGP aperture is removed on GC11.5 */
+   if (amdgpu_ip_version(adev, GC_HWIP, -2) >= IP_VERSION(11, 5, 0))
+   adev->gmc.no_agp = true;
 
gmc_v11_0_vram_gtt_location(adev, >gmc);
 
-- 
2.41.0



[PATCH 1/4] drm/amdgpu/gmc: add a way to force a particular placement for GART

2023-09-14 Thread Alex Deucher
We normally place GART based on the location of VRAM and the
available address space around that, but provide an option
to force a particular location for hardware that needs it.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 19 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  7 +++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c7793db6d098..c41321c456fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -286,11 +286,22 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, 
struct amdgpu_gmc *mc)
mc->gart_size = max(size_bf, size_af);
}
 
-   if ((size_bf >= mc->gart_size && size_bf < size_af) ||
-   (size_af < mc->gart_size))
-   mc->gart_start = 0;
-   else
+   switch (mc->gart_placement) {
+   case AMDGPU_GART_PLACEMENT_HIGH:
mc->gart_start = max_mc_address - mc->gart_size + 1;
+   break;
+   case AMDGPU_GART_PLACEMENT_LOW:
+   mc->gart_start = 0;
+   break;
+   case AMDGPU_GART_PLACEMENT_BEST_FIT:
+   default:
+   if ((size_bf >= mc->gart_size && size_bf < size_af) ||
+   (size_af < mc->gart_size))
+   mc->gart_start = 0;
+   else
+   mc->gart_start = max_mc_address - mc->gart_size + 1;
+   break;
+   }
 
mc->gart_start &= ~(four_gb - 1);
mc->gart_end = mc->gart_start + mc->gart_size - 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index fdc25cd559b6..caa15639d3d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -197,6 +197,12 @@ struct amdgpu_mem_partition_info {
 
 #define INVALID_PFN-1
 
+enum amdgpu_gart_placement {
+   AMDGPU_GART_PLACEMENT_BEST_FIT = 0,
+   AMDGPU_GART_PLACEMENT_HIGH,
+   AMDGPU_GART_PLACEMENT_LOW,
+};
+
 struct amdgpu_gmc {
/* FB's physical address in MMIO space (for CPU to
 * map FB). This is different compared to the agp/
@@ -333,6 +339,7 @@ struct amdgpu_gmc {
u64 MC_VM_MX_L1_TLB_CNTL;
 
u64 noretry_flags;
+   enum amdgpu_gart_placement gart_placement;
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
-- 
2.41.0



[PATCH] drm/amd/display: fix the ability to use lower resolution modes on eDP

2023-09-14 Thread Hamza Mahfooz
On eDP we can receive invalid modes from dm_update_crtc_state() for
entirely new streams, on which drm_mode_set_crtcinfo() shouldn't be
called. So, instead of calling drm_mode_set_crtcinfo() from within
create_stream_for_sink() we can instead call it from
amdgpu_dm_connector_mode_valid(). Since, we are guaranteed to only call
drm_mode_set_crtcinfo() for valid modes from that function (invalid
modes are rejected by that callback) and that is the only user
of create_validate_stream_for_sink() that we need to call
drm_mode_set_crtcinfo() for (as before commit cb841d27b876
("drm/amd/display: Always pass connector_state to stream validation"),
that is the only place where create_validate_stream_for_sink()'s
dm_state was NULL).

Cc: sta...@vger.kernel.org
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2693
Fixes: cb841d27b876 ("drm/amd/display: Always pass connector_state to stream 
validation")
Signed-off-by: Hamza Mahfooz 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 933c9b5d5252..beef4fef7338 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6128,8 +6128,6 @@ create_stream_for_sink(struct amdgpu_dm_connector 
*aconnector,
 
if (recalculate_timing)
drm_mode_set_crtcinfo(_mode, 0);
-   else if (!old_stream)
-   drm_mode_set_crtcinfo(, 0);
 
/*
 * If scaling is enabled and refresh rate didn't change
@@ -6691,6 +6689,8 @@ enum drm_mode_status 
amdgpu_dm_connector_mode_valid(struct drm_connector *connec
goto fail;
}
 
+   drm_mode_set_crtcinfo(mode, 0);
+
stream = create_validate_stream_for_sink(aconnector, mode,
 
to_dm_connector_state(connector->state),
 NULL);
-- 
2.42.0



RE: [PATCH] drm/amdkfd: Align unique_id format to match amdgpu

2023-09-14 Thread Russell, Kent
[AMD Official Use Only - General]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Thursday, September 14, 2023 1:25 PM
> To: Russell, Kent ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdkfd: Align unique_id format to match amdgpu
>
>
> On 2023-09-14 13:09, Kent Russell wrote:
> > unique_id is printed as %016llx in amdgpu, but %llu in KFD. Call the
> > sysfs_show_gen_prop function directly and use the %016llx format, to
> > align with amdgpu. Don't need to add a new macro since this is a one-off.
>
> Doesn't this break the ABI? Any tool currently reading the unique ID
> would expect it to be decimal.

A fair point, and was something I was on the fence on. The inquiry came from 
rocm-smi, noting that the unique_ids didn't align. One would assume that 
reading a sysfs file would be done as a string and then converted, but you're 
right. We can't risk breaking other tools just because we want to fix one of 
ours and make assumptions about how tools consume the information. I'll talk to 
the SMI guys about trying to convert to hex in SMI to align instead.

 Kent

>
> Regards,
>Felix
>
>
> >
> > Signed-off-by: Kent Russell 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > index c8c75ff7cea8..4dac29cdab20 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > @@ -538,7 +538,7 @@ static ssize_t node_show(struct kobject *kobj, struct
> attribute *attr,
> >   dev->node_props.debug_prop);
> > sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
> >   dev->gpu->kfd->sdma_fw_version);
> > -   sysfs_show_64bit_prop(buffer, offs, "unique_id",
> > +   sysfs_show_gen_prop(buffer, offs, "%s %016llx\n", "unique_id",
> >   dev->gpu->adev->unique_id);
> > sysfs_show_32bit_prop(buffer, offs, "num_xcc",
> >   NUM_XCC(dev->gpu->xcc_mask));


Re: [PATCH 27/28] drm/amd/display: Drop unused code

2023-09-14 Thread Harry Wentland
On 2023-09-13 22:00, Rodrigo Siqueira wrote:
> There are multiple parts of the code that DC does not use anymore, and
> this commit drops those dead codes.
> 
> Signed-off-by: Rodrigo Siqueira 

The commit title is a bit non-descript. I think something like
"drop unused link_fpga.c" would be better.

Either way this is
Reviewed-by: Harry Wentland 

Harry

> ---
>  .../drm/amd/display/dc/bios/bios_parser2.c|  9 --
>  drivers/gpu/drm/amd/display/dc/link/Makefile  |  4 +-
>  .../display/dc/link/accessories/link_fpga.c   | 95 ---
>  .../display/dc/link/accessories/link_fpga.h   | 30 --
>  .../gpu/drm/amd/display/dc/link/link_dpms.c   |  1 -
>  .../drm/amd/display/dc/link/link_factory.c|  1 -
>  6 files changed, 2 insertions(+), 138 deletions(-)
>  delete mode 100644 
> drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.c
>  delete mode 100644 
> drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.h
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c 
> b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
> index 484d62bcf2c2..2ec496be778a 100644
> --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
> +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
> @@ -1723,15 +1723,6 @@ static void bios_parser_set_scratch_critical_state(
>   bios_set_scratch_critical_state(dcb, state);
>  }
>  
> -struct atom_dig_transmitter_info_header_v5_3 {
> -struct atom_common_table_header table_header;
> -uint16_t dpphy_hdmi_settings_offset;
> -uint16_t dpphy_dvi_settings_offset;
> -uint16_t dpphy_dp_setting_table_offset;
> -uint16_t uniphy_xbar_settings_v2_table_offset;
> -uint16_t dpphy_internal_reg_overide_offset;
> -};
> -
>  static enum bp_result bios_parser_get_firmware_info(
>   struct dc_bios *dcb,
>   struct dc_firmware_info *info)
> diff --git a/drivers/gpu/drm/amd/display/dc/link/Makefile 
> b/drivers/gpu/drm/amd/display/dc/link/Makefile
> index 6af8a97d4a77..84c7af5fa589 100644
> --- a/drivers/gpu/drm/amd/display/dc/link/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/link/Makefile
> @@ -33,7 +33,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_LINK)
>  
> ###
>  # accessories
>  
> ###
> -LINK_ACCESSORIES = link_dp_trace.o link_dp_cts.o link_fpga.o
> +LINK_ACCESSORIES = link_dp_trace.o link_dp_cts.o
>  
>  AMD_DAL_LINK_ACCESSORIES = $(addprefix $(AMDDALPATH)/dc/link/accessories/, \
>  $(LINK_ACCESSORIES))
> @@ -61,4 +61,4 @@ link_edp_panel_control.o link_dp_irq_handler.o 
> link_dp_dpia_bw.o
>  AMD_DAL_LINK_PROTOCOLS = $(addprefix $(AMDDALPATH)/dc/link/protocols/, \
>  $(LINK_PROTOCOLS))
>  
> -AMD_DISPLAY_FILES += $(AMD_DAL_LINK_PROTOCOLS)
> \ No newline at end of file
> +AMD_DISPLAY_FILES += $(AMD_DAL_LINK_PROTOCOLS)
> diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.c 
> b/drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.c
> deleted file mode 100644
> index d3cc604eed67..
> --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.c
> +++ /dev/null
> @@ -1,95 +0,0 @@
> -/*
> - * Copyright 2023 Advanced Micro Devices, Inc.
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice shall be included in
> - * all copies or substantial portions of the Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> - * OTHER DEALINGS IN THE SOFTWARE.
> - *
> - * Authors: AMD
> - *
> - */
> -#include "link_fpga.h"
> -#include "link/link_dpms.h"
> -#include "dm_helpers.h"
> -#include "link_hwss.h"
> -#include "dccg.h"
> -#include "resource.h"
> -
> -#define DC_LOGGER_INIT(logger)
> -
> -void dp_fpga_hpo_enable_link_and_stream(struct dc_state *state, struct 
> pipe_ctx *pipe_ctx)
> -{
> - struct dc *dc = pipe_ctx->stream->ctx->dc;
> - struct dc_stream_state *stream = pipe_ctx->stream;
> - struct link_mst_stream_allocation_table proposed_table = {0};
> - struct fixed31_32 avg_time_slots_per_mtp;
> - uint8_t req_slot_count = 0;
> 

Re: [PATCH] drm/amdkfd: Align unique_id format to match amdgpu

2023-09-14 Thread Felix Kuehling



On 2023-09-14 13:09, Kent Russell wrote:

unique_id is printed as %016llx in amdgpu, but %llu in KFD. Call the
sysfs_show_gen_prop function directly and use the %016llx format, to
align with amdgpu. Don't need to add a new macro since this is a one-off.


Doesn't this break the ABI? Any tool currently reading the unique ID 
would expect it to be decimal.


Regards,
  Felix




Signed-off-by: Kent Russell 
---
  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c8c75ff7cea8..4dac29cdab20 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -538,7 +538,7 @@ static ssize_t node_show(struct kobject *kobj, struct 
attribute *attr,
  dev->node_props.debug_prop);
sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
  dev->gpu->kfd->sdma_fw_version);
-   sysfs_show_64bit_prop(buffer, offs, "unique_id",
+   sysfs_show_gen_prop(buffer, offs, "%s %016llx\n", "unique_id",
  dev->gpu->adev->unique_id);
sysfs_show_32bit_prop(buffer, offs, "num_xcc",
  NUM_XCC(dev->gpu->xcc_mask));


[PATCH] drm/amdkfd: Align unique_id format to match amdgpu

2023-09-14 Thread Kent Russell
unique_id is printed as %016llx in amdgpu, but %llu in KFD. Call the
sysfs_show_gen_prop function directly and use the %016llx format, to
align with amdgpu. Don't need to add a new macro since this is a one-off.

Signed-off-by: Kent Russell 
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c8c75ff7cea8..4dac29cdab20 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -538,7 +538,7 @@ static ssize_t node_show(struct kobject *kobj, struct 
attribute *attr,
  dev->node_props.debug_prop);
sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
  dev->gpu->kfd->sdma_fw_version);
-   sysfs_show_64bit_prop(buffer, offs, "unique_id",
+   sysfs_show_gen_prop(buffer, offs, "%s %016llx\n", "unique_id",
  dev->gpu->adev->unique_id);
sysfs_show_32bit_prop(buffer, offs, "num_xcc",
  NUM_XCC(dev->gpu->xcc_mask));
-- 
2.34.1



[PATCH] drm/amdgpu: update IP count INFO query

2023-09-14 Thread Sathishkumar S
update the query to return the number of functional
instances where there is more than one instance of the requested
type, and for others continue to return one.

Signed-off-by: Sathishkumar S 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 90 +
 1 file changed, 61 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 3a48bec10aea..9521fa7a1bf9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -200,6 +200,44 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, 
unsigned long flags)
return r;
 }
 
+static enum amd_ip_block_type amdgpu_ip_get_block_type(
+   struct amdgpu_device *adev, uint32_t ip)
+{
+   enum amd_ip_block_type type;
+
+   switch (ip) {
+   case AMDGPU_HW_IP_GFX:
+   type = AMD_IP_BLOCK_TYPE_GFX;
+   break;
+   case AMDGPU_HW_IP_COMPUTE:
+   type = AMD_IP_BLOCK_TYPE_GFX;
+   break;
+   case AMDGPU_HW_IP_DMA:
+   type = AMD_IP_BLOCK_TYPE_SDMA;
+   break;
+   case AMDGPU_HW_IP_UVD:
+   case AMDGPU_HW_IP_UVD_ENC:
+   type = AMD_IP_BLOCK_TYPE_UVD;
+   break;
+   case AMDGPU_HW_IP_VCE:
+   type = AMD_IP_BLOCK_TYPE_VCE;
+   break;
+   case AMDGPU_HW_IP_VCN_DEC:
+   case AMDGPU_HW_IP_VCN_ENC:
+   type = AMD_IP_BLOCK_TYPE_VCN;
+   break;
+   case AMDGPU_HW_IP_VCN_JPEG:
+   type = (amdgpu_device_ip_get_ip_block(adev, 
AMD_IP_BLOCK_TYPE_JPEG)) ?
+  AMD_IP_BLOCK_TYPE_JPEG : 
AMD_IP_BLOCK_TYPE_VCN;
+   break;
+   default:
+   type = AMD_IP_BLOCK_TYPE_NUM;
+   break;
+   }
+
+   return type;
+}
+
 static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
struct drm_amdgpu_query_fw *query_fw,
struct amdgpu_device *adev)
@@ -592,45 +630,39 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
}
case AMDGPU_INFO_HW_IP_COUNT: {
enum amd_ip_block_type type;
+   struct amdgpu_ip_block *ip_block = NULL;
uint32_t count = 0;
 
-   switch (info->query_hw_ip.type) {
-   case AMDGPU_HW_IP_GFX:
-   type = AMD_IP_BLOCK_TYPE_GFX;
-   break;
-   case AMDGPU_HW_IP_COMPUTE:
-   type = AMD_IP_BLOCK_TYPE_GFX;
-   break;
-   case AMDGPU_HW_IP_DMA:
-   type = AMD_IP_BLOCK_TYPE_SDMA;
-   break;
-   case AMDGPU_HW_IP_UVD:
-   type = AMD_IP_BLOCK_TYPE_UVD;
+   type = amdgpu_ip_get_block_type(adev, info->query_hw_ip.type);
+   ip_block = amdgpu_device_ip_get_ip_block(adev, type);
+   if (!ip_block || !ip_block->status.valid)
+   return -EINVAL;
+
+   switch (type) {
+   case AMD_IP_BLOCK_TYPE_GFX:
+   case AMD_IP_BLOCK_TYPE_VCE:
+   count = 1;
break;
-   case AMDGPU_HW_IP_VCE:
-   type = AMD_IP_BLOCK_TYPE_VCE;
+   case AMD_IP_BLOCK_TYPE_SDMA:
+   count = adev->sdma.num_instances;
break;
-   case AMDGPU_HW_IP_UVD_ENC:
-   type = AMD_IP_BLOCK_TYPE_UVD;
+   case AMD_IP_BLOCK_TYPE_JPEG:
+   count = adev->jpeg.num_jpeg_inst;
break;
-   case AMDGPU_HW_IP_VCN_DEC:
-   case AMDGPU_HW_IP_VCN_ENC:
-   type = AMD_IP_BLOCK_TYPE_VCN;
+   case AMD_IP_BLOCK_TYPE_VCN:
+   count = adev->vcn.num_vcn_inst;
break;
-   case AMDGPU_HW_IP_VCN_JPEG:
-   type = (amdgpu_device_ip_get_ip_block(adev, 
AMD_IP_BLOCK_TYPE_JPEG)) ?
-   AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+   case AMD_IP_BLOCK_TYPE_UVD:
+   count = adev->uvd.num_uvd_inst;
break;
+   /* For all other IP block types not listed in the switch 
statement
+* the ip status is valid here and the instance count is one.
+*/
default:
-   return -EINVAL;
+   count = 1;
+   break;
}
 
-   for (i = 0; i < adev->num_ip_blocks; i++)
-   if (adev->ip_blocks[i].version->type == type &&
-   adev->ip_blocks[i].status.valid &&
-   count < 

Re: [PATCH v2] drm/amdkfd: Use partial migrations in GPU page faults

2023-09-14 Thread Chen, Xiaogang



On 9/13/2023 5:03 PM, Felix Kuehling wrote:

On 2023-09-11 10:04, Xiaogang.Chen wrote:

From: Xiaogang Chen

This patch implements partial migration in gpu page fault handling
according to the migration granularity (default 2MB), and does not split
the svm range in cpu page fault handling.
A svm range may include pages from both system ram and vram of one 
gpu now.

These changes are expected to improve migration performance and reduce
mmu callback and TLB flush workloads.

Signed-off-by: xiaogang chen
---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 151 ++-
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  88 ++---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   7 +-
  4 files changed, 171 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

index 7d82c7da223a..653a2edbaba4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -445,7 +445,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

  pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",


Should this message also be updated: migrated -> collected?

yes, it was left as original version. cpages is number of collected 
pages from hmm. Will change to: pr_debug("partial migration, 
0x%lx/0x%llx pages collected\n",



   cpages, npages);
  else
-    pr_debug("0x%lx pages migrated\n", cpages);
+    pr_debug("0x%lx pages collected\n", cpages);
    r = svm_migrate_copy_to_vram(node, prange, , , 
scratch, ttm_res_offset);

  migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

   * svm_migrate_ram_to_vram - migrate svm range from system to device
   * @prange: range structure
   * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
   * @mm: the process mm structure
   * @trigger: reason of migration
   *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

   */
  static int
  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+    unsigned long start_mgr, unsigned long last_mgr,
  struct mm_struct *mm, uint32_t trigger)
  {
  unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

  unsigned long cpages = 0;
  long r = 0;
  -    if (prange->actual_loc == best_loc) {
-    pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
- prange->svms, prange->start, prange->last, best_loc);
+    if (!best_loc) {
+    pr_debug("request svms 0x%p [0x%lx 0x%lx] migrate to sys 
ram\n",


This message seems incorrect. This function migrates to VRAM.

I think this message is for input checking: when migrating from ram to
vram and we see best_loc is sys ram, this message indicates that the
caller tried to migrate to sys ram, not vram, so we do nothing. It does
not indicate what this function does, as pr_debug will add the function
name at the beginning.



+ prange->svms, start_mgr, last_mgr);
  return 0;
  }
  +    if (start_mgr < prange->start || last_mgr > prange->last) {
+    pr_debug("migration range [0x%lx 0x%lx] out prange [0x%lx 
0x%lx]\n",

+ start_mgr, last_mgr, prange->start, prange->last);
+    return -EFAULT;
+    }
+
  node = svm_range_get_node_by_id(prange, best_loc);
  if (!node) {
  pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
  return -ENODEV;
  }
  -    pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
- prange->start, prange->last, best_loc);
+    pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+ prange->svms, start_mgr, last_mgr, prange->start, 
prange->last,

+ best_loc);
  -    start = prange->start << PAGE_SHIFT;
-    end = (prange->last + 1) << PAGE_SHIFT;
+    start = start_mgr << PAGE_SHIFT;
+    end = (last_mgr + 1) << PAGE_SHIFT;
    r = svm_range_vram_node_new(node, prange, true);
  if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

    if (cpages) {
  prange->actual_loc = best_loc;
-    svm_range_free_dma_mappings(prange, true);
-    } else {
+    prange->vram_pages = prange->vram_pages + cpages;
+    } else if (!prange->actual_loc) {
+    /* if no page migrated and all pages from prange are at
+ * sys ram drop svm_bo got from svm_range_vram_node_new
+ */
  svm_range_vram_node_free(prange);
  }
  @@ -670,7 +683,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device 
*adev, struct svm_range *prange,

  static long
  svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range 
*prange,
 struct vm_area_struct *vma, 

[PATCH v4] drm/amdgpu: Add EXT_COHERENT memory allocation flags

2023-09-14 Thread David Francis
These flags (for GEM and SVM allocations) allocate
memory that allows for system-scope atomic semantics.

On GFX943 these flags cause caches to be avoided on
non-local memory.

On all other ASICs they are identical in functionality to the
equivalent COHERENT flags.

Corresponding Thunk patch is at
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88

v3: changed name of flag
v4: added checks for invalid flag combinations

Reviewed-by: David Yat Sin w
Signed-off-by: David Francis 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c|  5 -
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 +-
 include/uapi/drm/amdgpu_drm.h| 10 +-
 include/uapi/linux/kfd_ioctl.h   |  3 +++
 8 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b5b940485059..ec9e9e95e5f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1738,9 +1738,16 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)
+   alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT;
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
 
+   if (((flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) && (flags & 
KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)) ||
+   ((flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED) && (flags & 
KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)) ||
+   ((flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) && (flags & 
KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)))
+   return -EINVAL; 
+
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if (!*mem) {
ret = -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 12210598e5b8..76b618735dc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -331,6 +331,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
 
flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
 AMDGPU_GEM_CREATE_COHERENT |
+AMDGPU_GEM_CREATE_EXT_COHERENT |
 AMDGPU_GEM_CREATE_UNCACHED);
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index fa87a85e1017..c45fde9f1887 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -634,6 +634,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
}
 
if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+  AMDGPU_GEM_CREATE_EXT_COHERENT |
   AMDGPU_GEM_CREATE_UNCACHED))
*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 671e288c7575..dd135d8b25f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -543,6 +543,7 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
}
 
if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+  AMDGPU_GEM_CREATE_EXT_COHERENT |
   AMDGPU_GEM_CREATE_UNCACHED))
*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3d13d0bba7b1..e63d9c39fc35 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1185,7 +1185,8 @@ static void gmc_v9_0_get_coherence_flags(struct 
amdgpu_device *adev,
 {
struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
-   bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
+   bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | 
AMDGPU_GEM_CREATE_EXT_COHERENT);
+   bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
struct amdgpu_vm *vm = mapping->bo_va->base.vm;
unsigned int mtype_local, mtype;
@@ -1253,6 +1254,8 @@ static void 

Re: [PATCH v2] drm/amdgpu: always use legacy tlb flush on cyan_skilfish

2023-09-14 Thread Felix Kuehling

On 2023-09-14 10:02, Christian König wrote:



Am 14.09.23 um 15:59 schrieb Felix Kuehling:


On 2023-09-14 9:39, Christian König wrote:
Is a single legacy flush sufficient to emulate a heavyweight flush 
as well?


On previous generations we needed to issue at least two legacy 
flushes for this.
I assume you are referring to the Vega20 XGMI workaround. That is a 
very different issue. Because PTEs would be cached in L2, we had to 
always use a heavy-weight flush that would also flush the L2 cache as 
well, and follow that with another legacy flush to deal with race 
conditions where stale PTEs could be re-fetched from L2 before the L2 
flush was complete.


No, we also have another (badly documented) workaround which issues a 
legacy flush before each heavy-weight flush on some hw generations. See 
my TLB flush cleanup patches.




A heavy-weight flush guarantees that there are no more possible 
memory accesses using the old PTEs. With physically addressed caches 
on GFXv9 that includes a cache flush because the address translation 
happened before putting data into the cache. I think the address 
translation and cache architecture works differently on GFXv10. So 
maybe the cache-flush is not required here.


But even then a legacy flush probably allows for in-flight memory 
accesses with old physical addresses to complete after the TLB flush. 
So there is a small risk of memory corruption that was assumed to not 
be accessed by the GPU any more. Or when using IOMMU device isolation 
it would result in IOMMU faults if the DMA mappings are invalidated 
slightly too early.


Mhm, that's quite bad. Any idea how to avoid that?


A few ideas

 * Add an arbitrary delay and hope that it is longer than the FIFOs in
   the HW
 * Execute an atomic operation to memory on some GPU engine that could
   act as a fence, maybe just a RELEASE_MEM on the CP to some writeback
   location would do the job
 * If needed, RELEASE_MEM could also perform a cache flush

Regards,
  Felix




Regards,
Christian.



Regards,
  Felix




And please don't push before getting an rb from Felix as well.

Regards,
Christian.


Am 14.09.23 um 11:23 schrieb Lang Yu:

cyan_skilfish has problems with other flush types.

v2: fix incorrect ternary conditional operator usage.(Yifan)

Signed-off-by: Lang Yu 
Cc:  # v5.15+
---
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c

index d3da13f4c80e..c6d11047169a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -236,7 +236,8 @@ static void gmc_v10_0_flush_vm_hub(struct 
amdgpu_device *adev, uint32_t vmid,

  {
  bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, 
vmhub);

  struct amdgpu_vmhub *hub = >vmhub[vmhub];
-    u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, 
flush_type);

+    u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid,
+  (adev->asic_type != CHIP_CYAN_SKILLFISH) ? 
flush_type : 0);

  u32 tmp;
  /* Use register 17 for GART */
  const unsigned int eng = 17;
@@ -331,6 +332,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct 
amdgpu_device *adev, uint32_t vmid,

    int r;
  +    flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? 
flush_type : 0;

+
  /* flush hdp cache */
  adev->hdp.funcs->flush_hdp(adev, NULL);
  @@ -426,6 +429,8 @@ static int 
gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,

  struct amdgpu_ring *ring = >gfx.kiq[0].ring;
  struct amdgpu_kiq *kiq = >gfx.kiq[0];
  +    flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? 
flush_type : 0;

+
  if (amdgpu_emu_mode == 0 && ring->sched.ready) {
  spin_lock(>gfx.kiq[0].ring_lock);
  /* 2 dwords flush + 8 dwords fence */




Re: [PATCH v2] drm/amdgpu: always use legacy tlb flush on cyan_skilfish

2023-09-14 Thread Christian König




Am 14.09.23 um 15:59 schrieb Felix Kuehling:


On 2023-09-14 9:39, Christian König wrote:
Is a single legacy flush sufficient to emulate a heavyweight flush 
as well?


On previous generations we needed to issue at least two legacy 
flushes for this.
I assume you are referring to the Vega20 XGMI workaround. That is a 
very different issue. Because PTEs would be cached in L2, we had to 
always use a heavy-weight flush that would also flush the L2 cache as 
well, and follow that with another legacy flush to deal with race 
conditions where stale PTEs could be re-fetched from L2 before the L2 
flush was complete.


No, we also have another (badly documented) workaround which issues a 
legacy flush before each heavy-weight flush on some hw generations. See my 
TLB flush cleanup patches.




A heavy-weight flush guarantees that there are no more possible memory 
accesses using the old PTEs. With physically addressed caches on GFXv9 
that includes a cache flush because the address translation happened 
before putting data into the cache. I think the address translation 
and cache architecture works differently on GFXv10. So maybe the 
cache-flush is not required here.


But even then a legacy flush probably allows for in-flight memory 
accesses with old physical addresses to complete after the TLB flush. 
So there is a small risk of memory corruption that was assumed to not 
be accessed by the GPU any more. Or when using IOMMU device isolation 
it would result in IOMMU faults if the DMA mappings are invalidated 
slightly too early.


Mhm, that's quite bad. Any idea how to avoid that?

Regards,
Christian.



Regards,
  Felix




And please don't push before getting an rb from Felix as well.

Regards,
Christian.


Am 14.09.23 um 11:23 schrieb Lang Yu:

cyan_skilfish has problems with other flush types.

v2: fix incorrect ternary conditional operator usage.(Yifan)

Signed-off-by: Lang Yu 
Cc:  # v5.15+
---
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c

index d3da13f4c80e..c6d11047169a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -236,7 +236,8 @@ static void gmc_v10_0_flush_vm_hub(struct 
amdgpu_device *adev, uint32_t vmid,

  {
  bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, 
vmhub);

  struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-    u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, 
flush_type);

+    u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid,
+  (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type 
: 0);

  u32 tmp;
  /* Use register 17 for GART */
  const unsigned int eng = 17;
@@ -331,6 +332,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct 
amdgpu_device *adev, uint32_t vmid,

    int r;
  +    flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? 
flush_type : 0;

+
  /* flush hdp cache */
  adev->hdp.funcs->flush_hdp(adev, NULL);
  @@ -426,6 +429,8 @@ static int 
gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,

  struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
  struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
  +    flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? 
flush_type : 0;

+
  if (amdgpu_emu_mode == 0 && ring->sched.ready) {
  spin_lock(&adev->gfx.kiq[0].ring_lock);
  /* 2 dwords flush + 8 dwords fence */






Re: [PATCH v2] drm/amdgpu: always use legacy tlb flush on cyan_skilfish

2023-09-14 Thread Felix Kuehling



On 2023-09-14 9:39, Christian König wrote:
Is a single legacy flush sufficient to emulate a heavyweight flush as
well?


On previous generations we needed to issue at least two legacy flushes 
for this.
I assume you are referring to the Vega20 XGMI workaround. That is a very 
different issue. Because PTEs would be cached in L2, we had to always 
use a heavy-weight flush that would also flush the L2 cache as well, and 
follow that with another legacy flush to deal with race conditions where 
stale PTEs could be re-fetched from L2 before the L2 flush was complete.


A heavy-weight flush guarantees that there are no more possible memory 
accesses using the old PTEs. With physically addressed caches on GFXv9 
that includes a cache flush because the address translation happened 
before putting data into the cache. I think the address translation and 
cache architecture works differently on GFXv10. So maybe the cache-flush 
is not required here.


But even then a legacy flush probably allows for in-flight memory 
accesses with old physical addresses to complete after the TLB flush. So 
there is a small risk of memory corruption that was assumed to not be 
accessed by the GPU any more. Or when using IOMMU device isolation it 
would result in IOMMU faults if the DMA mappings are invalidated 
slightly too early.


Regards,
  Felix




And please don't push before getting an rb from Felix as well.

Regards,
Christian.


Am 14.09.23 um 11:23 schrieb Lang Yu:

cyan_skilfish has problems with other flush types.

v2: fix incorrect ternary conditional operator usage.(Yifan)

Signed-off-by: Lang Yu 
Cc:  # v5.15+
---
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c

index d3da13f4c80e..c6d11047169a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -236,7 +236,8 @@ static void gmc_v10_0_flush_vm_hub(struct 
amdgpu_device *adev, uint32_t vmid,

  {
  bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, 
vmhub);

  struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-    u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, 
flush_type);

+    u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid,
+  (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type 
: 0);

  u32 tmp;
  /* Use register 17 for GART */
  const unsigned int eng = 17;
@@ -331,6 +332,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct 
amdgpu_device *adev, uint32_t vmid,

    int r;
  +    flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? 
flush_type : 0;

+
  /* flush hdp cache */
  adev->hdp.funcs->flush_hdp(adev, NULL);
  @@ -426,6 +429,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,

  struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
  struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
  +    flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? 
flush_type : 0;

+
  if (amdgpu_emu_mode == 0 && ring->sched.ready) {
  spin_lock(&adev->gfx.kiq[0].ring_lock);
  /* 2 dwords flush + 8 dwords fence */




Re: [PATCH] drm/amdkfd: fix add queue process context clear without runtime enable

2023-09-14 Thread Eric Huang



On 2023-09-12 21:52, Jonathan Kim wrote:

There are cases where HSA runtime is not enabled through the
AMDKFD_IOC_RUNTIME_ENABLE call when adding queues and the MES ADD_QUEUE
API should clear the MES process context instead of SET_SHADER_DEBUGGER.
Such examples are legacy HSA runtime builds that do not support the
current exception handling and running KFD tests.

The only time ADD_QUEUE.skip_process_ctx_clear is required is for
debugger use cases where a debugged process is always runtime enabled
when adding a queue.

Tested-by: Shikai Guo 
Signed-off-by: Jonathan Kim 

Reviewed-by: Eric Huang 


---
  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 6d07a5dd2648..77159b03a422 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -227,8 +227,10 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
-   queue_input.skip_process_ctx_clear = 
qpd->pqm->process->debug_trap_enabled ||
-
kfd_dbg_has_ttmps_always_setup(q->device);
+   queue_input.skip_process_ctx_clear =
+   qpd->pqm->process->runtime_info.runtime_state == 
DEBUG_RUNTIME_STATE_ENABLED &&
+   
(qpd->pqm->process->debug_trap_enabled ||
+
kfd_dbg_has_ttmps_always_setup(q->device));
  
  	queue_type = convert_to_mes_queue_type(q->properties.type);

if (queue_type < 0) {




Re: 回复: [PATCH] drm/amdgpu: Ignore first evction failure during suspend

2023-09-14 Thread Christian König

Am 14.09.23 um 15:37 schrieb Felix Kuehling:


Userptr and SVM restore work is scheduled to the system WQ with 
schedule_delayed_work. See amdgpu_amdkfd_evict_userptr and 
svm_range_evict. This would need to use queue_delayed_work with the 
system_freezable_wq.


BO restoration is scheduled with queue_delayed_work on our own 
kfd_restore_wq that was allocated with alloc_ordered_workqueue. This 
would need to add the WQ_FREEZABLE flag when we create the wq in 
kfd_process_create_wq.


There is also evict_process_worker scheduled with 
schedule_delayed_work, which handles stopping of user mode queues, 
signaling of eviction fences and scheduling of restore work when BOs 
are evicted. I think that should not be freezable because it's needed 
to signal the eviction fences to allow suspend to evict BOs.


To make sure I'm not misunderstanding, I assume that freezing a 
freezable workqueue flushes work items in progress and prevents 
execution of more work until it is unfrozen. I assume work items are 
not frozen in the middle of execution, because that would not solve 
the problem.




I was wondering the exact same thing and to be honest I don't know that 
detail either and of hand can't find any documentation about it.


My suspicion is that a work item can freeze when it calls schedule(),
e.g. when taking a lock or similar.


That would then indeed not work at all and we would need to make sure 
that the work is completed manually somehow.


Regards,
Christian.


Regards,
  Felix


On 2023-09-14 2:23, Christian König wrote:

[putting Harry on BCC, sorry for the noise]

Yeah, that is clearly a bug in the KFD.

During the second eviction the hw should already be disabled, so we 
don't have any SDMA or similar to evict BOs any more and can only 
copy them with the CPU.


@Felix what workqueue do you guys use for the restore work? I've just 
double checked and on the system workqueues you explicitly need to 
specify that stuff is freezable. E.g. use system_freezable_wq instead 
of system_wq.


Alternatively as Xinhui mentioned it might be necessary to flush all 
restore work before the first eviction phase or we have the chance 
that BOs are moved back into VRAM again.


Regards,
Christian.

Am 14.09.23 um 03:54 schrieb Pan, Xinhui:


[AMD Official Use Only - General]


I just make one debug patch to show busy BO’s alloc-trace when the 
eviction fails in suspend.


And dmesg log attached.

Looks like they are just kfd user BOs and locked by evict/restore work.

So in the kfd suspend callback, it really needs to flush the
evict/restore work before HW fini as it does now.


That is why the first very early eviction fails and the second 
eviction succeed.


Thanks

xinhui

*From:* Pan, Xinhui
*Sent:* Thursday, September 14, 2023 8:02 AM
*To:* Koenig, Christian ; Kuehling, Felix 
; Christian König 
; amd-gfx@lists.freedesktop.org; 
Wentland, Harry 
*Cc:* Deucher, Alexander ; Fan, Shikang 

*Subject:* RE: 回复: [PATCH] drm/amdgpu: Ignore first evction failure 
during suspend


Chris,

I can dump these busy BOs with their alloc/free stack later today.

BTW, the two evictions and the kfd suspend are all called before 
hw_fini. IOW, between phase 1 and phase 2. SDMA is turned off only in
phase2. So current code works fine maybe.


*From:* Koenig, Christian 
*Sent:* Wednesday, September 13, 2023 10:29 PM
*To:* Kuehling, Felix ; Christian König 
; Pan, Xinhui 
; amd-gfx@lists.freedesktop.org; Wentland, Harry 

*Cc:* Deucher, Alexander ; Fan, Shikang 

*Subject:* Re: 回复: [PATCH] drm/amdgpu: Ignore first evction failure 
during suspend


[+Harry]

Am 13.09.23 um 15:54 schrieb Felix Kuehling:

On 2023-09-13 4:07, Christian König wrote:

[+Felix]

Well that looks like quite a serious bug.

If I'm not completely mistaken the KFD work item tries to
restore the process by moving BOs into memory even after the
suspend freeze. Normally work items are frozen together with
the user space processes unless explicitly marked as not
freezable.

That this causes problem during the first eviction phase is
just the tip of the iceberg here. If a BO is moved into
invisible memory during this we wouldn't be able to get it
out of that in the second phase because SDMA and hw is
already turned off.

@Felix any idea how that can happen? Have you guys marked a
work item / work queue as not freezable?

We don't set anything to non-freezable in KFD.

Regards,
  Felix

Or maybe the display guys?


Do you guys in the display do any delayed update in a work item 
which is marked as not-freezable?


Otherwise I have absolutely no idea what's going on here.

Thanks,
Christian.


@Xinhui please investigate what work item that is and where
that is coming from. Something like "if (adev->in_suspend)
dump_stack();" in the right place should probably do it.

Thanks,
Christian.

Am 

RE: [PATCH] drm/amd/pm: Remove SMUv13.0.6 unsupported feature

2023-09-14 Thread Wang, Yang(Kevin)
[AMD Official Use Only - General]

Reviewed-by: Yang Wang 

Best Regards,
Kevin

-Original Message-
From: Lazar, Lijo 
Sent: Thursday, September 14, 2023 9:06 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Deucher, Alexander 
; Kamal, Asad ; Wang, 
Yang(Kevin) 
Subject: [PATCH] drm/amd/pm: Remove SMUv13.0.6 unsupported feature

Selectively updating feature mask is not supported in SMU v13.0.6.
Remove the callback corresponding to that.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 94b0a7226ba4..4ba6790cac5a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -,7 +,6 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = 
{
.allow_xgmi_power_down = smu_v13_0_6_allow_xgmi_power_down,
.log_thermal_throttling_event = 
smu_v13_0_6_log_thermal_throttling_event,
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
-   .set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
.get_gpu_metrics = smu_v13_0_6_get_gpu_metrics,
.get_thermal_temperature_range = 
smu_v13_0_6_get_thermal_temperature_range,
.mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
--
2.25.1



Re: [PATCH v2] drm/amdgpu: always use legacy tlb flush on cyan_skilfish

2023-09-14 Thread Christian König

Is a single legacy flush sufficient to emulate a heavyweight flush as well?

On previous generations we needed to issue at least two legacy flushes 
for this.


And please don't push before getting an rb from Felix as well.

Regards,
Christian.


Am 14.09.23 um 11:23 schrieb Lang Yu:

cyan_skilfish has problems with other flush types.

v2: fix incorrect ternary conditional operator usage.(Yifan)

Signed-off-by: Lang Yu 
Cc:  # v5.15+
---
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d3da13f4c80e..c6d11047169a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -236,7 +236,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
  {
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid,
+ (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 
0);
u32 tmp;
/* Use register 17 for GART */
const unsigned int eng = 17;
@@ -331,6 +332,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
  
  	int r;
  
+	flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 0;

+
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);
  
@@ -426,6 +429,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,

	struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
  
+	flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 0;

+
if (amdgpu_emu_mode == 0 && ring->sched.ready) {
		spin_lock(&adev->gfx.kiq[0].ring_lock);
/* 2 dwords flush + 8 dwords fence */




Re: 回复: [PATCH] drm/amdgpu: Ignore first evction failure during suspend

2023-09-14 Thread Felix Kuehling
Userptr and SVM restore work is scheduled to the system WQ with 
schedule_delayed_work. See amdgpu_amdkfd_evict_userptr and 
svm_range_evict. This would need to use queue_delayed_work with the 
system_freezable_wq.


BO restoration is scheduled with queue_delayed_work on our own 
kfd_restore_wq that was allocated with alloc_ordered_workqueue. This 
would need to add the WQ_FREEZABLE flag when we create the wq in 
kfd_process_create_wq.


There is also evict_process_worker scheduled with schedule_delayed_work, 
which handles stopping of user mode queues, signaling of eviction fences 
and scheduling of restore work when BOs are evicted. I think that should 
not be freezable because it's needed to signal the eviction fences to 
allow suspend to evict BOs.


To make sure I'm not misunderstanding, I assume that freezing a 
freezable workqueue flushes work items in progress and prevents 
execution of more work until it is unfrozen. I assume work items are not 
frozen in the middle of execution, because that would not solve the problem.


Regards,
  Felix


On 2023-09-14 2:23, Christian König wrote:

[putting Harry on BCC, sorry for the noise]

Yeah, that is clearly a bug in the KFD.

During the second eviction the hw should already be disabled, so we 
don't have any SDMA or similar to evict BOs any more and can only copy 
them with the CPU.


@Felix what workqueue do you guys use for the restore work? I've just 
double checked and on the system workqueues you explicitly need to 
specify that stuff is freezable. E.g. use system_freezable_wq instead 
of system_wq.


Alternatively as Xinhui mentioned it might be necessary to flush all 
restore work before the first eviction phase or we have the chance 
that BOs are moved back into VRAM again.


Regards,
Christian.

Am 14.09.23 um 03:54 schrieb Pan, Xinhui:


[AMD Official Use Only - General]


I just make one debug patch to show busy BO’s alloc-trace when the 
eviction fails in suspend.


And dmesg log attached.

Looks like they are just kfd user BOs and locked by evict/restore work.

So in the kfd suspend callback, it really needs to flush the evict/restore
work before HW fini as it does now.


That is why the first very early eviction fails and the second 
eviction succeed.


Thanks

xinhui

*From:* Pan, Xinhui
*Sent:* Thursday, September 14, 2023 8:02 AM
*To:* Koenig, Christian ; Kuehling, Felix 
; Christian König 
; amd-gfx@lists.freedesktop.org; 
Wentland, Harry 
*Cc:* Deucher, Alexander ; Fan, Shikang 

*Subject:* RE: 回复: [PATCH] drm/amdgpu: Ignore first evction failure 
during suspend


Chris,

I can dump these busy BOs with their alloc/free stack later today.

BTW, the two evictions and the kfd suspend are all called before 
hw_fini. IOW, between phase 1 and phase 2. SDMA is turned off only in
phase2. So current code works fine maybe.


*From:* Koenig, Christian 
*Sent:* Wednesday, September 13, 2023 10:29 PM
*To:* Kuehling, Felix ; Christian König 
; Pan, Xinhui ; 
amd-gfx@lists.freedesktop.org; Wentland, Harry 
*Cc:* Deucher, Alexander ; Fan, Shikang 

*Subject:* Re: 回复: [PATCH] drm/amdgpu: Ignore first evction failure 
during suspend


[+Harry]

Am 13.09.23 um 15:54 schrieb Felix Kuehling:

On 2023-09-13 4:07, Christian König wrote:

[+Felix]

Well that looks like quite a serious bug.

If I'm not completely mistaken the KFD work item tries to
restore the process by moving BOs into memory even after the
suspend freeze. Normally work items are frozen together with
the user space processes unless explicitly marked as not
freezable.

That this causes problem during the first eviction phase is
just the tip of the iceberg here. If a BO is moved into
invisible memory during this we wouldn't be able to get it
out of that in the second phase because SDMA and hw is
already turned off.

@Felix any idea how that can happen? Have you guys marked a
work item / work queue as not freezable?

We don't set anything to non-freezable in KFD.

Regards,
  Felix

Or maybe the display guys?


Do you guys in the display do any delayed update in a work item which 
is marked as not-freezable?


Otherwise I have absolutely no idea what's going on here.

Thanks,
Christian.


@Xinhui please investigate what work item that is and where
that is coming from. Something like "if (adev->in_suspend)
dump_stack();" in the right place should probably do it.

Thanks,
Christian.

Am 13.09.23 um 07:13 schrieb Pan, Xinhui:

[AMD Official Use Only - General]

I notice that only user space process are frozen on my
side.  kthread and workqueue  keeps running. Maybe some
kernel configs are not enabled.

I made one module which just prints something like i++
with mutex lock both in workqueue and kthread. I paste
some logs below.


Re: [PATCH] drm/amd/pm: Remove SMUv13.0.6 unsupported feature

2023-09-14 Thread Alex Deucher
On Thu, Sep 14, 2023 at 9:31 AM Lijo Lazar  wrote:
>
> Selectively updating feature mask is not supported in SMU v13.0.6.
> Remove the callback corresponding to that.
>
> Signed-off-by: Lijo Lazar 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index 94b0a7226ba4..4ba6790cac5a 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -,7 +,6 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs 
> = {
> .allow_xgmi_power_down = smu_v13_0_6_allow_xgmi_power_down,
> .log_thermal_throttling_event = 
> smu_v13_0_6_log_thermal_throttling_event,
> .get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
> -   .set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
> .get_gpu_metrics = smu_v13_0_6_get_gpu_metrics,
> .get_thermal_temperature_range = 
> smu_v13_0_6_get_thermal_temperature_range,
> .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
> --
> 2.25.1
>


[PATCH] fix a memory leak in amdgpu_ras_feature_enable

2023-09-14 Thread Cong Liu
This patch fixes a memory leak in the amdgpu_ras_feature_enable() function.
The leak occurs when the function sends a command to the firmware to enable
or disable a RAS feature for a GFX block. If the command fails, the kfree()
function is not called to free the info memory.

Fixes: bf7aa8bea9cb ("drm/amdgpu: Free ras cmd input buffer properly")
Signed-off-by: Cong Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 8eb6f6943778..b4a8ea946410 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -802,6 +802,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
enable ? "enable":"disable",
get_ras_block_str(head),
amdgpu_ras_is_poison_mode_supported(adev), ret);
+   kfree(info);
return ret;
}
 
-- 
2.34.1



Fwd: Kernel 6.6-rc1 fails to reboot or shutdown Ryzen 5825U

2023-09-14 Thread Bagas Sanjaya
Hi,

I notice a regression report on Bugzilla [1]. Quoting from it:

> The Kernel stalls at boot very long with a drm-amdgpu message, but fails to 
> restart or shutdown with secure boot enabled or not. Magic key works to exit. 
> Nothing wrong in the Kernel 6.5 cycle.

Later, the reporter (Cc'ed) described the regression:

> Let me be clearer, it does not shutdown at all: magic key for shut down has 
> no effect (o or b). The keyboard is dead. Plus, $ shutdown -r now hangs too. 
> Restart works when using Alt+PrtSc+b. Same when booting stalls for long.
> 
> We started bisecting with 20230903 daily kernel, the bug was there. 6.6-rc1 
> has been removed. Take good note that next boot log after shutdown may or may 
> not be the same log. Plus, booting requires now and then magic key to 
> restart, because the Kernel hangs.  In this case, we must click enter twice + 
> Esc to boot in desktop. 
> 
> It booted ok after a cold shutdown with enter twice and ESC ounce + 
> backspace. 
> ...
> In all cases, tpm and secure boot are enabled. If secure boot is disabled, 
> when we shut down, magic key works to restart.

He then pasted journalctl excerpt at the point where the hang occured:

> This where it stalls for restart. Shut down hangs at the Lenovo image:
> 
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: SE 1, SH per SE 1, CU 
> per SH 8, active_cu_number 8
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring gfx uses VM inv 
> eng 0 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring gfx_low uses VM 
> inv eng 1 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring gfx_high uses VM 
> inv eng 4 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring comp_1.0.0 uses 
> VM inv eng 5 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring comp_1.1.0 uses 
> VM inv eng 6 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring comp_1.2.0 uses 
> VM inv eng 7 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring comp_1.3.0 uses 
> VM inv eng 8 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring comp_1.0.1 uses 
> VM inv eng 9 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring comp_1.1.1 uses 
> VM inv eng 10 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring comp_1.2.1 uses 
> VM inv eng 11 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring comp_1.3.1 uses 
> VM inv eng 12 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring kiq_0.2.1.0 uses 
> VM inv eng 13 on hub 0
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring sdma0 uses VM 
> inv eng 0 on hub 8
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring vcn_dec uses VM 
> inv eng 1 on hub 8
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring vcn_enc0 uses VM 
> inv eng 4 on hub 8
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring vcn_enc1 uses VM 
> inv eng 5 on hub 8
> Sep 13 21:43:08 mm kernel: amdgpu :04:00.0: amdgpu: ring jpeg_dec uses VM 
> inv eng 6 on hub 8
> Sep 13 21:43:08 mm kernel: [drm] Initialized amdgpu 3.54.0 20150101 for 
> :04:00.0 on minor 0
> Sep 13 21:43:08 mm kernel: fbcon: amdgpudrmfb (fb0) is primary device
> Sep 13 21:43:08 mm kernel: [drm] DSC precompute is not needed.

See Bugzilla for the full thread and links to complete journalctl log.

Anyway, I'm adding this regression to regzbot:

#regzbot introduced: v6.5..v6.6 
https://bugzilla.kernel.org/show_bug.cgi?id=217905
#regzbot title: shutdown/reboot hang on Ryzen 5825U (stuck on amdgpu 
initialization)

Thanks.

[1]: https://bugzilla.kernel.org/show_bug.cgi?id=217905

-- 
An old man doll... just what I always wanted! - Clara


[PATCH] alpha: clean up some inconsistent indenting

2023-09-14 Thread Jiapeng Chong
No functional modification involved.

drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c:491 
dcn32_auto_dpm_test_log() warn: inconsistent indenting.

Reported-by: Abaci Robot 
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6574
Signed-off-by: Jiapeng Chong 
---
 .../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  | 72 +--
 1 file changed, 34 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 37ffa0050e60..e34b1d6dd964 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -460,25 +460,25 @@ static int dcn32_get_dispclk_from_dentist(struct clk_mgr 
*clk_mgr_base)
 
 static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct 
clk_mgr_internal *clk_mgr)
 {
-unsigned int dispclk_khz_reg= REG_READ(CLK1_CLK0_CURRENT_CNT); // 
DISPCLK
-unsigned int dppclk_khz_reg = REG_READ(CLK1_CLK1_CURRENT_CNT); // 
DPPCLK
-unsigned int dprefclk_khz_reg   = REG_READ(CLK1_CLK2_CURRENT_CNT); // 
DPREFCLK
-unsigned int dcfclk_khz_reg = REG_READ(CLK1_CLK3_CURRENT_CNT); // 
DCFCLK
-unsigned int dtbclk_khz_reg = REG_READ(CLK1_CLK4_CURRENT_CNT); // 
DTBCLK
-unsigned int fclk_khz_reg   = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK
-
-// Overrides for these clocks in case there is no p_state change support
-int dramclk_khz_override = new_clocks->dramclk_khz;
-int fclk_khz_override = new_clocks->fclk_khz;
-
-int num_fclk_levels = 
clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1;
-
-if (!new_clocks->p_state_change_support) {
-   dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 
1000;
-}
-if (!new_clocks->fclk_p_state_change_support) {
-   fclk_khz_override = 
clk_mgr->base.bw_params->clk_table.entries[num_fclk_levels].fclk_mhz * 1000;
-}
+   unsigned int dispclk_khz_reg= REG_READ(CLK1_CLK0_CURRENT_CNT); // 
DISPCLK
+   unsigned int dppclk_khz_reg = REG_READ(CLK1_CLK1_CURRENT_CNT); // 
DPPCLK
+   unsigned int dprefclk_khz_reg   = REG_READ(CLK1_CLK2_CURRENT_CNT); // 
DPREFCLK
+   unsigned int dcfclk_khz_reg = REG_READ(CLK1_CLK3_CURRENT_CNT); // 
DCFCLK
+   unsigned int dtbclk_khz_reg = REG_READ(CLK1_CLK4_CURRENT_CNT); // 
DTBCLK
+   unsigned int fclk_khz_reg   = REG_READ(CLK4_CLK0_CURRENT_CNT); // 
FCLK
+
+   // Overrides for these clocks in case there is no p_state change support
+   int dramclk_khz_override = new_clocks->dramclk_khz;
+   int fclk_khz_override = new_clocks->fclk_khz;
+
+   int num_fclk_levels = 
clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1;
+
+   if (!new_clocks->p_state_change_support) {
+   dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz 
* 1000;
+   }
+   if (!new_clocks->fclk_p_state_change_support) {
+   fclk_khz_override = 
clk_mgr->base.bw_params->clk_table.entries[num_fclk_levels].fclk_mhz * 1000;
+   }
 


//  IMPORTANT:  When adding more clocks to these logs, do NOT 
put a newline
@@ -488,26 +488,22 @@ static void dcn32_auto_dpm_test_log(struct dc_clocks 
*new_clocks, struct clk_mgr
//
//  AutoDPMTest: clk1:%d - clk2:%d - 
clk3:%d - clk4:%d\n"


-   if (new_clocks &&
-   new_clocks->dramclk_khz > 0 &&
-   new_clocks->fclk_khz > 0 &&
-   new_clocks->dcfclk_khz > 0 &&
-   new_clocks->dppclk_khz > 0) {
-
+   if (new_clocks && new_clocks->dramclk_khz > 0 && new_clocks->fclk_khz > 
0 &&
+   new_clocks->dcfclk_khz > 0 && new_clocks->dppclk_khz > 0) {
DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - "
-   "dcfclk:%d - dppclk:%d - dispclk_hw:%d - "
-   "dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - "
-   "dtbclk_hw:%d - fclk_hw:%d\n",
-   dramclk_khz_override,
-   fclk_khz_override,
-   new_clocks->dcfclk_khz,
-   new_clocks->dppclk_khz,
-   dispclk_khz_reg,
-   dppclk_khz_reg,
-   dprefclk_khz_reg,
-   dcfclk_khz_reg,
-   dtbclk_khz_reg,
-   fclk_khz_reg);
+"dcfclk:%d - dppclk:%d - dispclk_hw:%d - "
+"dppclk_hw:%d - dprefclk_hw:%d - 
dcfclk_hw:%d - "
+"dtbclk_hw:%d - fclk_hw:%d\n",
+   

[PATCH] drm/amd/pm: Remove SMUv13.0.6 unsupported feature

2023-09-14 Thread Lijo Lazar
Selectively updating feature mask is not supported in SMU v13.0.6.
Remove the callback corresponding to that.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 94b0a7226ba4..4ba6790cac5a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -,7 +,6 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = 
{
.allow_xgmi_power_down = smu_v13_0_6_allow_xgmi_power_down,
.log_thermal_throttling_event = 
smu_v13_0_6_log_thermal_throttling_event,
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
-   .set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
.get_gpu_metrics = smu_v13_0_6_get_gpu_metrics,
.get_thermal_temperature_range = 
smu_v13_0_6_get_thermal_temperature_range,
.mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
-- 
2.25.1



RE: [PATCH] drm/amdgpu: Fix vbios version string search

2023-09-14 Thread Zhang, Hawking
[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Lazar, Lijo 
Sent: Thursday, September 14, 2023 17:24
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Deucher, Alexander 

Subject: [PATCH] drm/amdgpu: Fix vbios version string search

Search for vbios version string in STRING_OFFSET-ATOM_ROM_HEADER region first. 
If those offsets are not populated, use the hardcoded region.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/atom.c | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c 
b/drivers/gpu/drm/amd/amdgpu/atom.c
index 9f63ddb89b75..2c221000782c 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1444,10 +1444,27 @@ static void atom_get_vbios_pn(struct atom_context *ctx)

 static void atom_get_vbios_version(struct atom_context *ctx)  {
+   unsigned short start = 3, end;
unsigned char *vbios_ver;
+   unsigned char *p_rom;
+
+   p_rom = ctx->bios;
+   /* Search from strings offset if it's present */
+   start = *(unsigned short *)(p_rom +
+   OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+   /* Search till atom rom header start point */
+   end = *(unsigned short *)(p_rom + OFFSET_TO_ATOM_ROM_HEADER_POINTER);
+
+   /* Use hardcoded offsets, if the offsets are not populated */
+   if (end <= start) {
+   start = 3;
+   end = 1024;
+   }

/* find anchor ATOMBIOSBK-AMD */
-   vbios_ver = atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, 3, 1024, 64);
+   vbios_ver =
+   atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, start, end, 64);
if (vbios_ver != NULL) {
/* skip ATOMBIOSBK-AMD VER */
vbios_ver += 18;
--
2.25.1



RE: [PATCH] fix a memory leak in amdgpu_ras_feature_enable

2023-09-14 Thread Zhang, Hawking
[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Cong Liu 
Sent: Thursday, September 14, 2023 17:46
To: Deucher, Alexander ; Koenig, Christian 
; Pan, Xinhui ; David Airlie 
; Daniel Vetter ; Yang, Stanley 
; Zhang, Hawking 
Cc: Cong Liu ; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org; linux-ker...@vger.kernel.org
Subject: [PATCH] fix a memory leak in amdgpu_ras_feature_enable

This patch fixes a memory leak in the amdgpu_ras_feature_enable() function.
The leak occurs when the function sends a command to the firmware to enable or 
disable a RAS feature for a GFX block. If the command fails, the kfree() 
function is not called to free the info memory.

Fixes: bf7aa8bea9cb ("drm/amdgpu: Free ras cmd input buffer properly")
Signed-off-by: Cong Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 8eb6f6943778..b4a8ea946410 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -802,6 +802,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
enable ? "enable":"disable",
get_ras_block_str(head),
amdgpu_ras_is_poison_mode_supported(adev), ret);
+   kfree(info);
return ret;
}

--
2.34.1



RE: [PATCH v2] drm/amdgpu: always use legacy tlb flush on cyan_skilfish

2023-09-14 Thread Zhang, Yifan
[AMD Official Use Only - General]

This patch is :

Reviewed-by: Yifan Zhang 

-Original Message-
From: Yu, Lang 
Sent: Thursday, September 14, 2023 5:24 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Koenig, Christian 
; Zhang, Yifan ; Yu, Lang 
; sta...@vger.kernel.org
Subject: [PATCH v2] drm/amdgpu: always use legacy tlb flush on cyan_skilfish

cyan_skilfish has problems with other flush types.

v2: fix incorrect ternary conditional operator usage.(Yifan)

Signed-off-by: Lang Yu 
Cc:  # v5.15+
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d3da13f4c80e..c6d11047169a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -236,7 +236,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,  {
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid,
+ (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 
0);
u32 tmp;
/* Use register 17 for GART */
const unsigned int eng = 17;
@@ -331,6 +332,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,

int r;

+   flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type :
+0;
+
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);

@@ -426,6 +429,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
	struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];

+   flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type :
+0;
+
if (amdgpu_emu_mode == 0 && ring->sched.ready) {
	spin_lock(&adev->gfx.kiq[0].ring_lock);
/* 2 dwords flush + 8 dwords fence */
--
2.25.1



[PATCH v2] drm/amdgpu: always use legacy tlb flush on cyan_skilfish

2023-09-14 Thread Lang Yu
cyan_skilfish has problems with other flush types.

v2: fix incorrect ternary conditional operator usage.(Yifan)

Signed-off-by: Lang Yu 
Cc:  # v5.15+
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d3da13f4c80e..c6d11047169a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -236,7 +236,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
 {
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid,
+ (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 
0);
u32 tmp;
/* Use register 17 for GART */
const unsigned int eng = 17;
@@ -331,6 +332,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
int r;
 
+   flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 0;
+
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);
 
@@ -426,6 +429,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
	struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
 
+   flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 0;
+
if (amdgpu_emu_mode == 0 && ring->sched.ready) {
	spin_lock(&adev->gfx.kiq[0].ring_lock);
/* 2 dwords flush + 8 dwords fence */
-- 
2.25.1



[PATCH] drm/amdgpu: Fix vbios version string search

2023-09-14 Thread Lijo Lazar
Search for vbios version string in STRING_OFFSET-ATOM_ROM_HEADER region
first. If those offsets are not populated, use the hardcoded region.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/atom.c | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c 
b/drivers/gpu/drm/amd/amdgpu/atom.c
index 9f63ddb89b75..2c221000782c 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1444,10 +1444,27 @@ static void atom_get_vbios_pn(struct atom_context *ctx)
 
 static void atom_get_vbios_version(struct atom_context *ctx)
 {
+   unsigned short start = 3, end;
unsigned char *vbios_ver;
+   unsigned char *p_rom;
+
+   p_rom = ctx->bios;
+   /* Search from strings offset if it's present */
+   start = *(unsigned short *)(p_rom +
+   OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+   /* Search till atom rom header start point */
+   end = *(unsigned short *)(p_rom + OFFSET_TO_ATOM_ROM_HEADER_POINTER);
+
+   /* Use hardcoded offsets, if the offsets are not populated */
+   if (end <= start) {
+   start = 3;
+   end = 1024;
+   }
 
/* find anchor ATOMBIOSBK-AMD */
-   vbios_ver = atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, 3, 1024, 64);
+   vbios_ver =
+   atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, start, end, 64);
if (vbios_ver != NULL) {
/* skip ATOMBIOSBK-AMD VER */
vbios_ver += 18;
-- 
2.25.1



Re: [PATCH] drm/amdgpu: always use legacy tlb flush on cyan_skilfish

2023-09-14 Thread Lang Yu
On 09/14/ , Zhang, Yifan wrote:
> [Public]
> 
> -Original Message-
> From: amd-gfx  On Behalf Of Lang Yu
> Sent: Thursday, September 14, 2023 3:00 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Yu, Lang 
> ; Koenig, Christian ; 
> sta...@vger.kernel.org
> Subject: [PATCH] drm/amdgpu: always use legacy tlb flush on cyan_skilfish
> 
> cyan_skilfish has problems with other flush types.
> 
> Signed-off-by: Lang Yu 
> Cc:  # v5.15+
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index d3da13f4c80e..27504ac21653 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -236,7 +236,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
> *adev, uint32_t vmid,  {
> bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
> struct amdgpu_vmhub *hub = >vmhub[vmhub];
> -   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
> +   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid,
> + (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 
> 0);
> u32 tmp;
> /* Use register 17 for GART */
> const unsigned int eng = 17;
> @@ -331,6 +332,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
> 
> int r;
> 
> +   flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? : 0;
> Should be flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type 
> : 0; here ?
> +
> /* flush hdp cache */
> adev->hdp.funcs->flush_hdp(adev, NULL);
> 
> @@ -426,6 +429,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
> amdgpu_device *adev,
> struct amdgpu_ring *ring = >gfx.kiq[0].ring;
> struct amdgpu_kiq *kiq = >gfx.kiq[0];
> 
> +   flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? : 0;
> Should be flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type 
> : 0; here ?

Yes, thank you! Will fix it.

Regards,
Lang

> if (amdgpu_emu_mode == 0 && ring->sched.ready) {
> spin_lock(>gfx.kiq[0].ring_lock);
> /* 2 dwords flush + 8 dwords fence */
> --
> 2.25.1
> 


RE: [PATCH] drm/amdgpu: always use legacy tlb flush on cyan_skilfish

2023-09-14 Thread Zhang, Yifan
[Public]

-Original Message-
From: amd-gfx  On Behalf Of Lang Yu
Sent: Thursday, September 14, 2023 3:00 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Yu, Lang ; 
Koenig, Christian ; sta...@vger.kernel.org
Subject: [PATCH] drm/amdgpu: always use legacy tlb flush on cyan_skilfish

cyan_skilfish has problems with other flush types.

Signed-off-by: Lang Yu 
Cc:  # v5.15+
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d3da13f4c80e..27504ac21653 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -236,7 +236,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,  {
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = >vmhub[vmhub];
-   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid,
+ (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 
0);
u32 tmp;
/* Use register 17 for GART */
const unsigned int eng = 17;
@@ -331,6 +332,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,

int r;

+   flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? : 0;
Should be flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 
0; here ?
+
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);

@@ -426,6 +429,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
struct amdgpu_ring *ring = >gfx.kiq[0].ring;
struct amdgpu_kiq *kiq = >gfx.kiq[0];

+   flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? : 0;
Should be flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 
0; here ?
+
if (amdgpu_emu_mode == 0 && ring->sched.ready) {
spin_lock(>gfx.kiq[0].ring_lock);
/* 2 dwords flush + 8 dwords fence */
--
2.25.1



Re: [PATCH v6 4/9] drm/amdgpu: create GFX-gen11 usermode queue

2023-09-14 Thread Shashank Sharma



On 14/09/2023 09:45, Christian König wrote:

Am 08.09.23 um 18:04 schrieb Shashank Sharma:

A Memory queue descriptor (MQD) of a userqueue defines it in
the hw's context. As MQD format can vary between different
graphics IPs, we need gfx GEN specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues.
- Adds new functions to create and destroy userqueue MQD for
   GFX-GEN-11 IP

V1: Worked on review comments from Alex:
 - Make MQD functions GEN and IP specific

V2: Worked on review comments from Alex:
 - Reuse the existing adev->mqd[ip] for MQD creation
 - Formatting and arrangement of code

V3:
 - Integration with doorbell manager

V4: Review comments addressed:
 - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
 - Align name of structure members (Luben)
 - Don't break up the Cc tag list and the Sob tag list in commit
   message (Luben)
V5:
    - No need to reserve the bo for MQD (Christian).
    - Some more changes to support IP specific MQD creation.

V6:
    - Add a comment reminding us to replace the 
amdgpu_bo_create_kernel()
  calls while creating MQD object to amdgpu_bo_create() once 
eviction

  fences are ready (Christian).

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    | 77 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  7 ++
  3 files changed, 100 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index 44769423ba30..03fc8e89eafb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -140,12 +140,28 @@ int amdgpu_userq_ioctl(struct drm_device *dev, 
void *data,

  return r;
  }
  +extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;
+
+static void
+amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
+{
+    int maj;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    uint32_t version = adev->ip_versions[GC_HWIP][0];
+
+    /* We support usermode queue only for GFX V11 as of now */
+    maj = IP_VERSION_MAJ(version);
+    if (maj == 11)
+    uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = _gfx_v11_funcs;
+}


That belongs into gfx_v11.c and not here.



Agree,




+
  int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, 
struct amdgpu_device *adev)

  {
  mutex_init(_mgr->userq_mutex);
  idr_init_base(_mgr->userq_idr, 1);
  userq_mgr->adev = adev;
  +    amdgpu_userqueue_setup_gfx(userq_mgr);
  return 0;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index 0451533ddde4..6760abda08df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -30,6 +30,7 @@
  #include "amdgpu_psp.h"
  #include "amdgpu_smu.h"
  #include "amdgpu_atomfirmware.h"
+#include "amdgpu_userqueue.h"
  #include "imu_v11_0.h"
  #include "soc21.h"
  #include "nvd.h"
@@ -6422,3 +6423,79 @@ const struct amdgpu_ip_block_version 
gfx_v11_0_ip_block =

  .rev = 0,
  .funcs = _v11_0_ip_funcs,
  };
+
+static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
+  struct drm_amdgpu_userq_in *args_in,
+  struct amdgpu_usermode_queue *queue)
+{
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct amdgpu_mqd *mqd_gfx_generic = >mqds[AMDGPU_HW_IP_GFX];
+    struct drm_amdgpu_userq_mqd_gfx_v11_0 mqd_user;
+    struct amdgpu_mqd_prop userq_props;
+    int r;
+
+    /* Incoming MQD parameters from userspace to be saved here */
+    memset(_user, 0, sizeof(mqd_user));
+
+    /* Structure to initialize MQD for userqueue using generic MQD 
init function */

+    memset(_props, 0, sizeof(userq_props));
+
+    if (args_in->mqd_size != sizeof(struct 
drm_amdgpu_userq_mqd_gfx_v11_0)) {

+    DRM_ERROR("MQD size mismatch\n");
+    return -EINVAL;
+    }
+
+    if (copy_from_user(_user, u64_to_user_ptr(args_in->mqd), 
args_in->mqd_size)) {

+    DRM_ERROR("Failed to get user MQD\n");
+    return -EFAULT;
+    }
+
+    /*
+ * Create BO for actual Userqueue MQD now
+ * Todo: replace the calls to bo_create_kernel() with 
bo_create() and use

+ * implicit pinning for the MQD buffers.


Well not implicit pinning, but rather fencing of the BO.


Noted.

- Shashank



Regards,
Christian.


+ */
+    r = amdgpu_bo_create_kernel(adev, mqd_gfx_generic->mqd_size, 
PAGE_SIZE,

+    AMDGPU_GEM_DOMAIN_GTT,
+    >mqd.obj,
+    >mqd.gpu_addr,
+    >mqd.cpu_ptr);
+    if (r) {
+    DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
+    return -ENOMEM;
+    }
+    memset(queue->mqd.cpu_ptr, 0, mqd_gfx_generic->mqd_size);
+
+    /* Initialize the 

Re: [PATCH v6 4/9] drm/amdgpu: create GFX-gen11 usermode queue

2023-09-14 Thread Christian König

Am 08.09.23 um 18:04 schrieb Shashank Sharma:

A Memory queue descriptor (MQD) of a userqueue defines it in
the hw's context. As MQD format can vary between different
graphics IPs, we need gfx GEN specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues.
- Adds new functions to create and destroy userqueue MQD for
   GFX-GEN-11 IP

V1: Worked on review comments from Alex:
 - Make MQD functions GEN and IP specific

V2: Worked on review comments from Alex:
 - Reuse the existing adev->mqd[ip] for MQD creation
 - Formatting and arrangement of code

V3:
 - Integration with doorbell manager

V4: Review comments addressed:
 - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
 - Align name of structure members (Luben)
 - Don't break up the Cc tag list and the Sob tag list in commit
   message (Luben)
V5:
- No need to reserve the bo for MQD (Christian).
- Some more changes to support IP specific MQD creation.

V6:
- Add a comment reminding us to replace the amdgpu_bo_create_kernel()
  calls while creating MQD object to amdgpu_bo_create() once eviction
  fences are ready (Christian).

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c| 77 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h|  7 ++
  3 files changed, 100 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 44769423ba30..03fc8e89eafb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -140,12 +140,28 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
return r;
  }
  
+extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;

+
+static void
+amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
+{
+   int maj;
+   struct amdgpu_device *adev = uq_mgr->adev;
+   uint32_t version = adev->ip_versions[GC_HWIP][0];
+
+   /* We support usermode queue only for GFX V11 as of now */
+   maj = IP_VERSION_MAJ(version);
+   if (maj == 11)
+		uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;
+}


That belongs into gfx_v11.c and not here.


+
  int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct 
amdgpu_device *adev)
  {
	mutex_init(&userq_mgr->userq_mutex);
	idr_init_base(&userq_mgr->userq_idr, 1);
userq_mgr->adev = adev;
  
+	amdgpu_userqueue_setup_gfx(userq_mgr);

return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index 0451533ddde4..6760abda08df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -30,6 +30,7 @@
  #include "amdgpu_psp.h"
  #include "amdgpu_smu.h"
  #include "amdgpu_atomfirmware.h"
+#include "amdgpu_userqueue.h"
  #include "imu_v11_0.h"
  #include "soc21.h"
  #include "nvd.h"
@@ -6422,3 +6423,79 @@ const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
.rev = 0,
	.funcs = &gfx_v11_0_ip_funcs,
  };
+
+static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
+ struct drm_amdgpu_userq_in *args_in,
+ struct amdgpu_usermode_queue *queue)
+{
+   struct amdgpu_device *adev = uq_mgr->adev;
+   struct amdgpu_mqd *mqd_gfx_generic = &adev->mqds[AMDGPU_HW_IP_GFX];
+   struct drm_amdgpu_userq_mqd_gfx_v11_0 mqd_user;
+   struct amdgpu_mqd_prop userq_props;
+   int r;
+
+   /* Incoming MQD parameters from userspace to be saved here */
+   memset(&mqd_user, 0, sizeof(mqd_user));
+
+   /* Structure to initialize MQD for userqueue using generic MQD init 
function */
+   memset(&userq_props, 0, sizeof(userq_props));
+
+   if (args_in->mqd_size != sizeof(struct drm_amdgpu_userq_mqd_gfx_v11_0)) 
{
+   DRM_ERROR("MQD size mismatch\n");
+   return -EINVAL;
+   }
+
+   if (copy_from_user(&mqd_user, u64_to_user_ptr(args_in->mqd), 
args_in->mqd_size)) {
+   DRM_ERROR("Failed to get user MQD\n");
+   return -EFAULT;
+   }
+
+   /*
+* Create BO for actual Userqueue MQD now
+* Todo: replace the calls to bo_create_kernel() with bo_create() and 
use
+* implicit pinning for the MQD buffers.


Well not implicit pinning, but rather fencing of the BO.

Regards,
Christian.


+*/
+   r = amdgpu_bo_create_kernel(adev, mqd_gfx_generic->mqd_size, PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_GTT,
+   &queue->mqd.obj,
+   &queue->mqd.gpu_addr,
+   &queue->mqd.cpu_ptr);
+   if (r) {
+   DRM_ERROR("Failed to allocate BO for userqueue 

[PATCH] drm/amdgpu: always use legacy tlb flush on cyan_skilfish

2023-09-14 Thread Lang Yu
cyan_skilfish has problems with other flush types.

Signed-off-by: Lang Yu 
Cc:  # v5.15+
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d3da13f4c80e..27504ac21653 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -236,7 +236,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
 {
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+   u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid,
+ (adev->asic_type != CHIP_CYAN_SKILLFISH) ? flush_type : 
0);
u32 tmp;
/* Use register 17 for GART */
const unsigned int eng = 17;
@@ -331,6 +332,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
int r;
 
+   flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? : 0;
+
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);
 
@@ -426,6 +429,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
	struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
 
+   flush_type = (adev->asic_type != CHIP_CYAN_SKILLFISH) ? : 0;
+
if (amdgpu_emu_mode == 0 && ring->sched.ready) {
	spin_lock(&adev->gfx.kiq[0].ring_lock);
/* 2 dwords flush + 8 dwords fence */
-- 
2.25.1



RE: [PATCH] Revert "drm/amdgpu: Report vbios version instead of PN"

2023-09-14 Thread Zhang, Hawking
[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Lazar, Lijo 
Sent: Thursday, September 14, 2023 14:18
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Deucher, Alexander 

Subject: [PATCH] Revert "drm/amdgpu: Report vbios version instead of PN"

This reverts commit c187a67725b47f9c1603359a51b79cc19e27442a.

vbios_version sysfs node is used to identify Part Number also. Revert to the 
same so that it doesn't break scripts/software which parse this.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 73ee14f7a9a4..dce9e7d5e4ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1776,7 +1776,7 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct 
device *dev,
struct amdgpu_device *adev = drm_to_adev(ddev);
struct atom_context *ctx = adev->mode_info.atom_context;

-   return sysfs_emit(buf, "%s\n", ctx->vbios_ver_str);
+   return sysfs_emit(buf, "%s\n", ctx->vbios_pn);
 }

 static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
--
2.25.1



Re: 回复: [PATCH] drm/amdgpu: Ignore first evction failure during suspend

2023-09-14 Thread Christian König

[putting Harry on BCC, sorry for the noise]

Yeah, that is clearly a bug in the KFD.

During the second eviction the hw should already be disabled, so we 
don't have any SDMA or similar to evict BOs any more and can only copy 
them with the CPU.


@Felix what workqueue do you guys use for the restore work? I've just 
double checked and on the system workqueues you explicitly need to 
specify that stuff is freezable. E.g. use system_freezable_wq instead of 
system_wq.


Alternatively as Xinhui mentioned it might be necessary to flush all 
restore work before the first eviction phase or we have the chance that 
BOs are moved back into VRAM again.


Regards,
Christian.

Am 14.09.23 um 03:54 schrieb Pan, Xinhui:


[AMD Official Use Only - General]


I just make one debug patch to show busy BO’s alloc-trace when the 
eviction fails in suspend.


And dmesg log attached.

Looks like they are just kfd user Bos and locked by evict/restore work.

So in kfd suspend callback, it really need to flush the evict/restore 
work before HW fini as it do now.


That is why the first very early eviction fails and the second 
eviction succeed.


Thanks

xinhui

*From:* Pan, Xinhui
*Sent:* Thursday, September 14, 2023 8:02 AM
*To:* Koenig, Christian ; Kuehling, Felix 
; Christian König 
; amd-gfx@lists.freedesktop.org; 
Wentland, Harry 
*Cc:* Deucher, Alexander ; Fan, Shikang 

*Subject:* RE: 回复: [PATCH] drm/amdgpu: Ignore first evction failure 
during suspend


Chris,

I can dump these busy BOs with their alloc/free stack later today.

BTW, the two evictions and the kfd suspend are all called before 
hw_fini. IOW, between phase 1 and phase 2. SDMA is turned only in 
phase2. So current code works fine maybe.


*From:* Koenig, Christian 
*Sent:* Wednesday, September 13, 2023 10:29 PM
*To:* Kuehling, Felix ; Christian König 
; Pan, Xinhui ; 
amd-gfx@lists.freedesktop.org; Wentland, Harry 
*Cc:* Deucher, Alexander ; Fan, Shikang 

*Subject:* Re: 回复: [PATCH] drm/amdgpu: Ignore first evction failure 
during suspend


[+Harry]

Am 13.09.23 um 15:54 schrieb Felix Kuehling:

On 2023-09-13 4:07, Christian König wrote:

[+Fleix]

Well that looks like quite a serious bug.

If I'm not completely mistaken the KFD work item tries to
restore the process by moving BOs into memory even after the
suspend freeze. Normally work items are frozen together with
the user space processes unless explicitly marked as not
freezable.

That this causes problem during the first eviction phase is
just the tip of the iceberg here. If a BO is moved into
invisible memory during this we wouldn't be able to get it out
of that in the second phase because SDMA and hw is already
turned off.

@Felix any idea how that can happen? Have you guys marked a
work item / work queue as not freezable?

We don't set anything to non-freezable in KFD.

Regards,
  Felix

Or maybe the display guys?


Do you guys in the display do any delayed update in a work item which 
is marked as not-freezable?


Otherwise I have absolutely no idea what's going on here.

Thanks,
Christian.


@Xinhui please investigate what work item that is and where
that is coming from. Something like "if (adev->in_suspend)
dump_stack();" in the right place should probably do it.

Thanks,
Christian.

Am 13.09.23 um 07:13 schrieb Pan, Xinhui:

[AMD Official Use Only - General]

I notice that only user space process are frozen on my
side.  kthread and workqueue  keeps running. Maybe some
kernel configs are not enabled.

I made one module which just prints something like i++
with mutex lock both in workqueue and kthread. I paste
some logs below.

[438619.696196] XH: 14 from workqueue

[438619.700193] XH: 15 from kthread

[438620.394335] PM: suspend entry (deep)

[438620.399619] Filesystems sync: 0.001 seconds

[438620.403887] PM: Preparing system for sleep (deep)

[438620.409299] Freezing user space processes

[438620.414862] Freezing user space processes completed
(elapsed 0.001 seconds)

[438620.421881] OOM killer disabled.

[438620.425197] Freezing remaining freezable tasks

[438620.430890] Freezing remaining freezable tasks
completed (elapsed 0.001 seconds)

[438620.438348] PM: Suspending system (deep)

.

[438623.746038] PM: suspend of devices complete after
3303.137 msecs

[438623.752125] PM: start suspend of devices complete
after 3309.713 msecs

[438623.758722] PM: suspend debug: Waiting for 5 second(s).

[438623.792166] XH: 22 from kthread

[438623.824140] XH: 23 from workqueue


[PATCH] Revert "drm/amdgpu: Report vbios version instead of PN"

2023-09-14 Thread Lijo Lazar
This reverts commit c187a67725b47f9c1603359a51b79cc19e27442a.

vbios_version sysfs node is used to identify Part Number also. Revert to
the same so that it doesn't break scripts/software which parse this.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 73ee14f7a9a4..dce9e7d5e4ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1776,7 +1776,7 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct 
device *dev,
struct amdgpu_device *adev = drm_to_adev(ddev);
struct atom_context *ctx = adev->mode_info.atom_context;
 
-   return sysfs_emit(buf, "%s\n", ctx->vbios_ver_str);
+   return sysfs_emit(buf, "%s\n", ctx->vbios_pn);
 }
 
 static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
-- 
2.25.1