RE: [PATCH] drm/amdkfd: On GFX11 check PCIe atomics support and set CP_HQD_HQ_STATUS0[29]

2023-04-10 Thread Sider, Graham
[Public]

A minor nitpick/suggestion below to make the comments a bit more concise. With 
that change the patch is

Reviewed-by: Graham Sider 

Best,
Graham

> -Original Message-
> From: amd-gfx  On Behalf Of
> Sreekant Somasekharan
> Sent: Monday, April 3, 2023 3:59 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Somasekharan, Sreekant 
> Subject: [PATCH] drm/amdkfd: On GFX11 check PCIe atomics support and set
> CP_HQD_HQ_STATUS0[29]
> 
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
> 
> 
> On GFX11, CP_HQD_HQ_STATUS0[29] bit will be used by CPFW to
> acknowledge whether PCIe atomics are supported. The default value of this
> bit is set to 0. Driver will check whether PCIe atomics are supported and set
> the bit to 1 if supported. This will force CPFW to use real atomic ops.
> If the bit is not set, CPFW will default to read/modify/write using the
> firmware itself.
> 
> This is applicable only to RS64 based GFX11 with MEC FW greater than or
> equal to 509. If MEC FW is less than 509, PCIe atomics needs to be supported,
> else it will skip the device.
> 
> This commit also involves moving amdgpu_amdkfd_device_probe() function
> call after per-IP early_init loop in amdgpu_device_ip_early_init() function so
> as to check for RS64 enabled device.
> 
> Signed-off-by: Sreekant Somasekharan
> 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c  | 11 +++
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c |  9 +
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 7116119ed038..b3a754ca0923 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2150,7 +2150,6 @@ static int amdgpu_device_ip_early_init(struct
> amdgpu_device *adev)
> adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
> }
> 
> -   amdgpu_amdkfd_device_probe(adev);
> 
> adev->pm.pp_feature = amdgpu_pp_feature_mask;
> if (amdgpu_sriov_vf(adev) || sched_policy ==
> KFD_SCHED_POLICY_NO_HWS) @@ -2206,6 +2205,7 @@ static int
> amdgpu_device_ip_early_init(struct amdgpu_device *adev)
> if (!total)
> return -ENODEV;
> 
> +   amdgpu_amdkfd_device_probe(adev);
> adev->cg_flags &= amdgpu_cg_mask;
> adev->pg_flags &= amdgpu_pg_mask;
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 521dfa88aad8..64a295a35d37 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -204,6 +204,17 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
> /* Navi1x+ */
> if (gc_version >= IP_VERSION(10, 1, 1))
> kfd->device_info.needs_pci_atomics = true;
> +   } else if (gc_version < IP_VERSION(12, 0, 0)) {
> +   /* On GFX11 running on RS64, MEC FW version must be 
> greater
> than
> +* or equal to version 509 to support acknowledging 
> whether
> +* PCIe atomics are supported. Before MEC version 
> 509, PCIe
> +* atomics are required. After that, the FW's use of 
> atomics
> +* is controlled by CP_HQD_HQ_STATUS0[29].
> +* This will fail on GFX11 when PCIe atomics are not 
> supported
> +* and MEC FW version < 509 for RS64 based CPFW.
> +*/

Could probably make this comment and the one below a bit more concise. 
Suggestion:

/*
 * PCIe atomics support acknowledgement in GFX11 RS64 CPFW requires version >= 
509.
 * Prior RS64 CPFW versions (and all F32) require PCIe atomics support.
 */

> +   kfd->device_info.needs_pci_atomics = true;
> +   kfd->device_info.no_atomic_fw_version =
> + kfd->adev->gfx.rs64_enable ? 509 : 0;
> }
> } else {
> kfd->device_info.doorbell_size = 4; diff --git
> a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
> index 4a9af800b1f1..c5ea594abbf6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
> @@ -143,6 +143,15 @@ static void init_mqd(struct mqd_manager *mm, void
> **mqd,
> 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
> 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
> 
> +   /*
> +* If PCIe atomics are supported, set CP_HQD_HQ_STATUS0[29] == 1
> +* to force CPFW to use atomics. This is supported only on MEC FW
> +* version >= 509 and on RS64 based CPFW 

RE: [PATCH v2] drm/amdgpu: add print for iommu translation mode

2023-04-04 Thread Sider, Graham
[AMD Official Use Only - General]

Ping :)

Best,
Graham

> -Original Message-
> From: Kuehling, Felix 
> Sent: Monday, March 20, 2023 12:16 PM
> To: Sider, Graham ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH v2] drm/amdgpu: add print for iommu translation mode
> 
> On 2023-03-20 10:08, Graham Sider wrote:
> > Add log to display whether RAM is direct vs DMA mapped.
> >
> > Signed-off-by: Graham Sider 
> 
> Acked-by: Felix Kuehling 
> 
> 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +-
> >   1 file changed, 5 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 3bd6c5aef796..83774824694b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -3528,8 +3528,12 @@ static void
> amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
> > struct iommu_domain *domain;
> >
> > domain = iommu_get_domain_for_dev(adev->dev);
> > -   if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
> > +   if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) {
> > +   dev_info(adev->dev, "RAM is direct mapped to GPU (not
> translated by IOMMU)\n");
> > adev->ram_is_direct_mapped = true;
> > +   } else {
> > +   dev_info(adev->dev, "RAM is DMA mapped to GPU
> (translated by IOMMU)\n");
> > +   }
> >   }
> >
> >   static const struct attribute *amdgpu_dev_attributes[] = {


RE: [PATCH] drm/amdkfd: On GFX11 check PCIe atomics support and set CP_HQD_HQ_STATUS0[29]

2023-04-04 Thread Sider, Graham
[Public]

> -Original Message-
> From: amd-gfx  On Behalf Of
> Russell, Kent
> Sent: Tuesday, April 4, 2023 9:43 AM
> To: Somasekharan, Sreekant ; amd-
> g...@lists.freedesktop.org
> Cc: Somasekharan, Sreekant 
> Subject: RE: [PATCH] drm/amdkfd: On GFX11 check PCIe atomics support and
> set CP_HQD_HQ_STATUS0[29]
> 
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
> 
> 
> [AMD Official Use Only - General]
> 
> Comments inline
> 
> > -Original Message-
> > From: amd-gfx  On Behalf Of
> > Sreekant Somasekharan
> > Sent: Monday, April 3, 2023 3:59 PM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Somasekharan, Sreekant 
> > Subject: [PATCH] drm/amdkfd: On GFX11 check PCIe atomics support and
> > set CP_HQD_HQ_STATUS0[29]
> >
> > On GFX11, CP_HQD_HQ_STATUS0[29] bit will be used by CPFW to
> > acknowledge whether PCIe atomics are supported. The default value of
> > this bit is set to 0. Driver will check whether PCIe atomics are
> > supported and set the bit to 1 if supported. This will force CPFW to use 
> > real
> atomic ops.
> > If the bit is not set, CPFW will default to read/modify/write using
> > the firmware itself.
> >
> > This is applicable only to RS64 based GFX11 with MEC FW greater than
> > or equal to 509. If MEC FW is less than 509, PCIe atomics needs to be
> > supported, else it will skip the device.
> >
> > This commit also involves moving amdgpu_amdkfd_device_probe()
> function
> > call after per-IP early_init loop in amdgpu_device_ip_early_init()
> > function so as to check for RS64 enabled device.
> >
> > Signed-off-by: Sreekant Somasekharan
> 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   |  2 +-
> >  drivers/gpu/drm/amd/amdkfd/kfd_device.c  | 11 +++
> >  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c |  9 +
> >  3 files changed, 21 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 7116119ed038..b3a754ca0923 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -2150,7 +2150,6 @@ static int amdgpu_device_ip_early_init(struct
> > amdgpu_device *adev)
> >   adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
> >   }
> >
> > - amdgpu_amdkfd_device_probe(adev);
> >
> >   adev->pm.pp_feature = amdgpu_pp_feature_mask;
> >   if (amdgpu_sriov_vf(adev) || sched_policy ==
> > KFD_SCHED_POLICY_NO_HWS)
> > @@ -2206,6 +2205,7 @@ static int amdgpu_device_ip_early_init(struct
> > amdgpu_device *adev)
> >   if (!total)
> >   return -ENODEV;
> >
> > + amdgpu_amdkfd_device_probe(adev);
> >   adev->cg_flags &= amdgpu_cg_mask;
> >   adev->pg_flags &= amdgpu_pg_mask;
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > index 521dfa88aad8..64a295a35d37 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > @@ -204,6 +204,17 @@ static void kfd_device_info_init(struct kfd_dev
> *kfd,
> >   /* Navi1x+ */
> >   if (gc_version >= IP_VERSION(10, 1, 1))
> >   kfd->device_info.needs_pci_atomics =
> > true;
> > + } else if (gc_version < IP_VERSION(12, 0, 0)) {
> 
> 
> What if we get a GFX9 with MEC v509? Wouldn't that trigger this too?
> Wondering if this should be if (gc_version>=IP_VERSION(11,0,0) &&
> gc_version < IP_VERSION(12,0,0)) thus ensuring it's only GFX11. Or maybe
> there is some better check than that. Either way, checking that it's < GFX11
> might false-positive on GFX10- too, so we should probably be explicit in our
> GFX check that it's only GFX11.

The previous condition is for gc_version < IP_VERSION(11, 0, 0), so that 
condition will be (and currently is) taken for gfx9/gfx10/etc.

That's to say the logic after this change will look like:

If (KFD_IS_SOC15(kfd)) {
<...>
If (gc_version < IP_VERSION(11, 0, 0)) {
<...>
} else if (gc_version < IP_VERSION(12, 0, 0)) {
<...>
}
}

So this new path will only be taken for gfx11.

Best,
Graham

> 
>  Kent
> 
> > + /* On GFX11 running on RS64, MEC FW version must
> > + be
> > greater than
> > +  * or equal to version 509 to support
> > + acknowledging
> > whether
> > +  * PCIe atomics are supported. Before MEC
> > + version 509,
> > PCIe
> > +  * atomics are required. After that, the FW's
> > + use of
> > atomics
> > +  * is controlled by CP_HQD_HQ_STATUS0[29].
> > +  * This will fail on GFX11 when PCIe atomics are
> > + not
> > supported
> > +  * and MEC FW version < 509 for RS64 based CPFW.
> > +   

RE: [PATCH] drm/amdgpu: add print for iommu translation mode

2023-03-21 Thread Sider, Graham
[Public]

> -Original Message-
> From: Christian König 
> Sent: Tuesday, March 21, 2023 2:53 PM
> To: Sider, Graham ; Russell, Kent
> ; Mahfooz, Hamza ;
> amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix 
> Subject: Re: [PATCH] drm/amdgpu: add print for iommu translation mode
> 
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
> 
> 
> Am 17.03.23 um 21:04 schrieb Sider, Graham:
> > [AMD Official Use Only - General]
> >
> >
> >
> >> -Original Message-
> >> From: Russell, Kent 
> >> Sent: Friday, March 17, 2023 3:58 PM
> >> To: Mahfooz, Hamza ; Sider, Graham
> >> ; amd-gfx@lists.freedesktop.org
> >> Cc: Kuehling, Felix 
> >> Subject: RE: [PATCH] drm/amdgpu: add print for iommu translation mode
> >>
> >> [AMD Official Use Only - General]
> >>
> >>
> >>
> >>> -Original Message-
> >>> From: amd-gfx  On Behalf Of
> >>> Hamza Mahfooz
> >>> Sent: Friday, March 17, 2023 3:58 PM
> >>> To: Sider, Graham ;
> >>> amd-gfx@lists.freedesktop.org
> >>> Cc: Kuehling, Felix 
> >>> Subject: Re: [PATCH] drm/amdgpu: add print for iommu translation
> >>> mode
> >>>
> >>>
> >>> On 3/17/23 15:47, Graham Sider wrote:
> >>>> Add log to display whether RAM is direct vs DMA mapped.
> >>>>
> >>>> Signed-off-by: Graham Sider 
> >>> If this information is only useful for debugging purposes, please
> >>> use
> >>> drm_dbg() instead of pr_info().
> > It's useful for more than just debug I would say. Just a quick way to grep
> whether IOMMU is off/pt vs device isolation mode.
> 
> Mhm, shouldn't the IOMMU code note that as well?
>

As of right now, not exactly. Copy-pasting Felix's comment here:

The kernel log [currently] tells you the default IOMMU domain, but it may not 
match the domain actually used for the GPU. Without this message there is no 
easy way to tell from a kernel log. This will help with triaging issues from 
logs provided by external and internal users.

Graham

> 
> Christian.
> 
> >
> > Graham
> >
> >>>> ---
> >>>>drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +-
> >>>>1 file changed, 5 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>> index 8bba5e6872a1..8797a9523244 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>> @@ -3528,8 +3528,12 @@ static void
> >>> amdgpu_device_check_iommu_direct_map(struct amdgpu_device
> *adev)
> >>>>struct iommu_domain *domain;
> >>>>
> >>>>domain = iommu_get_domain_for_dev(adev->dev);
> >>>> -  if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
> >>>> +  if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) {
> >>>> +  pr_info("RAM is direct mapped to GPU (not traslated by
> >> traslated -> translated
> >>
> > Thanks, my keyboard keeps skipping the on the 'n' key lately :( time for a
> clean.
> >
> > Graham
> >
> >>   Kent
> >>> IOMMU)\n");
> >>>>adev->ram_is_direct_mapped = true;
> >>>> +  } else {
> >>>> +  pr_info("RAM is DMA mapped to GPU (translated by
> >>> IOMMU)\n");
> >>>> +  }
> >>>>}
> >>>>
> >>>>static const struct attribute *amdgpu_dev_attributes[] = {
> >>> --
> >>> Hamza


RE: [PATCH] drm/amdgpu: add print for iommu translation mode

2023-03-20 Thread Sider, Graham
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Friday, March 17, 2023 5:16 PM
> To: Sider, Graham ; Russell, Kent
> ; Mahfooz, Hamza ;
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: add print for iommu translation mode
> 
> On 2023-03-17 16:04, Sider, Graham wrote:
> > [AMD Official Use Only - General]
> >
> >
> >
> >> -Original Message-
> >> From: Russell, Kent 
> >> Sent: Friday, March 17, 2023 3:58 PM
> >> To: Mahfooz, Hamza ; Sider, Graham
> >> ; amd-gfx@lists.freedesktop.org
> >> Cc: Kuehling, Felix 
> >> Subject: RE: [PATCH] drm/amdgpu: add print for iommu translation mode
> >>
> >> [AMD Official Use Only - General]
> >>
> >>
> >>
> >>> -Original Message-
> >>> From: amd-gfx  On Behalf Of
> >>> Hamza Mahfooz
> >>> Sent: Friday, March 17, 2023 3:58 PM
> >>> To: Sider, Graham ;
> >>> amd-gfx@lists.freedesktop.org
> >>> Cc: Kuehling, Felix 
> >>> Subject: Re: [PATCH] drm/amdgpu: add print for iommu translation
> >>> mode
> >>>
> >>>
> >>> On 3/17/23 15:47, Graham Sider wrote:
> >>>> Add log to display whether RAM is direct vs DMA mapped.
> >>>>
> >>>> Signed-off-by: Graham Sider 
> >>> If this information is only useful for debugging purposes, please
> >>> use
> >>> drm_dbg() instead of pr_info().
> > It's useful for more than just debug I would say. Just a quick way to grep
> whether IOMMU is off/pt vs device isolation mode.
> 
> I agree. The kernel log otherwise tells you the default IOMMU domain, but it
> may not match the domain actually used for the GPU. Without this message
> there is no easy way to tell from a kernel log. This will help with triaging 
> issues
> from logs provided by external and internal users.
> 
> 
> >
> > Graham
> >
> >>>> ---
> >>>>drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +-
> >>>>1 file changed, 5 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>> index 8bba5e6872a1..8797a9523244 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>> @@ -3528,8 +3528,12 @@ static void
> >>> amdgpu_device_check_iommu_direct_map(struct amdgpu_device
> *adev)
> >>>>  struct iommu_domain *domain;
> >>>>
> >>>>  domain = iommu_get_domain_for_dev(adev->dev);
> >>>> -if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
> >>>> +if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) {
> >>>> +pr_info("RAM is direct mapped to GPU (not traslated by
> 
> Use dev_info. That way you can tell which GPU the message applies to in a
> multi-GPU system.
> 

Good point - will do that. Thanks!

Graham

> Regards,
>    Felix
> 
> 
> >> traslated -> translated
> >>
> > Thanks, my keyboard keeps skipping the on the 'n' key lately :( time for a
> clean.
> >
> > Graham
> >
> >>   Kent
> >>> IOMMU)\n");
> >>>>  adev->ram_is_direct_mapped = true;
> >>>> +} else {
> >>>> +pr_info("RAM is DMA mapped to GPU (translated by
> >>> IOMMU)\n");
> >>>> +}
> >>>>}
> >>>>
> >>>>static const struct attribute *amdgpu_dev_attributes[] = {
> >>> --
> >>> Hamza


RE: [PATCH] drm/amdgpu: add print for iommu translation mode

2023-03-17 Thread Sider, Graham
[AMD Official Use Only - General]



> -Original Message-
> From: Russell, Kent 
> Sent: Friday, March 17, 2023 3:58 PM
> To: Mahfooz, Hamza ; Sider, Graham
> ; amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix 
> Subject: RE: [PATCH] drm/amdgpu: add print for iommu translation mode
> 
> [AMD Official Use Only - General]
> 
> 
> 
> > -Original Message-
> > From: amd-gfx  On Behalf Of
> > Hamza Mahfooz
> > Sent: Friday, March 17, 2023 3:58 PM
> > To: Sider, Graham ;
> > amd-gfx@lists.freedesktop.org
> > Cc: Kuehling, Felix 
> > Subject: Re: [PATCH] drm/amdgpu: add print for iommu translation mode
> >
> >
> > On 3/17/23 15:47, Graham Sider wrote:
> > > Add log to display whether RAM is direct vs DMA mapped.
> > >
> > > Signed-off-by: Graham Sider 
> >
> > If this information is only useful for debugging purposes, please use
> > drm_dbg() instead of pr_info().

It's useful for more than just debug I would say. Just a quick way to grep 
whether IOMMU is off/pt vs device isolation mode.

Graham

> >
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +-
> > >   1 file changed, 5 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > index 8bba5e6872a1..8797a9523244 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > @@ -3528,8 +3528,12 @@ static void
> > amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
> > >   struct iommu_domain *domain;
> > >
> > >   domain = iommu_get_domain_for_dev(adev->dev);
> > > - if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
> > > + if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) {
> > > + pr_info("RAM is direct mapped to GPU (not traslated by
> traslated -> translated
> 

Thanks, my keyboard keeps skipping the on the 'n' key lately :( time for a 
clean.

Graham

>  Kent
> > IOMMU)\n");
> > >   adev->ram_is_direct_mapped = true;
> > > + } else {
> > > + pr_info("RAM is DMA mapped to GPU (translated by
> > IOMMU)\n");
> > > + }
> > >   }
> > >
> > >   static const struct attribute *amdgpu_dev_attributes[] = {
> >
> > --
> > Hamza


RE: [PATCH] drm/amdgpu: remove unconditional trap enable on add gfx11 queues

2023-01-19 Thread Sider, Graham
[AMD Official Use Only - General]

Reviewed-by: Graham Sider 

-Original Message-
From: Kim, Jonathan  
Sent: Thursday, January 19, 2023 7:44 PM
To: amd-gfx@lists.freedesktop.org
Cc: Sider, Graham ; Kuehling, Felix 
; Kim, Jonathan ; Sider, Graham 

Subject: [PATCH] drm/amdgpu: remove unconditional trap enable on add gfx11 
queues

Rebase of driver has incorrect unconditional trap enablement for GFX11 when 
adding mes queues.

Reported-by: Graham Sider 
Signed-off-by: Jonathan Kim 
---
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index bfa305079bfc..00e64838bb8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -190,19 +190,18 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.paging = input->paging;
mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
mes_add_queue_pkt.gws_base = input->gws_base;
mes_add_queue_pkt.gws_size = input->gws_size;
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
-   mes_add_queue_pkt.trap_en = 1;
 
/* For KFD, gds_size is re-used for queue size (needed in MES for AQL 
queues) */
mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
mes_add_queue_pkt.gds_size = input->queue_size;
 
if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 4) &&
  (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) &&
  (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3
mes_add_queue_pkt.trap_en = 1;
--
2.25.1


RE: [PATCH] drm/amdkfd: update GFX11 CWSR trap handler

2022-11-01 Thread Sider, Graham
[AMD Official Use Only - General]

> -Original Message-
> From: Sider, Graham 
> Sent: Wednesday, October 26, 2022 5:05 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Kasiviswanathan, Harish
> ; Cornwall, Jay
> ; Sider, Graham 
> Subject: [PATCH] drm/amdkfd: update GFX11 CWSR trap handler
> 
> From: Jay Cornwall 
> 
> With corresponding FW change fixes issue where triggering CWSR on a
> workgroup with waves in s_barrier wouldn't lead to a back-off and therefore
> cause a hang.
> 
> Signed-off-by: Jay Cornwall 
> Tested-by: Graham Sider 

Reviewed-by: Graham Sider 

> ---
>  .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 764 +-
>  .../amd/amdkfd/cwsr_trap_handler_gfx10.asm|   6 +
>  2 files changed, 389 insertions(+), 381 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> index c7118843db05..0c4c5499bb5c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> @@ -2495,442 +2495,444 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
>   0xbf9f, 0x,
>  };
>  static const uint32_t cwsr_trap_gfx11_hex[] = {
> - 0xbfa1, 0xbfa0021e,
> + 0xbfa1, 0xbfa00221,
>   0xb0804006, 0xb8f8f802,
>   0x9178ff78, 0x00020006,
> - 0xb8fbf803, 0xbf0d9f6d,
> - 0xbfa20006, 0x8b6eff78,
> - 0x2000, 0xbfa10009,
> - 0x8b6eff6d, 0x00ff,
> - 0xbfa2001e, 0x8b6eff7b,
> - 0x0400, 0xbfa20041,
> - 0xbf830010, 0xb8fbf803,
> - 0xbfa0fffa, 0x8b6eff7b,
> - 0x0900, 0xbfa20015,
> - 0x8b6eff7b, 0x71ff,
> - 0xbfa10008, 0x8b6fff7b,
> - 0x7080, 0xbfa10001,
> - 0xbeee1287, 0xb8eff801,
> - 0x846e8c6e, 0x8b6e6f6e,
> - 0xbfa2000a, 0x8b6eff6d,
> - 0x00ff, 0xbfa20007,
> - 0xb8eef801, 0x8b6eff6e,
> - 0x0800, 0xbfa20003,
> + 0xb8fbf803, 0xbf0d9e6d,
> + 0xbfa10001, 0xbfbd,
> + 0xbf0d9f6d, 0xbfa20006,
> + 0x8b6eff78, 0x2000,
> + 0xbfa10009, 0x8b6eff6d,
> + 0x00ff, 0xbfa2001e,
>   0x8b6eff7b, 0x0400,
> - 0xbfa20026, 0xbefa4d82,
> - 0xbf89fc07, 0x84fa887a,
> - 0xf4005bbd, 0xf810,
> - 0xbf89fc07, 0x846e976e,
> - 0x9177ff77, 0x0080,
> - 0x8c776e77, 0xf4045bbd,
> - 0xf800, 0xbf89fc07,
> - 0xf4045ebd, 0xf808,
> - 0xbf89fc07, 0x8bee6e6e,
> - 0xbfa10001, 0xbe80486e,
> - 0x8b6eff6d, 0x01ff,
> - 0xbfa20005, 0x8c78ff78,
> - 0x2000, 0x80ec886c,
> - 0x82ed806d, 0xbfa5,
> - 0x8b6eff6d, 0x0100,
> - 0xbfa20002, 0x806c846c,
> - 0x826d806d, 0x8b6dff6d,
> - 0x, 0x8bfe7e7e,
> - 0x8bea6a6a, 0xb978f802,
> - 0xbe804a6c, 0x8b6dff6d,
> - 0x, 0xbefa0080,
> - 0xb97a0283, 0xbeee007e,
> - 0xbeef007f, 0xbefe0180,
> - 0xbefe4d84, 0xbf89fc07,
> - 0x8b7aff7f, 0x0400,
> - 0x847a857a, 0x8c6d7a6d,
> - 0xbefa007e, 0x8b7bff7f,
> - 0x, 0xbefe00c1,
> - 0xbeff00c1, 0xdca6c000,
> - 0x007a, 0x7e000280,
> - 0xbefe007a, 0xbeff007b,
> - 0xb8fb02dc, 0x847b997b,
> - 0xb8fa3b05, 0x807a817a,
> - 0xbf0d997b, 0xbfa20002,
> - 0x847a897a, 0xbfa1,
> - 0x847a8a7a, 0xb8fb1e06,
> - 0x847b8a7b, 0x807a7b7a,
> + 0xbfa20041, 0xbf830010,
> + 0xb8fbf803, 0xbfa0fffa,
> + 0x8b6eff7b, 0x0900,
> + 0xbfa20015, 0x8b6eff7b,
> + 0x71ff, 0xbfa10008,
> + 0x8b6fff7b, 0x7080,
> + 0xbfa10001, 0xbeee1287,
> + 0xb8eff801, 0x846e8c6e,
> + 0x8b6e6f6e, 0xbfa2000a,
> + 0x8b6eff6d, 0x00ff,
> + 0xbfa20007, 0xb8eef801,
> + 0x8b6eff6e, 0x0800,
> + 0xbfa20003, 0x8b6eff7b,
> + 0x0400, 0xbfa20026,
> + 0xbefa4d82, 0xbf89fc07,
> + 0x84fa887a, 0xf4005bbd,
> + 0xf810, 0xbf89fc07,
> + 0x846e976e, 0x9177ff77,
> + 0x0080, 0x8c776e77,
> + 0xf4045bbd, 0xf800,
> + 0xbf89fc07, 0xf4045ebd,
> + 0xf808, 0xbf89fc07,
> + 0x8bee6e6e, 0xbfa10001,
> + 0xbe80486e, 0x8b6eff6d,
> + 0x01ff, 0xbfa20005,
> + 0x8c78ff78, 0x2000,
> + 0x80ec886c, 0x82ed806d,
> + 0xbfa5, 0x8b6eff6d,
> + 0x0100, 0xbfa20002,
> + 0x806c846c, 0x826d806d,
> + 0x8b6dff6d, 0x,
> + 0x8bfe7e7e, 0x8bea6a6a,
> + 0xb978f802, 0xbe804a6c,
> + 0x8b6dff6d, 0x,
> + 0xbefa0080, 0xb97a0283,
> + 0xbeee007e, 0xbeef007f,
> + 0xbefe0180, 0xbefe4d84,
> + 0xbf89fc07, 0x8b7aff7f,
> + 0x0400, 0x847a857a,
> +

RE: [PATCH v2] drm/amdkfd: Fix UBSAN shift-out-of-bounds warning

2022-09-28 Thread Sider, Graham
[Public]

Reviewed-by: Graham Sider 

> -Original Message-
> From: Kuehling, Felix 
> Sent: Monday, September 26, 2022 6:36 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Sider, Graham ; Ellis Michael
> 
> Subject: [PATCH v2] drm/amdkfd: Fix UBSAN shift-out-of-bounds warning
> 
> This was fixed in initialize_cpsch before, but not in initialize_nocpsch.
> Factor sdma bitmap initialization into a helper function to apply the correct
> implementation in both cases without duplicating it.
> 
> v2: Added a range check
> 
> Reported-by: Ellis Michael 
> Signed-off-by: Felix Kuehling 
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 45 +
> --
>  1 file changed, 21 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 007a3db69df1..ecb4c3abc629 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1242,6 +1242,24 @@ static void init_interrupts(struct
> device_queue_manager *dqm)
>   dqm->dev->kfd2kgd->init_interrupts(dqm->dev-
> >adev, i);  }
> 
> +static void init_sdma_bitmaps(struct device_queue_manager *dqm) {
> + unsigned int num_sdma_queues =
> + min_t(unsigned int, sizeof(dqm->sdma_bitmap)*8,
> +   get_num_sdma_queues(dqm));
> + unsigned int num_xgmi_sdma_queues =
> + min_t(unsigned int, sizeof(dqm->xgmi_sdma_bitmap)*8,
> +   get_num_xgmi_sdma_queues(dqm));
> +
> + if (num_sdma_queues)
> + dqm->sdma_bitmap = GENMASK_ULL(num_sdma_queues-
> 1, 0);
> + if (num_xgmi_sdma_queues)
> + dqm->xgmi_sdma_bitmap =
> GENMASK_ULL(num_xgmi_sdma_queues-1, 0);
> +
> + dqm->sdma_bitmap &=
> ~get_reserved_sdma_queues_bitmap(dqm);
> + pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap); }
> +
>  static int initialize_nocpsch(struct device_queue_manager *dqm)  {
>   int pipe, queue;
> @@ -1270,11 +1288,7 @@ static int initialize_nocpsch(struct
> device_queue_manager *dqm)
> 
>   memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
> 
> - dqm->sdma_bitmap = ~0ULL >> (64 -
> get_num_sdma_queues(dqm));
> - dqm->sdma_bitmap &=
> ~(get_reserved_sdma_queues_bitmap(dqm));
> - pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
> -
> - dqm->xgmi_sdma_bitmap = ~0ULL >> (64 -
> get_num_xgmi_sdma_queues(dqm));
> + init_sdma_bitmaps(dqm);
> 
>   return 0;
>  }
> @@ -1452,9 +1466,6 @@ static int set_sched_resources(struct
> device_queue_manager *dqm)
> 
>  static int initialize_cpsch(struct device_queue_manager *dqm)  {
> - uint64_t num_sdma_queues;
> - uint64_t num_xgmi_sdma_queues;
> -
>   pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
> 
>   mutex_init(>lock_hidden);
> @@ -1463,24 +1474,10 @@ static int initialize_cpsch(struct
> device_queue_manager *dqm)
>   dqm->active_cp_queue_count = 0;
>   dqm->gws_queue_count = 0;
>   dqm->active_runlist = false;
> -
> - num_sdma_queues = get_num_sdma_queues(dqm);
> - if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
> - dqm->sdma_bitmap = ULLONG_MAX;
> - else
> - dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
> -
> - dqm->sdma_bitmap &=
> ~(get_reserved_sdma_queues_bitmap(dqm));
> - pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
> -
> - num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
> - if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm-
> >xgmi_sdma_bitmap))
> - dqm->xgmi_sdma_bitmap = ULLONG_MAX;
> - else
> - dqm->xgmi_sdma_bitmap =
> (BIT_ULL(num_xgmi_sdma_queues) - 1);
> -
>   INIT_WORK(>hw_exception_work,
> kfd_process_hw_exception);
> 
> + init_sdma_bitmaps(dqm);
> +
>   return 0;
>  }
> 
> --
> 2.32.0


RE: [PATCH] drm/amdkfd: Fix UBSAN shift-out-of-bounds warning

2022-09-26 Thread Sider, Graham
[AMD Official Use Only - General]

> -Original Message-
> From: amd-gfx  On Behalf Of Felix
> Kuehling
> Sent: Wednesday, September 21, 2022 6:30 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Ellis Michael 
> Subject: [PATCH] drm/amdkfd: Fix UBSAN shift-out-of-bounds warning
> 
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
> 
> 
> This was fixed in initialize_cpsch before, but not in initialize_nocpsch.
> Factor sdma bitmap initialization into a helper function to apply the correct
> implementation in both cases without duplicating it.
> 
> Reported-by: Ellis Michael 
> Signed-off-by: Felix Kuehling 
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 41 --
> -
>  1 file changed, 17 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index e83725a28106..f88ec6a11ad2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1240,6 +1240,20 @@ static void init_interrupts(struct
> device_queue_manager *dqm)
> dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, 
> i);  }
> 
> +static void init_sdma_bitmaps(struct device_queue_manager *dqm) {
> +   uint64_t num_sdma_queues = get_num_sdma_queues(dqm);
> +   uint64_t num_xgmi_sdma_queues =
> get_num_xgmi_sdma_queues(dqm);
> +
> +   if (num_sdma_queues)
> +   dqm->sdma_bitmap = GENMASK_ULL(num_sdma_queues-1, 0);
> +   if (num_xgmi_sdma_queues)
> +   dqm->xgmi_sdma_bitmap =
> + GENMASK_ULL(num_xgmi_sdma_queues-1, 0);

I think we still want a safeguard here in case we ever get into a situation 
where num_sdma_queues is > 64. Otherwise we could hit an unsigned wraparound 
(in __GENMASK_ULL: (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))) -- this would 
cause a wrap, plus a "shift greater than width of type" warning if h > 63).

Something along the lines of

dqm->sdma_bitmap = GENMASK_ULL(min(num_sdma_queues, BITS_PER_LONG_LONG) - 1, 0);

Could work as a safeguard. Same for xgmi_sdma_bitmap.

Best,
Graham

> +
> +   dqm->sdma_bitmap &= ~get_reserved_sdma_queues_bitmap(dqm);
> +   pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap); }
> +
>  static int initialize_nocpsch(struct device_queue_manager *dqm)  {
> int pipe, queue;
> @@ -1268,11 +1282,7 @@ static int initialize_nocpsch(struct
> device_queue_manager *dqm)
> 
> memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
> 
> -   dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
> -   dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
> -   pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
> -
> -   dqm->xgmi_sdma_bitmap = ~0ULL >> (64 -
> get_num_xgmi_sdma_queues(dqm));
> +   init_sdma_bitmaps(dqm);
> 
> return 0;
>  }
> @@ -1450,9 +1460,6 @@ static int set_sched_resources(struct
> device_queue_manager *dqm)
> 
>  static int initialize_cpsch(struct device_queue_manager *dqm)  {
> -   uint64_t num_sdma_queues;
> -   uint64_t num_xgmi_sdma_queues;
> -
> pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
> 
> mutex_init(>lock_hidden);
> @@ -1461,24 +1468,10 @@ static int initialize_cpsch(struct
> device_queue_manager *dqm)
> dqm->active_cp_queue_count = 0;
> dqm->gws_queue_count = 0;
> dqm->active_runlist = false;
> -
> -   num_sdma_queues = get_num_sdma_queues(dqm);
> -   if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
> -   dqm->sdma_bitmap = ULLONG_MAX;
> -   else
> -   dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
> -
> -   dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
> -   pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
> -
> -   num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
> -   if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm-
> >xgmi_sdma_bitmap))
> -   dqm->xgmi_sdma_bitmap = ULLONG_MAX;
> -   else
> -   dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) -
> 1);
> -
> INIT_WORK(>hw_exception_work, kfd_process_hw_exception);
> 
> +   init_sdma_bitmaps(dqm);
> +
> return 0;
>  }
> 
> --
> 2.32.0


RE: [PATCH] drm/amdgpu: Enable SA software trap.

2022-09-22 Thread Sider, Graham
[Public]



> -Original Message-
> From: Belanger, David 
> Sent: Thursday, September 22, 2022 2:49 PM
> To: Sider, Graham ; amd-
> g...@lists.freedesktop.org
> Cc: Cornwall, Jay ; Kuehling, Felix
> 
> Subject: RE: [PATCH] drm/amdgpu: Enable SA software trap.
> 
> [Public]
> 
> 
> 
> > -Original Message-
> > From: Sider, Graham 
> > Sent: Thursday, September 22, 2022 1:56 PM
> > To: Belanger, David ; amd-
> > g...@lists.freedesktop.org
> > Cc: Cornwall, Jay ; Kuehling, Felix
> > ; Belanger, David 
> > Subject: RE: [PATCH] drm/amdgpu: Enable SA software trap.
> >
> > [Public]
> >
> > > -Original Message-
> > > From: amd-gfx  On Behalf Of
> > > David Belanger
> > > Sent: Thursday, September 22, 2022 12:17 PM
> > > To: amd-gfx@lists.freedesktop.org
> > > Cc: Cornwall, Jay ; Kuehling, Felix
> > > ; Belanger, David
> 
> > > Subject: [PATCH] drm/amdgpu: Enable SA software trap.
> > >
> > > Caution: This message originated from an External Source. Use proper
> > > caution when opening attachments, clicking links, or responding.
> > >
> > >
> > > Enables support for software trap for MES >= 4.
> > > Adapted from implementation from Jay Cornwall.
> > >
> > > v2: Add IP version check in conditions.
> > >
> > > Signed-off-by: Jay Cornwall 
> > > Signed-off-by: David Belanger 
> > > Reviewed-by: Felix Kuehling 
> > > ---
> > >  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|   6 +-
> > >  .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 771 +---
> --
> > >  .../amd/amdkfd/cwsr_trap_handler_gfx10.asm|  21 +
> > >  .../gpu/drm/amd/amdkfd/kfd_int_process_v11.c  |  26 +-
> > >  4 files changed, 437 insertions(+), 387 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> > > b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> > > index b64cd46a159a..cbc506b958b1 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> > > @@ -185,7 +185,11 @@ static int mes_v11_0_add_hw_queue(struct
> > > amdgpu_mes *mes,
> > > mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
> > > mes_add_queue_pkt.tma_addr = input->tma_addr;
> > > mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
> > > -   mes_add_queue_pkt.trap_en = 1;
> > > +
> > > +   if (!(((adev->mes.sched_version &
> AMDGPU_MES_VERSION_MASK)
> > > + >=
> > > 4) &&
> > > + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) 
> > > &&
> > > + (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 
> > > 3
> > > +   mes_add_queue_pkt.trap_en = 1;
> >
> > I think the value for trap_en here is backwards. It should be set to 0
> > for this condition and default to 1 otherwise.
> >
> > Best,
> > Graham
> 
> Note that the condition is reversed with the "!" operator.
> 
> David B.
> 

Ah, I read it too quickly. Looks good.

Best,
Graham

> >
> > >
> > > return mes_v11_0_submit_pkt_and_poll_completion(mes,
> > > _add_queue_pkt,
> > > sizeof(mes_add_queue_pkt), diff --git
> > > a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> > > b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> > > index 60a81649cf12..c7118843db05 100644
> > > --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> > > +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> > > @@ -742,7 +742,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
> > > 0xbf88fffe, 0x877aff7f,
> > > 0x0400, 0x8f7a857a,
> > > 0x886d7a6d, 0xb97b02dc,
> > > -   0x8f7b997b, 0xb97a2a05,
> > > +   0x8f7b997b, 0xb97a3a05,
> > > 0x807a817a, 0xbf0d997b,
> > > 0xbf850002, 0x8f7a897a,
> > > 0xbf820001, 0x8f7a8a7a,
> > > @@ -819,7 +819,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
> > > 0xbefe037c, 0xbefc0370,
> > > 0xf4611c7a, 0xf800,
> > > 0x80708470, 0xbefc037e,
> > > -   0xb9702a05, 0x80708170,
> > > +   0xb9703a05, 0x80708170,
> > > 0xbf0d9973, 0xbf850002,
> > > 0x8f708970, 0xbf820001,
> > > 0x8f708a70, 0xb

RE: [PATCH] drm/amdgpu: Enable SA software trap.

2022-09-22 Thread Sider, Graham
[Public]

> -Original Message-
> From: amd-gfx  On Behalf Of
> David Belanger
> Sent: Thursday, September 22, 2022 12:17 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Cornwall, Jay ; Kuehling, Felix
> ; Belanger, David 
> Subject: [PATCH] drm/amdgpu: Enable SA software trap.
> 
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
> 
> 
> Enables support for software trap for MES >= 4.
> Adapted from implementation from Jay Cornwall.
> 
> v2: Add IP version check in conditions.
> 
> Signed-off-by: Jay Cornwall 
> Signed-off-by: David Belanger 
> Reviewed-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|   6 +-
>  .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 771 +-
>  .../amd/amdkfd/cwsr_trap_handler_gfx10.asm|  21 +
>  .../gpu/drm/amd/amdkfd/kfd_int_process_v11.c  |  26 +-
>  4 files changed, 437 insertions(+), 387 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index b64cd46a159a..cbc506b958b1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -185,7 +185,11 @@ static int mes_v11_0_add_hw_queue(struct
> amdgpu_mes *mes,
> mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
> mes_add_queue_pkt.tma_addr = input->tma_addr;
> mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
> -   mes_add_queue_pkt.trap_en = 1;
> +
> +   if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >=
> 4) &&
> + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) &&
> + (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3
> +   mes_add_queue_pkt.trap_en = 1;

I think the value for trap_en here is backwards. It should be set to 0 for this 
condition and default to 1 otherwise.

Best,
Graham

> 
> return mes_v11_0_submit_pkt_and_poll_completion(mes,
> _add_queue_pkt, sizeof(mes_add_queue_pkt), diff 
> --git
> a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> index 60a81649cf12..c7118843db05 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> @@ -742,7 +742,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
> 0xbf88fffe, 0x877aff7f,
> 0x0400, 0x8f7a857a,
> 0x886d7a6d, 0xb97b02dc,
> -   0x8f7b997b, 0xb97a2a05,
> +   0x8f7b997b, 0xb97a3a05,
> 0x807a817a, 0xbf0d997b,
> 0xbf850002, 0x8f7a897a,
> 0xbf820001, 0x8f7a8a7a,
> @@ -819,7 +819,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
> 0xbefe037c, 0xbefc0370,
> 0xf4611c7a, 0xf800,
> 0x80708470, 0xbefc037e,
> -   0xb9702a05, 0x80708170,
> +   0xb9703a05, 0x80708170,
> 0xbf0d9973, 0xbf850002,
> 0x8f708970, 0xbf820001,
> 0x8f708a70, 0xb97a1e06,
> @@ -1069,7 +1069,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
> 0xb9f9f816, 0x876f7bff,
> 0xf800, 0x906f8b6f,
> 0xb9efa2c3, 0xb9f3f801,
> -   0xb96e2a05, 0x806e816e,
> +   0xb96e3a05, 0x806e816e,
> 0xbf0d9972, 0xbf850002,
> 0x8f6e896e, 0xbf820001,
> 0x8f6e8a6e, 0xb96f1e06,
> @@ -2114,7 +2114,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
> 0x007a, 0x7e000280,
> 0xbefe037a, 0xbeff037b,
> 0xb97b02dc, 0x8f7b997b,
> -   0xb97a2a05, 0x807a817a,
> +   0xb97a3a05, 0x807a817a,
> 0xbf0d997b, 0xbf850002,
> 0x8f7a897a, 0xbf820001,
> 0x8f7a8a7a, 0xb97b1e06,
> @@ -2157,7 +2157,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
> 0x0100, 0xe0704100,
> 0x705d0100, 0xe0704200,
> 0x705d0200, 0xe0704300,
> -   0x705d0300, 0xb9702a05,
> +   0x705d0300, 0xb9703a05,
> 0x80708170, 0xbf0d9973,
> 0xbf850002, 0x8f708970,
> 0xbf820001, 0x8f708a70,
> @@ -2189,7 +2189,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
> 0xbefe03ff, 0x,
> 0xbeff0380, 0xe0704000,
> 0x705d0200, 0xbefe03c1,
> -   0xb9702a05, 0x80708170,
> +   0xb9703a05, 0x80708170,
> 0xbf0d9973, 0xbf850002,
> 0x8f708970, 0xbf820001,
> 0x8f708a70, 0xb97a1e06,
> @@ -2475,7 +2475,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
> 0xb9ef4803, 0x876f7bff,
> 0xf800, 0x906f8b6f,
> 0xb9efa2c3, 0xb9f3f801,
> -   0xb96e2a05, 0x806e816e,
> +   0xb96e3a05, 0x806e816e,
> 0xbf0d9972, 0xbf850002,
> 0x8f6e896e, 0xbf820001,
> 0x8f6e8a6e, 0xb96f1e06,
> @@ -2494,438 +2494,441 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
> 0xbf9f, 0xbf9f,
> 0xbf9f, 0x,
>  };
> -
>  static const uint32_t cwsr_trap_gfx11_hex[] = {
> -   0xbfa1, 0xbfa0021b,

RE: [PATCH v4 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-22 Thread Sider, Graham
[AMD Official Use Only - General]

>> On 2022-06-22 11:36, Graham Sider wrote:
>> Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
>> GART for usermode queues in order to support oversubscription. In the
>> case that work is submitted to an unmapped queue, MES must have a GART
>> wptr address to determine whether the queue should be mapped.
>>
>> This change is accompanied with changes in MES and is applicable for
>> MES_API_VERSION >= 2.
>>
>> v3:
>> - Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
>> - Move wptr_bo refcount increment to amdgpu_amdkfd_map_gtt_bo_to_gart
>> - Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
>> - Cleanup/fix create_queue wptr_bo error handling
>> v4:
>> - Add MES version shift/mask defines to amdgpu_mes.h
>> - Change version check from MES_VERSION to MES_API_VERSION
>> - Add check in kfd_ioctl_create_queue before wptr bo pin/GART map to
>> ensure bo is a single page.
>>
>> Signed-off-by: Graham Sider graham.si...@amd.com
>> Acked-by: Alex Deucher 
>> alexander.deuc...@amd.com
>> Reviewed-by: Philip Yang philip.y...@amd.com
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 +
>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 48 +++
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   |  7 +++
>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 45 -
>>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
>>  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
>>  .../amd/amdkfd/kfd_process_queue_manager.c| 20 +---
>>  8 files changed, 127 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 648c031942e9..b25b41f50213 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -286,6 +286,8 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct 
>> kgd_mem *mem,
>>  
>>   void **kptr, uint64_t *size);
>>  void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
>>
>> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
>> amdgpu_bo *bo);
>> +
>>  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
>>  
>>  struct dma_fence **ef);
>>  int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index afd6e6923189..615ac2895d62 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -2148,6 +2148,54 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
>>   return ret;
>>  }
>>
>> +/**
>> + * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment 
>> reference count
>> + * @adev: Device to which allocated BO belongs
>> + * @bo: Buffer object to be mapped
>> + *
>> + * Before return, bo reference count is incremented. To release the 
>> reference and unpin/
>> + * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
>> + */
>> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
>> amdgpu_bo *bo)
>> +{
>> +   int ret;
>> +
>> +   ret = amdgpu_bo_reserve(bo, true);
>> +   if (ret) {
>> +   pr_err("Failed to reserve bo. ret %d\n", ret);
>> +   goto err_reserve_bo_failed;
>> +   }
>> +
>> +   ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
>> +   if (ret) {
>> +   pr_err("Failed to pin bo. ret %d\n", ret);
>> +   goto err_pin_bo_failed;
>> +   }
>> +
>> +   ret = amdgpu_ttm_alloc_gart(>tbo);
>> +   if (ret) {
>> +   pr_err("Failed to bind bo to GART. ret %d\n", ret);
>> +   goto err_map_bo_gart_failed;
>> +   }
>> +
>> +   amdgpu_amdkfd_remove_eviction_fence(
>> +   bo, bo->kfd_bo->process_info->eviction_fence);
>> +
>> +   amdgpu_bo_unreserve(bo);
>> +
>> +   bo = amdgpu_bo_ref(bo);
>> +
>> +   return 0;
>> +
>> +err_map_bo_gart_failed:
>> +   amdgpu_bo_unpin(bo);
>> +err_pin_bo_failed:
>> +   amdgpu_bo_unreserve(bo);
>> +err_reserve_bo_failed:
>> +
>> +   return ret;
>> +}
>> +
>>  /** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel 
>> CPU access
>>   *
>>   * @mem: Buffer object to be mapped for CPU access
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>> index be4b51a5b5c7..137a2cc2e807 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>> +++ 

RE: [PATCH v2 2/2] drm/amdkfd: Free queue after unmap queue success

2022-06-20 Thread Sider, Graham
[Public]

Reviewed-by: Graham Sider 

-Original Message-
From: Yang, Philip  
Sent: Friday, June 17, 2022 3:55 PM
To: amd-gfx@lists.freedesktop.org
Cc: Sider, Graham ; Yang, Philip 
Subject: [PATCH v2 2/2] drm/amdkfd: Free queue after unmap queue success

After queue unmap or remove from MES successfully, free queue sysfs entries, 
doorbell and remove from queue list. Otherwise, application may destroy queue 
again, cause below kernel warning or crash backtrace.

For outstanding queues, either application forget to destroy or failed to 
destroy, kfd_process_notifier_release will remove queue sysfs entries, 
kfd_process_wq_release will free queue doorbell.

v2: decrement_queue_count for MES queue

 refcount_t: underflow; use-after-free.
 WARNING: CPU: 7 PID: 3053 at lib/refcount.c:28
  Call Trace:
   kobject_put+0xd6/0x1a0
   kfd_procfs_del_queue+0x27/0x30 [amdgpu]
   pqm_destroy_queue+0xeb/0x240 [amdgpu]
   kfd_ioctl_destroy_queue+0x32/0x70 [amdgpu]
   kfd_ioctl+0x27d/0x500 [amdgpu]
   do_syscall_64+0x35/0x80

 WARNING: CPU: 2 PID: 3053 at 
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_device_queue_manager.c:400
  Call Trace:
   deallocate_doorbell.isra.0+0x39/0x40 [amdgpu]
   destroy_queue_cpsch+0xb3/0x270 [amdgpu]
   pqm_destroy_queue+0x108/0x240 [amdgpu]
   kfd_ioctl_destroy_queue+0x32/0x70 [amdgpu]
   kfd_ioctl+0x27d/0x500 [amdgpu]

 general protection fault, probably for non-canonical address
0xdead0108:
 Call Trace:
  pqm_destroy_queue+0xf0/0x200 [amdgpu]
  kfd_ioctl_destroy_queue+0x2f/0x60 [amdgpu]
  kfd_ioctl+0x19b/0x600 [amdgpu]

Signed-off-by: Philip Yang 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 28 +++
 .../amd/amdkfd/kfd_process_queue_manager.c|  2 +-
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 21aeb05b17db..213246a5b4e4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1872,6 +1872,22 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
 
}
 
+   if (q->properties.is_active) {
+   if (!dqm->dev->shared_resources.enable_mes) {
+   retval = execute_queues_cpsch(dqm,
+ 
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+   if (retval == -ETIME)
+   qpd->reset_wavefronts = true;
+   } else {
+   retval = remove_queue_mes(dqm, q, qpd);
+   }
+
+   if (retval)
+   goto failed_unmap_queue;
+
+   decrement_queue_count(dqm, qpd, q);
+   }
+
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
 
@@ -1885,17 +1901,6 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
 
list_del(>list);
qpd->queue_count--;
-   if (q->properties.is_active) {
-   if (!dqm->dev->shared_resources.enable_mes) {
-   decrement_queue_count(dqm, qpd, q);
-   retval = execute_queues_cpsch(dqm,
- 
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-   if (retval == -ETIME)
-   qpd->reset_wavefronts = true;
-   } else {
-   retval = remove_queue_mes(dqm, q, qpd);
-   }
-   }
 
/*
 * Unconditionally decrement this counter, regardless of the queue's @@ 
-1912,6 +1917,7 @@ static int destroy_queue_cpsch(struct device_queue_manager 
*dqm,
 
return retval;
 
+failed_unmap_queue:
 failed_try_destroy_debugged_queue:
 
dqm_unlock(dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index dc00484ff484..99f2a6412201 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -419,7 +419,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, 
unsigned int qid)
}
 
if (pqn->q) {
-   kfd_procfs_del_queue(pqn->q);
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, >qpd, pqn->q);
if (retval) {
@@ -439,6 +438,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, 
unsigned int qid)
if (dev->shared_resources.enable_mes)
amdgpu_amdkfd_free_gtt_mem(dev->adev,
   pqn->q->gang_ctx_bo);
+   kfd_procfs_del_queue(pqn->q);
uninit_queue(pqn->q);
}
 
--
2.35.1


RE: [PATCH v2 1/2] drm/amdkfd: Add queue to MES if it becomes active

2022-06-20 Thread Sider, Graham
[Public]

Reviewed-by: Graham Sider 

-Original Message-
From: Yang, Philip  
Sent: Friday, June 17, 2022 3:55 PM
To: amd-gfx@lists.freedesktop.org
Cc: Sider, Graham ; Yang, Philip 
Subject: [PATCH v2 1/2] drm/amdkfd: Add queue to MES if it becomes active

We remove the user queue from MES scheduler to update queue properties.
If the queue becomes active after updating, add the user queue to MES 
scheduler, to be able to handle command packet submission.

v2: don't break pqm_set_gws

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e1797657b04c..21aeb05b17db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -811,7 +811,6 @@ static int update_queue(struct device_queue_manager *dqm, 
struct queue *q,
struct mqd_manager *mqd_mgr;
struct kfd_process_device *pdd;
bool prev_active = false;
-   bool add_queue = false;
 
dqm_lock(dqm);
pdd = kfd_get_process_device_data(q->device, q->process); @@ -887,7 
+886,7 @@ static int update_queue(struct device_queue_manager *dqm, struct 
queue *q,
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
if (!dqm->dev->shared_resources.enable_mes)
retval = map_queues_cpsch(dqm);
-   else if (add_queue)
+   else if (q->properties.is_active)
retval = add_queue_mes(dqm, q, >qpd);
} else if (q->properties.is_active &&
 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
--
2.35.1


RE: [PATCH 1/2] drm/amdkfd: Add queue to MES if it becomes active

2022-06-15 Thread Sider, Graham
[Public]

Reviewed-by: Graham Sider 


> -Original Message-
> From: Yang, Philip 
> Sent: Wednesday, June 15, 2022 5:57 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Sider, Graham ; Yang, Philip
> 
> Subject: [PATCH 1/2] drm/amdkfd: Add queue to MES if it becomes active
> 
> We remove the user queue from MES scheduler to update queue
> properties.
> If the queue becomes active after updating, add the user queue to MES
> scheduler, to be able to handle command packet submission.
> 
> Signed-off-by: Philip Yang 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index e1797657b04c..67ae5b6385a2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -866,8 +866,10 @@ static int update_queue(struct
> device_queue_manager *dqm, struct queue *q,
>* dqm->active_queue_count to determine whether a new runlist
> must be
>* uploaded.
>*/
> - if (q->properties.is_active && !prev_active) {
> - increment_queue_count(dqm, >qpd, q);
> + if (q->properties.is_active) {
> + add_queue = true;
> + if (!prev_active)
> + increment_queue_count(dqm, >qpd, q);
>   } else if (!q->properties.is_active && prev_active) {
>   decrement_queue_count(dqm, >qpd, q);
>   } else if (q->gws && !q->properties.is_gws) {
> --
> 2.35.1


RE: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-15 Thread Sider, Graham
[AMD Official Use Only - General]

> -Original Message-
> From: Koenig, Christian 
> Sent: Wednesday, June 15, 2022 3:29 AM
> To: Sider, Graham ; amd-
> g...@lists.freedesktop.org
> Cc: Joshi, Mukul ; Kuehling, Felix
> ; Yang, Philip 
> Subject: Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue
> oversubscription
> 
> 
> 
> Am 13.06.22 um 17:20 schrieb Graham Sider:
> > Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped
> > to GART for usermode queues in order to support oversubscription. In
> > the case that work is submitted to an unmapped queue, MES must have a
> > GART wptr address to determine whether the queue should be mapped.
> >
> > This change is accompanied with changes in MES and is applicable for
> > MES_VERSION >= 3.
> >
> > v2:
> > - Update MES_VERSION check from 2 to 3.
> > v3:
> > - Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
> > - Move wptr_bo refcount increment to
> amdgpu_amdkfd_map_gtt_bo_to_gart
> > - Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
> > - Cleanup/fix create_queue wptr_bo error handling
> >
> > Signed-off-by: Graham Sider 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
> >   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49
> +++
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 37 +-
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
> >   .../amd/amdkfd/kfd_process_queue_manager.c| 17 +--
> >   7 files changed, 110 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > index 429b16ba10bf..dba26d1e3be9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > @@ -301,6 +301,7 @@ int
> amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device
> *adev,
> > struct kgd_mem *mem, void **kptr, uint64_t *size);
> >   void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct
> amdgpu_device *adev,
> > struct kgd_mem *mem);
> > +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev,
> > +struct amdgpu_bo *bo);
> >
> >   int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
> > struct dma_fence **ef);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > index efab923056f4..888d08128a94 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > @@ -2030,6 +2030,55 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
> > return ret;
> >   }
> >
> > +/**
> > + * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and
> increment
> > +reference count
> > + * @adev: Device to which allocated BO belongs
> > + * @bo: Buffer object to be mapped
> > + *
> > + * Before return, bo reference count is incremented. To release the
> > +reference and unpin/
> > + * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
> > + */
> > +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev,
> > +struct amdgpu_bo *bo) {
> > +   int ret;
> > +
> > +   ret = amdgpu_bo_reserve(bo, true);
> > +   if (ret) {
> > +   pr_err("Failed to reserve bo. ret %d\n", ret);
> > +   goto err_reserve_bo_failed;
> > +   }
> > +
> > +   ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
> > +   if (ret) {
> > +   pr_err("Failed to pin bo. ret %d\n", ret);
> > +   goto err_pin_bo_failed;
> > +   }
> 
> 
> Oh! Is that something we do for every MQD? When yes that here is pretty
> much a NAK.
> 
> We can't do this or create a trivial deny of service attack against the kernel
> driver.
> 
> Regards,
> Christian.
> 

Hi Christian, could you elaborate on this? Right now this is only being used to 
pin the queue wptr BO.

Best,
Graham

> > +
> > +   ret = amdgpu_ttm_alloc_gart(>tbo);
> > +   if (ret) {
> > +   pr_err("Failed to bind bo to GART. ret %d\n", ret);
> > +   goto err_map_bo_gart_failed;
> > +   }
> > +
> > +   amdgpu_amdkfd_remove_eviction_fence(
> > +   bo, bo->kfd_bo->process_info->eviction_fence);
> > +   list_del_

RE: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-14 Thread Sider, Graham
[AMD Official Use Only - General]

>> From: Yang, Philip  
>> Sent: Tuesday, June 14, 2022 2:22 PM
>> To: Sider, Graham ; amd-gfx@lists.freedesktop.org
>> Cc: Kuehling, Felix ; Joshi, Mukul 
>> ; Yang, Philip 
>> Subject: Re: [PATCH v3 2/3] drm/amdkfd: Enable GFX11 usermode queue 
>> oversubscription
>>
>>
>> On 2022-06-13 11:20, Graham Sider wrote:
>> Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
>> GART for usermode queues in order to support oversubscription. In the
>> case that work is submitted to an unmapped queue, MES must have a GART
>> wptr address to determine whether the queue should be mapped.
>>
>> This change is accompanied with changes in MES and is applicable for
>> MES_VERSION >= 3.
>>
>> v2:
>> - Update MES_VERSION check from 2 to 3.
>> v3:
>> - Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
>> - Move wptr_bo refcount increment to amdgpu_amdkfd_map_gtt_bo_to_gart
>> - Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
>> - Cleanup/fix create_queue wptr_bo error handling
>> Two nit-pick below, with those fixed, this patch is
>> Reviewed-by: Philip Yangmailto:philip.y...@amd.com
>>
>> Signed-off-by: Graham Sider mailto:graham.si...@amd.com
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49 +++
>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 37 +-
>> .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
>> .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
>> .../amd/amdkfd/kfd_process_queue_manager.c| 17 +--
>>  7 files changed, 110 insertions(+), 8 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 429b16ba10bf..dba26d1e3be9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -301,6 +301,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct 
>> amdgpu_device *adev,
>>  struct kgd_mem *mem, void **kptr, uint64_t *size);
>>  void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device 
>> *adev,
>>  struct kgd_mem *mem);
>> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
>> amdgpu_bo *bo);
>>  
>>  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
>>  struct dma_fence **ef);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index efab923056f4..888d08128a94 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -2030,6 +2030,55 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
>>  return ret;
>>  }
>>  
>> +/**
>> + * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment 
>> reference count
>> + * @adev: Device to which allocated BO belongs
>> + * @bo: Buffer object to be mapped
>> + *
>> + * Before return, bo reference count is incremented. To release the 
>> reference and unpin/
>> + * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
>> + */
>> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
>> amdgpu_bo *bo)
>> +{
>> +int ret;
>> +
>> +ret = amdgpu_bo_reserve(bo, true);
>> +if (ret) {
>> +pr_err("Failed to reserve bo. ret %d\n", ret);
>> +goto err_reserve_bo_failed;
>> +}
>> +
>> +ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
>> +if (ret) {
>> +pr_err("Failed to pin bo. ret %d\n", ret);
>> +goto err_pin_bo_failed;
>> +}
>> +
>> +ret = amdgpu_ttm_alloc_gart(>tbo);
>> +if (ret) {
>> +pr_err("Failed to bind bo to GART. ret %d\n", ret);
>> +goto err_map_bo_gart_failed;
>> +}
>> +
>> +amdgpu_amdkfd_remove_eviction_fence(
>> +bo, bo->kfd_bo->process_info->eviction_fence);
>> +list_del_init(>kfd_bo->validate_list.head);
>
A pinned BO should be kept on the validate_list, because the PDB/PTB may 
still move and need their mappings updated; please remove the list_del_init 
here.
>

Thought I deleted this line - good catch.

>> +
>> +amdgpu_bo_unreserve(bo);
>> +
>> +bo = amdgpu_

RE: [PATCH v2 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-13 Thread Sider, Graham
[Public]

> -Original Message-
> From: Yu, Lang 
> Sent: Friday, June 10, 2022 10:38 PM
> To: Sider, Graham 
> Cc: amd-gfx@lists.freedesktop.org; Joshi, Mukul ;
> Kuehling, Felix ; Yang, Philip
> 
> Subject: Re: [PATCH v2 2/3] drm/amdkfd: Enable GFX11 usermode queue
> oversubscription
> 
> On 06/10/ , Graham Sider wrote:
> > Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped
> > to GART for usermode queues in order to support oversubscription. In
> > the case that work is submitted to an unmapped queue, MES must have a
> > GART wptr address to determine whether the queue should be mapped.
> >
> > This change is accompanied with changes in MES and is applicable for
> > MES_VERSION >= 3.
> >
> > v2: Update MES_VERSION check from 2 to 3.
> >
> > Signed-off-by: Graham Sider 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
> >  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 39
> 
> >  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 45
> ++-
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
> > .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
> >  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
> >  .../amd/amdkfd/kfd_process_queue_manager.c| 19 +---
> >  7 files changed, 110 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > index 429b16ba10bf..dba26d1e3be9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > @@ -301,6 +301,7 @@ int
> amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device
> *adev,
> > struct kgd_mem *mem, void **kptr, uint64_t *size);  void
> > amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct
> amdgpu_device *adev,
> > struct kgd_mem *mem);
> > +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev,
> > +struct amdgpu_bo *bo);
> >
> >  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
> > struct dma_fence **ef);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > index efab923056f4..2d452655eb04 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > @@ -2030,6 +2030,45 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
> > return ret;
> >  }
> >
> > +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev,
> > +struct amdgpu_bo *bo) {
> > +   int ret;
> > +
> > +   ret = amdgpu_bo_reserve(bo, true);
> > +   if (ret) {
> > +   pr_err("Failed to reserve bo. ret %d\n", ret);
> > +   goto err_reserve_bo_failed;
> > +   }
> > +
> > +   ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
> > +   if (ret) {
> > +   pr_err("Failed to pin bo. ret %d\n", ret);
> > +   goto err_pin_bo_failed;
> > +   }
> > +
> > +   ret = amdgpu_ttm_alloc_gart(&bo->tbo);
> > +   if (ret) {
> > +   pr_err("Failed to bind bo to GART. ret %d\n", ret);
> > +   goto err_map_bo_gart_failed;
> > +   }
> > +
> > +   amdgpu_amdkfd_remove_eviction_fence(
> > +   bo, bo->kfd_bo->process_info->eviction_fence);
> > +   list_del_init(&bo->kfd_bo->validate_list.head);
> > +
> > +   amdgpu_bo_unreserve(bo);
> > +
> > +   return 0;
> > +
> > +err_map_bo_gart_failed:
> > +   amdgpu_bo_unpin(bo);
> > +err_pin_bo_failed:
> > +   amdgpu_bo_unreserve(bo);
> > +err_reserve_bo_failed:
> > +
> > +   return ret;
> > +}
> > +
> >  int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct
> amdgpu_device *adev,
> > struct kgd_mem *mem, void **kptr, uint64_t *size)  { diff --
> git
> > a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index e9766e165c38..58d5ebed1b32 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -290,6 +290,11 @@ static int kfd_ioctl_create_queue(struct file *filep,
> struct kfd_process *p,
> > struct queue_properties q_properties;
> > uint32_t doorbell_offset_in_process = 0;
> >
> > +   struct amdgpu_bo_va_mapping *wptr_mapping;
> > +   struct interval_tree_node *wptr_

RE: [PATCH v2 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription

2022-06-13 Thread Sider, Graham
[AMD Official Use Only - General]

Thanks for the great comments Felix--will apply these.

Best,
Graham

-Original Message-
From: Kuehling, Felix  
Sent: Friday, June 10, 2022 7:06 PM
To: Sider, Graham ; amd-gfx@lists.freedesktop.org
Cc: Joshi, Mukul ; Yang, Philip 
Subject: Re: [PATCH v2 2/3] drm/amdkfd: Enable GFX11 usermode queue 
oversubscription

Am 2022-06-10 um 13:13 schrieb Graham Sider:
> Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped 
> to GART for usermode queues in order to support oversubscription. In 
> the case that work is submitted to an unmapped queue, MES must have a 
> GART wptr address to determine whether the queue should be mapped.
>
> This change is accompanied with changes in MES and is applicable for 
> MES_VERSION >= 3.
>
> v2: Update MES_VERSION check from 2 to 3.
>
> Signed-off-by: Graham Sider 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 39 
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 45 ++-
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 +++-
>   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  2 +
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 ++
>   .../amd/amdkfd/kfd_process_queue_manager.c| 19 +---
>   7 files changed, 110 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 429b16ba10bf..dba26d1e3be9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -301,6 +301,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct 
> amdgpu_device *adev,
>   struct kgd_mem *mem, void **kptr, uint64_t *size);
>   void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device 
> *adev,
>   struct kgd_mem *mem);
> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, 
> +struct amdgpu_bo *bo);
>   
>   int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
>   struct dma_fence **ef);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index efab923056f4..2d452655eb04 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -2030,6 +2030,45 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
>   return ret;
>   }
>   
> +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, 
> +struct amdgpu_bo *bo)

I think this function should take a reference count on the bo to ensure the 
pinned BO is not freed prematurely (even if broken user mode frees the BO 
before destroying the queue). Add a comment that the correct way to release the 
reference and unpin/unmap the BO is to call amdgpu_amdkfd_free_gtt_mem. See 
another comment below about moving the amdgpu_bo_ref here.


> +{
> + int ret;
> +
> + ret = amdgpu_bo_reserve(bo, true);
> + if (ret) {
> + pr_err("Failed to reserve bo. ret %d\n", ret);
> + goto err_reserve_bo_failed;
> + }
> +
> + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
> + if (ret) {
> + pr_err("Failed to pin bo. ret %d\n", ret);
> + goto err_pin_bo_failed;
> + }
> +
> > + ret = amdgpu_ttm_alloc_gart(&bo->tbo);
> + if (ret) {
> + pr_err("Failed to bind bo to GART. ret %d\n", ret);
> + goto err_map_bo_gart_failed;
> + }
> +
> + amdgpu_amdkfd_remove_eviction_fence(
> + bo, bo->kfd_bo->process_info->eviction_fence);
> > + list_del_init(&bo->kfd_bo->validate_list.head);

Please see Lang, Yu's patch "drm/amdkfd: add pinned BOs to kfd_bo_list". 
We realized that pinned BOs still need to be on the validate list. So please 
remove the list_del_init here.


> +
> + amdgpu_bo_unreserve(bo);
> +
> + return 0;
> +
> +err_map_bo_gart_failed:
> + amdgpu_bo_unpin(bo);
> +err_pin_bo_failed:
> + amdgpu_bo_unreserve(bo);
> +err_reserve_bo_failed:
> +
> + return ret;
> +}
> +
>   int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
>   struct kgd_mem *mem, void **kptr, uint64_t *size)
>   {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index e9766e165c38..58d5ebed1b32 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -290,6 +290,11 @@ static int kfd_ioctl_create_queue(struct file *filep, 
> struct kfd_process *p,
>   

RE: [PATCH 2/4] drm/amdkfd: Pass MES/RS64 information to sysfs

2022-06-08 Thread Sider, Graham
[AMD Official Use Only - General]

I was originally using the MES scheduler version in Thunk to check whether to 
use userptr or non-paged memory for the queue allocation. Just today though 
I've changed this to always use non-paged so this isn't needed anymore. Maybe 
instead we can think about adding this to debugfs, I agree.

Thanks,
Graham 

-Original Message-
From: Kuehling, Felix  
Sent: Wednesday, June 8, 2022 3:31 PM
To: Sider, Graham ; amd-gfx@lists.freedesktop.org
Cc: Yang, Philip ; Joshi, Mukul 
Subject: Re: [PATCH 2/4] drm/amdkfd: Pass MES/RS64 information to sysfs

On 2022-06-07 18:50, Graham Sider wrote:
> Make MES/RS64 CP enablement and MES scheduler/MES KIQ versions 
> available through sysfs.
>
> Signed-off-by: Graham Sider 

Why do we need to expose this to user mode applications? They don't interact 
directly with the scheduler or the KIQ. I cannot think of any reasonable 
conditions in usermode that would need the scheduler or KIQ version number. It 
may make sense in debugfs, but not here.

Regards,
   Felix


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 8 
>   1 file changed, 8 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 02efae4f5549..51c8a285baaf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -571,6 +571,14 @@ static ssize_t node_show(struct kobject *kobj, struct 
> attribute *attr,
> dev->gpu->sdma_fw_version);
>   sysfs_show_64bit_prop(buffer, offs, "unique_id",
> dev->gpu->adev->unique_id);
> + sysfs_show_32bit_prop(buffer, offs, "mes_enabled",
> +   (uint32_t)dev->gpu->adev->enable_mes);
> + sysfs_show_32bit_prop(buffer, offs, "rs64_cp_enabled",
> +   
> (uint32_t)dev->gpu->adev->gfx.rs64_enable);
> + sysfs_show_32bit_prop(buffer, offs, "mes_sched_version",
> +   dev->gpu->adev->mes.sched_version);
> + sysfs_show_32bit_prop(buffer, offs, "mes_kiq_version",
> +   dev->gpu->adev->mes.kiq_version);
>   
>   }
>   


RE: [PATCH] drm/amdkfd: Fix static checker warning on MES queue type

2022-05-12 Thread Sider, Graham
[AMD Official Use Only - General]

Thanks for mentioning, yes I'll add a reported-by for Dan on the commit.

Best,
Graham

> -Original Message-
> From: Alex Deucher 
> Sent: Thursday, May 12, 2022 5:33 PM
> To: Sider, Graham 
> Cc: amd-gfx list ; Joshi, Mukul
> ; Kuehling, Felix ; Dan
> Carpenter 
> Subject: Re: [PATCH] drm/amdkfd: Fix static checker warning on MES queue
> type
> 
> [CAUTION: External Email]
> 
> On Thu, May 12, 2022 at 3:17 PM Graham Sider 
> wrote:
> >
> > convert_to_mes_queue_type return can be negative, but
> > queue_input.queue_type is uint32_t. Put return in integer var and cast
> > to unsigned after negative check.
> >
> > Signed-off-by: Graham Sider 
> 
> Add a reported-by for Dan's email?
> 
> Reviewed-by: Alex Deucher 
> 
> > ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 ---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > index e9c9a3a67ab0..e1797657b04c 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > @@ -176,7 +176,7 @@ static int add_queue_mes(struct
> device_queue_manager *dqm, struct queue *q,
> > struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev-
> >adev;
> > struct kfd_process_device *pdd = qpd_to_pdd(qpd);
> > struct mes_add_queue_input queue_input;
> > -   int r;
> > +   int r, queue_type;
> >
> > if (dqm->is_hws_hang)
> > return -EIO;
> > @@ -201,12 +201,13 @@ static int add_queue_mes(struct
> device_queue_manager *dqm, struct queue *q,
> > queue_input.tba_addr = qpd->tba_addr;
> > queue_input.tma_addr = qpd->tma_addr;
> >
> > -   queue_input.queue_type = convert_to_mes_queue_type(q-
> >properties.type);
> > -   if (queue_input.queue_type < 0) {
> > +   queue_type = convert_to_mes_queue_type(q->properties.type);
> > +   if (queue_type < 0) {
> > pr_err("Queue type not supported with MES, queue:%d\n",
> > q->properties.type);
> > return -EINVAL;
> > }
> > +   queue_input.queue_type = (uint32_t)queue_type;
> >
> > if (q->gws) {
> > queue_input.gws_base = 0;
> > --
> > 2.25.1
> >


RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init (v2)

2021-12-19 Thread Sider, Graham
[Public]

> -Original Message-
> From: Kim, Jonathan 
> Sent: Monday, December 20, 2021 12:44 AM
> To: Chen, Guchun ; amd-
> g...@lists.freedesktop.org; Deucher, Alexander
> ; Sider, Graham
> ; Kuehling, Felix 
> Subject: RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device
> init (v2)
> 
> [AMD Official Use Only]
> 
> > -Original Message-
> > From: Chen, Guchun 
> > Sent: December 19, 2021 10:09 PM
> > To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> > ; Sider, Graham
> ;
> > Kuehling, Felix ; Kim, Jonathan
> > 
> > Cc: Chen, Guchun 
> > Subject: [PATCH] drm/amdkfd: correct sdma queue number in kfd device
> > init (v2)
> >
> > sdma queue number is not correct like on vega20, this patch promises
> > the
> 
> I think you've also fixed Vega12 and Raven (they were being set to 8 before
> rather than 2).  No need to mention this in your description, just double
> checking.
> 

I believe it was only Vega20 that was being set incorrectly. The condition was:

sdma_version >= IP_VERSION(4, 0, 0)  &&
sdma_version <= IP_VERSION(4, 2, 0))

which encapsulates Vega12 and Raven setting sdma_queues_per_engine to 2, but 
also accidentally encapsulates Vega20.

> > setting keeps the same after code refactor.
> > Additionally, improve code to use switch case to list IP version to
> > complete kfd device_info structure filling.
> > This keeps consistency with the IP parse code in amdgpu_discovery.c.
> >
> > v2: use dev_warn for the default switch case;
> > set default sdma queue per engine(8) and IH handler to v9.
> > (Jonathan)
> >
> > Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
> > Signed-off-by: Guchun Chen 
> 
> Other than the missing checks for Raven when setting the interrupt class
> (see inline comments and reference kgd2kfd_probe in kfd_device.c) and
> one nit-pick inline, this looks good to me.
> 
> With those fixed, this patch is
> Reviewed-by: Jonathan Kim 
> 

Other than Jon's comments, this patch is also

Reviewed-by: Graham Sider  

> > ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 77
> > ++---
> >  1 file changed, 68 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > index facc28f58c1f..36406a261203 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > @@ -59,11 +59,75 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
> >
> >  static int kfd_resume(struct kfd_dev *kfd);
> >
> > +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
> {
> > + uint32_t sdma_version = kfd->adev-
> > >ip_versions[SDMA0_HWIP][0];
> > +
> > + switch (sdma_version) {
> 
> Please pull in the indentation for all cases to line up with the switch block.
> 
> > + case IP_VERSION(4, 0, 0):/* VEGA10 */
> > + case IP_VERSION(4, 0, 1):/* VEGA12 */
> > + case IP_VERSION(4, 1, 0):/* RAVEN */
> > + case IP_VERSION(4, 1, 1):/* RAVEN */
> 
> As mentioned, you've also fixed Vega12 & Raven here I presume since afaik,
> they're based off Vega10?
> 
> > + case IP_VERSION(4, 1, 2):/* RENIOR */
> > + case IP_VERSION(5, 2, 1):/* VANGOGH */
> > + case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
> > + kfd->device_info.num_sdma_queues_per_engine =
> > 2;
> > + break;
> > + case IP_VERSION(4, 2, 0):/* VEGA20 */
> > + case IP_VERSION(4, 2, 2):/* ARCTUTUS */
> > + case IP_VERSION(4, 4, 0):/* ALDEBARAN */
> > + case IP_VERSION(5, 0, 0):/* NAVI10 */
> > + case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
> > + case IP_VERSION(5, 0, 2):/* NAVI14 */
> > + case IP_VERSION(5, 0, 5):/* NAVI12 */
> > + case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
> > + case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
> > + case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
> > + kfd->device_info.num_sdma_queues_per_engine =
> > 8;
> > + break;
> > + default:
> > + dev_warn(kfd_device,
> > + "Default sdma queue per engine(8) is set
> > + due
> > to "
> > + "mismatch of sdma ip
> > 

RE: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init

2021-12-17 Thread Sider, Graham
[Public]

> -Original Message-
> From: Chen, Guchun 
> Sent: Friday, December 17, 2021 9:31 AM
> To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> ; Sider, Graham
> ; Kuehling, Felix ;
> Kim, Jonathan 
> Cc: Chen, Guchun 
> Subject: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init
> 
> sdma queue number is not correct like on vega20, this patch promises the
> setting keeps the same after code refactor.
> Additionally, improve code to use switch case to list IP version to complete
> kfd device_info structure filling.
> This keeps consistency with the IP parse code in amdgpu_discovery.c.
> 
> Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
> Signed-off-by: Guchun Chen 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 74
> ++---
>  1 file changed, 65 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index facc28f58c1f..e50bf992f298 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -59,11 +59,72 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
> 
>  static int kfd_resume(struct kfd_dev *kfd);
> 
> +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) {
> + uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
> +
> + switch (sdma_version) {
> + case IP_VERSION(4, 0, 0):/* VEGA10 */
> + case IP_VERSION(4, 0, 1):/* VEGA12 */
> + case IP_VERSION(4, 1, 0):/* RAVEN */
> + case IP_VERSION(4, 1, 1):/* RAVEN */
> + case IP_VERSION(4, 1, 2):/* RENIOR */
> + case IP_VERSION(5, 2, 1):/* VANGOGH */
> + case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
> + kfd->device_info.num_sdma_queues_per_engine =
> 2;
> + break;
> + case IP_VERSION(4, 2, 0):/* VEGA20 */

Thanks for spotting this Guchun. My previous patch should have used a "<" 
instead of a "<=" on IP_VERSION(4, 2, 0).

> + case IP_VERSION(4, 2, 2):/* ARCTUTUS */
> + case IP_VERSION(4, 4, 0):/* ALDEBARAN */
> + case IP_VERSION(5, 0, 0):/* NAVI10 */
> + case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
> + case IP_VERSION(5, 0, 2):/* NAVI14 */
> + case IP_VERSION(5, 0, 5):/* NAVI12 */
> + case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
> + case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
> + case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
> + kfd->device_info.num_sdma_queues_per_engine =
> 8;
> + break;
> + default:
> + dev_err(kfd_device,
> + "Failed to find sdma ip
> blocks(SDMA_HWIP:0x%x) in %s\n",
> +sdma_version, __func__);
> + }
> +}
> +
> +static void kfd_device_info_set_event_interrupt_class(struct kfd_dev
> +*kfd) {
> + uint32_t gc_version = KFD_GC_VERSION(kfd);
> +
> + switch (gc_version) {
> + case IP_VERSION(9, 0, 1): /* VEGA10 */
> + case IP_VERSION(9, 2, 1): /* VEGA12 */
> + case IP_VERSION(9, 3, 0): /* RENOIR */
> + case IP_VERSION(9, 4, 0): /* VEGA20 */
> + case IP_VERSION(9, 4, 1): /* ARCTURUS */
> + case IP_VERSION(9, 4, 2): /* ALDEBARAN */
> + case IP_VERSION(10, 3, 1): /* VANGOGH */
> + case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
> + case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
> + case IP_VERSION(10, 1, 10): /* NAVI10 */
> + case IP_VERSION(10, 1, 2): /* NAVI12 */
> + case IP_VERSION(10, 1, 1): /* NAVI14 */
> + case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
> + case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
> + case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
> + case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
> + kfd->device_info.event_interrupt_class =
> &event_interrupt_class_v9;
> + break;
> + default:
> + dev_err(kfd_device, "Failed to find gc ip
> blocks(GC_HWIP:0x%x) in %s\n",
> + gc_version, __func__);
> + }
> +}

I understand the appeal of moving to a switch for the SDMA queue num above 
since its setting isn't very linear w.r.t. the SDMA versioning. That said I 
don't know if I understand moving to a switch for the event interrupt class 
here. To clarify, originally when I set all SOC15 to event_interrupt_class_v9 
it was to follow what was in the device_info structs in drm-staging-next at 
that time--that said if the plan is in a following pat

RE: [PATCH] drm/amdkfd: Correct the value of the no_atomic_fw_version variable

2021-12-02 Thread Sider, Graham
[AMD Official Use Only]

> From: chen gong 
> Sent: Thursday, December 2, 2021 3:56 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Sider, Graham ; Gong, Curry
> 
> Subject: [PATCH] drm/amdkfd: Correct the value of the
> no_atomic_fw_version variable
> 
> 145:
> navi10IP_VERSION(10, 1, 10)
> navi12IP_VERSION(10, 1, 2)
> navi14IP_VERSION(10, 1, 1)
> 
> 92:
> sienna_cichlidIP_VERSION(10, 3, 0)
> navy_flounder IP_VERSION(10, 3, 2)
> vangogh   IP_VERSION(10, 3, 1)
> dimgrey_cavefish  IP_VERSION(10, 3, 4)
> beige_gobyIP_VERSION(10, 3, 5)
> yellow_carp   IP_VERSION(10, 3, 3)
> 
> Signed-off-by: chen gong 

Good catch, these were mistakenly reversed. Thanks!

Reviewed-by: Graham Sider 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index e6fded7..267668b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -94,9 +94,9 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
>   if (gc_version < IP_VERSION(11, 0, 0)) {
>   /* Navi2x+, Navi1x+ */
>   if (gc_version >= IP_VERSION(10, 3, 0))
> - kfd->device_info.no_atomic_fw_version =
> 145;
> - else if (gc_version >= IP_VERSION(10, 1, 1))
>   kfd->device_info.no_atomic_fw_version =
> 92;
> + else if (gc_version >= IP_VERSION(10, 1, 1))
> + kfd->device_info.no_atomic_fw_version =
> 145;
> 
>   /* Navi1x+ */
>   if (gc_version >= IP_VERSION(10, 1, 1))
> --
> 2.7.4


RE: [PATCH v2 3/4] drm/amdkfd: move to dynamic device_info creation

2021-11-22 Thread Sider, Graham
[AMD Official Use Only]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Friday, November 19, 2021 4:30 PM
> To: Sider, Graham ; amd-
> g...@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: Re: [PATCH v2 3/4] drm/amdkfd: move to dynamic device_info
> creation
> 
> On 2021-11-19 2:52 p.m., Graham Sider wrote:
> > Change unsupported asic condition to only probe f2g, move device_info
> > initialization post-switch and map to heap.
> >
> > Signed-off-by: Graham Sider 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_device.c | 183 ++
> --
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   2 +-
> >   2 files changed, 79 insertions(+), 106 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > index 676cb9c3166c..7ddea653b3d9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > @@ -574,191 +574,151 @@ static void kfd_device_info_init(struct
> > kfd_dev *kfd,
> >
> >   struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
> >   {
> > -   struct kfd_dev *kfd;
> > -   const struct kfd_device_info *device_info;
> > -   const struct kfd2kgd_calls *f2g;
> > +   struct kfd_dev *kfd = NULL;
> > +   struct kfd_device_info *device_info = NULL;
> > +   const struct kfd2kgd_calls *f2g = NULL;
> > struct pci_dev *pdev = adev->pdev;
> > +   uint32_t gfx_target_version = 0;
> >
> > switch (adev->asic_type) {
> >   #ifdef KFD_SUPPORT_IOMMU_V2
> >   #ifdef CONFIG_DRM_AMDGPU_CIK
> > case CHIP_KAVERI:
> > -   if (vf)
> > -   device_info = NULL;
> > -   else
> > -   device_info = &kaveri_device_info;
> > -   f2g = &gfx_v7_kfd2kgd;
> > +   gfx_target_version = 7;
> > +   if (!vf)
> > +   f2g = &gfx_v7_kfd2kgd;
> > break;
> >   #endif
> > case CHIP_CARRIZO:
> > -   if (vf)
> > -   device_info = NULL;
> > -   else
> > -   device_info = &carrizo_device_info;
> > -   f2g = &gfx_v8_kfd2kgd;
> > +   gfx_target_version = 80001;
> > +   if (!vf)
> > +   f2g = &gfx_v8_kfd2kgd;
> > break;
> >   #endif
> >   #ifdef CONFIG_DRM_AMDGPU_CIK
> > case CHIP_HAWAII:
> > -   if (vf)
> > -   device_info = NULL;
> > -   else
> > -   device_info = &hawaii_device_info;
> > -   f2g = &gfx_v7_kfd2kgd;
> > +   gfx_target_version = 70001;
> > +   if (!vf)
> > +   f2g = &gfx_v7_kfd2kgd;
> > break;
> >   #endif
> > case CHIP_TONGA:
> > -   if (vf)
> > -   device_info = NULL;
> > -   else
> > -   device_info = &tonga_device_info;
> > -   f2g = &gfx_v8_kfd2kgd;
> > +   gfx_target_version = 80002;
> > +   if (!vf)
> > +   f2g = &gfx_v8_kfd2kgd;
> > break;
> > case CHIP_FIJI:
> > -   if (vf)
> > -   device_info = &fiji_vf_device_info;
> > -   else
> > -   device_info = &fiji_device_info;
> > +   gfx_target_version = 80003;
> > f2g = &gfx_v8_kfd2kgd;
> > break;
> > case CHIP_POLARIS10:
> > -   if (vf)
> > -   device_info = &polaris10_vf_device_info;
> > -   else
> > -   device_info = &polaris10_device_info;
> > +   gfx_target_version = 80003;
> > f2g = &gfx_v8_kfd2kgd;
> > break;
> > case CHIP_POLARIS11:
> > -   if (vf)
> > -   device_info = NULL;
> > -   else
> > -   device_info = &polaris11_device_info;
> > -   f2g = &gfx_v8_kfd2kgd;
> > +   gfx_target_version = 80003;
> > +   if (!vf)
> > +   f2g = &gfx_v8_kfd2kgd;
> > break;
> > case CHIP_POLARIS12:
> > -   if (vf)
> > -   device_info = NULL;
> > -   else
> > -   device_info = &polaris12_device_info;
> > -   f2g = &gfx_v8_kfd2kgd;
> > +   gfx_target_version = 80003;
> > +   if (!vf)
> > +   f2g = &gfx_v8_kfd2kgd;
> > break;
> > case CHIP_VEGAM:
> > -   if

RE: [PATCH v2 2/4] drm/amdkfd: add kfd_device_info_init function

2021-11-22 Thread Sider, Graham
[AMD Official Use Only]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Friday, November 19, 2021 4:20 PM
> To: Sider, Graham ; amd-
> g...@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: Re: [PATCH v2 2/4] drm/amdkfd: add kfd_device_info_init function
> 
> On 2021-11-19 2:52 p.m., Graham Sider wrote:
> > Initializes device_info structs given either asic_type (enum) if GFX
> > version is less than GFX9, or GC IP version if greater. Also takes in
> > vf and the target compiler gfx version.
> >
> > Inclusion/exclusion to certain conditions for certain GC IP versions
> > may be necessary on npi bringup on a case-by-case basis, but for the
> > most part should be minimal (e.g. adding one || asic_version ==
> IP_VERSION(X ,X, X) case).
> >
> > Signed-off-by: Graham Sider 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_device.c | 61
> +
> >   1 file changed, 61 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > index e11fc4e20c32..676cb9c3166c 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > @@ -511,6 +511,67 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
> >
> >   static int kfd_resume(struct kfd_dev *kfd);
> >
> > +static void kfd_device_info_init(struct kfd_dev *kfd,
> > +struct kfd_device_info *device_info,
> > +bool vf, uint32_t gfx_target_version)
> 
> This will give you a compile warning about an unused static function.
> Maybe squash this with the commit that actually starts using this function.
> 

Sounds good.

> 
> > +{
> > +   uint32_t gc_version = KFD_GC_VERSION(kfd);
> > +   uint32_t asic_type = kfd->adev->asic_type;
> > +
> > +   device_info->max_pasid_bits = 16;
> > +   device_info->max_no_of_hqd = 24;
> > +   device_info->num_of_watch_points = 4;
> > +   device_info->mqd_size_aligned = MQD_SIZE_ALIGNED;
> > +   device_info->gfx_target_version = gfx_target_version;
> > +
> > +   if (KFD_IS_SOC15(kfd)) {
> > +   device_info->doorbell_size = 8;
> > +   device_info->ih_ring_entry_size = 8 * sizeof(uint32_t);
> > +   device_info->event_interrupt_class =
> &event_interrupt_class_v9;
> > +   device_info->supports_cwsr = true;
> > +
> > +   if ((gc_version >= IP_VERSION(9, 0, 1)  &&
> > +gc_version <= IP_VERSION(9, 3, 0)) ||
> > +gc_version == IP_VERSION(10, 3, 1) ||
> > +gc_version == IP_VERSION(10, 3, 3))
> > +   device_info->num_sdma_queues_per_engine = 2;
> > +   else
> > +   device_info->num_sdma_queues_per_engine = 8;
> 
> I feel this should be based on the SDMA IP version, not the GC IP version.
> 

Can the SDMA queues/engine be determined by the SDMA IP versions? I would have 
thought those were instead done on a chip-by-chip basis. E.g. in 
amdgpu_discovery.c this is how the number of SDMA instances is defined.

> 
> > +
> > +   /* Navi2x+, Navi1x+ */
> > +   if (gc_version >= IP_VERSION(10, 3, 0))
> 
> There needs to be a maximum check here. This case should not automatically
> apply to future ASICs e.g. GFX11.
> 

Just a thought on this: assuming on future asics this field is going to 
continue to be populated, might it be better to just continue adding cases here 
as they arise? Adding a check for e.g. < GFX11, would require eventually 
bumping that check to < GFX12 alongside another check for >= GFX11. So at the 
end of the day, if a >= check is going to be needed anyway, is a maximum check 
necessary? Of course this wouldn't apply to below regarding the 
needs_pci_atomics bool, since as you mention on future asics it can be kept as 
defaulted to false.

> 
> > +   device_info->no_atomic_fw_version = 145;
> > +   else if (gc_version >= IP_VERSION(10, 1, 1))
> > +   device_info->no_atomic_fw_version = 92;
> > +
> > +   /* Raven */
> > +   if (gc_version == IP_VERSION(9, 1, 0) ||
> > +   gc_version == IP_VERSION(9, 2, 2))
> > +   device_info->needs_iommu_device = true;
> > +
> > +   /* Navi1x+ */
> > +   if (gc_version >= IP_VERSION(10, 1, 1))
> 
> There needs to be a maximum check here. On future ASICs (maybe GFX11) I
> would not expect atomics to be required.
> 

See ab

RE: [PATCH 2/4] drm/amdkfd: add kfd_device_info_init function

2021-11-19 Thread Sider, Graham
[AMD Official Use Only]

> -Original Message-
> From: Alex Deucher 
> Sent: Friday, November 19, 2021 1:55 PM
> To: Sider, Graham 
> Cc: amd-gfx list ; Kuehling, Felix
> 
> Subject: Re: [PATCH 2/4] drm/amdkfd: add kfd_device_info_init function
> 
> On Fri, Nov 19, 2021 at 11:28 AM Graham Sider 
> wrote:
> >
> > Initializes device_info structs given either asic_type (enum) if GFX
> > version is less than GFX9, or GC IP version if greater. Also takes in
> > vf and the target compiler gfx version.
> >
> > Inclusion/exclusion to certain conditions for certain GC IP versions
> > may be necessary on npi bringup on a case-by-case basis, but for the
> > most part should be minimal (e.g. adding one || asic_version ==
> IP_VERSION(X ,X, X) case).
> >
> > Signed-off-by: Graham Sider 
> > ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 58
> > +
> >  1 file changed, 58 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > index e11fc4e20c32..23e35a466cf0 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > @@ -511,6 +511,64 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
> >
> >  static int kfd_resume(struct kfd_dev *kfd);
> >
> > +static void kfd_device_info_init(struct kfd_device_info *device_info,
> > +uint32_t asic_version, bool is_soc15,
> > +bool vf,
> 
> I'm not crazy about reusing asic_version for both the GC version and the asic
> type.  Seems like it would be easier to just pass adev and access what you
> need directly.
> 
> Alex
> 

That's fair, I was on the fence about it as well. I think setting some vars via 
adev in device_info_init itself is a good middle ground in terms of verbosity 
(what I was attempting to limit to a degree here). Thanks for the 
recommendation.

Graham

> 
> > +uint32_t gfx_target_version) {
> > +   device_info->max_pasid_bits = 16;
> > +   device_info->max_no_of_hqd = 24;
> > +   device_info->num_of_watch_points = 4;
> > +   device_info->mqd_size_aligned = MQD_SIZE_ALIGNED;
> > +   device_info->gfx_target_version = gfx_target_version;
> > +
> > +   if (is_soc15) {
> > +   device_info->doorbell_size = 8;
> > +   device_info->ih_ring_entry_size = 8 * sizeof(uint32_t);
> > +   device_info->event_interrupt_class = 
> > &event_interrupt_class_v9;
> > +   device_info->supports_cwsr = true;
> > +
> > +   if ((asic_version >= IP_VERSION(9, 0, 1)  &&
> > +asic_version <= IP_VERSION(9, 3, 0)) ||
> > +asic_version == IP_VERSION(10, 3, 1) ||
> > +asic_version == IP_VERSION(10, 3, 3))
> > +   device_info->num_sdma_queues_per_engine = 2;
> > +   else
> > +   device_info->num_sdma_queues_per_engine = 8;
> > +
> > +   // Navi2x+, Navi1x+
> > +   if (asic_version >= IP_VERSION(10, 3, 0))
> > +   device_info->no_atomic_fw_version = 145;
> > +   else if (asic_version >= IP_VERSION(10, 1, 1))
> > +   device_info->no_atomic_fw_version = 92;
> > +
> > +   // Raven
> > +   if (asic_version == IP_VERSION(9, 1, 0) ||
> > +   asic_version == IP_VERSION(9, 2, 2))
> > +   device_info->needs_iommu_device = true;
> > +
> > +   // Navi1x+
> > +   if (asic_version >= IP_VERSION(10, 1, 1))
> > +   device_info->needs_pci_atomics = true;
> > +   } else {
> > +   device_info->doorbell_size = 4;
> > +   device_info->ih_ring_entry_size = 4 * sizeof(uint32_t);
> > +   device_info->event_interrupt_class =
> > +   &event_interrupt_class_cik;
> > +   device_info->num_sdma_queues_per_engine = 2;
> > +
> > +   if (asic_version != CHIP_KAVERI &&
> > +   asic_version != CHIP_HAWAII &&
> > +   asic_version != CHIP_TONGA)
> > +   device_info->supports_cwsr = true;
> > +
> > +   if (asic_version == CHIP_KAVERI ||
> > +   asic_version == CHIP_CARRIZO)
> > +   device_info->needs_iommu_device = true;
> > +
> > +   if (asic_version != CHIP_HAWAII && !vf)
> > +   device_info->needs_pci_atomics = true;
> > +   }
> > +}
> > +
> >  struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)  {
> > struct kfd_dev *kfd;
> > --
> > 2.25.1
> >


RE: [PATCH] drm/amdkfd: Remove unused entries in table

2021-11-18 Thread Sider, Graham
[AMD Official Use Only]

Reviewed-by: Graham Sider 

> Remove unused entries in kfd_device_info table: num_xgmi_sdma_engines
> and num_sdma_queues_per_engine. They are calculated in
> kfd_get_num_sdma_engines and kfd_get_num_xgmi_sdma_engines
> instead.
> 
> Signed-off-by: Amber Lin 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 58 -
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  2 -
>  2 files changed, 60 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 3fea47e37c17..e1294fba0c26 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -68,8 +68,6 @@ static const struct kfd_device_info kaveri_device_info =
> {
> .supports_cwsr = false,
> .needs_iommu_device = true,
> .needs_pci_atomics = false,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };
> 
> @@ -87,8 +85,6 @@ static const struct kfd_device_info carrizo_device_info =
> {
> .supports_cwsr = true,
> .needs_iommu_device = true,
> .needs_pci_atomics = false,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };
> 
> @@ -105,8 +101,6 @@ static const struct kfd_device_info raven_device_info
> = {
> .supports_cwsr = true,
> .needs_iommu_device = true,
> .needs_pci_atomics = true,
> -   .num_sdma_engines = 1,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };  #endif @@ -126,8 +120,6 @@
> static const struct kfd_device_info hawaii_device_info = {
> .supports_cwsr = false,
> .needs_iommu_device = false,
> .needs_pci_atomics = false,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };  #endif @@ -145,8 +137,6 @@
> static const struct kfd_device_info tonga_device_info = {
> .supports_cwsr = false,
> .needs_iommu_device = false,
> .needs_pci_atomics = true,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };
> 
> @@ -163,8 +153,6 @@ static const struct kfd_device_info fiji_device_info = {
> .supports_cwsr = true,
> .needs_iommu_device = false,
> .needs_pci_atomics = true,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };
> 
> @@ -181,8 +169,6 @@ static const struct kfd_device_info fiji_vf_device_info
> = {
> .supports_cwsr = true,
> .needs_iommu_device = false,
> .needs_pci_atomics = false,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };
> 
> @@ -200,8 +186,6 @@ static const struct kfd_device_info
> polaris10_device_info = {
> .supports_cwsr = true,
> .needs_iommu_device = false,
> .needs_pci_atomics = true,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };
> 
> @@ -218,8 +202,6 @@ static const struct kfd_device_info
> polaris10_vf_device_info = {
> .supports_cwsr = true,
> .needs_iommu_device = false,
> .needs_pci_atomics = false,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };
> 
> @@ -236,8 +218,6 @@ static const struct kfd_device_info
> polaris11_device_info = {
> .supports_cwsr = true,
> .needs_iommu_device = false,
> .needs_pci_atomics = true,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };
> 
> @@ -254,8 +234,6 @@ static const struct kfd_device_info
> polaris12_device_info = {
> .supports_cwsr = true,
> .needs_iommu_device = false,
> .needs_pci_atomics = true,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };
> 
> @@ -272,8 +250,6 @@ static const struct kfd_device_info
> vegam_device_info = {
> .supports_cwsr = true,
> .needs_iommu_device = false,
> .needs_pci_atomics = true,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };
> 
> @@ -290,8 +266,6 @@ static const struct kfd_device_info
> vega10_device_info = {
> .supports_cwsr = true,
> .needs_iommu_device = false,
> .needs_pci_atomics = false,
> -   .num_sdma_engines = 2,
> -   .num_xgmi_sdma_engines = 0,
> .num_sdma_queues_per_engine = 2,  };
> 
> @@ -308,8 +282,6 @@ static const struct kfd_device_info
> vega10_vf_device_info = {
> .supports_cwsr = true,
> .needs_iommu_device = false,
> .needs_pci_atomics = false,
> -   

RE: [PATCH v2 3/3] drm/amdkfd: convert misc checks to IP version checking

2021-11-10 Thread Sider, Graham
[AMD Official Use Only]

> Am 2021-11-09 um 5:42 p.m. schrieb Graham Sider:
> > Switch to IP version checking instead of asic_type on various KFD
> > version checks.
> >
> > Signed-off-by: Graham Sider 
> > ---
> >  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  2 +-
> >  drivers/gpu/drm/amd/amdkfd/kfd_crat.c |  2 +-
> >  drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 27 ++-
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  3 +--
> > .../amd/amdkfd/kfd_device_queue_manager_v9.c  |  2 +-
> >  drivers/gpu/drm/amd/amdkfd/kfd_events.c   |  6 +++--
> >  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c  |  2 +-
> >  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  7 +++--
> >  drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  6 ++---
> >  drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  4 +--
> >  10 files changed, 31 insertions(+), 30 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index 2466a73b8c7d..f70117b00b14 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -1603,7 +1603,7 @@ static int
> kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
> > }
> > mutex_unlock(&p->mutex);
> >
> > -   if (dev->device_info->asic_family == CHIP_ALDEBARAN) {
> > +   if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) {
> > err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev,
> > (struct kgd_mem *) mem, true);
> > if (err) {
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > index 19dd472e9b06..b6d887edac85 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > @@ -1992,7 +1992,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int
> *avail_size,
> > sub_type_hdr->flags |=
> CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
> > sub_type_hdr->io_interface_type =
> CRAT_IOLINK_TYPE_XGMI;
> > sub_type_hdr->num_hops_xgmi = 1;
> > -   if (kdev->adev->asic_type == CHIP_ALDEBARAN) {
> > +   if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) {
> > sub_type_hdr->minimum_bandwidth_mbs =
> >
>   amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
> > kdev->adev, NULL,
> true);
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > index ee813bd57c92..594dd28a391f 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > @@ -848,23 +848,23 @@ struct kfd_dev *kgd2kfd_probe(struct
> > amdgpu_device *adev, bool vf)  static void kfd_cwsr_init(struct
> > kfd_dev *kfd)  {
> > if (cwsr_enable && kfd->device_info->supports_cwsr) {
> > -   if (kfd->device_info->asic_family < CHIP_VEGA10) {
> > +   if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
> > BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) >
> PAGE_SIZE);
> > kfd->cwsr_isa = cwsr_trap_gfx8_hex;
> > kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
> > -   } else if (kfd->device_info->asic_family == CHIP_ARCTURUS)
> {
> > +   } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
> > BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) >
> PAGE_SIZE);
> > kfd->cwsr_isa = cwsr_trap_arcturus_hex;
> > kfd->cwsr_isa_size =
> sizeof(cwsr_trap_arcturus_hex);
> > -   } else if (kfd->device_info->asic_family ==
> CHIP_ALDEBARAN) {
> > +   } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
> > BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) >
> PAGE_SIZE);
> > kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
> > kfd->cwsr_isa_size =
> sizeof(cwsr_trap_aldebaran_hex);
> > -   } else if (kfd->device_info->asic_family < CHIP_NAVI10) {
> > +   } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
> > BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) >
> PAGE_SIZE);
> > kfd->cwsr_isa = cwsr_trap_gfx9_hex;
> > kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
> > -   } else if (kfd->device_info->asic_family <
> CHIP_SIENNA_CICHLID) {
> > +   } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
> > BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) >
> PAGE_SIZE);
> > kfd->cwsr_isa = cwsr_trap_nv1x_hex;
> > kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
> @@ -886,14
> > +886,16 @@ static int kfd_gws_init(struct kfd_dev *kfd)
> > return 0;
> >
> > if (hws_gws_support
> > -   || (kfd->device_info->asic_family == CHIP_VEGA10
> > +   || (KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1)
> > && 

RE: [PATCH 10/13] drm/amdkfd: replace kgd_dev in get amdgpu_amdkfd funcs

2021-10-26 Thread Sider, Graham
[AMD Official Use Only]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Tuesday, October 26, 2021 5:24 PM
> To: Sider, Graham ; amd-
> g...@lists.freedesktop.org
> Cc: Joshi, Mukul 
> Subject: Re: [PATCH 10/13] drm/amdkfd: replace kgd_dev in get
> amdgpu_amdkfd funcs
> 
> Am 2021-10-19 um 5:13 p.m. schrieb Graham Sider:
> > Modified definitions:
> >
> > - amdgpu_amdkfd_get_fw_version
> > - amdgpu_amdkfd_get_local_mem_info
> > - amdgpu_amdkfd_get_gpu_clock_counter
> > - amdgpu_amdkfd_get_max_engine_clock_in_mhz
> > - amdgpu_amdkfd_get_cu_info
> > - amdgpu_amdkfd_get_dmabuf_info
> > - amdgpu_amdkfd_get_vram_usage
> > - amdgpu_amdkfd_get_hive_id
> > - amdgpu_amdkfd_get_unique_id
> > - amdgpu_amdkfd_get_mmio_remap_phys_addr
> > - amdgpu_amdkfd_get_num_gws
> > - amdgpu_amdkfd_get_asic_rev_id
> > - amdgpu_amdkfd_get_noretry
> > - amdgpu_amdkfd_get_xgmi_hops_count
> > - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes
> > - amdgpu_amdkfd_get_pcie_bandwidth_mbytes
> >
> > Also replaces kfd_device_by_kgd with kfd_device_by_adev, now
> searching
> > via adev rather than kgd.
> 
> Some of these functions are so trivial, they could probably be replaced with
> direct accesses to the respective fields in adev from KFD:
> 
>   * amdgpu_amdkfd_get_hive_id
>   * amdgpu_amdkfd_get_unique_id
>   * amdgpu_amdkfd_get_mmio_remap_phys_addr
>   * amdgpu_amdkfd_get_num_gws
>   * amdgpu_amdkfd_get_asic_rev_id
>   * amdgpu_amdkfd_get_noretry
> 
> I'd do that with a separate follow-up patch, though.
> 
> Regards,
>   Felix
> 

Sounds good, I'll go ahead and replace these ones mentioned in a follow-up. 
Thanks!

Best,
Graham

> 
> >
> > Signed-off-by: Graham Sider 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 73 +++-
> ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 38 +-
> >  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 16 ++--
> >  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 16 ++--
> >  drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 14 ++--
> >  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  |  2 +-
> >  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
> >  .../amd/amdkfd/kfd_process_queue_manager.c|  2 +-
> >  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 18 ++---
> >  9 files changed, 82 insertions(+), 99 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > index 69fc8f0d9c45..79a2e37baa59 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > @@ -358,11 +358,9 @@ void amdgpu_amdkfd_free_gws(struct
> amdgpu_device *adev, void *mem_obj)
> > amdgpu_bo_unref(&bo);
> >  }
> >
> > -uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
> > +uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
> >   enum kgd_engine_type type)
> >  {
> > -   struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> > -
> > switch (type) {
> > case KGD_ENGINE_PFP:
> > return adev->gfx.pfp_fw_version;
> > @@ -395,11 +393,9 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct
> kgd_dev *kgd,
> > return 0;
> >  }
> >
> > -void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
> > +void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device
> *adev,
> >   struct kfd_local_mem_info *mem_info)  {
> > -   struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> > -
> > memset(mem_info, 0, sizeof(*mem_info));
> >
> > mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
> @@
> > -424,19 +420,15 @@ void amdgpu_amdkfd_get_local_mem_info(struct
> kgd_dev *kgd,
> > mem_info->mem_clk_max = 100;
> >  }
> >
> > -uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
> > +uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct
> amdgpu_device
> > +*adev)
> >  {
> > -   struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> > -
> > if (adev->gfx.funcs->get_gpu_clock_counter)
> > return adev->gfx.funcs->get_gpu_clock_counter(adev);
> > return 0;
> >  }
> >
> > -uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct
> kgd_dev
> > *kgd)
> > +uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct
> > +amdgpu_device *adev)
> >  {
> > -   struct a

RE: [PATCH 02/13] drm/amdkfd: replace kgd_dev in static gfx v7 funcs

2021-10-26 Thread Sider, Graham
[AMD Official Use Only]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Tuesday, October 26, 2021 4:07 PM
> To: Sider, Graham ; amd-
> g...@lists.freedesktop.org
> Cc: Joshi, Mukul 
> Subject: Re: [PATCH 02/13] drm/amdkfd: replace kgd_dev in static gfx v7
> funcs
> 
> Am 2021-10-19 um 5:13 p.m. schrieb Graham Sider:
> > Static funcs in amdgpu_amdkfd_gfx_v7.c now using amdgpu_device.
> 
> Doesn't this cause pointer type mismatch errors when assigning the function
> pointers in gfx_v7_kfd2kgd? Those only get updated in patch 7.
> 
> Regards,
>   Felix
> 

The function definitions changed in patches 2 through 6 are only used internally
within these files and aren't assigned to any of the gfx_v*_kfd2kgd entries. 
Patches
7 through 11 deal with the kfd2kgd functions that would cause a mismatch if done
file-by-file.

Best,
Graham

> 
> >
> > Signed-off-by: Graham Sider 
> > ---
> >  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 51
> > +--
> >  1 file changed, 23 insertions(+), 28 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> > index b91d27e39bad..d00ba8d65a6d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> > @@ -87,38 +87,33 @@ static inline struct amdgpu_device
> *get_amdgpu_device(struct kgd_dev *kgd)
> > return (struct amdgpu_device *)kgd;
> >  }
> >
> > -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t
> > pipe,
> > +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec,
> > +uint32_t pipe,
> > uint32_t queue, uint32_t vmid)
> >  {
> > -   struct amdgpu_device *adev = get_amdgpu_device(kgd);
> > uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) |
> > QUEUEID(queue);
> >
> > mutex_lock(&adev->srbm_mutex);
> > WREG32(mmSRBM_GFX_CNTL, value);
> >  }
> >
> > -static void unlock_srbm(struct kgd_dev *kgd)
> > +static void unlock_srbm(struct amdgpu_device *adev)
> >  {
> > -   struct amdgpu_device *adev = get_amdgpu_device(kgd);
> > -
> > WREG32(mmSRBM_GFX_CNTL, 0);
> > mutex_unlock(&adev->srbm_mutex);
> >  }
> >
> > -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
> > +static void acquire_queue(struct amdgpu_device *adev, uint32_t
> > +pipe_id,
> > uint32_t queue_id)
> >  {
> > -   struct amdgpu_device *adev = get_amdgpu_device(kgd);
> > -
> > uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
> > uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
> >
> > -   lock_srbm(kgd, mec, pipe, queue_id, 0);
> > +   lock_srbm(adev, mec, pipe, queue_id, 0);
> >  }
> >
> > -static void release_queue(struct kgd_dev *kgd)
> > +static void release_queue(struct amdgpu_device *adev)
> >  {
> > -   unlock_srbm(kgd);
> > +   unlock_srbm(adev);
> >  }
> >
> >  static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t
> > vmid, @@ -129,14 +124,14 @@ static void
> > kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,  {
> > struct amdgpu_device *adev = get_amdgpu_device(kgd);
> >
> > -   lock_srbm(kgd, 0, 0, 0, vmid);
> > +   lock_srbm(adev, 0, 0, 0, vmid);
> >
> > WREG32(mmSH_MEM_CONFIG, sh_mem_config);
> > WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
> > WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
> > WREG32(mmSH_MEM_BASES, sh_mem_bases);
> >
> > -   unlock_srbm(kgd);
> > +   unlock_srbm(adev);
> >  }
> >
> >  static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
> > @@ -174,12 +169,12 @@ static int kgd_init_interrupts(struct kgd_dev
> *kgd, uint32_t pipe_id)
> > mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
> > pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
> >
> > -   lock_srbm(kgd, mec, pipe, 0, 0);
> > +   lock_srbm(adev, mec, pipe, 0, 0);
> >
> > WREG32(mmCPC_INT_CNTL,
> CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
> >
>   CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
> >
> > -   unlock_srbm(kgd);
> > +   unlock_srbm(adev);
> >
> > return 0;
> >  }
> > @@ -220,7 +215,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void
> > *mqd, uint32_t pipe_id,
> >
> > m = get_mqd(mqd);
> >
> > -   acquire_que

RE: [PATCH v5 7/9] drm/amd/pm: Add vangogh throttler translation

2021-06-07 Thread Sider, Graham
Great, thanks for all the feedback Lijo. Out of the new bit definitions in 
amdgpu_smu.h are there any that currently exist that are more applicable for 
these mappings? *_THM_GFX and *_THM_SOC only exist in VanGogh and Renoir. With 
the expansion of the MEM and LIQUID bits there is not enough room in the 
temperature field to add two new definitions.

Best,
Graham

-Original Message-
From: Lazar, Lijo  
Sent: Monday, June 7, 2021 10:35 AM
To: Sider, Graham ; amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish ; Sakhnovitch, 
Elena (Elen) 
Subject: Re: [PATCH v5 7/9] drm/amd/pm: Add vangogh throttler translation



On 6/7/2021 7:14 PM, Graham Sider wrote:
> Perform dependent to independent throttle status translation for 
> vangogh.
> 
> Signed-off-by: Graham Sider 
> ---
>   .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 38 ++-
>   1 file changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> index 77f532a49e37..589304367929 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> @@ -190,6 +190,20 @@ static struct cmn2asic_mapping 
> vangogh_workload_map[PP_SMC_POWER_PROFILE_COUNT]
>   WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM,   
> WORKLOAD_PPLIB_CUSTOM_BIT),
>   };
>   
> +static const uint8_t vangogh_throttler_map[] = {
> + [THROTTLER_STATUS_BIT_SPL]  = (SMU_THROTTLER_SPL_BIT),
> + [THROTTLER_STATUS_BIT_FPPT] = (SMU_THROTTLER_FPPT_BIT),
> + [THROTTLER_STATUS_BIT_SPPT] = (SMU_THROTTLER_SPPT_BIT),
> + [THROTTLER_STATUS_BIT_SPPT_APU] = (SMU_THROTTLER_SPPT_APU_BIT),
> + [THROTTLER_STATUS_BIT_THM_CORE] = (SMU_THROTTLER_TEMP_CORE_BIT),
> + [THROTTLER_STATUS_BIT_THM_GFX]  = (SMU_THROTTLER_TEMP_VR_GFX_BIT),
> + [THROTTLER_STATUS_BIT_THM_SOC]  = (SMU_THROTTLER_TEMP_VR_SOC_BIT),

Above two mappings don't look correct. They essentially mean throttling due to 
GFX/SOC domain temperatures in APU exceeding their limits, not the VR 
temperatures. Except those mappings, rest of the patch series looks good to me.

Thanks,
Lijo

> + [THROTTLER_STATUS_BIT_TDC_VDD]  = (SMU_THROTTLER_TDC_VDD_BIT),
> + [THROTTLER_STATUS_BIT_TDC_SOC]  = (SMU_THROTTLER_TDC_SOC_BIT),
> + [THROTTLER_STATUS_BIT_TDC_GFX]  = (SMU_THROTTLER_TDC_GFX_BIT),
> + [THROTTLER_STATUS_BIT_TDC_CVIP] = (SMU_THROTTLER_TDC_CVIP_BIT),
> +};
> +
>   static int vangogh_tables_init(struct smu_context *smu)
>   {
>   struct smu_table_context *smu_table = &smu->smu_table; @@ -226,7 
> +240,7 @@ static int vangogh_tables_init(struct smu_context *smu)
>   goto err0_out;
>   smu_table->metrics_time = 0;
>   
> - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_1);
> + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2);
>   smu_table->gpu_metrics_table = 
> kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
>   if (!smu_table->gpu_metrics_table)
>   goto err1_out;
> @@ -1632,8 +1646,8 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct 
> smu_context *smu,
> void **table)
>   {
>   struct smu_table_context *smu_table = >smu_table;
> - struct gpu_metrics_v2_1 *gpu_metrics =
> - (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table;
> + struct gpu_metrics_v2_2 *gpu_metrics =
> + (struct gpu_metrics_v2_2 *)smu_table->gpu_metrics_table;
>   SmuMetrics_legacy_t metrics;
>   int ret = 0;
>   
> @@ -1641,7 +1655,7 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct 
> smu_context *smu,
>   if (ret)
>   return ret;
>   
> - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1);
> + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 2);
>   
>   gpu_metrics->temperature_gfx = metrics.GfxTemperature;
>   gpu_metrics->temperature_soc = metrics.SocTemperature; @@ -1674,20 
> +1688,23 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context 
> *smu,
>   gpu_metrics->current_l3clk[0] = metrics.L3Frequency[0];
>   
>   gpu_metrics->throttle_status = metrics.ThrottlerStatus;
> + gpu_metrics->indep_throttle_status =
> + 
> smu_cmn_get_indep_throttler_status(metrics.ThrottlerStatus,
> +
> vangogh_throttler_map);
>   
>   gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
>   
>   *table = (void *)gpu_metrics;
>   
> - return sizeof(struct gpu_metrics_v2_1);
> + return sizeof(struct gpu_metric

RE: [PATCH v3 4/8] drm/amd/pm: Add navi1x throttler translation

2021-06-04 Thread Sider, Graham
Thanks Evan and Lijo. Keep in mind that the ASIC dependent DWORD with those 
bits is still being kept. That said, I have no problem with listing them out 
separately in the new field as well. I'll make the ASICs that don't support 
VR_MEM1/LIQUID1 map to VR_MEM0/LIQUID0 and not touch the *1 bits. If you have a 
problem with this approach let me know.

Best,
Graham

-Original Message-
From: Lazar, Lijo  
Sent: Friday, June 4, 2021 12:52 AM
To: Quan, Evan ; Sider, Graham ; 
amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish ; Sakhnovitch, 
Elena (Elen) 
Subject: RE: [PATCH v3 4/8] drm/amd/pm: Add navi1x throttler translation

[AMD Official Use Only]

A modified version of 2  -  
List all the possible ones and merge those which mean the same - ex: 
terminology changes like THM and TEMP.

In the mail earlier, I meant to list them out separately as the intention is to 
convey the throttle reason to the user- it's better to point out the exact 
regulator which is heating up. 

Thanks,
Lijo

-Original Message-
From: Quan, Evan 
Sent: Friday, June 4, 2021 7:47 AM
To: Lazar, Lijo ; Sider, Graham ; 
amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish ; Sakhnovitch, 
Elena (Elen) 
Subject: RE: [PATCH v3 4/8] drm/amd/pm: Add navi1x throttler translation

[AMD Official Use Only]

Thanks Lijo and Graham. Yes, I know that only some specific ASICs support 
VR_MEM1 and LIQUID1.
However, the problem is about the design:
1. should we just list those which are commonly supported by all ASICs.
2. Or we list all the possible throttlers and mask out those unsupported for 
some specific ASICs

BR
Evan
> -Original Message-
> From: Lazar, Lijo 
> Sent: Thursday, June 3, 2021 10:01 PM
> To: Sider, Graham ; Quan, Evan 
> ; amd-gfx@lists.freedesktop.org
> Cc: Kasiviswanathan, Harish ; 
> Sakhnovitch, Elena (Elen) 
> Subject: RE: [PATCH v3 4/8] drm/amd/pm: Add navi1x throttler 
> translation
> 
> [AMD Official Use Only]
> 
> VR_*0/1 reflect the throttle status of separate voltage rails - 
> availability of both depends on board and FW capability to query their 
> temperature.
> 
> Thanks,
> Lijo
> 
> -Original Message-
> From: Sider, Graham 
> Sent: Thursday, June 3, 2021 6:41 PM
> To: Quan, Evan ; amd-gfx@lists.freedesktop.org
> Cc: Lazar, Lijo ; Kasiviswanathan, Harish 
> ; Sakhnovitch, Elena (Elen) 
> 
> Subject: RE: [PATCH v3 4/8] drm/amd/pm: Add navi1x throttler 
> translation
> 
> Some ASICs use a single VR_MEM bit, whereas others split it into
> VR_MEM0 and VR_MEM1. To avoid confusion, we've combined the VR_MEM0 
> and
> VR_MEM1 bits on those ASICs. For consistency we did the same with
> LIQUID0 and LIQUID1.
> 
> -Original Message-
> From: Quan, Evan 
> Sent: Wednesday, June 2, 2021 12:37 AM
> To: Sider, Graham ; amd- 
> g...@lists.freedesktop.org
> Cc: Lazar, Lijo ; Kasiviswanathan, Harish 
> ; Sider, Graham 
> ; Sakhnovitch, Elena (Elen) 
> 
> Subject: RE: [PATCH v3 4/8] drm/amd/pm: Add navi1x throttler 
> translation
> 
> [AMD Official Use Only]
> 
> 
> 
> > -Original Message-
> > From: amd-gfx  On Behalf Of 
> > Graham Sider
> > Sent: Wednesday, June 2, 2021 2:12 AM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Lazar, Lijo ; Kasiviswanathan, Harish 
> > ; Sider, Graham 
> > ; Sakhnovitch, Elena (Elen) 
> > 
> > Subject: [PATCH v3 4/8] drm/amd/pm: Add navi1x throttler translation
> >
> > Perform dependent to independent throttle status translation for 
> > navi1x. Makes use of lookup table navi1x_throttler_map.
> >
> > Signed-off-by: Graham Sider 
> > ---
> >  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 43
> > +++
> >  1 file changed, 43 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> > b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> > index 78fe13183e8b..bf376b1be08d 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> > @@ -238,6 +238,28 @@ static struct cmn2asic_mapping 
> > navi10_workload_map[PP_SMC_POWER_PROFILE_COUNT] =
> > WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM,
> > WORKLOAD_PPLIB_CUSTOM_BIT),
> >  };
> >
> > +static const uint8_t navi1x_throttler_map[] = {
> > +   [THROTTLER_TEMP_EDGE_BIT]   =
> > (SMU_THROTTLER_TEMP_EDGE_BIT),
> > +   [THROTTLER_TEMP_HOTSPOT_BIT]=
> > (SMU_THROTTLER_TEMP_HOTSPOT_BIT),
> > +   [THROTTLER_TEMP_MEM_BIT]=
> > (SMU_THROTTLER_TEMP_MEM_BIT),
> > +   [THROTTLER_TEMP_VR_GFX_BIT] =
> > (SMU_THROTTLER_TEMP_VR_GFX_BIT),
> > +   [THROTTLER_TEMP_VR_MEM0_BIT] 

RE: [PATCH v3 4/8] drm/amd/pm: Add navi1x throttler translation

2021-06-03 Thread Sider, Graham
Some ASICs use a single VR_MEM bit, whereas others split it into VR_MEM0 and 
VR_MEM1. To avoid confusion, we've combined the VR_MEM0 and VR_MEM1 bits on 
those ASICs. For consistency we did the same with LIQUID0 and LIQUID1. 

-Original Message-
From: Quan, Evan  
Sent: Wednesday, June 2, 2021 12:37 AM
To: Sider, Graham ; amd-gfx@lists.freedesktop.org
Cc: Lazar, Lijo ; Kasiviswanathan, Harish 
; Sider, Graham ; 
Sakhnovitch, Elena (Elen) 
Subject: RE: [PATCH v3 4/8] drm/amd/pm: Add navi1x throttler translation

[AMD Official Use Only]



> -Original Message-
> From: amd-gfx  On Behalf Of 
> Graham Sider
> Sent: Wednesday, June 2, 2021 2:12 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Lazar, Lijo ; Kasiviswanathan, Harish 
> ; Sider, Graham 
> ; Sakhnovitch, Elena (Elen) 
> 
> Subject: [PATCH v3 4/8] drm/amd/pm: Add navi1x throttler translation
> 
> Perform dependent to independent throttle status translation for 
> navi1x. Makes use of lookup table navi1x_throttler_map.
> 
> Signed-off-by: Graham Sider 
> ---
>  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 43
> +++
>  1 file changed, 43 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> index 78fe13183e8b..bf376b1be08d 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> @@ -238,6 +238,28 @@ static struct cmn2asic_mapping 
> navi10_workload_map[PP_SMC_POWER_PROFILE_COUNT] =
>   WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM,
>   WORKLOAD_PPLIB_CUSTOM_BIT),
>  };
> 
> +static const uint8_t navi1x_throttler_map[] = {
> + [THROTTLER_TEMP_EDGE_BIT]   =
> (SMU_THROTTLER_TEMP_EDGE_BIT),
> + [THROTTLER_TEMP_HOTSPOT_BIT]=
> (SMU_THROTTLER_TEMP_HOTSPOT_BIT),
> + [THROTTLER_TEMP_MEM_BIT]=
> (SMU_THROTTLER_TEMP_MEM_BIT),
> + [THROTTLER_TEMP_VR_GFX_BIT] =
> (SMU_THROTTLER_TEMP_VR_GFX_BIT),
> + [THROTTLER_TEMP_VR_MEM0_BIT]=
> (SMU_THROTTLER_TEMP_VR_MEM_BIT),
> + [THROTTLER_TEMP_VR_MEM1_BIT]=
> (SMU_THROTTLER_TEMP_VR_MEM_BIT),
[Quan, Evan] I'm wondering why you map the two ASIC dependent bits to the same 
non ASIC independent bit. Instead of defining two non ASIC independent bits.
> + [THROTTLER_TEMP_VR_SOC_BIT] =
> (SMU_THROTTLER_TEMP_VR_SOC_BIT),
> + [THROTTLER_TEMP_LIQUID0_BIT]=
> (SMU_THROTTLER_TEMP_LIQUID_BIT),
> + [THROTTLER_TEMP_LIQUID1_BIT]=
> (SMU_THROTTLER_TEMP_LIQUID_BIT),
[Quan, Evan] Same question here and for Patch4.

BR
Evan
> + [THROTTLER_TDC_GFX_BIT] =
> (SMU_THROTTLER_TDC_GFX_BIT),
> + [THROTTLER_TDC_SOC_BIT] =
> (SMU_THROTTLER_TDC_SOC_BIT),
> + [THROTTLER_PPT0_BIT]=
> (SMU_THROTTLER_PPT0_BIT),
> + [THROTTLER_PPT1_BIT]=
> (SMU_THROTTLER_PPT1_BIT),
> + [THROTTLER_PPT2_BIT]=
> (SMU_THROTTLER_PPT2_BIT),
> + [THROTTLER_PPT3_BIT]=
> (SMU_THROTTLER_PPT3_BIT),
> + [THROTTLER_FIT_BIT] = (SMU_THROTTLER_FIT_BIT),
> + [THROTTLER_PPM_BIT] =
> (SMU_THROTTLER_PPM_BIT),
> + [THROTTLER_APCC_BIT]=
> (SMU_THROTTLER_APCC_BIT),
> +};
> +
> +
>  static bool is_asic_secure(struct smu_context *smu)  {
>   struct amdgpu_device *adev = smu->adev; @@ -524,6 +546,19 @@ static 
> int navi10_tables_init(struct smu_context
> *smu)
>   return -ENOMEM;
>  }
> 
> +static uint64_t navi1x_get_indep_throttler_status(
> + const unsigned long dep_status)
> +{
> + uint64_t indep_status = 0;
> + uint8_t dep_bit = 0;
> +
> + for_each_set_bit(dep_bit, &dep_status, 32)
> + indep_status |= smu_u64_throttler_bit(dep_status,
> + navi1x_throttler_map[dep_bit], dep_bit);
> +
> + return indep_status;
> +}
> +
>  static int navi10_get_legacy_smu_metrics_data(struct smu_context *smu,
> MetricsMember_t member,
> uint32_t *value)
> @@ -2673,6 +2708,8 @@ static ssize_t
> navi10_get_legacy_gpu_metrics(struct smu_context *smu,
>   gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
> 
>   gpu_metrics->throttle_status = metrics.ThrottlerStatus;
> + gpu_metrics->indep_throttle_status =
> +
>   navi1x_get_indep_throttler_status(metrics.ThrottlerStatus);
> 
>   gpu_metrics->current_fan_speed = metrics.CurrFanSpeed;
> 
> @@ -2750,6 +2787,8 @@ static ssize_t navi10_get_gpu_metrics(struct 
> smu_context *smu,
>   gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
> 
&g

RE: [PATCH v2 4/8] drm/amd/pm: Add navi1x throttler translation

2021-05-31 Thread Sider, Graham
That's fair, I wasn't sure if adding a lookup table for the bitmapping of each 
ASIC was necessarily wanted, but it would definitely result in less runtime 
overhead.

This way we can also make use of the for_each_set_bit() macro in bitops.h. I'll 
make the change and fix the padding and resubmit. Thanks for the feedback.

Graham

-Original Message-
From: Lazar, Lijo  
Sent: Monday, May 31, 2021 1:13 AM
To: Sider, Graham ; amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish ; Sider, Graham 
; Sakhnovitch, Elena (Elen) 
Subject: RE: [PATCH v2 4/8] drm/amd/pm: Add navi1x throttler translation

[AMD Official Use Only]



-Original Message-
From: amd-gfx  On Behalf Of Graham Sider
Sent: Saturday, May 29, 2021 1:28 AM
To: amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish ; Sider, Graham 
; Sakhnovitch, Elena (Elen) 
Subject: [PATCH v2 4/8] drm/amd/pm: Add navi1x throttler translation

Perform dependent to independent throttle status translation for navi1x.

Signed-off-by: Graham Sider 
---
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 56 +++
 1 file changed, 56 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 78fe13183e8b..878ec698909c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -524,6 +524,54 @@ static int navi10_tables_init(struct smu_context *smu)
return -ENOMEM;
 }
 
+static uint64_t navi1x_get_indep_throttler_status(
+   uint32_t dep_status)
+{
+   if (dep_status == 0)
+   return 0;
+
+   uint64_t indep_status = 0;

< > Extending Harish's comments - it's very unlikely that all or even 50% of 
these bits are set together. It may be better to do something like  

while ((bit = ffs(dep_status))) {
	indep_status |= 1ULL << mapping[bit - 1];
	dep_status &= ~(1UL << (bit - 1));
}
Will need a lookup table, should be fine though. 
Will need a lookup table, should be fine though. 

Thanks,
Lijo

+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_EDGE_BIT, THROTTLER_TEMP_EDGE_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_HOTSPOT_BIT, 
THROTTLER_TEMP_HOTSPOT_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_MEM_BIT, THROTTLER_TEMP_MEM_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_GFX_BIT, 
THROTTLER_TEMP_VR_GFX_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_MEM_BIT, 
THROTTLER_TEMP_VR_MEM0_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_MEM_BIT, 
THROTTLER_TEMP_VR_MEM1_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_SOC_BIT, 
THROTTLER_TEMP_VR_SOC_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_LIQUID_BIT, 
THROTTLER_TEMP_LIQUID0_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_LIQUID_BIT, 
THROTTLER_TEMP_LIQUID1_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_GFX_BIT, THROTTLER_TDC_GFX_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_SOC_BIT, THROTTLER_TDC_SOC_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT0_BIT, THROTTLER_PPT0_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT1_BIT, THROTTLER_PPT1_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT2_BIT, THROTTLER_PPT2_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT3_BIT, THROTTLER_PPT3_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_FIT_BIT, THROTTLER_FIT_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPM_BIT, THROTTLER_PPM_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_APCC_BIT, THROTTLER_APCC_BIT);
+
+   return indep_status;
+}
+
 static int navi10_get_legacy_smu_metrics_data(struct smu_context *smu,
  MetricsMember_t member,
  uint32_t *value)
@@ -2673,6 +2721,8 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,
gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
 
gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+   gpu_me

RE: [PATCH 1/6] drm/amd/pm: Add ASIC independent throttle bits

2021-05-25 Thread Sider, Graham
Most of these changes are due to Van Gogh having a different naming scheme than 
the rest. Just to confirm, let me know if this translation is what you're 
referring to with the below defines for Van Gogh:

THROTTLER_STATUS_BIT_SPL->  SMU_THROTTLER_SPL_BIT
THROTTLER_STATUS_BIT_FPPT   ->  SMU_THROTTLER_FPPT_BIT
THROTTLER_STATUS_BIT_SPPT   ->  SMU_THROTTLER_SPPT_BIT
THROTTLER_STATUS_BIT_SPPT_APU   ->  SMU_THROTTLER_SPPT_APU_BIT
THROTTLER_STATUS_BIT_THM_CORE   ->  SMU_THROTTLER_TEMP_CORE_BIT
THROTTLER_STATUS_BIT_THM_GFX->  SMU_THROTTLER_TEMP_VR_GFX_BIT
THROTTLER_STATUS_BIT_THM_SOC->  SMU_THROTTLER_TEMP_VR_SOC_BIT
THROTTLER_STATUS_BIT_TDC_VDD->  SMU_THROTTLER_TDC_VDD_BIT
THROTTLER_STATUS_BIT_TDC_SOC->  SMU_THROTTLER_TDC_SOC_BIT
THROTTLER_STATUS_BIT_TDC_GFX->  SMU_THROTTLER_TDC_GFX_BIT
THROTTLER_STATUS_BIT_TDC_CVIP   ->  SMU_THROTTLER_TDC_CVIP_BIT

Graham

-Original Message-
From: Lazar, Lijo  
Sent: Monday, May 24, 2021 2:58 AM
To: Sider, Graham ; amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish ; Sakhnovitch, 
Elena (Elen) 
Subject: Re: [PATCH 1/6] drm/amd/pm: Add ASIC independent throttle bits

There are duplicates in this list. It's better to classify as 
Power/Temperature/Current/Others and map; maybe, allocate 16 bit each in a 
64-bit mask. Also, keep the naming consistent and start with "SMU_", that's 
what we do for others like SMU messages.

Power throttlers
-

#define SMU_THROTTLER_PPT1_BIT  
#define SMU_THROTTLER_PPT0_BIT  
#define SMU_THROTTLER_PPT2_BIT  
#define SMU_THROTTLER_PPT3_BIT  
#define SMU_THROTTLER_SPL_BIT   
#define SMU_THROTTLER_FPPT_BIT  
#define SMU_THROTTLER_SPPT_BIT  
#define SMU_THROTTLER_SPPT_APU_BIT  

Current Throttlers
---

#define SMU_THROTTLER_TDC_GFX_BIT   
#define SMU_THROTTLER_TDC_VDD_BIT   
#define SMU_THROTTLER_TDC_SOC_BIT   
#define SMU_THROTTLER_TDC_MEM_BIT => Should be the one used for HBM as well 
#define SMU_THROTTLER_TDC_CVIP_BIT
#define SMU_THROTTLER_APCC_BIT  

Temperature


#define SMU_THROTTLER_TEMP_GPU_BIT
#define SMU_THROTTLER_TEMP_CORE_BIT 
#define SMU_THROTTLER_TEMP_MEM_BIT  
#define SMU_THROTTLER_TEMP_EDGE_BIT 
#define SMU_THROTTLER_TEMP_HOTSPOT_BIT

#define SMU_THROTTLER_TEMP_VR_GFX_BIT   
#define SMU_THROTTLER_TEMP_VR_SOC_BIT   
#define SMU_THROTTLER_TEMP_VR_MEM_BIT
#define SMU_THROTTLER_VRHOT0_BIT
#define SMU_THROTTLER_VRHOT1_BIT

#define SMU_THROTTLER_TEMP_LIQUID_BIT

Others
---

#define SMU_THROTTLER_PPM_BIT   
#define SMU_THROTTLER_FIT_BIT   

--
Thanks,
Lijo

On 5/20/2021 7:59 PM, Graham Sider wrote:
> Add new defines for thermal throttle status bits which are ASIC 
> independent. This bit field will be visible to userspace via 
> gpu_metrics, replacing the previous ASIC dependent bit fields.
> ---
>   drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h | 32 +
>   1 file changed, 32 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h 
> b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
> index 523f9d2982e9..fbbebb1da913 100644
> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
> @@ -35,6 +35,38 @@
>   
>   #define SMU_DPM_USER_PROFILE_RESTORE (1 << 0)
>   
> +#define INDEP_THROTTLER_PPT0_BIT 0
> +#define INDEP_THROTTLER_PPT1_BIT 1
> +#define INDEP_THROTTLER_PPT2_BIT 2
> +#define INDEP_THROTTLER_PPT3_BIT 3
> +#define INDEP_THROTTLER_TDC_GFX_BIT  4
> +#define INDEP_THROTTLER_TDC_SOC_BIT  5
> +#define INDEP_THROTTLER_TDC_HBM_BIT  6
> +#define INDEP_THROTTLER_TEMP_GPU_BIT 7
> +#define INDEP_THROTTLER_TEMP_MEM_BIT 8
> +#define INDEP_THROTTLER_TEMP_EDGE_BIT9
> +#define INDEP_THROTTLER_TEMP_HOTSPOT_BIT 10
> +#define INDEP_THROTTLER_TEMP_VR_GFX_BIT  11
> +#define INDEP_THROTTLER_TEMP_VR_SOC_BIT  12
> +#define INDEP_THROTTLER_TEMP_VR_MEM_BIT  13
> +#define INDEP_THROTTLER_TEMP_LIQUID_BIT  14
> +#define INDEP_THROTTLER_APCC_BIT 15
> +#define INDEP_THROTTLER_PPM_BIT  16
> +#define INDEP_THROTTLER_FIT_BIT  17
> +#define INDEP_THROTTLER_VRHOT0_BIT   18
> +#define INDEP_THROTTLER_VRHOT1_BIT   19
> +#define INDEP_THROTTLER_STATUS_BIT_SPL   20
> +#define INDEP_THROTTLER_STATUS_BIT_FPPT  21
> +#define INDEP_THROTTLER_STATUS_BIT_SPPT  22
> +#define INDEP_THROTTLER_STATUS_BIT_SPPT_APU  23
> +#define INDEP_THROTT

RE: [PATCH 2/6] drm/amd/pm: Add arcturus throttler translation

2021-05-21 Thread Sider, Graham
Right, that all makes sense. I'm fine with either of these options. Thanks for 
the insights -- I'll give this a bit more thought and get back to you.

Best,
Graham

-Original Message-
From: Alex Deucher  
Sent: Friday, May 21, 2021 5:50 PM
To: Sider, Graham 
Cc: amd-gfx list ; Kasiviswanathan, Harish 
; Sakhnovitch, Elena (Elen) 

Subject: Re: [PATCH 2/6] drm/amd/pm: Add arcturus throttler translation

[CAUTION: External Email]

On Fri, May 21, 2021 at 5:47 PM Alex Deucher  wrote:
>
> On Fri, May 21, 2021 at 5:32 PM Sider, Graham  wrote:
> >
> > Would this be referring to tools that may parse 
> > /sys/class/.../device/gpu_metrics or the actual gpu_metrics_vX_Y structs? 
> > For the latter, if there are tools that parse dependent on version vX_Y, I 
> > agree that we would not want to break those.
> >
> > Since most ASICs are using different version currently, we would have to 
> > create a duplicate struct for each gpu_metrics version currently being 
> > used, unless I'm misunderstanding. I'm not sure if this is what you had in 
> > mind - let me know.
> >
>
> Just update them all to the latest version.  The newer ones are just 
> supersets of the previous versions.  I think a newer revision just 
> went in in the last day or two for some additional new data, you can 
> probably just piggy back on that since the code is not upstream yet.

Another option would be to leave the current throttle status as is, and add a 
new one that provides the standardized format.  Not sure if there is much value 
in having both though.  That said, we could increase the size of the new one to 
64 bits to accommodate future throttle status bits.

Alex

>
> Alex
>
>
> > Best,
> > Graham
> >
> > -Original Message-
> > From: Alex Deucher 
> > Sent: Friday, May 21, 2021 4:15 PM
> > To: Sider, Graham 
> > Cc: amd-gfx list ; Kasiviswanathan, 
> > Harish ; Sakhnovitch, Elena (Elen) 
> > 
> > Subject: Re: [PATCH 2/6] drm/amd/pm: Add arcturus throttler 
> > translation
> >
> > [CAUTION: External Email]
> >
> > On Fri, May 21, 2021 at 1:39 PM Sider, Graham  wrote:
> > >
> > > Hi Alex,
> > >
> > > Are you referring to bumping the gpu_metrics_vX_Y version number? 
> > > Different ASICs are currently using different version numbers already, so 
> > > I'm not sure how feasible this might be (e.g. arcturus ==  
> > > gpu_metrics_v1_1, navi1x == gpu_metrics_v1_3, vangogh == 
> > > gpu_metrics_v2_1).
> > >
> > > Technically speaking no new fields have been added to any of the 
> > > gpu_metrics versions, just a change in representation in the 
> > > throttle_status field. Let me know your thoughts on this.
> > >
> >
> > I don't know if we have any existing tools out there that parse this data, 
> > but if so, they would interpret it incorrectly after this change.  If we 
> > bump the version, at least the tools will know how to handle it.
> >
> > Alex
> >
> >
> > > Best,
> > > Graham
> > >
> > > -Original Message-
> > > From: Alex Deucher 
> > > Sent: Friday, May 21, 2021 10:27 AM
> > > To: Sider, Graham 
> > > Cc: amd-gfx list ; Kasiviswanathan, 
> > > Harish ; Sakhnovitch, Elena (Elen) 
> > > 
> > > Subject: Re: [PATCH 2/6] drm/amd/pm: Add arcturus throttler 
> > > translation
> > >
> > > [CAUTION: External Email]
> > >
> > > General comment on the patch series, do you want to bump the metrics 
> > > table version since the meaning of the throttler status has changed?
> > >
> > > Alex
> > >
> > > On Thu, May 20, 2021 at 10:30 AM Graham Sider  
> > > wrote:
> > > >
> > > > Perform dependent to independent throttle status translation for 
> > > > arcturus.
> > > > ---
> > > >  .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 62
> > > > ---
> > > >  1 file changed, 53 insertions(+), 9 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> > > > b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> > > > index 1735a96dd307..7c01c0bf2073 100644
> > > > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> > > > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> > > > @@ -540,6 +540,49 @@ static int arcturus_freqs_in_same_level(int32_t 
> > > > frequency1,
> > > > return (abs(frequency1 - frequency2) <=

RE: [PATCH 2/6] drm/amd/pm: Add arcturus throttler translation

2021-05-21 Thread Sider, Graham
Would this be referring to tools that may parse 
/sys/class/.../device/gpu_metrics or the actual gpu_metrics_vX_Y structs? For 
the latter, if there are tools that parse dependent on version vX_Y, I agree 
that we would not want to break those.

Since most ASICs are using different version currently, we would have to create 
a duplicate struct for each gpu_metrics version currently being used, unless 
I'm misunderstanding. I'm not sure if this is what you had in mind - let me 
know.

Best,
Graham

-Original Message-
From: Alex Deucher  
Sent: Friday, May 21, 2021 4:15 PM
To: Sider, Graham 
Cc: amd-gfx list ; Kasiviswanathan, Harish 
; Sakhnovitch, Elena (Elen) 

Subject: Re: [PATCH 2/6] drm/amd/pm: Add arcturus throttler translation

[CAUTION: External Email]

On Fri, May 21, 2021 at 1:39 PM Sider, Graham  wrote:
>
> Hi Alex,
>
> Are you referring to bumping the gpu_metrics_vX_Y version number? Different 
> ASICs are currently using different version numbers already, so I'm not sure 
> how feasible this might be (e.g. arcturus ==  gpu_metrics_v1_1, navi1x == 
> gpu_metrics_v1_3, vangogh == gpu_metrics_v2_1).
>
> Technically speaking no new fields have been added to any of the gpu_metrics 
> versions, just a change in representation in the throttle_status field. Let 
> me know your thoughts on this.
>

I don't know if we have any existing tools out there that parse this data, but 
if so, they would interpret it incorrectly after this change.  If we bump the 
version, at least the tools will know how to handle it.

Alex


> Best,
> Graham
>
> -Original Message-
> From: Alex Deucher 
> Sent: Friday, May 21, 2021 10:27 AM
> To: Sider, Graham 
> Cc: amd-gfx list ; Kasiviswanathan, 
> Harish ; Sakhnovitch, Elena (Elen) 
> 
> Subject: Re: [PATCH 2/6] drm/amd/pm: Add arcturus throttler 
> translation
>
> [CAUTION: External Email]
>
> General comment on the patch series, do you want to bump the metrics table 
> version since the meaning of the throttler status has changed?
>
> Alex
>
> On Thu, May 20, 2021 at 10:30 AM Graham Sider  wrote:
> >
> > Perform dependent to independent throttle status translation for 
> > arcturus.
> > ---
> >  .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 62
> > ---
> >  1 file changed, 53 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> > b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> > index 1735a96dd307..7c01c0bf2073 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> > @@ -540,6 +540,49 @@ static int arcturus_freqs_in_same_level(int32_t 
> > frequency1,
> > return (abs(frequency1 - frequency2) <= EPSILON);  }
> >
> > +static uint32_t arcturus_get_indep_throttler_status(
> > +   unsigned long
> > +dep_throttler_status) {
> > +   unsigned long indep_throttler_status = 0;
> > +
> > +   __assign_bit(INDEP_THROTTLER_TEMP_EDGE_BIT, _throttler_status,
> > + test_bit(THROTTLER_TEMP_EDGE_BIT, _throttler_status));
> > +   __assign_bit(INDEP_THROTTLER_TEMP_HOTSPOT_BIT, 
> > _throttler_status,
> > + test_bit(THROTTLER_TEMP_HOTSPOT_BIT, 
> > _throttler_status));
> > +   __assign_bit(INDEP_THROTTLER_TEMP_MEM_BIT, _throttler_status,
> > + test_bit(THROTTLER_TEMP_MEM_BIT, _throttler_status));
> > +   __assign_bit(INDEP_THROTTLER_TEMP_VR_GFX_BIT, 
> > _throttler_status,
> > + test_bit(THROTTLER_TEMP_VR_GFX_BIT, 
> > _throttler_status));
> > +   __assign_bit(INDEP_THROTTLER_TEMP_VR_MEM_BIT, 
> > _throttler_status,
> > + test_bit(THROTTLER_TEMP_VR_MEM_BIT, 
> > _throttler_status));
> > +   __assign_bit(INDEP_THROTTLER_TEMP_VR_SOC_BIT, 
> > _throttler_status,
> > + test_bit(THROTTLER_TEMP_VR_SOC_BIT, 
> > _throttler_status));
> > +   __assign_bit(INDEP_THROTTLER_TDC_GFX_BIT, _throttler_status,
> > + test_bit(THROTTLER_TDC_GFX_BIT, _throttler_status));
> > +   __assign_bit(INDEP_THROTTLER_TDC_SOC_BIT, _throttler_status,
> > + test_bit(THROTTLER_TDC_SOC_BIT, _throttler_status));
> > +   __assign_bit(INDEP_THROTTLER_PPT0_BIT, _throttler_status,
> > + test_bit(THROTTLER_PPT0_BIT, _throttler_status));
> > +   __assign_bit(INDEP_THROTTLER_PPT1_BIT, _throttler_status,
> > + test_bit(THROTTLER_PPT1_BIT, _throttler_status));
> > +   __assign_bit(INDEP_THROTTLER_PPT2_BIT, _throttler_st

RE: [PATCH 2/6] drm/amd/pm: Add arcturus throttler translation

2021-05-21 Thread Sider, Graham
Hi Alex,

Are you referring to bumping the gpu_metrics_vX_Y version number? Different 
ASICs are currently using different version numbers already, so I'm not sure 
how feasible this might be (e.g. arcturus ==  gpu_metrics_v1_1, navi1x == 
gpu_metrics_v1_3, vangogh == gpu_metrics_v2_1).

Technically speaking no new fields have been added to any of the gpu_metrics 
versions, just a change in representation in the throttle_status field. Let me 
know your thoughts on this.

Best,
Graham

-Original Message-
From: Alex Deucher  
Sent: Friday, May 21, 2021 10:27 AM
To: Sider, Graham 
Cc: amd-gfx list ; Kasiviswanathan, Harish 
; Sakhnovitch, Elena (Elen) 

Subject: Re: [PATCH 2/6] drm/amd/pm: Add arcturus throttler translation

[CAUTION: External Email]

General comment on the patch series, do you want to bump the metrics table 
version since the meaning of the throttler status has changed?

Alex

On Thu, May 20, 2021 at 10:30 AM Graham Sider  wrote:
>
> Perform dependent to independent throttle status translation for 
> arcturus.
> ---
>  .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 62 
> ---
>  1 file changed, 53 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> index 1735a96dd307..7c01c0bf2073 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> @@ -540,6 +540,49 @@ static int arcturus_freqs_in_same_level(int32_t 
> frequency1,
> return (abs(frequency1 - frequency2) <= EPSILON);  }
>
> +static uint32_t arcturus_get_indep_throttler_status(
> +   unsigned long dep_throttler_status)
> +{
> +   unsigned long indep_throttler_status = 0;
> +
> +   __assign_bit(INDEP_THROTTLER_TEMP_EDGE_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_TEMP_EDGE_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_TEMP_HOTSPOT_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_TEMP_HOTSPOT_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_TEMP_MEM_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_TEMP_MEM_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_TEMP_VR_GFX_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_TEMP_VR_GFX_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_TEMP_VR_MEM_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_TEMP_VR_MEM_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_TEMP_VR_SOC_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_TEMP_VR_SOC_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_TDC_GFX_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_TDC_GFX_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_TDC_SOC_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_TDC_SOC_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_PPT0_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_PPT0_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_PPT1_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_PPT1_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_PPT2_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_PPT2_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_PPT3_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_PPT3_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_PPM_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_PPM_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_FIT_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_FIT_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_APCC_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_APCC_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_VRHOT0_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_VRHOT0_BIT, &dep_throttler_status));
> +   __assign_bit(INDEP_THROTTLER_VRHOT1_BIT, &indep_throttler_status,
> +     test_bit(THROTTLER_VRHOT1_BIT, &dep_throttler_status));
> +
> +   return (uint32_t)indep_throttler_status;
> +}
> +
>  static int arcturus_get_smu_metrics_data(struct smu_context *smu,
>  MetricsMember_t member,
>  uint32_t *value) @@ -629,7 
> +672,7 @@ static int arcturus_get_smu_metrics_data(struct smu_context *smu,
> SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
> break;
> case METRICS_THROTTLER_STATUS:
> -   *value = metrics->Throttl