date:20231031

RE: [PATCH] drm/amdgpu: Don't warn for unsupported set_xgmi_plpd_mode

2023-10-31 Thread Yang, Stanley

[AMD Official Use Only - General]

Reviewed-by: Stanley.Yang 

Regards,
Stanley
> -Original Message-
> From: amd-gfx  On Behalf Of Tao
> Zhou
> Sent: Tuesday, October 31, 2023 3:08 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Lazar, Lijo ; Zhou1, Tao 
> Subject: [PATCH] drm/amdgpu: Don't warn for unsupported
> set_xgmi_plpd_mode
>
> set_xgmi_plpd_mode may be unsupported, which isn't an error, so there is
> no need to print a warning for it.
>
> Suggested-by: lijo.la...@amd.com
> Signed-off-by: Tao Zhou 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 0533f873001b..c9b09bddbcdc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -1138,7 +1138,8 @@ static int amdgpu_ras_error_inject_xgmi(struct
> amdgpu_device *adev,
>   if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
>   dev_warn(adev->dev, "Failed to disallow df cstate");
>
> - if (amdgpu_dpm_set_xgmi_plpd_mode(adev,
> XGMI_PLPD_DISALLOW))
> + ret = amdgpu_dpm_set_xgmi_plpd_mode(adev,
> XGMI_PLPD_DISALLOW);
> + if (ret && ret != -EOPNOTSUPP)
>   dev_warn(adev->dev, "Failed to disallow XGMI power down");
>
>   ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
> @@ -1146,7 +1147,8 @@ static int amdgpu_ras_error_inject_xgmi(struct
> amdgpu_device *adev,
>   if (amdgpu_ras_intr_triggered())
>   return ret;
>
> - if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT))
> + ret = amdgpu_dpm_set_xgmi_plpd_mode(adev,
> XGMI_PLPD_DEFAULT);
> + if (ret && ret != -EOPNOTSUPP)
>   dev_warn(adev->dev, "Failed to allow XGMI power down");
>
>   if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
> --
> 2.35.1

[PATCH v2 1/2] drm/amdgpu: Optimize the asic type fix code

2023-10-31 Thread Ma Jun

Use a new struct array to list the asics whose
asic type needs to be fixed.

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 35 ++---
 include/drm/amd_asic_type.h |  5 
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 84703e0a73bd..756cf49557a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2067,6 +2067,14 @@ static const struct pci_device_id pciidlist[] = {
 
 MODULE_DEVICE_TABLE(pci, pciidlist);
 
+static const struct amdgpu_asic_type_quirk asic_type_quirks[] = {
+   /* differentiate between P10 and P11 asics with the same DID */
+   {0x67FF, 0xE3, CHIP_POLARIS10},
+   {0x67FF, 0xE7, CHIP_POLARIS10},
+   {0x67FF, 0xF3, CHIP_POLARIS10},
+   {0x67FF, 0xF7, CHIP_POLARIS10},
+};
+
 static struct drm_driver amdgpu_kms_driver;
 
 static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
@@ -2109,6 +2117,22 @@ static void amdgpu_init_debug_options(struct 
amdgpu_device *adev)
}
 }
 
+static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long 
flags)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(asic_type_quirks); i++) {
+   if (pdev->device == asic_type_quirks[i].device &&
+   pdev->revision == asic_type_quirks[i].revision) {
+   flags &= ~AMD_ASIC_MASK;
+   flags |= asic_type_quirks[i].type;
+   break;
+   }
+   }
+
+   return flags;
+}
+
 static int amdgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
 {
@@ -2138,15 +2162,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 "See modparam exp_hw_support\n");
return -ENODEV;
}
-   /* differentiate between P10 and P11 asics with the same DID */
-   if (pdev->device == 0x67FF &&
-   (pdev->revision == 0xE3 ||
-pdev->revision == 0xE7 ||
-pdev->revision == 0xF3 ||
-pdev->revision == 0xF7)) {
-   flags &= ~AMD_ASIC_MASK;
-   flags |= CHIP_POLARIS10;
-   }
+
+   flags = amdgpu_fix_asic_type(pdev, flags);
 
/* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU 
mapping,
 * however, SME requires an indirect IOMMU mapping because the 
encryption
diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h
index 90b69270f2fa..724c45e3e9a7 100644
--- a/include/drm/amd_asic_type.h
+++ b/include/drm/amd_asic_type.h
@@ -68,4 +68,9 @@ enum amd_asic_type {
 
 extern const char *amdgpu_asic_name[];
 
+struct amdgpu_asic_type_quirk {
+   unsigned short device;  /* PCI device ID */
+   u8 revision;/* revision ID */
+   unsigned short type;/* real ASIC type */
+};
 #endif /*__AMD_ASIC_TYPE_H__ */
-- 
2.34.1

[PATCH v2 2/2] drm/amdgpu: Fix the asic type of some new asics

2023-10-31 Thread Ma Jun

Some new asics use the same device id as Sienna_Cichlid.
So fix it.

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 756cf49557a2..93c8c8b763ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2073,6 +2073,17 @@ static const struct amdgpu_asic_type_quirk 
asic_type_quirks[] = {
{0x67FF, 0xE7, CHIP_POLARIS10},
{0x67FF, 0xF3, CHIP_POLARIS10},
{0x67FF, 0xF7, CHIP_POLARIS10},
+   /* differentiate between Sienna_Cichlid and new asics with the same DID 
*/
+   {0x73BF, 0x18, CHIP_IP_DISCOVERY},
+   {0x73BF, 0x19, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD2, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD4, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD5, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD6, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD8, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD9, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xDA, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xDB, CHIP_IP_DISCOVERY},
 };
 
 static struct drm_driver amdgpu_kms_driver;
-- 
2.34.1

Re: [PATCH 5/6] drm/amdgpu: Add flag to disable implicit sync for GEM operations.

2023-10-31 Thread kernel test robot

Hi Tatsuyuki,

kernel test robot noticed the following build warnings:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on drm-exynos/exynos-drm-next 
drm-intel/for-linux-next-fixes linus/master v6.6]
[cannot apply to drm/drm-next drm-intel/for-linux-next drm-tip/drm-tip 
next-20231031]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Tatsuyuki-Ishi/drm-amdgpu-Don-t-implicit-sync-PRT-maps/20231031-224530
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:
https://lore.kernel.org/r/20231031134059.171277-6-ishitatsuyuki%40gmail.com
patch subject: [PATCH 5/6] drm/amdgpu: Add flag to disable implicit sync for 
GEM operations.
config: arc-randconfig-001-20231101 
(https://download.01.org/0day-ci/archive/20231101/202311011037.bt6nsywa-...@intel.com/config)
compiler: arceb-elf-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): 
(https://download.01.org/0day-ci/archive/20231101/202311011037.bt6nsywa-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202311011037.bt6nsywa-...@intel.com/

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:178: warning: Function parameter or 
member 'evicted' not described in 'amdgpu_vm_bo_set_evicted'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:178: warning: expecting prototype for 
amdgpu_vm_bo_evicted(). Prototype was for amdgpu_vm_bo_set_evicted() instead
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1667: warning: Function parameter or 
>> member 'sync_unmap' not described in 'amdgpu_vm_bo_unmap'


vim +1667 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

d38ceaf99ed015 Alex Deucher  2015-04-20  1650  
d38ceaf99ed015 Alex Deucher  2015-04-20  1651  /**
d38ceaf99ed015 Alex Deucher  2015-04-20  1652   * amdgpu_vm_bo_unmap - 
remove bo mapping from vm
d38ceaf99ed015 Alex Deucher  2015-04-20  1653   *
d38ceaf99ed015 Alex Deucher  2015-04-20  1654   * @adev: amdgpu_device 
pointer
d38ceaf99ed015 Alex Deucher  2015-04-20  1655   * @bo_va: bo_va to remove 
the address from
d38ceaf99ed015 Alex Deucher  2015-04-20  1656   * @saddr: where to the BO 
is mapped
d38ceaf99ed015 Alex Deucher  2015-04-20  1657   *
d38ceaf99ed015 Alex Deucher  2015-04-20  1658   * Remove a mapping of the 
BO at the specified addr from the VM.
7fc48e5912795c Andrey Grodzovsky 2018-06-11  1659   *
7fc48e5912795c Andrey Grodzovsky 2018-06-11  1660   * Returns:
7fc48e5912795c Andrey Grodzovsky 2018-06-11  1661   * 0 for success, error for 
failure.
d38ceaf99ed015 Alex Deucher  2015-04-20  1662   *
49b02b180a541d Chunming Zhou 2015-11-13  1663   * Object has to be reserved 
and unreserved outside!
d38ceaf99ed015 Alex Deucher  2015-04-20  1664   */
1550024e9de031 Tatsuyuki Ishi2023-10-31  1665  int 
amdgpu_vm_bo_unmap(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
1550024e9de031 Tatsuyuki Ishi2023-10-31  1666  uint64_t 
saddr, bool sync_unmap)
d38ceaf99ed015 Alex Deucher  2015-04-20 @1667  {
d38ceaf99ed015 Alex Deucher  2015-04-20  1668   struct 
amdgpu_bo_va_mapping *mapping;
ec681545afe5a4 Christian König   2017-08-01  1669   struct amdgpu_vm *vm = 
bo_va->base.vm;
7fc11959018f8b Christian König   2015-07-30  1670   bool valid = true;
d38ceaf99ed015 Alex Deucher  2015-04-20  1671  
6c7fc503a47f9b Christian König   2015-06-05  1672   saddr /= 
AMDGPU_GPU_PAGE_SIZE;
32b41ac21fde8f Christian König   2016-03-08  1673  
7fc11959018f8b Christian König   2015-07-30  1674   
list_for_each_entry(mapping, &bo_va->valids, list) {
a9f87f64525435 Christian König   2017-03-30  1675   if 
(mapping->start == saddr)
7fc11959018f8b Christian König   2015-07-30  1676   break;
7fc11959018f8b Christian König   2015-07-30  1677   }
7fc11959018f8b Christian König   2015-07-30  1678  
7fc11959018f8b Christian König   2015-07-30  1679   if (&mapping->list == 
&bo_va->valids) {
7fc11959018f8b Christian König   2015-07-30  1680   valid = false;
7fc11959018f8b Christian König   2015-07-30  1681  
7fc11959018f8b Christian König   2015-07-30  1682   
list_for_each_entry(mapping, &bo_va->invalids, list) {
a9f87f64525435 Christian König   2017-03-30  1683   if 
(mapping->start == saddr)
d38ceaf99ed015 Alex Deucher  2015-04-20  1684   
break;
d38ceaf99ed015 Alex Deucher  2015-04-20  1685   }
d38ceaf99ed015 Alex Deucher

Recall: [PATCH 1/2] drm/amdgpu: Optimize the asic type fix code

2023-10-31 Thread Ma, Jun

Ma, Jun would like to recall the message, "[PATCH 1/2] drm/amdgpu: Optimize the 
asic type fix code".

Recall: [PATCH 2/2] drm/amdgpu: Fix the asic type of some new asics

2023-10-31 Thread Ma, Jun

Ma, Jun would like to recall the message, "[PATCH 2/2] drm/amdgpu: Fix the 
asic type of some new asics".

[PATCH 2/2] drm/amdgpu: Fix the asic type of some new asics

2023-10-31 Thread Ma Jun

Some special new asics use the same device id as Sienna_Cichlid.
So fix it.

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 756cf49557a2..a62961649171 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2073,6 +2073,17 @@ static const struct amdgpu_asic_type_quirk 
asic_type_quirks[] = {
{0x67FF, 0xE7, CHIP_POLARIS10},
{0x67FF, 0xF3, CHIP_POLARIS10},
{0x67FF, 0xF7, CHIP_POLARIS10},
+   /* differentiate between Navi21 and new asics with the same DID */
+   {0x73BF, 0x18, CHIP_IP_DISCOVERY},
+   {0x73BF, 0x19, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD2, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD4, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD5, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD6, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD8, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xD9, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xDA, CHIP_IP_DISCOVERY},
+   {0x73BF, 0xDB, CHIP_IP_DISCOVERY},
 };
 
 static struct drm_driver amdgpu_kms_driver;
-- 
2.34.1

[PATCH 1/2] drm/amdgpu: Optimize the asic type fix code

2023-10-31 Thread Ma Jun

Use a new struct array to list the asics whose
asic type needs to be fixed.

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 35 ++---
 include/drm/amd_asic_type.h |  5 
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 84703e0a73bd..756cf49557a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2067,6 +2067,14 @@ static const struct pci_device_id pciidlist[] = {
 
 MODULE_DEVICE_TABLE(pci, pciidlist);
 
+static const struct amdgpu_asic_type_quirk asic_type_quirks[] = {
+   /* differentiate between P10 and P11 asics with the same DID */
+   {0x67FF, 0xE3, CHIP_POLARIS10},
+   {0x67FF, 0xE7, CHIP_POLARIS10},
+   {0x67FF, 0xF3, CHIP_POLARIS10},
+   {0x67FF, 0xF7, CHIP_POLARIS10},
+};
+
 static struct drm_driver amdgpu_kms_driver;
 
 static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
@@ -2109,6 +2117,22 @@ static void amdgpu_init_debug_options(struct 
amdgpu_device *adev)
}
 }
 
+static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long 
flags)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(asic_type_quirks); i++) {
+   if (pdev->device == asic_type_quirks[i].device &&
+   pdev->revision == asic_type_quirks[i].revision) {
+   flags &= ~AMD_ASIC_MASK;
+   flags |= asic_type_quirks[i].type;
+   break;
+   }
+   }
+
+   return flags;
+}
+
 static int amdgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
 {
@@ -2138,15 +2162,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 "See modparam exp_hw_support\n");
return -ENODEV;
}
-   /* differentiate between P10 and P11 asics with the same DID */
-   if (pdev->device == 0x67FF &&
-   (pdev->revision == 0xE3 ||
-pdev->revision == 0xE7 ||
-pdev->revision == 0xF3 ||
-pdev->revision == 0xF7)) {
-   flags &= ~AMD_ASIC_MASK;
-   flags |= CHIP_POLARIS10;
-   }
+
+   flags = amdgpu_fix_asic_type(pdev, flags);
 
/* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU 
mapping,
 * however, SME requires an indirect IOMMU mapping because the 
encryption
diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h
index 90b69270f2fa..724c45e3e9a7 100644
--- a/include/drm/amd_asic_type.h
+++ b/include/drm/amd_asic_type.h
@@ -68,4 +68,9 @@ enum amd_asic_type {
 
 extern const char *amdgpu_asic_name[];
 
+struct amdgpu_asic_type_quirk {
+   unsigned short device;  /* PCI device ID */
+   u8 revision;/* revision ID */
+   unsigned short type;/* real ASIC type */
+};
 #endif /*__AMD_ASIC_TYPE_H__ */
-- 
2.34.1

Re: [PATCH 3/6] drm/amdgpu: Flush VM updates for split bindings eagerly.

2023-10-31 Thread kernel test robot

Hi Tatsuyuki,

kernel test robot noticed the following build warnings:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on drm-exynos/exynos-drm-next 
drm-intel/for-linux-next-fixes linus/master v6.6]
[cannot apply to drm/drm-next drm-intel/for-linux-next drm-tip/drm-tip 
next-20231031]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Tatsuyuki-Ishi/drm-amdgpu-Don-t-implicit-sync-PRT-maps/20231031-224530
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:
https://lore.kernel.org/r/20231031134059.171277-4-ishitatsuyuki%40gmail.com
patch subject: [PATCH 3/6] drm/amdgpu: Flush VM updates for split bindings 
eagerly.
config: arc-randconfig-001-20231101 
(https://download.01.org/0day-ci/archive/20231101/202311010948.g6i55ptu-...@intel.com/config)
compiler: arceb-elf-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): 
(https://download.01.org/0day-ci/archive/20231101/202311010948.g6i55ptu-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202311010948.g6i55ptu-...@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c:608: warning: Excess function 
>> parameter 'bo_va' description in 'amdgpu_gem_va_update_vm'
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c:608: warning: Excess function 
>> parameter 'operation' description in 'amdgpu_gem_va_update_vm'


vim +608 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

d38ceaf99ed015f Alex Deucher2015-04-20  594  
d38ceaf99ed015f Alex Deucher2015-04-20  595  /**
d38ceaf99ed015f Alex Deucher2015-04-20  596   * amdgpu_gem_va_update_vm 
-update the bo_va in its VM
d38ceaf99ed015f Alex Deucher2015-04-20  597   *
d38ceaf99ed015f Alex Deucher2015-04-20  598   * @adev: amdgpu_device 
pointer
dc54d3d1744d23e Christian König 2017-03-13  599   * @vm: vm to update
d38ceaf99ed015f Alex Deucher2015-04-20  600   * @bo_va: bo_va to update
dc54d3d1744d23e Christian König 2017-03-13  601   * @operation: map, unmap 
or clear
d38ceaf99ed015f Alex Deucher2015-04-20  602   *
2ffdaafb5d5f37b Christian König 2017-01-27  603   * Update the bo_va 
directly after setting its address. Errors are not
d38ceaf99ed015f Alex Deucher2015-04-20  604   * vital here, so they are 
not reported back to userspace.
d38ceaf99ed015f Alex Deucher2015-04-20  605   */
d38ceaf99ed015f Alex Deucher2015-04-20  606  static void 
amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
ddf1ffe56ab385a Tatsuyuki Ishi  2023-10-31  607 
struct amdgpu_vm *vm)
d38ceaf99ed015f Alex Deucher2015-04-20 @608  {
ddf1ffe56ab385a Tatsuyuki Ishi  2023-10-31  609 struct amdgpu_bo_va 
*bo_va;
3ff8a0e90ac Christian König 2017-08-03  610 int r;
d38ceaf99ed015f Alex Deucher2015-04-20  611  
3ff8a0e90ac Christian König 2017-08-03  612 if 
(!amdgpu_vm_ready(vm))
3ff8a0e90ac Christian König 2017-08-03  613 return;
e410b5cbabe70b1 Chunming Zhou   2015-12-07  614  
f34678187a33970 Nicolai Hähnle  2017-03-23  615 r = 
amdgpu_vm_clear_freed(adev, vm, NULL);
d38ceaf99ed015f Alex Deucher2015-04-20  616 if (r)
2ffdaafb5d5f37b Christian König 2017-01-27  617 goto error;
194a33643b1161f monk.liu2015-07-22  618  
ddf1ffe56ab385a Tatsuyuki Ishi  2023-10-31  619 
spin_lock(&vm->status_lock);
ddf1ffe56ab385a Tatsuyuki Ishi  2023-10-31  620 while 
(!list_empty(&vm->dirty)) {
ddf1ffe56ab385a Tatsuyuki Ishi  2023-10-31  621 bo_va = 
list_first_entry(&vm->dirty, struct amdgpu_bo_va,
ddf1ffe56ab385a Tatsuyuki Ishi  2023-10-31  622 
 base.vm_status);
ddf1ffe56ab385a Tatsuyuki Ishi  2023-10-31  623 
spin_unlock(&vm->status_lock);
ddf1ffe56ab385a Tatsuyuki Ishi  2023-10-31  624  
8f8cc3fb43508a2 Christian König 2022-03-17  625 r = 
amdgpu_vm_bo_update(adev, bo_va, false);
0abc6878fc2d699 Christian König 2017-09-01  626 if (r)
0abc6878fc2d699 Christian König 2017-09-01  627 goto 
error;
ddf1ffe56ab385a Tatsuyuki Ishi  2023-10-31  628 
spin_lock(&vm->status_lock);
93bab704c1513f8 Gustavo A. R. Silva 2018-02-14  629 }
ddf1ffe56ab385a Tatsuyuki Ishi  2023-10-31  630 
spin_unlock(&vm->status_lock);
93bab704c1513f8 Gustavo A. R. Silva 2018-02-14

[PATCH v7d 23/23] drm: restore CONFIG_DRM_USE_DYNAMIC_DEBUG un-BROKEN

2023-10-31 Thread Jim Cromie

Lots of burn-in testing needed before signing, upstreaming.

NOTE: I set default Y to maximize testing by default.
Is there a better way to do this ?

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 3caa020391c7..708f5e8cb205 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -55,8 +55,7 @@ config DRM_DEBUG_MM
 
 config DRM_USE_DYNAMIC_DEBUG
bool "use dynamic debug to implement drm.debug"
-   default n
-   depends on BROKEN
+   default y
depends on DRM
depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE
depends on JUMP_LABEL
-- 
2.41.0

[PATCH v7d 22/23] drm-drivers: DRM_CLASSMAP_USE in 2nd batch of drivers, helpers

2023-10-31 Thread Jim Cromie

Add a DRM_CLASSMAP_USE declaration to 2nd batch of helpers and *_drv.c
files.  For drivers, add the decl just above the module's PARAMs,
since it identifies the "inherited" drm.debug param.

Note: with CONFIG_DRM_USE_DYNAMIC_DEBUG=y, a module not also declaring
DRM_CLASSMAP_USE will have its class'd prdbgs stuck in the initial
(disabled, but for DEBUG) state.

The stuck sites are evident in /proc/dynamic_debug/control as:

   class:_UNKNOWN_ _id:N# control's last column

rather than a proper "enumeration":

   class:DRM_UT_CORE

This set of updates was found by choosing M for all DRM-config items I
found (not allmodconfig), building & modprobing them, and grepping
"class unknown," control.  There may yet be others.

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/drm_gem_shmem_helper.c | 2 ++
 drivers/gpu/drm/gud/gud_drv.c  | 2 ++
 drivers/gpu/drm/mgag200/mgag200_drv.c  | 2 ++
 drivers/gpu/drm/qxl/qxl_drv.c  | 2 ++
 drivers/gpu/drm/radeon/radeon_drv.c| 2 ++
 drivers/gpu/drm/udl/udl_main.c | 2 ++
 drivers/gpu/drm/vkms/vkms_drv.c| 2 ++
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c| 2 ++
 8 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c 
b/drivers/gpu/drm/drm_gem_shmem_helper.c
index e435f986cd13..066d906e3199 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -23,6 +23,8 @@
 #include 
 #include 
 
+DRM_CLASSMAP_USE(drm_debug_classes);
+
 MODULE_IMPORT_NS(DMA_BUF);
 
 /**
diff --git a/drivers/gpu/drm/gud/gud_drv.c b/drivers/gpu/drm/gud/gud_drv.c
index 9d7bf8ee45f1..5b555045fce4 100644
--- a/drivers/gpu/drm/gud/gud_drv.c
+++ b/drivers/gpu/drm/gud/gud_drv.c
@@ -31,6 +31,8 @@
 
 #include "gud_internal.h"
 
+DRM_CLASSMAP_USE(drm_debug_classes);
+
 /* Only used internally */
 static const struct drm_format_info gud_drm_format_r1 = {
.format = GUD_DRM_FORMAT_R1,
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c 
b/drivers/gpu/drm/mgag200/mgag200_drv.c
index abddf37f0ea1..d678eb8e028d 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.c
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.c
@@ -24,6 +24,8 @@ static int mgag200_modeset = -1;
 MODULE_PARM_DESC(modeset, "Disable/Enable modesetting");
 module_param_named(modeset, mgag200_modeset, int, 0400);
 
+DRM_CLASSMAP_USE(drm_debug_classes);
+
 int mgag200_init_pci_options(struct pci_dev *pdev, u32 option, u32 option2)
 {
struct device *dev = &pdev->dev;
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index b30ede1cf62d..91942ffcc2b4 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -65,6 +65,8 @@ module_param_named(modeset, qxl_modeset, int, 0400);
 MODULE_PARM_DESC(num_heads, "Number of virtual crtcs to expose (default 4)");
 module_param_named(num_heads, qxl_num_crtc, int, 0400);
 
+DRM_CLASSMAP_USE(drm_debug_classes);
+
 static struct drm_driver qxl_driver;
 static struct pci_driver qxl_pci_driver;
 
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c 
b/drivers/gpu/drm/radeon/radeon_drv.c
index fa531493b111..ab29945af657 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -247,6 +247,8 @@ int radeon_cik_support = 1;
 MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = 
disabled)");
 module_param_named(cik_support, radeon_cik_support, int, 0444);
 
+DRM_CLASSMAP_USE(drm_debug_classes);
+
 static struct pci_device_id pciidlist[] = {
radeon_PCI_IDS
 };
diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c
index 3ebe2ce55dfd..ba57c14454e5 100644
--- a/drivers/gpu/drm/udl/udl_main.c
+++ b/drivers/gpu/drm/udl/udl_main.c
@@ -19,6 +19,8 @@
 
 #define NR_USB_REQUEST_CHANNEL 0x12
 
+DRM_CLASSMAP_USE(drm_debug_classes);
+
 #define MAX_TRANSFER (PAGE_SIZE*16 - BULK_SIZE)
 #define WRITES_IN_FLIGHT (20)
 #define MAX_VENDOR_DESCRIPTOR_SIZE 256
diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c
index dd0af086e7fa..086797c4b82b 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.c
+++ b/drivers/gpu/drm/vkms/vkms_drv.c
@@ -39,6 +39,8 @@
 
 static struct vkms_config *default_config;
 
+DRM_CLASSMAP_USE(drm_debug_classes);
+
 static bool enable_cursor = true;
 module_param_named(enable_cursor, enable_cursor, bool, 0444);
 MODULE_PARM_DESC(enable_cursor, "Enable/Disable cursor support");
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 8b24ecf60e3e..9cb6be422621 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -275,6 +275,8 @@ static int vmw_probe(struct pci_dev *, const struct 
pci_device_id *);
 static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
  void *ptr);
 
+DRM_CLASSMAP_USE(drm_debug_classes);
+
 MODULE_PARM_DESC(restrict_iommu, "Try to limit IOMMU usage for TTM pages");
 module_param_named(restrict_iommu, vmw_restrict_iommu, int, 06

[PATCH v7d 17/23] dyndbg-doc: add classmap info to howto

2023-10-31 Thread Jim Cromie

Add some basic info on classmap usage and api

cc: linux-...@vger.kernel.org
Signed-off-by: Jim Cromie 
---
v5- adjustments per Randy Dunlap, me
v7b- checkpatch fixes
---
 .../admin-guide/dynamic-debug-howto.rst   | 60 ++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst 
b/Documentation/admin-guide/dynamic-debug-howto.rst
index 0b3d39c610d9..028c2cb5b4c5 100644
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@@ -225,7 +225,6 @@ the ``p`` flag has meaning, other flags are ignored.
 Note the regexp ``^[-+=][fslmpt_]+$`` matches a flags specification.
 To clear all flags at once, use ``=_`` or ``-fslmpt``.
 
-
 Debug messages during Boot Process
 ==================================
 
@@ -375,3 +374,62 @@ just a shortcut for ``print_hex_dump(KERN_DEBUG)``.
 For ``print_hex_dump_debug()``/``print_hex_dump_bytes()``, format string is
 its ``prefix_str`` argument, if it is constant string; or ``hexdump``
 in case ``prefix_str`` is built dynamically.
+
+Dynamic Debug classmaps
+=======================
+
+Dyndbg allows selection/grouping of *prdbg* callsites using structural
+info: module, file, function, line.  Classmaps allow authors to add
+their own domain-oriented groupings using class-names.  Classmaps are
+exported, so they are referenceable from other modules.
+
+  # enable classes individually
+  :#> ddcmd class DRM_UT_CORE +p
+  :#> ddcmd class DRM_UT_KMS +p
+  # or more selectively
+  :#> ddcmd class DRM_UT_CORE module drm +p
+
+The "class FOO" syntax protects class'd prdbgs from generic overwrite::
+
+  # IOW this doesn't wipe any DRM.debug settings
+  :#> ddcmd -p
+
+To support the DRM.debug parameter, DYNDBG_CLASSMAP_PARAM* updates all
+classes in a classmap, mapping param-bits 0..N onto the classes:
+DRM_UT_<*> for the DRM use-case.
+
+Dynamic Debug Classmap API
+==========================
+
+DYNDBG_CLASSMAP_DEFINE - modules use this to create classmaps, naming
+each of the classes (stringified enum-symbols: "DRM_UT_<*>"), and
+type, and mapping the class-names to consecutive _class_ids.
+
+By doing so, modules tell dyndbg that they have prdbgs with those
+class_ids, and they authorize dyndbg to accept "class FOO" for the
+module defining that classname.
+
+There are 2 types of classmaps:
+
+ DD_CLASS_TYPE_DISJOINT_BITS: classes are independent, like DRM.debug
+ DD_CLASS_TYPE_LEVEL_NUM: classes are relative, ordered (V3 > V2)
+
+DYNDBG_CLASSMAP_PARAM - refers to a DEFINEd classmap, exposing the set
+of defined classes to manipulation as a group.  This interface
+enforces the relatedness of classes of DD_CLASS_TYPE_LEVEL_NUM typed
+classmaps; all classes are independent in the >control parser itself.
+
+DYNDBG_CLASSMAP_USE - drm drivers invoke this to ref the CLASSMAP that
+drm DEFINEs.  This shares the classmap definition, and authorizes
+dyndbg to apply changes to the user module's class'd pr_debugs.  It
+also tells dyndbg how to initialize the user's prdbgs at modprobe,
+based upon the current setting of the parent's controlling param.
+
+Modules or module-groups (drm & drivers) can define multiple
+classmaps, as long as they share the limited 0..62 per-module-group
+_class_id range, without overlap.
+
+``#define DEBUG`` will enable all pr_debugs in scope, including any
+class'd ones.  This won't be reflected in the PARAM readback value,
+but the pr_debug callsites can be toggled into agreement with the
+param.
-- 
2.41.0

[PATCH v7d 21/23] drm: use correct ccflags-y spelling

2023-10-31 Thread Jim Cromie

Incorrectly spelled CFLAGS- failed to add -DDYNAMIC_DEBUG_MODULE,
which broke builds with:

CONFIG_DRM_USE_DYNAMIC_DEBUG=y
CONFIG_DYNAMIC_DEBUG_CORE=y
CONFIG_DYNAMIC_DEBUG=n

Also add subdir-ccflags so that all drivers pick up the addition.

Fixes: 84ec67288c10 ("drm_print: wrap drm_*_dbg in dyndbg descriptor factory 
macro")
Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 215e78e79125..22b1984cc982 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -3,7 +3,8 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-CFLAGS-$(CONFIG_DRM_USE_DYNAMIC_DEBUG) += -DDYNAMIC_DEBUG_MODULE
+ccflags-$(CONFIG_DRM_USE_DYNAMIC_DEBUG)+= 
-DDYNAMIC_DEBUG_MODULE
+subdir-ccflags-$(CONFIG_DRM_USE_DYNAMIC_DEBUG) += -DDYNAMIC_DEBUG_MODULE
 
 drm-y := \
drm_aperture.o \
-- 
2.41.0

[PATCH v7d 19/23] dyndbg: add _DPRINTK_FLAGS_INCL_LOOKUP

2023-10-31 Thread Jim Cromie

dyndbg's dynamic prefixing (by +tmfsl flags) is needlessly expensive.

When an enabled (with +p) pr_debug is called, _DPRINTK_FLAGS_INCL_ANY
prefix decorations are sprintf'd into stack-mem for every call.

This string (or part of it) could be cached once it's first generated,
and retrieved thereafter, as long as it's deleted any time the
callsite's flags are changed afterwards.

So consider the prefix/decoration flags: 'tmfsl', and what should be
in the cache:

-t  thread-id. not part of the "callsite" info, derived from current.
doesn't belong in the cache. it would be wrong.
can be done in outer: dynamic_emit_prefix()

-l  line number
this could be part of the prefix, but would bloat the cache
can also be done in outer: dynamic_emit_prefix()

-mfs  module, function, source-file
we cache these, composed into a sub-string.
they are "lookups", currently to descriptor fields,
could be accessor macros to "compressed" tables.
cache saves more access work.

All enabled together, they compose a prefix string like:

  # outer   -inner--   outer
  "[tid] module:function:sourcefile:line: "

So this patch extracts _DPRINTK_FLAGS_INCL_LOOKUP macro out of
_DPRINTK_FLAGS_INCL_ANY macro, then redefs latter.

Next re-refactor dynamic_emit_prefix inner/outer fns accordingly.

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 4ffddf5e9152..b4550f80cfd5 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -40,10 +40,12 @@ struct _ddebug {
 #define _DPRINTK_FLAGS_INCL_SOURCENAME (1<<5)
 #define _DPRINTK_FLAGS_PREFIX_CACHED   (1<<7)
 
-#define _DPRINTK_FLAGS_INCL_ANY\
-   (_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\
-_DPRINTK_FLAGS_INCL_LINENO  | _DPRINTK_FLAGS_INCL_TID |\
+#define _DPRINTK_FLAGS_INCL_LOOKUP \
+   (_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |   \
 _DPRINTK_FLAGS_INCL_SOURCENAME)
+#define _DPRINTK_FLAGS_INCL_ANY
\
+   (_DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID | \
+_DPRINTK_FLAGS_INCL_LOOKUP)
 
 #if defined DEBUG
 #define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT
-- 
2.41.0

[PATCH v7d 20/23] dyndbg: refactor *dynamic_emit_prefix

2023-10-31 Thread Jim Cromie

Refactor the split of duties between outer & inner fns.

The outer fn was previously just an inline unlikely forward to inner,
which did all the work.

Now, outer handles +t and +l flags itself, and calls inner only when
_DPRINTK_FLAGS_INCL_LOOKUP is needed.

No functional change.

But it does make the results of the inner-fn more cache-friendly
(fewer entries, reused more often):

1- no spurious [TID] or <intr> noise
2- no LINE-number to bloat the cache (avg 9 pr_debugs/fn)
3- only LOOKUP stuff

Currently LOOKUPs are descriptor-field refs but could be replaced by
accessor functions.  This would allow the __dyndbg_sites section to be
de-duplicated and reclaimed; currently module, filename fields are
~90% repeated.  As the accessors get more expensive, the value of
caching part of the prefix goes up.

Also change inner-fn to return count of extra chars written to the
buffer, and drop "inline" from outer, let the compiler decide.  Maybe
also change name accordingly.

Signed-off-by: Jim Cromie 
---
fixup whitespace
---
 lib/dynamic_debug.c | 39 ++-
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index f878a6f09fc8..213110ec1e9c 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -774,11 +774,28 @@ static int remaining(int wrote)
return 0;
 }
 
-static char *__dynamic_emit_prefix(const struct _ddebug *desc, char *buf)
+static int __dynamic_emit_prefix(const struct _ddebug *desc, char *buf, int 
pos)
+{
+   if (desc->flags & _DPRINTK_FLAGS_INCL_MODNAME)
+   pos += snprintf(buf + pos, remaining(pos), "%s:",
+   desc->modname);
+   if (desc->flags & _DPRINTK_FLAGS_INCL_FUNCNAME)
+   pos += snprintf(buf + pos, remaining(pos), "%s:",
+   desc->function);
+   if (desc->flags & _DPRINTK_FLAGS_INCL_SOURCENAME)
+   pos += snprintf(buf + pos, remaining(pos), "%s:",
+   trim_prefix(desc->filename));
+   return pos;
+}
+
+static char *dynamic_emit_prefix(struct _ddebug *desc, char *buf)
 {
int pos_after_tid;
int pos = 0;
 
+   if (likely(!(desc->flags & _DPRINTK_FLAGS_INCL_ANY)))
+   return buf;
+
if (desc->flags & _DPRINTK_FLAGS_INCL_TID) {
if (in_interrupt())
pos += snprintf(buf + pos, remaining(pos), " ");
@@ -787,15 +804,10 @@ static char *__dynamic_emit_prefix(const struct _ddebug 
*desc, char *buf)
task_pid_vnr(current));
}
pos_after_tid = pos;
-   if (desc->flags & _DPRINTK_FLAGS_INCL_MODNAME)
-   pos += snprintf(buf + pos, remaining(pos), "%s:",
-   desc->modname);
-   if (desc->flags & _DPRINTK_FLAGS_INCL_FUNCNAME)
-   pos += snprintf(buf + pos, remaining(pos), "%s:",
-   desc->function);
-   if (desc->flags & _DPRINTK_FLAGS_INCL_SOURCENAME)
-   pos += snprintf(buf + pos, remaining(pos), "%s:",
-   trim_prefix(desc->filename));
+
+   if (unlikely(desc->flags & _DPRINTK_FLAGS_INCL_LOOKUP))
+   pos += __dynamic_emit_prefix(desc, buf, pos);
+
if (desc->flags & _DPRINTK_FLAGS_INCL_LINENO)
pos += snprintf(buf + pos, remaining(pos), "%d:",
desc->lineno);
@@ -807,13 +819,6 @@ static char *__dynamic_emit_prefix(const struct _ddebug 
*desc, char *buf)
return buf;
 }
 
-static inline char *dynamic_emit_prefix(struct _ddebug *desc, char *buf)
-{
-   if (unlikely(desc->flags & _DPRINTK_FLAGS_INCL_ANY))
-   return __dynamic_emit_prefix(desc, buf);
-   return buf;
-}
-
 void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
 {
va_list args;
-- 
2.41.0

[PATCH v7d 18/23] dyndbg: reserve flag bit _DPRINTK_FLAGS_PREFIX_CACHED

2023-10-31 Thread Jim Cromie

Reserve bit 7 to remember that a pr-debug callsite is/was:
- enabled, with +p
- wants a dynamic-prefix, with one+ of module:function:sourcefile
- was previously called
- was thus saved in the cache. NOT YET.

It's unclear whether any cache fetch would be faster than 2-3 field
fetches, but there's another factor; the 3 columns in the __dyndbg
section are highly redundant and compressible, but to get the
compression, we need field accessors, which will rebalance the
tradeoff.

So, for now, it's just the bit reservation.

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 2a7832b1ba5b..4ffddf5e9152 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -38,6 +38,7 @@ struct _ddebug {
 #define _DPRINTK_FLAGS_INCL_LINENO (1<<3)
 #define _DPRINTK_FLAGS_INCL_TID(1<<4)
 #define _DPRINTK_FLAGS_INCL_SOURCENAME (1<<5)
+#define _DPRINTK_FLAGS_PREFIX_CACHED   (1<<7)
 
 #define _DPRINTK_FLAGS_INCL_ANY\
(_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\
-- 
2.41.0

[PATCH v7d 15/23] dyndbg: refactor ddebug_classparam_clamp_input

2023-10-31 Thread Jim Cromie

Extract input validation code, from param_set_dyndbg_module_classes()
(the sys-node >handler) to new: ddebug_class_param_clamp_input(),
call it from former.  It takes kernel-param arg, so it can complain
about "foo: bad input".

Reuse ddebug_class_param_clamp_input() in ddebug_sync_classbits(),
to validate inputs from parent's params, just like our own.
To support that reuse, alter ddebug_sync_classbits() and caller to
pass kp instead of kp->arg.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 70 ++---
 1 file changed, 47 insertions(+), 23 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 7d3261dede77..f878a6f09fc8 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -653,6 +653,30 @@ static int ddebug_apply_class_bitmap(const struct 
ddebug_class_param *dcp,
 
 #define CLASSMAP_BITMASK(width) ((1UL << (width)) - 1)
 
+static void ddebug_class_param_clamp_input(unsigned long *inrep, const struct 
kernel_param *kp)
+{
+   const struct ddebug_class_param *dcp = kp->arg;
+   const struct ddebug_class_map *map = dcp->map;
+
+   switch (map->map_type) {
+   case DD_CLASS_TYPE_DISJOINT_BITS:
+   /* expect bits. mask and warn if too many */
+   if (*inrep & ~CLASSMAP_BITMASK(map->length)) {
+   pr_warn("%s: input: 0x%lx exceeds mask: 0x%lx, 
masking\n",
+   KP_NAME(kp), *inrep, 
CLASSMAP_BITMASK(map->length));
+   *inrep &= CLASSMAP_BITMASK(map->length);
+   }
+   break;
+   case DD_CLASS_TYPE_LEVEL_NUM:
+   /* input is bitpos, of highest verbosity to be enabled */
+   if (*inrep > map->length) {
+   pr_warn("%s: level:%ld exceeds max:%d, clamping\n",
+   KP_NAME(kp), *inrep, map->length);
+   *inrep = map->length;
+   }
+   break;
+   }
+}
 static int param_set_dyndbg_module_classes(const char *instr,
   const struct kernel_param *kp,
   const char *modnm)
@@ -671,26 +695,15 @@ static int param_set_dyndbg_module_classes(const char 
*instr,
pr_err("expecting numeric input, not: %s > %s\n", instr, 
KP_NAME(kp));
return -EINVAL;
}
+   ddebug_class_param_clamp_input(&inrep, kp);
 
switch (map->map_type) {
case DD_CLASS_TYPE_DISJOINT_BITS:
-   /* expect bits. mask and warn if too many */
-   if (inrep & ~CLASSMAP_BITMASK(map->length)) {
-   pr_warn("%s: input: 0x%lx exceeds mask: 0x%lx, 
masking\n",
-   KP_NAME(kp), inrep, 
CLASSMAP_BITMASK(map->length));
-   inrep &= CLASSMAP_BITMASK(map->length);
-   }
v2pr_info("bits:0x%lx > %s.%s\n", inrep, modnm ?: "*", 
KP_NAME(kp));
totct += ddebug_apply_class_bitmap(dcp, &inrep, *dcp->bits, 
modnm);
*dcp->bits = inrep;
break;
case DD_CLASS_TYPE_LEVEL_NUM:
-   /* input is bitpos, of highest verbosity to be enabled */
-   if (inrep > map->length) {
-   pr_warn("%s: level:%ld exceeds max:%d, clamping\n",
-   KP_NAME(kp), inrep, map->length);
-   inrep = map->length;
-   }
old_bits = CLASSMAP_BITMASK(*dcp->lvl);
new_bits = CLASSMAP_BITMASK(inrep);
v2pr_info("lvl:%ld bits:0x%lx > %s\n", inrep, new_bits, 
KP_NAME(kp));
@@ -1157,16 +1170,27 @@ static const char * const ddebug_classmap_typenames[] = 
{
  ddebug_classmap_typenames[_cm->map_type]);\
})
 
-static void ddebug_sync_classbits(const struct ddebug_class_param *dcp, const 
char *modname)
+static void ddebug_sync_classbits(const struct kernel_param *kp, const char 
*modname)
 {
-   /* clamp initial bitvec, mask off hi-bits */
-   if (*dcp->bits & ~CLASSMAP_BITMASK(dcp->map->length)) {
-   *dcp->bits &= CLASSMAP_BITMASK(dcp->map->length);
-   v2pr_info("preset classbits: %lx\n", *dcp->bits);
+   struct ddebug_class_param *dcp = kp->arg;
+   unsigned long new_bits;
+
+   ddebug_class_param_clamp_input(dcp->bits, kp);
+
+   switch (dcp->map->map_type) {
+   case DD_CLASS_TYPE_DISJOINT_BITS:
+   v2pr_info("  %s: classbits: 0x%lx\n", KP_NAME(kp), *dcp->bits);
+   ddebug_apply_class_bitmap(dcp, dcp->bits, 0UL, modname);
+   break;
+   case DD_CLASS_TYPE_LEVEL_NUM:
+   new_bits = CLASSMAP_BITMASK(*dcp->lvl);
+   v2pr_info("  %s: lvl:%ld bits:0x%lx\n", KP_NAME(kp), *dcp->lvl, 
new_bits);
+   ddebug_apply_class_bitmap(dcp, &new_bits, 0UL, modname);
+   break;
+   default:
+

[PATCH v7d 16/23] dyndbg-API: promote DYNDBG_CLASSMAP_PARAM to API

2023-10-31 Thread Jim Cromie

move the DYNDBG_CLASSMAP_PARAM macro from test-dynamic-debug.c into
the header, and refine it, by distinguishing the 2 use cases:

1.DYNDBG_CLASSMAP_PARAM_REF
for DRM, to pass in extern __drm_debug by name.
dyndbg keeps bits in it, so drm can still use it as before

2.DYNDBG_CLASSMAP_PARAM
new user (test_dynamic_debug) doesn't need to share state,
decls a static long unsigned int to store the bitvec.

__DYNDBG_CLASSMAP_PARAM
   bottom layer - allocate,init a ddebug-class-param, module-param-cb.

Also clean up and improve comments in test-code, and add
MODULE_DESCRIPTIONs.

Signed-off-by: Jim Cromie 
---
---
 drivers/gpu/drm/drm_print.c |  8 ++
 include/drm/drm_print.h |  6 ++--
 include/linux/dynamic_debug.h   | 37 +++-
 lib/test_dynamic_debug.c| 50 +
 lib/test_dynamic_debug_submod.c |  9 +-
 5 files changed, 69 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index dabcfa0dd279..8f4b609353a5 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -69,12 +69,8 @@ DRM_CLASSMAP_DEFINE(drm_debug_classes, 
DD_CLASS_TYPE_DISJOINT_BITS,
"DRM_UT_DP",
"DRM_UT_DRMRES");
 
-static struct ddebug_class_param drm_debug_bitmap = {
-   .bits = &__drm_debug,
-   .flags = "p",
-   .map = &drm_debug_classes,
-};
-module_param_cb(debug, ¶m_ops_dyndbg_classes, &drm_debug_bitmap, 0600);
+DRM_CLASSMAP_PARAM_REF(debug, __drm_debug, drm_debug_classes, p);
+
 #endif
 
 void __drm_puts_coredump(struct drm_printer *p, const char *str)
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 706afc97c79c..94d4f5500030 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -322,11 +322,13 @@ enum drm_debug_category {
 };
 
 #ifdef CONFIG_DRM_USE_DYNAMIC_DEBUG
-#define DRM_CLASSMAP_DEFINE(...) DYNDBG_CLASSMAP_DEFINE(__VA_ARGS__)
-#define DRM_CLASSMAP_USE(name)   DYNDBG_CLASSMAP_USE(name)
+#define DRM_CLASSMAP_DEFINE(...)   DYNDBG_CLASSMAP_DEFINE(__VA_ARGS__)
+#define DRM_CLASSMAP_USE(name) DYNDBG_CLASSMAP_USE(name)
+#define DRM_CLASSMAP_PARAM_REF(...)DYNDBG_CLASSMAP_PARAM_REF(__VA_ARGS__)
 #else
 #define DRM_CLASSMAP_DEFINE(...)
 #define DRM_CLASSMAP_USE(name)
+#define DRM_CLASSMAP_PARAM_REF(...)
 #endif
 
 static inline bool drm_debug_enabled_raw(enum drm_debug_category category)
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 373d152f4285..2a7832b1ba5b 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -91,7 +91,7 @@ struct ddebug_class_map {
  * used to validate a "class FOO .." >control command on the module
  */
 #define DYNDBG_CLASSMAP_DEFINE(_var, _maptype, _base, ...) \
-   const char *_var##_classnames[] = { __VA_ARGS__ };  \
+   static const char *_var##_classnames[] = { __VA_ARGS__ };   \
struct ddebug_class_map __aligned(8) __used \
__section("__dyndbg_classes") _var = {  \
.mod = THIS_MODULE, \
@@ -145,6 +145,41 @@ struct ddebug_class_param {
const struct ddebug_class_map *map;
 };
 
+/**
+ * DYNDBG_CLASSMAP_PARAM - wrap a dyndbg-classmap with a controlling sys-param
+ * @_name  sysfs node name
+ * @_var   name of the struct classmap var defining the controlled classes
+ * @_flags flags to be toggled, typically just 'p'
+ *
+ * Creates a sysfs-param to control the classes defined by the
+ * classmap.  Keeps bits in a private/static
+ */
+#define DYNDBG_CLASSMAP_PARAM(_name, _var, _flags) \
+   static unsigned long _name##_bvec;  \
+   __DYNDBG_CLASSMAP_PARAM(_name, _name##_bvec, _var, _flags)
+
+/**
+ * DYNDBG_CLASSMAP_PARAM_REF - wrap a dyndbg-classmap with a controlling 
sys-param
+ * @_name  sysfs node name
+ * @_bits  name of the module's unsigned long bit-vector, ex: __drm_debug
+ * @_var   name of the struct classmap var defining the controlled classes
+ * @_flags flags to be toggled, typically just 'p'
+ *
+ * Creates a sysfs-param to control the classmap, keeping bitvec in user 
@_bits.
+ * This lets drm use __drm_debug elsewhere too.
+ */
+#define DYNDBG_CLASSMAP_PARAM_REF(_name, _bits, _var, _flags)  \
+   __DYNDBG_CLASSMAP_PARAM(_name, _bits, _var, _flags)
+
+#define __DYNDBG_CLASSMAP_PARAM(_name, _bits, _var, _flags)\
+   static struct ddebug_class_param _name##_##_flags = {   \
+   .bits = &(_bits),   \
+   .flags = #_flags,   \
+   .map = &(_var), \
+   };  \
+   module_param_cb(_name, ¶m_ops_dyndbg_classes,   \
+

[PATCH v7d 14/23] dyndbg-API: fix CONFIG_DRM_USE_DYNAMIC_DEBUG regression

2023-10-31 Thread Jim Cromie

DECLARE_DYNDBG_CLASSMAP() has a design error; its usage fails a basic
K&R rule: "define once, refer many times".

When DRM_USE_DYNAMIC_DEBUG=y, DECLARE_DYNDBG_CLASSMAP() is used across
DRM core & drivers; they all repeat the same classmap-defn args, which
must match for the modules to respond together when DRM.debug
categories are enabled.

Worse, it causes the CONFIG_DRM_USE_DYNAMIC_DEBUG=Y regression; 1st
drm.ko loads, and dyndbg initializes its DRM.debug callsites, then a
drm-driver loads, but too late - it missed the DRM.debug enablement.

So replace it with 2 macros:
  DYNDBG_CLASSMAP_DEFINE - invoked once from core - drm.ko
  DYNDBG_CLASSMAP_USE- from all drm drivers and helpers.

DYNDBG_CLASSMAP_DEFINE: based on DECLARE_DYNDBG_CLASSMAP, but now it
drops the static on the constructed classmap variable, and exports it
instead.

DYNDBG_CLASSMAP_USE: then refers to the exported var by name:
* used from drivers, helper-mods
* lets us drop the repetitive "classname" args
* fixes 2nd-defn problem
* creates a ddebug_class_user record in new __dyndbg_class_users section
  this allows ddebug_add_module(etal) to handle them per-module.

The distinction, and the usage record, allows dyndbg to initialize the
driver's DRM.debug callsites separately after it is modprobed.

Since DRM now needs updates to use the new macros, it also gets 2
wrappers: DRM_CLASSMAP_DEFINE, DRM_CLASSMAP_USE which declutter the
users by hiding the ifdef CONFIG_DRM_USE_DYNAMIC_DEBUG.

1st, dyndbg's existing __dyndbg_classes[] section does:

. catalogs the classmaps defined by the module (or builtin modules)
. authorizes dyndbg to >control those class'd prdbgs for the module.
. DYNDBG_CLASSMAP_DEFINE(and old one) creates classmaps in this section.

This patch adds __dyndbg_class_users[] section:

. catalogs uses/references to the classmap definitions.
. authorizes dyndbg to >control those class'd prdbgs in ref'g module.
. DYNDBG_CLASSMAP_USE() creates classmap-user records in this section.

Now ddebug_add_module(etal) can handle classmap-uses similar to (and
after) classmaps; when a dependent module is loaded, its parent's
kernel params are scanned to find if a param is wired to dyndbg's
param-ops, whose classmap ref matches the one ref'd by the client.

To support this, there are a few data/header changes:

. new struct ddebug_class_user
  contains: user-module-name, &classmap-defn
  it records drm-driver's use of a classmap in the section, allowing lookup

struct ddebug_info gets 2 new fields to encapsulate the new section:
  class_users, num_class_users.
  set by dynamic_debug_init() for builtins.
  or by kernel/module/main:load_info() for loadable modules.

vmlinux.lds.h: new BOUNDED_SECTION for __dyndbg_class_users

dynamic_debug.c has 2 changes in ddebug_add_module(), ddebug_change():

ddebug_add_module() already calls ddebug_attach_module_classes()
to handle classmaps DEFINEd by a module, now it also calls
ddebug_attach_user_module_classes() to handle USEd classmaps.  To
avoid this work when possible, 1st scan the module's descriptors and
count the number of class'd pr_debugs.

ddebug_attach_user_module_classes() scans the module's class_users
section, follows the refs to the parent's classmap, and calls
ddebug_apply_params() on each.  It also avoids work by checking the
module's class-ct.

ddebug_apply_params(new fn):

It scans module's/builtin kernel-params, calls ddebug_match_apply_kparam
for each to find the params/sysfs-nodes which may be wired to a classmap.

ddebug_match_apply_kparam(new fn):

1st, it tests the kernel-param.ops is dyndbg's; this guarantees that
the attached arg is a struct ddebug_class_param, which has a ref to
the param's state, and to the classmap defining the param's handling.

2nd, it requires that the classmap ref'd by the kparam is the one
we're called for; modules can use many separate classmaps (as
test_dynamic_debug does).

Then apply the "parent" kparam's setting to the dependent module,
using ddebug_apply_class_bitmap().

ddebug_change(and callees) also gets adjustments:

ddebug_find_valid_class(): This does a search over the module's
classmaps, looking for the class FOO echo'd to >control.  So now it
searches over __dyndbg_class_users[] after __dyndbg_classes[].

ddebug_class_name(): return class-names for defined AND used classes.

test_dynamic_debug.c, test_dynamic_debug_submod.c:

This (already) demonstrates the 2 types of classmaps & sysfs-params,
following the 4-part recipe:

1. define an enum for the classmap: DRM.debug has DRM_UT_{CORE,KMS,...}
   multiple classes must share 0-62 classid space.
2. DYNDBG_CLASSMAP_DEFINE(.. DRM_UT_{CORE,KMS,...})
3. DYNDBG_CLASSMAP_PARAM* (classmap)
4. DYNDBG_CLASSMAP_USE()
   by _submod only, skipping 2,3

Move all the enum declarations together, to better explain how they
share the 0..62 class-id space available to a module (non-overlapping
subranges).

reorg macros 2,3 by name.  This gives a tabular format, making it easy
to see the pattern of repetition

[PATCH v7d 11/23] dyndbg: tighten fn-sig of ddebug_apply_class_bitmap

2023-10-31 Thread Jim Cromie

old_bits arg is currently a pointer to the input bits, but this could
allow inadvertent changes to the input by the fn.  Disallow this.
And constify new_bits while here.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 8158943b350d..8beb98a831f5 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -593,7 +593,8 @@ static int ddebug_exec_queries(char *query, const char 
*modname)
 
 /* apply a new class-param setting */
 static int ddebug_apply_class_bitmap(const struct ddebug_class_param *dcp,
-unsigned long *new_bits, unsigned long 
*old_bits,
+const unsigned long *new_bits,
+const unsigned long old_bits,
 const char *query_modname)
 {
 #define QUERY_SIZE 128
@@ -602,12 +603,12 @@ static int ddebug_apply_class_bitmap(const struct 
ddebug_class_param *dcp,
int matches = 0;
int bi, ct;
 
-   if (*new_bits != *old_bits)
+   if (*new_bits != old_bits)
v2pr_info("apply bitmap: 0x%lx to: 0x%lx for %s\n", *new_bits,
- *old_bits, query_modname ?: "'*'");
+ old_bits, query_modname ?: "'*'");
 
for (bi = 0; bi < map->length; bi++) {
-   if (test_bit(bi, new_bits) == test_bit(bi, old_bits))
+   if (test_bit(bi, new_bits) == test_bit(bi, &old_bits))
continue;
 
snprintf(query, QUERY_SIZE, "class %s %c%s", 
map->class_names[bi],
@@ -619,9 +620,9 @@ static int ddebug_apply_class_bitmap(const struct 
ddebug_class_param *dcp,
v2pr_info("bit_%d: %d matches on class: %s -> 0x%lx\n", bi,
  ct, map->class_names[bi], *new_bits);
}
-   if (*new_bits != *old_bits)
+   if (*new_bits != old_bits)
v2pr_info("applied bitmap: 0x%lx to: 0x%lx for %s\n", *new_bits,
- *old_bits, query_modname ?: "'*'");
+ old_bits, query_modname ?: "'*'");
 
return matches;
 }
@@ -678,7 +679,7 @@ static int param_set_dyndbg_classnames(const char *instr, 
const struct kernel_pa
continue;
}
curr_bits ^= BIT(cls_id);
-   totct += ddebug_apply_class_bitmap(dcp, &curr_bits, 
dcp->bits, NULL);
+   totct += ddebug_apply_class_bitmap(dcp, &curr_bits, 
*dcp->bits, NULL);
*dcp->bits = curr_bits;
v2pr_info("%s: changed bit %d:%s\n", KP_NAME(kp), 
cls_id,
  map->class_names[cls_id]);
@@ -688,7 +689,7 @@ static int param_set_dyndbg_classnames(const char *instr, 
const struct kernel_pa
old_bits = CLASSMAP_BITMASK(*dcp->lvl);
curr_bits = CLASSMAP_BITMASK(cls_id + (wanted ? 1 : 0 
));
 
-   totct += ddebug_apply_class_bitmap(dcp, &curr_bits, 
&old_bits, NULL);
+   totct += ddebug_apply_class_bitmap(dcp, &curr_bits, 
old_bits, NULL);
*dcp->lvl = (cls_id + (wanted ? 1 : 0));
v2pr_info("%s: changed bit-%d: \"%s\" %lx->%lx\n", 
KP_NAME(kp), cls_id,
  map->class_names[cls_id], old_bits, 
curr_bits);
@@ -742,7 +743,7 @@ static int param_set_dyndbg_module_classes(const char 
*instr,
inrep &= CLASSMAP_BITMASK(map->length);
}
v2pr_info("bits:0x%lx > %s.%s\n", inrep, modnm ?: "*", 
KP_NAME(kp));
-   totct += ddebug_apply_class_bitmap(dcp, &inrep, dcp->bits, 
modnm);
+   totct += ddebug_apply_class_bitmap(dcp, &inrep, *dcp->bits, 
modnm);
*dcp->bits = inrep;
break;
case DD_CLASS_TYPE_LEVEL_NUM:
@@ -755,7 +756,7 @@ static int param_set_dyndbg_module_classes(const char 
*instr,
old_bits = CLASSMAP_BITMASK(*dcp->lvl);
new_bits = CLASSMAP_BITMASK(inrep);
v2pr_info("lvl:%ld bits:0x%lx > %s\n", inrep, new_bits, 
KP_NAME(kp));
-   totct += ddebug_apply_class_bitmap(dcp, &new_bits, &old_bits, 
modnm);
+   totct += ddebug_apply_class_bitmap(dcp, &new_bits, old_bits, 
modnm);
*dcp->lvl = inrep;
break;
default:
-- 
2.41.0

[PATCH v7d 12/23] dyndbg: reduce verbose=3 messages in ddebug_add_module

2023-10-31 Thread Jim Cromie

The fn currently says "add-module", then "skipping" if the module has
no prdbgs.  Just check 1st and return quietly.

no functional change

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 8beb98a831f5..45870a699507 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -1242,11 +1242,10 @@ static int ddebug_add_module(struct _ddebug_info *di, 
const char *modname)
 {
struct ddebug_table *dt;
 
-   v3pr_info("add-module: %s.%d sites\n", modname, di->num_descs);
-   if (!di->num_descs) {
-   v3pr_info(" skip %s\n", modname);
+   if (!di->num_descs)
return 0;
-   }
+
+   v3pr_info("add-module: %s %d sites\n", modname, di->num_descs);
 
dt = kzalloc(sizeof(*dt), GFP_KERNEL);
if (dt == NULL) {
-- 
2.41.0

[PATCH v7d 13/23] dyndbg-API: remove DD_CLASS_TYPE_(DISJOINT|LEVEL)_NAMES and code

2023-10-31 Thread Jim Cromie

Remove the NAMED class types; these 2 classmap types accept class
names at the PARAM interface, for example:

  echo +DRM_UT_CORE,-DRM_UT_KMS > /sys/module/drm/parameters/debug_names

The code works, but it's only used by test-dynamic-debug, and wasn't
asked for by anyone else, so simplify things for now.

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h |  19 ++-
 lib/dynamic_debug.c   | 103 +++---
 lib/test_dynamic_debug.c  |  26 -
 3 files changed, 12 insertions(+), 136 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 8116d0a0d33a..8eaf8eabdc8d 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -61,24 +61,13 @@ struct _ddebug {
 enum class_map_type {
DD_CLASS_TYPE_DISJOINT_BITS,
/**
-* DD_CLASS_TYPE_DISJOINT_BITS: classes are independent, one per bit.
-* expecting hex input. Built for drm.debug, basis for other types.
+* DD_CLASS_TYPE_DISJOINT_BITS: classes are independent, mapped to 
bits[0..N].
+* Expects hex input. Built for drm.debug, basis for other types.
 */
DD_CLASS_TYPE_LEVEL_NUM,
/**
-* DD_CLASS_TYPE_LEVEL_NUM: input is numeric level, 0-N.
-* N turns on just bits N-1 .. 0, so N=0 turns all bits off.
-*/
-   DD_CLASS_TYPE_DISJOINT_NAMES,
-   /**
-* DD_CLASS_TYPE_DISJOINT_NAMES: input is a CSV of [+-]CLASS_NAMES,
-* classes are independent, like _DISJOINT_BITS.
-*/
-   DD_CLASS_TYPE_LEVEL_NAMES,
-   /**
-* DD_CLASS_TYPE_LEVEL_NAMES: input is a CSV of [+-]CLASS_NAMES,
-* intended for names like: INFO,DEBUG,TRACE, with a module prefix
-* avoid EMERG,ALERT,CRIT,ERR,WARNING: they're not debug
+* DD_CLASS_TYPE_LEVEL_NUM: input is numeric level, 0..N.
+* Input N turns on bits 0..N-1
 */
 };
 
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 45870a699507..91c8b67fd8f8 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -632,77 +632,6 @@ static int ddebug_apply_class_bitmap(const struct 
ddebug_class_param *dcp,
 
 #define CLASSMAP_BITMASK(width) ((1UL << (width)) - 1)
 
-/* accept comma-separated-list of [+-] classnames */
-static int param_set_dyndbg_classnames(const char *instr, const struct 
kernel_param *kp)
-{
-   const struct ddebug_class_param *dcp = kp->arg;
-   const struct ddebug_class_map *map = dcp->map;
-   unsigned long curr_bits, old_bits;
-   char *cl_str, *p, *tmp;
-   int cls_id, totct = 0;
-   bool wanted;
-
-   cl_str = tmp = kstrdup(instr, GFP_KERNEL);
-   p = strchr(cl_str, '\n');
-   if (p)
-   *p = '\0';
-
-   /* start with previously set state-bits, then modify */
-   curr_bits = old_bits = *dcp->bits;
-   vpr_info("\"%s\" > %s:0x%lx\n", cl_str, KP_NAME(kp), curr_bits);
-
-   for (; cl_str; cl_str = p) {
-   p = strchr(cl_str, ',');
-   if (p)
-   *p++ = '\0';
-
-   if (*cl_str == '-') {
-   wanted = false;
-   cl_str++;
-   } else {
-   wanted = true;
-   if (*cl_str == '+')
-   cl_str++;
-   }
-   cls_id = match_string(map->class_names, map->length, cl_str);
-   if (cls_id < 0) {
-   pr_err("%s unknown to %s\n", cl_str, KP_NAME(kp));
-   continue;
-   }
-
-   /* have one or more valid class_ids of one *_NAMES type */
-   switch (map->map_type) {
-   case DD_CLASS_TYPE_DISJOINT_NAMES:
-   /* the +/- pertains to a single bit */
-   if (test_bit(cls_id, &curr_bits) == wanted) {
-   v3pr_info("no change on %s\n", cl_str);
-   continue;
-   }
-   curr_bits ^= BIT(cls_id);
-   totct += ddebug_apply_class_bitmap(dcp, &curr_bits, 
*dcp->bits, NULL);
-   *dcp->bits = curr_bits;
-   v2pr_info("%s: changed bit %d:%s\n", KP_NAME(kp), 
cls_id,
- map->class_names[cls_id]);
-   break;
-   case DD_CLASS_TYPE_LEVEL_NAMES:
-   /* cls_id = N in 0..max. wanted +/- determines N or N-1 
*/
-   old_bits = CLASSMAP_BITMASK(*dcp->lvl);
-   curr_bits = CLASSMAP_BITMASK(cls_id + (wanted ? 1 : 0 
));
-
-   totct += ddebug_apply_class_bitmap(dcp, &curr_bits, 
old_bits, NULL);
-   *dcp->lvl = (cls_id + (wanted ? 1 : 0));
-   v2pr_info("%s: changed bit-%d: \"%s\" %lx->%lx\n", 
KP_NAME(kp), cls_id,
- map->

[PATCH v7d 10/23] dyndbg: tighten ddebug_class_name() 1st arg type

2023-10-31 Thread Jim Cromie

Change function's 1st arg-type, and deref in the caller.
The fn doesn't need any other fields in the struct.

no functional change.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index b07aab422604..8158943b350d 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -1117,12 +1117,12 @@ static void *ddebug_proc_next(struct seq_file *m, void 
*p, loff_t *pos)
 #define class_in_range(class_id, map)  \
(class_id >= map->base && class_id < map->base + map->length)
 
-static const char *ddebug_class_name(struct ddebug_iter *iter, struct _ddebug 
*dp)
+static const char *ddebug_class_name(struct ddebug_table *dt, struct _ddebug 
*dp)
 {
-   struct ddebug_class_map *map = iter->table->classes;
-   int i, nc = iter->table->num_classes;
+   struct ddebug_class_map *map = dt->classes;
+   int i;
 
-   for (i = 0; i < nc; i++, map++)
+   for (i = 0; i < dt->num_classes; i++, map++)
if (class_in_range(dp->class_id, map))
return map->class_names[dp->class_id - map->base];
 
@@ -1156,7 +1156,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
seq_puts(m, "\"");
 
if (dp->class_id != _DPRINTK_CLASS_DFLT) {
-   class = ddebug_class_name(iter, dp);
+   class = ddebug_class_name(iter->table, dp);
if (class)
seq_printf(m, " class:%s", class);
else
-- 
2.41.0

[PATCH v7d 09/23] dyndbg: silence debugs with no-change updates

2023-10-31 Thread Jim Cromie

check for actual changes before announcing them, declutter logs.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index b0e11f6bfaa2..b07aab422604 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -591,7 +591,7 @@ static int ddebug_exec_queries(char *query, const char 
*modname)
return nfound;
 }
 
-/* apply a new bitmap to the sys-knob's current bit-state */
+/* apply a new class-param setting */
 static int ddebug_apply_class_bitmap(const struct ddebug_class_param *dcp,
 unsigned long *new_bits, unsigned long 
*old_bits,
 const char *query_modname)
@@ -602,8 +602,9 @@ static int ddebug_apply_class_bitmap(const struct 
ddebug_class_param *dcp,
int matches = 0;
int bi, ct;
 
-   v2pr_info("apply bitmap: 0x%lx to: 0x%lx for %s\n", *new_bits, 
*old_bits,
- query_modname ?: "");
+   if (*new_bits != *old_bits)
+   v2pr_info("apply bitmap: 0x%lx to: 0x%lx for %s\n", *new_bits,
+ *old_bits, query_modname ?: "'*'");
 
for (bi = 0; bi < map->length; bi++) {
if (test_bit(bi, new_bits) == test_bit(bi, old_bits))
@@ -618,8 +619,9 @@ static int ddebug_apply_class_bitmap(const struct 
ddebug_class_param *dcp,
v2pr_info("bit_%d: %d matches on class: %s -> 0x%lx\n", bi,
  ct, map->class_names[bi], *new_bits);
}
-   v2pr_info("applied bitmap: 0x%lx to: 0x%lx for %s\n", *new_bits, 
*old_bits,
- query_modname ?: "");
+   if (*new_bits != *old_bits)
+   v2pr_info("applied bitmap: 0x%lx to: 0x%lx for %s\n", *new_bits,
+ *old_bits, query_modname ?: "'*'");
 
return matches;
 }
-- 
2.41.0

[PATCH v7d 08/23] dyndbg: reduce verbose/debug clutter

2023-10-31 Thread Jim Cromie

currently, for verbose=3, these are logged (blank lines for clarity):

 dyndbg: query 0: "class DRM_UT_CORE +p" mod:*
 dyndbg: split into words: "class" "DRM_UT_CORE" "+p"

 dyndbg: op='+'
 dyndbg: flags=0x1
 dyndbg: *flagsp=0x1 *maskp=0x

 dyndbg: parsed: func="" file="" module="" format="" lineno=0-0 class=...
 dyndbg: no matches for query
 dyndbg: no-match: func="" file="" module="" format="" lineno=0-0 class=...
 dyndbg: processed 1 queries, with 0 matches, 0 errs

That is excessive, so this patch:
 - shrinks 3 lines of 2nd stanza to single line
 - drops 1st 2 lines of 3rd stanza
   3rd is like 1st, with result, not procedure.
   2nd is just status, retold in 4th, with more info.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 14 +++---
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index b67c9b137447..b0e11f6bfaa2 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -266,9 +266,6 @@ static int ddebug_change(const struct ddebug_query *query,
}
mutex_unlock(&ddebug_lock);
 
-   if (!nfound && verbose)
-   pr_info("no matches for query\n");
-
return nfound;
 }
 
@@ -497,7 +494,6 @@ static int ddebug_parse_flags(const char *str, struct 
flag_settings *modifiers)
pr_err("bad flag-op %c, at start of %s\n", *str, str);
return -EINVAL;
}
-   v3pr_info("op='%c'\n", op);
 
for (; *str ; ++str) {
for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) {
@@ -511,7 +507,6 @@ static int ddebug_parse_flags(const char *str, struct 
flag_settings *modifiers)
return -EINVAL;
}
}
-   v3pr_info("flags=0x%x\n", modifiers->flags);
 
/* calculate final flags, mask based upon op */
switch (op) {
@@ -527,7 +522,7 @@ static int ddebug_parse_flags(const char *str, struct 
flag_settings *modifiers)
modifiers->flags = 0;
break;
}
-   v3pr_info("*flagsp=0x%x *maskp=0x%x\n", modifiers->flags, 
modifiers->mask);
+   v3pr_info("op='%c' flags=0x%x maskp=0x%x\n", op, modifiers->flags, 
modifiers->mask);
 
return 0;
 }
@@ -537,7 +532,7 @@ static int ddebug_exec_query(char *query_string, const char 
*modname)
struct flag_settings modifiers = {};
struct ddebug_query query = {};
 #define MAXWORDS 9
-   int nwords, nfound;
+   int nwords;
char *words[MAXWORDS];
 
nwords = ddebug_tokenize(query_string, words, MAXWORDS);
@@ -555,10 +550,7 @@ static int ddebug_exec_query(char *query_string, const 
char *modname)
return -EINVAL;
}
/* actually go and implement the change */
-   nfound = ddebug_change(&query, &modifiers);
-   vpr_info_dq(&query, nfound ? "applied" : "no-match");
-
-   return nfound;
+   return ddebug_change(&query, &modifiers);
 }
 
 /* handle multiple queries in query string, continue on error, return
-- 
2.41.0

[PATCH v7d 06/23] dyndbg: split param_set_dyndbg_classes to module/wrapper fns

2023-10-31 Thread Jim Cromie

rename param_set_dyndbg_classes: add _module_ name & arg, old name is
wrapper to new.  New arg allows caller to specify that only one module
is affected by a prdbgs update.

Outer fn preserves kernel_param interface, passing NULL to inner fn.
This selectivity will be used later to narrow the scope of changes
made.

no functional change.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 37 ++---
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index ba41fdeaaf98..b67c9b137447 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -708,18 +708,9 @@ static int param_set_dyndbg_classnames(const char *instr, 
const struct kernel_pa
return 0;
 }
 
-/**
- * param_set_dyndbg_classes - class FOO >control
- * @instr: string echo>d to sysfs, input depends on map_type
- * @kp:kp->arg has state: bits/lvl, map, map_type
- *
- * Enable/disable prdbgs by their class, as given in the arguments to
- * DECLARE_DYNDBG_CLASSMAP.  For LEVEL map-types, enforce relative
- * levels by bitpos.
- *
- * Returns: 0 or <0 if error.
- */
-int param_set_dyndbg_classes(const char *instr, const struct kernel_param *kp)
+static int param_set_dyndbg_module_classes(const char *instr,
+  const struct kernel_param *kp,
+  const char *modnm)
 {
const struct ddebug_class_param *dcp = kp->arg;
const struct ddebug_class_map *map = dcp->map;
@@ -756,8 +747,8 @@ int param_set_dyndbg_classes(const char *instr, const 
struct kernel_param *kp)
KP_NAME(kp), inrep, 
CLASSMAP_BITMASK(map->length));
inrep &= CLASSMAP_BITMASK(map->length);
}
-   v2pr_info("bits:%lx > %s\n", inrep, KP_NAME(kp));
-   totct += ddebug_apply_class_bitmap(dcp, &inrep, dcp->bits, 
NULL);
+   v2pr_info("bits:0x%lx > %s.%s\n", inrep, modnm ?: "*", 
KP_NAME(kp));
+   totct += ddebug_apply_class_bitmap(dcp, &inrep, dcp->bits, 
modnm);
*dcp->bits = inrep;
break;
case DD_CLASS_TYPE_LEVEL_NUM:
@@ -770,7 +761,7 @@ int param_set_dyndbg_classes(const char *instr, const 
struct kernel_param *kp)
old_bits = CLASSMAP_BITMASK(*dcp->lvl);
new_bits = CLASSMAP_BITMASK(inrep);
v2pr_info("lvl:%ld bits:0x%lx > %s\n", inrep, new_bits, 
KP_NAME(kp));
-   totct += ddebug_apply_class_bitmap(dcp, &new_bits, &old_bits, 
NULL);
+   totct += ddebug_apply_class_bitmap(dcp, &new_bits, &old_bits, 
modnm);
*dcp->lvl = inrep;
break;
default:
@@ -779,6 +770,22 @@ int param_set_dyndbg_classes(const char *instr, const 
struct kernel_param *kp)
vpr_info("%s: total matches: %d\n", KP_NAME(kp), totct);
return 0;
 }
+
+/**
+ * param_set_dyndbg_classes - class FOO >control
+ * @instr: string echo>d to sysfs, input depends on map_type
+ * @kp:kp->arg has state: bits/lvl, map, map_type
+ *
+ * Enable/disable prdbgs by their class, as given in the arguments to
+ * DECLARE_DYNDBG_CLASSMAP.  For LEVEL map-types, enforce relative
+ * levels by bitpos.
+ *
+ * Returns: 0 or <0 if error.
+ */
+int param_set_dyndbg_classes(const char *instr, const struct kernel_param *kp)
+{
+   return param_set_dyndbg_module_classes(instr, kp, NULL);
+}
 EXPORT_SYMBOL(param_set_dyndbg_classes);
 
 /**
-- 
2.41.0

[PATCH v7d 07/23] dyndbg: drop NUM_TYPE_ARRAY

2023-10-31 Thread Jim Cromie

ARRAY_SIZE works here, since array decl is complete.

no functional change

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index b53217e4b711..8116d0a0d33a 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -106,11 +106,9 @@ struct ddebug_class_map {
.mod_name = KBUILD_MODNAME, \
.base = _base,  \
.map_type = _maptype,   \
-   .length = NUM_TYPE_ARGS(char*, __VA_ARGS__),\
+   .length = ARRAY_SIZE(_var##_classnames),\
.class_names = _var##_classnames,   \
}
-#define NUM_TYPE_ARGS(eltype, ...) \
-(sizeof((eltype[]){__VA_ARGS__}) / sizeof(eltype))
 
 /* encapsulate linker provided built-in (or module) dyndbg data */
 struct _ddebug_info {
-- 
2.41.0

[PATCH v7d 05/23] dyndbg: ddebug_apply_class_bitmap - add module arg, select on it

2023-10-31 Thread Jim Cromie

Add query_module param to ddebug_apply_class_bitmap().  This allows
its caller to update just one module, or all (as currently).  We'll
use this later to propagate drm.debug to each USEr as they're
modprobed.

No functional change.

Signed-off-by: Jim Cromie 
---

after `modprobe i915`, here's the module dependencies,
though not all on drm.debug.

bash-5.2# lsmod
Module  Size  Used by
i915 3133440  0
drm_buddy  20480  1 i915
ttm90112  1 i915
i2c_algo_bit   16384  1 i915
video  61440  1 i915
wmi32768  1 video
drm_display_helper200704  1 i915
drm_kms_helper208896  2 drm_display_helper,i915
drm   606208  5 
drm_kms_helper,drm_display_helper,drm_buddy,i915,ttm
cec57344  2 drm_display_helper,i915
---
 lib/dynamic_debug.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index a3be2e7c8c84..ba41fdeaaf98 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -601,7 +601,8 @@ static int ddebug_exec_queries(char *query, const char 
*modname)
 
 /* apply a new bitmap to the sys-knob's current bit-state */
 static int ddebug_apply_class_bitmap(const struct ddebug_class_param *dcp,
-unsigned long *new_bits, unsigned long 
*old_bits)
+unsigned long *new_bits, unsigned long 
*old_bits,
+const char *query_modname)
 {
 #define QUERY_SIZE 128
char query[QUERY_SIZE];
@@ -609,7 +610,8 @@ static int ddebug_apply_class_bitmap(const struct 
ddebug_class_param *dcp,
int matches = 0;
int bi, ct;
 
-   v2pr_info("apply: 0x%lx to: 0x%lx\n", *new_bits, *old_bits);
+   v2pr_info("apply bitmap: 0x%lx to: 0x%lx for %s\n", *new_bits, 
*old_bits,
+ query_modname ?: "");
 
for (bi = 0; bi < map->length; bi++) {
if (test_bit(bi, new_bits) == test_bit(bi, old_bits))
@@ -618,12 +620,15 @@ static int ddebug_apply_class_bitmap(const struct 
ddebug_class_param *dcp,
snprintf(query, QUERY_SIZE, "class %s %c%s", 
map->class_names[bi],
 test_bit(bi, new_bits) ? '+' : '-', dcp->flags);
 
-   ct = ddebug_exec_queries(query, NULL);
+   ct = ddebug_exec_queries(query, query_modname);
matches += ct;
 
v2pr_info("bit_%d: %d matches on class: %s -> 0x%lx\n", bi,
  ct, map->class_names[bi], *new_bits);
}
+   v2pr_info("applied bitmap: 0x%lx to: 0x%lx for %s\n", *new_bits, 
*old_bits,
+ query_modname ?: "");
+
return matches;
 }
 
@@ -679,7 +684,7 @@ static int param_set_dyndbg_classnames(const char *instr, 
const struct kernel_pa
continue;
}
curr_bits ^= BIT(cls_id);
-   totct += ddebug_apply_class_bitmap(dcp, &curr_bits, 
dcp->bits);
+   totct += ddebug_apply_class_bitmap(dcp, &curr_bits, 
dcp->bits, NULL);
*dcp->bits = curr_bits;
v2pr_info("%s: changed bit %d:%s\n", KP_NAME(kp), 
cls_id,
  map->class_names[cls_id]);
@@ -689,7 +694,7 @@ static int param_set_dyndbg_classnames(const char *instr, 
const struct kernel_pa
old_bits = CLASSMAP_BITMASK(*dcp->lvl);
curr_bits = CLASSMAP_BITMASK(cls_id + (wanted ? 1 : 0 
));
 
-   totct += ddebug_apply_class_bitmap(dcp, &curr_bits, 
&old_bits);
+   totct += ddebug_apply_class_bitmap(dcp, &curr_bits, 
&old_bits, NULL);
*dcp->lvl = (cls_id + (wanted ? 1 : 0));
v2pr_info("%s: changed bit-%d: \"%s\" %lx->%lx\n", 
KP_NAME(kp), cls_id,
  map->class_names[cls_id], old_bits, 
curr_bits);
@@ -752,7 +757,7 @@ int param_set_dyndbg_classes(const char *instr, const 
struct kernel_param *kp)
inrep &= CLASSMAP_BITMASK(map->length);
}
v2pr_info("bits:%lx > %s\n", inrep, KP_NAME(kp));
-   totct += ddebug_apply_class_bitmap(dcp, &inrep, dcp->bits);
+   totct += ddebug_apply_class_bitmap(dcp, &inrep, dcp->bits, 
NULL);
*dcp->bits = inrep;
break;
case DD_CLASS_TYPE_LEVEL_NUM:
@@ -765,7 +770,7 @@ int param_set_dyndbg_classes(const char *instr, const 
struct kernel_param *kp)
old_bits = CLASSMAP_BITMASK(*dcp->lvl);
new_bits = CLASSMAP_BITMASK(inrep);
v2pr_info("lvl:%ld bits:0x%lx > %s\n", inrep, new_bits, 
KP_NAME(kp));
-   totct += ddebug_apply_class_bitmap(dcp, &new_bits, &old_bits);
+   totct += ddebug_apply_class_bi

[PATCH v7d 04/23] dyndbg: replace classmap list with a vector

2023-10-31 Thread Jim Cromie

Classmaps are stored/linked in a section/array, but are each added to
the module's ddebug_table.maps list-head.

This is unnecessary; even when ddebug_attach_classmap() is handling
the builtin section (with classmaps for multiple builtin modules), its
contents are ordered, so a module's possibly multiple classmaps will
be consecutive in the section, and could be treated as a vector/block,
since both start-addy and subrange length are in the ddebug_info arg.

So this changes:

struct ddebug_class_map drops list-head link.

struct ddebug_table drops the list-head maps, and gets: classes &
num_classes for the start-addy and num_classes, placed to improve
struct packing.

The loading: in ddebug_attach_module_classes(), replace the
for-the-modname list-add loop, with a forloop that finds the module's
subrange (start,length) of matching classmaps within the possibly
builtin classmaps vector, and saves those to the ddebug_table.

The reading/using: change list-foreach loops in ddebug_class_name() &
ddebug_find_valid_class() to walk the array from start to length.

Also:
Move #define __outvar up, above an added use in a fn-prototype.
Simplify ddebug_attach_module_classes args, ref has both addy,len.

no functional changes

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h |  1 -
 lib/dynamic_debug.c   | 61 ++-
 2 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 5231aaf361c4..b53217e4b711 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -83,7 +83,6 @@ enum class_map_type {
 };
 
 struct ddebug_class_map {
-   struct list_head link;
struct module *mod;
const char *mod_name;   /* needed for builtins */
const char **class_names;
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index b984ce338921..a3be2e7c8c84 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -45,10 +45,11 @@ extern struct ddebug_class_map __start___dyndbg_classes[];
 extern struct ddebug_class_map __stop___dyndbg_classes[];
 
 struct ddebug_table {
-   struct list_head link, maps;
+   struct list_head link;
const char *mod_name;
-   unsigned int num_ddebugs;
struct _ddebug *ddebugs;
+   struct ddebug_class_map *classes;
+   unsigned int num_ddebugs, num_classes;
 };
 
 struct ddebug_query {
@@ -147,13 +148,15 @@ static void vpr_info_dq(const struct ddebug_query *query, 
const char *msg)
  query->first_lineno, query->last_lineno, query->class_string);
 }
 
+#define __outvar /* filled by callee */
 static struct ddebug_class_map *ddebug_find_valid_class(struct ddebug_table 
const *dt,
- const char 
*class_string, int *class_id)
+   const char 
*class_string,
+   __outvar int *class_id)
 {
struct ddebug_class_map *map;
-   int idx;
+   int i, idx;
 
-   list_for_each_entry(map, &dt->maps, link) {
+   for (map = dt->classes, i = 0; i < dt->num_classes; i++, map++) {
idx = match_string(map->class_names, map->length, class_string);
if (idx >= 0) {
*class_id = idx + map->base;
@@ -164,7 +167,6 @@ static struct ddebug_class_map 
*ddebug_find_valid_class(struct ddebug_table cons
return NULL;
 }
 
-#define __outvar /* filled by callee */
 /*
  * Search the tables for _ddebug's which match the given `query' and
  * apply the `flags' and `mask' to them.  Returns number of matching
@@ -1111,9 +1113,10 @@ static void *ddebug_proc_next(struct seq_file *m, void 
*p, loff_t *pos)
 
 static const char *ddebug_class_name(struct ddebug_iter *iter, struct _ddebug 
*dp)
 {
-   struct ddebug_class_map *map;
+   struct ddebug_class_map *map = iter->table->classes;
+   int i, nc = iter->table->num_classes;
 
-   list_for_each_entry(map, &iter->table->maps, link)
+   for (i = 0; i < nc; i++, map++)
if (class_in_range(dp->class_id, map))
return map->class_names[dp->class_id - map->base];
 
@@ -1197,30 +1200,31 @@ static const struct proc_ops proc_fops = {
.proc_write = ddebug_proc_write
 };
 
-static void ddebug_attach_module_classes(struct ddebug_table *dt,
-struct ddebug_class_map *classes,
-int num_classes)
+static void ddebug_attach_module_classes(struct ddebug_table *dt, struct 
_ddebug_info *di)
 {
struct ddebug_class_map *cm;
-   int i, j, ct = 0;
+   int i, nc = 0;
 
-   for (cm = classes, i = 0; i < num_classes; i++, cm++) {
+   /*
+* Find this module's classmaps in a subrange/wholerange of
+* the builtin/modular classmap vector/section.  Save the start
+* and length of the

[PATCH v7d 03/23] dyndbg: make ddebug_class_param union members same size

2023-10-31 Thread Jim Cromie

struct ddebug_class_param keeps a ref to the state-storage of the
param, make both flavors use the same unsigned long under-type.
ISTM this is simpler and safer.

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 2 +-
 lib/dynamic_debug.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 4fcbf4d4fd0a..5231aaf361c4 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -124,7 +124,7 @@ struct _ddebug_info {
 struct ddebug_class_param {
union {
unsigned long *bits;
-   unsigned int *lvl;
+   unsigned long *lvl;
};
char flags[8];
const struct ddebug_class_map *map;
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index ceb3067a5c83..b984ce338921 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -796,7 +796,7 @@ int param_get_dyndbg_classes(char *buffer, const struct 
kernel_param *kp)
 
case DD_CLASS_TYPE_LEVEL_NAMES:
case DD_CLASS_TYPE_LEVEL_NUM:
-   return scnprintf(buffer, PAGE_SIZE, "%d\n", *dcp->lvl);
+   return scnprintf(buffer, PAGE_SIZE, "%ld\n", *dcp->lvl);
default:
return -1;
}
-- 
2.41.0

[PATCH v7d 02/23] dyndbg: reword "class unknown, " to "class:_UNKNOWN_"

2023-10-31 Thread Jim Cromie

This appears in the control-file to report an unknown class-name, which
indicates that the class_id is not authorized, and dyndbg will ignore
changes to it.  Generally, this means that a DYNDBG_CLASSMAP_DEFINE or
DYNDBG_CLASSMAP_USE is missing.

But the word "unknown" appears in quite a few prdbg formats, so that's
a suboptimal search term to find occurrences of the problem.  Thus
change it to "_UNKNOWN_" which properly shouts the condition.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 6fba6423cc10..ceb3067a5c83 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -1151,7 +1151,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
if (class)
seq_printf(m, " class:%s", class);
else
-   seq_printf(m, " class unknown, _id:%d", dp->class_id);
+   seq_printf(m, " class:_UNKNOWN_ _id:%d", dp->class_id);
}
seq_puts(m, "\n");
 
-- 
2.41.0

[PATCH v7d 01/23] test-dyndbg: fixup CLASSMAP usage error

2023-10-31 Thread Jim Cromie

more careful reading of test output reveals:

lib/test_dynamic_debug.c:103 [test_dynamic_debug]do_cats =pmf "doing 
categories\n"
lib/test_dynamic_debug.c:105 [test_dynamic_debug]do_cats =p "LOW msg\n" 
class:MID
lib/test_dynamic_debug.c:106 [test_dynamic_debug]do_cats =p "MID msg\n" class:HI
lib/test_dynamic_debug.c:107 [test_dynamic_debug]do_cats =_ "HI msg\n" class 
unknown, _id:13

That last line is wrong, the HI class is declared.

But the enum's 1st val (explicitly initialized) was wrong; it must be
_base, not _base+1 (a DECLARE_DYNDBG_CLASSMAP[1] param).  So the last
enumeration exceeded the range of mapped class-id's, which triggered
the "class unknown" report.  I intentionally coded in an error, but
forgot to verify its detection and remove it.

RFC:

This patch fixes a bad usage of DECLARE_DYNDBG_CLASSMAP(), showing
that it is too error-prone.  As noted in test-mod comments:

 * Using the CLASSMAP api:
 * - classmaps must have corresponding enum
 * - enum symbols must match/correlate with class-name strings in the map.
 * - base must equal enum's 1st value
 * - multiple maps must set their base to share the 0-62 class_id space !!
 *   (build-bug-on tips welcome)

Those shortcomings could largely be fixed with a __stringify_list
(which doesn't exist,) used in DECLARE_DYNDBG_CLASSMAP to stringify
__VA_ARGS__.  Then, API would accept DRM_UT_* values literally; all
the categories, in order, and not their stringifications, which
created all the usage complications above.

[1] name changes later to DYNDBG_CLASSMAP_DEFINE

Signed-off-by: Jim Cromie 
---
 lib/test_dynamic_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/test_dynamic_debug.c b/lib/test_dynamic_debug.c
index 8dd250ad022b..a01f0193a419 100644
--- a/lib/test_dynamic_debug.c
+++ b/lib/test_dynamic_debug.c
@@ -75,7 +75,7 @@ DD_SYS_WRAP(disjoint_bits, p);
 DD_SYS_WRAP(disjoint_bits, T);
 
 /* symbolic input, independent bits */
-enum cat_disjoint_names { LOW = 11, MID, HI };
+enum cat_disjoint_names { LOW = 10, MID, HI };
 DECLARE_DYNDBG_CLASSMAP(map_disjoint_names, DD_CLASS_TYPE_DISJOINT_NAMES, 10,
"LOW", "MID", "HI");
 DD_SYS_WRAP(disjoint_names, p);
-- 
2.41.0

[PATCH v7d 00/23] fix DRM_USE_DYNAMIC_DEBUG=y regression

2023-10-31 Thread Jim Cromie

hi Jason, DRM-folk

(v7d - refreshed onto v6.6, patch-21 squashed into 14)

This patchset fixes the chicken-egg initialization problem in the 1st
version of ddebug-class-maps, that DRM-CI uncovered.

The root-problem was DECLARE_DYNDBG_CLASSMAP, which broke the K&R rule:
"define once, refer many".  In patch 14 it is replaced by:

 DYNDBG_CLASSMAP_DEFINE - define and export a struct ddebug_class_map
 DYNDBG_CLASSMAP_USE - ref the exported struct

test-dynamic-debug is also extended with a -submod.ko, in order to
recapitulate the drm & drivers initialization scenario.

The final blocking bug was a missing __align(8) on the ddebug_class_user
record inserted by DYNDBG_CLASSMAP_USE.  This caused DRM=y (builtin
only) to have a corrupt record for drm_kms_helper (a builtin dependent).
Curiously, a clang build did not exhibit this problem.

Here's a part of dmesg, for a DRM=y kernel, booted with
 dynamic_debug.verbose=3 drm.debug=0x10

[0.466747] dyndbg: add-module: drm 406 sites
[0.467569] dyndbg: classes[0]: module:drm base:0 len:10 type:DISJOINT_BITS
[0.467743] dyndbg: module:drm attached 1 classes
[0.468557] dyndbg: builtin class: module:drm base:0 len:10 
type:DISJOINT_BITS
[0.468742] dyndbg:  found kp:drm.debug =0x10
[0.468743] dyndbg:   mapped to: module:drm base:0 len:10 type:DISJOINT_BITS
[0.469742] dyndbg:   drm.debug: classbits: 0x10
[0.470573] dyndbg: apply bitmap: 0x10 to: 0x0 for drm
[0.470743] dyndbg: query 0: "class DRM_UT_ATOMIC +p" mod:drm
[0.471743] dyndbg: split into words: "class" "DRM_UT_ATOMIC" "+p"
[0.472743] dyndbg: op='+' flags=0x1 maskp=0x
[0.473679] dyndbg: parsed: func="" file="" module="drm" format="" 
lineno=0-0 class=DRM_UT_ATOMIC
[0.473749] dyndbg: processed 1 queries, with 0 matches, 0 errs
[0.474742] dyndbg: bit_4: 0 matches on class: DRM_UT_ATOMIC -> 0x10
[0.475742] dyndbg: applied bitmap: 0x10 to: 0x0 for drm
[0.476686] dyndbg: 406 debug prints in module drm
[0.476743] dyndbg: add-module: drm_kms_helper 93 sites
[0.477727] dyndbg: class_ref[0] drm_kms_helper -> drm module:drm base:0 
len:10 type:DISJOINT_BITS
[0.477743] dyndbg: builtin class: module:drm base:0 len:10 
type:DISJOINT_BITS
[0.478742] dyndbg:  found kp:drm.debug =0x10
[0.478743] dyndbg:   mapped to: module:drm base:0 len:10 type:DISJOINT_BITS
[0.479743] dyndbg:   drm.debug: classbits: 0x10
[0.480592] dyndbg: apply bitmap: 0x10 to: 0x0 for drm_kms_helper
[0.480743] dyndbg: query 0: "class DRM_UT_ATOMIC +p" mod:drm_kms_helper
[0.481743] dyndbg: split into words: "class" "DRM_UT_ATOMIC" "+p"
[0.482743] dyndbg: op='+' flags=0x1 maskp=0x
[0.483743] dyndbg: parsed: func="" file="" module="drm_kms_helper" 
format="" lineno=0-0 class=DRM_UT_ATOMIC
[0.484750] dyndbg: class-ref: drm_kms_helper.DRM_UT_ATOMIC  
module:drm_kms_helper nd:93 nc:0 nu:1
[0.485809] dyndbg: processed 1 queries, with 44 matches, 0 errs
[0.486742] dyndbg: bit_4: 44 matches on class: DRM_UT_ATOMIC -> 0x10
[0.487742] dyndbg: applied bitmap: 0x10 to: 0x0 for drm_kms_helper
[0.488743] dyndbg: attach-client-module:  module:drm_kms_helper nd:93 nc:0 
nu:1
[0.489742] dyndbg:  93 debug prints in module drm_kms_helper

I'd like to get this into linux-next, so widespread testing is appreciated.
lkp-robot reported BUILD SUCCESS on it; I'm running it on my amdgpu desktop.
I have scripts to operate the test-module if anyone wants them.

Patches are also at https://github.com/jimc/linux/tree/dd-fix-7d


Jim Cromie (23):
  test-dyndbg: fixup CLASSMAP usage error
  dyndbg: reword "class unknown," to "class:_UNKNOWN_"
  dyndbg: make ddebug_class_param union members same size
  dyndbg: replace classmap list with a vector
  dyndbg: ddebug_apply_class_bitmap - add module arg, select on it
  dyndbg: split param_set_dyndbg_classes to module/wrapper fns
  dyndbg: drop NUM_TYPE_ARRAY
  dyndbg: reduce verbose/debug clutter
  dyndbg: silence debugs with no-change updates
  dyndbg: tighten ddebug_class_name() 1st arg type
  dyndbg: tighten fn-sig of ddebug_apply_class_bitmap
  dyndbg: reduce verbose=3 messages in ddebug_add_module
  dyndbg-API: remove DD_CLASS_TYPE_(DISJOINT|LEVEL)_NAMES and code
  dyndbg-API: fix CONFIG_DRM_USE_DYNAMIC_DEBUG regression
  dyndbg: refactor ddebug_classparam_clamp_input
  dyndbg-API: promote DYNDBG_CLASSMAP_PARAM to API
  dyndbg-doc: add classmap info to howto
  dyndbg: reserve flag bit _DPRINTK_FLAGS_PREFIX_CACHED
  dyndbg: add _DPRINTK_FLAGS_INCL_LOOKUP
  dyndbg: refactor *dynamic_emit_prefix
  drm: use correct ccflags-y spelling
  drm-drivers: DRM_CLASSMAP_USE in 2nd batch of drivers, helpers
  drm: restore CONFIG_DRM_USE_DYNAMIC_DEBUG un-BROKEN

 .../admin-guide/dynamic-debug-howto.rst   |  60 ++-
 MAINTAINERS   |   2 +-
 drivers/gpu/drm/Kconfig   |   3 +-
 drivers/gpu/drm/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdg

Re: [PATCH 2/6] drm/amdgpu: Separate eviction from VM status.

2023-10-31 Thread kernel test robot

Hi Tatsuyuki,

kernel test robot noticed the following build warnings:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on drm/drm-next drm-exynos/exynos-drm-next 
drm-intel/for-linux-next drm-intel/for-linux-next-fixes drm-tip/drm-tip 
linus/master v6.6 next-20231031]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Tatsuyuki-Ishi/drm-amdgpu-Don-t-implicit-sync-PRT-maps/20231031-224530
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:
https://lore.kernel.org/r/20231031134059.171277-3-ishitatsuyuki%40gmail.com
patch subject: [PATCH 2/6] drm/amdgpu: Separate eviction from VM status.
config: arc-randconfig-001-20231101 
(https://download.01.org/0day-ci/archive/20231101/202311010709.xbwkjvaq-...@intel.com/config)
compiler: arceb-elf-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): 
(https://download.01.org/0day-ci/archive/20231101/202311010709.xbwkjvaq-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202311010709.xbwkjvaq-...@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:178: warning: Function parameter or 
>> member 'evicted' not described in 'amdgpu_vm_bo_set_evicted'
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:178: warning: expecting prototype for 
>> amdgpu_vm_bo_evicted(). Prototype was for amdgpu_vm_bo_set_evicted() instead


vim +178 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

dcb388eddb5f1b Nirmoy Das  2021-06-28  168  
bcdc9fd634d1f0 Christian König 2018-08-30  169  /**
bcdc9fd634d1f0 Christian König 2018-08-30  170   * amdgpu_vm_bo_evicted - vm_bo 
is evicted
bcdc9fd634d1f0 Christian König 2018-08-30  171   *
bcdc9fd634d1f0 Christian König 2018-08-30  172   * @vm_bo: vm_bo which is 
evicted
bcdc9fd634d1f0 Christian König 2018-08-30  173   *
bcdc9fd634d1f0 Christian König 2018-08-30  174   * State for PDs/PTs and per VM 
BOs which are not at the location they should
bcdc9fd634d1f0 Christian König 2018-08-30  175   * be.
bcdc9fd634d1f0 Christian König 2018-08-30  176   */
cac82290238e47 Tatsuyuki Ishi  2023-10-31  177  static void 
amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool evicted)
bcdc9fd634d1f0 Christian König 2018-08-30 @178  {
bcdc9fd634d1f0 Christian König 2018-08-30  179  struct amdgpu_vm *vm = 
vm_bo->vm;
bcdc9fd634d1f0 Christian König 2018-08-30  180  struct amdgpu_bo *bo = 
vm_bo->bo;
bcdc9fd634d1f0 Christian König 2018-08-30  181  
757eb2bedd08a1 Philip Yang 2022-09-15  182  
spin_lock(&vm_bo->vm->status_lock);
cac82290238e47 Tatsuyuki Ishi  2023-10-31  183  if (evicted && 
bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
bcdc9fd634d1f0 Christian König 2018-08-30  184  if 
(bo->tbo.type == ttm_bo_type_kernel)
cac82290238e47 Tatsuyuki Ishi  2023-10-31  185  
list_move(&vm_bo->eviction_status, &vm->evicted);
bcdc9fd634d1f0 Christian König 2018-08-30  186  else
cac82290238e47 Tatsuyuki Ishi  2023-10-31  187  
list_move_tail(&vm_bo->eviction_status, &vm->evicted);
cac82290238e47 Tatsuyuki Ishi  2023-10-31  188  } else {
cac82290238e47 Tatsuyuki Ishi  2023-10-31  189  
list_del_init(&vm_bo->eviction_status);
cac82290238e47 Tatsuyuki Ishi  2023-10-31  190  }
757eb2bedd08a1 Philip Yang 2022-09-15  191  
spin_unlock(&vm_bo->vm->status_lock);
bcdc9fd634d1f0 Christian König 2018-08-30  192  }
cac82290238e47 Tatsuyuki Ishi  2023-10-31  193  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

Re: [PATCH v2] drm/radeon: replace 1-element arrays with flexible-array members

2023-10-31 Thread Alex Deucher

On Tue, Oct 31, 2023 at 1:09 PM José Pekkarinen
 wrote:
>
> Reported by coccinelle, the following patch will move the
> following 1 element arrays to flexible arrays.
>
> drivers/gpu/drm/radeon/atombios.h:5523:32-48: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:5545:32-48: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:5461:34-44: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:4447:30-40: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:4236:30-41: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:7095:28-45: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:3896:27-37: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:5443:16-25: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:5454:34-43: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:4603:21-32: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:4628:32-46: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:6285:29-39: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:4296:30-36: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:4756:28-36: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:4064:22-35: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:7327:9-24: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:7332:32-53: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:7362:26-41: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:7369:29-44: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:7349:24-32: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
> drivers/gpu/drm/radeon/atombios.h:7355:27-35: WARNING use flexible-array 
> member instead 
> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
>
> Signed-off-by: José Pekkarinen 
> ---
> [v1 -> v2] removed padding and hinted sensitive cases from original patch

Applied.  Thanks!

Alex

>
>  drivers/gpu/drm/radeon/atombios.h | 42 +++
>  1 file changed, 21 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/atombios.h 
> b/drivers/gpu/drm/radeon/atombios.h
> index 8a6621f1e82c..2db40789235c 100644
> --- a/drivers/gpu/drm/radeon/atombios.h
> +++ b/drivers/gpu/drm/radeon/atombios.h
> @@ -3893,7 +3893,7 @@ typedef struct _ATOM_GPIO_PIN_ASSIGNMENT
>  typedef struct _ATOM_GPIO_PIN_LUT
>  {
>ATOM_COMMON_TABLE_HEADER  sHeader;
> -  ATOM_GPIO_PIN_ASSIGNMENT asGPIO_Pin[1];
> +  ATOM_GPIO_PIN_ASSIGNMENT

Re: [3/3] drm/amdgpu: add a retry for IP discovery init

2023-10-31 Thread Mario Limonciello


On 10/27/2023 10:42, Alex Deucher wrote:

AMD dGPUs have integrated FW that runs as soon as the
device gets power and initializes the board (determines
the amount of memory, provides configuration details to
the driver, etc.).  For direct PCIe attached cards this
happens as soon as power is applied and normally completes
well before the OS has even started loading.  However, with
hotpluggable ports like USB4, the driver needs to wait for
this to complete before initializing the device.

This normally takes 60-100ms, but could take longer on
some older boards periodically due to memory training.

Retry for up to a second.  In the non-hotplug case, there
should be no change in behavior and this should complete
on the first try.

v2: adjust test criteria
v3: adjust checks for the masks, only enable on removable devices
v4: skip bif_fb_en check

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925
Signed-off-by: Alex Deucher 
---


Reviewed-by: Mario Limonciello 


  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 23 +--
  1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 5f9d75900bfa..9ca4d89352d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -99,6 +99,7 @@
  MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY);
  
  #define mmRCC_CONFIG_MEMSIZE	0xde3

+#define mmMP0_SMN_C2PMSG_33 0x16061
  #define mmMM_INDEX 0x0
  #define mmMM_INDEX_HI 0x6
  #define mmMM_DATA 0x1
@@ -239,8 +240,26 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct 
amdgpu_device *adev,
  static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
 uint8_t *binary)
  {
-   uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
-   int ret = 0;
+   uint64_t vram_size;
+   u32 msg;
+   int i, ret = 0;
+
+   /* It can take up to a second for IFWI init to complete on some dGPUs,
+* but generally it should be in the 60-100ms range.  Normally this 
starts
+* as soon as the device gets power so by the time the OS loads this 
has long
+* completed.  However, when a card is hotplugged via e.g., USB4, we 
need to
+* wait for this to complete.  Once the C2PMSG is updated, we can
+* continue.
+*/
+   if (dev_is_removable(&adev->pdev->dev)) {
+   for (i = 0; i < 1000; i++) {
+   msg = RREG32(mmMP0_SMN_C2PMSG_33);
+   if (msg & 0x8000)
+   break;
+   msleep(1);
+   }
+   }
+   vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
  
  	if (vram_size) {

uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;

Re: [2/3] drm/amdgpu: don't use pci_is_thunderbolt_attached()

2023-10-31 Thread Mario Limonciello


On 10/27/2023 10:42, Alex Deucher wrote:

It's only valid on Intel systems with the Intel VSEC.
Use dev_is_removable() instead.  This should do the right
thing regardless of the platform.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925
Signed-off-by: Alex Deucher 
---


Reviewed-by: Mario Limonciello 


  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 
  drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 5 +++--
  2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2381de831271..5c90080e93ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -41,6 +41,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -2223,7 +2224,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct 
amdgpu_device *adev)
   */
  static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
  {
-   struct drm_device *dev = adev_to_drm(adev);
struct pci_dev *parent;
int i, r;
bool total;
@@ -2294,7 +2294,7 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
(amdgpu_is_atpx_hybrid() ||
 amdgpu_has_atpx_dgpu_power_cntl()) &&
((adev->flags & AMD_IS_APU) == 0) &&
-   !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
+   !dev_is_removable(&adev->pdev->dev))
adev->flags |= AMD_IS_PX;
  
  	if (!(adev->flags & AMD_IS_APU)) {

@@ -4138,7 +4138,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  
  	px = amdgpu_device_supports_px(ddev);
  
-	if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&

+   if (px || (!dev_is_removable(&adev->pdev->dev) &&
apple_gmux_detect(NULL, NULL)))
vga_switcheroo_register_client(adev->pdev,
   &amdgpu_switcheroo_ops, px);
@@ -4288,7 +4288,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
  
  	px = amdgpu_device_supports_px(adev_to_drm(adev));
  
-	if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&

+   if (px || (!dev_is_removable(&adev->pdev->dev) &&
apple_gmux_detect(NULL, NULL)))
vga_switcheroo_unregister_client(adev->pdev);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c

index e523627cfe25..df218d5ca775 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -28,6 +28,7 @@
  #include "nbio/nbio_2_3_offset.h"
  #include "nbio/nbio_2_3_sh_mask.h"
  #include 
+#include 
  #include 
  
  #define smnPCIE_CONFIG_CNTL	0x11180044

@@ -361,7 +362,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device 
*adev,
  
  		data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
  
-		if (pci_is_thunderbolt_attached(adev->pdev))

+   if (dev_is_removable(&adev->pdev->dev))
data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT  << 
PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
else
data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << 
PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
@@ -480,7 +481,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device 
*adev)
  
  	def = data = RREG32_PCIE(smnPCIE_LC_CNTL);

data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << 
PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
-   if (pci_is_thunderbolt_attached(adev->pdev))
+   if (dev_is_removable(&adev->pdev->dev))
data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT  << 
PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
else
data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << 
PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;

Re: [1/3] drm/amdgpu: don't use ATRM for external devices

2023-10-31 Thread Mario Limonciello


On 10/27/2023 10:42, Alex Deucher wrote:

The ATRM ACPI method is for fetching the dGPU vbios rom
image on laptops and all-in-one systems.  It should not be
used for external add in cards.  If the dGPU is thunderbolt
connected, don't try ATRM.

v2: pci_is_thunderbolt_attached only works for Intel.  Use
 pdev->external_facing instead.
v3: dev_is_removable() seems to be what we want

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925
Signed-off-by: Alex Deucher 


Reviewed-by: Mario Limonciello 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 38ccec913f00..f3a09ecb7699 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -29,6 +29,7 @@
  #include "amdgpu.h"
  #include "atom.h"
  
+#include 

  #include 
  #include 
  #include 
@@ -287,6 +288,10 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device 
*adev)
if (adev->flags & AMD_IS_APU)
return false;
  
+	/* ATRM is for on-platform devices only */

+   if (dev_is_removable(&adev->pdev->dev))
+   return false;
+
while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != 
NULL) {
dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)

Re: [PATCH] drm/radeon: replace 1-element arrays with flexible-array members

2023-10-31 Thread José Pekkarinen


On 2023-10-31 17:45, Alex Deucher wrote:

On Sat, Oct 28, 2023 at 8:05 AM José Pekkarinen
 wrote:


On 2023-10-27 20:55, Deucher, Alexander wrote:
> [Public]
>
>> -----Original Message-----
>> From: José Pekkarinen 
>> Sent: Friday, October 27, 2023 12:59 PM
>> To: Deucher, Alexander ; Koenig, Christian
>> ; Pan, Xinhui ;
>> sk...@linuxfoundation.org
>> Cc: José Pekkarinen ; airl...@gmail.com;
>> dan...@ffwll.ch; amd-gfx@lists.freedesktop.org; dri-
>> de...@lists.freedesktop.org; linux-ker...@vger.kernel.org;
>> linux-kernel-
>> ment...@lists.linuxfoundation.org
>> Subject: [PATCH] drm/radeon: replace 1-element arrays with
>> flexible-array
>> members
>>
>> Reported by coccinelle, the following patch will move the following 1
>> element
>> arrays to flexible arrays.
>>
>> drivers/gpu/drm/radeon/atombios.h:5523:32-48: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:5545:32-48: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:5461:34-44: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:4447:30-40: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:4236:30-41: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:7044:24-37: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:7054:24-37: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:7095:28-45: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:7553:8-17: WARNING use
>> flexible-array
>> member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:7559:8-17: WARNING use
>> flexible-array
>> member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:3896:27-37: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:5443:16-25: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:5454:34-43: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:4603:21-32: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:6299:32-44: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:4628:32-46: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:6285:29-39: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:4296:30-36: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:4756:28-36: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:4064:22-35: WARNING use flexible-
>> array member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> length-and-one-element-arrays)
>> drivers/gpu/drm/radeon/atombios.h:7327:9-24: WARNING use
>> flexible-array
>> member instead
>> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
>> leng

[PATCH v2] drm/radeon: replace 1-element arrays with flexible-array members

2023-10-31 Thread José Pekkarinen

Reported by coccinelle: this patch converts the following
1-element arrays to flexible-array members.

drivers/gpu/drm/radeon/atombios.h:5523:32-48: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:5545:32-48: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:5461:34-44: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:4447:30-40: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:4236:30-41: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:7095:28-45: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:3896:27-37: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:5443:16-25: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:5454:34-43: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:4603:21-32: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:4628:32-46: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:6285:29-39: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:4296:30-36: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:4756:28-36: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:4064:22-35: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:7327:9-24: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:7332:32-53: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:7362:26-41: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:7369:29-44: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:7349:24-32: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)
drivers/gpu/drm/radeon/atombios.h:7355:27-35: WARNING use flexible-array member 
instead 
(https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays)

Signed-off-by: José Pekkarinen 
---
[v1 -> v2] removed padding and hinted sensitive cases from original patch

 drivers/gpu/drm/radeon/atombios.h | 42 +++
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atombios.h 
b/drivers/gpu/drm/radeon/atombios.h
index 8a6621f1e82c..2db40789235c 100644
--- a/drivers/gpu/drm/radeon/atombios.h
+++ b/drivers/gpu/drm/radeon/atombios.h
@@ -3893,7 +3893,7 @@ typedef struct _ATOM_GPIO_PIN_ASSIGNMENT
 typedef struct _ATOM_GPIO_PIN_LUT
 {
   ATOM_COMMON_TABLE_HEADER  sHeader;
-  ATOM_GPIO_PIN_ASSIGNMENT asGPIO_Pin[1];
+  ATOM_GPIO_PIN_ASSIGNMENT asGPIO_Pin[];
 }ATOM_GPIO_PIN_LUT;
 
 // 
@@ -4061,7 +4061,7 @@ typedef struct _ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT 
//usSrcDstTableOffset
   UCHAR   uc

[PATCH] drm/amdgpu: don't put MQDs in VRAM on ARM | ARM64

2023-10-31 Thread Alex Deucher

Issues were reported with commit 1cfb4d612127
("drm/amdgpu: put MQDs in VRAM") on an ADLINK Ampere
Altra Developer Platform (AVA developer platform).

Various ARM systems seem to have problems related
to PCIe and MMIO access.  In this case, I'm not sure
if this is specific to the ADLINK platform or ARM
in general.  Seems to be some coherency issue with
VRAM.  For now, just don't put MQDs in VRAM on ARM.

Link: https://lists.freedesktop.org/archives/amd-gfx/2023-October/100453.html
Fixes: 1cfb4d612127 ("drm/amdgpu: put MQDs in VRAM")
Signed-off-by: Alex Deucher 
Cc: alexey.kli...@linaro.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index c92e0aba69e1..a2a29dcb2422 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -385,9 +385,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring = &kiq->ring;
u32 domain = AMDGPU_GEM_DOMAIN_GTT;
 
+#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
/* Only enable on gfx10 and 11 for now to avoid changing behavior on 
older chips */
if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
domain |= AMDGPU_GEM_DOMAIN_VRAM;
+#endif
 
/* create MQD for KIQ */
if (!adev->enable_mes_kiq && !ring->mqd_obj) {
-- 
2.41.0

[PATCH v4] drm/amdgpu: Add xcc param to SRIOV kiq write and WREG32_SOC15_IP_NO_KIQ (v4)

2023-10-31 Thread Victor Lu

WREG32/RREG32_SOC15_IP_NO_KIQ and amdgpu_virt_kiq_reg_write_reg_wait
are not using the correct rlcg interface or mec engine, respectively.

Add xcc instance parameter to them.

v4: Use GET_INST and squash commit with:
"drm/amdgpu: Add xcc_inst param to amdgpu_virt_kiq_reg_write_reg_wait"

v3: xcc not needed for MMHUB

v2: rebase

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  6 +++---
 6 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index a0aa624f5a92..e179f022c428 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
-   uint32_t ref, uint32_t mask)
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst)
 {
-   struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+   struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst];
struct amdgpu_ring *ring = &kiq->ring;
signed long r, cnt = 0;
unsigned long flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 858ef21ae515..bb436d41b4ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -334,7 +334,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
-   uint32_t ref, uint32_t mask);
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d8a4fddab9c1..a43d1aa42e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, GET_INST(GC, 0));
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 19eaada35ede..93f100dd5d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -228,7 +228,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, GET_INST(GC, 0));
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3a1050344b59..35ef7529cc8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -817,7 +817,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
 {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
-   u32 j, inv_req, tmp, sem, req, ack;
+   u32 j, inv_req, tmp, sem, req, ack, inst;
const unsigned int eng = 17;
struct amdgpu_vmhub *hub;
 
@@ -832,13 +832,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
/* This is necessary for a HW workaround under SRIOV as well
 * as GFXOFF under bare metal
 */
-   if (adev->gfx.kiq[0].ring.sched.ready &&
+   if (vmhub >= AMDGPU_MMHUB0(0))
+   inst = GET_INST(GC, 0);
+   else
+   inst = vmhub;
+   if (adev->gfx.kiq[inst].ring.sched.ready &&

Re: [PATCH] drm/radeon: replace 1-element arrays with flexible-array members

2023-10-31 Thread Alex Deucher

On Sat, Oct 28, 2023 at 8:05 AM José Pekkarinen
 wrote:
>
> On 2023-10-27 20:55, Deucher, Alexander wrote:
> > [Public]
> >
> >> -----Original Message-----
> >> From: José Pekkarinen 
> >> Sent: Friday, October 27, 2023 12:59 PM
> >> To: Deucher, Alexander ; Koenig, Christian
> >> ; Pan, Xinhui ;
> >> sk...@linuxfoundation.org
> >> Cc: José Pekkarinen ; airl...@gmail.com;
> >> dan...@ffwll.ch; amd-gfx@lists.freedesktop.org; dri-
> >> de...@lists.freedesktop.org; linux-ker...@vger.kernel.org;
> >> linux-kernel-
> >> ment...@lists.linuxfoundation.org
> >> Subject: [PATCH] drm/radeon: replace 1-element arrays with
> >> flexible-array
> >> members
> >>
> >> Reported by coccinelle, the following patch will move the following 1
> >> element
> >> arrays to flexible arrays.
> >>
> >> drivers/gpu/drm/radeon/atombios.h:5523:32-48: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:5545:32-48: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:5461:34-44: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:4447:30-40: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:4236:30-41: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:7044:24-37: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:7054:24-37: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:7095:28-45: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:7553:8-17: WARNING use
> >> flexible-array
> >> member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:7559:8-17: WARNING use
> >> flexible-array
> >> member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:3896:27-37: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:5443:16-25: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:5454:34-43: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:4603:21-32: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:6299:32-44: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:4628:32-46: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:6285:29-39: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:4296:30-36: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:4756:28-36: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> drivers/gpu/drm/radeon/atombios.h:4064:22-35: WARNING use flexible-
> >> array member instead
> >> (https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-
> >> length-and-one-element-arrays)
> >> driver

Re: [PATCH 1/3] drm/amdgpu: don't use ATRM for external devices

2023-10-31 Thread Alex Deucher

Ping on this series?

Alex

On Sat, Oct 28, 2023 at 2:32 AM Alex Deucher  wrote:
>
> The ATRM ACPI method is for fetching the dGPU vbios rom
> image on laptops and all-in-one systems.  It should not be
> used for external add in cards.  If the dGPU is thunderbolt
> connected, don't try ATRM.
>
> v2: pci_is_thunderbolt_attached only works for Intel.  Use
> pdev->external_facing instead.
> v3: dev_is_removable() seems to be what we want
>
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> index 38ccec913f00..f3a09ecb7699 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> @@ -29,6 +29,7 @@
>  #include "amdgpu.h"
>  #include "atom.h"
>
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -287,6 +288,10 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device 
> *adev)
> if (adev->flags & AMD_IS_APU)
> return false;
>
> +   /* ATRM is for on-platform devices only */
> +   if (dev_is_removable(&adev->pdev->dev))
> +   return false;
> +
> while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != 
> NULL) {
> dhandle = ACPI_HANDLE(&pdev->dev);
> if (!dhandle)
> --
> 2.41.0
>

Re: [PATCH] drm/amdgpu: fix error handling in amdgpu_vm_init

2023-10-31 Thread Alex Deucher

On Tue, Oct 31, 2023 at 11:12 AM Christian König
 wrote:
>
> When clearing the root PD fails we need to properly release it again.
>
> Signed-off-by: Christian König 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 +-
>  1 file changed, 16 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d72daf15662f..5877f6e9b893 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2042,7 +2042,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long 
> timeout)
>   * Returns:
>   * 0 for success, error for failure.
>   */
> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t 
> xcp_id)
> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> +  int32_t xcp_id)
>  {
> struct amdgpu_bo *root_bo;
> struct amdgpu_bo_vm *root;
> @@ -2061,6 +2062,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
> amdgpu_vm *vm, int32_t xcp
> INIT_LIST_HEAD(&vm->done);
> INIT_LIST_HEAD(&vm->pt_freed);
> INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
> +   INIT_KFIFO(vm->faults);
>
> /* create scheduler entities for page table updates */
> r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
> @@ -2103,34 +2105,33 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
> amdgpu_vm *vm, int32_t xcp
> false, &root, xcp_id);
> if (r)
> goto error_free_delayed;
> -   root_bo = &root->bo;
> +
> +   root_bo = amdgpu_bo_ref(&root->bo);
> r = amdgpu_bo_reserve(root_bo, true);
> -   if (r)
> -   goto error_free_root;
> +   if (r) {
> +   amdgpu_bo_unref(&root->shadow);
> +   amdgpu_bo_unref(&root_bo);
> +   goto error_free_delayed;
> +   }
>
> +   amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
> r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
> if (r)
> -   goto error_unreserve;
> -
> -   amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
> +   goto error_free_root;
>
> r = amdgpu_vm_pt_clear(adev, vm, root, false);
> if (r)
> -   goto error_unreserve;
> +   goto error_free_root;
>
> amdgpu_bo_unreserve(vm->root.bo);
> -
> -   INIT_KFIFO(vm->faults);
> +   amdgpu_bo_unref(&root_bo);
>
> return 0;
>
> -error_unreserve:
> -   amdgpu_bo_unreserve(vm->root.bo);
> -
>  error_free_root:
> -   amdgpu_bo_unref(&root->shadow);
> +   amdgpu_vm_pt_free_root(adev, vm);
> +   amdgpu_bo_unreserve(vm->root.bo);
> amdgpu_bo_unref(&root_bo);
> -   vm->root.bo = NULL;
>
>  error_free_delayed:
> dma_fence_put(vm->last_tlb_flush);
> --
> 2.34.1
>

Re: [PATCH 5/6] drm/amdgpu: Add flag to disable implicit sync for GEM operations.

2023-10-31 Thread Michel Dänzer

On 10/31/23 15:34, Christian König wrote:
> Am 31.10.23 um 15:14 schrieb Michel Dänzer:
> 
>> FWIW, RADV will also want explicit sync in the CS ioctl.
> You can replace that with the DMA-buf IOCTLs like Faith is planning to do for 
> NVK. 

Those ioctls cannot disable implicit sync for the CS ioctl. They can be used 
for making implicit sync work correctly for individual BOs though, once 
implicit sync is disabled for the CS ioctl.

-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer

[PATCH] drm/amdgpu: fix error handling in amdgpu_vm_init

2023-10-31 Thread Christian König

When clearing the root PD fails we need to properly release it again.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 +-
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d72daf15662f..5877f6e9b893 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2042,7 +2042,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long 
timeout)
  * Returns:
  * 0 for success, error for failure.
  */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t 
xcp_id)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+  int32_t xcp_id)
 {
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
@@ -2061,6 +2062,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm, int32_t xcp
INIT_LIST_HEAD(&vm->done);
INIT_LIST_HEAD(&vm->pt_freed);
INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
+   INIT_KFIFO(vm->faults);
 
/* create scheduler entities for page table updates */
r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
@@ -2103,34 +2105,33 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm, int32_t xcp
false, &root, xcp_id);
if (r)
goto error_free_delayed;
-   root_bo = &root->bo;
+
+   root_bo = amdgpu_bo_ref(&root->bo);
r = amdgpu_bo_reserve(root_bo, true);
-   if (r)
-   goto error_free_root;
+   if (r) {
+   amdgpu_bo_unref(&root->shadow);
+   amdgpu_bo_unref(&root_bo);
+   goto error_free_delayed;
+   }
 
+   amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
if (r)
-   goto error_unreserve;
-
-   amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
+   goto error_free_root;
 
r = amdgpu_vm_pt_clear(adev, vm, root, false);
if (r)
-   goto error_unreserve;
+   goto error_free_root;
 
amdgpu_bo_unreserve(vm->root.bo);
-
-   INIT_KFIFO(vm->faults);
+   amdgpu_bo_unref(&root_bo);
 
return 0;
 
-error_unreserve:
-   amdgpu_bo_unreserve(vm->root.bo);
-
 error_free_root:
-   amdgpu_bo_unref(&root->shadow);
+   amdgpu_vm_pt_free_root(adev, vm);
+   amdgpu_bo_unreserve(vm->root.bo);
amdgpu_bo_unref(&root_bo);
-   vm->root.bo = NULL;
 
 error_free_delayed:
dma_fence_put(vm->last_tlb_flush);
-- 
2.34.1

Re: [PATCH 2/6] drm/amdgpu: Separate eviction from VM status.

2023-10-31 Thread Christian König


Am 31.10.23 um 15:39 schrieb Tatsuyuki Ishi:



On Oct 31, 2023, at 22:55, Christian König  wrote:

Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:

In short, eviction never really belonged to the vm_status state machine.

I strongly disagree to that.


Even when evicted, the BO could belong to either the moved or done state.
The "evicted" state needed to handle both cases, causing greater confusion.

Additionally, there were inconsistencies in the definition of an evicted
BO. Some places are based on the `evict` parameter passed from the TTM move
callback, while the others were updated based on whether the BO got its
optimal placement. The second is more accurate for our use case. With this
refactor, the evicted state is solely determined by the second rule.

That strongly sounds like you don't understand what the evicted state is good 
for.

The evicted state is for page directories, page tables and per VM BOs which 
needs to move around before doing the next CS.

Please further explain what you try to do here.

This is mainly an attempt to address inconsistency in the definition of 
“eviction”. The TTM move callback sets eviction when eviction happens through 
ttm_bo_evict. This is however not the only way a BO might end up outside its 
preferred domains.

amdgpu_vm_bo_update later updates the eviction state based on whether the BO is 
in its preferred domains. In my understanding this includes all cases where the 
BO is evicted through ttm_bo_evict. Therefore we should apply this definition 
right from the move callback, not only after amdgpu_vm_bo_update has been 
called at least once.


No, that is something completely separated. The evicted state just means 
that we need to re-validate the BO.


One cause of this is that TTM moved the BO.

But a different cause is that TTM moved the BO, we tried to validate it 
but fell back to GTT for now and called amdgpu_vm_bo_update(). 
amdgpu_vm_bo_update() then moves the BO into the evicted state again so 
that we try to move it into VRAM on the next command submission.


This is purely an optimization done to create enough pressure so that 
TTM can do its work.


Christian.



Tatsuyuki.


Regards,
Christian.


Signed-off-by: Tatsuyuki Ishi 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 67 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |  1 +
  3 files changed, 29 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7b9762f1cddd..dd6f72e2a1d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -174,19 +174,23 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
   * State for PDs/PTs and per VM BOs which are not at the location they should
   * be.
   */
-static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
+static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool 
evicted)
  {
struct amdgpu_vm *vm = vm_bo->vm;
struct amdgpu_bo *bo = vm_bo->bo;
  - vm_bo->moved = true;
spin_lock(&vm_bo->vm->status_lock);
-   if (bo->tbo.type == ttm_bo_type_kernel)
-   list_move(&vm_bo->vm_status, &vm->evicted);
-   else
-   list_move_tail(&vm_bo->vm_status, &vm->evicted);
+   if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
+   if (bo->tbo.type == ttm_bo_type_kernel)
+   list_move(&vm_bo->eviction_status, &vm->evicted);
+   else
+   list_move_tail(&vm_bo->eviction_status, &vm->evicted);
+   } else {
+   list_del_init(&vm_bo->eviction_status);
+   }
spin_unlock(&vm_bo->vm->status_lock);
  }
+
  /**
   * amdgpu_vm_bo_moved - vm_bo is moved
   *
@@ -310,6 +314,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->bo = bo;
base->next = NULL;
INIT_LIST_HEAD(&base->vm_status);
+   INIT_LIST_HEAD(&base->eviction_status);
if (!bo)
return;
@@ -336,7 +341,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 * is currently evicted. add the bo to the evicted list to make sure it
 * is validated on next vm use to avoid fault.
 * */
-   amdgpu_vm_bo_evicted(base);
+   amdgpu_vm_bo_set_evicted(base, true);
  }
/**
@@ -460,7 +465,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
while (!list_empty(&vm->evicted)) {
bo_base = list_first_entry(&vm->evicted,
   struct amdgpu_vm_bo_base,
-  vm_status);
+  eviction_status);
spin_unlock(&vm->status_lock);
bo = bo_base->bo;
@@ -1034,7 +1039,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,

Re: [PATCH 3/6] drm/amdgpu: Flush VM updates for split bindings eagerly.

2023-10-31 Thread Tatsuyuki Ishi




> On Oct 31, 2023, at 23:17, Bas Nieuwenhuizen  wrote:
> 
> 
> 
> On Tue, Oct 31, 2023 at 3:08 PM Christian König  
> wrote:
> Am 31.10.23 um 14:59 schrieb Bas Nieuwenhuizen:
>> 
>> 
>> On Tue, Oct 31, 2023 at 2:57 PM Christian König  
>> wrote:
>> Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
>> > The current amdgpu_gem_va_update_vm only tries to perform updates for the
>> > BO specified in the GEM ioctl; however, when a binding is split, the
>> > adjacent bindings also need to be updated. Such updates currently end up
>> > getting deferred until next submission which causes stalls.
>> 
>> Yeah, that is a necessity. The hardware simply doesn't support what you 
>> try to do here in all cases.
>> 
>> What can the hardware not do here? Is this just needing to wait for TLB 
>> flushes before we can free pagetables, can we just delay that?
> 
> On some hardware generations (especially Navi1x, but also everything older 
> than Polaris) you can't invalidate the TLB while it is in use.
> 
> For Polaris and older it just means that you don't have a guarantee that the 
> shader can't access the memory any more. So delaying the free operation helps 
> here.
> 
> But for Navi1x it's a workaround for a hardware bug. If you try to invalidate 
> the TLB while it is in use you can potentially trigger memory accesses to 
> random addresses.
> 
> That's why we still delay TLB invalidations to the next CS and use a new 
> VMID for each submission instead of invalidating the old one.
> 
> I'm currently working on changing that for Navi2x and newer (maybe Vega as 
> well), but this is something you can really only do on some hw generations 
> after validating that it works.
> 
> I think as long as we make sure all significant work gets done 
> asynchronously, doing the TLB flushing on the next submit (/submissions, one 
> per queue?) is fine for our purposes.

For a bit more of context, the performance / frame timing in Forza with just 
patch 5 wasn’t quite right. As Bas said, ideally we want to perform all the PT 
updates right away, and only defer the TLB flush.

For now the state machine part of this patch doesn’t seem to be going in the 
right direction so I’ll consider dropping this change.

Tatsuyuki.

> 
> (As an aside after thinking some more I *think* we also need some work to 
> make these maps/unmaps (VALID->PRT and PRT->VALID) atomic, as I think it is 
> valid Vulkan to make these race. As such I'm speculating we'd need a bit more 
> reworking there too, not just a late free of the lower level pagetables)
> 
> - Bas 
> 
> Regards,
> Christian. 
> 
>>  
>> 
>> So this approach won't work in general.
>> 
>> Regards,
>> Christian.
>> 
>> >
>> > Introduce a new state "dirty", shared between per-VM BOs and traditional
>> > BOs, containing all BOs that have pending updates in `invalids`.
>> > amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
>> > in the dirty state.
>> >
>> > Signed-off-by: Tatsuyuki Ishi 
>> > ---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>> >   3 files changed, 63 insertions(+), 24 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
>> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > index a1b15d0d6c48..01d3a97248b0 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, 
>> > void *data,
>> >* vital here, so they are not reported back to userspace.
>> >*/
>> >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>> > - struct amdgpu_vm *vm,
>> > - struct amdgpu_bo_va *bo_va,
>> > - uint32_t operation)
>> > + struct amdgpu_vm *vm)
>> >   {
>> > + struct amdgpu_bo_va *bo_va;
>> >   int r;
>> >   
>> >   if (!amdgpu_vm_ready(vm))
>> > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct 
>> > amdgpu_device *adev,
>> >   if (r)
>> >   goto error;
>> >   
>> > - if (operation == AMDGPU_VA_OP_MAP ||
>> > - operation == AMDGPU_VA_OP_REPLACE) {
>> > + spin_lock(&vm->status_lock);
>> > + while (!list_empty(&vm->dirty)) {
>> > + bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>> > +  base.vm_status);
>> > + spin_unlock(&vm->status_lock);
>> > +
>> >   r = amdgpu_vm_bo_update(adev, bo_va, false);
>> >   if (r)
>> >   goto error;
>> > + spin_lock(&vm->status_lock);
>> >   }
>> > + spin_unlock(&vm->status_lock);
>> >   
>> >   r = amdgpu_vm_update_pdes(adev, vm, false);
>> >   
>> > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_d

Re: [PATCH 2/6] drm/amdgpu: Separate eviction from VM status.

2023-10-31 Thread Tatsuyuki Ishi




> On Oct 31, 2023, at 22:55, Christian König  wrote:
> 
> Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
>> In short, eviction never really belonged to the vm_status state machine.
> 
> I strongly disagree to that.
> 
>> Even when evicted, the BO could belong to either the moved or done state.
>> The "evicted" state needed to handle both cases, causing greater confusion.
>> 
>> Additionally, there were inconsistencies in the definition of an evicted
>> BO. Some places are based on the `evict` parameter passed from the TTM move
>> callback, while the others were updated based on whether the BO got its
>> optimal placement. The second is more accurate for our use case. With this
>> refactor, the evicted state is solely determined by the second rule.
> 
> That strongly sounds like you don't understand what the evicted state is good 
> for.
> 
> The evicted state is for page directories, page tables and per VM BOs which 
> needs to move around before doing the next CS.
> 
> Please further explain what you try to do here.

This is mainly an attempt to address inconsistency in the definition of 
“eviction”. The TTM move callback sets eviction when eviction happens through 
ttm_bo_evict. This is however not the only way a BO might end up outside its 
preferred domains.

amdgpu_vm_bo_update later updates the eviction state based on whether the BO is 
in its preferred domains. In my understanding this includes all cases where the 
BO is evicted through ttm_bo_evict. Therefore we should apply this definition 
right from the move callback, not only after amdgpu_vm_bo_update has been 
called at least once.

Tatsuyuki.

> 
> Regards,
> Christian.
> 
>> 
>> Signed-off-by: Tatsuyuki Ishi 
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 67 +--
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  1 +
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |  1 +
>>  3 files changed, 29 insertions(+), 40 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 7b9762f1cddd..dd6f72e2a1d6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -174,19 +174,23 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, 
>> struct amdgpu_vm *vm,
>>   * State for PDs/PTs and per VM BOs which are not at the location they 
>> should
>>   * be.
>>   */
>> -static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
>> +static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool 
>> evicted)
>>  {
>>  struct amdgpu_vm *vm = vm_bo->vm;
>>  struct amdgpu_bo *bo = vm_bo->bo;
>>  -   vm_bo->moved = true;
>>  spin_lock(&vm_bo->vm->status_lock);
>> -if (bo->tbo.type == ttm_bo_type_kernel)
>> -list_move(&vm_bo->vm_status, &vm->evicted);
>> -else
>> -list_move_tail(&vm_bo->vm_status, &vm->evicted);
>> +if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
>> +if (bo->tbo.type == ttm_bo_type_kernel)
>> +list_move(&vm_bo->eviction_status, &vm->evicted);
>> +else
>> +list_move_tail(&vm_bo->eviction_status, &vm->evicted);
>> +} else {
>> +list_del_init(&vm_bo->eviction_status);
>> +}
>>  spin_unlock(&vm_bo->vm->status_lock);
>>  }
>> +
>>  /**
>>   * amdgpu_vm_bo_moved - vm_bo is moved
>>   *
>> @@ -310,6 +314,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base 
>> *base,
>>  base->bo = bo;
>>  base->next = NULL;
>>  INIT_LIST_HEAD(&base->vm_status);
>> +INIT_LIST_HEAD(&base->eviction_status);
>>  if (!bo)
>>  return;
>> @@ -336,7 +341,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base 
>> *base,
>>   * is currently evicted. add the bo to the evicted list to make sure it
>>   * is validated on next vm use to avoid fault.
>>   * */
>> -amdgpu_vm_bo_evicted(base);
>> +amdgpu_vm_bo_set_evicted(base, true);
>>  }
>>/**
>> @@ -460,7 +465,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device 
>> *adev, struct amdgpu_vm *vm,
>>  while (!list_empty(&vm->evicted)) {
>>  bo_base = list_first_entry(&vm->evicted,
>> struct amdgpu_vm_bo_base,
>> -   vm_status);
>> +   eviction_status);
>>  spin_unlock(&vm->status_lock);
>>  bo = bo_base->bo;
>> @@ -1034,7 +1039,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
>>  list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
>>  amdgpu_vm_bo_get_memory(bo_va, stats);
>>  -   list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
>> +list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
>>  amdgpu_vm_bo_get_memory(bo_va, stats);
>>  list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
>> @@ -1153,21 +1158,10

Re: [PATCH 5/6] drm/amdgpu: Add flag to disable implicit sync for GEM operations.

2023-10-31 Thread Christian König




Am 31.10.23 um 15:14 schrieb Michel Dänzer:

On 10/31/23 14:40, Tatsuyuki Ishi wrote:

In Vulkan, it is the application's responsibility to perform adequate
synchronization before a sparse unmap, replace or BO destroy operation.
Until now, the kernel applied the same rule as implicitly-synchronized
APIs like OpenGL, which with per-VM BOs made page table updates stall the
queue completely. The newly added AMDGPU_VM_EXPLICIT_SYNC flag allows
drivers to opt-out of this behavior, while still ensuring adequate implicit
sync happens for kernel-initiated updates (e.g. BO moves).

We record whether to use implicit sync or not for each freed mapping. To
avoid increasing the mapping struct's size, this is union-ized with the
interval tree field which is unused after the unmap.

The reason this is done with a GEM ioctl flag, instead of being a VM /
context global setting, is that the current libdrm implementation shares
the DRM handle even between different kind of drivers (radeonsi vs radv).

Different drivers always use separate contexts though, even with the same DRM 
file description, don't they?


Separate contexts don't help here since the VA space is shared between 
the two.




FWIW, RADV will also want explicit sync in the CS ioctl.
You can replace that with the DMA-buf IOCTLs like Faith is planning to 
do for NVK.

Regards,
Christian.

Re: [PATCH 5/6] drm/amdgpu: Add flag to disable implicit sync for GEM operations.

2023-10-31 Thread Bas Nieuwenhuizen

On Tue, Oct 31, 2023 at 3:14 PM Michel Dänzer 
wrote:

> On 10/31/23 14:40, Tatsuyuki Ishi wrote:
> > In Vulkan, it is the application's responsibility to perform adequate
> > synchronization before a sparse unmap, replace or BO destroy operation.
> > Until now, the kernel applied the same rule as implicitly-synchronized
> > APIs like OpenGL, which with per-VM BOs made page table updates stall the
> > queue completely. The newly added AMDGPU_VM_EXPLICIT_SYNC flag allows
> > drivers to opt-out of this behavior, while still ensuring adequate
> implicit
> > sync happens for kernel-initiated updates (e.g. BO moves).
> >
> > We record whether to use implicit sync or not for each freed mapping. To
> > avoid increasing the mapping struct's size, this is union-ized with the
> > interval tree field which is unused after the unmap.
> >
> > The reason this is done with a GEM ioctl flag, instead of being a VM /
> > context global setting, is that the current libdrm implementation shares
> > the DRM handle even between different kind of drivers (radeonsi vs radv).
>
> Different drivers always use separate contexts though, even with the same
> DRM file description, don't they?
>
> FWIW, RADV will also want explicit sync in the CS ioctl.
>
I think a crucial problem is that VA ioctls don't take a context, so a
per-context flag doesn't solve this (the previous attempt used it because
all the sync changes were on the CS submit side and not the VA ioctl side).
So I'd still like to solve that side for RADV, but I think the VA ioctl
flag makes sense here if we need to do anything different VA ioctl wise.


> --
> Earthling Michel Dänzer|  https://redhat.com
> Libre software enthusiast  | Mesa and Xwayland developer
>
>

Re: [PATCH 3/6] drm/amdgpu: Flush VM updates for split bindings eagerly.

2023-10-31 Thread Bas Nieuwenhuizen

On Tue, Oct 31, 2023 at 3:08 PM Christian König 
wrote:

> Am 31.10.23 um 14:59 schrieb Bas Nieuwenhuizen:
>
>
>
> On Tue, Oct 31, 2023 at 2:57 PM Christian König 
> wrote:
>
>> Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
>> > The current amdgpu_gem_va_update_vm only tries to perform updates for
>> the
>> > BO specified in the GEM ioctl; however, when a binding is split, the
>> > adjacent bindings also need to be updated. Such updates currently end
>> up
>> > getting deferred until next submission which causes stalls.
>>
>> Yeah, that is a necessity. The hardware simply doesn't support what you
>> try to do here in all cases.
>>
>
> What can the hardware not do here? Is this just needing to wait for TLB
> flushes before we can free pagetables, can we just delay that?
>
>
> On some hardware generations (especially Navi1x, but also everything older
> than Polaris) you can't invalidate the TLB while it is in use.
>
> For Polaris and older it just means that you don't have a guarantee that
> the shader can't access the memory any more. So delaying the free operation
> helps here.
>
> But for Navi1x it's a workaround for a hardware bug. If you try to
> invalidate the TLB while it is in use you can potentially trigger memory
> accesses to random addresses.
>
> That's why we still delay TLB invalidations to the next CS and use a new
> VMID for each submission instead of invalidating the old one.
>
> I'm currently working on changing that for Navi2x and newer (maybe Vega as
> well), but this is something you can really only do on some hw generations
> after validating that it works.
>

I think as long as we make sure all significant work gets done
asynchronously, doing the TLB flushing on the next submit (/submissions,
one per queue?) is fine for our purposes.

(As an aside after thinking some more I *think* we also need some work to
make these maps/unmaps (VALID->PRT and PRT->VALID) atomic, as I think it is
valid Vulkan to make these race. As such I'm speculating we'd need a bit
more reworking there too, not just a late free of the lower level
pagetables)

- Bas

>
> Regards,
> Christian.
>
>
>
>>
>> So this approach won't work in general.
>>
>> Regards,
>> Christian.
>>
>> >
>> > Introduce a new state "dirty", shared between per-VM BOs and traditional
>> > BOs, containing all BOs that have pending updates in `invalids`.
>> > amdgpu_gem_va_update_vm will now simply flush any pending updates for
>> BOs
>> > in the dirty state.
>> >
>> > Signed-off-by: Tatsuyuki Ishi 
>> > ---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>> >   3 files changed, 63 insertions(+), 24 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > index a1b15d0d6c48..01d3a97248b0 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device
>> *dev, void *data,
>> >* vital here, so they are not reported back to userspace.
>> >*/
>> >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>> > - struct amdgpu_vm *vm,
>> > - struct amdgpu_bo_va *bo_va,
>> > - uint32_t operation)
>> > + struct amdgpu_vm *vm)
>> >   {
>> > + struct amdgpu_bo_va *bo_va;
>> >   int r;
>> >
>> >   if (!amdgpu_vm_ready(vm))
>> > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct
>> amdgpu_device *adev,
>> >   if (r)
>> >   goto error;
>> >
>> > - if (operation == AMDGPU_VA_OP_MAP ||
>> > - operation == AMDGPU_VA_OP_REPLACE) {
>> > + spin_lock(&vm->status_lock);
>> > + while (!list_empty(&vm->dirty)) {
>> > + bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>> > +  base.vm_status);
>> > + spin_unlock(&vm->status_lock);
>> > +
>> >   r = amdgpu_vm_bo_update(adev, bo_va, false);
>> >   if (r)
>> >   goto error;
>> > + spin_lock(&vm->status_lock);
>> >   }
>> > + spin_unlock(&vm->status_lock);
>> >
>> >   r = amdgpu_vm_update_pdes(adev, vm, false);
>> >
>> > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>> void *data,
>> >   break;
>> >   }
>> >   if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
>> !amdgpu_vm_debug)
>> > - amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>> > - args->operation);
>> > + amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>> >
>> >   error:
>> >   drm_exec_fini(&exec);
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu

Re: [PATCH 5/6] drm/amdgpu: Add flag to disable implicit sync for GEM operations.

2023-10-31 Thread Michel Dänzer

On 10/31/23 14:40, Tatsuyuki Ishi wrote:
> In Vulkan, it is the application's responsibility to perform adequate
> synchronization before a sparse unmap, replace or BO destroy operation.
> Until now, the kernel applied the same rule as implicitly-synchronized
> APIs like OpenGL, which with per-VM BOs made page table updates stall the
> queue completely. The newly added AMDGPU_VM_EXPLICIT_SYNC flag allows
> drivers to opt-out of this behavior, while still ensuring adequate implicit
> sync happens for kernel-initiated updates (e.g. BO moves).
> 
> We record whether to use implicit sync or not for each freed mapping. To
> avoid increasing the mapping struct's size, this is union-ized with the
> interval tree field which is unused after the unmap.
> 
> The reason this is done with a GEM ioctl flag, instead of being a VM /
> context global setting, is that the current libdrm implementation shares
> the DRM handle even between different kind of drivers (radeonsi vs radv).

Different drivers always use separate contexts though, even with the same DRM 
file description, don't they?

FWIW, RADV will also want explicit sync in the CS ioctl.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer

Re: [PATCH 3/6] drm/amdgpu: Flush VM updates for split bindings eagerly.

2023-10-31 Thread Christian König


Am 31.10.23 um 14:59 schrieb Bas Nieuwenhuizen:



On Tue, Oct 31, 2023 at 2:57 PM Christian König 
 wrote:


Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
> The current amdgpu_gem_va_update_vm only tries to perform
updates for the
> BO specified in the GEM ioctl; however, when a binding is split, the
> adjacent bindings also need to be updated. Such updates
currently end up
> getting deferred until next submission which causes stalls.

Yeah, that is a necessity. The hardware simply doesn't support
what you
try to do here in all cases.


What can the hardware not do here? Is this just needing to wait for 
TLB flushes before we can free pagetables, can we just delay that?


On some hardware generations (especially Navi1x, but also everything 
older than Polaris) you can't invalidate the TLB while it is in use.


For Polaris and older it just means that you don't have a guarantee that 
the shader can't access the memory any more. So delaying the free 
operation helps here.


But for Navi1x it's a workaround for a hardware bug. If you try to 
invalidate the TLB while it is in use you can potentially trigger 
memory accesses to random addresses.


That's why we still delay TLB invalidations to the next CS and use a 
new VMID for each submission instead of invalidating the old one.


I'm currently working on changing that for Navi2x and newer (maybe Vega 
as well), but this is something you can really only do on some hw 
generations after validating that it works.


Regards,
Christian.




So this approach won't work in general.

Regards,
Christian.

>
> Introduce a new state "dirty", shared between per-VM BOs and
traditional
> BOs, containing all BOs that have pending updates in `invalids`.
> amdgpu_gem_va_update_vm will now simply flush any pending
updates for BOs
> in the dirty state.
>
> Signed-off-by: Tatsuyuki Ishi 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66
++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>   3 files changed, 63 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index a1b15d0d6c48..01d3a97248b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct
drm_device *dev, void *data,
>    * vital here, so they are not reported back to userspace.
>    */
>   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> -                                 struct amdgpu_vm *vm,
> -                                 struct amdgpu_bo_va *bo_va,
> -                                 uint32_t operation)
> +                                 struct amdgpu_vm *vm)
>   {
> +     struct amdgpu_bo_va *bo_va;
>       int r;
>
>       if (!amdgpu_vm_ready(vm))
> @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct
amdgpu_device *adev,
>       if (r)
>               goto error;
>
> -     if (operation == AMDGPU_VA_OP_MAP ||
> -         operation == AMDGPU_VA_OP_REPLACE) {
> +     spin_lock(&vm->status_lock);
> +     while (!list_empty(&vm->dirty)) {
> +             bo_va = list_first_entry(&vm->dirty, struct
amdgpu_bo_va,
> +                                      base.vm_status);
> +             spin_unlock(&vm->status_lock);
> +
>               r = amdgpu_vm_bo_update(adev, bo_va, false);
>               if (r)
>                       goto error;
> +             spin_lock(&vm->status_lock);
>       }
> +     spin_unlock(&vm->status_lock);
>
>       r = amdgpu_vm_update_pdes(adev, vm, false);
>
> @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device
*dev, void *data,
>               break;
>       }
>       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
!amdgpu_vm_debug)
> -             amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> -  args->operation);
> +             amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>
>   error:
>       drm_exec_fini(&exec);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index dd6f72e2a1d6..01d31891cd05 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct
amdgpu_vm_bo_base *vm_bo, bool evict
>       spin_unlock(&vm_bo->vm->status_lock);
>   }
>
> +/**
> + * amdgpu_vm_bo_dirty - vm_bo is dirty
> + *
> + * @vm_bo: vm_bo which is dirty
> + *
> + * State for normal and per VM BOs that are not moved, but have
new entries

Re: [PATCH 4/6] drm/amdgpu: Remove redundant state change after validation.

2023-10-31 Thread Christian König


Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:

All the state changes are handled in the TTM move callback; doing it again
here just leads to more confusion.


The state move here is because we need to track which PDs/PTs are 
already validated and which have new locations reflected in the PDEs.


With this change here you will sooner or later run into PDE corruption.



The table update remains here because it needs to be done exactly once,
while doing it in the move callback will result in it getting triggered twice,
once by the actual BO and once by the shadow BO.


The table update isn't done in the move callback because you can't take 
the appropriate locks there.


Regards,
Christian.




Signed-off-by: Tatsuyuki Ishi 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 ++-
  1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 01d31891cd05..50f7cee639ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -495,12 +495,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
return r;
}
  
-		if (bo->tbo.type != ttm_bo_type_kernel) {

-   amdgpu_vm_bo_moved(bo_base);
-   } else {
+   if (bo->tbo.type == ttm_bo_type_kernel)
vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
-   amdgpu_vm_bo_relocated(bo_base);
-   }
+
spin_lock(&vm->status_lock);
}
spin_unlock(&vm->status_lock);

Re: [PATCH 3/6] drm/amdgpu: Flush VM updates for split bindings eagerly.

2023-10-31 Thread Bas Nieuwenhuizen

On Tue, Oct 31, 2023 at 2:57 PM Christian König 
wrote:

> Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
> > The current amdgpu_gem_va_update_vm only tries to perform updates for the
> > BO specified in the GEM ioctl; however, when a binding is split, the
> > adjacent bindings also need to be updated. Such updates currently end up
> > getting deferred until next submission which causes stalls.
>
> Yeah, that is a necessity. The hardware simply doesn't support what you
> are trying to do here in all cases.
>

What can the hardware not do here? Is this just needing to wait for TLB
flushes before we can free pagetables, can we just delay that?


>
> So this approach won't work in general.
>
> Regards,
> Christian.
>
> >
> > Introduce a new state "dirty", shared between per-VM BOs and traditional
> > BOs, containing all BOs that have pending updates in `invalids`.
> > amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
> > in the dirty state.
> >
> > Signed-off-by: Tatsuyuki Ishi 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
> >   3 files changed, 63 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > index a1b15d0d6c48..01d3a97248b0 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device
> *dev, void *data,
> >* vital here, so they are not reported back to userspace.
> >*/
> >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> > - struct amdgpu_vm *vm,
> > - struct amdgpu_bo_va *bo_va,
> > - uint32_t operation)
> > + struct amdgpu_vm *vm)
> >   {
> > + struct amdgpu_bo_va *bo_va;
> >   int r;
> >
> >   if (!amdgpu_vm_ready(vm))
> > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct
> amdgpu_device *adev,
> >   if (r)
> >   goto error;
> >
> > - if (operation == AMDGPU_VA_OP_MAP ||
> > - operation == AMDGPU_VA_OP_REPLACE) {
> > + spin_lock(&vm->status_lock);
> > + while (!list_empty(&vm->dirty)) {
> > + bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
> > +  base.vm_status);
> > + spin_unlock(&vm->status_lock);
> > +
> >   r = amdgpu_vm_bo_update(adev, bo_va, false);
> >   if (r)
> >   goto error;
> > + spin_lock(&vm->status_lock);
> >   }
> > + spin_unlock(&vm->status_lock);
> >
> >   r = amdgpu_vm_update_pdes(adev, vm, false);
> >
> > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void
> *data,
> >   break;
> >   }
> >   if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
> !amdgpu_vm_debug)
> > - amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> > - args->operation);
> > + amdgpu_gem_va_update_vm(adev, &fpriv->vm);
> >
> >   error:
> >   drm_exec_fini(&exec);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index dd6f72e2a1d6..01d31891cd05 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct
> amdgpu_vm_bo_base *vm_bo, bool evict
> >   spin_unlock(&vm_bo->vm->status_lock);
> >   }
> >
> > +/**
> > + * amdgpu_vm_bo_dirty - vm_bo is dirty
> > + *
> > + * @vm_bo: vm_bo which is dirty
> > + *
> > + * State for normal and per VM BOs that are not moved, but have new
> entries in
> > + * bo_va->invalids.
> > + */
> > +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
> > +{
> > + spin_lock(&vm_bo->vm->status_lock);
> > + list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
> > + spin_unlock(&vm_bo->vm->status_lock);
> > +}
> > +
> >   /**
> >* amdgpu_vm_bo_moved - vm_bo is moved
> >*
> > @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
> >   list_for_each_entry_safe(bo_va, tmp, &vm->evicted,
> base.eviction_status)
> >   amdgpu_vm_bo_get_memory(bo_va, stats);
> >
> > + list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
> > + amdgpu_vm_bo_get_memory(bo_va, stats);
> > +
> >   list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
> base.vm_status)
> >   amdgpu_vm_bo_get_memory(bo_va, stats);
> >
> > @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device
> *adev,
> >   dma_resv_unlock(resv);
> >   spin_lock(&vm->status_lock);
> >   }
> > +
> > + while (!

Re: [PATCH 3/6] drm/amdgpu: Flush VM updates for split bindings eagerly.

2023-10-31 Thread Christian König


Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:

The current amdgpu_gem_va_update_vm only tries to perform updates for the
BO specified in the GEM ioctl; however, when a binding is split, the
adjacent bindings also need to be updated. Such updates currently end up
getting deferred until next submission which causes stalls.


Yeah, that is a necessity. The hardware simply doesn't support what you 
are trying to do here in all cases.


So this approach won't work in general.

Regards,
Christian.



Introduce a new state "dirty", shared between per-VM BOs and traditional
BOs, containing all BOs that have pending updates in `invalids`.
amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
in the dirty state.

Signed-off-by: Tatsuyuki Ishi 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
  3 files changed, 63 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index a1b15d0d6c48..01d3a97248b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void 
*data,
   * vital here, so they are not reported back to userspace.
   */
  static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
-   struct amdgpu_vm *vm,
-   struct amdgpu_bo_va *bo_va,
-   uint32_t operation)
+   struct amdgpu_vm *vm)
  {
+   struct amdgpu_bo_va *bo_va;
int r;
  
  	if (!amdgpu_vm_ready(vm))

@@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device 
*adev,
if (r)
goto error;
  
-	if (operation == AMDGPU_VA_OP_MAP ||

-   operation == AMDGPU_VA_OP_REPLACE) {
+   spin_lock(&vm->status_lock);
+   while (!list_empty(&vm->dirty)) {
+   bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
+base.vm_status);
+   spin_unlock(&vm->status_lock);
+
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
goto error;
+   spin_lock(&vm->status_lock);
}
+   spin_unlock(&vm->status_lock);
  
  	r = amdgpu_vm_update_pdes(adev, vm, false);
  
@@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

break;
}
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
-   amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
-   args->operation);
+   amdgpu_gem_va_update_vm(adev, &fpriv->vm);
  
  error:

drm_exec_fini(&exec);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dd6f72e2a1d6..01d31891cd05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct 
amdgpu_vm_bo_base *vm_bo, bool evict
spin_unlock(&vm_bo->vm->status_lock);
  }
  
+/**

+ * amdgpu_vm_bo_dirty - vm_bo is dirty
+ *
+ * @vm_bo: vm_bo which is dirty
+ *
+ * State for normal and per VM BOs that are not moved, but have new entries in
+ * bo_va->invalids.
+ */
+static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
+{
+   spin_lock(&vm_bo->vm->status_lock);
+   list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
+   spin_unlock(&vm_bo->vm->status_lock);
+}
+
  /**
   * amdgpu_vm_bo_moved - vm_bo is moved
   *
@@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
amdgpu_vm_bo_get_memory(bo_va, stats);
  
+	list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)

+   amdgpu_vm_bo_get_memory(bo_va, stats);
+
list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
amdgpu_vm_bo_get_memory(bo_va, stats);
  
@@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,

dma_resv_unlock(resv);
spin_lock(&vm->status_lock);
}
+
+   while (!list_empty(&vm->dirty)) {
+   bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
+base.vm_status);
+   spin_unlock(&vm->status_lock);
+
+   r = amdgpu_vm_bo_update(adev, bo_va, false);
+   if (r)
+   return r;
+   spin_lock(&vm->status_lock);
+   }
spin_unlock(&vm->status_lock);
  
  	return 0;

@@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct 
amdgpu_device *adev,

RE: [PATCH v2] drm/amd/swsmu: update smu v14_0_0 driver if and metrics table

2023-10-31 Thread Deucher, Alexander

[AMD Official Use Only - General]

> -----Original Message-----
> From: amd-gfx  On Behalf Of Li Ma
> Sent: Monday, October 30, 2023 6:55 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Zhang, Yifan
> ; Feng, Kenneth ; Ma, Li
> ; Du, Xiaojian 
> Subject: [PATCH v2] drm/amd/swsmu: update smu v14_0_0 driver if and
> metrics table
>
> Update driver if headers and metrics table in smu v14_0_0 after smu fw
> promotion. And drop the legacy metrics table.
> v1:
> update header files
> v2:
> drop legacy metrics table and add warning of checking pmfw version.
>
> Signed-off-by: Li Ma 

Acked-by: Alex Deucher

Re: [PATCH 2/6] drm/amdgpu: Separate eviction from VM status.

2023-10-31 Thread Christian König


Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:

In short, eviction never really belonged to the vm_status state machine.


I strongly disagree with that.


Even when evicted, the BO could belong to either the moved or done state.
The "evicted" state needed to handle both cases, causing greater confusion.

Additionally, there were inconsistencies in the definition of an evicted
BO. Some places are based on the `evict` parameter passed from the TTM move
callback, while the others were updated based on whether the BO got its
optimal placement. The second is more accurate for our use case. With this
refactor, the evicted state is solely determined by the second rule.


That strongly sounds like you don't understand what the evicted state is 
good for.


The evicted state is for page directories, page tables and per VM BOs 
which need to move around before doing the next CS.


Please explain further what you are trying to do here.

Regards,
Christian.



Signed-off-by: Tatsuyuki Ishi 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 67 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |  1 +
  3 files changed, 29 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7b9762f1cddd..dd6f72e2a1d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -174,19 +174,23 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
   * State for PDs/PTs and per VM BOs which are not at the location they should
   * be.
   */
-static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
+static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool 
evicted)
  {
struct amdgpu_vm *vm = vm_bo->vm;
struct amdgpu_bo *bo = vm_bo->bo;
  
-	vm_bo->moved = true;

spin_lock(&vm_bo->vm->status_lock);
-   if (bo->tbo.type == ttm_bo_type_kernel)
-   list_move(&vm_bo->vm_status, &vm->evicted);
-   else
-   list_move_tail(&vm_bo->vm_status, &vm->evicted);
+   if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
+   if (bo->tbo.type == ttm_bo_type_kernel)
+   list_move(&vm_bo->eviction_status, &vm->evicted);
+   else
+   list_move_tail(&vm_bo->eviction_status, &vm->evicted);
+   } else {
+   list_del_init(&vm_bo->eviction_status);
+   }
spin_unlock(&vm_bo->vm->status_lock);
  }
+
  /**
   * amdgpu_vm_bo_moved - vm_bo is moved
   *
@@ -310,6 +314,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->bo = bo;
base->next = NULL;
INIT_LIST_HEAD(&base->vm_status);
+   INIT_LIST_HEAD(&base->eviction_status);
  
  	if (!bo)

return;
@@ -336,7 +341,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 * is currently evicted. add the bo to the evicted list to make sure it
 * is validated on next vm use to avoid fault.
 * */
-   amdgpu_vm_bo_evicted(base);
+   amdgpu_vm_bo_set_evicted(base, true);
  }
  
  /**

@@ -460,7 +465,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
while (!list_empty(&vm->evicted)) {
bo_base = list_first_entry(&vm->evicted,
   struct amdgpu_vm_bo_base,
-  vm_status);
+  eviction_status);
spin_unlock(&vm->status_lock);
  
  		bo = bo_base->bo;

@@ -1034,7 +1039,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
amdgpu_vm_bo_get_memory(bo_va, stats);
  
-	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)

+   list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
amdgpu_vm_bo_get_memory(bo_va, stats);
  
  	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)

@@ -1153,21 +1158,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, 
struct amdgpu_bo_va *bo_va,
return r;
}
  
-	/* If the BO is not in its preferred location add it back to

-* the evicted list so that it gets validated again on the
-* next command submission.
-*/
-   if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
-   uint32_t mem_type = bo->tbo.resource->mem_type;
-
-   if (!(bo->preferred_domains &
- amdgpu_mem_type_to_domain(mem_type)))
-   amdgpu_vm_bo_evicted(&bo_va->base);
-   else
-   amdgpu_vm_bo_idle(&bo_va->base);
-   } else {
+   if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
+   amdgpu_vm_bo_idle(&bo_va->base);
+

[PATCH 6/6] drm/amdgpu: Bump amdgpu driver version.

2023-10-31 Thread Tatsuyuki Ishi

For detection of the new explicit sync functionality without having to try
the ioctl.

Signed-off-by: Tatsuyuki Ishi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 81edf66dbea8..2aa406dee192 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -113,9 +113,10 @@
  *gl1c_cache_size, gl2c_cache_size, mall_size, 
enabled_rb_pipes_mask_hi
  *   3.53.0 - Support for GFX11 CP GFX shadowing
  *   3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
+ * - 3.55.0 - Add AMDGPU_VM_EXPLICIT_SYNC flag for GEM operations.
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   54
+#define KMS_DRIVER_MINOR   55
 #define KMS_DRIVER_PATCHLEVEL  0
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
-- 
2.42.0

[PATCH 5/6] drm/amdgpu: Add flag to disable implicit sync for GEM operations.

2023-10-31 Thread Tatsuyuki Ishi

In Vulkan, it is the application's responsibility to perform adequate
synchronization before a sparse unmap, replace or BO destroy operation.
Until now, the kernel applied the same rule as implicitly-synchronized
APIs like OpenGL, which with per-VM BOs made page table updates stall the
queue completely. The newly added AMDGPU_VM_EXPLICIT_SYNC flag allows
drivers to opt-out of this behavior, while still ensuring adequate implicit
sync happens for kernel-initiated updates (e.g. BO moves).

We record whether to use implicit sync or not for each freed mapping. To
avoid increasing the mapping struct's size, this is union-ized with the
interval tree field which is unused after the unmap.

The reason this is done with a GEM ioctl flag, instead of being a VM /
context global setting, is that the current libdrm implementation shares
the DRM handle even between different kinds of drivers (radeonsi vs radv).

Signed-off-by: Tatsuyuki Ishi 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 14 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|  7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 55 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 23 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 18 +++---
 include/uapi/drm/amdgpu_drm.h |  2 +
 9 files changed, 74 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 7d6daf8d2bfa..10e129bff977 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1196,7 +1196,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
struct amdgpu_device *adev = entry->adev;
struct amdgpu_vm *vm = bo_va->base.vm;
 
-   amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+   amdgpu_vm_bo_unmap(adev, bo_va, entry->va, true);
 
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 720011019741..612279e65bff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -122,7 +122,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
}
}
 
-   r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr);
+   r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr, true);
if (r) {
DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r);
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 01d3a97248b0..0d9496a06947 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -672,9 +672,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK |
-   AMDGPU_VM_PAGE_NOALLOC;
+   AMDGPU_VM_PAGE_NOALLOC | AMDGPU_VM_EXPLICIT_SYNC;
const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
-   AMDGPU_VM_PAGE_PRT;
+   AMDGPU_VM_PAGE_PRT | AMDGPU_VM_EXPLICIT_SYNC;
 
struct drm_amdgpu_gem_va *args = data;
struct drm_gem_object *gobj;
@@ -685,6 +685,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_exec exec;
uint64_t va_flags;
uint64_t vm_size;
+   bool sync_unmap;
int r = 0;
 
if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
@@ -720,6 +721,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
 
+   sync_unmap = !(args->flags & AMDGPU_VM_EXPLICIT_SYNC);
+
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
case AMDGPU_VA_OP_UNMAP:
@@ -779,19 +782,20 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
*data,
 va_flags);
break;
case AMDGPU_VA_OP_UNMAP:
-   r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address);
+   r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address,
+  sync_unmap);
break;
 
case AMDGPU_VA_OP_CLEAR:
r = amdgpu_vm_bo_clear_mappings(adev, &fpriv->vm,
args->va_address,
-   args->map_size);
+   args->map_size, sync_unmap);
break;
case AMDGPU_VA_OP_REPLACE:
va_flags = amdgpu_gem_va_map_flags(adev, args->flags);

[PATCH 4/6] drm/amdgpu: Remove redundant state change after validation.

2023-10-31 Thread Tatsuyuki Ishi

All the state changes are handled in the TTM move callback; doing it again
here just leads to more confusion.

The table update remains here because it needs to be done exactly once,
while doing it in the move callback will result in it getting triggered twice,
once by the actual BO and once by the shadow BO.

Signed-off-by: Tatsuyuki Ishi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 01d31891cd05..50f7cee639ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -495,12 +495,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
return r;
}
 
-   if (bo->tbo.type != ttm_bo_type_kernel) {
-   amdgpu_vm_bo_moved(bo_base);
-   } else {
+   if (bo->tbo.type == ttm_bo_type_kernel)
vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
-   amdgpu_vm_bo_relocated(bo_base);
-   }
+
spin_lock(&vm->status_lock);
}
spin_unlock(&vm->status_lock);
-- 
2.42.0

[PATCH 2/6] drm/amdgpu: Separate eviction from VM status.

2023-10-31 Thread Tatsuyuki Ishi

In short, eviction never really belonged to the vm_status state machine.
Even when evicted, the BO could belong to either the moved or done state.
The "evicted" state needed to handle both cases, causing greater confusion.

Additionally, there were inconsistencies in the definition of an evicted
BO. Some places are based on the `evict` parameter passed from the TTM move
callback, while the others were updated based on whether the BO got its
optimal placement. The second is more accurate for our use case. With this
refactor, the evicted state is solely determined by the second rule.

Signed-off-by: Tatsuyuki Ishi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 67 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |  1 +
 3 files changed, 29 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7b9762f1cddd..dd6f72e2a1d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -174,19 +174,23 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
  * State for PDs/PTs and per VM BOs which are not at the location they should
  * be.
  */
-static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
+static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool 
evicted)
 {
struct amdgpu_vm *vm = vm_bo->vm;
struct amdgpu_bo *bo = vm_bo->bo;
 
-   vm_bo->moved = true;
spin_lock(&vm_bo->vm->status_lock);
-   if (bo->tbo.type == ttm_bo_type_kernel)
-   list_move(&vm_bo->vm_status, &vm->evicted);
-   else
-   list_move_tail(&vm_bo->vm_status, &vm->evicted);
+   if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
+   if (bo->tbo.type == ttm_bo_type_kernel)
+   list_move(&vm_bo->eviction_status, &vm->evicted);
+   else
+   list_move_tail(&vm_bo->eviction_status, &vm->evicted);
+   } else {
+   list_del_init(&vm_bo->eviction_status);
+   }
spin_unlock(&vm_bo->vm->status_lock);
 }
+
 /**
  * amdgpu_vm_bo_moved - vm_bo is moved
  *
@@ -310,6 +314,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->bo = bo;
base->next = NULL;
INIT_LIST_HEAD(&base->vm_status);
+   INIT_LIST_HEAD(&base->eviction_status);
 
if (!bo)
return;
@@ -336,7 +341,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 * is currently evicted. add the bo to the evicted list to make sure it
 * is validated on next vm use to avoid fault.
 * */
-   amdgpu_vm_bo_evicted(base);
+   amdgpu_vm_bo_set_evicted(base, true);
 }
 
 /**
@@ -460,7 +465,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
while (!list_empty(&vm->evicted)) {
bo_base = list_first_entry(&vm->evicted,
   struct amdgpu_vm_bo_base,
-  vm_status);
+  eviction_status);
spin_unlock(&vm->status_lock);
 
bo = bo_base->bo;
@@ -1034,7 +1039,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
amdgpu_vm_bo_get_memory(bo_va, stats);
 
-   list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
+   list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
amdgpu_vm_bo_get_memory(bo_va, stats);
 
list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
@@ -1153,21 +1158,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, 
struct amdgpu_bo_va *bo_va,
return r;
}
 
-   /* If the BO is not in its preferred location add it back to
-* the evicted list so that it gets validated again on the
-* next command submission.
-*/
-   if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
-   uint32_t mem_type = bo->tbo.resource->mem_type;
-
-   if (!(bo->preferred_domains &
- amdgpu_mem_type_to_domain(mem_type)))
-   amdgpu_vm_bo_evicted(&bo_va->base);
-   else
-   amdgpu_vm_bo_idle(&bo_va->base);
-   } else {
+   if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
+   amdgpu_vm_bo_idle(&bo_va->base);
+   else
amdgpu_vm_bo_done(&bo_va->base);
-   }
 
list_splice_init(&bo_va->invalids, &bo_va->valids);
bo_va->cleared = clear;
@@ -1883,6 +1877,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
 
spin_lock(&vm->status_lock);
list_del(&bo_va->base.vm_status);
+   list_del(&bo_va->base.eviction_

[PATCH 3/6] drm/amdgpu: Flush VM updates for split bindings eagerly.

2023-10-31 Thread Tatsuyuki Ishi

The current amdgpu_gem_va_update_vm only tries to perform updates for the
BO specified in the GEM ioctl; however, when a binding is split, the
adjacent bindings also need to be updated. Such updates currently end up
getting deferred until next submission which causes stalls.

Introduce a new state "dirty", shared between per-VM BOs and traditional
BOs, containing all BOs that have pending updates in `invalids`.
amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
in the dirty state.

Signed-off-by: Tatsuyuki Ishi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
 3 files changed, 63 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index a1b15d0d6c48..01d3a97248b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void 
*data,
  * vital here, so they are not reported back to userspace.
  */
 static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
-   struct amdgpu_vm *vm,
-   struct amdgpu_bo_va *bo_va,
-   uint32_t operation)
+   struct amdgpu_vm *vm)
 {
+   struct amdgpu_bo_va *bo_va;
int r;
 
if (!amdgpu_vm_ready(vm))
@@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device 
*adev,
if (r)
goto error;
 
-   if (operation == AMDGPU_VA_OP_MAP ||
-   operation == AMDGPU_VA_OP_REPLACE) {
+   spin_lock(&vm->status_lock);
+   while (!list_empty(&vm->dirty)) {
+   bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
+base.vm_status);
+   spin_unlock(&vm->status_lock);
+
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
goto error;
+   spin_lock(&vm->status_lock);
}
+   spin_unlock(&vm->status_lock);
 
r = amdgpu_vm_update_pdes(adev, vm, false);
 
@@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
break;
}
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
-   amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
-   args->operation);
+   amdgpu_gem_va_update_vm(adev, &fpriv->vm);
 
 error:
drm_exec_fini(&exec);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dd6f72e2a1d6..01d31891cd05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct 
amdgpu_vm_bo_base *vm_bo, bool evict
spin_unlock(&vm_bo->vm->status_lock);
 }
 
+/**
+ * amdgpu_vm_bo_dirty - vm_bo is dirty
+ *
+ * @vm_bo: vm_bo which is dirty
+ *
+ * State for normal and per VM BOs that are not moved, but have new entries in
+ * bo_va->invalids.
+ */
+static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
+{
+   spin_lock(&vm_bo->vm->status_lock);
+   list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
+   spin_unlock(&vm_bo->vm->status_lock);
+}
+
 /**
  * amdgpu_vm_bo_moved - vm_bo is moved
  *
@@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
amdgpu_vm_bo_get_memory(bo_va, stats);
 
+   list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
+   amdgpu_vm_bo_get_memory(bo_va, stats);
+
list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
amdgpu_vm_bo_get_memory(bo_va, stats);
 
@@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
dma_resv_unlock(resv);
spin_lock(&vm->status_lock);
}
+
+   while (!list_empty(&vm->dirty)) {
+   bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
+base.vm_status);
+   spin_unlock(&vm->status_lock);
+
+   r = amdgpu_vm_bo_update(adev, bo_va, false);
+   if (r)
+   return r;
+   spin_lock(&vm->status_lock);
+   }
spin_unlock(&vm->status_lock);
 
return 0;
@@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct 
amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping)
 {
struct amdgpu_vm *vm = bo_va->base.vm;
-   struct amdgpu_bo *bo = bo_va->base.bo;
 
mapping->bo_va = bo_va;
list_add(&mapping->list, &bo_va->

[PATCH 1/6] drm/amdgpu: Don't implicit sync PRT maps.

2023-10-31 Thread Tatsuyuki Ishi

These are considered map operations rather than unmap, and there is no
point of doing implicit synchronization here.

Signed-off-by: Tatsuyuki Ishi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f5daadcec865..7b9762f1cddd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -902,7 +902,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
/* Implicitly sync to command submissions in the same VM before
 * unmapping. Sync to moving fences before mapping.
 */
-   if (!(flags & AMDGPU_PTE_VALID))
+   if (!(flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)))
sync_mode = AMDGPU_SYNC_EQ_OWNER;
else
sync_mode = AMDGPU_SYNC_EXPLICIT;
-- 
2.42.0

[PATCH 0/6] drm/amdgpu: Add flag to disable implicit sync for GEM operations.

2023-10-31 Thread Tatsuyuki Ishi

In Vulkan, it is the application's responsibility to perform adequate
synchronization before a sparse unmap, replace or BO destroy operation.
This adds an option to AMDGPU_VA_OPs to disable redundant implicit sync
that happens on sparse unmap or replace operations.

This significantly reduces stutter in Forza Horizon 5 and Forza
Horizon 4, games that had significant issues with stutter related to
sparse binding.

This patchset also addresses a tangential issue that some changes were
not being flushed immediately after the ioctls, but were deferred to be
processed on the next CS submission, which also results in stalling.
A refactor of the state machine is included to achieve this.

Compared to the previous series [1], this specifically targets the VM
operations and keeps everything else intact, including implicit sync on
kernel-initiated moves.

I've been able to pass a full Vulkan CTS run on Navi 10 with this.

Userspace code for this is available at [2] and a branch for the kernel
code is available at [3].

[1]: 
https://lore.kernel.org/all/20230821062005.109771-1-ishitatsuy...@gmail.com/
[2]: 
https://gitlab.freedesktop.org/ishitatsuyuki/mesa/-/commits/vm-explicit-sync
[3]: https://github.com/ishitatsuyuki/linux/tree/explicit-sync-drm-misc-next

Tatsuyuki Ishi (6):
  drm/amdgpu: Don't implicit sync PRT maps.
  drm/amdgpu: Separate eviction from VM status.
  drm/amdgpu: Flush VM updates for split bindings eagerly.
  drm/amdgpu: Remove redundant state change after validation.
  drm/amdgpu: Add flag to disable implicit sync for GEM operations.
  drm/amdgpu: Bump amdgpu driver version.

 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  32 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 185 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  27 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |   1 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  18 +-
 include/uapi/drm/amdgpu_drm.h |   2 +
 11 files changed, 165 insertions(+), 120 deletions(-)

-- 
2.42.0

Re: [PATCH] drm/amd/swsmu: remove fw version check in sw_init.

2023-10-31 Thread Alex Deucher

On Mon, Oct 30, 2023 at 11:37 PM Li Ma  wrote:
>
> drop fw version check and use max table size to init table.
>
> Signed-off-by: Li Ma 
> Reviewed-by: Yifan Zhang 

Acked-by: Alex Deucher 

> ---
>  .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c| 17 -
>  1 file changed, 4 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> index 3efc6aed28f1..762b31455a0b 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> @@ -234,24 +234,15 @@ static int vangogh_tables_init(struct smu_context *smu)
>PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
> SMU_TABLE_INIT(tables, SMU_TABLE_ACTIVITY_MONITOR_COEFF, 
> sizeof(DpmActivityMonitorCoeffExt_t),
>PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
> +   SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, 
> max(sizeof(SmuMetrics_t), sizeof(SmuMetrics_legacy_t)),
> +  PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
>
> -   if (smu->smc_fw_if_version < 0x3) {
> -   SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, 
> sizeof(SmuMetrics_legacy_t),
> -   PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
> -   smu_table->metrics_table = 
> kzalloc(sizeof(SmuMetrics_legacy_t), GFP_KERNEL);
> -   } else {
> -   SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, 
> sizeof(SmuMetrics_t),
> -   PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
> -   smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), 
> GFP_KERNEL);
> -   }
> +   smu_table->metrics_table = kzalloc(max(sizeof(SmuMetrics_t), 
> sizeof(SmuMetrics_legacy_t)), GFP_KERNEL);
> if (!smu_table->metrics_table)
> goto err0_out;
> smu_table->metrics_time = 0;
>
> -   if (smu->smc_fw_version >= 0x043F3E00)
> -   smu_table->gpu_metrics_table_size = sizeof(struct 
> gpu_metrics_v2_3);
> -   else
> -   smu_table->gpu_metrics_table_size = sizeof(struct 
> gpu_metrics_v2_2);
> +   smu_table->gpu_metrics_table_size = max(sizeof(struct 
> gpu_metrics_v2_3), sizeof(struct gpu_metrics_v2_2));
> smu_table->gpu_metrics_table = 
> kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
> if (!smu_table->gpu_metrics_table)
> goto err1_out;
> --
> 2.25.1
>

Re: [REGRESSION] rx7600 stopped working after "1cfb4d612127 drm/amdgpu: put MQDs in VRAM"

2023-10-31 Thread Christian König


Hi Alexey,

trying to answer some of the questions since Alex is currently on vacation.

Am 30.10.23 um 17:01 schrieb Alexey Klimov:

Hi Alex,

On Thu, 26 Oct 2023 at 19:53, Alex Deucher  wrote:

On Thu, Oct 26, 2023 at 1:33 PM Alexey Klimov  wrote:

#regzbot introduced: 1cfb4d612127
#regzbot title: rx7600 stopped working after "1cfb4d612127 drm/amdgpu: put MQDs in 
VRAM"

Hi all,

I've been playing with RX7600 and it was observed that amdgpu stopped working 
between kernel 6.2 and 6.5.
Then I narrowed it down to 6.4 <-> 6.5-rc1 and finally bisect pointed at 
1cfb4d6121276a829aa94d0e32a7f5e1830ebc21
And I manually checked if it boots/works on the previous commit and the 
mentioned one.

I guess the log also reveals warning in error path. Please see below.

I didn't check any further. This is simple debian testing system with the 
following cmdline options:
root@avadebian:~# cat /proc/cmdline
BOOT_IMAGE=/boot/vmlinuz-6.6-rc7+ ignore_loglevel root=/dev/nvme1n1p2 ro 
nr_cpus=32

So far simple revert (patch is below) returns things back to normal-ish: there 
are huge graphics artifacts on Xorg/X11 under 6.1 to upstream kernel. 
Wayland-based sway works great without issues. Not sure where should I report 
this.

Please let me know if I can help debugging, testing or provide some other logs 
regarding 1cfb4d612127? Any cmdline options to collect more info?

Please make sure you have this patch as well:
e602157ec089240861cd641ee2c7c64eeaec09bf ("drm/amdgpu: fix S3 issue if
MQD in VRAM")
Please open a ticket here so we can track this:
https://gitlab.freedesktop.org/drm/amd/-/issues/

The patch was there during testing and I will open a ticket there.


I think I see the problem.  Please see if attached patch 1 fixes the
issue.  If this fixes it, that would also explain the issues you are
seeing with Xorg.  It would appear there are limitations around MMIO
access on your platform and unfortunately most graphics APIs require
unaligned access to MMIO space with the CPU.  We can fix the kernel
side pretty easily, but userspace will be a problem.

Does it mean that we don't have unaligned access to PCIe MMIO space on
this Adlink Ampere AVA arm64 platform?


Yes, that is perfectly possible and makes that platform unusable for 
most gfx applications.


We had tons of reports for different ARM boards and HW generations and 
even looped in some ARM engineers.


Essentially if you want to run high level GFX stacks like Vulkan and 
OpenGL on a platform with AMD or NVIDIA hardware your platform needs to 
fulfill certain requirements:


1. Correctly implement the PCIe spec!

    We actually have tons of boards where people attach an PCIe root 
complex to the ARM CPU and expect that to work. The problem is that this 
isn't PCIe compliant!
    You actually need the ARM IP for PCIe for this to work correctly, 
without that the root complex can't do system memory coherent 
transactions for example.


2. Be able to run all types of memory accesses on PCIe BARs. For example 
some platforms can't do large reads and writes (vector operations) to 
PCIe BARs, but can do them to system memory.


    This is actually not a hardware requirement, but one of the Vulkan 
and OpenGL stack and applications based on them.
    You can work around this by disallowing CPU access to PCIe BARs, 
but that either cripples performance or even results in applications not 
working at all.



Do you know if it is related to the thing that PCIe BARs are mapped as
a device memory and not a normal memory? (and they should be mapped as
normal memory)


Depends on what you mean with this. When changing the mapping type 
results in allowing unaligned and bigger accesses then yes that would help.


We will upstream the patches to make at least the kernel side work as 
expected, but that's fixing only half of the problem.


Regards,
Christian.



[..]


Just removing the addition of the AMDGPU_GEM_DOMAIN_VRAM domain here
will revert the behavior.  Since this is an important optimization and
we aren't seeing any issues on x86, I'd prefer to just limit your arch
to GTT if we can't resolve it some other way.

Try patch 1 and if that doesn't work we can fall back to some variant
of patch 2.

The patch 1 alone doesn't fix the issue. Both patches 1 & 2 do work
and amdgpu initializes. Still issues with Xorg and wayland works okay.

Apart from that I observed "amdgpu: [gfxhub] page fault" one time:

[   12.432567] amdgpu 000d:03:00.0: [drm:jpeg_v4_0_hw_init [amdgpu]]
JPEG decode initialized successfully.
[   12.442516] amdgpu 000d:03:00.0: amdgpu: [gfxhub] page fault
(src_id:0 ring:72 vmid:0 pasid:0, for process  pid 0 thread  pid 0)
[   12.454080] amdgpu 000d:03:00.0: amdgpu:   in page starting at
address 0x044b from client 10
[   12.457317] usb 1-4.4: new high-speed USB device number 4 using xhci_hcd
[   12.463548] amdgpu 000d:03:00.0: amdgpu:
GCVM_L2_PROTECTION_FAULT_STATUS:0x0890
[   12.463551] amdgpu 000d:03:00.0: amdgpu: Faulty UTCL2 client ID: CPF

RE: [PATCH] drm/amdgpu: handle extra UE register entries for gfx v9_4_3

2023-10-31 Thread Zhou1, Tao

[AMD Official Use Only - General]

In fact, the UE list has only one extra entry compared with the CE list.
Handling the CE and UE lists one by one would make the code structure simpler, while the 
current approach has fewer loop cycles; either way is fine with me.

Regards,
Tao

> -Original Message-
> From: Yang, Stanley 
> Sent: Tuesday, October 31, 2023 7:02 PM
> To: Zhou1, Tao ; amd-gfx@lists.freedesktop.org
> Cc: Chai, Thomas ; Zhou1, Tao 
> Subject: RE: [PATCH] drm/amdgpu: handle extra UE register entries for gfx
> v9_4_3
>
> [AMD Official Use Only - General]
>
> Is it better to handle CE and UE list separately?
> Anyway Reviewed-by: Stanley.Yang 
>
> Regards,
> Stanley
> > -Original Message-
> > From: amd-gfx  On Behalf Of Tao
> > Zhou
> > Sent: Tuesday, October 31, 2023 3:09 PM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Chai, Thomas ; Zhou1, Tao
> 
> > Subject: [PATCH] drm/amdgpu: handle extra UE register entries for gfx
> > v9_4_3
> >
> > The UE register list is larger than the CE list.
> >
> > Reported-by: yipeng.c...@amd.com
> > Signed-off-by: Tao Zhou 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 38
> > +
> >  1 file changed, 38 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> > index 41bbabd9ad4d..046ae95b366a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> > @@ -3799,6 +3799,27 @@ static void
> > gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
> >   }
> >   }
> >
> > + /* handle extra register entries of UE */
> > + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
> > + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
> > + for (k = 0; k <
> > gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
> > + /* no need to select if instance number is 1 
> > */
> > + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1
> > + ||
> > +
> >   gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
> > +
> > + gfx_v9_4_3_xcc_select_se_sh(adev, j,
> > 0, k, xcc_id);
> > +
> > +
> >   amdgpu_ras_inst_query_ras_error_count(adev,
> > +
> >   &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
> > + 1,
> > +
> >   gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_t
> > ype].mem_id_ent,
> > +
> >   gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_t
> > ype].size,
> > + GET_INST(GC, xcc_id),
> > +
> >   AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
> > + &ue_count);
> > + }
> > + }
> > + }
> > +
> >   gfx_v9_4_3_xcc_select_se_sh(adev, 0x, 0x, 0x,
> >   xcc_id);
> >   mutex_unlock(&adev->grbm_idx_mutex);
> > @@ -3838,6 +3859,23 @@ static void
> > gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
> >   }
> >   }
> >
> > + /* handle extra register entries of UE */
> > + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
> > + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
> > + for (k = 0; k <
> > gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
> > + /* no need to select if instance number is 1 
> > */
> > + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1
> > + ||
> > +
> >   gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
> > +
> > + gfx_v9_4_3_xcc_select_se_sh(adev, j,
> > 0, k, xcc_id);
> > +
> > +
> >   amdgpu_ras_inst_reset_ras_error_count(adev,
> > +
> >   &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
> > + 1,
> > + GET_INST(GC, xcc_id));
> > + }
> > + }
> > + }
> > +
> >   gfx_v9_4_3_xcc_select_se_sh(adev, 0x, 0x, 0x,
> >   xcc_id);
> >   mutex_unlock(&adev->grbm_idx_mutex);
> > --
> > 2.35.1
>

RE: [PATCH] drm/amdgpu: handle extra UE register entries for gfx v9_4_3

2023-10-31 Thread Yang, Stanley

[AMD Official Use Only - General]

Is it better to handle CE and UE list separately?
Anyway Reviewed-by: Stanley.Yang 

Regards,
Stanley
> -Original Message-
> From: amd-gfx  On Behalf Of Tao
> Zhou
> Sent: Tuesday, October 31, 2023 3:09 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Chai, Thomas ; Zhou1, Tao
> 
> Subject: [PATCH] drm/amdgpu: handle extra UE register entries for gfx v9_4_3
>
> The UE register list is larger than the CE list.
>
> Reported-by: yipeng.c...@amd.com
> Signed-off-by: Tao Zhou 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 38
> +
>  1 file changed, 38 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index 41bbabd9ad4d..046ae95b366a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -3799,6 +3799,27 @@ static void
> gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
>   }
>   }
>
> + /* handle extra register entries of UE */
> + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
> + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
> + for (k = 0; k <
> gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
> + /* no need to select if instance number is 1 */
> + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
> +
>   gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
> + gfx_v9_4_3_xcc_select_se_sh(adev, j,
> 0, k, xcc_id);
> +
> +
>   amdgpu_ras_inst_query_ras_error_count(adev,
> +
>   &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
> + 1,
> +
>   gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_t
> ype].mem_id_ent,
> +
>   gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_t
> ype].size,
> + GET_INST(GC, xcc_id),
> +
>   AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
> + &ue_count);
> + }
> + }
> + }
> +
>   gfx_v9_4_3_xcc_select_se_sh(adev, 0x, 0x, 0x,
>   xcc_id);
>   mutex_unlock(&adev->grbm_idx_mutex);
> @@ -3838,6 +3859,23 @@ static void
> gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
>   }
>   }
>
> + /* handle extra register entries of UE */
> + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
> + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
> + for (k = 0; k <
> gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
> + /* no need to select if instance number is 1 */
> + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
> +
>   gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
> + gfx_v9_4_3_xcc_select_se_sh(adev, j,
> 0, k, xcc_id);
> +
> +
>   amdgpu_ras_inst_reset_ras_error_count(adev,
> +
>   &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
> + 1,
> + GET_INST(GC, xcc_id));
> + }
> + }
> + }
> +
>   gfx_v9_4_3_xcc_select_se_sh(adev, 0x, 0x, 0x,
>   xcc_id);
>   mutex_unlock(&adev->grbm_idx_mutex);
> --
> 2.35.1

RE: [PATCH] drm/amdgpu: check recovery status of xgmi hive in ras_reset_error_count

2023-10-31 Thread Yang, Stanley

[AMD Official Use Only - General]

Reviewed-by: Stanley.Yang 

Regards,
Stanley
> -Original Message-
> From: amd-gfx  On Behalf Of Tao
> Zhou
> Sent: Tuesday, October 31, 2023 3:13 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhou1, Tao ; Zhang, Hawking
> 
> Subject: [PATCH] drm/amdgpu: check recovery status of xgmi hive in
> ras_reset_error_count
>
> Handle xgmi hive case.
>
> Suggested-by: Hawking Zhang 
> Signed-off-by: Tao Zhou 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 753260745554..0093c28f4343 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1226,6 +1226,8 @@ int amdgpu_ras_reset_error_count(struct
> amdgpu_device *adev,
>   struct amdgpu_ras_block_object *block_obj =
> amdgpu_ras_get_ras_block(adev, block, 0);
>   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
>   const struct amdgpu_mca_smu_funcs *mca_funcs = adev-
> >mca.mca_funcs;
> + struct amdgpu_hive_info *hive;
> + int hive_ras_recovery = 0;
>
>   if (!block_obj || !block_obj->hw_ops) {
>   dev_dbg_once(adev->dev, "%s doesn't config RAS
> function\n", @@ -1237,8 +1239,15 @@ int
> amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
>   !amdgpu_ras_get_mca_debug_mode(adev))
>   return -EOPNOTSUPP;
>
> + hive = amdgpu_get_xgmi_hive(adev);
> + if (hive) {
> + hive_ras_recovery = atomic_read(&hive->ras_recovery);
> + amdgpu_put_xgmi_hive(hive);
> + }
> +
>   /* skip ras error reset in gpu reset */
> - if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery)) &&
> + if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) ||
> + hive_ras_recovery) &&
>   mca_funcs && mca_funcs->mca_set_debug_mode)
>   return -EOPNOTSUPP;
>
> --
> 2.35.1

Re: [PATCH] /drm/amdgpu: correct chunk_ptr to a pointer to chunk.

2023-10-31 Thread Christian König


Am 31.10.23 um 03:55 schrieb YuanShang:

The variable "chunk_ptr" should be a pointer to a
struct drm_amdgpu_cs_chunk rather than a pointer to
a pointer to that struct.
Signed-off-by: YuanShang 


Good catch, Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 74769afaa33d..551b9466a441 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -208,7 +208,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
}
  
  	for (i = 0; i < p->nchunks; i++) {

-   struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
+   struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
struct drm_amdgpu_cs_chunk user_chunk;
uint32_t __user *cdata;

Re: [PATCH] drm/amd/display: remove redundant check

2023-10-31 Thread José Pekkarinen


On 2023-10-30 15:52, Aurabindo Pillai wrote:

On 10/29/2023 8:44 AM, José Pekkarinen wrote:

This patch addresses the following warning spotted by
using coccinelle where the case checked does the same
as the else case.

drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c:4664:8-10: 
WARNING: possible condition with no effect (if == else)


Signed-off-by: José Pekkarinen 
---
  .../drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c   | 4 


  1 file changed, 4 deletions(-)

diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c

index ecea008f19d3..d940dfa5ae43 100644
--- 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c

@@ -4661,10 +4661,6 @@ void dml32_CalculateMinAndMaxPrefetchMode(
  	} else if (AllowForPStateChangeOrStutterInVBlankFinal == 
dm_prefetch_support_uclk_fclk_and_stutter) {

*MinPrefetchMode = 0;
*MaxPrefetchMode = 0;
-   } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
-   dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
-   *MinPrefetchMode = 0;
-   *MaxPrefetchMode = 3;
} else {
*MinPrefetchMode = 0;
*MaxPrefetchMode = 3;


What tree did you use to generate the patch? On amd-staging-drm-next,
MaxPrefetchMode is 0 for the second last branch, which is the correct
one, so this patch isn't needed.


I'm using the stable tree, sorry, if it is out of
date just ignore it then.

Thanks!

José.

[bug report] drm/amd/display: Add interface to enable DPIA trace

2023-10-31 Thread Dan Carpenter

Hello Stylon Wang,

The patch 71ba6b577a35: "drm/amd/display: Add interface to enable
DPIA trace" from Jun 30, 2023 (linux-next), leads to the following
Smatch static checker warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:1041 
dc_dmub_srv_enable_dpia_trace()
error: we previously assumed 'dc_dmub_srv' could be null (see line 1040)

drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c
1033 void dc_dmub_srv_enable_dpia_trace(const struct dc *dc)
1034 {
1035 struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
1036 struct dmub_srv *dmub;
1037 enum dmub_status status;
1038 static const uint32_t timeout_us = 30;
1039 
1040 if (!dc_dmub_srv || !dc_dmub_srv->dmub) {
--> 1041 DC_LOG_ERROR("%s: invalid parameters.", __func__);
 ^
This macro dereferences dc_dmub_srv.

1042 return;
1043 }
1044 
1045 dmub = dc_dmub_srv->dmub;

regards,
dan carpenter

Re: [PATCH] drm/amd/display: avoid variable reinitialization

2023-10-31 Thread Bragatheswaran Manickavel




On 24/10/23 23:41, Bragatheswaran Manickavel wrote:

The member variable enable_hpo_pg_support is already initialized
and hence the reinitialization instruction can be removed. Issue
identified using the doubleinit.cocci Coccinelle semantic patch script.

Signed-off-by: Bragatheswaran Manickavel 
---
  drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c | 1 -
  1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c
index 99d55b958977..1fd9df8da09c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c
@@ -739,7 +739,6 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_boot_optimizations = false,
.disable_unbounded_requesting = false,
.disable_mem_low_power = false,
-   .enable_hpo_pg_support = false,
//must match enable_single_display_2to1_odm_policy to support dynamic 
ODM transitions
.enable_double_buffered_dsc_pg_support = true,
.enable_dp_dig_pixel_rate_div_policy = 1,



just a friendly ping

Thanks,
Bragathe

[PATCH] drm/amdgpu: check recovery status of xgmi hive in ras_reset_error_count

2023-10-31 Thread Tao Zhou

Handle xgmi hive case.

Suggested-by: Hawking Zhang 
Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 753260745554..0093c28f4343 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1226,6 +1226,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device 
*adev,
struct amdgpu_ras_block_object *block_obj = 
amdgpu_ras_get_ras_block(adev, block, 0);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+   struct amdgpu_hive_info *hive;
+   int hive_ras_recovery = 0;
 
if (!block_obj || !block_obj->hw_ops) {
dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
@@ -1237,8 +1239,15 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device 
*adev,
!amdgpu_ras_get_mca_debug_mode(adev))
return -EOPNOTSUPP;
 
+   hive = amdgpu_get_xgmi_hive(adev);
+   if (hive) {
+   hive_ras_recovery = atomic_read(&hive->ras_recovery);
+   amdgpu_put_xgmi_hive(hive);
+   }
+
/* skip ras error reset in gpu reset */
-   if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery)) &&
+   if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) ||
+   hive_ras_recovery) &&
mca_funcs && mca_funcs->mca_set_debug_mode)
return -EOPNOTSUPP;
 
-- 
2.35.1

[PATCH] drm/amdgpu: handle extra UE register entries for gfx v9_4_3

2023-10-31 Thread Tao Zhou

The UE register list is larger than the CE list.

Reported-by: yipeng.c...@amd.com
Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 38 +
 1 file changed, 38 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 41bbabd9ad4d..046ae95b366a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -3799,6 +3799,27 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct 
amdgpu_device *adev,
}
}
 
+   /* handle extra register entries of UE */
+   for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+   for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
+   for (k = 0; k < 
gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
+   /* no need to select if instance number is 1 */
+   if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
+   
gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
+   gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, 
k, xcc_id);
+
+   amdgpu_ras_inst_query_ras_error_count(adev,
+   &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+   1,
+   
gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent,
+   
gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size,
+   GET_INST(GC, xcc_id),
+   AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+   &ue_count);
+   }
+   }
+   }
+
gfx_v9_4_3_xcc_select_se_sh(adev, 0x, 0x, 0x,
xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
@@ -3838,6 +3859,23 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct 
amdgpu_device *adev,
}
}
 
+   /* handle extra register entries of UE */
+   for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+   for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
+   for (k = 0; k < 
gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
+   /* no need to select if instance number is 1 */
+   if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
+   
gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
+   gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, 
k, xcc_id);
+
+   amdgpu_ras_inst_reset_ras_error_count(adev,
+   &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+   1,
+   GET_INST(GC, xcc_id));
+   }
+   }
+   }
+
gfx_v9_4_3_xcc_select_se_sh(adev, 0x, 0x, 0x,
xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
-- 
2.35.1

[PATCH] drm/amdgpu: Don't warn for unsupported set_xgmi_plpd_mode

2023-10-31 Thread Tao Zhou

set_xgmi_plpd_mode may be unsupported and this isn't an error, so there is
no need to print a warning for it.

Suggested-by: lijo.la...@amd.com
Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 0533f873001b..c9b09bddbcdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -1138,7 +1138,8 @@ static int amdgpu_ras_error_inject_xgmi(struct 
amdgpu_device *adev,
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
dev_warn(adev->dev, "Failed to disallow df cstate");
 
-   if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW))
+   ret = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW);
+   if (ret && ret != -EOPNOTSUPP)
dev_warn(adev->dev, "Failed to disallow XGMI power down");
 
ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
@@ -1146,7 +1147,8 @@ static int amdgpu_ras_error_inject_xgmi(struct 
amdgpu_device *adev,
if (amdgpu_ras_intr_triggered())
return ret;
 
-   if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT))
+   ret = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT);
+   if (ret && ret != -EOPNOTSUPP)
dev_warn(adev->dev, "Failed to allow XGMI power down");
 
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
-- 
2.35.1

79 matches

Mail list logo