RE: [PATCH v2] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Khatri, Sunil
[AMD Official Use Only - General]

Ignore this as I have send v3.

-Original Message-
From: Sunil Khatri 
Sent: Tuesday, March 19, 2024 8:41 PM
To: Deucher, Alexander ; Koenig, Christian 
; Sharma, Shashank 
Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
linux-ker...@vger.kernel.org; Zhang, Hawking ; Kuehling, 
Felix ; Lazar, Lijo ; Khatri, Sunil 

Subject: [PATCH v2] drm/amdgpu: refactor code to reuse system information

Refactor the code so debugfs and devcoredump can reuse the common information 
and avoid unnecessary copy of it.

created a new file which would be the right place to hold functions which will 
be used between ioctl, debugfs and devcoredump.

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c | 146 +++  
drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.h |  33 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 117 +--
 4 files changed, 182 insertions(+), 116 deletions(-)  create mode 100644 
drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..2c5c800c1ed6 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+amdgpu_coreinfo.o

 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c
new file mode 100644
index ..597fc9d432ce
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+obtaining a
+ * copy of this software and associated documentation files (the
+"Software"),
+ * to deal in the Software without restriction, including without
+limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
+sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom
+the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
+SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
+DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_coreinfo.h"
+#include "amd_pcie.h"
+
+
+void amdgpu_coreinfo_devinfo(struct amdgpu_device *adev, struct
+drm_amdgpu_info_device *dev_info) {
+   int ret;
+   uint64_t vm_size;
+   uint32_t pcie_gen_mask;
+
+   dev_info->device_id = adev->pdev->device;
+   dev_info->chip_rev = adev->rev_id;
+   dev_info->external_rev = adev->external_rev_id;
+   dev_info->pci_rev = adev->pdev->revision;
+   dev_info->family = adev->family;
+   dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+   dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+   /* return all clocks in KHz */
+   dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+   if (adev->pm.dpm_enabled) {
+   dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 
10;
+   dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 
10;
+   dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 
10;
+   dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 
10;
+   } else {
+   dev_info->max_engine_clock =
+   dev_info->min_engine_clock =
+   adev->clock.default_sclk * 10;
+   dev_info->max_memory_clock =
+   dev_info->min_memory_clock =
+   adev->clock.default_mclk * 10;
+   }
+   dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
+   dev_info->num_rb_pipes = adev->gfx.config.max_backends_per_se *
+   

[PATCH v3] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Sunil Khatri
Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between ioctl, debugfs
and devcoredump.

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c | 146 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.h |  33 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 117 +--
 4 files changed, 182 insertions(+), 116 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..2c5c800c1ed6 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_coreinfo.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c
new file mode 100644
index ..597fc9d432ce
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_coreinfo.h"
+#include "amd_pcie.h"
+
+
+void amdgpu_coreinfo_devinfo(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)
+{
+   int ret;
+   uint64_t vm_size;
+   uint32_t pcie_gen_mask;
+
+   dev_info->device_id = adev->pdev->device;
+   dev_info->chip_rev = adev->rev_id;
+   dev_info->external_rev = adev->external_rev_id;
+   dev_info->pci_rev = adev->pdev->revision;
+   dev_info->family = adev->family;
+   dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+   dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+   /* return all clocks in KHz */
+   dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+   if (adev->pm.dpm_enabled) {
+   dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 
10;
+   dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 
10;
+   dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 
10;
+   dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 
10;
+   } else {
+   dev_info->max_engine_clock =
+   dev_info->min_engine_clock =
+   adev->clock.default_sclk * 10;
+   dev_info->max_memory_clock =
+   dev_info->min_memory_clock =
+   adev->clock.default_mclk * 10;
+   }
+   dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
+   dev_info->num_rb_pipes = adev->gfx.config.max_backends_per_se *
+   adev->gfx.config.max_shader_engines;
+   dev_info->num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
+   dev_info->ids_flags = 0;
+   if (adev->flags & AMD_IS_APU)
+   dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
+   if (adev->gfx.mcbp)
+   dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
+   if (amdgpu_is_tmz(adev))
+   dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
+   if 

Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Khatri, Sunil

Sent a new patch based on discussion with Alex.

On 3/19/2024 8:34 PM, Christian König wrote:

Am 19.03.24 um 15:59 schrieb Alex Deucher:

On Tue, Mar 19, 2024 at 10:56 AM Christian König
 wrote:

Am 19.03.24 um 15:26 schrieb Alex Deucher:
On Tue, Mar 19, 2024 at 8:32 AM Sunil Khatri  
wrote:

Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between sysfs, debugfs
and devcoredump.

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
   drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 


   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
   4 files changed, 157 insertions(+), 115 deletions(-)
   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile

index 4536c8ad0e11..05d34f4b18f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o 
amdgpu_doorbell_mgr.o amdgpu_kms.o \
  amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o 
amdgpu_rap.o \

  amdgpu_fw_attestation.o amdgpu_securedisplay.o \
  amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o 
amdgpu_lsdma.o \

-   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_devinfo.o


   amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 9c62552bec34..0267870aa9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1609,4 +1609,5 @@ extern const struct attribute_group 
amdgpu_vram_mgr_attr_group;

   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
   extern const struct attribute_group amdgpu_flash_attr_group;

+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info);

   #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

new file mode 100644
index ..d2c15a1dcb0d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to 
whom the
+ * Software is furnished to do so, subject to the following 
conditions:

+ *
+ * The above copyright notice and this permission notice shall be 
included in

+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
USE OR

+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amd_pcie.h"
+
+#include 
+
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)

We can probably keep this in amdgpu_kms.c unless that file is getting
too big.  I don't think it warrants a new file at this point.  If you
do keep it in amdgpu_kms.c, I'd recommend renaming it to something
like amdgpu_kms_device_info() to keep the naming conventions.

We should not be using this for anything new in the first place.

A whole bunch of the stuff inside the devinfo structure has been
deprecated because we found that putting everything into one structure
was a bad idea.

It's a convenient way to collect a lot of useful information that we
want in the core dump.  Plus it's not going anywhere because we need
to keep compatibility in the IOCTL.


Yeah and exactly that is what I'm strictly against. The devinfo wasn't 
used for new stuff because we found that it is way to inflexible.


That's why we have multiple separate IOCTLs for the memory and 
firmware information for example.


We should really *not* reuse that for the device core dumping.

Rather just use the same information from the different IPs and 
subsystems directly. E.g. add a function to the VM, GFX etc for 
printing out devcoredump infos.


I have pushed new v2 based 

[PATCH v2] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Sunil Khatri
Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between ioctl, debugfs
and devcoredump.

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c | 146 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.h |  33 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 117 +--
 4 files changed, 182 insertions(+), 116 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..2c5c800c1ed6 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_coreinfo.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c
new file mode 100644
index ..597fc9d432ce
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_coreinfo.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_coreinfo.h"
+#include "amd_pcie.h"
+
+
+void amdgpu_coreinfo_devinfo(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)
+{
+   int ret;
+   uint64_t vm_size;
+   uint32_t pcie_gen_mask;
+
+   dev_info->device_id = adev->pdev->device;
+   dev_info->chip_rev = adev->rev_id;
+   dev_info->external_rev = adev->external_rev_id;
+   dev_info->pci_rev = adev->pdev->revision;
+   dev_info->family = adev->family;
+   dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+   dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+   /* return all clocks in KHz */
+   dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+   if (adev->pm.dpm_enabled) {
+   dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 
10;
+   dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 
10;
+   dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 
10;
+   dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 
10;
+   } else {
+   dev_info->max_engine_clock =
+   dev_info->min_engine_clock =
+   adev->clock.default_sclk * 10;
+   dev_info->max_memory_clock =
+   dev_info->min_memory_clock =
+   adev->clock.default_mclk * 10;
+   }
+   dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
+   dev_info->num_rb_pipes = adev->gfx.config.max_backends_per_se *
+   adev->gfx.config.max_shader_engines;
+   dev_info->num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
+   dev_info->ids_flags = 0;
+   if (adev->flags & AMD_IS_APU)
+   dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
+   if (adev->gfx.mcbp)
+   dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
+   if (amdgpu_is_tmz(adev))
+   dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
+   if 

Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Christian König

Am 19.03.24 um 15:59 schrieb Alex Deucher:

On Tue, Mar 19, 2024 at 10:56 AM Christian König
 wrote:

Am 19.03.24 um 15:26 schrieb Alex Deucher:

On Tue, Mar 19, 2024 at 8:32 AM Sunil Khatri  wrote:

Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between sysfs, debugfs
and devcoredump.

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
   drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
   4 files changed, 157 insertions(+), 115 deletions(-)
   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..05d34f4b18f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
  amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
  amdgpu_fw_attestation.o amdgpu_securedisplay.o \
  amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_devinfo.o

   amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec34..0267870aa9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1609,4 +1609,5 @@ extern const struct attribute_group 
amdgpu_vram_mgr_attr_group;
   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
   extern const struct attribute_group amdgpu_flash_attr_group;

+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info);
   #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
new file mode 100644
index ..d2c15a1dcb0d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amd_pcie.h"
+
+#include 
+
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)

We can probably keep this in amdgpu_kms.c unless that file is getting
too big.  I don't think it warrants a new file at this point.  If you
do keep it in amdgpu_kms.c, I'd recommend renaming it to something
like amdgpu_kms_device_info() to keep the naming conventions.

We should not be using this for anything new in the first place.

A whole bunch of the stuff inside the devinfo structure has been
deprecated because we found that putting everything into one structure
was a bad idea.

It's a convenient way to collect a lot of useful information that we
want in the core dump.  Plus it's not going anywhere because we need
to keep compatibility in the IOCTL.


Yeah and exactly that is what I'm strictly against. The devinfo wasn't 
used for new stuff because we found that it is way to inflexible.


That's why we have multiple separate IOCTLs for the memory and firmware 
information for example.


We should really *not* reuse that for the device core dumping.

Rather just use the same information from the different IPs and 
subsystems directly. E.g. add a function to the VM, GFX etc for printing 
out devcoredump infos.


Christian.



Alex


Regards,
Christian.


+{
+   int ret;
+   uint64_t vm_size;
+   uint32_t pcie_gen_mask;
+
+   if (dev_info == 

Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Alex Deucher
On Tue, Mar 19, 2024 at 10:56 AM Christian König
 wrote:
>
> Am 19.03.24 um 15:26 schrieb Alex Deucher:
> > On Tue, Mar 19, 2024 at 8:32 AM Sunil Khatri  wrote:
> >> Refactor the code so debugfs and devcoredump can reuse
> >> the common information and avoid unnecessary copy of it.
> >>
> >> created a new file which would be the right place to
> >> hold functions which will be used between sysfs, debugfs
> >> and devcoredump.
> >>
> >> Cc: Christian König 
> >> Cc: Alex Deucher 
> >> Signed-off-by: Sunil Khatri 
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
> >>   4 files changed, 157 insertions(+), 115 deletions(-)
> >>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
> >> b/drivers/gpu/drm/amd/amdgpu/Makefile
> >> index 4536c8ad0e11..05d34f4b18f5 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> >> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> >> @@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
> >> amdgpu_kms.o \
> >>  amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
> >>  amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> >>  amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> >> -   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
> >> +   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
> >> amdgpu_devinfo.o
> >>
> >>   amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> index 9c62552bec34..0267870aa9b1 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> @@ -1609,4 +1609,5 @@ extern const struct attribute_group 
> >> amdgpu_vram_mgr_attr_group;
> >>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
> >>   extern const struct attribute_group amdgpu_flash_attr_group;
> >>
> >> +int amdgpu_device_info(struct amdgpu_device *adev, struct 
> >> drm_amdgpu_info_device *dev_info);
> >>   #endif
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> >> new file mode 100644
> >> index ..d2c15a1dcb0d
> >> --- /dev/null
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> >> @@ -0,0 +1,151 @@
> >> +// SPDX-License-Identifier: MIT
> >> +/*
> >> + * Copyright 2024 Advanced Micro Devices, Inc.
> >> + *
> >> + * Permission is hereby granted, free of charge, to any person obtaining a
> >> + * copy of this software and associated documentation files (the 
> >> "Software"),
> >> + * to deal in the Software without restriction, including without 
> >> limitation
> >> + * the rights to use, copy, modify, merge, publish, distribute, 
> >> sublicense,
> >> + * and/or sell copies of the Software, and to permit persons to whom the
> >> + * Software is furnished to do so, subject to the following conditions:
> >> + *
> >> + * The above copyright notice and this permission notice shall be 
> >> included in
> >> + * all copies or substantial portions of the Software.
> >> + *
> >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
> >> EXPRESS OR
> >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
> >> MERCHANTABILITY,
> >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 
> >> SHALL
> >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES 
> >> OR
> >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> >> + * OTHER DEALINGS IN THE SOFTWARE.
> >> + *
> >> + */
> >> +
> >> +#include "amdgpu.h"
> >> +#include "amd_pcie.h"
> >> +
> >> +#include 
> >> +
> >> +int amdgpu_device_info(struct amdgpu_device *adev, struct 
> >> drm_amdgpu_info_device *dev_info)
> > We can probably keep this in amdgpu_kms.c unless that file is getting
> > too big.  I don't think it warrants a new file at this point.  If you
> > do keep it in amdgpu_kms.c, I'd recommend renaming it to something
> > like amdgpu_kms_device_info() to keep the naming conventions.
>
> We should not be using this for anything new in the first place.
>
> A whole bunch of the stuff inside the devinfo structure has been
> deprecated because we found that putting everything into one structure
> was a bad idea.

It's a convenient way to collect a lot of useful information that we
want in the core dump.  Plus it's not going anywhere because we need
to keep compatibility in the IOCTL.

Alex

>
> Regards,
> Christian.
>
> >
> >> +{
> >> +   int ret;
> >> +   uint64_t vm_size;
> >> +   uint32_t pcie_gen_mask;
> >> +
> >> +   if (dev_info == NULL)
> 

Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Christian König

Am 19.03.24 um 15:26 schrieb Alex Deucher:

On Tue, Mar 19, 2024 at 8:32 AM Sunil Khatri  wrote:

Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between sysfs, debugfs
and devcoredump.

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
  drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
  4 files changed, 157 insertions(+), 115 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..05d34f4b18f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
 amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
 amdgpu_fw_attestation.o amdgpu_securedisplay.o \
 amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_devinfo.o

  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec34..0267870aa9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1609,4 +1609,5 @@ extern const struct attribute_group 
amdgpu_vram_mgr_attr_group;
  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
  extern const struct attribute_group amdgpu_flash_attr_group;

+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info);
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
new file mode 100644
index ..d2c15a1dcb0d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amd_pcie.h"
+
+#include 
+
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)

We can probably keep this in amdgpu_kms.c unless that file is getting
too big.  I don't think it warrants a new file at this point.  If you
do keep it in amdgpu_kms.c, I'd recommend renaming it to something
like amdgpu_kms_device_info() to keep the naming conventions.


We should not be using this for anything new in the first place.

A whole bunch of the stuff inside the devinfo structure has been 
deprecated because we found that putting everything into one structure 
was a bad idea.


Regards,
Christian.




+{
+   int ret;
+   uint64_t vm_size;
+   uint32_t pcie_gen_mask;
+
+   if (dev_info == NULL)
+   return -EINVAL;
+
+   dev_info->device_id = adev->pdev->device;
+   dev_info->chip_rev = adev->rev_id;
+   dev_info->external_rev = adev->external_rev_id;
+   dev_info->pci_rev = adev->pdev->revision;
+   dev_info->family = adev->family;
+   dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+   dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+   /* return all clocks in KHz */
+   dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+   if (adev->pm.dpm_enabled) {
+   dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 
10;
+   dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 
10;
+   

Re: [PATCH v2] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Christian König

Am 19.03.24 um 15:44 schrieb Khatri, Sunil:


On 3/19/2024 8:07 PM, Christian König wrote:

Am 19.03.24 um 15:25 schrieb Sunil Khatri:

Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between ioctl, debugfs
and devcoredump.


Ok, taking a closer look that is certainly not a good idea.

The devinfo structure was just created because somebody thought that 
mixing all that stuff into one structure would be a good idea.


We have pretty much deprecated that approach and should *really* not 
change anything here any more.
To support the ioctl we are keeping that information same without 
changing it. The intent to add a new file is because we will have more 
information coming in this new file. Next in line is firmware 
information which is again a huge function with lot of information and 
to use that information in devcoredump and ioctl and sysfs the new 
file seems to be right idea after some discussions.
FYI: this will not be just one function in new file but more to come 
so code can be reused without copying it.


That is exactly the point. I would say we should really *not* use that 
code for devcoredump.


This code is rather deprecated and we expose a lot of the information 
through other IOCTLs.


So the problem here isn't that you want to add a new file, but rather 
that you touch deprecated functionality.


Regards,
Christian.



Regards,
Christian.



Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
  drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 


  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
  4 files changed, 157 insertions(+), 115 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile

index 4536c8ad0e11..05d34f4b18f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \

  amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
  amdgpu_fw_attestation.o amdgpu_securedisplay.o \
  amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-    amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+    amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_devinfo.o

    amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 9c62552bec34..0267870aa9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1609,4 +1609,5 @@ extern const struct attribute_group 
amdgpu_vram_mgr_attr_group;

  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
  extern const struct attribute_group amdgpu_flash_attr_group;
  +int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info);

  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

new file mode 100644
index ..fdcbc1984031
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to 
whom the
+ * Software is furnished to do so, subject to the following 
conditions:

+ *
+ * The above copyright notice and this permission notice shall be 
included in

+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
USE OR

+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amd_pcie.h"
+
+#include 
+
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)

+{
+    int ret;
+    uint64_t vm_size;
+    uint32_t pcie_gen_mask;
+
+    if (dev_info == NULL)
+    return -EINVAL;
+
+    dev_info->device_id = adev->pdev->device;
+    

[linux-next:master] BUILD REGRESSION 226d3c72fcde130a99d760895ebdd20e78e02cb5

2024-03-19 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 226d3c72fcde130a99d760895ebdd20e78e02cb5  Add linux-next specific 
files for 20240319

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arc-allmodconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arc-allyesconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm-allmodconfig
|   |-- 
arch-arm-mach-omap2-prm33xx.c:warning:expecting-prototype-for-am33xx_prm_global_warm_sw_reset().-Prototype-was-for-am33xx_prm_global_sw_reset()-instead
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm-allyesconfig
|   |-- 
arch-arm-mach-omap2-prm33xx.c:warning:expecting-prototype-for-am33xx_prm_global_warm_sw_reset().-Prototype-was-for-am33xx_prm_global_sw_reset()-instead
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm-omap2plus_defconfig
|   |-- 
arch-arm-mach-omap2-prm33xx.c:warning:expecting-prototype-for-am33xx_prm_global_warm_sw_reset().-Prototype-was-for-am33xx_prm_global_sw_reset()-instead
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm-randconfig-r062-20240319
|   `-- 
arch-arm-mach-omap2-prm33xx.c:warning:expecting-prototype-for-am33xx_prm_global_warm_sw_reset().-Prototype-was-for-am33xx_prm_global_sw_reset()-instead
|-- arm64-defconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- csky-allmodconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- csky-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- i386-allmodconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- i386-allyesconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- i386-buildonly-randconfig-005-20240319
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- i386-randconfig-011-20240319
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- i386-randconfig-062-20240319
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- loongarch-allmodconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- loongarch-defconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- loongarch-randconfig-002-20240319
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|-- loongarch-randconfig-r051-20240319
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|-- m68k-allmodconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- m68k-allyesconfig
|   `-- 
fs-ubifs

Re: [PATCH v2] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Khatri, Sunil



On 3/19/2024 8:07 PM, Christian König wrote:

Am 19.03.24 um 15:25 schrieb Sunil Khatri:

Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between ioctl, debugfs
and devcoredump.


Ok, taking a closer look that is certainly not a good idea.

The devinfo structure was just created because somebody thought that 
mixing all that stuff into one structure would be a good idea.


We have pretty much deprecated that approach and should *really* not 
change anything here any more.
To support the ioctl we are keeping that information same without 
changing it. The intent to add a new file is because we will have more 
information coming in this new file. Next in line is firmware 
information which is again a huge function with lot of information and 
to use that information in devcoredump and ioctl and sysfs the new file 
seems to be right idea after some discussions.
FYI: this will not be just one function in new file but more to come so 
code can be reused without copying it.


Regards,
Christian.



Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
  drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
  4 files changed, 157 insertions(+), 115 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile

index 4536c8ad0e11..05d34f4b18f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \

  amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
  amdgpu_fw_attestation.o amdgpu_securedisplay.o \
  amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-    amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+    amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_devinfo.o

    amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 9c62552bec34..0267870aa9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1609,4 +1609,5 @@ extern const struct attribute_group 
amdgpu_vram_mgr_attr_group;

  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
  extern const struct attribute_group amdgpu_flash_attr_group;
  +int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info);

  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

new file mode 100644
index ..fdcbc1984031
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom 
the

+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
included in

+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
USE OR

+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amd_pcie.h"
+
+#include 
+
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)

+{
+    int ret;
+    uint64_t vm_size;
+    uint32_t pcie_gen_mask;
+
+    if (dev_info == NULL)
+    return -EINVAL;
+
+    dev_info->device_id = adev->pdev->device;
+    dev_info->chip_rev = adev->rev_id;
+    dev_info->external_rev = adev->external_rev_id;
+    dev_info->pci_rev = adev->pdev->revision;
+    dev_info->family = adev->family;
+    dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+    dev_info->num_shader_arrays_per_engine = 
adev->gfx.config.max_sh_per_se;

+    /* return all clocks in KHz */
+    

Re: [PATCH v2] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Christian König

Am 19.03.24 um 15:25 schrieb Sunil Khatri:

Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between ioctl, debugfs
and devcoredump.


Ok, taking a closer look that is certainly not a good idea.

The devinfo structure was just created because somebody thought that 
mixing all that stuff into one structure would be a good idea.


We have pretty much deprecated that approach and should *really* not 
change anything here any more.


Regards,
Christian.



Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
  drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
  4 files changed, 157 insertions(+), 115 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..05d34f4b18f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_devinfo.o
  
  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 9c62552bec34..0267870aa9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1609,4 +1609,5 @@ extern const struct attribute_group 
amdgpu_vram_mgr_attr_group;
  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
  extern const struct attribute_group amdgpu_flash_attr_group;
  
+int amdgpu_device_info(struct amdgpu_device *adev, struct drm_amdgpu_info_device *dev_info);

  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
new file mode 100644
index ..fdcbc1984031
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amd_pcie.h"
+
+#include 
+
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)
+{
+   int ret;
+   uint64_t vm_size;
+   uint32_t pcie_gen_mask;
+
+   if (dev_info == NULL)
+   return -EINVAL;
+
+   dev_info->device_id = adev->pdev->device;
+   dev_info->chip_rev = adev->rev_id;
+   dev_info->external_rev = adev->external_rev_id;
+   dev_info->pci_rev = adev->pdev->revision;
+   dev_info->family = adev->family;
+   dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+   dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+   /* return all clocks in KHz */
+   dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+   if (adev->pm.dpm_enabled) {
+   dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 
10;
+   dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 
10;
+   dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 
10;
+   dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 
10;
+   } else {
+   dev_info->max_engine_clock =
+   

Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Alex Deucher
On Tue, Mar 19, 2024 at 8:32 AM Sunil Khatri  wrote:
>
> Refactor the code so debugfs and devcoredump can reuse
> the common information and avoid unnecessary copy of it.
>
> created a new file which would be the right place to
> hold functions which will be used between sysfs, debugfs
> and devcoredump.
>
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Sunil Khatri 
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
>  4 files changed, 157 insertions(+), 115 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 4536c8ad0e11..05d34f4b18f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
> amdgpu_kms.o \
> amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
> amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> -   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
> +   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
> amdgpu_devinfo.o
>
>  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9c62552bec34..0267870aa9b1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1609,4 +1609,5 @@ extern const struct attribute_group 
> amdgpu_vram_mgr_attr_group;
>  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>  extern const struct attribute_group amdgpu_flash_attr_group;
>
> +int amdgpu_device_info(struct amdgpu_device *adev, struct 
> drm_amdgpu_info_device *dev_info);
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> new file mode 100644
> index ..d2c15a1dcb0d
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> @@ -0,0 +1,151 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright 2024 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include "amdgpu.h"
> +#include "amd_pcie.h"
> +
> +#include 
> +
> +int amdgpu_device_info(struct amdgpu_device *adev, struct 
> drm_amdgpu_info_device *dev_info)

We can probably keep this in amdgpu_kms.c unless that file is getting
too big.  I don't think it warrants a new file at this point.  If you
do keep it in amdgpu_kms.c, I'd recommend renaming it to something
like amdgpu_kms_device_info() to keep the naming conventions.

> +{
> +   int ret;
> +   uint64_t vm_size;
> +   uint32_t pcie_gen_mask;
> +
> +   if (dev_info == NULL)
> +   return -EINVAL;
> +
> +   dev_info->device_id = adev->pdev->device;
> +   dev_info->chip_rev = adev->rev_id;
> +   dev_info->external_rev = adev->external_rev_id;
> +   dev_info->pci_rev = adev->pdev->revision;
> +   dev_info->family = adev->family;
> +   dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
> +   dev_info->num_shader_arrays_per_engine = 
> adev->gfx.config.max_sh_per_se;
> +   /* return all clocks in KHz */
> +   dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
> +   if (adev->pm.dpm_enabled) {
> +   dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) 
> * 10;
> +   dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) 
> * 10;
> +   dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) 
> * 10;
> +   

[PATCH v2] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Sunil Khatri
Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between ioctl, debugfs
and devcoredump.

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
 drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
 4 files changed, 157 insertions(+), 115 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..05d34f4b18f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_devinfo.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec34..0267870aa9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1609,4 +1609,5 @@ extern const struct attribute_group 
amdgpu_vram_mgr_attr_group;
 extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
 extern const struct attribute_group amdgpu_flash_attr_group;
 
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
new file mode 100644
index ..fdcbc1984031
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amd_pcie.h"
+
+#include 
+
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)
+{
+   int ret;
+   uint64_t vm_size;
+   uint32_t pcie_gen_mask;
+
+   if (dev_info == NULL)
+   return -EINVAL;
+
+   dev_info->device_id = adev->pdev->device;
+   dev_info->chip_rev = adev->rev_id;
+   dev_info->external_rev = adev->external_rev_id;
+   dev_info->pci_rev = adev->pdev->revision;
+   dev_info->family = adev->family;
+   dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+   dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+   /* return all clocks in KHz */
+   dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+   if (adev->pm.dpm_enabled) {
+   dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 
10;
+   dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 
10;
+   dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 
10;
+   dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 
10;
+   } else {
+   dev_info->max_engine_clock =
+   dev_info->min_engine_clock =
+   adev->clock.default_sclk * 10;
+   dev_info->max_memory_clock =
+   dev_info->min_memory_clock =
+   adev->clock.default_mclk * 10;
+   }
+   dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
+   dev_info->num_rb_pipes = 

Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Alex Deucher
On Tue, Mar 19, 2024 at 10:22 AM Lazar, Lijo  wrote:
>
>
>
> On 3/19/2024 7:27 PM, Khatri, Sunil wrote:
> >
> > On 3/19/2024 7:19 PM, Lazar, Lijo wrote:
> >>
> >> On 3/19/2024 6:02 PM, Sunil Khatri wrote:
> >>> Refactor the code so debugfs and devcoredump can reuse
> >>> the common information and avoid unnecessary copy of it.
> >>>
> >>> created a new file which would be the right place to
> >>> hold functions which will be used between sysfs, debugfs
> >>> and devcoredump.
> >>>
> >>> Cc: Christian König 
> >>> Cc: Alex Deucher 
> >>> Signed-off-by: Sunil Khatri 
> >>> ---
> >>>   drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
> >>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
> >>>   drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
> >>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
> >>>   4 files changed, 157 insertions(+), 115 deletions(-)
> >>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
> >>> b/drivers/gpu/drm/amd/amdgpu/Makefile
> >>> index 4536c8ad0e11..05d34f4b18f5 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> >>> @@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o
> >>> amdgpu_kms.o \
> >>>   amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
> >>>   amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> >>>   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> >>> -amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
> >>> +amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
> >>> amdgpu_devinfo.o
> >>> amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
> >>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> index 9c62552bec34..0267870aa9b1 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> @@ -1609,4 +1609,5 @@ extern const struct attribute_group
> >>> amdgpu_vram_mgr_attr_group;
> >>>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
> >>>   extern const struct attribute_group amdgpu_flash_attr_group;
> >>>   +int amdgpu_device_info(struct amdgpu_device *adev, struct
> >>> drm_amdgpu_info_device *dev_info);
> >>>   #endif
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> >>> new file mode 100644
> >>> index ..d2c15a1dcb0d
> >>> --- /dev/null
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> >>> @@ -0,0 +1,151 @@
> >>> +// SPDX-License-Identifier: MIT
> >>> +/*
> >>> + * Copyright 2024 Advanced Micro Devices, Inc.
> >>> + *
> >>> + * Permission is hereby granted, free of charge, to any person
> >>> obtaining a
> >>> + * copy of this software and associated documentation files (the
> >>> "Software"),
> >>> + * to deal in the Software without restriction, including without
> >>> limitation
> >>> + * the rights to use, copy, modify, merge, publish, distribute,
> >>> sublicense,
> >>> + * and/or sell copies of the Software, and to permit persons to whom
> >>> the
> >>> + * Software is furnished to do so, subject to the following conditions:
> >>> + *
> >>> + * The above copyright notice and this permission notice shall be
> >>> included in
> >>> + * all copies or substantial portions of the Software.
> >>> + *
> >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> >>> EXPRESS OR
> >>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> >>> MERCHANTABILITY,
> >>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> >>> EVENT SHALL
> >>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> >>> DAMAGES OR
> >>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> >>> OTHERWISE,
> >>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> >>> USE OR
> >>> + * OTHER DEALINGS IN THE SOFTWARE.
> >>> + *
> >>> + */
> >>> +
> >>> +#include "amdgpu.h"
> >>> +#include "amd_pcie.h"
> >>> +
> >>> +#include 
> >>> +
> >>> +int amdgpu_device_info(struct amdgpu_device *adev, struct
> >>> drm_amdgpu_info_device *dev_info)
> >>> +{
> >>> +int ret;
> >>> +uint64_t vm_size;
> >>> +uint32_t pcie_gen_mask;
> >>> +
> >>> +if (dev_info == NULL)
> >>> +return -EINVAL;
> >>> +
> >>> +dev_info->device_id = adev->pdev->device;
> >>> +dev_info->chip_rev = adev->rev_id;
> >>> +dev_info->external_rev = adev->external_rev_id;
> >>> +dev_info->pci_rev = adev->pdev->revision;
> >>> +dev_info->family = adev->family;
> >>> +dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
> >>> +dev_info->num_shader_arrays_per_engine =
> >>> adev->gfx.config.max_sh_per_se;
> >>> +/* return all clocks in KHz */
> >>> +dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
> >>> +if (adev->pm.dpm_enabled) {

Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Khatri, Sunil


On 3/19/2024 7:43 PM, Lazar, Lijo wrote:


On 3/19/2024 7:27 PM, Khatri, Sunil wrote:

On 3/19/2024 7:19 PM, Lazar, Lijo wrote:

On 3/19/2024 6:02 PM, Sunil Khatri wrote:

Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between sysfs, debugfs
and devcoredump.

Cc: Christian König
Cc: Alex Deucher
Signed-off-by: Sunil Khatri
---
   drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
   4 files changed, 157 insertions(+), 115 deletions(-)
   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..05d34f4b18f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o
amdgpu_kms.o \
   amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
   amdgpu_fw_attestation.o amdgpu_securedisplay.o \
   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-    amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+    amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
amdgpu_devinfo.o
     amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec34..0267870aa9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1609,4 +1609,5 @@ extern const struct attribute_group
amdgpu_vram_mgr_attr_group;
   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
   extern const struct attribute_group amdgpu_flash_attr_group;
   +int amdgpu_device_info(struct amdgpu_device *adev, struct
drm_amdgpu_info_device *dev_info);
   #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
new file mode 100644
index ..d2c15a1dcb0d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining a
+ * copy of this software and associated documentation files (the
"Software"),
+ * to deal in the Software without restriction, including without
limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom
the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amd_pcie.h"
+
+#include 
+
+int amdgpu_device_info(struct amdgpu_device *adev, struct
drm_amdgpu_info_device *dev_info)
+{
+    int ret;
+    uint64_t vm_size;
+    uint32_t pcie_gen_mask;
+
+    if (dev_info == NULL)
+    return -EINVAL;
+
+    dev_info->device_id = adev->pdev->device;
+    dev_info->chip_rev = adev->rev_id;
+    dev_info->external_rev = adev->external_rev_id;
+    dev_info->pci_rev = adev->pdev->revision;
+    dev_info->family = adev->family;
+    dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+    dev_info->num_shader_arrays_per_engine =
adev->gfx.config.max_sh_per_se;
+    /* return all clocks in KHz */
+    dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+    if (adev->pm.dpm_enabled) {
+    dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev,
false) * 10;
+    dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev,
false) * 10;
+    dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true)
* 10;
+    dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true)
* 10;
+    } else {
+    dev_info->max_engine_clock =
+    dev_info->min_engine_clock =
+    adev->clock.default_sclk * 10;
+    dev_info->max_memory_clock =
+    dev_info->min_memory_clock =
+    adev->clock.default_mclk * 10;
+    }
+    dev_info->enabled_rb_pipes_mask =
adev->gfx.config.backend_enable_mask;
+    dev_info->num_rb_pipes = 

Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Lazar, Lijo



On 3/19/2024 7:27 PM, Khatri, Sunil wrote:
> 
> On 3/19/2024 7:19 PM, Lazar, Lijo wrote:
>>
>> On 3/19/2024 6:02 PM, Sunil Khatri wrote:
>>> Refactor the code so debugfs and devcoredump can reuse
>>> the common information and avoid unnecessary copy of it.
>>>
>>> created a new file which would be the right place to
>>> hold functions which will be used between sysfs, debugfs
>>> and devcoredump.
>>>
>>> Cc: Christian König 
>>> Cc: Alex Deucher 
>>> Signed-off-by: Sunil Khatri 
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
>>>   4 files changed, 157 insertions(+), 115 deletions(-)
>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> index 4536c8ad0e11..05d34f4b18f5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> @@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o
>>> amdgpu_kms.o \
>>>   amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>>>   amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>>>   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>>> -    amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
>>> +    amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
>>> amdgpu_devinfo.o
>>>     amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 9c62552bec34..0267870aa9b1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -1609,4 +1609,5 @@ extern const struct attribute_group
>>> amdgpu_vram_mgr_attr_group;
>>>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>>>   extern const struct attribute_group amdgpu_flash_attr_group;
>>>   +int amdgpu_device_info(struct amdgpu_device *adev, struct
>>> drm_amdgpu_info_device *dev_info);
>>>   #endif
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
>>> new file mode 100644
>>> index ..d2c15a1dcb0d
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
>>> @@ -0,0 +1,151 @@
>>> +// SPDX-License-Identifier: MIT
>>> +/*
>>> + * Copyright 2024 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person
>>> obtaining a
>>> + * copy of this software and associated documentation files (the
>>> "Software"),
>>> + * to deal in the Software without restriction, including without
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to whom
>>> the
>>> + * Software is furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + */
>>> +
>>> +#include "amdgpu.h"
>>> +#include "amd_pcie.h"
>>> +
>>> +#include 
>>> +
>>> +int amdgpu_device_info(struct amdgpu_device *adev, struct
>>> drm_amdgpu_info_device *dev_info)
>>> +{
>>> +    int ret;
>>> +    uint64_t vm_size;
>>> +    uint32_t pcie_gen_mask;
>>> +
>>> +    if (dev_info == NULL)
>>> +    return -EINVAL;
>>> +
>>> +    dev_info->device_id = adev->pdev->device;
>>> +    dev_info->chip_rev = adev->rev_id;
>>> +    dev_info->external_rev = adev->external_rev_id;
>>> +    dev_info->pci_rev = adev->pdev->revision;
>>> +    dev_info->family = adev->family;
>>> +    dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
>>> +    dev_info->num_shader_arrays_per_engine =
>>> adev->gfx.config.max_sh_per_se;
>>> +    /* return all clocks in KHz */
>>> +    dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
>>> +    if (adev->pm.dpm_enabled) {
>>> +    dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev,
>>> false) * 10;
>>> +    dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev,
>>> false) * 10;
>>> +    dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true)
>>> * 10;
>>> +    

Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Khatri, Sunil



On 3/19/2024 7:19 PM, Lazar, Lijo wrote:


On 3/19/2024 6:02 PM, Sunil Khatri wrote:

Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between sysfs, debugfs
and devcoredump.

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
  drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
  4 files changed, 157 insertions(+), 115 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..05d34f4b18f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_devinfo.o
  
  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 9c62552bec34..0267870aa9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1609,4 +1609,5 @@ extern const struct attribute_group 
amdgpu_vram_mgr_attr_group;
  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
  extern const struct attribute_group amdgpu_flash_attr_group;
  
+int amdgpu_device_info(struct amdgpu_device *adev, struct drm_amdgpu_info_device *dev_info);

  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
new file mode 100644
index ..d2c15a1dcb0d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amd_pcie.h"
+
+#include 
+
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)
+{
+   int ret;
+   uint64_t vm_size;
+   uint32_t pcie_gen_mask;
+
+   if (dev_info == NULL)
+   return -EINVAL;
+
+   dev_info->device_id = adev->pdev->device;
+   dev_info->chip_rev = adev->rev_id;
+   dev_info->external_rev = adev->external_rev_id;
+   dev_info->pci_rev = adev->pdev->revision;
+   dev_info->family = adev->family;
+   dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+   dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+   /* return all clocks in KHz */
+   dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+   if (adev->pm.dpm_enabled) {
+   dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 
10;
+   dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 
10;
+   dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 
10;
+   dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 
10;
+   } else {
+   dev_info->max_engine_clock =
+   dev_info->min_engine_clock =
+   adev->clock.default_sclk * 10;
+   dev_info->max_memory_clock =
+   dev_info->min_memory_clock =
+   adev->clock.default_mclk * 10;
+   }
+   

Re: [PATCH v9 2/3] drm/amdgpu: Enable clear page functionality

2024-03-19 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 3/19/2024 7:17 PM, Christian König wrote:

Am 19.03.24 um 12:41 schrieb Paneer Selvam, Arunpravin:

Hi Christian,

On 3/19/2024 3:58 PM, Christian König wrote:



Am 18.03.24 um 22:40 schrieb Arunpravin Paneer Selvam:

Add clear page support in vram memory region.

v1(Christian):
   - Dont handle clear page as TTM flag since when moving the BO back
 in from GTT again we don't need that.
   - Make a specialized version of amdgpu_fill_buffer() which only
 clears the VRAM areas which are not already cleared
   - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
 amdgpu_object.c

v2:
   - Modify the function name amdgpu_ttm_* (Alex)
   - Drop the delayed parameter (Christian)
   - handle amdgpu_res_cleared() just above the size
 calculation (Christian)
   - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the 
buffers

 in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
   - Remove buffer clear code in VRAM manager instead change the
 AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
 the DRM_BUDDY_CLEARED flag.
   - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
Acked-by: Felix Kuehling 


Just a few nit picks below, but in general already looks good to me.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ---
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 
++-

  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
  6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 8bc79924d171..c92d92b28a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
    /**
   * DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
  if (!amdgpu_bo_support_uswc(bo->flags))
  bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
  -    if (adev->ras_enabled)
-    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
    bo->tbo.bdev = >mman.bdev;
  if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
    if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
  bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-    struct dma_fence *fence;
+    struct dma_fence *fence = NULL;
  -    r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , 
true);

+    r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
  if (unlikely(r))
  goto fail_unreserve;
  -    dma_resv_add_fence(bo->tbo.base.resv, fence,
-   DMA_RESV_USAGE_KERNEL);
-    dma_fence_put(fence);
+    if (fence) {
+    dma_resv_add_fence(bo->tbo.base.resv, fence,
+   DMA_RESV_USAGE_KERNEL);
+    dma_fence_put(fence);
+    }
  }
  if (!bp->resv)
  amdgpu_bo_unreserve(bo);
@@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct 
ttm_buffer_object *bo)

  if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
  return;
  -    r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, 
, true);

+    r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
  if (!WARN_ON(r)) {
+    struct amdgpu_vram_mgr_resource *vres;
+
+    vres = to_amdgpu_vram_mgr_resource(bo->resource);
+    vres->flags |= DRM_BUDDY_CLEARED;


Those lines should probably be in the VRAM manager.
This flag is set based on the amdgpu_fill_buffer() function return 
value, can we move the amdgpu_fill_buffer() function call and
DRM_BUDDY_CLEARED flag set lines to amdgpu_vram_mgr_del() in VRAM 
manager and does it require to wipe the VRAM buffers here as well

without setting the DRM_BUDDY_CLEARED.


No that won't work. The call to amdgpu_fill_buffer() *must* be here 
because that here is called before the resource is actually freed.


Only setting the vres->flags should probably be moved into 
amdgpu_vram_mgr.c.


So instead of calling that manually here just make a function for it 
to keep the code related to the buddy allocator in one place.

I got it, Thanks.

Regards,
Arun.


Regards,
Christian.




  amdgpu_bo_fence(abo, fence, false);
  dma_fence_put(fence);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h

index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ 

Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Lazar, Lijo



On 3/19/2024 6:02 PM, Sunil Khatri wrote:
> Refactor the code so debugfs and devcoredump can reuse
> the common information and avoid unnecessary copy of it.
> 
> created a new file which would be the right place to
> hold functions which will be used between sysfs, debugfs
> and devcoredump.
> 
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Sunil Khatri 
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
>  4 files changed, 157 insertions(+), 115 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 4536c8ad0e11..05d34f4b18f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
> amdgpu_kms.o \
>   amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>   amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> - amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
> + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
> amdgpu_devinfo.o
>  
>  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9c62552bec34..0267870aa9b1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1609,4 +1609,5 @@ extern const struct attribute_group 
> amdgpu_vram_mgr_attr_group;
>  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>  extern const struct attribute_group amdgpu_flash_attr_group;
>  
> +int amdgpu_device_info(struct amdgpu_device *adev, struct 
> drm_amdgpu_info_device *dev_info);
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> new file mode 100644
> index ..d2c15a1dcb0d
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> @@ -0,0 +1,151 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright 2024 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include "amdgpu.h"
> +#include "amd_pcie.h"
> +
> +#include 
> +
> +int amdgpu_device_info(struct amdgpu_device *adev, struct 
> drm_amdgpu_info_device *dev_info)
> +{
> + int ret;
> + uint64_t vm_size;
> + uint32_t pcie_gen_mask;
> +
> + if (dev_info == NULL)
> + return -EINVAL;
> +
> + dev_info->device_id = adev->pdev->device;
> + dev_info->chip_rev = adev->rev_id;
> + dev_info->external_rev = adev->external_rev_id;
> + dev_info->pci_rev = adev->pdev->revision;
> + dev_info->family = adev->family;
> + dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
> + dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
> + /* return all clocks in KHz */
> + dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
> + if (adev->pm.dpm_enabled) {
> + dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 
> 10;
> + dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 
> 10;
> + dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 
> 10;
> + dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 
> 10;
> + } else {
> + dev_info->max_engine_clock =
> + dev_info->min_engine_clock =
> + adev->clock.default_sclk * 10;
> + dev_info->max_memory_clock =
> + 

Re: [PATCH v9 2/3] drm/amdgpu: Enable clear page functionality

2024-03-19 Thread Christian König

Am 19.03.24 um 12:41 schrieb Paneer Selvam, Arunpravin:

Hi Christian,

On 3/19/2024 3:58 PM, Christian König wrote:



Am 18.03.24 um 22:40 schrieb Arunpravin Paneer Selvam:

Add clear page support in vram memory region.

v1(Christian):
   - Dont handle clear page as TTM flag since when moving the BO back
 in from GTT again we don't need that.
   - Make a specialized version of amdgpu_fill_buffer() which only
 clears the VRAM areas which are not already cleared
   - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
 amdgpu_object.c

v2:
   - Modify the function name amdgpu_ttm_* (Alex)
   - Drop the delayed parameter (Christian)
   - handle amdgpu_res_cleared() just above the size
 calculation (Christian)
   - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the 
buffers

 in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
   - Remove buffer clear code in VRAM manager instead change the
 AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
 the DRM_BUDDY_CLEARED flag.
   - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
Acked-by: Felix Kuehling 


Just a few nit picks below, but in general already looks good to me.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ---
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 
++-

  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
  6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 8bc79924d171..c92d92b28a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
    /**
   * DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
  if (!amdgpu_bo_support_uswc(bo->flags))
  bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
  -    if (adev->ras_enabled)
-    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
    bo->tbo.bdev = >mman.bdev;
  if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
    if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
  bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-    struct dma_fence *fence;
+    struct dma_fence *fence = NULL;
  -    r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , 
true);

+    r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
  if (unlikely(r))
  goto fail_unreserve;
  -    dma_resv_add_fence(bo->tbo.base.resv, fence,
-   DMA_RESV_USAGE_KERNEL);
-    dma_fence_put(fence);
+    if (fence) {
+    dma_resv_add_fence(bo->tbo.base.resv, fence,
+   DMA_RESV_USAGE_KERNEL);
+    dma_fence_put(fence);
+    }
  }
  if (!bp->resv)
  amdgpu_bo_unreserve(bo);
@@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct 
ttm_buffer_object *bo)

  if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
  return;
  -    r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, 
, true);

+    r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
  if (!WARN_ON(r)) {
+    struct amdgpu_vram_mgr_resource *vres;
+
+    vres = to_amdgpu_vram_mgr_resource(bo->resource);
+    vres->flags |= DRM_BUDDY_CLEARED;


Those lines should probably be in the VRAM manager.
This flag is set based on the amdgpu_fill_buffer() function return 
value, can we move the amdgpu_fill_buffer() function call and
DRM_BUDDY_CLEARED flag set lines to amdgpu_vram_mgr_del() in VRAM 
manager and does it require to wipe the VRAM buffers here as well

without setting the DRM_BUDDY_CLEARED.


No that won't work. The call to amdgpu_fill_buffer() *must* be here 
because that here is called before the resource is actually freed.


Only setting the vres->flags should probably be moved into 
amdgpu_vram_mgr.c.


So instead of calling that manually here just make a function for it to 
keep the code related to the buddy allocator in one place.


Regards,
Christian.




  amdgpu_bo_fence(abo, fence, false);
  dma_fence_put(fence);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h

index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)

Re: [PATCH] drm/amdgpu/pm: Don't use OD table on Arcturus

2024-03-19 Thread Alex Deucher
On Tue, Mar 19, 2024 at 6:22 AM Ma Jun  wrote:
>
> OD is not supported on Arcturus, so the OD table
> should not be used.
>
> Signed-off-by: Ma Jun 

Acked-by: Alex Deucher 

> ---
>  .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 33 +++
>  1 file changed, 5 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> index a58708bcda89..dc3a5d863a04 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> @@ -1289,11 +1289,8 @@ static int arcturus_get_power_limit(struct smu_context 
> *smu,
> uint32_t *max_power_limit,
> uint32_t *min_power_limit)
>  {
> -   struct smu_11_0_powerplay_table *powerplay_table =
> -   (struct smu_11_0_powerplay_table 
> *)smu->smu_table.power_play_table;
> -   struct smu_11_0_overdrive_table *od_settings = smu->od_settings;
> PPTable_t *pptable = smu->smu_table.driver_pptable;
> -   uint32_t power_limit, od_percent_upper = 0, od_percent_lower = 0;
> +   uint32_t power_limit;
>
> if (smu_v11_0_get_current_power_limit(smu, _limit)) {
> /* the last hope to figure out the ppt limit */
> @@ -1309,30 +1306,10 @@ static int arcturus_get_power_limit(struct 
> smu_context *smu,
> *current_power_limit = power_limit;
> if (default_power_limit)
> *default_power_limit = power_limit;
> -
> -   if (powerplay_table) {
> -   if (smu->od_enabled &&
> -   od_settings->cap[SMU_11_0_ODCAP_POWER_LIMIT]) 
> {
> -   od_percent_upper = 
> le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
> -   od_percent_lower = 
> le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
> -   } else if (od_settings->cap[SMU_11_0_ODCAP_POWER_LIMIT]) {
> -   od_percent_upper = 0;
> -   od_percent_lower = 
> le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
> -   }
> -   }
> -
> -   dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d 
> (default power: %d)\n",
> -   od_percent_upper, 
> od_percent_lower, power_limit);
> -
> -   if (max_power_limit) {
> -   *max_power_limit = power_limit * (100 + od_percent_upper);
> -   *max_power_limit /= 100;
> -   }
> -
> -   if (min_power_limit) {
> -   *min_power_limit = power_limit * (100 - od_percent_lower);
> -   *min_power_limit /= 100;
> -   }
> +   if (max_power_limit)
> +   *max_power_limit = power_limit;
> +   if (min_power_limit)
> +   *min_power_limit = power_limit;
>
> return 0;
>  }
> --
> 2.34.1
>


Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Khatri, Sunil
Validated the code by using the function in same way as ioctl would use 
in devcoredump and getting the valid values.


Also this would be the container of the information that we need to 
share between ioctl, debugfs and devcoredump and keep updating this 
based on information needed.



On 3/19/2024 6:02 PM, Sunil Khatri wrote:

Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between sysfs, debugfs
and devcoredump.

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
  drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
  4 files changed, 157 insertions(+), 115 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..05d34f4b18f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_devinfo.o
  
  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 9c62552bec34..0267870aa9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1609,4 +1609,5 @@ extern const struct attribute_group 
amdgpu_vram_mgr_attr_group;
  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
  extern const struct attribute_group amdgpu_flash_attr_group;
  
+int amdgpu_device_info(struct amdgpu_device *adev, struct drm_amdgpu_info_device *dev_info);

  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
new file mode 100644
index ..d2c15a1dcb0d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amd_pcie.h"
+
+#include 
+
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)
+{
+   int ret;
+   uint64_t vm_size;
+   uint32_t pcie_gen_mask;
+
+   if (dev_info == NULL)
+   return -EINVAL;
+
+   dev_info->device_id = adev->pdev->device;
+   dev_info->chip_rev = adev->rev_id;
+   dev_info->external_rev = adev->external_rev_id;
+   dev_info->pci_rev = adev->pdev->revision;
+   dev_info->family = adev->family;
+   dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+   dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+   /* return all clocks in KHz */
+   dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+   if (adev->pm.dpm_enabled) {
+   dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 
10;
+   dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 
10;
+   dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 
10;
+   dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 
10;
+   } else {
+   dev_info->max_engine_clock =
+   dev_info->min_engine_clock =
+  

[PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Sunil Khatri
Refactor the code so debugfs and devcoredump can reuse
the common information and avoid unnecessary copy of it.

created a new file which would be the right place to
hold functions which will be used between sysfs, debugfs
and devcoredump.

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Sunil Khatri 
---
 drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
 4 files changed, 157 insertions(+), 115 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..05d34f4b18f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+   amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
amdgpu_devinfo.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec34..0267870aa9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1609,4 +1609,5 @@ extern const struct attribute_group 
amdgpu_vram_mgr_attr_group;
 extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
 extern const struct attribute_group amdgpu_flash_attr_group;
 
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
new file mode 100644
index ..d2c15a1dcb0d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amd_pcie.h"
+
+#include 
+
+int amdgpu_device_info(struct amdgpu_device *adev, struct 
drm_amdgpu_info_device *dev_info)
+{
+   int ret;
+   uint64_t vm_size;
+   uint32_t pcie_gen_mask;
+
+   if (dev_info == NULL)
+   return -EINVAL;
+
+   dev_info->device_id = adev->pdev->device;
+   dev_info->chip_rev = adev->rev_id;
+   dev_info->external_rev = adev->external_rev_id;
+   dev_info->pci_rev = adev->pdev->revision;
+   dev_info->family = adev->family;
+   dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+   dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+   /* return all clocks in KHz */
+   dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+   if (adev->pm.dpm_enabled) {
+   dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 
10;
+   dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 
10;
+   dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 
10;
+   dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 
10;
+   } else {
+   dev_info->max_engine_clock =
+   dev_info->min_engine_clock =
+   adev->clock.default_sclk * 10;
+   dev_info->max_memory_clock =
+   dev_info->min_memory_clock =
+   adev->clock.default_mclk * 10;
+   }
+   dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
+   dev_info->num_rb_pipes = 

Re: [PATCH v9 2/3] drm/amdgpu: Enable clear page functionality

2024-03-19 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 3/19/2024 3:58 PM, Christian König wrote:



Am 18.03.24 um 22:40 schrieb Arunpravin Paneer Selvam:

Add clear page support in vram memory region.

v1(Christian):
   - Dont handle clear page as TTM flag since when moving the BO back
 in from GTT again we don't need that.
   - Make a specialized version of amdgpu_fill_buffer() which only
 clears the VRAM areas which are not already cleared
   - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
 amdgpu_object.c

v2:
   - Modify the function name amdgpu_ttm_* (Alex)
   - Drop the delayed parameter (Christian)
   - handle amdgpu_res_cleared() just above the size
 calculation (Christian)
   - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
 in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
   - Remove buffer clear code in VRAM manager instead change the
 AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
 the DRM_BUDDY_CLEARED flag.
   - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
Acked-by: Felix Kuehling 


Just a few nit picks below, but in general already looks good to me.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ---
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
  6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 8bc79924d171..c92d92b28a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
    /**
   * DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
  if (!amdgpu_bo_support_uswc(bo->flags))
  bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
  -    if (adev->ras_enabled)
-    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
    bo->tbo.bdev = >mman.bdev;
  if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
    if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
  bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-    struct dma_fence *fence;
+    struct dma_fence *fence = NULL;
  -    r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , 
true);

+    r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
  if (unlikely(r))
  goto fail_unreserve;
  -    dma_resv_add_fence(bo->tbo.base.resv, fence,
-   DMA_RESV_USAGE_KERNEL);
-    dma_fence_put(fence);
+    if (fence) {
+    dma_resv_add_fence(bo->tbo.base.resv, fence,
+   DMA_RESV_USAGE_KERNEL);
+    dma_fence_put(fence);
+    }
  }
  if (!bp->resv)
  amdgpu_bo_unreserve(bo);
@@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct 
ttm_buffer_object *bo)

  if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
  return;
  -    r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, 
, true);

+    r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
  if (!WARN_ON(r)) {
+    struct amdgpu_vram_mgr_resource *vres;
+
+    vres = to_amdgpu_vram_mgr_resource(bo->resource);
+    vres->flags |= DRM_BUDDY_CLEARED;


Those lines should probably be in the VRAM manager.
This flag is set based on the amdgpu_fill_buffer() function return 
value, can we move the amdgpu_fill_buffer() function call and
DRM_BUDDY_CLEARED flag set lines to amdgpu_vram_mgr_del() in VRAM 
manager and does it require to wipe the VRAM buffers here as well

without setting the DRM_BUDDY_CLEARED.



  amdgpu_bo_fence(abo, fence, false);
  dma_fence_put(fence);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h

index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)

  }
  }
  +/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+    struct drm_buddy_block *block;
+
+    switch (cur->mem_type) {
+    case TTM_PL_VRAM:
+    block = cur->node;
+
+    if (!amdgpu_vram_mgr_is_cleared(block))
+    return false;

Re: [PATCH v9 2/2] drm/amdgpu: sync page table freeing with tlb flush

2024-03-19 Thread Christian König

Am 18.03.24 um 17:11 schrieb Shashank Sharma:

The idea behind this patch is to delay the freeing of PT entry objects
until the TLB flush is done.

This patch:
- Adds a tlb_flush_waitlist in amdgpu_vm_update_params which will keep the
   objects that need to be freed after tlb_flush.
- Adds PT entries in this list in amdgpu_vm_ptes_update after finding
   the PT entry.
- Changes functionality of amdgpu_vm_pt_free_dfs from (df_search + free)
   to simply freeing of the BOs, also renames it to
   amdgpu_vm_pt_free_list to reflect this same.
- Exports function amdgpu_vm_pt_free_list to be called directly.
- Calls amdgpu_vm_pt_free_list directly from amdgpu_vm_update_range.

V2: rebase
V4: Addressed review comments from Christian
 - add only locked PTEs entries in TLB flush waitlist.
 - do not create a separate function for list flush.
 - do not create a new lock for TLB flush.
 - there is no need to wait on tlb_flush_fence exclusively.

V5: Addressed review comments from Christian
 - change the amdgpu_vm_pt_free_dfs's functionality to simple freeing
   of the objects and rename it.
 - add all the PTE objects in params->tlb_flush_waitlist
 - let amdgpu_vm_pt_free_root handle the freeing of BOs independently
 - call amdgpu_vm_pt_free_list directly

V6: Rebase
V7: Rebase
V8: Added a NULL check to fix this backtrace issue:
[  415.351447] BUG: kernel NULL pointer dereference, address: 0008
[  415.359245] #PF: supervisor write access in kernel mode
[  415.365081] #PF: error_code(0x0002) - not-present page
[  415.370817] PGD 101259067 P4D 101259067 PUD 10125a067 PMD 0
[  415.377140] Oops: 0002 [#1] PREEMPT SMP NOPTI
[  415.382004] CPU: 0 PID: 25481 Comm: test_with_MPI.e Tainted: G   OE  
   5.18.2-mi300-build-140423-ubuntu-22.04+ #24
[  415.394437] Hardware name: AMD Corporation Sh51p/Sh51p, BIOS RMO1001AS 
02/21/2024
[  415.402797] RIP: 0010:amdgpu_vm_ptes_update+0x6fd/0xa10 [amdgpu]
[  415.409648] Code: 4c 89 ff 4d 8d 66 30 e8 f1 ed ff ff 48 85 db 74 42 48 39 5d 
a0 74 40 48 8b 53 20 48 8b 4b 18 48 8d 43 18 48 8d 75 b0 4c 89 ff <48

89 51 08 48 89 0a 49 8b 56 30 48 89 42 08 48 89 53 18 4c 89 63

[  415.430621] RSP: 0018:c9000401f990 EFLAGS: 00010287
[  415.436456] RAX: 888147bb82f0 RBX: 888147bb82d8 RCX: 
[  415.26] RDX:  RSI: c9000401fa30 RDI: 888161f8
[  415.452397] RBP: c9000401fa80 R08:  R09: c9000401fa00
[  415.460368] R10: 0007f0cc R11: 0007f0c85000 R12: c9000401fb20
[  415.468340] R13: 0007f0d0 R14: c9000401faf0 R15: 888161f8
[  415.476312] FS:  7f132ff89840() GS:889f87c0() 
knlGS:
[  415.485350] CS:  0010 DS:  ES:  CR0: 80050033
[  415.491767] CR2: 0008 CR3: 000161d46003 CR4: 00770ef0
[  415.499738] PKRU: 5554
[  415.502750] Call Trace:
[  415.505482]  
[  415.507825]  amdgpu_vm_update_range+0x32a/0x880 [amdgpu]
[  415.513869]  amdgpu_vm_clear_freed+0x117/0x250 [amdgpu]
[  415.519814]  amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu+0x18c/0x250 [amdgpu]
[  415.527729]  kfd_ioctl_unmap_memory_from_gpu+0xed/0x340 [amdgpu]
[  415.534551]  kfd_ioctl+0x3b6/0x510 [amdgpu]

V9: Addressed review comments from Christian
 - No NULL check reqd for root PT freeing
 - Free PT list regardless of needs_flush
 - Move adding BOs in list in a separate function

Cc: Christian König 
Cc: Alex Deucher 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Acked-by: Felix Kuehling 
Acked-by: Rajneesh Bhardwaj 
Tested-by: Rajneesh Bhardwaj 
Signed-off-by: Shashank Sharma 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  3 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  7 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 66 +++
  3 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 104bf600c85f..8fada1152664 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -986,6 +986,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
params.unlocked = unlocked;
params.needs_flush = flush_tlb;
params.allow_override = allow_override;
+   INIT_LIST_HEAD(_flush_waitlist);
  
  	/* Implicitly sync to command submissions in the same VM before

 * unmapping. Sync to moving fences before mapping.
@@ -1076,6 +1077,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
tlb_cb = NULL;
}
  
+	amdgpu_vm_pt_free_list(adev, );

+
  error_free:
kfree(tlb_cb);
amdgpu_vm_eviction_unlock(vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index b0a4fe683352..54d7da396de0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ 

Re: [PATCH v9 2/3] drm/amdgpu: Enable clear page functionality

2024-03-19 Thread Christian König




Am 18.03.24 um 22:40 schrieb Arunpravin Paneer Selvam:

Add clear page support in vram memory region.

v1(Christian):
   - Dont handle clear page as TTM flag since when moving the BO back
 in from GTT again we don't need that.
   - Make a specialized version of amdgpu_fill_buffer() which only
 clears the VRAM areas which are not already cleared
   - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
 amdgpu_object.c

v2:
   - Modify the function name amdgpu_ttm_* (Alex)
   - Drop the delayed parameter (Christian)
   - handle amdgpu_res_cleared() just above the size
 calculation (Christian)
   - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
 in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
   - Remove buffer clear code in VRAM manager instead change the
 AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
 the DRM_BUDDY_CLEARED flag.
   - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
Acked-by: Felix Kuehling 


Just a few nit picks below, but in general already looks good to me.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 22 ---
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
  6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8bc79924d171..c92d92b28a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
  
  /**

   * DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
  
-	if (adev->ras_enabled)

-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
  
  	bo->tbo.bdev = >mman.bdev;

if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
  
  	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&

bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
  
-		r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);

+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
  
-		dma_resv_add_fence(bo->tbo.base.resv, fence,

-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
@@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
  
-	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);

+   r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
if (!WARN_ON(r)) {
+   struct amdgpu_vram_mgr_resource *vres;
+
+   vres = to_amdgpu_vram_mgr_resource(bo->resource);
+   vres->flags |= DRM_BUDDY_CLEARED;


Those lines should probably be in the VRAM manager.


amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
  }
  
+/**

+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
  #endif
diff --git 

[PATCH] drm/amdgpu/pm: Don't use OD table on Arcturus

2024-03-19 Thread Ma Jun
OD is not supported on Arcturus, so the OD table
should not be used.

Signed-off-by: Ma Jun 
---
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 33 +++
 1 file changed, 5 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index a58708bcda89..dc3a5d863a04 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -1289,11 +1289,8 @@ static int arcturus_get_power_limit(struct smu_context 
*smu,
uint32_t *max_power_limit,
uint32_t *min_power_limit)
 {
-   struct smu_11_0_powerplay_table *powerplay_table =
-   (struct smu_11_0_powerplay_table 
*)smu->smu_table.power_play_table;
-   struct smu_11_0_overdrive_table *od_settings = smu->od_settings;
PPTable_t *pptable = smu->smu_table.driver_pptable;
-   uint32_t power_limit, od_percent_upper = 0, od_percent_lower = 0;
+   uint32_t power_limit;
 
if (smu_v11_0_get_current_power_limit(smu, _limit)) {
/* the last hope to figure out the ppt limit */
@@ -1309,30 +1306,10 @@ static int arcturus_get_power_limit(struct smu_context 
*smu,
*current_power_limit = power_limit;
if (default_power_limit)
*default_power_limit = power_limit;
-
-   if (powerplay_table) {
-   if (smu->od_enabled &&
-   od_settings->cap[SMU_11_0_ODCAP_POWER_LIMIT]) {
-   od_percent_upper = 
le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
-   od_percent_lower = 
le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
-   } else if (od_settings->cap[SMU_11_0_ODCAP_POWER_LIMIT]) {
-   od_percent_upper = 0;
-   od_percent_lower = 
le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
-   }
-   }
-
-   dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d 
(default power: %d)\n",
-   od_percent_upper, 
od_percent_lower, power_limit);
-
-   if (max_power_limit) {
-   *max_power_limit = power_limit * (100 + od_percent_upper);
-   *max_power_limit /= 100;
-   }
-
-   if (min_power_limit) {
-   *min_power_limit = power_limit * (100 - od_percent_lower);
-   *min_power_limit /= 100;
-   }
+   if (max_power_limit)
+   *max_power_limit = power_limit;
+   if (min_power_limit)
+   *min_power_limit = power_limit;
 
return 0;
 }
-- 
2.34.1



Re: [PATCH] drm/amdgpu: Remove pci address checks from acpi_vfct_bios

2024-03-19 Thread Christian König

Am 19.03.24 um 02:55 schrieb Kurt Kartaltepe:

On Mon, Mar 18, 2024 at 12:57 PM Alex Deucher  wrote:

On Mon, Mar 18, 2024 at 3:52 PM Alex Deucher  wrote:

...

Depends on the platform, but recent ones use VFCT.  That said, there
should only ever be one IGPU in the system so I think we could just
rely on the VID and DID for APUs in this case and check everything for
dGPUs.

Is there a reason why you need this option?  Even beyond this, I could
envision other problems related to APUs and ACPI if these changed.

Alex

So there are multiple factors in play. I am trying to make use of the
lovely usb4/tb3 controllers on the 7940HS with the reportedly Intel
Tamales Module 2 pci/pci bridge over the usb4 interface. This provides
a handy way to expand the pcie bus but configuring ACPI and pcie
topology isn't generally an option on consumer BIOS (unless you want
to enlighten me). This leaves us in the situation where the bios can
enumerate devices poorly resulting in inaccessible devices due to
address conflicts. To resolve address conflicts the only option I'm
aware of is pci=assign-busses, maybe this could also be configured at
runtime but assign-busses seemed nice in some ways.


Well what problems do you run into? The ACPI and BIOS assignments 
usually work much better than whatever the Linux PCI subsystem comes up 
with.


The PCI subsystem in the Linux kernel for example can't handle back to 
back resources behind multiple downstream bridges.


So when the BIOS fails to assign something it's extremely unlikely that 
the Linux kernel will do the right thing either.


Regards,
Christian.



I havnt experienced any issues with the APU (graphics, hardware
encoders/decoders) but I do think assign-busses might be renumbering
again after suspend/resume/pci rescans but I need to debug further,
maybe suspend/resume are just broken when ACPI addresses are wrong.
Obviously the graphics user space (compositors, mesa might be working
as expected) dont handle the device switching addresses while in use,
for amdgpu kernel side I haven't inspected deeply yet.

I'm not sure if this is the right approach to solving the problem, and
given your input i'm considering it may be better, though not
upstreamable, to implement renumbering only for specified devices like
this pci bridge or investigate runtime management of the pci bus
addresses. The current assign-busses implementation is quite the big
hammer admittedly.

--Kurt Kartaltepe




Re: [PATCH] drm/amdgpu: Remove pci address checks from acpi_vfct_bios

2024-03-19 Thread Kurt Kartaltepe
On Mon, Mar 18, 2024 at 8:42 AM Alex Deucher  wrote:
>
> On Mon, Mar 18, 2024 at 10:19 AM Kurt Kartaltepe  
> wrote:
> >
> > On Mon, Mar 18, 2024 at 6:37 AM Alex Deucher  wrote:
> > >
> > > On Mon, Mar 18, 2024 at 4:47 AM Kurt Kartaltepe  
> > > wrote:
> > > >
> > > > These checks prevent using amdgpu with the pcie=assign-busses parameter
> > > > which will re-address devices from their acpi values.
> > > >
> > > > Signed-off-by: Kurt Kartaltepe 
> > >
> > > This will likely break multi-GPU functionality.  The BDF values are
> > > how the sbios/driver differentiates between the VFCT images.  If you
> > > have multiple GPUs in the system, the driver won't be able to figure
> > > out which one goes with which GPU an you may end up assigning the
> > > wrong image to the wrong device.
> > >
> > > Alex
> >
> > The vendor and device portions must be correct in the existing
> > kernels, so device type differentiation should already work without
> > BDF values.
> >
> > So does that mean the concern is images are different for devices with
> > the same vendor:device pairs? There are sites out there dedicated to
> > dumping AMD's video roms which seem to suggest all discrete devices
> > would be fine loading the same rom. Is there another platform you are
> > thinking of where devices with the same vendor:device values would
> > need different images?
>
> That is incorrect.  The vbios images are board specific.  Using the
> wrong image can cause a lot of problems.  The vbios exists to handle
> board specific design variations (e.g., the number and type of display
> connectors, the i2c/aux channel mappings, board specific clock and
> voltage settings, etc.).  The PCI DID just indicates the chip used on
> the board.  The actual board design varies with each AIB vendor (e.g.,
> Sapphire and XFX both make 7900XTX boards, but they can have very
> different configurations.

Thanks for the explanation, that makes sense.

Is my understanding correct that IGPUs (my case) simply won't have
vbios available in any other mechanism. If so perhaps this isnt
feasible in amdgpu as the BDF information is lost in reassignment.

--Kurt Kartaltepe


Re: [linux-next:master] BUILD REGRESSION 2e93f143ca010a5013528e1cfdc895f024fe8c21

2024-03-19 Thread Zhihao Cheng

在 2024/3/18 22:33, kernel test robot 写道:

tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 2e93f143ca010a5013528e1cfdc895f024fe8c21  Add linux-next specific 
files for 20240318

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- arc-allmodconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arc-allyesconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm-allmodconfig
|   |-- 
arch-arm-mach-omap2-prm33xx.c:warning:expecting-prototype-for-am33xx_prm_global_warm_sw_reset().-Prototype-was-for-am33xx_prm_global_sw_reset()-instead
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm-allyesconfig
|   |-- 
arch-arm-mach-omap2-prm33xx.c:warning:expecting-prototype-for-am33xx_prm_global_warm_sw_reset().-Prototype-was-for-am33xx_prm_global_sw_reset()-instead
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm64-defconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- csky-allmodconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- csky-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead


Hi, Richard,
I sent out the warning fix patch in 
https://patchwork.ozlabs.org/project/linux-mtd/patch/20240227024204.1080739-1-chengzhih...@huawei.com/


Re: [PATCH] drm/amdgpu: Remove pci address checks from acpi_vfct_bios

2024-03-19 Thread Kurt Kartaltepe
On Mon, Mar 18, 2024 at 12:57 PM Alex Deucher  wrote:
>
> On Mon, Mar 18, 2024 at 3:52 PM Alex Deucher  wrote:
> >
...
> > Depends on the platform, but recent ones use VFCT.  That said, there
> > should only ever be one IGPU in the system so I think we could just
> > rely on the VID and DID for APUs in this case and check everything for
> > dGPUs.
>
> Is there a reason why you need this option?  Even beyond this, I could
> envision other problems related to APUs and ACPI if these changed.
>
> Alex

So there are multiple factors in play. I am trying to make use of the
lovely usb4/tb3 controllers on the 7940HS with the reportedly Intel
Tamales Module 2 pci/pci bridge over the usb4 interface. This provides
a handy way to expand the pcie bus but configuring ACPI and pcie
topology isn't generally an option on consumer BIOS (unless you want
to enlighten me). This leaves us in the situation where the bios can
enumerate devices poorly resulting in inaccessible devices due to
address conflicts. To resolve address conflicts the only option I'm
aware of is pci=assign-busses, maybe this could also be configured at
runtime but assign-busses seemed nice in some ways.

I havnt experienced any issues with the APU (graphics, hardware
encoders/decoders) but I do think assign-busses might be renumbering
again after suspend/resume/pci rescans but I need to debug further,
maybe suspend/resume are just broken when ACPI addresses are wrong.
Obviously the graphics user space (compositors, mesa might be working
as expected) dont handle the device switching addresses while in use,
for amdgpu kernel side I haven't inspected deeply yet.

I'm not sure if this is the right approach to solving the problem, and
given your input i'm considering it may be better, though not
upstreamable, to implement renumbering only for specified devices like
this pci bridge or investigate runtime management of the pci bus
addresses. The current assign-busses implementation is quite the big
hammer admittedly.

--Kurt Kartaltepe


Re: [PATCH] drm/amdgpu: Remove pci address checks from acpi_vfct_bios

2024-03-19 Thread Kurt Kartaltepe
On Mon, Mar 18, 2024 at 6:37 AM Alex Deucher  wrote:
>
> On Mon, Mar 18, 2024 at 4:47 AM Kurt Kartaltepe  wrote:
> >
> > These checks prevent using amdgpu with the pcie=assign-busses parameter
> > which will re-address devices from their acpi values.
> >
> > Signed-off-by: Kurt Kartaltepe 
>
> This will likely break multi-GPU functionality.  The BDF values are
> how the sbios/driver differentiates between the VFCT images.  If you
> have multiple GPUs in the system, the driver won't be able to figure
> out which one goes with which GPU an you may end up assigning the
> wrong image to the wrong device.
>
> Alex

The vendor and device portions must be correct in the existing
kernels, so device type differentiation should already work without
BDF values.

So does that mean the concern is images are different for devices with
the same vendor:device pairs? There are sites out there dedicated to
dumping AMD's video roms which seem to suggest all discrete devices
would be fine loading the same rom. Is there another platform you are
thinking of where devices with the same vendor:device values would
need different images?

(Sorry this is my first patch to the mailing list and I am replying
with gmail, I hope it doesnt break things).

--Kurt Kartaltepe


Re: [PATCH] Revert "drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init()"

2024-03-19 Thread Ma, Jun
Thanks, I will fix it when push to the next branch

Regards,
Ma Jun

On 3/19/2024 4:22 PM, Christian König wrote:
> Am 19.03.24 um 09:07 schrieb Ma Jun:
>> This patch causes the following iounmap erorr and calltrace
>> iounmap: bad address d0b3631f
> 
>> So just revert it and amdgpu_device_fini_sw() will cleanup the
>> rmmio mapping.
> 
> I think we can improve the wording here. Something like this:
> 
> The original patch was unjustified because amdgpu_device_fini_sw() will 
> always cleanup the
> rmmio mapping.
> 
>>
>> This reverts commit 923f7a82d2e12a99744a940954f3829ab18a9dc7.
> 
> You Signed-off-by tag is missing.
> 
> With that fixed the patch is Reviewed-by: Christian König 
> 
> 
> Regards,
> Christian.
> 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++--
>>   1 file changed, 6 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 70261eb9b0bb..3204b8f6edeb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -4042,10 +4042,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>   * early on during init and before calling to RREG32.
>>   */
>>  adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, 
>> "amdgpu-reset-dev");
>> -if (!adev->reset_domain) {
>> -r = -ENOMEM;
>> -goto unmap_memory;
>> -}
>> +if (!adev->reset_domain)
>> +return -ENOMEM;
>>   
>>  /* detect hw virtualization here */
>>  amdgpu_detect_virtualization(adev);
>> @@ -4055,7 +4053,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>  r = amdgpu_device_get_job_timeout_settings(adev);
>>  if (r) {
>>  dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
>> -goto unmap_memory;
>> +return r;
>>  }
>>   
>>  amdgpu_device_set_mcbp(adev);
>> @@ -4063,12 +4061,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>  /* early init functions */
>>  r = amdgpu_device_ip_early_init(adev);
>>  if (r)
>> -goto unmap_memory;
>> +return r;
>>   
>>  /* Get rid of things like offb */
>>  r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
>> _kms_driver);
>>  if (r)
>> -goto unmap_memory;
>> +return r;
>>   
>>  /* Enable TMZ based on IP_VERSION */
>>  amdgpu_gmc_tmz_set(adev);
>> @@ -4078,7 +4076,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>  if (adev->gmc.xgmi.supported) {
>>  r = adev->gfxhub.funcs->get_xgmi_info(adev);
>>  if (r)
>> -goto unmap_memory;
>> +return r;
>>  }
>>   
>>  /* enable PCIE atomic ops */
>> @@ -4347,8 +4345,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>   failed:
>>  amdgpu_vf_error_trans_all(adev);
>>   
>> -unmap_memory:
>> -iounmap(adev->rmmio);
>>  return r;
>>   }
>>   
> 


Re: [PATCH] Revert "drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init()"

2024-03-19 Thread Christian König

Am 19.03.24 um 09:07 schrieb Ma Jun:

This patch causes the following iounmap erorr and calltrace
iounmap: bad address d0b3631f



So just revert it and amdgpu_device_fini_sw() will cleanup the
rmmio mapping.


I think we can improve the wording here. Something like this:

The original patch was unjustified because amdgpu_device_fini_sw() will 
always cleanup the

rmmio mapping.



This reverts commit 923f7a82d2e12a99744a940954f3829ab18a9dc7.


You Signed-off-by tag is missing.

With that fixed the patch is Reviewed-by: Christian König 



Regards,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++--
  1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 70261eb9b0bb..3204b8f6edeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4042,10 +4042,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 * early on during init and before calling to RREG32.
 */
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, 
"amdgpu-reset-dev");
-   if (!adev->reset_domain) {
-   r = -ENOMEM;
-   goto unmap_memory;
-   }
+   if (!adev->reset_domain)
+   return -ENOMEM;
  
  	/* detect hw virtualization here */

amdgpu_detect_virtualization(adev);
@@ -4055,7 +4053,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
-   goto unmap_memory;
+   return r;
}
  
  	amdgpu_device_set_mcbp(adev);

@@ -4063,12 +4061,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
-   goto unmap_memory;
+   return r;
  
  	/* Get rid of things like offb */

r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
_kms_driver);
if (r)
-   goto unmap_memory;
+   return r;
  
  	/* Enable TMZ based on IP_VERSION */

amdgpu_gmc_tmz_set(adev);
@@ -4078,7 +4076,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (adev->gmc.xgmi.supported) {
r = adev->gfxhub.funcs->get_xgmi_info(adev);
if (r)
-   goto unmap_memory;
+   return r;
}
  
  	/* enable PCIE atomic ops */

@@ -4347,8 +4345,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  failed:
amdgpu_vf_error_trans_all(adev);
  
-unmap_memory:

-   iounmap(adev->rmmio);
return r;
  }
  




[PATCH] Revert "drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init()"

2024-03-19 Thread Ma Jun
This patch causes the following iounmap erorr and calltrace
iounmap: bad address d0b3631f

So just revert it and amdgpu_device_fini_sw() will cleanup the
rmmio mapping.

This reverts commit 923f7a82d2e12a99744a940954f3829ab18a9dc7.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++--
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 70261eb9b0bb..3204b8f6edeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4042,10 +4042,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 * early on during init and before calling to RREG32.
 */
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, 
"amdgpu-reset-dev");
-   if (!adev->reset_domain) {
-   r = -ENOMEM;
-   goto unmap_memory;
-   }
+   if (!adev->reset_domain)
+   return -ENOMEM;
 
/* detect hw virtualization here */
amdgpu_detect_virtualization(adev);
@@ -4055,7 +4053,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
-   goto unmap_memory;
+   return r;
}
 
amdgpu_device_set_mcbp(adev);
@@ -4063,12 +4061,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
-   goto unmap_memory;
+   return r;
 
/* Get rid of things like offb */
r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
_kms_driver);
if (r)
-   goto unmap_memory;
+   return r;
 
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
@@ -4078,7 +4076,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (adev->gmc.xgmi.supported) {
r = adev->gfxhub.funcs->get_xgmi_info(adev);
if (r)
-   goto unmap_memory;
+   return r;
}
 
/* enable PCIE atomic ops */
@@ -4347,8 +4345,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 failed:
amdgpu_vf_error_trans_all(adev);
 
-unmap_memory:
-   iounmap(adev->rmmio);
return r;
 }
 
-- 
2.34.1



Re: [PATCH] drm/amd/pm set pp_dpm_*clk as read only for SRIOV one VF mode

2024-03-19 Thread JingWen Chen
Acked-by: Jingwen Chen 

On 2024/3/15 14:31, Lin.Cao wrote:
> pp_dpm_*clk should be set as read only for SRIOV one VF mode, remove
> S_IWUGO flag and _store function of these debugfs in one VF mode.
>
> Signed-off-by: Lin.Cao 
> ---
>  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index efc631bddf4a..2883a1d873ab 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -2367,7 +2367,15 @@ static int default_attr_update(struct amdgpu_device 
> *adev, struct amdgpu_device_
>   }
>  
>   /* setting should not be allowed from VF if not in one VF mode */
> - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) {
> + if (amdgpu_sriov_vf(adev) && (!amdgpu_sriov_is_pp_one_vf(adev) ||
> + DEVICE_ATTR_IS(pp_dpm_sclk) ||
> + DEVICE_ATTR_IS(pp_dpm_mclk) ||
> + DEVICE_ATTR_IS(pp_dpm_socclk) ||
> + DEVICE_ATTR_IS(pp_dpm_fclk) ||
> + DEVICE_ATTR_IS(pp_dpm_vclk) ||
> + DEVICE_ATTR_IS(pp_dpm_vclk1) ||
> + DEVICE_ATTR_IS(pp_dpm_dclk) ||
> + DEVICE_ATTR_IS(pp_dpm_dclk1))) {
>   dev_attr->attr.mode &= ~S_IWUGO;
>   dev_attr->store = NULL;
>   }

-- 
Best Regards,
JingWen Chen