RE: [PATCH] drm/amdgpu: Fix logic to determine TOS reload

2024-09-30 Thread Bhardwaj, Rajneesh
[Public]

Looks good to me.
Acked-by: Rajneesh Bhardwaj 

-Original Message-
From: Lazar, Lijo 
Sent: Monday, September 30, 2024 8:21 AM
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Deucher, Alexander 
; Bhardwaj, Rajneesh ; 
Xu, Feifei 
Subject: [PATCH] drm/amdgpu: Fix logic to determine TOS reload

Avoid comparing the TOS version on APUs. On APUs, the driver does not
take care of TOS load.

Fixes: 2edc5ecbf1a9 ("drm/amdgpu: Add interface for TOS reload cases")

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 04be0fabb4f5..2a2d34fe8707 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2295,7 +2295,7 @@ bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev)
 {
struct psp_context *psp = &adev->psp;

-   if (amdgpu_sriov_vf(adev))
+   if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
return false;

if (psp->funcs && psp->funcs->is_reload_needed)
--
2.25.1



RE: [PATCH] drm/amdgpu: bump driver version for cleared VRAM

2024-09-26 Thread Bhardwaj, Rajneesh
[Public]

Reviewed-by: Rajneesh Bhardwaj 

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Thursday, September 26, 2024 9:46 AM
To: Deucher, Alexander ; Olsak, Marek 

Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: bump driver version for cleared VRAM

Ping?

On Fri, Sep 6, 2024 at 2:04 PM Alex Deucher  wrote:
>
> Driver now clears VRAM on allocation.  Bump the driver version so mesa
> knows when it will get cleared vram by default.
>
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index d90473aec942..fad556be840b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -117,9 +117,10 @@
>   * - 3.56.0 - Update IB start address and size alignment for decode and 
> encode
>   * - 3.57.0 - Compute tunneling on GFX10+
>   * - 3.58.0 - Add GFX12 DCC support
> + * - 3.59.0 - Cleared VRAM
>   */
>  #define KMS_DRIVER_MAJOR   3
> -#define KMS_DRIVER_MINOR   58
> +#define KMS_DRIVER_MINOR   59
>  #define KMS_DRIVER_PATCHLEVEL  0
>
>  /*
> --
> 2.46.0
>
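
For context, userspace such as Mesa typically gates behavior like this on
the reported DRM version. A minimal sketch of such a check using libdrm;
the helper name amdgpu_kmd_clears_vram is hypothetical and not part of the
patch above:

#include <stdbool.h>
#include <xf86drm.h>

/* Hypothetical helper: returns true if the amdgpu KMD reports a version
 * implying VRAM is cleared on allocation (>= 3.59 per the patch above).
 */
static bool amdgpu_kmd_clears_vram(int drm_fd)
{
	drmVersionPtr ver = drmGetVersion(drm_fd);
	bool cleared;

	if (!ver)
		return false;

	cleared = ver->version_major > 3 ||
		  (ver->version_major == 3 && ver->version_minor >= 59);
	drmFreeVersion(ver);
	return cleared;
}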


Re: [PATCH 6/7] drm/amdgpu: Check gmc requirement for reset on init

2024-09-24 Thread Bhardwaj, Rajneesh

Reviewed-by: Rajneesh Bhardwaj 

On 9/24/2024 1:56 AM, Lijo Lazar wrote:

Add a callback to check whether the GMC block has detected any condition
that requires a reset on init. One such case is a pending NPS change
request. If the reset is done because of an NPS switch, refresh the NPS
info from the discovery table.

Signed-off-by: Lijo Lazar 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 13 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  5 +
  drivers/gpu/drm/amd/amdgpu/soc15.c  |  2 ++
  3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 21f1e65c9dc9..011fe3a847d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -1261,12 +1261,15 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
struct amdgpu_gmc_memrange *ranges;
int range_cnt, ret, i, j;
uint32_t nps_type;
+   bool refresh;
  
  	if (!mem_ranges)

return -EINVAL;
  
+	refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&

+ (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS);
ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
-   &range_cnt, false);
+   &range_cnt, refresh);
  
  	if (ret)

return ret;
@@ -1392,3 +1395,11 @@ void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev)
adev->dev,
"NPS mode change request done, reload driver to complete the change\n");
  }
+
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev)
+{
+   if (adev->gmc.gmc_funcs->need_reset_on_init)
+   return adev->gmc.gmc_funcs->need_reset_on_init(adev);
+
+   return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index b13d6adb5efd..d4cd247fe574 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -78,6 +78,8 @@ enum amdgpu_memory_partition {
 BIT(AMDGPU_NPS3_PARTITION_MODE) | BIT(AMDGPU_NPS4_PARTITION_MODE) | \
 BIT(AMDGPU_NPS6_PARTITION_MODE) | BIT(AMDGPU_NPS8_PARTITION_MODE))
  
+#define AMDGPU_GMC_INIT_RESET_NPS  BIT(0)

+
  /*
   * GMC page fault information
   */
@@ -169,6 +171,7 @@ struct amdgpu_gmc_funcs {
/* Request NPS mode */
int (*request_mem_partition_mode)(struct amdgpu_device *adev,
  int nps_mode);
+   bool (*need_reset_on_init)(struct amdgpu_device *adev);
  };
  
  struct amdgpu_xgmi_ras {

@@ -314,6 +317,7 @@ struct amdgpu_gmc {
const struct amdgpu_gmc_funcs   *gmc_funcs;
enum amdgpu_memory_partitionrequested_nps_mode;
uint32_t supported_nps_modes;
+   uint32_t reset_flags;
  
  	struct amdgpu_xgmi xgmi;

struct amdgpu_irq_src   ecc_irq;
@@ -468,5 +472,6 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
  int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
int nps_mode);
  void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev);
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev);
  
  #endif

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 619933f252aa..97ca4931a7ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -833,6 +833,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
  
  	if (amdgpu_psp_tos_reload_needed(adev))

return true;
+   if (amdgpu_gmc_need_reset_on_init(adev))
+   return true;
/* Just return false for soc15 GPUs.  Reset does not seem to
 * be necessary.
 */
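
As an illustration of how the new hook is meant to be consumed, a GMC IP
implementation would report its pending-reset condition through
amdgpu_gmc_funcs. A minimal sketch; the name gmc_vX_need_reset_on_init and
its placement are assumptions, not part of the patch above:

/* Sketch: a GMC block flags a pending NPS switch as a reset-on-init cause. */
static bool gmc_vX_need_reset_on_init(struct amdgpu_device *adev)
{
	if (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS)
		return true;

	return false;
}

static const struct amdgpu_gmc_funcs gmc_vX_gmc_funcs = {
	/* ... existing callbacks ... */
	.need_reset_on_init = gmc_vX_need_reset_on_init,
};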


Re: [PATCH 4/7] drm/amdgpu: Add sysfs interfaces for NPS mode

2024-09-24 Thread Bhardwaj, Rajneesh

Reviewed-by: Rajneesh Bhardwaj 

On 9/24/2024 1:56 AM, Lijo Lazar wrote:

Add a sysfs interface to see the available NPS modes to switch to -

cat /sys/bus/pci/devices/../available_memory_partition

Make the current_memory_partition sysfs node read/write for requesting a
new NPS mode. The request is only cached and at a later point a driver
unload/reload is required to switch to the new NPS mode.

Ex:
echo NPS1 > /sys/bus/pci/devices/../current_memory_partition
echo NPS4 > /sys/bus/pci/devices/../current_memory_partition

The above interfaces will be available only if the SOC supports more than
one NPS mode.

Also modify the current memory partition sysfs logic to be more
generic.

Signed-off-by: Lijo Lazar 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 114 
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |   6 ++
  2 files changed, 104 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 758fda4e628f..24a1f931d9ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -1130,6 +1130,79 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
return ret;
  }
  
+static const char *nps_desc[] = {

+   [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
+   [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
+   [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
+   [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
+   [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
+   [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
+};
+
+static ssize_t available_memory_partition_show(struct device *dev,
+  struct device_attribute *addr,
+  char *buf)
+{
+   struct drm_device *ddev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(ddev);
+   int size = 0, mode;
+   char *sep = "";
+
+   for_each_inst(mode, adev->gmc.supported_nps_modes) {
+   size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
+   sep = ", ";
+   }
+   size += sysfs_emit_at(buf, size, "\n");
+
+   return size;
+}
+
+static ssize_t current_memory_partition_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+   struct drm_device *ddev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(ddev);
+   enum amdgpu_memory_partition mode;
+   struct amdgpu_hive_info *hive;
+   int i;
+
+   mode = UNKNOWN_MEMORY_PARTITION_MODE;
+   for_each_inst(i, adev->gmc.supported_nps_modes) {
+   if (!strncasecmp(nps_desc[i], buf, strlen(nps_desc[i]))) {
+   mode = i;
+   break;
+   }
+   }
+
+   if (mode == UNKNOWN_MEMORY_PARTITION_MODE)
+   return -EINVAL;
+
+   if (mode == adev->gmc.gmc_funcs->query_mem_partition_mode(adev)) {
+   dev_info(
+   adev->dev,
+   "requested NPS mode is same as current NPS mode, 
skipping\n");
+   return count;
+   }
+
+   /* If device is part of hive, all devices in the hive should request the
+* same mode. Hence store the requested mode in hive.
+*/
+   hive = amdgpu_get_xgmi_hive(adev);
+   if (hive) {
+   atomic_set(&hive->requested_nps_mode, mode);
+   amdgpu_put_xgmi_hive(hive);
+   } else {
+   adev->gmc.requested_nps_mode = mode;
+   }
+
+   dev_info(
+   adev->dev,
+   "NPS mode change requested, please remove and reload the 
driver\n");
+
+   return count;
+}
+
  static ssize_t current_memory_partition_show(
struct device *dev, struct device_attribute *addr, char *buf)
  {
@@ -1138,38 +1211,47 @@ static ssize_t current_memory_partition_show(
enum amdgpu_memory_partition mode;
  
  	mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);

-   switch (mode) {
-   case AMDGPU_NPS1_PARTITION_MODE:
-   return sysfs_emit(buf, "NPS1\n");
-   case AMDGPU_NPS2_PARTITION_MODE:
-   return sysfs_emit(buf, "NPS2\n");
-   case AMDGPU_NPS3_PARTITION_MODE:
-   return sysfs_emit(buf, "NPS3\n");
-   case AMDGPU_NPS4_PARTITION_MODE:
-   return sysfs_emit(buf, "NPS4\n");
-   case AMDGPU_NPS6_PARTITION_MODE:
-   return sysfs_emit(buf, "NPS6\n");
-   case AMDGPU_NPS8_PARTITION_MODE:
-   return sysfs_emit(buf, "NPS8\n");
-   default:
+   if ((mode > ARRAY_SIZE(nps_desc)) ||
+   (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode))
return sysfs_emit(buf, "UNKNOWN\n");
-   }
+
+   return sysfs_emit(buf, "%s\n", nps_desc[mode]);
  }
  
-static DEVICE_ATTR_RO(current_memory_par
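
The quoted patch ends abruptly above. For readability, the sysfs wiring the
commit message describes typically looks like the following sketch; the
registration helper name is assumed, and the exact remainder of the actual
patch may differ:

/* Sketch: attribute declarations matching the show/store handlers above. */
static DEVICE_ATTR_RW(current_memory_partition);
static DEVICE_ATTR_RO(available_memory_partition);

/* Per the commit message, the nodes are only exposed when more than one
 * NPS mode is supported (illustrative helper, assumed name):
 */
static int amdgpu_gmc_sysfs_init_sketch(struct amdgpu_device *adev)
{
	int r;

	if (hweight32(adev->gmc.supported_nps_modes) <= 1)
		return 0;

	r = device_create_file(adev->dev, &dev_attr_current_memory_partition);
	if (r)
		return r;

	return device_create_file(adev->dev,
				  &dev_attr_available_memory_partition);
}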

Re: [PATCH 1/7] drm/amdgpu: Add option to refresh NPS data

2024-09-24 Thread Bhardwaj, Rajneesh

Reviewed-by: Rajneesh Bhardwaj 

On 9/24/2024 1:56 AM, Lijo Lazar wrote:

In certain use cases, NPS data needs to be refreshed again from the
discovery table. Add an API parameter to refresh NPS data from the
discovery table.

Signed-off-by: Lijo Lazar 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 68 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   |  2 +-
  3 files changed, 55 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 4bd61c169ca8..9f9a1867da72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1723,37 +1723,75 @@ union nps_info {
struct nps_info_v1_0 v1;
  };
  
+static int amdgpu_discovery_refresh_nps_info(struct amdgpu_device *adev,

+union nps_info *nps_data)
+{
+   uint64_t vram_size, pos, offset;
+   struct nps_info_header *nhdr;
+   struct binary_header bhdr;
+   uint16_t checksum;
+
+   vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
+   pos = vram_size - DISCOVERY_TMR_OFFSET;
+   amdgpu_device_vram_access(adev, pos, &bhdr, sizeof(bhdr), false);
+
+   offset = le16_to_cpu(bhdr.table_list[NPS_INFO].offset);
+   checksum = le16_to_cpu(bhdr.table_list[NPS_INFO].checksum);
+
+   amdgpu_device_vram_access(adev, (pos + offset), nps_data,
+ sizeof(*nps_data), false);
+
+   nhdr = (struct nps_info_header *)(nps_data);
+   if (!amdgpu_discovery_verify_checksum((uint8_t *)nps_data,
+ le32_to_cpu(nhdr->size_bytes),
+ checksum)) {
+   dev_err(adev->dev, "nps data refresh, checksum mismatch\n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
  int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
  uint32_t *nps_type,
  struct amdgpu_gmc_memrange **ranges,
- int *range_cnt)
+ int *range_cnt, bool refresh)
  {
struct amdgpu_gmc_memrange *mem_ranges;
struct binary_header *bhdr;
union nps_info *nps_info;
+   union nps_info nps_data;
u16 offset;
-   int i;
+   int i, r;
  
  	if (!nps_type || !range_cnt || !ranges)

return -EINVAL;
  
-	if (!adev->mman.discovery_bin) {

-   dev_err(adev->dev,
-   "fetch mem range failed, ip discovery uninitialized\n");
-   return -EINVAL;
-   }
+   if (refresh) {
+   r = amdgpu_discovery_refresh_nps_info(adev, &nps_data);
+   if (r)
+   return r;
+   nps_info = &nps_data;
+   } else {
+   if (!adev->mman.discovery_bin) {
+   dev_err(adev->dev,
+   "fetch mem range failed, ip discovery 
uninitialized\n");
+   return -EINVAL;
+   }
  
-	bhdr = (struct binary_header *)adev->mman.discovery_bin;

-   offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
+   bhdr = (struct binary_header *)adev->mman.discovery_bin;
+   offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
  
-	if (!offset)

-   return -ENOENT;
+   if (!offset)
+   return -ENOENT;
  
-	/* If verification fails, return as if NPS table doesn't exist */

-   if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
-   return -ENOENT;
+   /* If verification fails, return as if NPS table doesn't exist */
+   if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
+   return -ENOENT;
  
-	nps_info = (union nps_info *)(adev->mman.discovery_bin + offset);

+   nps_info =
+   (union nps_info *)(adev->mman.discovery_bin + offset);
+   }
  
  	switch (le16_to_cpu(nps_info->v1.header.version_major)) {

case 1:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index f5d36525ec3e..b44d56465c5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -33,6 +33,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
  int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
  uint32_t *nps_type,
  struct amdgpu_gmc_memrange **ranges,
- int *range_cnt);
+ int *range_cnt, bool refresh);
  
  #endif /* __AMDGPU_DISCOVERY__ */

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 17a19
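
The amdgpu_gmc.c hunk is cut off above. Per the diffstat it is a one-line
change on the caller side of the new parameter, which reads roughly like
this sketch; refresh stays false in this patch and is only set to true by a
later patch in the series:

ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
				    &range_cnt, false);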

Re: [PATCH v2 01/10] drm/amdgpu: Add init levels

2024-09-11 Thread Bhardwaj, Rajneesh

The series is

Acked-and-tested-by: Rajneesh Bhardwaj 

On 9/11/2024 2:58 AM, Lijo Lazar wrote:

Add init levels to define the level to which device needs to be
initialized.

Signed-off-by: Lijo Lazar 
---

v2:
Add comments describing init levels
Drop unnecessary assignment
Rename AMDGPU_INIT_LEVEL_MINIMAL to AMDGPU_INIT_LEVEL_MINIMAL_XGMI

  drivers/gpu/drm/amd/amdgpu/amdgpu.h| 22 
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 66 ++
  2 files changed, 88 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6e6580ab7e04..d8299383af11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -820,6 +820,24 @@ struct amdgpu_mqd {
struct amdgpu_mqd_prop *p);
  };
  
+/*

+ * Custom Init levels could be defined for different situations where a full
+ * initialization of all hardware blocks are not expected. Sample cases are
+ * custom init sequences after resume after S0i3/S3, reset on initialization,
+ * partial reset of blocks etc. Presently, this defines only two levels. Levels
+ * are described in corresponding struct definitions - amdgpu_init_default,
+ * amdgpu_init_minimal_xgmi.
+ */
+enum amdgpu_init_lvl_id {
+   AMDGPU_INIT_LEVEL_DEFAULT,
+   AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+};
+
+struct amdgpu_init_level {
+   enum amdgpu_init_lvl_id level;
+   uint32_t hwini_ip_block_mask;
+};
+
  #define AMDGPU_RESET_MAGIC_NUM 64
  #define AMDGPU_MAX_DF_PERFMONS 4
  struct amdgpu_reset_domain;
@@ -1169,6 +1187,8 @@ struct amdgpu_device {
boolenforce_isolation[MAX_XCP];
/* Added this mutex for cleaner shader isolation between GFX and 
compute processes */
struct mutexenforce_isolation_mutex;
+
+   struct amdgpu_init_level *init_lvl;
  };
  
  static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,

@@ -1623,4 +1643,6 @@ extern const struct attribute_group amdgpu_vram_mgr_attr_group;
  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
  extern const struct attribute_group amdgpu_flash_attr_group;
  
+void amdgpu_set_init_level(struct amdgpu_device *adev,

+  enum amdgpu_init_lvl_id lvl);
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 61a189e30bcd..2ecc70f220d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -144,6 +144,50 @@ const char *amdgpu_asic_name[] = {
"LAST",
  };
  
+#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM - 1, 0)

+/*
+ * Default init level where all blocks are expected to be initialized. This is
+ * the level of initialization expected by default and also after a full reset
+ * of the device.
+ */
+struct amdgpu_init_level amdgpu_init_default = {
+   .level = AMDGPU_INIT_LEVEL_DEFAULT,
+   .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
+/*
+ * Minimal blocks needed to be initialized before a XGMI hive can be reset. 
This
+ * is used for cases like reset on initialization where the entire hive needs 
to
+ * be reset before first use.
+ */
+struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
+   .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+   .hwini_ip_block_mask =
+   BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
+   BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH)
+};
+
+static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
+enum amd_ip_block_type block)
+{
+   return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
+}
+
+void amdgpu_set_init_level(struct amdgpu_device *adev,
+  enum amdgpu_init_lvl_id lvl)
+{
+   switch (lvl) {
+   case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
+   adev->init_lvl = &amdgpu_init_minimal_xgmi;
+   break;
+   case AMDGPU_INIT_LEVEL_DEFAULT:
+   fallthrough;
+   default:
+   adev->init_lvl = &amdgpu_init_default;
+   break;
+   }
+}
+
  static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device 
*adev);
  
  /**

@@ -2633,6 +2677,9 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
+   if (!amdgpu_ip_member_of_hwini(
+   adev, adev->ip_blocks[i].version->type))
+   continue;
if (adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_COMMON ||
(amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type 
== AMD_IP_BLOCK_TYPE_PSP)) ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
@@ -2658,6 +270
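
The quoted hunk is cut off above. To illustrate how the level is intended
to be used by later patches in this series, a hedged sketch of a caller;
the condition and call sites here are assumptions, not quoted code:

/* Sketch: minimal bring-up of an XGMI hive before a reset-on-init. */
if (hive_needs_reset_on_init)	/* hypothetical condition */
	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_MINIMAL_XGMI);

/* ... only GMC/SMC/COMMON/IH are hw-initialized, the hive is reset ... */

/* afterwards, return to full initialization */
amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);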

RE: [PATCH 2/2] drm/ttm: Make ttm shrinkers NUMA aware

2024-07-02 Thread Bhardwaj, Rajneesh
[AMD Official Use Only - AMD Internal Distribution Only]

-Original Message-
From: Koenig, Christian 
Sent: Tuesday, July 2, 2024 2:25 PM
To: Alex Deucher ; Bhardwaj, Rajneesh 
; Maling list - DRI developers 

Cc: amd-gfx@lists.freedesktop.org; Kuehling, Felix ; 
Deucher, Alexander 
Subject: Re: [PATCH 2/2] drm/ttm: Make ttm shrinkers NUMA aware



Am 02.07.24 um 20:20 schrieb Alex Deucher:
> + dri-devel
>
> On Tue, Jul 2, 2024 at 1:40 PM Rajneesh Bhardwaj
>  wrote:
>> Otherwise the nid is always passed as 0 during memory reclaim so make
>> TTM shrinkers NUMA aware.
>>
>> Signed-off-by: Rajneesh Bhardwaj 
>> ---
>>   drivers/gpu/drm/ttm/ttm_pool.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
>> b/drivers/gpu/drm/ttm/ttm_pool.c index cc27d5c7afe8..f93ac9089a60
>> 100644
>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>> @@ -812,7 +812,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
>>  &ttm_pool_debugfs_shrink_fops);
>>   #endif
>>
>> -   mm_shrinker = shrinker_alloc(0, "drm-ttm_pool");
>> +   mm_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE,
>> + "drm-ttm_pool");

You also need to make ttm_pool_shrink() actually use the nid.

Yeah. Did you mean setting the nid of the shrinker control structure from
something like ttm_global_init(), i.e. pass the NUMA node id
dev_to_node(dev) to ttm_pool_mgr_init() and use it to set
mm_shrinker->sc.nid?

Just setting the flag won't really help us.

Regards,
Christian.

>>  if (!mm_shrinker)
>>  return -ENOMEM;
>>
>> --
>> 2.34.1
>>
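
To make Christian's point concrete: with SHRINKER_NUMA_AWARE set, the scan
callback receives the target node in sc->nid and should only shrink pages
from that node. A rough sketch of the direction; the ttm_pool_shrink()
signature change shown here is an assumption, not the existing code:

static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
					    struct shrink_control *sc)
{
	unsigned long num_freed = 0;

	do {
		/* assumed: ttm_pool_shrink() learns to take a nid */
		num_freed += ttm_pool_shrink(sc->nid);
	} while (num_freed < sc->nr_to_scan &&
		 atomic_long_read(&allocated_pages));

	return num_freed ?: SHRINK_STOP;
}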



Re: [PATCH] drm/ttm: Implement strict NUMA pool allocations

2024-03-22 Thread Bhardwaj, Rajneesh



On 3/22/2024 11:29 AM, Ruhl, Michael J wrote:

-Original Message-
From: dri-devel  On Behalf Of
Rajneesh Bhardwaj
Sent: Friday, March 22, 2024 3:08 AM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Cc: felix.kuehl...@amd.com; alexander.deuc...@amd.com;
christian.koe...@amd.com; Rajneesh Bhardwaj
; Joe Greathouse

Subject: [PATCH] drm/ttm: Implement strict NUMA pool allocations

This change allows TTM to honor NUMA-localized allocations, which can
result in a significant performance improvement on a multi-socket NUMA
system. On GFXIP 9.4.3 based AMD APUs, we see manyfold benefits from this
change, resulting not only in ~10% performance improvement in certain
benchmarks but also in more consistent and less sporadic results,
especially when NUMA balancing is not explicitly disabled. In certain
scenarios, workloads show run-to-run variability, e.g. HPL would show a
~10x performance drop after running back to back 4-5 times and would
recover later on a subsequent run. This is seen with other
memory-intensive workloads too. It was seen that when CPU caches were
dropped, e.g. sudo sysctl -w vm.drop_caches=1, the variability reduced
but the performance was still well below a good run.

Use of the __GFP_THISNODE flag ensures that during memory allocation the
kernel prioritizes allocations from the local or closest NUMA node,
thereby reducing memory access latency. When memory is allocated with the
__GFP_THISNODE flag, allocations will predominantly be done on the local
node; consequently, the shrinkers may prioritize reclaiming memory from
caches associated with the local node to maintain memory locality and
minimize latency, thereby providing better shrinker targeting.

Reduced memory pressure on remote nodes can also indirectly influence
shrinker behavior by potentially reducing the frequency and intensity of
memory reclamation operations on remote nodes, and could provide improved
overall system performance.

While this change could be more beneficial in general, i.e. without the
use of a module parameter, in the absence of widespread testing limit it
to AMD GFXIP 9.4.3 based ttm pool initializations only.


Cc: Joe Greathouse 
Signed-off-by: Rajneesh Bhardwaj 
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  8 
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  7 ++-
drivers/gpu/drm/ttm/tests/ttm_pool_test.c | 10 +-
drivers/gpu/drm/ttm/ttm_device.c  |  2 +-
drivers/gpu/drm/ttm/ttm_pool.c|  7 ++-
include/drm/ttm/ttm_pool.h|  4 +++-
7 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec34..96532cfc6230 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -253,6 +253,7 @@ extern int amdgpu_user_partt_mode;
extern int amdgpu_agp;

extern int amdgpu_wbrf;
+extern bool strict_numa_alloc;

#define AMDGPU_VM_MAX_NUM_CTX   4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 80b9642f2bc4..a183a6b4493d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -781,6 +781,14 @@ int queue_preemption_timeout_ms = 9000;
module_param(queue_preemption_timeout_ms, int, 0644);
MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption
timeout in ms (1 = Minimum, 9000 = default)");

+/**
+ * DOC: strict_numa_alloc(bool)
+ * Policy to force NUMA allocation requests from the proximity NUMA domain
only.
+ */
+bool strict_numa_alloc;
+module_param(strict_numa_alloc, bool, 0444);
+MODULE_PARM_DESC(strict_numa_alloc, "Force NUMA allocation requests
to be satisfied from the closest node only (false = default)");
+
/**
  * DOC: debug_evictions(bool)
  * Enable extra debug messages to help determine the cause of evictions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b0ed10f4de60..a9f78f85e28c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1768,6 +1768,7 @@ static int amdgpu_ttm_reserve_tmr(struct
amdgpu_device *adev)

static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
{
+   bool policy = true;
int i;

if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
@@ -1779,11 +1780,15 @@ static int amdgpu_ttm_pools_init(struct
amdgpu_device *adev)
if (!adev->mman.ttm_pools)
return -ENOMEM;

+   /* Policy not only depends on the module param but also on the ASIC
+* setting use_strict_numa_alloc as well.
+*/
for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
  adev->gmc.mem_partitions[i].num

Re: [PATCH] drm/ttm: Implement strict NUMA pool allocations

2024-03-22 Thread Bhardwaj, Rajneesh





On 3/22/2024 9:15 AM, Christian König wrote:

Am 22.03.24 um 08:07 schrieb Rajneesh Bhardwaj:

This change allows TTM to honor NUMA-localized allocations, which can
result in a significant performance improvement on a multi-socket NUMA
system. On GFXIP 9.4.3 based AMD APUs, we see manyfold benefits from this
change, resulting not only in ~10% performance improvement in certain
benchmarks but also in more consistent and less sporadic results,
especially when NUMA balancing is not explicitly disabled. In certain
scenarios, workloads show run-to-run variability, e.g. HPL would show a
~10x performance drop after running back to back 4-5 times and would
recover later on a subsequent run. This is seen with other
memory-intensive workloads too. It was seen that when CPU caches were
dropped, e.g. sudo sysctl -w vm.drop_caches=1, the variability reduced
but the performance was still well below a good run.

Use of the __GFP_THISNODE flag ensures that during memory allocation the
kernel prioritizes allocations from the local or closest NUMA node,
thereby reducing memory access latency.


Exactly that's what it doesn't do.

__GFP_THISNODE just means it enforces allocation from the specified node.


Thanks for the feedback, Christian.

Sure, maybe I should have made it clear in the commit message that there 
is no fallback to zonelist while allocating slab cache. And this is 
exactly what we want, we don't want the pages to be landing on remote nodes.




Additional to that there is a mandatory requirement that this flag is 
only used if it has to be used for correctness. And that is simply not 
the case here.



Sorry, I didn't quite understand this. What is the mandatory
requirement? Could you please clarify? Based on the documentation I read
and on the kernel source code, I didn't find any hard requirement
discouraging the use of this flag.





So as long as nobody can explain why that should help this is an 
absolutely no-go.



Could you please clarify the controversial part here? We have strong
backing data that proves the usefulness, besides this change restricts
us to a certain HW IP. Also, the possibility of hitting the OOM killer
actually seems lower when we use __GFP_THISNODE.


https://elixir.bootlin.com/linux/latest/source/mm/page_alloc.c#L3439


Another important thing I want to highlight here is that for AMD 
GFXIP9.4.3 APU, which has unified HBM stack (no RAM/VRAM distinction), 
we get the backing vram pages using ttm_pool_alloc_page(nid) already.






Regards,
Christian.


  When memory is allocated with the
__GFP_THISNODE flag, allocations will predominantly be done on the local
node; consequently, the shrinkers may prioritize reclaiming memory from
caches associated with the local node to maintain memory locality and
minimize latency, thereby providing better shrinker targeting.

Reduced memory pressure on remote nodes can also indirectly influence
shrinker behavior by potentially reducing the frequency and intensity of
memory reclamation operations on remote nodes, and could provide improved
overall system performance.

While this change could be more beneficial in general, i.e. without the
use of a module parameter, in the absence of widespread testing limit it
to AMD GFXIP 9.4.3 based ttm pool initializations only.


Cc: Joe Greathouse 
Signed-off-by: Rajneesh Bhardwaj 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  8 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  7 ++-
  drivers/gpu/drm/ttm/tests/ttm_pool_test.c | 10 +-
  drivers/gpu/drm/ttm/ttm_device.c  |  2 +-
  drivers/gpu/drm/ttm/ttm_pool.c    |  7 ++-
  include/drm/ttm/ttm_pool.h    |  4 +++-
  7 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 9c62552bec34..96532cfc6230 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -253,6 +253,7 @@ extern int amdgpu_user_partt_mode;
  extern int amdgpu_agp;
    extern int amdgpu_wbrf;
+extern bool strict_numa_alloc;
    #define AMDGPU_VM_MAX_NUM_CTX    4096
  #define AMDGPU_SG_THRESHOLD    (256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 80b9642f2bc4..a183a6b4493d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -781,6 +781,14 @@ int queue_preemption_timeout_ms = 9000;
  module_param(queue_preemption_timeout_ms, int, 0644);
  MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption 
timeout in ms (1 = Minimum, 9000 = default)");

  +/**
+ * DOC: strict_numa_alloc(bool)
+ * Policy to force NUMA allocation requests from the proximity NUMA 
domain only.

+ */
+bool strict_numa_alloc;
+module_param(strict_numa_alloc, bool, 0444);
+MODULE_PARM_DESC(strict_numa_alloc, "Force N

Re: [PATCH v7 2/2] drm/amdgpu: sync page table freeing with tlb flush

2024-03-18 Thread Bhardwaj, Rajneesh

The change you shared with me fixes the crash. Please include it in v8.


On 3/18/2024 10:06 AM, Sharma, Shashank wrote:


[AMD Official Use Only - General]


Already sent a NULL check patch based on this backtrace, I am waiting 
for Rajneesh's feedback.


Regards
Shashank

*From:* Bhardwaj, Rajneesh 
*Sent:* Monday, March 18, 2024 3:04 PM
*To:* Sharma, Shashank ; 
amd-gfx@lists.freedesktop.org 
*Cc:* Koenig, Christian ; Deucher, Alexander 
; Kuehling, Felix 
*Subject:* Re: [PATCH v7 2/2] drm/amdgpu: sync page table freeing with 
tlb flush


Hi Shashank,

We'll probably need a v8 with the NULL pointer crash fixed, i.e. before
freeing the PT entries, check for a valid entry before calling
amdgpu_vm_pt_free. The crash is seen with the device memory allocators,
but the system memory allocators are looking fine.


[  127.255863] [drm] Using MTYPE_RW for local memory
[  333.606136] hugetlbfs: test_with_MPI.e (25268): Using mlock ulimits 
for SHM_HUGETLB is obsolete
[  415.351447] BUG: kernel NULL pointer dereference, address: 
0008

[  415.359245] #PF: supervisor write access in kernel mode
[  415.365081] #PF: error_code(0x0002) - not-present page
[  415.370817] PGD 101259067 P4D 101259067 PUD 10125a067 PMD 0
[  415.377140] Oops: 0002 [#1] PREEMPT SMP NOPTI
[  415.382004] CPU: 0 PID: 25481 Comm: test_with_MPI.e Tainted: 
G   OE 5.18.2-mi300-build-140423-ubuntu-22.04+ #24
[  415.394437] Hardware name: AMD Corporation Sh51p/Sh51p, BIOS 
RMO1001AS 02/21/2024

[  415.402797] RIP: 0010:amdgpu_vm_ptes_update+0x6fd/0xa10 [amdgpu]
[  415.409648] Code: 4c 89 ff 4d 8d 66 30 e8 f1 ed ff ff 48 85 db 74 
42 48 39 5d a0 74 40 48 8b 53 20 48 8b 4b 18 48 8d 43 18 48 8d 75 b0 
4c 89 ff <48

> 89 51 08 48 89 0a 49 8b 56 30 48 89 42 08 48 89 53 18 4c 89 63
[  415.430621] RSP: 0018:c9000401f990 EFLAGS: 00010287
[  415.436456] RAX: 888147bb82f0 RBX: 888147bb82d8 RCX: 

[  415.26] RDX:  RSI: c9000401fa30 RDI: 
888161f8
[  415.452397] RBP: c9000401fa80 R08:  R09: 
c9000401fa00
[  415.460368] R10: 0007f0cc R11: 0007f0c85000 R12: 
c9000401fb20
[  415.468340] R13: 0007f0d0 R14: c9000401faf0 R15: 
888161f8
[  415.476312] FS:  7f132ff89840() GS:889f87c0() 
knlGS:

[  415.485350] CS:  0010 DS:  ES:  CR0: 80050033
[  415.491767] CR2: 0008 CR3: 000161d46003 CR4: 
00770ef0

[  415.499738] PKRU: 5554
[  415.502750] Call Trace:
[  415.505482]  
[  415.507825]  amdgpu_vm_update_range+0x32a/0x880 [amdgpu]
[  415.513869]  amdgpu_vm_clear_freed+0x117/0x250 [amdgpu]
[  415.519814] amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu+0x18c/0x250 
[amdgpu]

[  415.527729]  kfd_ioctl_unmap_memory_from_gpu+0xed/0x340 [amdgpu]
[  415.534551]  kfd_ioctl+0x3b6/0x510 [amdgpu]
[  415.539324]  ? kfd_ioctl_get_dmabuf_info+0x1d0/0x1d0 [amdgpu]
[  415.545844]  ? __fget_light+0xc5/0x100
[  415.550037]  __x64_sys_ioctl+0x91/0xc0
[  415.554227]  do_syscall_64+0x5c/0x80
[  415.558223]  ? debug_smp_processor_id+0x17/0x20
[  415.563285]  ? fpregs_assert_state_consistent+0x23/0x60
[  415.569124]  ? exit_to_user_mode_prepare+0x45/0x190
[  415.574572]  ? ksys_write+0xce/0xe0


On 3/18/2024 8:08 AM, Shashank Sharma wrote:

The idea behind this patch is to delay the freeing of PT entry
objects until the TLB flush is done.

This patch:
- Adds a tlb_flush_waitlist in amdgpu_vm_update_params which will keep the
  objects that need to be freed after tlb_flush.
- Adds PT entries in this list in amdgpu_vm_ptes_update after finding
  the PT entry.
- Changes functionality of amdgpu_vm_pt_free_dfs from (df_search + free)
  to simply freeing of the BOs, also renames it to
  amdgpu_vm_pt_free_list to reflect this same.
- Exports function amdgpu_vm_pt_free_list to be called directly.
- Calls amdgpu_vm_pt_free_list directly from amdgpu_vm_update_range.

V2: rebase
V4: Addressed review comments from Christian
 - add only locked PTEs entries in TLB flush waitlist.
 - do not create a separate function for list flush.
 - do not create a new lock for TLB flush.
 - there is no need to wait on tlb_flush_fence exclusively.

V5: Addressed review comments from Christian
 - change the amdgpu_vm_pt_free_dfs's functionality to simple freeing
   of the objects and rename it.
 - add all the PTE objects in params->tlb_flush_waitlist
 - let amdgpu_vm_pt_free_root handle the freeing of BOs independently
 - call amdgpu_vm_pt_free_list directly

V6: Rebase
V7: Rebase

Cc: Christian König <mailto:christian.koe...@amd.com>
Cc: Alex Deucher <mailto:alexander.deuc...@amd.com>
Cc: Felix Kuehling <mailto:felix.kuehl...@amd.com>
Cc: Rajneesh Bhardwaj <mailto:rajneesh.bhard...@amd.com>
Acked-by: Felix Kuehling

Re: [PATCH v7 2/2] drm/amdgpu: sync page table freeing with tlb flush

2024-03-18 Thread Bhardwaj, Rajneesh

Hi Shashank,

We'll probably need a v8 with the NULL pointer crash fixed, i.e. before
freeing the PT entries, check for a valid entry before calling
amdgpu_vm_pt_free. The crash is seen with the device memory allocators,
but the system memory allocators are looking fine.


[  127.255863] [drm] Using MTYPE_RW for local memory
[  333.606136] hugetlbfs: test_with_MPI.e (25268): Using mlock ulimits 
for SHM_HUGETLB is obsolete
[  415.351447] BUG: kernel NULL pointer dereference, address: 
0008

[  415.359245] #PF: supervisor write access in kernel mode
[  415.365081] #PF: error_code(0x0002) - not-present page
[  415.370817] PGD 101259067 P4D 101259067 PUD 10125a067 PMD 0
[  415.377140] Oops: 0002 [#1] PREEMPT SMP NOPTI
[  415.382004] CPU: 0 PID: 25481 Comm: test_with_MPI.e Tainted: 
G   OE 5.18.2-mi300-build-140423-ubuntu-22.04+ #24
[  415.394437] Hardware name: AMD Corporation Sh51p/Sh51p, BIOS 
RMO1001AS 02/21/2024

[  415.402797] RIP: 0010:amdgpu_vm_ptes_update+0x6fd/0xa10 [amdgpu]
[  415.409648] Code: 4c 89 ff 4d 8d 66 30 e8 f1 ed ff ff 48 85 db 74 42 
48 39 5d a0 74 40 48 8b 53 20 48 8b 4b 18 48 8d 43 18 48 8d 75 b0 4c 89 
ff <48

> 89 51 08 48 89 0a 49 8b 56 30 48 89 42 08 48 89 53 18 4c 89 63
[  415.430621] RSP: 0018:c9000401f990 EFLAGS: 00010287
[  415.436456] RAX: 888147bb82f0 RBX: 888147bb82d8 RCX: 

[  415.26] RDX:  RSI: c9000401fa30 RDI: 
888161f8
[  415.452397] RBP: c9000401fa80 R08:  R09: 
c9000401fa00
[  415.460368] R10: 0007f0cc R11: 0007f0c85000 R12: 
c9000401fb20
[  415.468340] R13: 0007f0d0 R14: c9000401faf0 R15: 
888161f8
[  415.476312] FS:  7f132ff89840() GS:889f87c0() 
knlGS:

[  415.485350] CS:  0010 DS:  ES:  CR0: 80050033
[  415.491767] CR2: 0008 CR3: 000161d46003 CR4: 
00770ef0

[  415.499738] PKRU: 5554
[  415.502750] Call Trace:
[  415.505482]  
[  415.507825]  amdgpu_vm_update_range+0x32a/0x880 [amdgpu]
[  415.513869]  amdgpu_vm_clear_freed+0x117/0x250 [amdgpu]
[  415.519814] amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu+0x18c/0x250 
[amdgpu]

[  415.527729]  kfd_ioctl_unmap_memory_from_gpu+0xed/0x340 [amdgpu]
[  415.534551]  kfd_ioctl+0x3b6/0x510 [amdgpu]
[  415.539324]  ? kfd_ioctl_get_dmabuf_info+0x1d0/0x1d0 [amdgpu]
[  415.545844]  ? __fget_light+0xc5/0x100
[  415.550037]  __x64_sys_ioctl+0x91/0xc0
[  415.554227]  do_syscall_64+0x5c/0x80
[  415.558223]  ? debug_smp_processor_id+0x17/0x20
[  415.563285]  ? fpregs_assert_state_consistent+0x23/0x60
[  415.569124]  ? exit_to_user_mode_prepare+0x45/0x190
[  415.574572]  ? ksys_write+0xce/0xe0



On 3/18/2024 8:08 AM, Shashank Sharma wrote:


The idea behind this patch is to delay the freeing of PT entry objects
until the TLB flush is done.

This patch:
- Adds a tlb_flush_waitlist in amdgpu_vm_update_params which will keep the
   objects that need to be freed after tlb_flush.
- Adds PT entries in this list in amdgpu_vm_ptes_update after finding
   the PT entry.
- Changes functionality of amdgpu_vm_pt_free_dfs from (df_search + free)
   to simply freeing of the BOs, also renames it to
   amdgpu_vm_pt_free_list to reflect this same.
- Exports function amdgpu_vm_pt_free_list to be called directly.
- Calls amdgpu_vm_pt_free_list directly from amdgpu_vm_update_range.

V2: rebase
V4: Addressed review comments from Christian
 - add only locked PTEs entries in TLB flush waitlist.
 - do not create a separate function for list flush.
 - do not create a new lock for TLB flush.
 - there is no need to wait on tlb_flush_fence exclusively.

V5: Addressed review comments from Christian
 - change the amdgpu_vm_pt_free_dfs's functionality to simple freeing
   of the objects and rename it.
 - add all the PTE objects in params->tlb_flush_waitlist
 - let amdgpu_vm_pt_free_root handle the freeing of BOs independently
 - call amdgpu_vm_pt_free_list directly

V6: Rebase
V7: Rebase

Cc: Christian König
Cc: Alex Deucher
Cc: Felix Kuehling
Cc: Rajneesh Bhardwaj
Acked-by: Felix Kuehling
Acked-by: Rajneesh Bhardwaj
Tested-by: Rajneesh Bhardwaj
Signed-off-by: Shashank Sharma
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  5 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  7 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 53 +--
  3 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 26f1c3359642..eaa402f99fe0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -977,6 +977,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.unlocked = unlocked;
params.needs_flush = flush_tlb;
params.allow_override = allow_override;
+   INIT_LIST_HEAD(¶ms.tlb_flush_waitlist);
  
  	/* Implicitly sync to 
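
Condensed, the flow the changelog above describes looks like this sketch,
paraphrased from the description rather than the literal diff; the
vm_status list-head name is an assumption:

/* amdgpu_vm_update_range(): collect, flush, then free. */
INIT_LIST_HEAD(&params.tlb_flush_waitlist);

/* amdgpu_vm_ptes_update(): park locked PT BOs instead of freeing them */
list_move(&entry->vm_status, &params.tlb_flush_waitlist);

/* after commit + TLB flush fence is attached, release them in one go */
amdgpu_vm_pt_free_list(adev, &params);
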

Re: [PATCH v5 1/2] drm/amdgpu: implement TLB flush fence

2024-03-11 Thread Bhardwaj, Rajneesh

Acked-and-tested-by: Rajneesh Bhardwaj 

On 3/11/2024 10:37 AM, Sharma, Shashank wrote:


On 07/03/2024 20:22, Philip Yang wrote:



On 2024-03-06 09:41, Shashank Sharma wrote:

From: Christian König

The problem is that when (for example) 4k pages are replaced
with a single 2M page we need to wait for change to be flushed
out by invalidating the TLB before the PT can be freed.

Solve this by moving the TLB flush into a DMA-fence object which
can be used to delay the freeing of the PT BOs until it is signaled.

V2: (Shashank)
 - rebase
 - set dma_fence_error only in case of error
 - add tlb_flush fence only when PT/PD BO is locked (Felix)
 - use vm->pasid when f is NULL (Mukul)

V4: - add a wait for (f->dependency) in tlb_fence_work (Christian)
 - move the misplaced fence_create call to the end (Philip)

V5: - free the f->dependency properly (Christian)

Cc: Christian Koenig
Cc: Felix Kuehling
Cc: Rajneesh Bhardwaj
Cc: Alex Deucher
Reviewed-by: Shashank Sharma
Signed-off-by: Christian König
Signed-off-by: Shashank Sharma
---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  10 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |   4 +
  .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c  | 112 
++

  4 files changed, 128 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile

index fa26a4e3a99d..91ab4cf29b5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \

  amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
  atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
  atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-    amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o 
amdgpu_pll.o \
+    amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o 
amdgpu_vm_tlb_fence.o \

+    amdgpu_ib.o amdgpu_pll.o \
  amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
  amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o 
amdgpu_virt.o \

  amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 0960e0a665d3..310aae6fb49b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -988,6 +988,15 @@ int amdgpu_vm_update_range(struct amdgpu_device 
*adev, struct amdgpu_vm *vm,

    r = vm->update_funcs->commit(¶ms, fence);
  +    /* Prepare a TLB flush fence to be attached to PTs */
+    if (!unlocked && params.needs_flush && vm->is_compute_context) {
+    amdgpu_vm_tlb_fence_create(adev, vm, fence);
+
+    /* Makes sure no PD/PT is freed before the flush */
+    dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+   DMA_RESV_USAGE_BOOKKEEP);
+    }
+
  error_unlock:
  amdgpu_vm_eviction_unlock(vm);
  drm_dev_exit(idx);
@@ -2237,6 +2246,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,

    mutex_init(&vm->eviction_lock);
  vm->evicting = false;
+    vm->tlb_fence_context = dma_fence_context_alloc(1);
    r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
  false, &root, xcp_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

index 64b3f69efa57..298f604b8e5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -341,6 +341,7 @@ struct amdgpu_vm {
  atomic64_t    tlb_seq;
  uint64_t    tlb_seq_va;
  uint64_t    *tlb_seq_cpu_addr;
+    uint64_t    tlb_fence_context;
    atomic64_t    kfd_last_flushed_seq;
  @@ -594,5 +595,8 @@ void amdgpu_vm_update_fault_cache(struct 
amdgpu_device *adev,

    uint64_t addr,
    uint32_t status,
    unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
    #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c

new file mode 100644
index ..51cddfa3f1e8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to 
whom the

Re: [PATCH 1/2] drm/amdkfd: update SIMD distribution algo for GFXIP 9.4.2 onwards

2024-02-13 Thread Bhardwaj, Rajneesh



On 2/13/2024 3:52 PM, Felix Kuehling wrote:


On 2024-02-09 20:49, Rajneesh Bhardwaj wrote:

In certain cooperative group dispatch scenarios, the default SPI resource
allocation may cause reduced per-CU workgroup occupancy. Set
COMPUTE_RESOURCE_LIMITS.FORCE_SIMD_DIST=1 to mitigate soft hang
scenarios.

Suggested-by: Joseph Greathouse 
Signed-off-by: Rajneesh Bhardwaj 
---
* Incorporate review feedback from Felix from
https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg102840.html
   and split one of the suggested gfx11 changes as a seperate patch.


  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c    | 9 +
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  | 1 +
  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +++-
  3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c

index 42d881809dc7..697b6d530d12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -303,6 +303,15 @@ static void update_mqd(struct mqd_manager *mm, 
void *mqd,

  update_cu_mask(mm, mqd, minfo, 0);
  set_priority(m, q);
  +    if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) {
+    if (minfo->update_flag & UPDATE_FLAG_IS_GWS)
+    m->compute_resource_limits |=
+    COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+    else
+    m->compute_resource_limits &=
+    ~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+    }
+
  q->is_active = QUEUE_IS_ACTIVE(*q);
  }
  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index 677281c0793e..65b504813576 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -532,6 +532,7 @@ struct queue_properties {
  enum mqd_update_flag {
  UPDATE_FLAG_DBG_WA_ENABLE = 1,
  UPDATE_FLAG_DBG_WA_DISABLE = 2,
+    UPDATE_FLAG_IS_GWS = 3, /* quirk for gfx9 IP */


This flag needs to be a separate bit, so it should be defined as 4.
Otherwise it looks just like UPDATE_FLAG_DBG_WA_ENABLE |
UPDATE_FLAG_DBG_WA_DISABLE. I agree that defining bit-masks in an enum
is not ideal, but I've seen the same in other places.



Yes, I agree. I have sent the updated patches. When we extend this in the
future, if required, it would be 8, 16 and so on... not a great way to
extend/define an enum IMO.
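
For illustration, the separate-bit layout Felix is asking for would look
like this; a sketch only, with the values following from his comment that
the new flag should be defined as 4:

enum mqd_update_flag {
	UPDATE_FLAG_DBG_WA_ENABLE = 1,		/* bit 0 */
	UPDATE_FLAG_DBG_WA_DISABLE = 2,		/* bit 1 */
	UPDATE_FLAG_IS_GWS = 4,			/* bit 2, quirk for gfx9 IP */
};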





Regards,
  Felix



  };
    struct mqd_update_info {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

index 43eff221eae5..4858112f9a53 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -95,6 +95,7 @@ void kfd_process_dequeue_from_device(struct 
kfd_process_device *pdd)

  int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
  void *gws)
  {
+    struct mqd_update_info minfo = {0};
  struct kfd_node *dev = NULL;
  struct process_queue_node *pqn;
  struct kfd_process_device *pdd;
@@ -146,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager 
*pqm, unsigned int qid,

  }
    pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+    minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0;
    return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
-    pqn->q, NULL);
+    pqn->q, &minfo);
  }
    void kfd_process_dequeue_from_all_devices(struct kfd_process *p)


Re: [Patch v2] drm/amdkfd: update SIMD distribution algo for GFXIP 9.4.2 onwards

2024-02-08 Thread Bhardwaj, Rajneesh



On 2/8/2024 2:41 PM, Felix Kuehling wrote:


On 2024-02-07 23:14, Rajneesh Bhardwaj wrote:

In certain cooperative group dispatch scenarios, the default SPI resource
allocation may cause reduced per-CU workgroup occupancy. Set
COMPUTE_RESOURCE_LIMITS.FORCE_SIMD_DIST=1 to mitigate soft hang
scenarios.

Suggested-by: Joseph Greathouse 
Signed-off-by: Rajneesh Bhardwaj 
---
* Found a bug in the previous reviewed version
https://lists.freedesktop.org/archives/amd-gfx/2024-February/104101.html
   since the q->is_gws is unset for keeping the count.
* updated pqm_set_gws to pass minfo holding gws state for the active
   queues and use that to apply the FORCE_SIMD_DIST_MASK.

  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c    | 4 
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  | 1 +
  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +++-
  3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c

index 42d881809dc7..0b71db4c96b5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -303,6 +303,10 @@ static void update_mqd(struct mqd_manager *mm, 
void *mqd,

  update_cu_mask(mm, mqd, minfo, 0);
  set_priority(m, q);
  +    if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2))
+    m->compute_resource_limits = minfo->gws ?
+    COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK : 0;
+


This looks OK because we don't set anything else in 
m->compute_resource_limits. If that ever changes, we have to be more 
careful here to not wipe out other fields in that register.



Yes, Should I change it to below and send a v3?

 m->compute_resource_limits |= minfo->gws ?
    COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK : 0;
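
One subtlety with the |= form above: OR-ing in 0 never clears the bit again
once GWS is released. A read-modify-write that sets and clears only this
field while preserving the rest of the register, which is the shape the
follow-up patch quoted earlier in this digest ends up using, would be:

if (minfo->gws)
	m->compute_resource_limits |=
		COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
else
	m->compute_resource_limits &=
		~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;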






  q->is_active = QUEUE_IS_ACTIVE(*q);
  }
  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index 677281c0793e..f4b327a2d4a8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -542,6 +542,7 @@ struct mqd_update_info {
  } cu_mask;
  };
  enum mqd_update_flag update_flag;
+    bool gws;


Instead of adding a new bool, can we add a flag to mqd_update_flag?


Maybe. I initially thought about it but then chose the bool approach
since those debug flags are generic, non-per-ASIC KFD flags while this
bool is a per-ASIC request, so I felt they didn't fit together. On the
other hand, those flags and this bool are both quirks anyway, so maybe
they can go together. Please let me know your preference.





Looks good to me otherwise.

Regards,
  Felix



  };
    /**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

index 43eff221eae5..5416a110ced9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -95,6 +95,7 @@ void kfd_process_dequeue_from_device(struct 
kfd_process_device *pdd)

  int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
  void *gws)
  {
+    struct mqd_update_info minfo = {0};
  struct kfd_node *dev = NULL;
  struct process_queue_node *pqn;
  struct kfd_process_device *pdd;
@@ -146,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager 
*pqm, unsigned int qid,

  }
    pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+    minfo.gws = !!gws;
    return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
-    pqn->q, NULL);
+    pqn->q, &minfo);
  }
    void kfd_process_dequeue_from_all_devices(struct kfd_process *p)


RE: [PATCH 2/2] drm/amdgpu: use GTT only as fallback for VRAM|GTT

2023-11-27 Thread Bhardwaj, Rajneesh
[AMD Official Use Only - General]

-Original Message-
From: amd-gfx  On Behalf Of Hamza Mahfooz
Sent: Monday, November 27, 2023 10:53 AM
To: Christian König ; 
jani.nik...@linux.intel.com; kher...@redhat.com; d...@redhat.com; 
za...@vmware.com; Olsak, Marek ; 
linux-graphics-maintai...@vmware.com; amd-gfx@lists.freedesktop.org; 
nouv...@lists.freedesktop.org; intel-...@lists.freedesktop.org; 
virtualizat...@lists.linux.dev; spice-de...@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu: use GTT only as fallback for VRAM|GTT

On 11/27/23 09:54, Christian König wrote:
> Try to fill up VRAM as well by setting the busy flag on GTT allocations.
>
> This fixes the issue that when VRAM was evacuated for suspend it's
> never filled up again unless the application is restarted.

I found the subject description a bit misleading. Maybe add a Fixes tag
describing that this fixes a suspend/resume regression. Other than that,
it looks good to me.

Acked-by: Rajneesh Bhardwaj 

>

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2893

> Signed-off-by: Christian König 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 ++
>   1 file changed, 6 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index aa0dd6dad068..ddc8fb4db678 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -173,6 +173,12 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo 
> *abo, u32 domain)
>   abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ?
>   AMDGPU_PL_PREEMPT : TTM_PL_TT;
>   places[c].flags = 0;
> + /*
> +  * When GTT is just an alternative to VRAM make sure that we
> +  * only use it as fallback and still try to fill up VRAM first.
> +  */
> + if (domain & AMDGPU_GEM_DOMAIN_VRAM)
> + places[c].flags |= TTM_PL_FLAG_BUSY;
>   c++;
>   }
>
--
Hamza



Re: [Patch v2] drm/ttm: Schedule delayed_delete worker closer

2023-11-08 Thread Bhardwaj, Rajneesh



On 11/8/2023 9:49 AM, Christian König wrote:

Am 08.11.23 um 13:58 schrieb Rajneesh Bhardwaj:

Try to allocate system memory on the NUMA node the device is closest to
and try to run delayed_delete workers on a CPU of this node as well.

To optimize the memory clearing operation when a TTM BO gets freed by
the delayed_delete worker, scheduling it closer to a NUMA node where the
memory was initially allocated helps avoid the cases where the worker
gets randomly scheduled on the CPU cores that are across interconnect
boundaries such as xGMI, PCIe etc.

This change helps USWC GTT allocations on NUMA systems (dGPU) and AMD
APU platforms such as GFXIP9.4.3.

Acked-by: Felix Kuehling 
Signed-off-by: Rajneesh Bhardwaj 


Reviewed-by: Christian König 

Going to push this to drm-misc-next.


Thanks Christian, there is a new regression reported and I am checking 
on that. Please don't submit it yet.





Thanks,
Christian.


---

Changes in v2:
  - Absorbed the feedback provided by Christian in the commit message 
and

    the comment.

  drivers/gpu/drm/ttm/ttm_bo.c | 8 +++-
  drivers/gpu/drm/ttm/ttm_device.c | 3 ++-
  2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 5757b9415e37..6f28a77a565b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -370,7 +370,13 @@ static void ttm_bo_release(struct kref *kref)
  spin_unlock(&bo->bdev->lru_lock);
    INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
-    queue_work(bdev->wq, &bo->delayed_delete);
+
+    /* Schedule the worker on the closest NUMA node. This
+ * improves performance since system memory might be
+ * cleared on free and that is best done on a CPU core
+ * close to it.
+ */
+    queue_work_node(bdev->pool.nid, bdev->wq, &bo->delayed_delete);

  return;
  }
  diff --git a/drivers/gpu/drm/ttm/ttm_device.c 
b/drivers/gpu/drm/ttm/ttm_device.c

index 43e27ab77f95..72b81a2ee6c7 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -213,7 +213,8 @@ int ttm_device_init(struct ttm_device *bdev, 
struct ttm_device_funcs *funcs,

  bdev->funcs = funcs;
    ttm_sys_man_init(bdev);
-    ttm_pool_init(&bdev->pool, dev, NUMA_NO_NODE, use_dma_alloc, use_dma32);
+
+    ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32);

    bdev->vma_manager = vma_manager;
  spin_lock_init(&bdev->lru_lock);




Re: [PATCH] drm/amdgpu: Use pcie domain of xcc acpi objects

2023-10-24 Thread Bhardwaj, Rajneesh
[AMD Official Use Only - General]

Looks good to me.
Reviewed-by: Rajneesh Bhardwaj 

Regards,
Rajneesh

From: Lazar, Lijo 
Sent: Monday, October 23, 2023 2:43:01 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Deucher, Alexander ; Kasiviswanathan, Harish 
; Zhang, Hawking ; 
Bhardwaj, Rajneesh 
Subject: Re: [PATCH] drm/amdgpu: Use pcie domain of xcc acpi objects


[AMD Official Use Only - General]



Thanks,
Lijo

From: amd-gfx  on behalf of Lijo Lazar 

Sent: Friday, October 20, 2023 8:44:22 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Deucher, Alexander ; Kasiviswanathan, Harish 
; Zhang, Hawking 
Subject: [PATCH] drm/amdgpu: Use pcie domain of xcc acpi objects

PCI domain/segment information of xccs is available through ACPI DSM
methods. Consider that also while looking for devices.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 40 +---
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 2bca37044ad0..d62e49758635 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -68,7 +68,7 @@ struct amdgpu_acpi_xcc_info {
 struct amdgpu_acpi_dev_info {
 struct list_head list;
 struct list_head xcc_list;
-   uint16_t bdf;
+   uint32_t sbdf;
 uint16_t supp_xcp_mode;
 uint16_t xcp_mode;
 uint16_t mem_mode;
@@ -927,7 +927,7 @@ static acpi_status amdgpu_acpi_get_node_id(acpi_handle 
handle,
 #endif
 }

-static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf)
+static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u32 sbdf)
 {
 struct amdgpu_acpi_dev_info *acpi_dev;

@@ -935,14 +935,14 @@ static struct amdgpu_acpi_dev_info 
*amdgpu_acpi_get_dev(u16 bdf)
 return NULL;

 list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list)
-   if (acpi_dev->bdf == bdf)
+   if (acpi_dev->sbdf == sbdf)
 return acpi_dev;

 return NULL;
 }

 static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
-   struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf)
+   struct amdgpu_acpi_xcc_info *xcc_info, u32 sbdf)
 {
 struct amdgpu_acpi_dev_info *tmp;
 union acpi_object *obj;
@@ -955,7 +955,7 @@ static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info 
**dev_info,

 INIT_LIST_HEAD(&tmp->xcc_list);
 INIT_LIST_HEAD(&tmp->list);
-   tmp->bdf = bdf;
+   tmp->sbdf = sbdf;

 obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
   AMD_XCC_DSM_GET_SUPP_MODE, NULL,
@@ -1007,7 +1007,7 @@ static int amdgpu_acpi_dev_init(struct 
amdgpu_acpi_dev_info **dev_info,

 DRM_DEBUG_DRIVER(
 "New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem 
mode : %x, tmr base: %llx tmr size: %llx  ",
-   tmp->bdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
+   tmp->sbdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
 tmp->tmr_base, tmp->tmr_size);
 list_add_tail(&tmp->list, &amdgpu_acpi_dev_list);
 *dev_info = tmp;
@@ -1023,7 +1023,7 @@ static int amdgpu_acpi_dev_init(struct 
amdgpu_acpi_dev_info **dev_info,
 }

 static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
-   u16 *bdf)
+   u32 *sbdf)
 {
 union acpi_object *obj;
 acpi_status status;
@@ -1054,8 +1054,10 @@ static int amdgpu_acpi_get_xcc_info(struct 
amdgpu_acpi_xcc_info *xcc_info,
 xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF;
 /* xcp node of this xcc [47:40] */
 xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF;
+   /* PF domain of this xcc [31:16] */
+   *sbdf = (obj->integer.value) & 0x;
 /* PF bus/dev/fn of this xcc [63:48] */
-   *bdf = (obj->integer.value >> 48) & 0x;
+   *sbdf |= (obj->integer.value >> 48) & 0x;
 ACPI_FREE(obj);
 obj = NULL;

@@ -1079,7 +1081,7 @@ static int amdgpu_acpi_enumerate_xcc(void)
 struct acpi_device *acpi_dev;
 char hid[ACPI_ID_LEN];
 int ret, id;
-   u16 bdf;
+   u32 sbdf;

 INIT_LIST_HEAD(&amdgpu_acpi_dev_list);
 xa_init(&numa_info_xa);
@@ -1107,16 +1109,16 @@ static int amdgpu_acpi_enumerate_xcc(void)
 xcc_info->handle = acpi_device_handle(acpi_dev);
 acpi_dev_put(acpi_dev);

-   ret = amdgpu_acpi_get_xcc_info(xcc_info, &bdf);
+   ret = amdgpu_acpi_get_xcc_info
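
The bit-field unpacking above can be checked with a small standalone C program. The sample value and the masks below are assumptions derived from the bit ranges named in the patch comments, not code taken from the driver.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* hypothetical DSM value: domain 1, phy_id 5, xcp_node 2, bdf 0x0300 */
        uint64_t v = 0x0300020500010000ULL;
        unsigned int phy_id, xcp_node;
        uint32_t sbdf;

        phy_id   = (v >> 32) & 0xFF;    /* physical id   [39:32] */
        xcp_node = (v >> 40) & 0xFF;    /* xcp node      [47:40] */
        sbdf     = v & 0xFFFF0000;      /* PF domain     [31:16] */
        sbdf    |= (v >> 48) & 0xFFFF;  /* PF bus/dev/fn [63:48] */

        printf("phy_id=%u xcp_node=%u sbdf=0x%08x\n", phy_id, xcp_node, sbdf);
        return 0;
}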

Re: [Patch v2 2/2] drm/amdgpu: Use ttm_pages_limit to override vram reporting

2023-10-03 Thread Bhardwaj, Rajneesh



On 10/3/2023 2:07 PM, Felix Kuehling wrote:


On 2023-10-02 16:21, Rajneesh Bhardwaj wrote:

On GFXIP9.4.3 APU, allow the memory reporting as per the ttm pages
limit in NPS1 mode.

Signed-off-by: Rajneesh Bhardwaj 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 17 -
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  9 +
  2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 38b5457baded..131e150d8a93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -28,6 +28,7 @@
  #include "amdgpu.h"
  #include "amdgpu_gfx.h"
  #include "amdgpu_dma_buf.h"
+#include 
  #include 
  #include 
  #include "amdgpu_xgmi.h"
@@ -806,10 +807,24 @@ void amdgpu_amdkfd_unlock_kfd(struct 
amdgpu_device *adev)
  u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int 
xcp_id)

  {
  u64 tmp;
+    int num_nodes;
  s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
    if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 
0) {

-    tmp = adev->gmc.mem_partitions[mem_id].size;
+    if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 
1) {

+    num_nodes = num_online_nodes();
+    /* In NPS1 mode, we should restrict the vram reporting
+ * tied to the ttm_pages_limit which is 1/2 of the system
+ * memory. For other partition modes, the HBM is uniformly
+ * divided already per numa node reported. If user wants to
+ * go beyond the default ttm limit and maximize the ROCm
+ * allocations, they can go up to max ttm and sysmem 
limits.

+ */
+
+    tmp = (ttm_tt_pages_limit() << PAGE_SHIFT) / num_nodes;


I don't know why you need a local variable for num_nodes. Just divide 
by num_online_nodes(). Other than that, the series is


Reviewed-by: Felix Kuehling 



Thanks for the review. I will amend this and push with your reviewed-by 
tag to amd-staging-drm-next.






+    } else {
+    tmp = adev->gmc.mem_partitions[mem_id].size;
+    }
  do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
  return ALIGN_DOWN(tmp, PAGE_SIZE);
  } else {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 268ee533e7c1..b090cd42f81f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1896,15 +1896,14 @@ static void
  gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
    struct amdgpu_mem_partition_info *mem_ranges)
  {
-    int num_ranges = 0, ret, mem_groups;
  struct amdgpu_numa_info numa_info;
  int node_ids[MAX_MEM_RANGES];
+    int num_ranges = 0, ret;
  int num_xcc, xcc_id;
  uint32_t xcc_mask;
    num_xcc = NUM_XCC(adev->gfx.xcc_mask);
  xcc_mask = (1U << num_xcc) - 1;
-    mem_groups = hweight32(adev->aid_mask);
    for_each_inst(xcc_id, xcc_mask)    {
  ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
@@ -1929,12 +1928,6 @@ gmc_v9_0_init_acpi_mem_ranges(struct 
amdgpu_device *adev,

  }
    adev->gmc.num_mem_partitions = num_ranges;
-
-    /* If there is only partition, don't use entire size */
-    if (adev->gmc.num_mem_partitions == 1) {
-    mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1);
-    do_div(mem_ranges[0].size, mem_groups);
-    }
  }
    static void


Re: [PATCH 2/3] drm/amdgpu: Initialize acpi mem ranges after TTM

2023-10-02 Thread Bhardwaj, Rajneesh
I found an issue with this patch that leads to a performance drop: it causes 
the NUMA pools to be initialized incorrectly on a multi-node system. I am 
working on the fix and will send another change set.


On 9/29/2023 2:18 PM, Rajneesh Bhardwaj wrote:

Move ttm init before acpi mem range init so we can use ttm_pages_limit
to override vram size for GFXIP 9.4.3. The vram size override change
will be introduced in a future commit.

Acked-by: Felix Kuehling 
Signed-off-by: Rajneesh Bhardwaj 
---
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 268ee533e7c1..005ea719d2fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -2190,17 +2190,17 @@ static int gmc_v9_0_sw_init(void *handle)
  
  	amdgpu_gmc_get_vbios_allocations(adev);
  
+	/* Memory manager */

+   r = amdgpu_bo_init(adev);
+   if (r)
+   return r;
+
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
r = gmc_v9_0_init_mem_ranges(adev);
if (r)
return r;
}
  
-	/* Memory manager */

-   r = amdgpu_bo_init(adev);
-   if (r)
-   return r;
-
r = gmc_v9_0_gart_init(adev);
if (r)
return r;


Re: [PATCH] drm/amdgpu: Rework memory limits to allow big allocations

2023-08-22 Thread Bhardwaj, Rajneesh



On 8/21/2023 4:32 PM, Felix Kuehling wrote:


On 2023-08-21 15:20, Rajneesh Bhardwaj wrote:

Rework the KFD max system memory and ttm limit to allow bigger
system memory allocations up to 63/64 of the available memory, which is
controlled by the ttm module params pages_limit and page_pool_size. Also, for
NPS1 mode, report the max ttm limit as the available VRAM size. For the max
system memory limit, leave 1GB exclusively outside ROCm allocations, i.e.
on a 16GB system more than 14 GB can be used by ROCm, still leaving some memory
for other system applications, and on 128GB systems (e.g. GFXIP 9.4.3 APU in
NPS1 mode) more than 120GB can be used by ROCm.

Signed-off-by: Rajneesh Bhardwaj 
---
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  5 ++--
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 25 +--
  2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index 9e18fe5eb190..3387dcdf1bc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -44,6 +44,7 @@
   * changes to accumulate
   */
  #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+#define ONE_GB    (1UL << 30)
    /*
   * Align VRAM availability to 2MB to avoid fragmentation caused by 
4K allocations in the tail 2MB

@@ -117,11 +118,11 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
  return;
    si_meminfo(&si);
-    mem = si.freeram - si.freehigh;
+    mem = si.totalram - si.totalhigh;
  mem *= si.mem_unit;
    spin_lock_init(&kfd_mem_limit.mem_limit_lock);
-    kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+    kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6) - (ONE_GB);


I believe this is an OK heuristic for large systems and medium-sized 
systems. But it produces a negative number or an underflow for systems 
with very small system memory (about 1.1GB).  It's not practical to 
run ROCm on such a small system, but the code at least needs to be 
robust here and produce something meaningful. E.g.



Sure, I agree.



kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6);
if (kfd_mem_limit.max_system_mem_limit < 2 * ONE_GB)
    kfd_mem_limit.max_system_mem_limit <<= 1;
else
    kfd_mem_limit.max_system_mem_limit -= ONE_GB;

Since this change affects all GPUs and the change below is specific to 
GFXv9.4.3 APUs, I'd separate this into two patches.



Ok, will split into two changes.




  kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << 
PAGE_SHIFT;

  pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
  (kfd_mem_limit.max_system_mem_limit >> 20),
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 8447fcada8bb..4962e35df617 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -25,6 +25,7 @@
  #include 
    #include 
+#include 
    #include "amdgpu.h"
  #include "gmc_v9_0.h"
@@ -1877,6 +1878,7 @@ static void
  gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
    struct amdgpu_mem_partition_info *mem_ranges)
  {
+    uint64_t max_ttm_size = ttm_tt_pages_limit() << PAGE_SHIFT;
  int num_ranges = 0, ret, mem_groups;
  struct amdgpu_numa_info numa_info;
  int node_ids[MAX_MEM_RANGES];
@@ -1913,8 +1915,17 @@ gmc_v9_0_init_acpi_mem_ranges(struct 
amdgpu_device *adev,

    /* If there is only partition, don't use entire size */
  if (adev->gmc.num_mem_partitions == 1) {
-    mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1);
-    do_div(mem_ranges[0].size, mem_groups);
+    if (max_ttm_size > mem_ranges[0].size || max_ttm_size <= 0) {


This gives some weird discontinuous behaviour. For max_ttm_size > 
mem_ranges[0].size it gives you 3/4. For max_ttm_size == 
mem_ranges[0].size it gives you all the memory.


Also, why is this only applied for num_mem_partitions == 1? The TTM 
limit also applies when there are more memory partitions. Would it 
make more sense to always evenly divide the ttm_tt_pages_limit between 
all the memory partitions? And cap the size at the NUMA node size. I 
think that would eliminate special cases for different 
memory-partition configs and give you sensible behaviour in all cases.



I think TTM doesn't check what values are being passed to pages_limit or 
page_pool_size, so when the user passes an arbitrary number here I 
wanted to retain the default behavior for NPS1 mode, i.e. 3/4 of the 
available NUMA memory should be reported as VRAM. For the >NPS1 modes, 
the partition size is already divided proportionately, i.e. in TPX/NPS4 
mode we have 1/4 of the NUMA memory visible as VRAM, but the KFD limits will 
already be bigger than that and we will be capped by the VRAM size, so this 
change was only for NPS1 mode, where the memory ranges are limited by the 
NUMA_NO_NODE special condition.




Regards,
  Felix



+    /* Report 
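
As a rough illustration of the alternative Felix floats earlier in this thread (divide the TTM page limit evenly across the memory partitions and cap each share at the partition size), here is a standalone sketch with hypothetical sizes:

#include <stdint.h>
#include <stdio.h>

static uint64_t partition_vram_limit(uint64_t ttm_limit_bytes,
                                     uint64_t partition_size,
                                     unsigned int num_partitions)
{
        uint64_t share = ttm_limit_bytes / num_partitions;

        return share < partition_size ? share : partition_size;
}

int main(void)
{
        uint64_t ttm_limit = 64ULL << 30;       /* 64 GiB TTM pages limit */
        uint64_t node_size = 24ULL << 30;       /* 24 GiB per NUMA node   */
        unsigned int parts = 4;                 /* e.g. TPX/NPS4          */

        printf("per-partition limit: %llu MiB\n",
               (unsigned long long)(partition_vram_limit(ttm_limit, node_size,
                                                         parts) >> 20));
        return 0;
}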

Re: [PATCH] amd/amdkfd: drop unused KFD_IOCTL_SVM_FLAG_UNCACHED flag

2023-06-02 Thread Bhardwaj, Rajneesh
I think the case for MTYPE_UC still needs to be dealt with for SVM 
ranges, but the UNCACHED flag here looks misplaced and should be removed. 
Other than that it looks good. Reviewed-by: Rajneesh Bhardwaj 



On 6/2/2023 1:01 PM, Alex Deucher wrote:

Was leftover from GC 9.4.3 bring up and is currently
unused.  Drop it for now.

Cc: philip.y...@amd.com
Cc: rajneesh.bhard...@amd.com
Cc: felix.kuehl...@amd.com
Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +-
  include/uapi/linux/kfd_ioctl.h   | 2 --
  2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 615eab3f78c9..5ff1a5a89d96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1154,7 +1154,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
-   bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED;
+   bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
unsigned int mtype_local;
  
  	if (domain == SVM_RANGE_VRAM_DOMAIN)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 2a9671e1ddb5..2da5c3ad71bd 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -623,8 +623,6 @@ enum kfd_mmio_remap {
  #define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x0020
  /* Keep GPU memory mapping always valid as if XNACK is disable */
  #define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED   0x0040
-/* Uncached access to memory */
-#define KFD_IOCTL_SVM_FLAG_UNCACHED 0x0080
  
  /**

   * kfd_ioctl_svm_op - SVM ioctl operations


Re: [PATCH 5/7] drm/amdgpu: for S0ix, skip SMDA 5.x+ suspend/resume

2022-12-15 Thread Bhardwaj, Rajneesh

Don't we need a similar check on resume_phase2?

Other than that, looks good to me.

Acked-by: Rajneesh Bhardwaj 

On 12/14/2022 5:16 PM, Alex Deucher wrote:

SDMA 5.x is part of the GFX block so it's controlled via
GFXOFF.  Skip suspend as it should be handled the same
as GFX.

v2: drop SDMA 4.x.  That requires special handling.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a99b327d5f09..5c0719c03c37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3028,6 +3028,12 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
continue;
  
+		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */

+   if (adev->in_s0ix &&
+   (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
+   (adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_SDMA))
+   continue;
+
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */


Re: [PATCH 1/2] drm/amdgpu: add GART, GPUVM, and GTT to glossary

2022-12-02 Thread Bhardwaj, Rajneesh

Both patches are:

Reviewed-by: Rajneesh Bhardwaj 

On 12/1/2022 4:41 PM, Alex Deucher wrote:

Add definitions to clarify GPU virtual memory.

v2: clarify the terms a bit more

Reviewed-by: Luben Tuikov 
Suggested-by: Peter Maucher 
Signed-off-by: Alex Deucher 
---
  Documentation/gpu/amdgpu/amdgpu-glossary.rst | 23 
  1 file changed, 23 insertions(+)

diff --git a/Documentation/gpu/amdgpu/amdgpu-glossary.rst 
b/Documentation/gpu/amdgpu/amdgpu-glossary.rst
index 326896e9800d..00a47ebb0b0f 100644
--- a/Documentation/gpu/amdgpu/amdgpu-glossary.rst
+++ b/Documentation/gpu/amdgpu/amdgpu-glossary.rst
@@ -30,12 +30,35 @@ we have a dedicated glossary for Display Core at
  EOP
End Of Pipe/Pipeline
  
+GART

+  Graphics Address Remapping Table.  This is the name we use for the GPUVM
+  page table used by the GPU kernel driver.  It remaps system resources
+  (memory or MMIO space) into the GPU's address space so the GPU can access
+  them.  The name GART harkens back to the days of AGP when the platform
+  provided an MMU that the GPU could use to get a contiguous view of
+  scattered pages for DMA.  The MMU has since moved on to the GPU, but the
+  name stuck.
+
  GC
Graphics and Compute
  
  GMC

Graphic Memory Controller
  
+GPUVM

+  GPU Virtual Memory.  This is the GPU's MMU.  The GPU supports multiple
+  virtual address spaces that can be in flight at any given time.  These
+  allow the GPU to remap VRAM and system resources into GPU virtual address
+  spaces for use by the GPU kernel driver and applications using the GPU.
+  These provide memory protection for different applications using the GPU.
+
+GTT
+  Graphics Translation Tables.  This is a memory pool managed through TTM
+  which provides access to system resources (memory or MMIO space) for
+  use by the GPU. These addresses can be mapped into the "GART" GPUVM page
+  table for use by the kernel driver or into per process GPUVM page tables
+  for application usage.
+
  IH
Interrupt Handler
  


Re: [PATCH] drm/amdkfd: Fix error handling in kfd_criu_restore_events

2022-11-03 Thread Bhardwaj, Rajneesh
[AMD Official Use Only - General]

Yes it helps avoid the unbalanced lock messages seen during criu restore 
failures for events. Looks good to me.

Reviewed-by: Rajneesh Bhardwaj 

Regards,
Rajneesh

From: amd-gfx  on behalf of Felix 
Kuehling 
Sent: Thursday, November 3, 2022 7:13:09 PM
To: amd-gfx@lists.freedesktop.org 
Subject: [PATCH] drm/amdkfd: Fix error handling in kfd_criu_restore_events

mutex_unlock before the exit label because all the error code paths that
jump there didn't take that lock. This fixes unbalanced locking errors
in case of restore errors.

Fixes: 40e8a766a761 ("drm/amdkfd: CRIU checkpoint and restore events")
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 83e3ce9f6049..729d26d648af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -506,6 +506,7 @@ int kfd_criu_restore_event(struct file *devkfd,
 ret = create_other_event(p, ev, &ev_priv->event_id);
 break;
 }
+   mutex_unlock(&p->event_mutex);

 exit:
 if (ret)
@@ -513,8 +514,6 @@ int kfd_criu_restore_event(struct file *devkfd,

 kfree(ev_priv);

-   mutex_unlock(&p->event_mutex);
-
 return ret;
 }

--
2.32.0
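
The locking pattern the fix restores can be reduced to a few lines of standalone C: error paths that never took the lock jump straight to the shared exit label, so the unlock has to sit before that label, on the one path that actually holds the mutex. Names below are illustrative, not KFD code.

#include <pthread.h>
#include <stdlib.h>
#include <errno.h>

static pthread_mutex_t obj_mutex = PTHREAD_MUTEX_INITIALIZER;

static int restore_object(const void *user_data, size_t size)
{
        void *priv;
        int ret = 0;

        priv = malloc(size);
        if (!priv)
                return -ENOMEM;                 /* lock never taken */

        if (!user_data) {
                ret = -EINVAL;                  /* lock never taken either */
                goto exit;
        }

        pthread_mutex_lock(&obj_mutex);
        /* ... create the object under the lock ... */
        pthread_mutex_unlock(&obj_mutex);       /* unlock before the shared label */

exit:
        free(priv);
        return ret;
}

int main(void)
{
        char buf[16] = { 0 };

        return restore_object(buf, sizeof(buf)) ? 1 : 0;
}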



Re: [PATCH v3] drm/amdkfd: Fix error handling in criu_checkpoint

2022-11-03 Thread Bhardwaj, Rajneesh

This one is more elegant. Looks good to me!

Reviewed-and-tested-by: Rajneesh Bhardwaj 

On 11/3/2022 7:12 PM, Felix Kuehling wrote:

Checkpoint BOs last. That way we don't need to close dmabuf FDs if
something else fails later. This avoids problematic access to user mode
memory in the error handling code path.

criu_checkpoint_bos has its own error handling and cleanup that does not
depend on access to user memory.

In the private data, keep BOs before the remaining objects. This is
necessary to restore things in the correct order as restoring events
depends on the events-page BO being restored first.

Fixes: be072b06c739 ("drm/amdkfd: CRIU export BOs as prime dmabuf objects")
Reported-by: Jann Horn 
CC: Rajneesh Bhardwaj 
Signed-off-by: Felix Kuehling 

---

v3: Keep order of private data and restore order the same.
---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 34 +++-
  1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 5feaba6a77de..6d291aa6386b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1950,7 +1950,7 @@ static int criu_checkpoint(struct file *filep,
  {
int ret;
uint32_t num_devices, num_bos, num_objects;
-   uint64_t priv_size, priv_offset = 0;
+   uint64_t priv_size, priv_offset = 0, bo_priv_offset;
  
  	if (!args->devices || !args->bos || !args->priv_data)

return -EINVAL;
@@ -1994,38 +1994,34 @@ static int criu_checkpoint(struct file *filep,
if (ret)
goto exit_unlock;
  
-	ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,

-   (uint8_t __user *)args->priv_data, &priv_offset);
-   if (ret)
-   goto exit_unlock;
+   /* Leave room for BOs in the private data. They need to be restored
+* before events, but we checkpoint them last to simplify the error
+* handling.
+*/
+   bo_priv_offset = priv_offset;
+   priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);
  
  	if (num_objects) {

ret = kfd_criu_checkpoint_queues(p, (uint8_t __user 
*)args->priv_data,
 &priv_offset);
if (ret)
-   goto close_bo_fds;
+   goto exit_unlock;
  
  		ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,

 &priv_offset);
if (ret)
-   goto close_bo_fds;
+   goto exit_unlock;
  
  		ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);

if (ret)
-   goto close_bo_fds;
+   goto exit_unlock;
}
  
-close_bo_fds:

-   if (ret) {
-   /* If IOCTL returns err, user assumes all FDs opened in 
criu_dump_bos are closed */
-   uint32_t i;
-   struct kfd_criu_bo_bucket *bo_buckets = (struct kfd_criu_bo_bucket 
*) args->bos;
-
-   for (i = 0; i < num_bos; i++) {
-   if (bo_buckets[i].alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-   close_fd(bo_buckets[i].dmabuf_fd);
-   }
-   }
+   /* This must be the last thing in this function that can fail.
+* Otherwise we leak dmabuf file descriptors.
+*/
+   ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
+  (uint8_t __user *)args->priv_data, &bo_priv_offset);
  
  exit_unlock:

mutex_unlock(&p->mutex);
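
A compact userspace sketch of the ordering trick used here: reserve space for the risky records (the BOs) at their original position in the output, write everything else first, and fill the reserved hole last so no later failure can leak the file descriptors created for it. All structures below are made up for illustration.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct bo_rec    { uint32_t handle; int dmabuf_fd; };
struct queue_rec { uint32_t id; };

int main(void)
{
        uint8_t out[4096];
        size_t off = 0, bo_off;
        unsigned int num_bos = 8, i;
        struct queue_rec q = { .id = 42 };

        /* Reserve the hole where the BO records will eventually go. */
        bo_off = off;
        off += num_bos * sizeof(struct bo_rec);

        /* Checkpoint the objects whose failure has no side effects. */
        memcpy(out + off, &q, sizeof(q));
        off += sizeof(q);

        /* Last step: fill the reserved hole; FDs would only be created here. */
        for (i = 0; i < num_bos; i++) {
                struct bo_rec rec = { .handle = i, .dmabuf_fd = -1 };

                memcpy(out + bo_off + i * sizeof(rec), &rec, sizeof(rec));
        }

        printf("wrote %zu bytes, BO records at offset %zu\n", off, bo_off);
        return 0;
}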


Re: [PATCH] drm/amdkfd: Fix error handling in criu_checkpoint

2022-11-01 Thread Bhardwaj, Rajneesh



On 11/1/2022 3:15 PM, Felix Kuehling wrote:

Checkpoint BOs last. That way we don't need to close dmabuf FDs if
something else fails later. This avoids problematic access to user mode
memory in the error handling code path.

criu_checkpoint_bos has its own error handling and cleanup that does not
depend on access to user memory.



This seems to be breaking the restore operation. I did a quick PyTorch-based 
test and I can confirm that the restore operation fails with this 
change applied.


[  +0.03] CR2: 55b6726e0020 CR3: 0001283fe000 CR4: 00350ee0

[  +0.02] Call Trace:
[  +0.02]  
[  +0.03]  kfd_ioctl_criu+0xd4c/0x1930 [amdgpu]
[  +0.000185]  ? __might_fault+0x32/0x80
[  +0.04]  ? lock_release+0x1fd/0x2b0
[  +0.10]  kfd_ioctl+0x29b/0x600 [amdgpu]
[  +0.000153]  ? kfd_ioctl_get_tile_config+0x130/0x130 [amdgpu]
[  +0.000158]  __x64_sys_ioctl+0x8b/0xd0
[  +0.03]  ? lockdep_hardirqs_on+0x79/0x100
[  +0.07]  do_syscall_64+0x34/0x80
[  +0.04]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  +0.05] RIP: 0033:0x7f1c87e7f317
[  +0.02] Code: b3 66 90 48 8b 05 71 4b 2d 00 64 c7 00 26 00 00 00 
48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 
05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 41 4b 2d 00 f7 d8 64 89 01 48
[  +0.02] RSP: 002b:7fff630af518 EFLAGS: 0202 ORIG_RAX: 0010
[  +0.03] RAX: ffda RBX: 7f1c89351620 RCX: 7f1c87e7f317
[  +0.02] RDX: 7fff630af5c0 RSI: c0384b22 RDI: 0005
[  +0.02] RBP: 000b R08: 55839db4aee0 R09: 7f1c87ed7c10
[  +0.02] R10: 705f757067646d61 R11: 0202 R12: 55a4a0514c60
[  +0.02] R13: 000d R14: 7f7b7fb5b1e0 R15: 7fff630b069c

[  +0.10]  
[  +0.02] irq event stamp: 50181
[  +0.02] hardirqs last  enabled at (50187): [] __up_console_sem+0x52/0x60
[  +0.03] hardirqs last disabled at (50192): [] __up_console_sem+0x37/0x60
[  +0.03] softirqs last  enabled at (45940): [] sock_setsockopt+0x223/0xfa0
[  +0.03] softirqs last disabled at (45938): [] release_sock+0x19/0xa0

[  +0.04] ---[ end trace  ]---
[  +0.02] amdgpu: Could not allocate idr
[  +0.000245] amdgpu: Failed to restore CRIU ret:-12
[Nov 1 22:11] loop0: detected capacity change from 0 to 8

https://github.com/checkpoint-restore/criu/blob/criu-dev/plugins/amdgpu/amdgpu_plugin.c:



(00.093977) 11: Added GPU mapping [0xC093 -> 0xC093]
(00.093982) 11: ===Maps===
(00.093987) 11: GPU: 0xC093 -> 0xC093
(00.093992) 11: CPU: 00 -> 00
(00.093997) 11: ==
(00.094002) 11: Matched destination node 0xC093
(00.094007) 11: All nodes mapped successfully
(00.094012) 11: Matched nodes 0xC093 and after
(00.094017) 11: Maps after all nodes matched
(00.094022) 11: ===Maps===
(00.094027) 11: GPU: 0xC093 -> 0xC093
(00.094032) 11: CPU: 00 -> 00
(00.094037) 11: ==
(00.094041) 11: amdgpu_plugin: Restoring 1 devices
(00.094319) 11: amdgpu_plugin: amdgpu_plugin: passing drm render fd 
= 10 to driver

(00.094326) 11: amdgpu_plugin: Restore devices Ok (ret:0)
(00.094331) 11: amdgpu_plugin: Restoring 184 BOs
(00.094349) 11: amdgpu_plugin: Restore BOs Ok
(00.095791) 11: Error (amdgpu_plugin.c:1830): amdgpu_plugin: Restore 
ioctl failed: Cannot allocate memory
(00.095916) 11: Error (amdgpu_plugin.c:1850): amdgpu_plugin: 
amdgpu_plugin: Failed to restore (ret:-1)

(00.095951) 11: Error (criu/files-ext.c:53): Unable to restore 0x143
(00.095961) 11: Error (criu/files.c:1213): Unable to open fd=4 id=0x143
(00.096078) Unlink remap /dev/shm/fvKoKz.cr.1.ghost
(00.096152) Error (criu/cr-restore.c:2531): Restoring FAILED.
(00.096181) amdgpu_plugin: amdgpu_plugin: finished  amdgpu_plugin 
(AMDGPU/KFD)

"restore.log" 4194L, 201090C



Fixes: be072b06c739 ("drm/amdkfd: CRIU export BOs as prime dmabuf objects")
Reported-by: Jann Horn 
CC: Rajneesh Bhardwaj 
Signed-off-by: Felix Kuehling 
---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 27 +++-
  1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 5feaba6a77de..aabab9010812 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1994,38 +1994,27 @@ static int criu_checkpoint(struct file *filep,
if (ret)
goto exit_unlock;
  
-	ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,

-   (uint8_t __user *)args->priv_data, &priv_offset);
-   if (ret)
-   goto exit_unlock;
-
if (num_objects) {
ret = kfd_criu_checkpoint_queues(p, (uint8_t __user 
*)args->priv_data,
 &priv_offset);
  

RE: [PATCH] drm/amdkfd: Allocate doorbells only when needed

2022-09-01 Thread Bhardwaj, Rajneesh
[AMD Official Use Only - General]

This seems to have broken CRIU restore. After I revert this, I can get CRIU 
restore working.

From: amd-gfx  On Behalf Of Russell, Kent
Sent: Tuesday, August 23, 2022 8:07 AM
To: Kuehling, Felix ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdkfd: Allocate doorbells only when needed

No one else seems to have any thoughts on it, so Reviewed-by: Kent Russell 
<kent.russ...@amd.com>

 Kent


From: amd-gfx <amd-gfx-boun...@lists.freedesktop.org> on behalf of Russell, Kent <kent.russ...@amd.com>
Sent: Monday, August 22, 2022 10:10 AM
To: Kuehling, Felix <felix.kuehl...@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH] drm/amdkfd: Allocate doorbells only when needed

[AMD Official Use Only - General]

I can throw an Acked-by: Kent Russell <kent.russ...@amd.com> since we don't 
have an RB yet.

 Kent

> -Original Message-
> From: amd-gfx <amd-gfx-boun...@lists.freedesktop.org> On Behalf Of Felix
> Kuehling
> Sent: Wednesday, August 3, 2022 2:56 PM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH] drm/amdkfd: Allocate doorbells only when needed
>
> Only allocate doorbells when the first queue is created on a GPU or the
> doorbells need to be mapped into CPU or GPU virtual address space. This
> avoids allocating doorbells unnecessarily and can allow more processes
> to use KFD on multi-GPU systems.
>
> Signed-off-by: Felix Kuehling <felix.kuehl...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 13 +
>  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c |  9 +
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  5 -
>  3 files changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 2b3d8bc8f0aa..907f4711abce 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -327,6 +327,12 @@ static int kfd_ioctl_create_queue(struct file *filep,
> struct kfd_process *p,
>goto err_bind_process;
>}
>
> + if (!pdd->doorbell_index &&
> + kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
> + err = -ENOMEM;
> + goto err_alloc_doorbells;
> + }
> +
>/* Starting with GFX11, wptr BOs must be mapped to GART for MES to
> determine work
> * on unmapped queues for usermode queue oversubscription (no
> aggregated doorbell)
> */
> @@ -404,6 +410,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
> struct
> kfd_process *p,
>if (wptr_bo)
>amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
>  err_wptr_map_gart:
> +err_alloc_doorbells:
>  err_bind_process:
>  err_pdd:
>mutex_unlock(&p->mutex);
> @@ -1092,6 +1099,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file
> *filep,
>goto err_unlock;
>}
>offset = kfd_get_process_doorbells(pdd);
> + if (!offset) {
> + err = -ENOMEM;
> + goto err_unlock;
> + }
>} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
>if (args->size != PAGE_SIZE) {
>err = -EINVAL;
> @@ -2173,6 +2184,8 @@ static int criu_restore_memory_of_gpu(struct
> kfd_process_device *pdd,
>return -EINVAL;
>
>offset = kfd_get_process_doorbells(pdd);
> + if (!offset)
> + return -ENOMEM;

Looks like this is causing CRIU restore failure.

>} else if (bo_bucket->alloc_flags &
> KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
>/* MMIO BOs need remapped bus address */
>if (bo_bucket->size != PAGE_SIZE) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index cb3d2ccc5100..b33798f89ef0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -157,6 +157,8 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct
> kfd_process *process,
>
>/* Calculate physical address of doorbell */
>address = kfd_get_process_doorbells(pdd);
> + if (!address)
> + return -ENOMEM;
>vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND |
> VM_NORESERVE |
>VM_DONTDUMP | VM_PFNMAP;
>
> @@ -275,6 +277,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
>
>  phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
>  {
> + if (!pdd->doorbell_index) {
> + int r = kfd_alloc_process_doorbells(pdd->dev,
> + &pdd->doorbell_index);
> + if (r)
> + 
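
A standalone sketch of the allocate-on-first-use pattern in the hunk above: the getter allocates the doorbell page the first time it is asked for it and returns 0 on failure, so callers can treat 0 as -ENOMEM. The structure, the allocator and the offset math are illustrative, not KFD code.

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

struct proc_dev {
        uint32_t doorbell_index;        /* 0 means "not allocated yet" */
};

static int alloc_doorbells(struct proc_dev *pdd)
{
        /* stand-in for the real allocator; never hands out index 0 */
        pdd->doorbell_index = 1 + (rand() % 1024);
        return 0;
}

static uint64_t get_process_doorbells(struct proc_dev *pdd)
{
        if (!pdd->doorbell_index && alloc_doorbells(pdd))
                return 0;               /* callers map this to -ENOMEM */

        return 0x1000ULL * pdd->doorbell_index; /* hypothetical offset */
}

int main(void)
{
        struct proc_dev pdd = { 0 };

        printf("doorbell offset: 0x%llx\n",
               (unsigned long long)get_process_doorbells(&pdd));
        return 0;
}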

Re: [PATCH] drm/amdgpu: Refactor code to handle non coherent and uncached

2022-07-20 Thread Bhardwaj, Rajneesh



On 7/20/2022 7:18 PM, Felix Kuehling wrote:


On 2022-07-18 18:52, Rajneesh Bhardwaj wrote:

This simplifies existing coherence handling for Arcturus and Aldabaran
to account for !coherent && uncached scenarios.

Cc: Joseph Greathouse 
Cc: Alex Deucher 
Signed-off-by: Rajneesh Bhardwaj 
---
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 53 +--
  1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index d1657de5f875..0fdfd79f69ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -471,45 +471,44 @@ static uint64_t get_pte_flags(struct 
amdgpu_device *adev, struct kgd_mem *mem)

    switch (adev->asic_type) {
  case CHIP_ARCTURUS:
-    if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-    if (bo_adev == adev)
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
-    else
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
-    } else {
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
-    }
-    break;
  case CHIP_ALDEBARAN:
-    if (coherent && uncached) {
-    if (adev->gmc.xgmi.connected_to_cpu ||
-    !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
-    snoop = true;
-    mapping_flags |= AMDGPU_VM_MTYPE_UC;
-    } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+    if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
  if (bo_adev == adev) {
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
-    if (adev->gmc.xgmi.connected_to_cpu)
+    if (uncached)
+    mapping_flags |= AMDGPU_VM_MTYPE_UC;
+    else if (coherent)
+    mapping_flags |= AMDGPU_VM_MTYPE_CC;
+    else
+    mapping_flags |= AMDGPU_VM_MTYPE_RW;
+    if (adev->asic_type == CHIP_ALDEBARAN &&
+    adev->gmc.xgmi.connected_to_cpu)
  snoop = true;
  } else {
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+    if (uncached || coherent)
+    mapping_flags |= AMDGPU_VM_MTYPE_UC;
+    else
+    mapping_flags |= AMDGPU_VM_MTYPE_NC;
  if (amdgpu_xgmi_same_hive(adev, bo_adev))
  snoop = true;
  }
  } else {
+    if (uncached || coherent)
+    mapping_flags |= AMDGPU_VM_MTYPE_UC;
+    else
+    mapping_flags |= AMDGPU_VM_MTYPE_NC;
  snoop = true;
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
  }
  break;
  default:
-    mapping_flags |= coherent ?
-    AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+    if (uncached || coherent)
+    mapping_flags |= AMDGPU_VM_MTYPE_UC;
+    else
+    mapping_flags |= AMDGPU_VM_MTYPE_NC;
+
+    if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
+    snoop = true;
+
+


With the two extra blank lines removed, this patch is

Reviewed-by: Felix Kuehling 

Please check whether a similar cleanup can be made in 
svm_range_get_pte_flags, or maybe even, whether common code can be 
factored out of those two functions.



Thanks Felix for the review. Do you want me to send a v2 with the two lines 
removed, or should I just apply it to amd-staging-drm-next after deleting those 
two lines?



I will check svm_range_get_pte_flags and see if I can clean up the code 
there and get back to you.





Regards,
  Felix



  }
    pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);
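
The mapping the refactor converges on can be restated compactly. The sketch below uses stand-in enum values rather than the driver's AMDGPU_VM_MTYPE_* flags and leaves the snoop handling out.

#include <stdbool.h>
#include <stdio.h>

enum mtype { MTYPE_NC, MTYPE_UC, MTYPE_CC, MTYPE_RW };

static enum mtype pick_mtype(bool is_vram, bool local_gpu,
                             bool coherent, bool uncached)
{
        if (is_vram && local_gpu) {
                if (uncached)
                        return MTYPE_UC;
                return coherent ? MTYPE_CC : MTYPE_RW;
        }
        /* remote VRAM and system memory collapse to the same rule */
        return (uncached || coherent) ? MTYPE_UC : MTYPE_NC;
}

int main(void)
{
        printf("local vram, coherent -> %d\n", pick_mtype(true, true, true, false));
        printf("remote vram, default -> %d\n", pick_mtype(true, false, false, false));
        printf("system mem, uncached -> %d\n", pick_mtype(false, false, false, true));
        return 0;
}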


RE: [PATCH v2] drm/amdkfd: CRIU export dmabuf handles for GTT BOs

2022-03-09 Thread Bhardwaj, Rajneesh
[AMD Official Use Only]

Please ignore the previous email; it was sent in error. This one includes the 
minor version bump, so it looks good.

Reviewed-by : Rajneesh Bhardwaj 

-Original Message-
From: amd-gfx  On Behalf Of David Yat Sin
Sent: Tuesday, March 8, 2022 4:08 PM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Cc: Kuehling, Felix ; Yat Sin, David 

Subject: [PATCH v2] drm/amdkfd: CRIU export dmabuf handles for GTT BOs

Export dmabuf handles for GTT BOs so that their contents can be accessed using 
SDMA during checkpoint/restore.

Signed-off-by: David Yat Sin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 12 
 include/uapi/linux/kfd_ioctl.h   |  3 ++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 2c7d76e67ddb..e1e2362841f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1759,7 +1759,8 @@ static int criu_checkpoint_bos(struct kfd_process *p,
goto exit;
}
}
-   if (bo_bucket->alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   if (bo_bucket->alloc_flags
+   & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | 
KFD_IOC_ALLOC_MEM_FLAGS_GTT)) 
+{
ret = 
criu_get_prime_handle(&dumper_bo->tbo.base,
bo_bucket->alloc_flags &

KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0, @@ -1812,7 +1813,8 @@ static 
int criu_checkpoint_bos(struct kfd_process *p,
 
 exit:
while (ret && bo_index--) {
-   if (bo_buckets[bo_index].alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+   if (bo_buckets[bo_index].alloc_flags
+   & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | 
KFD_IOC_ALLOC_MEM_FLAGS_GTT))
close_fd(bo_buckets[bo_index].dmabuf_fd);
}
 
@@ -2211,7 +2213,8 @@ static int criu_restore_bo(struct kfd_process *p,
 
pr_debug("map memory was successful for the BO\n");
/* create the dmabuf object and export the bo */
-   if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   if (bo_bucket->alloc_flags
+   & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, DRM_RDWR,
&bo_bucket->dmabuf_fd);
if (ret)
@@ -2281,7 +2284,8 @@ static int criu_restore_bos(struct kfd_process *p,
 
 exit:
while (ret && i--) {
-   if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+   if (bo_buckets[i].alloc_flags
+  & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | 
KFD_IOC_ALLOC_MEM_FLAGS_GTT))
close_fd(bo_buckets[i].dmabuf_fd);
}
kvfree(bo_buckets);
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h 
index b40687bf1014..eb9ff85f8556 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -33,9 +33,10 @@
  * - 1.5 - Add SVM API
  * - 1.6 - Query clear flags in SVM get_attr API
  * - 1.7 - Checkpoint Restore (CRIU) API
+ * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 7
+#define KFD_IOCTL_MINOR_VERSION 8
 
 struct kfd_ioctl_get_version_args {
__u32 major_version;/* from KFD */
--
2.17.1


RE: [PATCH] drm/amdkfd: CRIU export dmabuf handles for GTT BOs

2022-03-09 Thread Bhardwaj, Rajneesh
[AMD Official Use Only]

Reviewed-by: Rajneesh Bhardwaj 

-Original Message-
From: amd-gfx  On Behalf Of David Yat Sin
Sent: Tuesday, March 8, 2022 2:12 PM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Cc: Kuehling, Felix ; Yat Sin, David 

Subject: [PATCH] drm/amdkfd: CRIU export dmabuf handles for GTT BOs

Export dmabuf handles for GTT BOs so that their contents can be accessed using 
SDMA during checkpoint/restore.

Signed-off-by: David Yat Sin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 2c7d76e67ddb..e1e2362841f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1759,7 +1759,8 @@ static int criu_checkpoint_bos(struct kfd_process *p,
goto exit;
}
}
-   if (bo_bucket->alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   if (bo_bucket->alloc_flags
+   & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | 
KFD_IOC_ALLOC_MEM_FLAGS_GTT)) 
+{
ret = 
criu_get_prime_handle(&dumper_bo->tbo.base,
bo_bucket->alloc_flags &

KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0, @@ -1812,7 +1813,8 @@ static 
int criu_checkpoint_bos(struct kfd_process *p,
 
 exit:
while (ret && bo_index--) {
-   if (bo_buckets[bo_index].alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+   if (bo_buckets[bo_index].alloc_flags
+   & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | 
KFD_IOC_ALLOC_MEM_FLAGS_GTT))
close_fd(bo_buckets[bo_index].dmabuf_fd);
}
 
@@ -2211,7 +2213,8 @@ static int criu_restore_bo(struct kfd_process *p,
 
pr_debug("map memory was successful for the BO\n");
/* create the dmabuf object and export the bo */
-   if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   if (bo_bucket->alloc_flags
+   & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, DRM_RDWR,
&bo_bucket->dmabuf_fd);
if (ret)
@@ -2281,7 +2284,8 @@ static int criu_restore_bos(struct kfd_process *p,
 
 exit:
while (ret && i--) {
-   if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+   if (bo_buckets[i].alloc_flags
+  & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | 
KFD_IOC_ALLOC_MEM_FLAGS_GTT))
close_fd(bo_buckets[i].dmabuf_fd);
}
kvfree(bo_buckets);
--
2.17.1


Re: [PATCH 1/1] drm/amdkfd: Fix criu_restore_bo error handling

2022-02-18 Thread Bhardwaj, Rajneesh
[AMD Official Use Only]

Reviewed-by: Rajneesh Bhardwaj 

Regards,
Rajneesh

From: Kuehling, Felix 
Sent: Friday, February 18, 2022 5:32:18 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Bhardwaj, Rajneesh ; Tom Rix 
Subject: [PATCH 1/1] drm/amdkfd: Fix criu_restore_bo error handling

Clang static analysis reports this problem
kfd_chardev.c:2327:2: warning: 1st function call argument
  is an uninitialized value
  kvfree(bo_privs);
  ^~~~

Make sure bo_buckets and bo_privs are initialized so freeing them in the
error handling code path will never result in undefined behaviour.

Fixes: 73fa13b6a511 ("drm/amdkfd: CRIU Implement KFD restore ioctl")
Reported-by: Tom Rix 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index fead2ed46dc6..ceeb0d5e9060 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2092,8 +2092,8 @@ static int criu_restore_bos(struct kfd_process *p,
 uint64_t *priv_offset,
 uint64_t max_priv_data_size)
 {
-   struct kfd_criu_bo_bucket *bo_buckets;
-   struct kfd_criu_bo_priv_data *bo_privs;
+   struct kfd_criu_bo_bucket *bo_buckets = NULL;
+   struct kfd_criu_bo_priv_data *bo_privs = NULL;
 const bool criu_resume = true;
 bool flush_tlbs = false;
 int ret = 0, j = 0;
--
2.32.0
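
A minimal standalone illustration of the pattern the fix enforces: initialise the pointers that the shared error path frees, so an early failure can never hand an uninitialised value to the free routine. Names are illustrative.

#include <stdlib.h>
#include <errno.h>

static int restore_all(size_t n_buckets, size_t n_priv, int fail_early)
{
        void *buckets = NULL;   /* would be uninitialised garbage otherwise */
        void *privs = NULL;
        int ret = 0;

        if (fail_early) {
                ret = -EINVAL;
                goto exit;      /* jumps before either allocation happens */
        }

        buckets = calloc(n_buckets, 64);
        privs = calloc(n_priv, 64);
        if (!buckets || !privs)
                ret = -ENOMEM;

        /* ... restore work would go here ... */

exit:
        free(privs);            /* free(NULL) is a safe no-op */
        free(buckets);
        return ret;
}

int main(void)
{
        return restore_all(4, 4, 1) == -EINVAL ? 0 : 1;
}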



Re: [PATCH] drm/amdgpu: move lockdep assert to the right place.

2022-02-04 Thread Bhardwaj, Rajneesh



On 2/4/2022 1:50 PM, Christian König wrote:

Am 04.02.22 um 19:47 schrieb Bhardwaj, Rajneesh:


On 2/4/2022 1:32 PM, Christian König wrote:

Am 04.02.22 um 19:12 schrieb Bhardwaj, Rajneesh:

[Sorry for top posting]

Hi Christian

I think you forgot the below hunk, without which the issue is not 
fixed completely on a multi GPU system.


No, that is perfectly intentional. While removing a bo_va structure 
it can happen that there are still mappings attached to it (for 
example because the application crashed).



OK, but for me, at boot time, I see a flood of warning messages coming 
from amdgpu_vm_bo_del, so the previous patch doesn't help.


Do you have a backtrace? That should not happen.

Could be that we have this buggy at a couple of different places.



This is on Ubuntu 18.04.


[    8.392405] WARNING: CPU: 13 PID: 2732 at /home/rajneesh/git/compute/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:2673 amdgpu_vm_bo_del+0x386/0x3b0 [amdgpu]
[    8.392586] Modules linked in: amdgpu ast iommu_v2 gpu_sched 
drm_vram_helper drm_ttm_helper ttm drm_kms_helper cfbfillrect 
syscopyarea cfbimgblt sysfillrect sysimgblt fb_sys_fops cfbcopyarea drm 
drm_panel_orientation_quirks k10temp nf_tables nfnetlink ip_tables 
x_tables i2c_piix4
[    8.392628] CPU: 13 PID: 2732 Comm: plymouthd Not tainted 
5.13.0-kfd-rajneesh #1055
[    8.392632] Hardware name: GIGABYTE MZ01-CE0-00/MZ01-CE0-00, BIOS F02 
08/29/2018

[    8.392635] RIP: 0010:amdgpu_vm_bo_del+0x386/0x3b0 [amdgpu]
[    8.392749] Code: 0f 85 56 fe ff ff 48 c7 c2 28 6b b3 c0 be 21 01 00 
00 48 c7 c7 58 6b b3 c0 c6 05 64 64 62 00 01 e8 5f be a4 d4 e9 32 fe ff 
ff <0f> 0b eb 8a 49 8b be 88 01 00 00 e9 af fc ff ff be 03 00 00 00 e8
[    8.392752] RSP: 0018:af33471d7d98 EFLAGS: 00010246
[    8.392756] RAX:  RBX: a0877160 RCX: 
0001
[    8.392758] RDX: 0001 RSI: a08772ae01f8 RDI: 
a0800a426f68
[    8.392761] RBP: a087691fd980 R08: c0a4e2e0 R09: 
000a
[    8.392763] R10: af33471d7d68 R11: 0001 R12: 
a0801d540010
[    8.392765] R13: a08771615c00 R14: a08024166618 R15: 
a08771615e60
[    8.392768] FS:  7f7b80179740() GS:a08f3dec() 
knlGS:

[    8.392771] CS:  0010 DS:  ES:  CR0: 80050033
[    8.392773] CR2: 55839db51eb8 CR3: 00010f49c000 CR4: 
003506e0

[    8.392775] Call Trace:
[    8.392779]  ? _raw_spin_unlock_irqrestore+0x30/0x40
[    8.392787]  amdgpu_driver_postclose_kms+0x94/0x270 [amdgpu]
[    8.392897]  drm_file_free.part.14+0x1e3/0x230 [drm]
[    8.392918]  drm_release+0x6e/0xf0 [drm]
[    8.392937]  __fput+0xa3/0x250
[    8.392942]  task_work_run+0x6d/0xb0
[    8.392949]  exit_to_user_mode_prepare+0x1c9/0x1d0
[    8.392953]  syscall_exit_to_user_mode+0x19/0x50
[    8.392957]  do_syscall_64+0x42/0x70
[    8.392960]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[    8.392964] RIP: 0033:0x7f7b7f8679e4
[    8.392967] Code: eb 89 e8 6f 44 02 00 66 2e 0f 1f 84 00 00 00 00 00 
0f 1f 44 00 00 48 8d 05 21 ff 2d 00 8b 00 85 c0 75 13 b8 03 00 00 00 0f 
05 <48> 3d 00 f0 ff ff 77 3c f3 c3 66 90 53 89 fb 48 83 ec 10 e8 94 fe
[    8.392970] RSP: 002b:7ffe6bb0cdb8 EFLAGS: 0246 ORIG_RAX: 
0003
[    8.392973] RAX:  RBX: 55839db4b760 RCX: 
7f7b7f8679e4
[    8.392974] RDX: 55839db4aed0 RSI:  RDI: 
000b
[    8.392976] RBP: 000b R08: 55839db4aee0 R09: 
7f7b7fb42c40
[    8.392978] R10: 0007 R11: 0246 R12: 
e280
[    8.392979] R13: 000d R14: 7f7b7fb5b1e0 R15: 
7f7b7fb5b130

[    8.392988] irq event stamp: 1264799
[    8.392990] hardirqs last  enabled at (1264805): [] 
console_unlock+0x339/0x530
[    8.392994] hardirqs last disabled at (1264810): [] 
console_unlock+0x3a6/0x530





Regards,
Christian.






Because of this locking the VM before the remove is mandatory. Only 
while adding a bo_va structure we can avoid that.


Regards,
Christian.



diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index dcc80d6e099e..6f68fc9da56a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2670,8 +2670,6 @@ void amdgpu_vm_bo_del(struct amdgpu_device 
*adev,

    struct amdgpu_vm *vm = bo_va->base.vm;
    struct amdgpu_vm_bo_base **base;

- dma_resv_assert_held(vm->root.bo->tbo.base.resv);
-
    if (bo) {
    dma_resv_assert_held(bo->tbo.base.resv);
    if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)


If you choose to include the above hunk, please feel free to add

Reviewed-and-tested-by: Rajneesh Bhardwaj 

On 2/4/2022 11:27 AM, Felix Kuehling wrote:


Am 2022-02-04 um 03:52 schrieb Christian König:

Since newly added BOs don't have any mappings it's ok to add them
without hold

Re: [PATCH] drm/amdgpu: move lockdep assert to the right place.

2022-02-04 Thread Bhardwaj, Rajneesh



On 2/4/2022 1:32 PM, Christian König wrote:

Am 04.02.22 um 19:12 schrieb Bhardwaj, Rajneesh:

[Sorry for top posting]

Hi Christian

I think you forgot the below hunk, without which the issue is not 
fixed completely on a multi GPU system.


No, that is perfectly intentional. While removing a bo_va structure it 
can happen that there are still mappings attached to it (for example 
because the application crashed).



OK, but for me, at boot time, I see a flood of warning messages coming 
from amdgpu_vm_bo_del, so the previous patch doesn't help.





Because of this locking the VM before the remove is mandatory. Only 
while adding a bo_va structure we can avoid that.


Regards,
Christian.



diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index dcc80d6e099e..6f68fc9da56a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2670,8 +2670,6 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
    struct amdgpu_vm *vm = bo_va->base.vm;
    struct amdgpu_vm_bo_base **base;

-   dma_resv_assert_held(vm->root.bo->tbo.base.resv);
-
    if (bo) {
    dma_resv_assert_held(bo->tbo.base.resv);
    if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)


If you choose to include the above hunk, please feel free to add

Reviewed-and-tested-by: Rajneesh Bhardwaj 

On 2/4/2022 11:27 AM, Felix Kuehling wrote:


Am 2022-02-04 um 03:52 schrieb Christian König:

Since newly added BOs don't have any mappings it's ok to add them
without holding the VM lock. Only when we add per VM BOs the lock is
mandatory.

Signed-off-by: Christian König 
Reported-by: Bhardwaj, Rajneesh 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index fdc6a1fd74af..dcc80d6e099e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -375,6 +375,8 @@ static void amdgpu_vm_bo_base_init(struct 
amdgpu_vm_bo_base *base,

  if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
  return;
  + dma_resv_assert_held(vm->root.bo->tbo.base.resv);
+
  vm->bulk_moveable = false;
  if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
  amdgpu_vm_bo_relocated(base);
@@ -2260,8 +2262,6 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct 
amdgpu_device *adev,

  {
  struct amdgpu_bo_va *bo_va;
  - dma_resv_assert_held(vm->root.bo->tbo.base.resv);
-
  bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
  if (bo_va == NULL) {
  return NULL;




Re: [PATCH] drm/amdgpu: move lockdep assert to the right place.

2022-02-04 Thread Bhardwaj, Rajneesh

[Sorry for top posting]

Hi Christian

I think you forgot the below hunk, without which the issue is not fixed 
completely on a multi GPU system.


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index dcc80d6e099e..6f68fc9da56a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2670,8 +2670,6 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
    struct amdgpu_vm *vm = bo_va->base.vm;
    struct amdgpu_vm_bo_base **base;

-   dma_resv_assert_held(vm->root.bo->tbo.base.resv);
-
    if (bo) {
    dma_resv_assert_held(bo->tbo.base.resv);
    if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)


If you choose to include the above hunk, please feel free to add

Reviewed-and-tested-by: Rajneesh Bhardwaj 

On 2/4/2022 11:27 AM, Felix Kuehling wrote:


Am 2022-02-04 um 03:52 schrieb Christian König:

Since newly added BOs don't have any mappings it's ok to add them
without holding the VM lock. Only when we add per VM BOs the lock is
mandatory.

Signed-off-by: Christian König 
Reported-by: Bhardwaj, Rajneesh 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index fdc6a1fd74af..dcc80d6e099e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -375,6 +375,8 @@ static void amdgpu_vm_bo_base_init(struct 
amdgpu_vm_bo_base *base,

  if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
  return;
  +    dma_resv_assert_held(vm->root.bo->tbo.base.resv);
+
  vm->bulk_moveable = false;
  if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
  amdgpu_vm_bo_relocated(base);
@@ -2260,8 +2262,6 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct 
amdgpu_device *adev,

  {
  struct amdgpu_bo_va *bo_va;
  -    dma_resv_assert_held(vm->root.bo->tbo.base.resv);
-
  bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
  if (bo_va == NULL) {
  return NULL;
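
A kernel-style sketch of the point being settled in this thread: assert the reservation lock in the helper that actually touches shared VM state rather than in every caller, since a freshly created object is still private to the caller. The structures and names are made up; dma_resv_assert_held() is the real interface.

#include <linux/dma-resv.h>
#include <linux/list.h>

struct demo_vm { struct dma_resv resv; struct list_head bos; };
struct demo_bo { struct list_head vm_entry; };

/* Safe without the lock: the object is not yet visible to anyone else. */
static void demo_bo_init(struct demo_bo *bo)
{
        INIT_LIST_HEAD(&bo->vm_entry);
}

/* Must hold the VM reservation: this modifies the shared list. */
static void demo_bo_insert(struct demo_vm *vm, struct demo_bo *bo)
{
        dma_resv_assert_held(&vm->resv);
        list_add_tail(&bo->vm_entry, &vm->bos);
}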


RE: [Patch v5 00/24] CHECKPOINT RESTORE WITH ROCm

2022-02-03 Thread Bhardwaj, Rajneesh
[AMD Official Use Only]

Thank you Felix for the review and your guidance.

-Original Message-
From: Kuehling, Felix  
Sent: Thursday, February 3, 2022 10:22 PM
To: Bhardwaj, Rajneesh ; 
amd-gfx@lists.freedesktop.org
Cc: Yat Sin, David ; Deucher, Alexander 
; dri-de...@lists.freedesktop.org
Subject: Re: [Patch v5 00/24] CHECKPOINT RESTORE WITH ROCm

The series is

Reviewed-by: Felix Kuehling 


Am 2022-02-03 um 04:08 schrieb Rajneesh Bhardwaj:
> V5: Proposed IOCTL APIs for CRIU with consolidated feedback
>
> CRIU is a user space tool which is very popular for container live 
> migration in datacentres. It can checkpoint a running application, 
> save its complete state, memory contents and all system resources to 
> images on disk which can be migrated to another m achine and restored later.
> More information on CRIU can be found at https://criu.org/Main_Page
>
> CRIU currently does not support Checkpoint / Restore with applications 
> that have device files open, so it cannot perform checkpoint and 
> restore on GPU devices, which are very complex and have their own VRAM 
> managed privately. CRIU, however, can support external devices by using 
> a plugin architecture. We feel that we are getting close to finalizing 
> our IOCTL APIs, which were again changed since V3 for an improved modular 
> design.
>
> Our changes to CRIU user space  are can be obtained from here:
> https://github.com/RadeonOpenCompute/criu/tree/amdgpu_rfc-211222
>
> We have tested the following scenarios:
>   - Checkpoint / Restore of a Pytorch (BERT) workload
>   - kfdtests with queues and events
>   - Gfx9 and Gfx10 based multi GPU test systems
>   - On baremetal and inside a docker container
>   - Restoring on a different system
>
> V1: Initial
> V2: Addressed review comments
> V3: Rebased on latest amd-staging-drm-next (5.15 based)
> v4: New API design and basic support for SVM, however there is an 
> outstanding issue with SVM restore which is currently under debug and 
> hopefully that won't impact the ioctl APIs as SVMs are treated as 
> private data hidden from user space like queues and events with the 
> new approch.
> V5: Fix the SVM related issues and finalize the APIs.
>
> David Yat Sin (9):
>drm/amdkfd: CRIU Implement KFD unpause operation
>drm/amdkfd: CRIU add queues support
>drm/amdkfd: CRIU restore queue ids
>drm/amdkfd: CRIU restore sdma id for queues
>drm/amdkfd: CRIU restore queue doorbell id
>drm/amdkfd: CRIU checkpoint and restore queue mqds
>drm/amdkfd: CRIU checkpoint and restore queue control stack
>drm/amdkfd: CRIU checkpoint and restore events
>drm/amdkfd: CRIU implement gpu_id remapping
>
> Rajneesh Bhardwaj (15):
>x86/configs: CRIU update debug rock defconfig
>drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs
>drm/amdkfd: CRIU Implement KFD process_info ioctl
>drm/amdkfd: CRIU Implement KFD checkpoint ioctl
>drm/amdkfd: CRIU Implement KFD restore ioctl
>drm/amdkfd: CRIU Implement KFD resume ioctl
>drm/amdkfd: CRIU export BOs as prime dmabuf objects
>drm/amdkfd: CRIU checkpoint and restore xnack mode
>drm/amdkfd: CRIU allow external mm for svm ranges
>drm/amdkfd: use user_gpu_id for svm ranges
>drm/amdkfd: CRIU Discover svm ranges
>drm/amdkfd: CRIU Save Shared Virtual Memory ranges
>drm/amdkfd: CRIU prepare for svm resume
>drm/amdkfd: CRIU resume shared virtual memory ranges
>drm/amdkfd: Bump up KFD API version for CRIU
>
>   arch/x86/configs/rock-dbg_defconfig   |   53 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|7 +-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   64 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |   20 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |2 +
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 1471 ++---
>   drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   |2 +-
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  185 ++-
>   .../drm/amd/amdkfd/kfd_device_queue_manager.h |   16 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_events.c   |  313 +++-
>   drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  |   14 +
>   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  |   75 +
>   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |   77 +
>   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |   92 ++
>   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |   84 +
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  160 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c  |   72 +-
>   .../amd/amdkfd/kfd_process_queue_manager.c|  372 -
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  331 +++-
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.h  |   39 +
>   include/uapi/linux/kfd_ioctl.h|   84 +-
>   21 files changed, 3193 insertions(+), 340 deletions(-)
>


RE: [PATCH 1/1] Add available memory ioctl for libhsakmt

2022-01-17 Thread Bhardwaj, Rajneesh
[Public]



From: amd-gfx  On Behalf Of Deucher, 
Alexander
Sent: Monday, January 10, 2022 4:11 PM
To: Phillips, Daniel ; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 1/1] Add available memory ioctl for libhsakmt


[Public]


[Public]

This is missing your signed-off-by.  Additionally, for UAPI changes, we need a 
link the patches for the userspace component that will make use of it.

Alex


From: amd-gfx 
mailto:amd-gfx-boun...@lists.freedesktop.org>>
 on behalf of Daniel Phillips 
mailto:daniel.phill...@amd.com>>
Sent: Monday, January 10, 2022 3:54 PM
To: amd-gfx@lists.freedesktop.org 
mailto:amd-gfx@lists.freedesktop.org>>; 
dri-de...@lists.freedesktop.org 
mailto:dri-de...@lists.freedesktop.org>>
Cc: Phillips, Daniel mailto:daniel.phill...@amd.com>>
Subject: [PATCH 1/1] Add available memory ioctl for libhsakmt

From: Daniel Phillips mailto:dphil...@amd.com>>

Add an ioctl to inquire memory available for allocation by libhsakmt
per node, allowing for space consumed by page translation tables.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h  |  1 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c| 14 ++
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c| 17 +
 include/uapi/linux/kfd_ioctl.h  | 14 --
 4 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fcbc8a9c9e06..64c6c36685d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct 
amdgpu_device *adev,
 void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
 void *drm_priv);
 uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 struct amdgpu_device *adev, uint64_t va, uint64_t size,
 void *drm_priv, struct kgd_mem **mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 86a1a6c109d9..b7490a659173 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -190,6 +190,20 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
 return ret;
 }

+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
+{
+   uint64_t reserved_for_pt =
+   ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+   size_t available_memory;
+
+   spin_lock(&kfd_mem_limit.mem_limit_lock);
+   available_memory =
+   adev->gmc.real_vram_size -
+   adev->kfd.vram_used - reserved_for_pt;
+   spin_unlock(&kfd_mem_limit.mem_limit_lock);
+   return available_memory;
+}
+
 static void unreserve_mem_limit(struct amdgpu_device *adev,
 uint64_t size, u32 alloc_flag)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 4bfc0c8ab764..5c2f6d97ff1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -486,6 +486,20 @@ static int kfd_ioctl_get_queue_wave_state(struct file 
*filep,
 return r;
 }

+static int kfd_ioctl_get_available_memory(struct file *filep,
+struct kfd_process *p, void *data)
+{
+   struct kfd_ioctl_get_available_memory_args *args = data;
+   struct kfd_dev *dev;
+
+   dev = kfd_device_by_id(args->gpu_id);
Once CRIU changes land, this needs to change to kfd_process_device_data_by_id() 
and then use pdd->dev


+   if (!dev)
+   return -EINVAL;
+
+   args->available = amdgpu_amdkfd_get_available_memory(dev->adev);
+   return 0;
+}
+
 static int kfd_ioctl_set_memory_policy(struct file *filep,
 struct kfd_process *p, void *data)
 {
@@ -1959,6 +1973,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {

 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
 kfd_ioctl_set_xnack_mode, 0),
+
+   AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
+   kfd_ioctl_get_available_memory, 0),
 };

 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index af96af174dc4..94a99add2432 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -32,9 +32,10 @@
  * - 1.4 - Indicate new SRAM EDC bit in device properties
  * - 1.5 - Add SVM API
  * - 1.6 - Query clear flags in SVM get_attr API
+ * - 1.7 - Add available_memory ioctl
  */
 #define KF
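
For reference, a minimal user-space sketch of how this ioctl could be exercised
is shown below. It assumes the kfd_ioctl_get_available_memory_args layout implied
by the chardev handler above (gpu_id in, available out) and uses a placeholder
gpu_id; it is illustrative only and not part of the patch.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	/* gpu_id is a placeholder; real ids come from the KFD topology */
	struct kfd_ioctl_get_available_memory_args args = { .gpu_id = 1 };
	int fd = open("/dev/kfd", O_RDWR);

	if (fd < 0)
		return 1;
	if (ioctl(fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args) == 0)
		printf("available: %llu bytes\n",
		       (unsigned long long)args.available);
	close(fd);
	return 0;
}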

Re: [PATCH] drm/ttm: Don't inherit GEM object VMAs in child process

2022-01-10 Thread Bhardwaj, Rajneesh

Hi Christian


I have reverted the change from amd-staging-drm-next as per the 
discussion. Thank you.



Regards

Rajneesh

On 1/4/2022 1:08 PM, Felix Kuehling wrote:

[+Adrian]

Am 2021-12-23 um 2:05 a.m. schrieb Christian König:


Am 22.12.21 um 21:53 schrieb Daniel Vetter:

On Mon, Dec 20, 2021 at 01:12:51PM -0500, Bhardwaj, Rajneesh wrote:

[SNIP]
Still sounds funky. I think minimally we should have an ack from CRIU
developers that this is officially the right way to solve this
problem. I
really don't want to have random one-off hacks that don't work across
the
board, for a problem where we (drm subsystem) really shouldn't be the
only
one with this problem. Where "this problem" means that the mmap space is
per file description, and not per underlying inode or real device or
whatever. That part sounds like a CRIU problem, and I expect CRIU folks
want a consistent solution across the board for this. Hence please
grab an
ack from them.

Unfortunately it's a KFD design problem. AMD used a single device
node, then mmaped different objects from the same offset to different
processes and expected it to work with the rest of the fs subsystem without
churn.

This may be true for mmaps in the KFD device, but not for mmaps in the
DRM render nodes.



So yes, this is indeed because the mmap space is per file descriptor
for the use case here.

No. This is a different problem.

The problem has to do with the way that DRM manages mmap permissions. In
order to be able to mmap an offset in the render node, there needs to be
a BO that was created in the same render node. If you fork a process, it
inherits the VMA. But KFD doesn't know anything about the inherited BOs
from the parent process. Therefore those BOs don't get checkpointed and
restored in the child process. When the CRIU checkpoint is restored, our
CRIU plugin never creates a BO corresponding to the VMA in the child
process' render node FD. We've also lost the relationship between the
parent and child-process' render node FDs. After "fork" the render node
FD points to the same struct file in parent and child. After restoring
the CRIU checkpoint, they are separate struct files, created by separate
"open" system calls. Therefore the mmap call that restores the VMA fails
in the child process.

At least for KFD, there is no point inheriting BOs from a child process,
because the GPU has no way of accessing the BOs in the child process.
The child process has no GPU address space, no user mode queues, no way
to do anything with the GPU before it completely reinitializes its KFD
context.

We can work around this issue in user mode with madvise(...,
MADV_DONTFORK). In fact we've already done this for some BOs to avoid a
memory leak in the parent process while a child process exists. But it's
slightly racy because there is a short time window where VMA exists
without the VM_DONTCOPY flag. A fork during that time window could still
create a child process with an inherited VMA.
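
For illustration, the user-mode workaround is roughly the following pattern
(a minimal sketch; render_fd, size and mmap_offset are placeholders, and as
noted above the real thunk code has a race window between the two calls):

	void *addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
			  render_fd, mmap_offset);
	if (addr != MAP_FAILED)
		/* keep this mapping out of any future child processes */
		madvise(addr, size, MADV_DONTFORK);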

Therefore a safer solution is to set the vm_flags in the VMA in the
driver when the VMA is first created.

Regards,
   Felix



And thanks for pointing this out, this indeed makes the whole change
extremely questionable.

Regards,
Christian.


Cheers, Daniel



Re: [PATCH] drm/ttm: Don't inherit GEM object VMAs in child process

2021-12-22 Thread Bhardwaj, Rajneesh

Sorry for the typo in my previous email. Please read Adrian Reber*

On 12/22/2021 8:49 PM, Bhardwaj, Rajneesh wrote:


Adding Adrian Rebel who is the CRIU maintainer and CRIU list

On 12/22/2021 3:53 PM, Daniel Vetter wrote:

On Mon, Dec 20, 2021 at 01:12:51PM -0500, Bhardwaj, Rajneesh wrote:

On 12/20/2021 4:29 AM, Daniel Vetter wrote:

On Fri, Dec 10, 2021 at 07:58:50AM +0100, Christian König wrote:

Am 09.12.21 um 19:28 schrieb Felix Kuehling:

Am 2021-12-09 um 10:30 a.m. schrieb Christian König:

That still won't work.

But I think we could do this change for the amdgpu mmap callback only.

If graphics user mode has problems with it, we could even make this
specific to KFD BOs in the amdgpu_gem_object_mmap callback.

I think it's fine for the whole amdgpu stack, my concern is more about
radeon, nouveau and the ARM stacks which are using this as well.

That blew up so nicely the last time we tried to change it and I know of at
least one case where radeon was/is used with BOs in a child process.

I'm way late and buried again, but I think it'd be good to be consistent



I had committed this change into our amd-staging-drm-next branch last 
week after I got the ACK and RB from Felix and Christian.




here across drivers. Or at least across drm drivers. And we've had the vma
open/close refcounting to make fork work since forever.

I think if we do this we should really only do this for mmap() where this
applies, but reading through the thread here I'm honestly confused why
this is a problem. If CRIU can't handle forked mmaps it needs to be
taught that, not hacked around. Or at least I'm not understanding why
this shouldn't work ...
-Daniel


Hi Daniel

In the v2
https://lore.kernel.org/all/a1a865f5-ad2c-29c8-cbe4-2635d53eceb6@amd.com/T/
I pretty much limited the scope of the change to KFD BOs on mmap. Regarding
CRIU, I think it's not a CRIU problem: on restore, CRIU only tries to
recreate all the child processes and then mmaps all the VMAs it sees (as per
the checkpoint snapshot) in the new process address space after the VMA
placements are finalized in the position independent code phase. Since the
inherited VMAs don't have access rights, the CRIU mmap fails.

Still sounds funky. I think minimally we should have an ack from CRIU
developers that this is officially the right way to solve this problem. I
really don't want to have random one-off hacks that don't work across the
board, for a problem where we (drm subsystem) really shouldn't be the only
one with this problem. Where "this problem" means that the mmap space is
per file description, and not per underlying inode or real device or
whatever. That part sounds like a CRIU problem, and I expect CRIU folks
want a consistent solution across the board for this. Hence please grab an
ack from them.

Cheers, Daniel



Maybe Adrian can share his views on this.

Hi Adrian - For context, on CRIU restore we see mmap failures (in the 
PIE restore phase) due to permission issues on the (render node) VMAs 
that were inherited because the application that was checkpointed had 
forked. The VMAs ideally should not be in the child process, but the 
smaps file shows these VMAs in the child address space. We didn't want 
to use madvise to avoid this copy, and rather changed the kernel mode 
to limit the impact to our user space library (thunk). Based on my 
understanding, during the PIE restore phase, after the VMA placements are 
finalized, CRIU does a sys_mmap on all the VMAs it sees in the VmaEntry 
list, and I think it's not an issue as per the CRIU design, but do you think 
we could handle this corner case better inside CRIU?




Regards,

Rajneesh


Regards,
Christian.


Regards,
     Felix



Regards,
Christian.

Am 09.12.21 um 16:29 schrieb Bhardwaj, Rajneesh:

Sounds good. I will send a v2 with only ttm_bo_mmap_obj change. Thank
you!

On 12/9/2021 10:27 AM, Christian König wrote:

Hi Rajneesh,

yes, separating this from the drm_gem_mmap_obj() change is certainly
a good idea.


The child cannot access the BOs mapped by the parent anyway with
access restrictions applied

exactly that is not correct. That behavior is actively used by some
userspace stacks as far as I know.

Regards,
Christian.

Am 09.12.21 um 16:23 schrieb Bhardwaj, Rajneesh:

Thanks Christian. Would it make it less intrusive if I just use the
flag for ttm bo mmap and remove the drm_gem_mmap_obj change from
this patch? For our use case, just the ttm_bo_mmap_obj change
should suffice and we don't want to put any more work arounds in
the user spa

Re: [PATCH] drm/ttm: Don't inherit GEM object VMAs in child process

2021-12-22 Thread Bhardwaj, Rajneesh

Adding Adrian Rebel who is the CRIU maintainer and CRIU list

On 12/22/2021 3:53 PM, Daniel Vetter wrote:

On Mon, Dec 20, 2021 at 01:12:51PM -0500, Bhardwaj, Rajneesh wrote:

On 12/20/2021 4:29 AM, Daniel Vetter wrote:

On Fri, Dec 10, 2021 at 07:58:50AM +0100, Christian König wrote:

Am 09.12.21 um 19:28 schrieb Felix Kuehling:

Am 2021-12-09 um 10:30 a.m. schrieb Christian König:

That still won't work.

But I think we could do this change for the amdgpu mmap callback only.

If graphics user mode has problems with it, we could even make this
specific to KFD BOs in the amdgpu_gem_object_mmap callback.

I think it's fine for the whole amdgpu stack, my concern is more about
radeon, nouveau and the ARM stacks which are using this as well.

That blew up so nicely the last time we tried to change it and I know of at
least one case where radeon was/is used with BOs in a child process.

I'm way late and buried again, but I think it'd be good to be consistent



I had committed this change into our amd-staging-drm-next branch last 
week after I got the ACK and RB from Felix and Christian.




here across drivers. Or at least across drm drivers. And we've had the vma
open/close refcounting to make fork work since forever.

I think if we do this we should really only do this for mmap() where this
applies, but reading through the thread here I'm honestly confused why
this is a problem. If CRIU can't handle forked mmaps it needs to be
taught that, not hacked around. Or at least I'm not understanding why
this shouldn't work ...
-Daniel


Hi Daniel

In the v2
https://lore.kernel.org/all/a1a865f5-ad2c-29c8-cbe4-2635d53eceb6@amd.com/T/
I pretty much limited the scope of the change to KFD BOs on mmap. Regarding
CRIU, I think it's not a CRIU problem: on restore, CRIU only tries to
recreate all the child processes and then mmaps all the VMAs it sees (as per
the checkpoint snapshot) in the new process address space after the VMA
placements are finalized in the position independent code phase. Since the
inherited VMAs don't have access rights, the CRIU mmap fails.

Still sounds funky. I think minimally we should have an ack from CRIU
developers that this is officially the right way to solve this problem. I
really don't want to have random one-off hacks that don't work across the
board, for a problem where we (drm subsystem) really shouldn't be the only
one with this problem. Where "this problem" means that the mmap space is
per file description, and not per underlying inode or real device or
whatever. That part sounds like a CRIU problem, and I expect CRIU folks
want a consistent solution across the board for this. Hence please grab an
ack from them.

Cheers, Daniel



Maybe Adrian can share his views on this.

Hi Adrian - For context, on CRIU restore we see mmap failures (in the 
PIE restore phase) due to permission issues on the (render node) VMAs 
that were inherited because the application that was checkpointed had 
forked. The VMAs ideally should not be in the child process, but the 
smaps file shows these VMAs in the child address space. We didn't want 
to use madvise to avoid this copy, and rather changed the kernel mode 
to limit the impact to our user space library (thunk). Based on my 
understanding, during the PIE restore phase, after the VMA placements are 
finalized, CRIU does a sys_mmap on all the VMAs it sees in the VmaEntry 
list, and I think it's not an issue as per the CRIU design, but do you think 
we could handle this corner case better inside CRIU?






Regards,

Rajneesh


Regards,
Christian.


Regards,
     Felix



Regards,
Christian.

Am 09.12.21 um 16:29 schrieb Bhardwaj, Rajneesh:

Sounds good. I will send a v2 with only ttm_bo_mmap_obj change. Thank
you!

On 12/9/2021 10:27 AM, Christian König wrote:

Hi Rajneesh,

yes, separating this from the drm_gem_mmap_obj() change is certainly
a good idea.


The child cannot access the BOs mapped by the parent anyway with
access restrictions applied

exactly that is not correct. That behavior is actively used by some
userspace stacks as far as I know.

Regards,
Christian.

Am 09.12.21 um 16:23 schrieb Bhardwaj, Rajneesh:

Thanks Christian. Would it make it less intrusive if I just use the
flag for ttm bo mmap and remove the drm_gem_mmap_obj change from
this patch? For our use case, just the ttm_bo_mmap_obj change
should suffice and we don't want to put any more work arounds in
the user space (thunk, in our case).

The child cannot access the BOs mapped by the parent anyway with
access restrictions applied so

Re: [PATCH] drm/ttm: Don't inherit GEM object VMAs in child process

2021-12-20 Thread Bhardwaj, Rajneesh



On 12/20/2021 4:29 AM, Daniel Vetter wrote:

On Fri, Dec 10, 2021 at 07:58:50AM +0100, Christian König wrote:

Am 09.12.21 um 19:28 schrieb Felix Kuehling:

Am 2021-12-09 um 10:30 a.m. schrieb Christian König:

That still won't work.

But I think we could do this change for the amdgpu mmap callback only.

If graphics user mode has problems with it, we could even make this
specific to KFD BOs in the amdgpu_gem_object_mmap callback.

I think it's fine for the whole amdgpu stack, my concern is more about
radeon, nouveau and the ARM stacks which are using this as well.

That blew up so nicely the last time we tried to change it and I know of at
least one case where radeon was/is used with BOs in a child process.

I'm way late and buried again, but I think it'd be good to be consistent
here across drivers. Or at least across drm drivers. And we've had the vma
open/close refcounting to make fork work since forever.

I think if we do this we should really only do this for mmap() where this
applies, but reading through the thread here I'm honestly confused why
this is a problem. If CRIU can't handle forked mmaps it needs to be
taught that, not hacked around. Or at least I'm not understanding why
this shouldn't work ...
-Daniel



Hi Daniel

In the v2 
https://lore.kernel.org/all/a1a865f5-ad2c-29c8-cbe4-2635d53ec...@amd.com/T/ 
I pretty much limited the scope of the change to KFD BOs on mmap. 
Regarding CRIU, I think it's not a CRIU problem: on restore, CRIU only 
tries to recreate all the child processes and then mmaps all the VMAs it 
sees (as per the checkpoint snapshot) in the new process address space after 
the VMA placements are finalized in the position independent code phase. 
Since the inherited VMAs don't have access rights, the CRIU mmap fails.


Regards,

Rajneesh


Regards,
Christian.


Regards,
    Felix



Regards,
Christian.

Am 09.12.21 um 16:29 schrieb Bhardwaj, Rajneesh:

Sounds good. I will send a v2 with only ttm_bo_mmap_obj change. Thank
you!

On 12/9/2021 10:27 AM, Christian König wrote:

Hi Rajneesh,

yes, separating this from the drm_gem_mmap_obj() change is certainly
a good idea.


The child cannot access the BOs mapped by the parent anyway with
access restrictions applied

exactly that is not correct. That behavior is actively used by some
userspace stacks as far as I know.

Regards,
Christian.

Am 09.12.21 um 16:23 schrieb Bhardwaj, Rajneesh:

Thanks Christian. Would it make it less intrusive if I just use the
flag for ttm bo mmap and remove the drm_gem_mmap_obj change from
this patch? For our use case, just the ttm_bo_mmap_obj change
should suffice and we don't want to put any more work arounds in
the user space (thunk, in our case).

The child cannot access the BOs mapped by the parent anyway with
access restrictions applied so I wonder why even inherit the vma?

On 12/9/2021 2:54 AM, Christian König wrote:

Am 08.12.21 um 21:53 schrieb Rajneesh Bhardwaj:

When an application having open file access to a node forks, its
shared
mappings also get reflected in the address space of child process
even
though it cannot access them with the object permissions applied.
With the
existing permission checks on the gem objects, it might be
reasonable to
also create the VMAs with VM_DONTCOPY flag so a user space
application
doesn't need to explicitly call the madvise(addr, len,
MADV_DONTFORK)
system call to prevent the pages in the mapped range to appear in
the
address space of the child process. It also prevents the memory
leaks
due to additional reference counts on the mapped BOs in the child
process that prevented freeing the memory in the parent for which
we had
worked around earlier in the user space inside the thunk library.

Additionally, we faced this issue when using CRIU to checkpoint
restore
an application that had such inherited mappings in the child which
confuse CRIU when it mmaps on restore. Having this flag set for the
render node VMAs helps. VMAs mapped via KFD already take care of
this so
this is needed only for the render nodes.

Unfortunately that is most likely a NAK. We already tried
something similar.

While it is illegal by the OpenGL specification and doesn't work
for most userspace stacks, we do have some implementations which
call fork() with a GL context open and expect it to work.

Regards,
Christian.


Cc: Felix Kuehling 

Signed-off-by: David Yat Sin 
Signed-off-by: Rajneesh Bhardwaj 
---
    drivers/gpu/drm/drm_gem.c   | 3 ++-
    drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 +-
    2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 09c820045859..d9c4149f36dd 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1058,7 +1058,8 @@ int drm_gem_mmap_obj(struct drm_gem_object
*obj, unsigned long obj_size,
    goto err_drm_gem_object_put;
    }
    -    vma->vm_flags |= VM_IO | VM_P

Re: [PATCH] drm/ttm: Don't inherit GEM object VMAs in child process

2021-12-09 Thread Bhardwaj, Rajneesh

Sounds good. I will send a v2 with only ttm_bo_mmap_obj change. Thank you!

On 12/9/2021 10:27 AM, Christian König wrote:

Hi Rajneesh,

yes, separating this from the drm_gem_mmap_obj() change is certainly a 
good idea.


The child cannot access the BOs mapped by the parent anyway with 
access restrictions applied


exactly that is not correct. That behavior is actively used by some 
userspace stacks as far as I know.


Regards,
Christian.

Am 09.12.21 um 16:23 schrieb Bhardwaj, Rajneesh:
Thanks Christian. Would it make it less intrusive if I just use the 
flag for ttm bo mmap and remove the drm_gem_mmap_obj change from this 
patch? For our use case, just the ttm_bo_mmap_obj change should 
suffice and we don't want to put any more work arounds in the user 
space (thunk, in our case).


The child cannot access the BOs mapped by the parent anyway with 
access restrictions applied so I wonder why even inherit the vma?


On 12/9/2021 2:54 AM, Christian König wrote:

Am 08.12.21 um 21:53 schrieb Rajneesh Bhardwaj:
When an application having open file access to a node forks, its 
shared

mappings also get reflected in the address space of child process even
though it cannot access them with the object permissions applied. 
With the
existing permission checks on the gem objects, it might be 
reasonable to

also create the VMAs with VM_DONTCOPY flag so a user space application
doesn't need to explicitly call the madvise(addr, len, MADV_DONTFORK)
system call to prevent the pages in the mapped range to appear in the
address space of the child process. It also prevents the memory leaks
due to additional reference counts on the mapped BOs in the child
process that prevented freeing the memory in the parent for which 
we had

worked around earlier in the user space inside the thunk library.

Additionally, we faced this issue when using CRIU to checkpoint 
restore

an application that had such inherited mappings in the child which
confuse CRIU when it mmaps on restore. Having this flag set for the
render node VMAs helps. VMAs mapped via KFD already take care of 
this so

this is needed only for the render nodes.


Unfortunately that is most likely a NAK. We already tried something 
similar.


While it is illegal by the OpenGL specification and doesn't work for 
most userspace stacks, we do have some implementations which call 
fork() with a GL context open and expect it to work.


Regards,
Christian.



Cc: Felix Kuehling 

Signed-off-by: David Yat Sin 
Signed-off-by: Rajneesh Bhardwaj 
---
  drivers/gpu/drm/drm_gem.c   | 3 ++-
  drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 +-
  2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 09c820045859..d9c4149f36dd 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1058,7 +1058,8 @@ int drm_gem_mmap_obj(struct drm_gem_object 
*obj, unsigned long obj_size,

  goto err_drm_gem_object_put;
  }
  -    vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | 
VM_DONTDUMP;

+    vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND
+    | VM_DONTDUMP | VM_DONTCOPY;
  vma->vm_page_prot = 
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));

  vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
  }
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c 
b/drivers/gpu/drm/ttm/ttm_bo_vm.c

index 33680c94127c..420a4898fdd2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -566,7 +566,7 @@ int ttm_bo_mmap_obj(struct vm_area_struct *vma, 
struct ttm_buffer_object *bo)

    vma->vm_private_data = bo;
  -    vma->vm_flags |= VM_PFNMAP;
+    vma->vm_flags |= VM_PFNMAP | VM_DONTCOPY;
  vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
  return 0;
  }






Re: [PATCH] drm/ttm: Don't inherit GEM object VMAs in child process

2021-12-09 Thread Bhardwaj, Rajneesh
Thanks Christian. Would it make it less intrusive if I just use the flag 
for ttm bo mmap and remove the drm_gem_mmap_obj change from this patch? 
For our use case, just the ttm_bo_mmap_obj change should suffice and we 
don't want to put any more work arounds in the user space (thunk, in our 
case).


The child cannot access the BOs mapped by the parent anyway with access 
restrictions applied so I wonder why even inherit the vma?


On 12/9/2021 2:54 AM, Christian König wrote:

Am 08.12.21 um 21:53 schrieb Rajneesh Bhardwaj:

When an application having open file access to a node forks, its shared
mappings also get reflected in the address space of child process even
though it cannot access them with the object permissions applied. 
With the

existing permission checks on the gem objects, it might be reasonable to
also create the VMAs with VM_DONTCOPY flag so a user space application
doesn't need to explicitly call the madvise(addr, len, MADV_DONTFORK)
system call to prevent the pages in the mapped range to appear in the
address space of the child process. It also prevents the memory leaks
due to additional reference counts on the mapped BOs in the child
process that prevented freeing the memory in the parent for which we had
worked around earlier in the user space inside the thunk library.

Additionally, we faced this issue when using CRIU to checkpoint restore
an application that had such inherited mappings in the child which
confuse CRIU when it mmaps on restore. Having this flag set for the
render node VMAs helps. VMAs mapped via KFD already take care of this so
this is needed only for the render nodes.


Unfortunately that is most likely a NAK. We already tried something 
similar.


While it is illegal by the OpenGL specification and doesn't work for 
most userspace stacks, we do have some implementations which call 
fork() with a GL context open and expect it to work.


Regards,
Christian.



Cc: Felix Kuehling 

Signed-off-by: David Yat Sin 
Signed-off-by: Rajneesh Bhardwaj 
---
  drivers/gpu/drm/drm_gem.c   | 3 ++-
  drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 +-
  2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 09c820045859..d9c4149f36dd 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1058,7 +1058,8 @@ int drm_gem_mmap_obj(struct drm_gem_object 
*obj, unsigned long obj_size,

  goto err_drm_gem_object_put;
  }
  -    vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | 
VM_DONTDUMP;

+    vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND
+    | VM_DONTDUMP | VM_DONTCOPY;
  vma->vm_page_prot = 
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));

  vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
  }
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c 
b/drivers/gpu/drm/ttm/ttm_bo_vm.c

index 33680c94127c..420a4898fdd2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -566,7 +566,7 @@ int ttm_bo_mmap_obj(struct vm_area_struct *vma, 
struct ttm_buffer_object *bo)

    vma->vm_private_data = bo;
  -    vma->vm_flags |= VM_PFNMAP;
+    vma->vm_flags |= VM_PFNMAP | VM_DONTCOPY;
  vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
  return 0;
  }




Boot error on Gfx 9 with latest amd-staging-drm-next

2021-04-01 Thread Bhardwaj, Rajneesh

Hi Everyone,

On latest amd-staging-drm-next, the below patch is causing errors at 
boot time and should be reverted.


Error on boot on Vega 10.

[ +0.007084] loop1: detected capacity change from 327992 to 0
[ +0.244709] amdgpu :63:00.0: [drm:amdgpu_ring_test_helper [amdgpu]] 
*ERROR* ring kiq_2.1.0 test failed (-110)

[ +0.000220] [drm:amdgpu_gfx_enable_kcq [amdgpu]] *ERROR* KCQ enable failed
[ +0.000164] [drm:amdgpu_device_init [amdgpu]] *ERROR* hw_init of IP 
block  failed -110

[ +0.000143] amdgpu :63:00.0: amdgpu: amdgpu_device_ip_init failed
[ +0.30] amdgpu :63:00.0: amdgpu: Fatal error during GPU init
[ +0.000237] amdgpu: probe of :63:00.0 failed with error -110


Culprit:

commit 08410e955066bb65c258ed1409f3fcbaa0b83209
Author: Peng Ju Zhou 
Date: Mon Mar 22 15:18:01 2021 +0800


drm/amdgpu: indirect register access for nv12 sriov


1. expand rlcg interface for gc & mmhub indirect access
2. add rlcg interface for no kiq


Signed-off-by: Peng Ju Zhou 
Reviewed-by: Emily.Deng 


Please revert this commit.


Thanks

Rajneesh


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu: fix a few compiler warnings

2021-03-11 Thread Bhardwaj, Rajneesh
[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Rajneesh Bhardwaj 

-Original Message-
From: amd-gfx  On Behalf Of Oak Zeng
Sent: Wednesday, March 10, 2021 10:29 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Cc: Zeng, Oak 
Subject: [PATCH 2/2] drm/amdgpu: fix a few compiler warnings

[CAUTION: External Email]

1. make function mmhub_v1_7_setup_vm_pt_regs static 2. indent a if statement

Signed-off-by: Oak Zeng 
Reported-by: kernel test robot 
Reported-by: Dan Carpenter 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c | 4 ++--  
drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
index 3b4193c..8fca72e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
@@ -88,14 +88,14 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
adev->gmc.xgmi.num_physical_nodes = max_region + 1;

if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
-   return -EINVAL;
+   return -EINVAL;

adev->gmc.xgmi.physical_node_id =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL,
  PF_LFB_REGION);

if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
-   return -EINVAL;
+   return -EINVAL;

adev->gmc.xgmi.node_segment_size = seg_size;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index ac74d66..29d7f50 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -53,7 +53,7 @@ static u64 mmhub_v1_7_get_fb_location(struct amdgpu_device 
*adev)
return base;
 }

-void mmhub_v1_7_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+static void mmhub_v1_7_setup_vm_pt_regs(struct amdgpu_device *adev, 
+uint32_t vmid,
uint64_t page_table_base)  {
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/7] drm/amdgpu: track what pmops flow we are in

2021-03-10 Thread Bhardwaj, Rajneesh
No, those are global system states. Here we are dealing with device pm 
states.


On 3/10/2021 5:16 AM, Liang, Prike wrote:

[AMD Official Use Only - Internal Distribution Only]

Maybe we can use the acpi_target_system_state() interface to identify the 
system-wide suspend target level Sx and then parse the return code against the 
following macro definitions. If I have bandwidth, I will update and refine the AMDGPU 
Sx[0..5] suspend/resume sequence.

#define ACPI_STATE_S0   (u8) 0
#define ACPI_STATE_S1   (u8) 1
#define ACPI_STATE_S2   (u8) 2
#define ACPI_STATE_S3   (u8) 3
#define ACPI_STATE_S4   (u8) 4
#define ACPI_STATE_S5   (u8) 5

Thanks,
Prike

-Original Message-
From: amd-gfx  On Behalf Of
Bhardwaj, Rajneesh
Sent: Wednesday, March 10, 2021 1:25 AM
To: Alex Deucher ; Lazar, Lijo

Cc: Deucher, Alexander ; amd-
g...@lists.freedesktop.org
Subject: Re: [PATCH 4/7] drm/amdgpu: track what pmops flow we are in

pm_message_t events seem to be the right thing to use here instead of
driver's privately managed power states. Please have a look

https://elixir.bootlin.com/linux/v4.7/source/drivers/gpu/drm/i915/i915_drv.c#L714

https://elixir.bootlin.com/linux/v4.7/source/drivers/gpu/drm/drm_sysfs.c#L43

Thanks,

Rajneesh


On 3/9/2021 10:47 AM, Alex Deucher wrote:

[CAUTION: External Email]

On Tue, Mar 9, 2021 at 1:19 AM Lazar, Lijo  wrote:

[AMD Public Use]

This seems a duplicate of dev_pm_info states. Can't we reuse that?

Are you referring to the PM_EVENT_ messages in
dev_pm_info.power_state?  Maybe.  I was not able to find much
documentation on how those should be used.  Do you know?

Alex



Thanks,
Lijo

-Original Message-
From: amd-gfx  On Behalf Of
Alex Deucher
Sent: Tuesday, March 9, 2021 9:40 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH 4/7] drm/amdgpu: track what pmops flow we are in

We reuse the same suspend and resume functions for all of the pmops

states, so flag what state we are in so that we can alter behavior deeper in
the driver depending on the current flow.

Signed-off-by: Alex Deucher 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 20 +++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 58

+++

   drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  3 +-
   3 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d47626ce9bc5..4ddc5cc983c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -347,6 +347,24 @@ int amdgpu_device_ip_block_add(struct
amdgpu_device *adev,  bool amdgpu_get_bios(struct amdgpu_device
*adev);  bool amdgpu_read_bios(struct amdgpu_device *adev);

+/*
+ * PM Ops
+ */
+enum amdgpu_pmops_state {
+   AMDGPU_PMOPS_NONE = 0,
+   AMDGPU_PMOPS_PREPARE,
+   AMDGPU_PMOPS_COMPLETE,
+   AMDGPU_PMOPS_SUSPEND,
+   AMDGPU_PMOPS_RESUME,
+   AMDGPU_PMOPS_FREEZE,
+   AMDGPU_PMOPS_THAW,
+   AMDGPU_PMOPS_POWEROFF,
+   AMDGPU_PMOPS_RESTORE,
+   AMDGPU_PMOPS_RUNTIME_SUSPEND,
+   AMDGPU_PMOPS_RUNTIME_RESUME,
+   AMDGPU_PMOPS_RUNTIME_IDLE,
+};
+
   /*
* Clocks
*/
@@ -1019,8 +1037,8 @@ struct amdgpu_device {
  u8  reset_magic[AMDGPU_RESET_MAGIC_NUM];

  /* s3/s4 mask */
+   enum amdgpu_pmops_state pmops_state;
  boolin_suspend;
-   boolin_hibernate;

  /*
   * The combination flag in_poweroff_reboot_com used to
identify the poweroff diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3e6bb7d79652..0312c52bd39d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1297,34 +1297,54 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)

static int amdgpu_pmops_prepare(struct device *dev)  {

  struct drm_device *drm_dev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(drm_dev);
+   int r;

+   adev-&

Re: [PATCH 4/7] drm/amdgpu: track what pmops flow we are in

2021-03-09 Thread Bhardwaj, Rajneesh
pm_message_t events seem to be the right thing to use here instead of 
driver's privately managed power states. Please have a look


https://elixir.bootlin.com/linux/v4.7/source/drivers/gpu/drm/i915/i915_drv.c#L714

https://elixir.bootlin.com/linux/v4.7/source/drivers/gpu/drm/drm_sysfs.c#L43
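
A rough sketch of what that could look like in a shared suspend path (names
here are illustrative, not actual amdgpu code):

static int example_suspend(struct drm_device *dev, pm_message_t state)
{
	switch (state.event) {
	case PM_EVENT_SUSPEND:		/* S3 */
		break;
	case PM_EVENT_FREEZE:		/* hibernation, freeze phase */
	case PM_EVENT_HIBERNATE:	/* hibernation, poweroff phase */
		break;
	default:
		break;
	}
	return 0;
}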

Thanks,

Rajneesh


On 3/9/2021 10:47 AM, Alex Deucher wrote:

[CAUTION: External Email]

On Tue, Mar 9, 2021 at 1:19 AM Lazar, Lijo  wrote:

[AMD Public Use]

This seems a duplicate of dev_pm_info states. Can't we reuse that?

Are you referring to the PM_EVENT_ messages in
dev_pm_info.power_state?  Maybe.  I was not able to find much
documentation on how those should be used.  Do you know?

Alex



Thanks,
Lijo

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Tuesday, March 9, 2021 9:40 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH 4/7] drm/amdgpu: track what pmops flow we are in

We reuse the same suspend and resume functions for all of the pmops states, so 
flag what state we are in so that we can alter behavior deeper in the driver 
depending on the current flow.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 20 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 58 +++
  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  3 +-
  3 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d47626ce9bc5..4ddc5cc983c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -347,6 +347,24 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, 
 bool amdgpu_get_bios(struct amdgpu_device *adev);  bool 
amdgpu_read_bios(struct amdgpu_device *adev);

+/*
+ * PM Ops
+ */
+enum amdgpu_pmops_state {
+   AMDGPU_PMOPS_NONE = 0,
+   AMDGPU_PMOPS_PREPARE,
+   AMDGPU_PMOPS_COMPLETE,
+   AMDGPU_PMOPS_SUSPEND,
+   AMDGPU_PMOPS_RESUME,
+   AMDGPU_PMOPS_FREEZE,
+   AMDGPU_PMOPS_THAW,
+   AMDGPU_PMOPS_POWEROFF,
+   AMDGPU_PMOPS_RESTORE,
+   AMDGPU_PMOPS_RUNTIME_SUSPEND,
+   AMDGPU_PMOPS_RUNTIME_RESUME,
+   AMDGPU_PMOPS_RUNTIME_IDLE,
+};
+
  /*
   * Clocks
   */
@@ -1019,8 +1037,8 @@ struct amdgpu_device {
 u8  reset_magic[AMDGPU_RESET_MAGIC_NUM];

 /* s3/s4 mask */
+   enum amdgpu_pmops_state pmops_state;
 boolin_suspend;
-   boolin_hibernate;

 /*
  * The combination flag in_poweroff_reboot_com used to identify the 
poweroff diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3e6bb7d79652..0312c52bd39d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1297,34 +1297,54 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)  static int 
amdgpu_pmops_prepare(struct device *dev)  {
 struct drm_device *drm_dev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(drm_dev);
+   int r;

+   adev->pmops_state = AMDGPU_PMOPS_PREPARE;
 /* Return a positive number here so
  * DPM_FLAG_SMART_SUSPEND works properly
  */
 if (amdgpu_device_supports_boco(drm_dev))
-   return pm_runtime_suspended(dev) &&
+   r= pm_runtime_suspended(dev) &&
 pm_suspend_via_firmware();
-
-   return 0;
+   else
+   r = 0;
+   adev->pmops_state = AMDGPU_PMOPS_NONE;
+   return r;
  }

  static void amdgpu_pmops_complete(struct device *dev)  {
+   struct drm_device *drm_dev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+   adev->pmops_state = AMDGPU_PMOPS_COMPLETE;
 /* nothing to do */
+   adev->pmops_state = AMDGPU_PMOPS_NONE;
  }

  static int amdgpu_pmops_suspend(struct device *dev)  {
 struct drm_device *drm_dev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(drm_dev);
+   int r;

-   return amdgpu_device_suspend(drm_dev, true);
+   adev->pmops_state = AMDGPU_PMOPS_SUSPEND;
+   r = amdgpu_device_suspend(drm_dev, true);
+   adev->pmops_state = AMDGPU_PMOPS_NONE;
+   return r;
  }

  static int amdgpu_pmops_resume(struct device *dev)  {
 struct drm_device *drm_dev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(drm_dev);
+   int r;

-   return amdgpu_device_resume(drm_dev, true);
+   adev->pmops_state = AMDGPU_PMOPS_RESUME;
+   r = amdgpu_device_resume(drm_dev, true);
+   adev->pmops_state = AMDGPU_PMOPS_NONE;
+   return r;
  }

  static int amdgpu_pmops_freeze(struct device *dev) @@ -1333,9 +1353,9 @@ 
static int amdgpu_pmops_freeze(struct device *dev)
 struct amdgpu_device *adev = drm_to_adev(drm_dev);
 int r;

-   adev->in_hibern

Re: [PATCH] drm/ttm: ioremap buffer according to TTM mem caching setting

2021-03-04 Thread Bhardwaj, Rajneesh


On 3/4/2021 12:31 PM, Christian König wrote:

[CAUTION: External Email]

Am 04.03.21 um 18:01 schrieb Bhardwaj, Rajneesh:

I was wondering if a managed version of such an API exists, but it looks
like none does. We only have devm_ioremap_wc, but that is valid only for
PAGE_CACHE_MODE_WC, whereas ioremap_cache uses _WB. One more small
comment below.


Acked-by: Rajneesh Bhardwaj 

On 3/4/2021 11:04 AM, Oak Zeng wrote:

If tbo.mem.bus.caching is cached, buffer is intended to be mapped
as cached from CPU. Map it with ioremap_cache.

This wasn't necessary before as device memory was never mapped
as cached from CPU side. It becomes necessary for aldebaran as
device memory is mapped cached from CPU.

Signed-off-by: Oak Zeng 
Reviewed-by: Christian Konig 
---
  drivers/gpu/drm/ttm/ttm_bo_util.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e581..7429464 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -91,6 +91,10 @@ static int ttm_resource_ioremap(struct ttm_device
*bdev,
    if (mem->bus.caching == ttm_write_combined)
  addr = ioremap_wc(mem->bus.offset, bus_size);
+#ifdef CONFIG_X86



Please use #if defined (CONFIG_X86)


Actually #ifdef is usually preferred.


Oops, I was referring to IS_ENABLED(CONFIG) and not #if defined.




Christian.




+    else if (mem->bus.caching == ttm_cached)
+    addr = ioremap_cache(mem->bus.offset, bus_size);
+#endif
  else
  addr = ioremap(mem->bus.offset, bus_size);
  if (!addr) {
@@ -372,6 +376,11 @@ static int ttm_bo_ioremap(struct
ttm_buffer_object *bo,
  if (mem->bus.caching == ttm_write_combined)
  map->virtual = ioremap_wc(bo->mem.bus.offset + offset,
    size);
+#ifdef CONFIG_X86
+    else if (mem->bus.caching == ttm_cached)
+    map->virtual = ioremap_cache(bo->mem.bus.offset + offset,
+  size);
+#endif
  else
  map->virtual = ioremap(bo->mem.bus.offset + offset,
 size);
@@ -490,6 +499,11 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo,
struct dma_buf_map *map)
  else if (mem->bus.caching == ttm_write_combined)
  vaddr_iomem = ioremap_wc(mem->bus.offset,
   bo->base.size);
+    else if (mem->bus.caching == ttm_cached)
+#ifdef CONFIG_X86
+    vaddr_iomem = ioremap_cache(mem->bus.offset,
+  bo->base.size);
+#endif
  else
  vaddr_iomem = ioremap(mem->bus.offset, bo->base.size);





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/ttm: ioremap buffer according to TTM mem caching setting

2021-03-04 Thread Bhardwaj, Rajneesh
I was wondering if a managed version of such an API exists, but it looks 
like none does. We only have devm_ioremap_wc, but that is valid only for 
PAGE_CACHE_MODE_WC, whereas ioremap_cache uses _WB. One more small 
comment below.
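
Something like the following would be a minimal sketch of a managed cached
ioremap, built on devm_add_action_or_reset(); the helper name is hypothetical
and not an existing API:

static void devm_iounmap_action(void *addr)
{
	iounmap((void __iomem *)addr);
}

static void __iomem *devm_ioremap_cache_sketch(struct device *dev,
					       resource_size_t offset,
					       unsigned long size)
{
	void __iomem *addr = ioremap_cache(offset, size);

	if (addr && devm_add_action_or_reset(dev, devm_iounmap_action,
					     (void __force *)addr))
		return NULL;	/* action already ran, mapping released */
	return addr;
}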



Acked-by: Rajneesh Bhardwaj 

On 3/4/2021 11:04 AM, Oak Zeng wrote:

If tbo.mem.bus.caching is cached, buffer is intended to be mapped
as cached from CPU. Map it with ioremap_cache.

This wasn't necessary before as device memory was never mapped
as cached from CPU side. It becomes necessary for aldebaran as
device memory is mapped cached from CPU.

Signed-off-by: Oak Zeng 
Reviewed-by: Christian Konig 
---
  drivers/gpu/drm/ttm/ttm_bo_util.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e581..7429464 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -91,6 +91,10 @@ static int ttm_resource_ioremap(struct ttm_device *bdev,
  
  		if (mem->bus.caching == ttm_write_combined)

addr = ioremap_wc(mem->bus.offset, bus_size);
+#ifdef CONFIG_X86



Please use #if defined (CONFIG_X86)


+   else if (mem->bus.caching == ttm_cached)
+   addr = ioremap_cache(mem->bus.offset, bus_size);
+#endif
else
addr = ioremap(mem->bus.offset, bus_size);
if (!addr) {
@@ -372,6 +376,11 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
if (mem->bus.caching == ttm_write_combined)
map->virtual = ioremap_wc(bo->mem.bus.offset + offset,
  size);
+#ifdef CONFIG_X86
+   else if (mem->bus.caching == ttm_cached)
+   map->virtual = ioremap_cache(bo->mem.bus.offset + 
offset,
+ size);
+#endif
else
map->virtual = ioremap(bo->mem.bus.offset + offset,
   size);
@@ -490,6 +499,11 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct 
dma_buf_map *map)
else if (mem->bus.caching == ttm_write_combined)
vaddr_iomem = ioremap_wc(mem->bus.offset,
 bo->base.size);
+   else if (mem->bus.caching == ttm_cached)
+#ifdef CONFIG_X86
+   vaddr_iomem = ioremap_cache(mem->bus.offset,
+ bo->base.size);
+#endif
else
vaddr_iomem = ioremap(mem->bus.offset, bo->base.size);
  

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: enable BACO runpm by default on sienna ciclid and navy flounder

2021-03-01 Thread Bhardwaj, Rajneesh
Reviewed-by: Rajneesh Bhardwaj 


On 3/1/2021 10:57 AM, Alex Deucher wrote:

[CAUTION: External Email]

It works fine and was only disabled because primary GPUs
don't enter runpm if there is a console bound to the fbdev due
to the kmap.  This will at least allow runpm on secondary cards.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 --
  1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index c03e8f52dae4..fbb863c1cfcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -172,8 +172,6 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, 
unsigned long flags)
 switch (adev->asic_type) {
 case CHIP_VEGA20:
 case CHIP_ARCTURUS:
-   case CHIP_SIENNA_CICHLID:
-   case CHIP_NAVY_FLOUNDER:
 /* enable runpm if runpm=1 */
 if (amdgpu_runtime_pm > 0)
 adev->runpm = true;
--
2.29.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Only check for S0ix if AMD_PMC is configured

2021-03-01 Thread Bhardwaj, Rajneesh

Reviewed-by: Rajneesh Bhardwaj 

On 2/26/2021 5:27 PM, Alex Deucher wrote:

[CAUTION: External Email]

The S0ix check only makes sense if the AMD PMC driver is
present.  We need to use the legacy S3 paths when the
PMC driver is not present.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 8155c54392c8..36a741d63ddc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -903,10 +903,11 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)
   */
  bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
  {
+#if defined(CONFIG_AMD_PMC)
 if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
 if (adev->flags & AMD_IS_APU)
 return true;
 }
-
+#endif
 return false;
  }
--
2.29.2


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/3] drm/amdgpu: use runpm flag rather than fbcon for kfd runtime suspend (v2)

2021-02-04 Thread Bhardwaj, Rajneesh
[AMD Public Use]

Reviewed-by: Rajneesh Bhardwaj 

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Thursday, February 4, 2021 3:05 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH 2/3] drm/amdgpu: use runpm flag rather than fbcon for kfd 
runtime suspend (v2)

[CAUTION: External Email]

the flag used by kfd is not actually related to fbcon, it just happens to 
align.  Use the runpm flag instead so that we can decouple it from the fbcon 
flag.

v2: fix resume as well

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0ee6514ee55c..b7ebd424bbc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3734,7 +3734,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)

r = amdgpu_device_ip_suspend_phase1(adev);

-   amdgpu_amdkfd_suspend(adev, !fbcon);
+   amdgpu_amdkfd_suspend(adev, adev->in_runpm);

/* evict vram memory */
amdgpu_bo_evict_vram(adev);
@@ -3818,7 +3818,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
}
}
}
-   r = amdgpu_amdkfd_resume(adev, !fbcon);
+   r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
if (r)
return r;

--
2.29.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: enable DPM_FLAG_MAY_SKIP_RESUME and DPM_FLAG_SMART_SUSPEND flags

2021-02-04 Thread Bhardwaj, Rajneesh


On 2/4/2021 10:14 AM, Felix Kuehling wrote:

Am 2021-02-04 um 9:37 a.m. schrieb Alex Deucher:

On Wed, Feb 3, 2021 at 2:56 AM Lazar, Lijo  wrote:

[AMD Public Use]


-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Tuesday, February 2, 2021 10:48 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH 2/2] drm/amdgpu: enable DPM_FLAG_MAY_SKIP_RESUME and 
DPM_FLAG_SMART_SUSPEND flags

Once the device has runtime suspended, we don't need to power it back up again 
for system suspend.  Likewise for resume, we don't need to power up the device again 
on resume only to power it back off again via runtime pm because it's still 
idle.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b4780182f990..b78847fa769b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -206,6 +206,12 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, 
unsigned long flags)
 if (amdgpu_device_supports_atpx(dev) &&
 !amdgpu_is_atpx_hybrid())
 dev_pm_set_driver_flags(dev->dev, 
DPM_FLAG_NO_DIRECT_COMPLETE);
+   /* we want direct complete for BOCO */
+   if ((amdgpu_device_supports_atpx(dev) &&
+   amdgpu_is_atpx_hybrid()) ||
+   amdgpu_device_supports_boco(dev))
+   dev_pm_set_driver_flags(dev->dev, 
DPM_FLAG_SMART_SUSPEND |
+   DPM_FLAG_MAY_SKIP_RESUME);

Device runtime suspend callback does -
 amdgpu_device_suspend(drm_dev, false)

System suspend callback does -
 amdgpu_device_suspend(drm_dev, true)

One of the effects of this flag is for KFD to decide whether to evict all 
processes. It is done during system suspend but not during runtime device 
suspend. Will that have an impact if  the system suspend routine is skipped in 
this way?
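
To spell out the two call paths being compared, roughly (a sketch of the pmops
callbacks, not an exact quote of the file):

static int amdgpu_pmops_suspend(struct device *dev)
{
        struct drm_device *drm_dev = dev_get_drvdata(dev);

        /* system-wide suspend path: fbcon == true */
        return amdgpu_device_suspend(drm_dev, true);
}

static int amdgpu_pmops_runtime_suspend(struct device *dev)
{
        struct drm_device *drm_dev = dev_get_drvdata(dev);

        /* runtime suspend path: fbcon == false */
        return amdgpu_device_suspend(drm_dev, false);
}

and kgd2kfd_suspend() uses that boolean (as !fbcon) to decide whether to lock KFD
and evict all processes.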

+ Rajneesh

Can you comment on this?  Idea of this patch is to not wake the device
for system suspend and resume if it's already in runtime suspend.


KFD doesn't allow the amdgpu driver to runtime suspend the device as long as
the per-process device data is valid. This patch only enables the
direct-complete path for an amdgpu device that is already runtime suspended,
so it's implicit that KFD had no active process. While we may be OK not
explicitly cancelling scheduled work or suspending all KFD processes (since
the gpu is already runtime suspended), we still leave KFD unlocked for a
system-wide suspend via the direct-complete path. I think we should still be
OK with this, since the locking is only used to make sure that kfd is not
open during a gpu reset.
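
For reference, the lock in question is just a counter gating kfd_open(); a simplified
sketch (assuming the usual kfd_device.c form, details may differ across branches):

static atomic_t kfd_locked = ATOMIC_INIT(0);

bool kfd_is_locked(void)
{
        /* non-zero while a system suspend or GPU reset is in flight;
         * kfd_open() returns -EAGAIN to new clients in that window */
        return atomic_read(&kfd_locked) > 0;
}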


Also, I suggest using DPM_FLAG_SMART_PREPARE as well, since we are relying
on ->prepare() to skip the late/noirq phases during system suspend.
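
Something along these lines for the prepare side (a sketch of the suggestion, not the
actual amdgpu code):

static int amdgpu_pmops_prepare(struct device *dev)
{
        /* with DPM_FLAG_SMART_PREPARE set, returning 1 here lets the PM core
         * take the direct-complete path and leave an already runtime-suspended
         * device untouched for the whole system suspend/resume cycle */
        return pm_runtime_suspended(dev);
}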



Acked-by: Rajneesh Bhardwaj 



KFD only allows runtime suspend when there are no processes using the
GPU. Therefore it should be safe (in theory) to skip process eviction if
you're already in runtime suspend. Just make sure all the suspend/resume
calls into KFD are paired up correctly. If you skip suspend but then
later call resume anyway, it will likely cause problems.

For testing this, I'd suggest running some KFD application (e.g. kfdtest
or an OpenCL app with ROCm-based OpenCL) before suspend, then suspend,
then run the app again after resume to make sure KFD is still good.

Regards,
   Felix



Alex


Thanks,
Lijo

 pm_runtime_use_autosuspend(dev->dev);
 pm_runtime_set_autosuspend_delay(dev->dev, 5000);
 pm_runtime_allow(dev->dev);
--
2.29.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



Re: [PATCH 1/1] drm/amdkfd: Add IPC API

2020-07-14 Thread Bhardwaj, Rajneesh

Hi Felix

While the detailed review for this is still going on, you might want to
consider the hunk below if you happen to send a v2: kfd_ipc_init() only calls
mutex_init() and hash_init(), neither of which can fail, so it can return
void and kfd_init() can drop the corresponding error path.


--- a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c
@@ -100,11 +100,10 @@ void kfd_ipc_obj_put(struct kfd_ipc_obj **obj)
    }
 }

-int kfd_ipc_init(void)
+void kfd_ipc_init(void)
 {
    mutex_init(&kfd_ipc_handles.lock);
    hash_init(kfd_ipc_handles.handles);
-   return 0;
 }

 static int kfd_import_dmabuf_create_kfd_bo(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_module.c

index dacc131792cf..2dac77cd7b0c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -57,9 +57,7 @@ static int kfd_init(void)
    if (err < 0)
    goto err_topology;

-   err = kfd_ipc_init();
-   if (err < 0)
-   goto err_ipc;
+   kfd_ipc_init();

Thanks

Rajneesh


On 7/13/2020 11:14 PM, Felix Kuehling wrote:

This allows exporting and importing buffers. The API generates handles
that can be used with the HIP IPC API, i.e. big numbers rather than
file descriptors.

Signed-off-by: Felix Kuehling 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   5 +
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  56 +++-
  drivers/gpu/drm/amd/amdkfd/Makefile   |   3 +-
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  74 ++---
  drivers/gpu/drm/amd/amdkfd/kfd_ipc.c  | 263 ++
  drivers/gpu/drm/amd/amdkfd/kfd_ipc.h  |  55 
  drivers/gpu/drm/amd/amdkfd/kfd_module.c   |   5 +
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   5 +
  include/uapi/linux/kfd_ioctl.h|  62 -
  9 files changed, 488 insertions(+), 40 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_ipc.c
  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_ipc.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 3f2b695cf19e..0f8dc4c4f924 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -49,6 +49,7 @@ struct kfd_bo_va_list {
  struct kgd_mem {
 struct mutex lock;
 struct amdgpu_bo *bo;
+   struct kfd_ipc_obj *ipc_obj;
 struct list_head bo_va_list;
 /* protected by amdkfd_process_info.lock */
 struct ttm_validate_buffer validate_list;
@@ -240,9 +241,13 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev 
*kgd,

  int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
   struct dma_buf *dmabuf,
+ struct kfd_ipc_obj *ipc_obj,
   uint64_t va, void *vm,
   struct kgd_mem **mem, uint64_t *size,
   uint64_t *mmap_offset);
+int amdgpu_amdkfd_gpuvm_export_ipc_obj(struct kgd_dev *kgd, void *vm,
+  struct kgd_mem *mem,
+  struct kfd_ipc_obj **ipc_obj);

  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
  void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index c408936e8f98..cd5f23c0c2ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -29,6 +29,7 @@
  #include "amdgpu_vm.h"
  #include "amdgpu_amdkfd.h"
  #include "amdgpu_dma_buf.h"
+#include "kfd_ipc.h"
  #include 

  /* BO flag to indicate a KFD userptr BO */
@@ -1353,6 +1354,9 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 *size = 0;
 }

+   /* Unreference the ipc_obj if applicable */
+   kfd_ipc_obj_put(&mem->ipc_obj);
+
 /* Free the BO*/
 drm_gem_object_put_unlocked(&mem->bo->tbo.base);
 mutex_destroy(&mem->lock);
@@ -1656,6 +1660,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev 
*kgd,

  int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
   struct dma_buf *dma_buf,
+ struct kfd_ipc_obj *ipc_obj,
   uint64_t va, void *vm,
   struct kgd_mem **mem, uint64_t *size,
   uint64_t *mmap_offset)
@@ -1692,15 +1697,18 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev 
*kgd,

 INIT_LIST_HEAD(&(*mem)->bo_va_list);
 mutex_init(&(*mem)->lock);
-
-   (*mem)->alloc_flags =
-   ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
-   KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
-   | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
-   | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
+  

Re: [PATCH 1/2] drm/amdgpu: rework runtime pm enablement for BACO

2020-06-24 Thread Bhardwaj, Rajneesh



On 6/24/2020 3:05 PM, Alex Deucher wrote:

Add a switch statement to simplify asic checks.  Note
that BACO is not supported on APUs, so there is no
need to check them.


Why not base this on smu_context and actually query SMU_FEATURE_BACO_BIT,
then drive the flow below from that instead of nested if/else versus
switch-case logic? I am not sure if there was an issue with smu_context
earlier.
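
Something like the following is what I had in mind, assuming smu_feature_is_enabled()
and SMU_FEATURE_BACO_BIT can be queried at this point (sketch only):

        } else if (amdgpu_device_supports_baco(dev) &&
                   (amdgpu_runtime_pm != 0) &&
                   smu_feature_is_enabled(&adev->smu, SMU_FEATURE_BACO_BIT)) {
                /* let the SMU report BACO support instead of an asic switch-case */
                adev->runpm = true;
        }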




Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 34 -
  1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 4ec544783a45..0fec39eed164 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -167,19 +167,29 @@ int amdgpu_driver_load_kms(struct drm_device *dev, 
unsigned long flags)
 }

 if (amdgpu_device_supports_boco(dev) &&
-   (amdgpu_runtime_pm != 0)) /* enable runpm by default for boco */
-   adev->runpm = true;
-   else if (amdgpu_device_supports_baco(dev) &&
-(amdgpu_runtime_pm != 0) &&
-(adev->asic_type >= CHIP_TOPAZ) &&
-(adev->asic_type != CHIP_VEGA10) &&
-(adev->asic_type != CHIP_VEGA20) &&
-(adev->asic_type != CHIP_SIENNA_CICHLID) &&
-(adev->asic_type != CHIP_ARCTURUS)) /* enable runpm on VI+ */
-   adev->runpm = true;
-   else if (amdgpu_device_supports_baco(dev) &&
-(amdgpu_runtime_pm > 0))  /* enable runpm if runpm=1 on CI */
+   (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */
 adev->runpm = true;
+   } else if (amdgpu_device_supports_baco(dev) &&
+  (amdgpu_runtime_pm != 0)) {
+   switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_CIK
+   case CHIP_BONAIRE:
+   case CHIP_HAWAII:
+#endif
+   case CHIP_VEGA10:
+   case CHIP_VEGA20:
+   case CHIP_ARCTURUS:
+   case CHIP_SIENNA_CICHLID:
+   /* enable runpm if runpm=1 */
+   if (amdgpu_runtime_pm > 0)
+   adev->runpm = true;
+   break;
+   default:
+   /* enable runpm on VI+ */
+   adev->runpm = true;
+   break;
+   }
+   }

 /* Call ACPI methods: require modeset init
  * but failure is not fatal
--
2.25.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: enable runtime pm on vega10 when noretry=0

2020-06-24 Thread Bhardwaj, Rajneesh



On 6/24/2020 3:05 PM, Alex Deucher wrote:

The failures with ROCm only happen with noretry=1, so
enable runtime pm when noretry=0 (the current default).

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 0fec39eed164..341d072edd95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -176,7 +176,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned 
long flags)
 case CHIP_BONAIRE:
 case CHIP_HAWAII:
  #endif
-   case CHIP_VEGA10:
 case CHIP_VEGA20:
 case CHIP_ARCTURUS:
 case CHIP_SIENNA_CICHLID:
@@ -184,6 +183,11 @@ int amdgpu_driver_load_kms(struct drm_device *dev, 
unsigned long flags)
 if (amdgpu_runtime_pm > 0)
 adev->runpm = true;
 break;
+   case CHIP_VEGA10:
+   /* turn runpm on if noretry=0 */
+   if (!amdgpu_noretry)
+   adev->runpm = true;
+   break;


Though it fixes the ROCm PyTorch issue, aren't there stability and
performance concerns, since it will impact recoverable page faults?


I have no objection to this otherwise.

+ felix

Acked-by: Rajneesh Bhardwaj 



 default:
 /* enable runpm on VI+ */
 adev->runpm = true;
--
2.25.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/4] drm/amdgpu/fence: fix ref count leak when pm_runtime_get_sync fails

2020-06-17 Thread Bhardwaj, Rajneesh
[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Rajneesh Bhardwaj 

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Wednesday, June 17, 2020 3:02 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH 3/4] drm/amdgpu/fence: fix ref count leak when 
pm_runtime_get_sync fails

The call to pm_runtime_get_sync increments the counter even in case of failure, 
leading to incorrect ref count.
In case of failure, decrement the ref count before returning.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 8537f4704348..60b323d7caf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -755,8 +755,10 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, 
void *data)
int r;

r = pm_runtime_get_sync(dev->dev);
-   if (r < 0)
+   if (r < 0) {
+   pm_runtime_put_autosuspend(dev->dev);
return 0;
+   }

seq_printf(m, "gpu recover\n");
amdgpu_device_gpu_recover(adev, NULL);
--
2.25.4
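
As an aside, newer kernels have a helper that folds this pattern in; the equivalent fix
would then look like this (sketch, assuming the helper is available on the target
branch):

        /* pm_runtime_resume_and_get() drops the usage count itself on failure,
         * so no explicit pm_runtime_put_autosuspend() is needed in that path */
        r = pm_runtime_resume_and_get(dev->dev);
        if (r < 0)
                return 0;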

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 4/4] drm/amdkfd: fix ref count leak when pm_runtime_get_sync fails

2020-06-17 Thread Bhardwaj, Rajneesh
[AMD Official Use Only - Internal Distribution Only]

Acked-by: Rajneesh Bhardwaj 

-Original Message-
From: Kuehling, Felix  
Sent: Wednesday, June 17, 2020 4:17 PM
To: Alex Deucher ; amd-gfx@lists.freedesktop.org; 
Bhardwaj, Rajneesh 
Cc: Deucher, Alexander 
Subject: Re: [PATCH 4/4] drm/amdkfd: fix ref count leak when 
pm_runtime_get_sync fails

[+Rajneesh]

On 2020-06-17 at 3:02 p.m., Alex Deucher wrote:
> The call to pm_runtime_get_sync increments the counter even in case of 
> failure, leading to incorrect ref count.
> In case of failure, decrement the ref count before returning.
>
> Signed-off-by: Alex Deucher 

Reviewed-by: Felix Kuehling 


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index a9a7f5aa2710..0b4f40905f55 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1116,8 +1116,10 @@ struct kfd_process_device 
> *kfd_bind_process_to_device(struct kfd_dev *dev,
>*/
>   if (!pdd->runtime_inuse) {
>   err = pm_runtime_get_sync(dev->ddev->dev);
> - if (err < 0)
> + if (err < 0) {
> + pm_runtime_put_autosuspend(dev->ddev->dev);
>   return ERR_PTR(err);
> + }
>   }
>  
>   err = kfd_iommu_bind_process_to_device(pdd);
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: restrict bo mapping within gpu address limits

2020-06-02 Thread Bhardwaj, Rajneesh


On 6/2/2020 3:51 PM, Christian König wrote:

Hi Rajneesh,

I think we have reviewed the patch multiple times now, you can push it 
to the amd-staging-drm-next branch.


Thanks, Christian. Just wanted to make sure it's sent once on the public
list. I'll push it to the branch now.





Regards,
Christian.

Am 02.06.20 um 20:27 schrieb Rajneesh Bhardwaj:

Have a strict check on bo mapping since on some systems, such as A+A or
hybrid, the cpu might support 5 level paging or can address memory above
48 bits but gpu might be limited by hardware to just use 48 bits. In
general, this applies to all asics where this limitation can be checked
against their max_pfn range. This restricts the range to map bo within
practical limits of cpu and gpu for shared virtual memory access.
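
To make the bound concrete, a small illustration (assuming AMDGPU_GPU_PAGE_SHIFT == 12
and a 48-bit GPU virtual address space; the numbers are illustrative, not part of the
patch):

        /* max_pfn == 1ULL << (48 - 12) GPU pages, so the last mappable byte is
         * (max_pfn << AMDGPU_GPU_PAGE_SHIFT) - 1 == 0x0000ffffffffffff.  A CPU
         * with 5-level paging can hand out virtual addresses above that, and
         * those are exactly the requests the new check rejects. */
        uint64_t max_va = (uint64_t)adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;

        if (eaddr >= max_va)
                return -EINVAL;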

Reviewed-by: Oak Zeng 
Reviewed-by: Christian König 
Reviewed-by: Hawking Zhang 
Acked-by: Alex Deucher 
Signed-off-by: Rajneesh Bhardwaj 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 7417754e9141..71e005cf2952 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2208,7 +2208,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
  /* make sure object fit at this offset */
  eaddr = saddr + size - 1;
  if (saddr >= eaddr ||
-    (bo && offset + size > amdgpu_bo_size(bo)))
+    (bo && offset + size > amdgpu_bo_size(bo)) ||
+    (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
  return -EINVAL;
    saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -2273,7 +2274,8 @@ int amdgpu_vm_bo_replace_map(struct 
amdgpu_device *adev,

  /* make sure object fit at this offset */
  eaddr = saddr + size - 1;
  if (saddr >= eaddr ||
-    (bo && offset + size > amdgpu_bo_size(bo)))
+    (bo && offset + size > amdgpu_bo_size(bo)) ||
+    (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
  return -EINVAL;
    /* Allocate all the needed memory */



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Patch v2 3/4] drm/amdkfd: refactor runtime pm for baco

2020-02-04 Thread Bhardwaj, Rajneesh

Hi Felix,

Thanks for the review feedback!

On 2/4/2020 4:28 PM, Felix Kuehling wrote:

On 2020-01-31 10:37 p.m., Rajneesh Bhardwaj wrote:

So far the kfd driver implemented the same routines for runtime and system
wide suspend and resume (s2idle or mem). During system wide suspend the
kfd acquires an atomic lock that prevents any more user processes from
creating queues and interacting with the kfd driver and amd gpu. This mechanism
created a problem when the amdgpu device is runtime suspended with BACO
enabled. Any application that relies on kfd driver fails to load because
the driver reports a locked kfd device since gpu is runtime suspended.

However, in an ideal case, when gpu is runtime  suspended the kfd driver
should be able to:

  - auto resume amdgpu driver whenever a client requests compute service
  - prevent runtime suspend for amdgpu  while kfd is in use

This change refactors the amdgpu and amdkfd drivers to support BACO and
runtime power management.

Signed-off-by: Rajneesh Bhardwaj 


One small comment inline. Other than that patches 1-3 are

Reviewed-by: Felix Kuehling 

Also, I believe patch 1 is unchanged from v1 and already got a 
Reviewed-by from Alex. Please remember to add that tag before you submit.



You mean https://www.spinics.net/lists/amd-gfx/msg44689.html? I have 
already added Alex's RB tag.






The last patch that enabled runtime PM by default, I'd leave the 
decision to submit that up to Alex. There may be other considerations 
than just KFD.



Sure, but it's needed along with the series; otherwise we can't test/use it.





See inline ...



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 12 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  8 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 +-
  drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 29 +--
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  1 +
  drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 43 --
  6 files changed, 70 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 8609287620ea..314c4a2a0354 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -178,18 +178,18 @@ void amdgpu_amdkfd_interrupt(struct 
amdgpu_device *adev,

  kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
  }
  -void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
  {
  if (adev->kfd.dev)
-    kgd2kfd_suspend(adev->kfd.dev);
+    kgd2kfd_suspend(adev->kfd.dev, run_pm);
  }
  -int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
  {
  int r = 0;
    if (adev->kfd.dev)
-    r = kgd2kfd_resume(adev->kfd.dev);
+    r = kgd2kfd_resume(adev->kfd.dev, run_pm);
    return r;
  }
@@ -713,11 +713,11 @@ void kgd2kfd_exit(void)
  {
  }
  -void kgd2kfd_suspend(struct kfd_dev *kfd)
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
  {
  }
  -int kgd2kfd_resume(struct kfd_dev *kfd)
+int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
  {
  return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index 47b0f2957d1f..9e8db702d878 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -122,8 +122,8 @@ struct amdkfd_process_info {
  int amdgpu_amdkfd_init(void);
  void amdgpu_amdkfd_fini(void);
  -void amdgpu_amdkfd_suspend(struct amdgpu_device *adev);
-int amdgpu_amdkfd_resume(struct amdgpu_device *adev);
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
  void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
  const void *ih_ring_entry);
  void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
@@ -249,8 +249,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
   struct drm_device *ddev,
   const struct kgd2kfd_shared_resources *gpu_resources);
  void kgd2kfd_device_exit(struct kfd_dev *kfd);
-void kgd2kfd_suspend(struct kfd_dev *kfd);
-int kgd2kfd_resume(struct kfd_dev *kfd);
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
+int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
  int kgd2kfd_pre_reset(struct kfd_dev *kfd);
  int kgd2kfd_post_reset(struct kfd_dev *kfd);
  void kgd2kfd_interrupt(struct kfd_dev *kfd, const void 
*ih_ring_entry);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 5030a09babb8..43843e6c4bcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3311,7 +3311,7 @@ int amdgpu_device_suspend(struct drm_device 
*dev, bool fbcon)

  }
  }
  -    amdgpu_amdkfd_suspend(adev);
+    amdgpu_amdkfd_suspend(adev, !fbcon);
  

Re: [Patch v2 3/4] drm/amdkfd: refactor runtime pm for baco

2020-02-01 Thread Bhardwaj, Rajneesh



On 1/31/2020 11:21 PM, Zeng, Oak wrote:

[AMD Official Use Only - Internal Distribution Only]

Patch 1,2,3 work for me. See one comment inline, otherwise Reviewed-by: Oak Zeng 


Regards,
Oak



Thanks for the review and testing! My response below.

8< -snip >8---

  
-void kgd2kfd_suspend(struct kfd_dev *kfd)

+void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
[Oak] I think the parameter run_pm would be better called call_from_pm_runtime. Or
you can add a comment on this function's parameter to say that when
run_pm == true it is called from runtime PM.



The main purpose of passing this arg is to skip locking kfd for runtime
suspend, and it's already described in the comment above the block that
skips the locking.


Thanks,

Rajneesh

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Patch v1 5/5] drm/amdkfd: refactor runtime pm for baco

2020-01-30 Thread Bhardwaj, Rajneesh



On 1/28/2020 3:09 PM, Zeng, Oak wrote:

[AMD Official Use Only - Internal Distribution Only]



Regards,
Oak

-Original Message-
From: amd-gfx  On Behalf Of Rajneesh 
Bhardwaj
Sent: Monday, January 27, 2020 8:29 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Kuehling, Felix 
; Bhardwaj, Rajneesh 
Subject: [Patch v1 5/5] drm/amdkfd: refactor runtime pm for baco

So far the kfd driver implemented same routines for runtime and system wide 
suspend and resume (s2idle or mem). During system wide suspend the kfd aquires 
an atomic lock that prevents any more user processes to create queues and 
interact with kfd driver and amd gpu. This mechanism created problem when 
amdgpu device is runtime suspended with BACO enabled. Any application that 
relies on kfd driver fails to load because the driver reports a locked kfd 
device since gpu is runtime suspended.

However, in an ideal case, when gpu is runtime  suspended the kfd driver should 
be able to:

  - auto resume amdgpu driver whenever a client requests compute service
  - prevent runtime suspend for amdgpu  while kfd is in use

This change refactors the amdgpu and amdkfd drivers to support BACO and runtime 
runtime power management.
[Oak] two runtime above



Thanks, will fix it.




Signed-off-by: Rajneesh Bhardwaj 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 12 -  
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  8 +++---  
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 +--
  drivers/gpu/drm/amd/amdkfd/kfd_device.c| 31 +-
  drivers/gpu/drm/amd/amdkfd/kfd_iommu.c |  5 +++-
  drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 19 ++---
  6 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8609287620ea..314c4a2a0354 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -178,18 +178,18 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);  }
  
-void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)

+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
[Oak] the name run_pm is a little bit confusing. Maybe system_wide_pm or
none_runtime_pm?
  {
if (adev->kfd.dev)
-   kgd2kfd_suspend(adev->kfd.dev);
+   kgd2kfd_suspend(adev->kfd.dev, run_pm);
  }
  
-int amdgpu_amdkfd_resume(struct amdgpu_device *adev)

+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
  {
int r = 0;
  
  	if (adev->kfd.dev)

-   r = kgd2kfd_resume(adev->kfd.dev);
+   r = kgd2kfd_resume(adev->kfd.dev, run_pm);
  
  	return r;

  }
@@ -713,11 +713,11 @@ void kgd2kfd_exit(void)  {  }
  
-void kgd2kfd_suspend(struct kfd_dev *kfd)

+void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
  {
  }
  
-int kgd2kfd_resume(struct kfd_dev *kfd)

+int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
  {
return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 0fa898d30207..3dce4a51e522 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -123,8 +123,8 @@ struct amdkfd_process_info {  int amdgpu_amdkfd_init(void); 
 void amdgpu_amdkfd_fini(void);
  
-void amdgpu_amdkfd_suspend(struct amdgpu_device *adev); -int amdgpu_amdkfd_resume(struct amdgpu_device *adev);

+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
  void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry);
  void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -250,8 +250,8 
@@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 struct drm_device *ddev,
 const struct kgd2kfd_shared_resources *gpu_resources); 
 void kgd2kfd_device_exit(struct kfd_dev *kfd); -void kgd2kfd_suspend(struct 
kfd_dev *kfd); -int kgd2kfd_resume(struct kfd_dev *kfd);
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); int
+kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
  int kgd2kfd_pre_reset(struct kfd_dev *kfd);  int kgd2kfd_post_reset(struct 
kfd_dev *kfd);  void kgd2kfd_interrupt(struct kfd_dev *kfd, const void 
*ih_ring_entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d9e5eac182d3..787d49e9f4de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3314,7 +3314,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
}
}
  
-	amdgpu_amdkfd_suspend(adev);

+   amdgpu_amdkfd_suspend(adev, fbcon);
  
  	amdgpu_ras_suspend(adev);
  
@@ -3398,7 +3398,7 @@ int amdgpu_device_res

Re: [Patch v1 1/5] drm/amdgpu: always enable runtime power management

2020-01-30 Thread Bhardwaj, Rajneesh



On 1/28/2020 3:14 PM, Alex Deucher wrote:

On Mon, Jan 27, 2020 at 8:30 PM Rajneesh Bhardwaj
 wrote:

This allows runtime power management to kick in on amdgpu driver when
the underlying hardware supports either BOCO or BACO. This can still be
avoided if boot arg amdgpu.runpm = 0 is supplied.

 BOCO: Bus Off, Chip Off
 BACO: Bus Alive, Chip Off

Signed-off-by: Rajneesh Bhardwaj 

This patch should be the last one in the series, otherwise we'll
enable runpm on BACO capable devices before the KFD code is in place.
Also, it's only supported on VI and newer asics, so we should use this
patch instead:
https://patchwork.freedesktop.org/patch/335402/

Alex


Thanks, will fix in v2.



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 9 -
  1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 3a0ea9096498..7958d508486e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -169,11 +169,10 @@ int amdgpu_driver_load_kms(struct drm_device *dev, 
unsigned long flags)
 goto out;
 }

-   if (amdgpu_device_supports_boco(dev) &&
-   (amdgpu_runtime_pm != 0)) /* enable runpm by default */
-   adev->runpm = true;
-   else if (amdgpu_device_supports_baco(dev) &&
-(amdgpu_runtime_pm > 0)) /* enable runpm if runpm=1 */
+   /* always enable runtime power management except when amdgpu.runpm=0 */
+   if ((amdgpu_device_supports_boco(dev) ||
+   amdgpu_device_supports_baco(dev))
+   && (amdgpu_runtime_pm != 0))
 adev->runpm = true;

 /* Call ACPI methods: require modeset init
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Patch v1 3/5] drm/amdkfd: Introduce debugfs option to disable baco

2020-01-30 Thread Bhardwaj, Rajneesh

Hi Alex


Thanks for your time and feedback!


On 1/28/2020 3:22 PM, Alex Deucher wrote:

On Mon, Jan 27, 2020 at 8:30 PM Rajneesh Bhardwaj
 wrote:

When BACO is enabled by default, it can sometimes cause additional trouble
when debugging KFD issues. This debugfs override allows temporarily disabling
BACO for debugging purposes without having to reboot the machine.

However, in some cases one suspend-resume cycle might be needed if
the device is already runtime suspended.

e.g

sudo rtcwake -m < mem or freeze > -s 15

or

by triggering autosuspend event from user space, by doing something
like:

echo 6000 > /sys/bus/pci/devices/\:03\:00.0/power/autosuspend_delay_ms

 Usage:

echo 0 > /sys/kernel/debug/kfd/enable_baco and run
cat /sys/kernel/debug/kfd/baco_status to verify whether BACO is
enabled or disabled by kfd driver.

It should be noted that while enabling baco again via kfd override, we
must do the following steps:

1. echo 0 > /sys/kernel/debug/kfd/enable_baco
2. sudo rtcwake -m < mem > -s 15

In this case, we need the GPU to be fully reset, which is done by the BIOS.
This is not possible in the case of S2idle (i.e. freeze), so we must use mem
for sleep.
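
One minimal way to expose such a knob (an illustrative sketch; apart from the file
name, the identifiers are placeholders and this is not the patch's implementation):

static bool kfd_enable_baco = true;     /* placeholder variable name */

static void kfd_debugfs_add_baco_override(struct dentry *kfd_debugfs_root)
{
        /* exposes /sys/kernel/debug/kfd/enable_baco as a boolean knob */
        debugfs_create_bool("enable_baco", 0644, kfd_debugfs_root,
                            &kfd_enable_baco);
}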


I think we can drop this patch in favor of just using the standard
runtime pm control.  E.g.,
/sys/class/drm/card0/device/power/control



Sure, i was using the /sys/bus/pci way to do it and found it was not 
easy. Since this sysfs exists, will drop the patch.



Regards

Rajneesh



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Patch v1 4/5] drm/amdkfd: show warning when kfd is locked

2020-01-30 Thread Bhardwaj, Rajneesh


On 1/28/2020 5:42 PM, Felix Kuehling wrote:

On 2020-01-27 20:29, Rajneesh Bhardwaj wrote:

During system suspend the kfd driver acquires a lock that prohibits
further kfd actions unless the gpu is resumed. This adds some info which
can be useful while debugging.

Signed-off-by: Rajneesh Bhardwaj 
---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

index 1aebda4bbbe7..081cc5f40d18 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -127,6 +127,8 @@ static int kfd_open(struct inode *inode, struct 
file *filep)

  return PTR_ERR(process);
    if (kfd_is_locked()) {
+    dev_warn(kfd_device, "kfd is locked!\n"
+    "process %d unreferenced", process->pasid);


If this is for debugging, make it dev_dbg. Printing warnings like this 
usually confuses people reporting completely unrelated problems that 
aren't even kernel problems at all. "Look, I found a warning in the 
kernel log. It must be a kernel problem."


Agree. Will fix in v2.
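
i.e. something like this in v2 (sketch):

        if (kfd_is_locked()) {
                dev_dbg(kfd_device, "kfd is locked! process %d unreferenced\n",
                        process->pasid);
                kfd_unref_process(process);
                return -EAGAIN;
        }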




Regards,
  Felix



  kfd_unref_process(process);
  return -EAGAIN;
  }

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Patch v1 5/5] drm/amdkfd: refactor runtime pm for baco

2020-01-30 Thread Bhardwaj, Rajneesh

Hello Felix,

Thanks for your time to review and for your feedback.

On 1/29/2020 5:52 PM, Felix Kuehling wrote:

Hi Rajneesh,

See comments inline ...

And a general question: Why do you need to set the autosuspend_delay 
in so many places? Amdgpu only has a single call to this function 
during initialization.



We don't. I have called this out in the cover letter since I too feel it's
not the best way to do what we want to do. I have seen weird issues on dual
GPUs with KFDTest that sometimes result in system hangs and gpu hangs.


Even if I try running kfdtest on just one node in a multi-gpu system, the
baco exit seems to wake up all gpus, and then one goes back to auto suspend
while the other performs the desired kfdtest operation. I have never seen any
issue with a single gpu card. So in the current approach, I am making sure
that the GPUs are not quickly auto-suspended while the kfd operations are
ongoing, but once the kfdtest is finished, the runtime suspend call will
ensure that the timeout is reset back to 5 seconds.



I would like to avoid this and would appreciate some pointers on where I can
put the get_sync calls while unbinding. I have tried a number of places but
saw some issues, so any help identifying the proper logical inverse of
kfd_bind_process_to_device() would be highly appreciated; the kind of pairing
I am aiming for is sketched below.
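
Roughly this kind of pairing (a sketch, assuming the inverse lives in the
per-process-device teardown path, which is not part of this series):

        /* bind: first KFD user of this device in the process takes a runtime PM ref */
        if (!pdd->runtime_inuse) {
                err = pm_runtime_get_sync(dev->ddev->dev);
                if (err < 0) {
                        pm_runtime_put_autosuspend(dev->ddev->dev);
                        return ERR_PTR(err);
                }
                pdd->runtime_inuse = true;
        }

        /* unbind (the inverse, e.g. when the pdd is destroyed): drop the ref and
         * let the autosuspend timer decide when the GPU actually powers down */
        if (pdd->runtime_inuse) {
                pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
                pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
                pdd->runtime_inuse = false;
        }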





On 2020-01-27 20:29, Rajneesh Bhardwaj wrote:

So far the kfd driver implemented same routines for runtime and system
wide suspend and resume (s2idle or mem). During system wide suspend the
kfd aquires an atomic lock that prevents any more user processes to
create queues and interact with kfd driver and amd gpu. This mechanism
created problem when amdgpu device is runtime suspended with BACO
enabled. Any application that relies on kfd driver fails to load because
the driver reports a locked kfd device since gpu is runtime suspended.

However, in an ideal case, when gpu is runtime  suspended the kfd driver
should be able to:

  - auto resume amdgpu driver whenever a client requests compute service
  - prevent runtime suspend for amdgpu  while kfd is in use

This change refactors the amdgpu and amdkfd drivers to support BACO and
runtime runtime power management.

Signed-off-by: Rajneesh Bhardwaj 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 12 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  8 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 +--
  drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 31 +-
  drivers/gpu/drm/amd/amdkfd/kfd_iommu.c |  5 +++-
  drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 19 ++---
  6 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 8609287620ea..314c4a2a0354 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -178,18 +178,18 @@ void amdgpu_amdkfd_interrupt(struct 
amdgpu_device *adev,

  kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
  }
  -void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
  {
  if (adev->kfd.dev)
-    kgd2kfd_suspend(adev->kfd.dev);
+    kgd2kfd_suspend(adev->kfd.dev, run_pm);
  }
  -int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
  {
  int r = 0;
    if (adev->kfd.dev)
-    r = kgd2kfd_resume(adev->kfd.dev);
+    r = kgd2kfd_resume(adev->kfd.dev, run_pm);
    return r;
  }
@@ -713,11 +713,11 @@ void kgd2kfd_exit(void)
  {
  }
  -void kgd2kfd_suspend(struct kfd_dev *kfd)
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
  {
  }
  -int kgd2kfd_resume(struct kfd_dev *kfd)
+int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
  {
  return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index 0fa898d30207..3dce4a51e522 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -123,8 +123,8 @@ struct amdkfd_process_info {
  int amdgpu_amdkfd_init(void);
  void amdgpu_amdkfd_fini(void);
  -void amdgpu_amdkfd_suspend(struct amdgpu_device *adev);
-int amdgpu_amdkfd_resume(struct amdgpu_device *adev);
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
  void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
  const void *ih_ring_entry);
  void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
@@ -250,8 +250,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
   struct drm_device *ddev,
   const struct kgd2kfd_shared_resources *gpu_resources);
  void kgd2kfd_device_exit(struct kfd_dev *kfd);
-void kgd2kfd_suspend(struct kfd_dev *kfd);
-int kgd2kfd_resume(struct kfd_dev *kfd);
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
+int