RE: [PATCH] drm/amdgpu: refine gprs init shaders to check coverage

2021-04-20 Thread Zhang, Hawking
[AMD Public Use]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-----Original Message-----
From: Dennis Li  
Sent: Wednesday, April 21, 2021 13:51
To: amd-gfx@lists.freedesktop.org; Deucher, Alexander 
; Kuehling, Felix ; Zhang, 
Hawking ; Koenig, Christian 
Cc: Li, Dennis 
Subject: [PATCH] drm/amdgpu: refine gprs init shaders to check coverage

Add code to check whether all SIMDs are covered, to make sure that all GPRs
are initialized.

Signed-off-by: Dennis Li 

[PATCH] drm/amdgpu: refine gprs init shaders to check coverage

2021-04-20 Thread Dennis Li
Add code to check whether all SIMDs are covered, to make sure that all
GPRs are initialized.

Signed-off-by: Dennis Li 

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9889bd495ba5..9e629f239288 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4656,8 +4656,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct 
amdgpu_device *adev)
if (!ring->sched.ready)
return 0;
 
-   if (adev->asic_type == CHIP_ARCTURUS ||
-   adev->asic_type == CHIP_ALDEBARAN) {
+   if (adev->asic_type == CHIP_ARCTURUS) {
vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
vgpr_init_shader_size = 
sizeof(vgpr_init_compute_shader_arcturus);
vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
@@ -4924,7 +4923,11 @@ static int gfx_v9_0_ecc_late_init(void *handle)
}
 
/* requires IBs so do in late init after IB pool is initialized */
-   r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+   if (adev->asic_type == CHIP_ALDEBARAN)
+   r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
+   else
+   r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+
if (r)
return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 9ca76a3ac38c..798c0e178201 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -22,6 +22,7 @@
  */
 #include "amdgpu.h"
 #include "soc15.h"
+#include "soc15d.h"
 
 #include "gc/gc_9_4_2_offset.h"
 #include "gc/gc_9_4_2_sh_mask.h"
@@ -79,6 +80,377 @@ static const struct soc15_reg_golden 
golden_settings_gc_9_4_2_alde[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20),
 };
 
+static const u32 vgpr_init_compute_shader_aldebaran[] = {
+   0xb8840904, 0xb8851a04, 0xb8861344, 0x9207c006, 0x92088405, 0x81070807,
+   0x81070407, 0x8e078207, 0xbe88008f, 0xc0410200, 0x0007, 0xd3d94000,
+   0x1880, 0xd3d94001, 0x1880, 0xd3d94002, 0x1880, 0xd3d94003,
+   0x1880, 0xd3d94004, 0x1880, 0xd3d94005, 0x1880, 0xd3d94006,
+   0x1880, 0xd3d94007, 0x1880, 0xd3d94008, 0x1880, 0xd3d94009,
+   0x1880, 0xd3d9400a, 0x1880, 0xd3d9400b, 0x1880, 0xd3d9400c,
+   0x1880, 0xd3d9400d, 0x1880, 0xd3d9400e, 0x1880, 0xd3d9400f,
+   0x1880, 0xd3d94010, 0x1880, 0xd3d94011, 0x1880, 0xd3d94012,
+   0x1880, 0xd3d94013, 0x1880, 0xd3d94014, 0x1880, 0xd3d94015,
+   0x1880, 0xd3d94016, 0x1880, 0xd3d94017, 0x1880, 0xd3d94018,
+   0x1880, 0xd3d94019, 0x1880, 0xd3d9401a, 0x1880, 0xd3d9401b,
+   0x1880, 0xd3d9401c, 0x1880, 0xd3d9401d, 0x1880, 0xd3d9401e,
+   0x1880, 0xd3d9401f, 0x1880, 0xd3d94020, 0x1880, 0xd3d94021,
+   0x1880, 0xd3d94022, 0x1880, 0xd3d94023, 0x1880, 0xd3d94024,
+   0x1880, 0xd3d94025, 0x1880, 0xd3d94026, 0x1880, 0xd3d94027,
+   0x1880, 0xd3d94028, 0x1880, 0xd3d94029, 0x1880, 0xd3d9402a,
+   0x1880, 0xd3d9402b, 0x1880, 0xd3d9402c, 0x1880, 0xd3d9402d,
+   0x1880, 0xd3d9402e, 0x1880, 0xd3d9402f, 0x1880, 0xd3d94030,
+   0x1880, 0xd3d94031, 0x1880, 0xd3d94032, 0x1880, 0xd3d94033,
+   0x1880, 0xd3d94034, 0x1880, 0xd3d94035, 0x1880, 0xd3d94036,
+   0x1880, 0xd3d94037, 0x1880, 0xd3d94038, 0x1880, 0xd3d94039,
+   0x1880, 0xd3d9403a, 0x1880, 0xd3d9403b, 0x1880, 0xd3d9403c,
+   0x1880, 0xd3d9403d, 0x1880, 0xd3d9403e, 0x1880, 0xd3d9403f,
+   0x1880, 0xd3d94040, 0x1880, 0xd3d94041, 0x1880, 0xd3d94042,
+   0x1880, 0xd3d94043, 0x1880, 0xd3d94044, 0x1880, 0xd3d94045,
+   0x1880, 0xd3d94046, 0x1880, 0xd3d94047, 0x1880, 0xd3d94048,
+   0x1880, 0xd3d94049, 0x1880, 0xd3d9404a, 0x1880, 0xd3d9404b,
+   0x1880, 0xd3d9404c, 0x1880, 0xd3d9404d, 0x1880, 0xd3d9404e,
+   0x1880, 0xd3d9404f, 0x1880, 0xd3d94050, 0x1880, 0xd3d94051,
+   0x1880, 0xd3d94052, 0x1880, 0xd3d94053, 0x1880, 0xd3d94054,
+   0x1880, 0xd3d94055, 0x1880, 0xd3d94056, 0x1880, 0xd3d94057,
+   0x1880, 0xd3d94058, 0x1880, 0xd3d94059, 0x1880, 0xd3d9405a,
+   0x1880, 0xd3d9405b, 0x1880, 0xd3d9405c, 0x1880, 0xd3d9405d,
+   0x1880, 0xd3d9405e, 0x1880, 0xd3d9405f, 0x1880, 0xd3d94060,
+   0x1880, 0xd3d94061, 0x1880, 0xd3d94062, 0x1880, 0xd3d94063,
+   0x1880, 0xd3d94064, 0x1880, 0xd3d94065, 0x1880, 0xd3d94066,
+   0x1880, 0xd3d94067, 0x1880, 0xd3d94068, 0x1880, 0xd3d94069,
+   0x1880, 0xd3d9406a, 0x1880, 0xd3d9406b, 0x1880, 0xd3d9406c,
+   0x1880, 0xd3d9406d, 0x1880, 0xd3d9406e, 

Re: [PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

2021-04-20 Thread Nieto, David M
[AMD Official Use Only - Internal Distribution Only]

I think we should probably add the PCI domain to the BDF to match the format in
the kernel:

+   seq_printf(m, "pdev:\t%02x:%02x.%d\npasid:\t%u\n", bus, dev, fn,
+   fpriv->vm.pasid);

You can get it with:

pci_domain_nr(pdev->bus)
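
A minimal sketch of the amended print, reusing the variable names from the
snippet above (the exact names in amdgpu_fdinfo.c may differ) and assuming
pdev is the device's struct pci_dev:

	/* Print domain:bus:device.function so the BDF matches the
	 * kernel's dev_name() format, plus the PASID as before.
	 */
	uint32_t domain = pci_domain_nr(pdev->bus);

	seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n",
		   domain, bus, dev, fn, fpriv->vm.pasid);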

David


From: Roy Sun 
Sent: Tuesday, April 20, 2021 8:46 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Sun, Roy ; Nieto, David M 
Subject: [PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

Track devices, process info and fence info using
/proc/pid/fdinfo

Signed-off-by: David M Nieto 
Signed-off-by: Roy Sun 

[PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

2021-04-20 Thread Roy Sun
Track devices, process info and fence info using
/proc/pid/fdinfo

Signed-off-by: David M Nieto 
Signed-off-by: Roy Sun 
---
 drivers/gpu/drm/amd/amdgpu/Makefile|  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 61 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 92 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h | 43 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  1 +
 8 files changed, 208 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index ee85e8aba636..d216b7ecb5d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,6 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o
 
+amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
+
 amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
 
 # add asic specific block
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 125b25a5ce5b..3365feae15e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -107,6 +107,7 @@
 #include "amdgpu_gfxhub.h"
 #include "amdgpu_df.h"
 #include "amdgpu_smuio.h"
+#include "amdgpu_fdinfo.h"
 
 #define MAX_GPU_INSTANCE   16
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0350205c4897..01fe60fedcbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -651,3 +651,64 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
 }
+
+void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity 
*centity,
+   ktime_t *total, ktime_t *max)
+{
+   ktime_t now, t1;
+   uint32_t i;
+
+   now = ktime_get();
+   for (i = 0; i < amdgpu_sched_jobs; i++) {
+   struct dma_fence *fence;
+   struct drm_sched_fence *s_fence;
+
+   spin_lock(&ctx->ring_lock);
+   fence = dma_fence_get(centity->fences[i]);
+   spin_unlock(&ctx->ring_lock);
+   if (!fence)
+   continue;
+   s_fence = to_drm_sched_fence(fence);
+   if (!dma_fence_is_signaled(&s_fence->scheduled))
+   continue;
+   t1 = s_fence->scheduled.timestamp;
+   if (t1 >= now)
+   continue;
+   if (dma_fence_is_signaled(&s_fence->finished) &&
+   s_fence->finished.timestamp < now)
+   *total += ktime_sub(s_fence->finished.timestamp, t1);
+   else
+   *total += ktime_sub(now, t1);
+   t1 = ktime_sub(now, t1);
+   dma_fence_put(fence);
+   *max = max(t1, *max);
+   }
+}
+
+ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
+   uint32_t idx, uint64_t *elapsed)
+{
+   struct idr *idp;
+   struct amdgpu_ctx *ctx;
+   uint32_t id;
+   struct amdgpu_ctx_entity *centity;
+   ktime_t total = 0, max = 0;
+
+   if (idx >= AMDGPU_MAX_ENTITY_NUM)
+   return 0;
+   idp = &mgr->ctx_handles;
+   mutex_lock(&mgr->lock);
+   idr_for_each_entry(idp, ctx, id) {
+   if (!ctx->entities[hwip][idx])
+   continue;
+
+   centity = ctx->entities[hwip][idx];
+   amdgpu_ctx_fence_time(ctx, centity, &total, &max);
+   }
+
+   mutex_unlock(&mgr->lock);
+   if (elapsed)
+   *elapsed = max;
+
+   return total;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index f54e10314661..10dcf59a5c6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -87,5 +87,8 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
-
+ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
+   uint32_t idx, uint64_t *elapsed);
+void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity 
*centity,
+   ktime_t *total, ktime_t *max);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0369d3532bf0..01603378dbc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ 

[PATCH 1/2] drm/scheduler: Change scheduled fence track

2021-04-20 Thread Roy Sun
Update the timestamp of the scheduled fence on HW
completion of the previous fences.

This allows more accurate tracking of the fence
execution in HW.

Signed-off-by: David M Nieto 
Signed-off-by: Roy Sun 
---
 drivers/gpu/drm/scheduler/sched_main.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 92d8de24d0a1..dc05a20a8ef2 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -515,7 +515,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler 
*sched)
 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
 
 /**
- * drm_sched_resubmit_jobs_ext - helper to relunch certain number of jobs from 
mirror ring list
+ * drm_sched_resubmit_jobs_ext - helper to relaunch certain number of jobs 
from pending list
  *
  * @sched: scheduler instance
  * @max: job numbers to relaunch
@@ -671,7 +671,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
 static struct drm_sched_job *
 drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 {
-   struct drm_sched_job *job;
+   struct drm_sched_job *job, *next;
 
/*
 * Don't destroy jobs while the timeout worker is running  OR thread
@@ -690,6 +690,13 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
/* remove job from pending_list */
list_del_init(&job->list);
+   /* account for the next fence in the queue */
+   next = list_first_entry_or_null(&sched->pending_list,
+   struct drm_sched_job, list);
+   if (next) {
+   next->s_fence->scheduled.timestamp =
+   job->s_fence->finished.timestamp;
+   }
} else {
job = NULL;
/* queue timeout for next job */
-- 
2.31.1



Re: [PATCH 5/6] drm/amdkfd: enable subsequent retry fault

2021-04-20 Thread philip yang

On 2021-04-20 9:22 p.m., Felix Kuehling wrote:

On 2021-04-20 at 4:21 p.m., Philip Yang wrote:

After draining a stale retry fault, or failing to validate the range
for recovery, we have to remove the fault address from the fault filter
ring, to be able to handle a subsequent retry interrupt on the same
address. Otherwise the retry fault will not be processed for recovery
until the timeout has passed.

Signed-off-by: Philip Yang 

Patches 1-3 and patch 5 are

Reviewed-by: Felix Kuehling 

I didn't see a patch 6. Was the email lost or not sent intentionally?


6/6 is the patch from Alex to create unregistered ranges, which is
under code review. I cherry-picked it on top of my patches for testing.

Thanks.
Philip



Re: [PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-20 Thread Felix Kuehling

On 2021-04-20 at 8:45 p.m., Felix Kuehling wrote:
> On 2021-04-19 at 9:52 p.m., Alex Sierra wrote:
>> SVM ranges are created for unregistered memory, triggered
>> by page faults. These ranges are migrated/mapped to
>> GPU VRAM memory.
>>
>> Signed-off-by: Alex Sierra 
> This looks generally good to me. One more nit-pick inline in addition to
> Philip's comments. And one question.

I found another potential deadlock. See inline. [+Philip]



Re: [PATCH 5/6] drm/amdkfd: enable subsequent retry fault

2021-04-20 Thread Felix Kuehling
On 2021-04-20 at 4:21 p.m., Philip Yang wrote:
> After draining a stale retry fault, or failing to validate the range
> for recovery, we have to remove the fault address from the fault filter
> ring, to be able to handle a subsequent retry interrupt on the same
> address. Otherwise the retry fault will not be processed for recovery
> until the timeout has passed.
>
> Signed-off-by: Philip Yang 

Patches 1-3 and patch 5 are

Reviewed-by: Felix Kuehling 

I didn't see a patch 6. Was the email lost or not sent intentionally?


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 45dd055118eb..d90e0cb6e573 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2262,8 +2262,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>  
> 	mutex_lock(&prange->migrate_mutex);
>  
> - if (svm_range_skip_recover(prange))
> + if (svm_range_skip_recover(prange)) {
> + amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
>   goto out_unlock_range;
> + }
>  
>   timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
>   /* skip duplicate vm fault on different pages of same range */
> @@ -2325,6 +2327,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>  
>   if (r == -EAGAIN) {
>   pr_debug("recover vm fault later\n");
> + amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
>   r = 0;
>   }
>   return r;
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/6] drm/amdgpu: address remove from fault filter

2021-04-20 Thread Felix Kuehling
On 2021-04-20 at 4:21 p.m., Philip Yang wrote:
> Add an interface to remove an address from the fault filter ring by
> resetting the timestamp of its fault ring entry to 0; future VM faults
> on the address will then be processed for recovery.
>
> Checking an address against the fault ring, adding an address to it and
> removing an address from it are all serialized in the same deferred
> interrupt work, so there is no race condition.
>
> Signed-off-by: Philip Yang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 24 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  2 ++
>  2 files changed, 26 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index c39ed9eb0987..338e45fa66cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -387,6 +387,30 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device 
> *adev, uint64_t addr,
>   return false;
>  }
>  
> +/**
> + * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
> + *
> + * @adev: amdgpu device structure
> + * @addr: address of the VM fault
> + * @pasid: PASID of the process causing the fault
> + *
> + * Remove the address from fault filter, then future vm fault on this address
> + * will pass to retry fault handler to recover.
> + */
> +void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t 
> addr,
> +  uint16_t pasid)
> +{
> +	struct amdgpu_gmc *gmc = &adev->gmc;
> +
> +	uint64_t key = addr << 4 | pasid;
> +	struct amdgpu_gmc_fault *fault;
> +	uint32_t hash;
> +
> +	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
> +	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];

You need to loop over the fault ring to find a fault with the matching
key since there may be hash collisions.

You also need to make sure you don't break the singly linked list of keys
with the same hash when you remove an entry. I think the easier way to
remove an entry without breaking this ring+closed-hashing structure is
to reset fault->key rather than fault->timestamp.

Finally, you need to add locking to the fault ring structure. Currently
it's not protected by any locks because only one thread (the interrupt
handler) accesses it. Now you have another thread that can remove
entries, so you need to protect it with a lock. If you are handling
retry faults, you know that the interrupt handler is really a worker
thread, so you can use a mutex or a spin-lock, but it doesn't need to be
interrupt-safe.
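
For illustration only, a rough sketch along those lines (untested): the
probing loop mirrors the lookup in amdgpu_gmc_filter_faults(), it assumes
struct amdgpu_gmc_fault carries the timestamp/next/key fields used there,
and fault_lock is a hypothetical new lock in struct amdgpu_gmc, not
existing code:

	void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev,
					     uint64_t addr, uint16_t pasid)
	{
		struct amdgpu_gmc *gmc = &adev->gmc;
		uint64_t key = addr << 4 | pasid;
		struct amdgpu_gmc_fault *fault;
		uint32_t hash;
		uint64_t tmp;

		hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);

		mutex_lock(&gmc->fault_lock);	/* assumed new lock */
		fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
		while (fault->timestamp) {
			if (fault->key == key) {
				/* Reset the key, not the timestamp, so the
				 * chain of colliding entries stays intact.
				 */
				fault->key = 0;
				break;
			}
			/* Follow the chain; stop if the entry was reused. */
			tmp = fault->timestamp;
			fault = &gmc->fault_ring[fault->next];
			if (fault->timestamp >= tmp)
				break;
		}
		mutex_unlock(&gmc->fault_lock);
	}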

Regards,
  Felix


> + fault->timestamp = 0;
> +}
> +
>  int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
>  {
>   int r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 9d11c02a3938..498a7a0d5a9e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -318,6 +318,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
>struct amdgpu_gmc *mc);
>  bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
> uint16_t pasid, uint64_t timestamp);
> +void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t 
> addr,
> +  uint16_t pasid);
>  int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
>  void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
>  int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);


Re: [PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-20 Thread Felix Kuehling
On 2021-04-19 at 9:52 p.m., Alex Sierra wrote:
> SVM ranges are created for unregistered memory, triggered
> by page faults. These ranges are migrated/mapped to
> GPU VRAM memory.
>
> Signed-off-by: Alex Sierra 

This looks generally good to me. One more nit-pick inline in addition to
Philip's comments. And one question.


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 103 ++-
>  1 file changed, 101 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 45dd055118eb..a8a92c533cf7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2179,6 +2179,84 @@ svm_range_best_restore_location(struct svm_range 
> *prange,
>  
>   return -1;
>  }
> +static int
> +svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
> + unsigned long *start, unsigned long *last)
> +{
> + struct vm_area_struct *vma;
> + struct interval_tree_node *node;
> + unsigned long start_limit, end_limit;
> +
> + vma = find_vma(p->mm, addr);
> + if (!vma || addr < vma->vm_start) {
> + pr_debug("VMA does not exist in address [0x%llx]\n", addr);
> + return -EFAULT;
> + }
> + start_limit = max(vma->vm_start,
> + (unsigned long)ALIGN_DOWN(addr, 2UL << 20)) >> 
> PAGE_SHIFT;
> + end_limit = min(vma->vm_end,
> + (unsigned long)ALIGN(addr + 1, 2UL << 20)) >> 
> PAGE_SHIFT;
> + /* First range that starts after the fault address */
> +	node = interval_tree_iter_first(&p->svms.objects, (addr >> PAGE_SHIFT) 
> + 1, ULONG_MAX);
> + if (node) {
> + end_limit = min(end_limit, node->start);
> + /* Last range that ends before the fault address */
> +	node = container_of(rb_prev(&node->rb), struct 
> interval_tree_node, rb);
> + } else {
> + /* Last range must end before addr because there was no range 
> after addr */
> +	node = container_of(rb_last(&p->svms.objects.rb_root),
> + struct interval_tree_node, rb);
> + }
> + if (node)
> + start_limit = max(start_limit, node->last + 1);
> +
> + *start = start_limit;
> + *last = end_limit - 1;
> +
> + pr_debug("vma start: %lx start: %lx vma end: %lx last: %lx\n",
> +   vma->vm_start >> PAGE_SHIFT, *start,
> +   vma->vm_end >> PAGE_SHIFT, *last);
> +
> + return 0;
> +
> +}
> +static struct
> +svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
> + struct kfd_process *p,
> + struct mm_struct *mm,
> + int64_t addr)
> +{
> + struct svm_range *prange = NULL;
> + struct svm_range_list *svms;
> + unsigned long start, last;
> + uint32_t gpuid, gpuidx;
> +
> + if (svm_range_get_range_boundaries(p, addr << PAGE_SHIFT,
> +				   &start, &last))
> + return NULL;
> +
> +	svms = &p->svms;
> +	prange = svm_range_new(&p->svms, start, last);
> + if (!prange) {
> + pr_debug("Failed to create prange in address [0x%llx]\\n", 
> addr);
> + goto out;

You can just return here, since you're not doing any cleanup at the out:
label.


> + }
> +	if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
> + pr_debug("failed to get gpuid from kgd\n");
> + svm_range_free(prange);
> + prange = NULL;
> + goto out;

Just return.


> + }
> + prange->preferred_loc = gpuid;
> + prange->actual_loc = 0;
> + /* Gurantee prange is migrate it */
> + prange->validate_timestamp -= AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING;

Is this really specific to svm_range_create_unregistered_range? Or
should we always do this in svm_range_new to guarantee that new ranges
can get validated?

Regards,
  Felix


> + svm_range_add_to_svms(prange);
> + svm_range_add_notifier_locked(mm, prange);
> +
> +out:
> + return prange;
> +}
>  
>  /* svm_range_skip_recover - decide if prange can be recovered
>   * @prange: svm range structure
> @@ -2228,6 +2306,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>   struct kfd_process *p;
>   uint64_t timestamp;
>   int32_t best_loc, gpuidx;
> + bool write_locked = false;
>   int r = 0;
>  
>   p = kfd_lookup_process_by_pasid(pasid);
> @@ -2251,14 +2330,34 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>   }
>  
>   mmap_read_lock(mm);
> +retry_write_locked:
> 	mutex_lock(&svms->lock);
>   prange = svm_range_from_addr(svms, addr, NULL);
>   if (!prange) {
>   pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
>svms, addr);
> - r = 

Re: [PATCH] drm/amdkfd: svm ranges creation for unregistered memory

2021-04-20 Thread philip yang

On 2021-04-19 9:52 p.m., Alex Sierra wrote:

SVM ranges are created for unregistered memory, triggered
by page faults. These ranges are migrated/mapped to
GPU VRAM memory.

Signed-off-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 103 ++-
 1 file changed, 101 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 45dd055118eb..a8a92c533cf7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2179,6 +2179,84 @@ svm_range_best_restore_location(struct svm_range *prange,
 
 	return -1;
 }
+static int
+svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
+unsigned long *start, unsigned long *last)
+{
+	struct vm_area_struct *vma;
+	struct interval_tree_node *node;
+	unsigned long start_limit, end_limit;
+
+	vma = find_vma(p->mm, addr);
+	if (!vma || addr < vma->vm_start) {
+		pr_debug("VMA does not exist in address [0x%llx]\n", addr);
+		return -EFAULT;
+	}
+	start_limit = max(vma->vm_start,
+			(unsigned long)ALIGN_DOWN(addr, 2UL << 20)) >> PAGE_SHIFT;

line over 80 columns

+	end_limit = min(vma->vm_end,
+			(unsigned long)ALIGN(addr + 1, 2UL << 20)) >> PAGE_SHIFT;

line over 80 columns

+	/* First range that starts after the fault address */
+	node = interval_tree_iter_first(&p->svms.objects, (addr >> PAGE_SHIFT) + 1, ULONG_MAX);

line over 80 columns

+	if (node) {
+		end_limit = min(end_limit, node->start);
+		/* Last range that ends before the fault address */
+		node = container_of(rb_prev(&node->rb), struct interval_tree_node, rb);

line over 80 columns

+	} else {
+		/* Last range must end before addr because there was no range after addr */

line over 80 columns

+		node = container_of(rb_last(&p->svms.objects.rb_root),
+struct interval_tree_node, rb);
+	}
+	if (node)
+		start_limit = max(start_limit, node->last + 1);
+
+	*start = start_limit;
+	*last = end_limit - 1;
+
+	pr_debug("vma start: %lx start: %lx vma end: %lx last: %lx\n",

use 0x%lx, to show address as hex

+		  vma->vm_start >> PAGE_SHIFT, *start,
+		  vma->vm_end >> PAGE_SHIFT, *last);
+
+	return 0;
+
+}
+static struct
+svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
+		struct kfd_process *p,
+		struct mm_struct *mm,
+		int64_t addr)
+{
+	struct svm_range *prange = NULL;
+	struct svm_range_list *svms;
+	unsigned long start, last;
+	uint32_t gpuid, gpuidx;
+
+	if (svm_range_get_range_boundaries(p, addr << PAGE_SHIFT,

We use pfn addresses inside the svm code, and shift addresses by
PAGE_SHIFT when passing them to/from kernel functions; pass addr here.

+	   &start, &last))
+		return NULL;
+
+	svms = &p->svms;

svms is not used, can be removed.

+	prange = svm_range_new(&p->svms, start, last);
+	if (!prange) {
+		pr_debug("Failed to create prange in address [0x%llx]\\n", addr);
+		goto out;
+	}
+	if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
+		pr_debug("failed to get gpuid from kgd\n");
+		svm_range_free(prange);
+		prange = NULL;
+		goto out;
+	}
+	prange->preferred_loc = gpuid;
+	prange->actual_loc = 0;
+	/* Gurantee prange is migrate it */
+	prange->validate_timestamp -= AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING;
+	svm_range_add_to_svms(prange);
+	svm_range_add_notifier_locked(mm, prange);
+
+out:
+	return prange;
+}
 
 /* svm_range_skip_recover - decide if prange can be recovered
  * @prange: svm range structure
@@ -2228,6 +2306,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	struct kfd_process *p;
 	uint64_t timestamp;
 	int32_t best_loc, gpuidx;
+	bool write_locked = false;
 	int r = 0;
 
 	p = kfd_lookup_process_by_pasid(pasid);
@@ -2251,14 +2330,34 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	}
 
 	mmap_read_lock(mm);
+retry_write_locked:
 	mutex_lock(&svms->lock);
 	prange = svm_range_from_addr(svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 			 svms, addr);
-		r = -EFAULT;
-		goto out_unlock_svms;
+		if (!write_locked) {
+			/* Need the write lock to create new range with MMU notifier.
+			 * Also flush pending deferred work to make sure the interval

line over 80 columns

  
+			 */
+			mutex_unlock(&svms->lock);
+			mmap_read_unlock(mm);
+			svm_range_list_lock_and_flush_work(svms, mm);
+			write_locked = true;
+			goto retry_write_locked;
+		}
+		prange = svm_range_create_unregistered_range(adev, p, mm, addr);
+		if (!prange) {
+			pr_debug("failed to create unregisterd range svms 0x%p address [0x%llx]\n",

line over 80 columns

+			svms, addr);

indent

Regards,
Philip

+			mmap_write_downgrade(mm);
+			r = -EFAULT;
+			goto 

Re: [PATCH 35/40] drm/amd/amdgpu/amdgpu_cs: Repair some function naming disparity

2021-04-20 Thread Alex Deucher
Applied.  Thanks!

Alex

On Fri, Apr 16, 2021 at 11:54 AM Christian König
 wrote:
>
> On 16.04.21 at 16:37, Lee Jones wrote:
> > Fixes the following W=1 kernel build warning(s):
> >
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c:685: warning: expecting prototype 
> > for cs_parser_fini(). Prototype was for amdgpu_cs_parser_fini() instead
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c:1502: warning: expecting prototype 
> > for amdgpu_cs_wait_all_fence(). Prototype was for 
> > amdgpu_cs_wait_all_fences() instead
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c:1656: warning: expecting prototype 
> > for amdgpu_cs_find_bo_va(). Prototype was for amdgpu_cs_find_mapping() 
> > instead
> >
> > Cc: Alex Deucher 
> > Cc: "Christian König" 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: Sumit Semwal 
> > Cc: Jerome Glisse 
> > Cc: amd-gfx@lists.freedesktop.org
> > Cc: dri-de...@lists.freedesktop.org
> > Cc: linux-me...@vger.kernel.org
> > Cc: linaro-mm-...@lists.linaro.org
> > Signed-off-by: Lee Jones 
>
> Reviewed-by: Christian König 
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +++---
> >   1 file changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index b5c7669980458..90136f9dedd65 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -672,7 +672,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser 
> > *p)
> >   }
> >
> >   /**
> > - * cs_parser_fini() - clean parser states
> > + * amdgpu_cs_parser_fini() - clean parser states
> >* @parser: parser structure holding parsing context.
> >* @error:  error number
> >* @backoff:indicator to backoff the reservation
> > @@ -1488,7 +1488,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device 
> > *dev, void *data,
> >   }
> >
> >   /**
> > - * amdgpu_cs_wait_all_fence - wait on all fences to signal
> > + * amdgpu_cs_wait_all_fences - wait on all fences to signal
> >*
> >* @adev: amdgpu device
> >* @filp: file private
> > @@ -1639,7 +1639,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device 
> > *dev, void *data,
> >   }
> >
> >   /**
> > - * amdgpu_cs_find_bo_va - find bo_va for VM address
> > + * amdgpu_cs_find_mapping - find bo_va for VM address
> >*
> >* @parser: command submission parser context
> >* @addr: VM address
>


Re: [PATCH 33/40] drm/amd/amdgpu/amdgpu_ring: Provide description for 'sched_score'

2021-04-20 Thread Alex Deucher
Applied.  Thanks!

Alex

On Fri, Apr 16, 2021 at 11:54 AM Christian König
 wrote:
>
> On 16.04.21 at 16:37, Lee Jones wrote:
> > Fixes the following W=1 kernel build warning(s):
> >
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c:169: warning: Function parameter 
> > or member 'sched_score' not described in 'amdgpu_ring_init'
> >
> > Cc: Alex Deucher 
> > Cc: "Christian König" 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: Sumit Semwal 
> > Cc: amd-gfx@lists.freedesktop.org
> > Cc: dri-de...@lists.freedesktop.org
> > Cc: linux-me...@vger.kernel.org
> > Cc: linaro-mm-...@lists.linaro.org
> > Signed-off-by: Lee Jones 
>
> Reviewed-by: Christian König 
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 +
> >   1 file changed, 1 insertion(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> > index 688624ebe4211..7b634a1517f9c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> > @@ -158,6 +158,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
> >* @irq_src: interrupt source to use for this ring
> >* @irq_type: interrupt type to use for this ring
> >* @hw_prio: ring priority (NORMAL/HIGH)
> > + * @sched_score: optional score atomic shared with other schedulers
> >*
> >* Initialize the driver information for the selected ring (all asics).
> >* Returns 0 on success, error on failure.
>


Re: [PATCH 32/40] drm/amd/amdgpu/amdgpu_ttm: Fix incorrectly documented function 'amdgpu_ttm_copy_mem_to_mem()'

2021-04-20 Thread Alex Deucher
Applied.  Thanks!

Alex

On Fri, Apr 16, 2021 at 11:53 AM Christian König
 wrote:
>
> On 16.04.21 at 16:37, Lee Jones wrote:
> > Fixes the following W=1 kernel build warning(s):
> >
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:311: warning: expecting prototype 
> > for amdgpu_copy_ttm_mem_to_mem(). Prototype was for 
> > amdgpu_ttm_copy_mem_to_mem() instead
> >
> > Cc: Alex Deucher 
> > Cc: "Christian König" 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: Sumit Semwal 
> > Cc: Jerome Glisse 
> > Cc: amd-gfx@lists.freedesktop.org
> > Cc: dri-de...@lists.freedesktop.org
> > Cc: linux-me...@vger.kernel.org
> > Cc: linaro-mm-...@lists.linaro.org
> > Signed-off-by: Lee Jones 
>
> Reviewed-by: Christian König 
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 3bef0432cac2f..859314c0d6a39 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -288,7 +288,7 @@ static int amdgpu_ttm_map_buffer(struct 
> > ttm_buffer_object *bo,
> >   }
> >
> >   /**
> > - * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
> > + * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
> >* @adev: amdgpu device
> >* @src: buffer/address where to read from
> >* @dst: buffer/address where to write to
>


Re: [PATCH 31/40] drm/amd/amdgpu/amdgpu_gart: Correct a couple of function names in the docs

2021-04-20 Thread Alex Deucher
Applied.  Thanks!

Alex

On Fri, Apr 16, 2021 at 11:53 AM Christian König
 wrote:
>
> On 16.04.21 at 16:37, Lee Jones wrote:
> > Fixes the following W=1 kernel build warning(s):
> >
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c:73: warning: expecting prototype 
> > for amdgpu_dummy_page_init(). Prototype was for 
> > amdgpu_gart_dummy_page_init() instead
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c:96: warning: expecting prototype 
> > for amdgpu_dummy_page_fini(). Prototype was for 
> > amdgpu_gart_dummy_page_fini() instead
> >
> > Cc: Alex Deucher 
> > Cc: "Christian König" 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: Nirmoy Das 
> > Cc: amd-gfx@lists.freedesktop.org
> > Cc: dri-de...@lists.freedesktop.org
> > Signed-off-by: Lee Jones 
>
> Reviewed-by: Christian König 
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 4 ++--
> >   1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> > index c5a9a4fb10d2b..5562b5c90c032 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> > @@ -60,7 +60,7 @@
> >*/
> >
> >   /**
> > - * amdgpu_dummy_page_init - init dummy page used by the driver
> > + * amdgpu_gart_dummy_page_init - init dummy page used by the driver
> >*
> >* @adev: amdgpu_device pointer
> >*
> > @@ -86,7 +86,7 @@ static int amdgpu_gart_dummy_page_init(struct 
> > amdgpu_device *adev)
> >   }
> >
> >   /**
> > - * amdgpu_dummy_page_fini - free dummy page used by the driver
> > + * amdgpu_gart_dummy_page_fini - free dummy page used by the driver
> >*
> >* @adev: amdgpu_device pointer
> >*
>


Re: [PATCH 29/40] drm/amd/amdgpu/amdgpu_fence: Provide description for 'sched_score'

2021-04-20 Thread Alex Deucher
Applied.  Thanks!

Alex

On Fri, Apr 16, 2021 at 11:52 AM Christian König
 wrote:
>
> On 16.04.21 at 16:37, Lee Jones wrote:
> > Fixes the following W=1 kernel build warning(s):
> >
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c:444: warning: Function 
> > parameter or member 'sched_score' not described in 
> > 'amdgpu_fence_driver_init_ring'
> >
> > Cc: Alex Deucher 
> > Cc: "Christian König" 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: Sumit Semwal 
> > Cc: Jerome Glisse 
> > Cc: amd-gfx@lists.freedesktop.org
> > Cc: dri-de...@lists.freedesktop.org
> > Cc: linux-me...@vger.kernel.org
> > Cc: linaro-mm-...@lists.linaro.org
> > Signed-off-by: Lee Jones 
>
> Reviewed-by: Christian König 
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 1 +
> >   1 file changed, 1 insertion(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > index 47ea468596184..30772608eac6c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > @@ -434,6 +434,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring 
> > *ring,
> >*
> >* @ring: ring to init the fence driver on
> >* @num_hw_submission: number of entries on the hardware queue
> > + * @sched_score: optional score atomic shared with other schedulers
> >*
> >* Init the fence driver for the requested ring (all asics).
> >* Helper function for amdgpu_fence_driver_init().
>


Re: [PATCH 25/40] drm/radeon/radeon_device: Provide function name in kernel-doc header

2021-04-20 Thread Alex Deucher
Applied.  Thanks!

Alex

On Fri, Apr 16, 2021 at 11:51 AM Christian König
 wrote:
>
> On 16.04.21 at 16:37, Lee Jones wrote:
> > Fixes the following W=1 kernel build warning(s):
> >
> >   drivers/gpu/drm/radeon/radeon_device.c:1101: warning: This comment starts 
> > with '/**', but isn't a kernel-doc comment. Refer 
> > Documentation/doc-guide/kernel-doc.rst
> >
> > Cc: Alex Deucher 
> > Cc: "Christian König" 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: amd-gfx@lists.freedesktop.org
> > Cc: dri-de...@lists.freedesktop.org
> > Signed-off-by: Lee Jones 
>
> Reviewed-by: Christian König 
>
> > ---
> >   drivers/gpu/drm/radeon/radeon_device.c | 3 ++-
> >   1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
> > b/drivers/gpu/drm/radeon/radeon_device.c
> > index cc445c4cba2e3..46eea01950cb1 100644
> > --- a/drivers/gpu/drm/radeon/radeon_device.c
> > +++ b/drivers/gpu/drm/radeon/radeon_device.c
> > @@ -1098,7 +1098,8 @@ static bool radeon_check_pot_argument(int arg)
> >   }
> >
> >   /**
> > - * Determine a sensible default GART size according to ASIC family.
> > + * radeon_gart_size_auto - Determine a sensible default GART size
> > + * according to ASIC family.
> >*
> >* @family: ASIC family name
> >*/
>


Re: [PATCH 26/40] drm/amd/amdgpu/amdgpu_device: Remove unused variable 'r'

2021-04-20 Thread Alex Deucher
Applied.  Thanks!

Alex

On Fri, Apr 16, 2021 at 10:38 AM Lee Jones  wrote:
>
> Fixes the following W=1 kernel build warning(s):
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c: In function 
> ‘amdgpu_device_suspend’:
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:3733:6: warning: variable ‘r’ set 
> but not used [-Wunused-but-set-variable]
>
> Cc: Alex Deucher 
> Cc: "Christian König" 
> Cc: David Airlie 
> Cc: Daniel Vetter 
> Cc: Sumit Semwal 
> Cc: amd-gfx@lists.freedesktop.org
> Cc: dri-de...@lists.freedesktop.org
> Cc: linux-me...@vger.kernel.org
> Cc: linaro-mm-...@lists.linaro.org
> Signed-off-by: Lee Jones 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b4ad1c055c702..eef54b265ffdd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3730,7 +3730,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
>  int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
>  {
> struct amdgpu_device *adev = drm_to_adev(dev);
> -   int r;
>
> if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
> return 0;
> @@ -3745,7 +3744,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
> fbcon)
>
> amdgpu_ras_suspend(adev);
>
> -   r = amdgpu_device_ip_suspend_phase1(adev);
> +   amdgpu_device_ip_suspend_phase1(adev);
>
> if (!adev->in_s0ix)
> amdgpu_amdkfd_suspend(adev, adev->in_runpm);
> @@ -3755,7 +3754,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
> fbcon)
>
> amdgpu_fence_driver_suspend(adev);
>
> -   r = amdgpu_device_ip_suspend_phase2(adev);
> +   amdgpu_device_ip_suspend_phase2(adev);
> /* evict remaining vram memory
>  * This second call to evict vram is to evict the gart page table
>  * using the CPU.
> --
> 2.27.0
>


Re: 16 bpc fixed point (RGBA16) framebuffer support for core and AMD.

2021-04-20 Thread Alex Deucher
On Fri, Apr 16, 2021 at 12:29 PM Mario Kleiner
 wrote:
>
> Friendly ping to the AMD people. Nicholas, Harry, Alex, any feedback?
> Would be great to get this in sooner than later.
>

No objections from me.

Alex


> Thanks and have a nice weekend,
> -mario
>
> On Fri, Mar 19, 2021 at 10:03 PM Mario Kleiner
>  wrote:
> >
> > Hi,
> >
> > this patch series adds the fourcc's for 16 bit fixed point unorm
> > framebuffers to the core, and then an implementation for AMD gpu's
> > with DisplayCore.
> >
> > This is intended to allow for pageflipping to, and direct scanout of,
> > Vulkan swapchain images in the format VK_FORMAT_R16G16B16A16_UNORM.
> > I have patched AMD's GPUOpen amdvlk OSS driver to enable this format
> > for swapchains, mapping to DRM_FORMAT_XBGR16161616:
> > Link: 
> > https://github.com/kleinerm/pal/commit/a25d4802074b13a8d5f7edc96ae45469ecbac3c4
> >
> > My main motivation for this is squeezing every bit of precision
> > out of the hardware for scientific and medical research applications,
> > where fp16 in the unorm range is limited to ~11 bpc effective linear
> > precision in the upper half [0.5;1.0] of the unorm range, although
> > the hardware could do at least 12 bpc.
> >
> > It has been successfully tested on AMD RavenRidge (DCN-1), and with
> > Polaris11 (DCE-11.2). Up to two displays were active on RavenRidge
> > (DP 2560x1440@144Hz + HDMI 2560x1440@120Hz), the maximum supported
> > on my hw, both running at 10 bpc DP output depth.
> >
> > Up to three displays were active on the Polaris (DP 2560x1440@144Hz +
> > 2560x1440@100Hz USB-C DP-altMode-to-HDMI converter + eDP 2880x1800@60Hz
> > Apple Retina panel), all running at 10 bpc output depth.
> >
> > No malfunctions, visual artifacts or other oddities were observed
> > (apart from an adventurous mess of cables and adapters on my desk),
> > suggesting it works.
> >
> > I used my automatic photometer measurement procedure to verify the
> > effective output precision of 10 bpc DP native signal + spatial
> > dithering in the gpu as enabled by the amdgpu driver. Results show
> > the expected 12 bpc precision i hoped for -- the current upper limit
> > for AMD display hw afaik.
> >
> > So it seems to work in the way i hoped :).
> >
> > Some open questions wrt. AMD DC, to be addressed in this patch series, or
> > follow-up patches if necessary:
> >
> > - For the atomic check for plane scaling, the current patch will
> > apply the same hw limits as for other rgb fixed point fb's, e.g.,
> > for 8 bpc rgb8. Is this correct? Or would we need to use the fp16
> > limits, because this is also a 64 bpp format? Or something new
> > entirely?
> >
> > - I haven't added the new fourcc to the DCC tables yet. Should I?
> >
> > - I had to change an assert for DCE to allow 36bpp linebuffers (patch 4/5).
> > It looks to me as if that assert was inconsistent with other places
> > in the driver where COLOR_DEPTH121212 is supported, and looking at
> > the code, the change seems harmless. At least on DCE-11.2 the change
> > didn't cause any noticeable (by myself) or measurable (by my equipment)
> > problems on any of the 3 connected displays.
> >
> > - Related to that change, while I needed to increase lb pixelsize to 36bpp
> > to get > 10 bpc effective precision on DCN, I didn't need to do that
> > on DCE. Also no change of lb pixelsize was needed on either DCN or DCE
> > to get > 10 bpc precision for fp16 framebuffers, so something seems to
> > behave differently for floating point 16 vs. fixed point 16. This all
> > seems to suggest one could leave lb pixelsize at the old 30 bpp value
> > on at least DCE-11.2 and still get the > 10 bpc precision if one wanted
> > to avoid the changes of patch 4/5.
> >
> > Thanks,
> > -mario
> >
> >
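For intuition on the ~11 bpc figure quoted above, a quick standalone check —
the constants below are just the IEEE half-float parameters, nothing from the
patch series itself: fp16 carries 10 explicit mantissa bits, so in the binade
[0.5, 1.0) representable values are spaced 2^-11 apart (~11 bits of linear
precision), while a 16-bit unorm is spaced 1/65535 across the whole range.

#include <stdio.h>

int main(void)
{
	/* fp16 step size in [0.5, 1.0): exponent -1 and 10 mantissa bits
	 * give ulp = 2^(-1-10) = 2^-11, i.e. ~11 bpc of linear precision. */
	double fp16_step  = 1.0 / (1 << 11);
	/* 16-bit unorm (e.g. RGBA16) step: uniform across [0.0, 1.0]. */
	double unorm_step = 1.0 / 65535.0;

	printf("fp16 step in [0.5,1.0): %g\n", fp16_step);
	printf("16-bit unorm step:      %g\n", unorm_step);
	return 0;
}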


Re: [PATCH v3 5/7] drm/vmwgfx: Inline ttm_bo_mmap() into vmwgfx driver

2021-04-20 Thread Daniel Vetter
On Tue, Apr 20, 2021 at 10:23 PM Felix Kuehling  wrote:
>
>
> Am 2021-04-20 um 4:51 a.m. schrieb Daniel Vetter:
> >>> Whole series is Reviewed-by: Christian König 
> >> Thanks a lot. If I'm not mistaken, the patches at [1] need to go in first.
>> So it could take a bit until this lands.
> >>
> >> Otherwise, this series could go through the same tree as [1] if nouveau and
> >> vmwgfx devs don't mind.
> > I would land it all through drm-misc-next. Maybe check with Alex on irc
> > for an ack for merging that way, but I don't think this will cause issues
> > against the amdgpu tree. Lots of ttm cleanup has landed this way already
> > past few months. Otherwise you could create a small topic branch with
> > these patches here and send that to Alex, and he can sort out the
> > interaction with Felix' series.
> > -Daniel
>
> My patch series involved some pretty far-reaching changes in KFD
> (renaming some variables in KFD and amdgpu, changing the KFD->amdgpu
> interface). We already submitted other patches on top of it that have
> dependencies on it. If we decide to deliver this through a different
> tree and remove it from amd-staging-drm-next, there will be conflicts to
> resolve when removing it from amd-staging-drm-next, and again the next
> time you merge with amd-staging-drm-next.

Ah then the usual way is for Alex to assemble a topic pull request
(stable, non-rebasing) with those select patches, which then gets
merged into drm-misc-next. Or we smash it all into amdgpu-next. Or we
just wait until -rc2 when drm-next is back open for business.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH v3 5/7] drm/vmwgfx: Inline ttm_bo_mmap() into vmwgfx driver

2021-04-20 Thread Felix Kuehling

Am 2021-04-20 um 4:51 a.m. schrieb Daniel Vetter:
>>> Whole series is Reviewed-by: Christian König 
>> Thanks a lot. If I'm not mistaken, the patches at [1] need to go in first.
>> So it could take a bit until this lands.
>>
>> Otherwise, this series could go through the same tree as [1] if nouveau and
>> vmwgfx devs don't mind.
> I would land it all through drm-misc-next. Maybe check with Alex on irc
> for an ack for merging that way, but I don't think this will cause issues
> against the amdgpu tree. Lots of ttm cleanup has landed this way already
> past few months. Otherwise you could create a small topic branch with
> these patches here and send that to Alex, and he can sort out the
> interaction with Felix' series.
> -Daniel

My patch series involved some pretty far-reaching changes in KFD
(renaming some variables in KFD and amdgpu, changing the KFD->amdgpu
interface). We already submitted other patches on top of it that have
dependencies on it. If we decide to deliver this through a different
tree and remove it from amd-staging-drm-next, there will be conflicts to
resolve when removing it from amd-staging-drm-next, and again the next
time you merge with amd-staging-drm-next.

Regards,
  Felix




[PATCH 1/6] drm/amdkfd: retry validation to recover range

2021-04-20 Thread Philip Yang
GPU VM retry fault recovery of a range needs to retry validation if

1. the range was split in parallel by an unmap while recovering
2. the range was migrated to system memory and updated in system
memory while recovering

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 0e0b4ffd20ab..40ef5709d0a7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1402,11 +1402,13 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
svm_range_lock(prange);
if (!prange->actual_loc) {
if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+   pr_debug("hmm update the range, need validate again\n");
r = -EAGAIN;
goto unlock_out;
}
}
	if (!list_empty(&prange->child_list)) {
+   pr_debug("range split by unmap in parallel, validate again\n");
r = -EAGAIN;
goto unlock_out;
}
@@ -2254,6 +2256,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
 out:
kfd_unref_process(p);
 
+   if (r == -EAGAIN) {
+   pr_debug("recover vm fault later\n");
+   r = 0;
+   }
return r;
 }
 
-- 
2.17.1



[PATCH 5/6] drm/amdkfd: enable subsequent retry fault

2021-04-20 Thread Philip Yang
After draining a stale retry fault, or after failing to validate the range
to recover, the fault address has to be removed from the fault filter ring
so that a subsequent retry interrupt on the same address can be handled.
Otherwise the retry fault will not be processed for recovery until the
timeout has passed.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 45dd055118eb..d90e0cb6e573 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2262,8 +2262,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
 
	mutex_lock(&prange->migrate_mutex);
 
-   if (svm_range_skip_recover(prange))
+   if (svm_range_skip_recover(prange)) {
+   amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
goto out_unlock_range;
+   }
 
timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
/* skip duplicate vm fault on different pages of same range */
@@ -2325,6 +2327,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
 
if (r == -EAGAIN) {
pr_debug("recover vm fault later\n");
+   amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
r = 0;
}
return r;
-- 
2.17.1



[PATCH 2/6] drm/amdgpu: return IH ring drain finished if ring is empty

2021-04-20 Thread Philip Yang
Sometimes the IH does not set the ring wptr overflow flag after the wptr
exceeds the rptr. As a workaround, if the IH rptr equals the wptr, the ring
is empty; return true to indicate that the IH ring checkpoint has been
processed and the IH ring drain is finished.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index faaa6aa2faaf..a36e191cf086 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -175,7 +175,9 @@ static bool amdgpu_ih_has_checkpoint_processed(struct 
amdgpu_device *adev,
cur_rptr += ih->ptr_mask + 1;
*prev_rptr = cur_rptr;
 
-   return cur_rptr >= checkpoint_wptr;
+   /* check ring is empty to workaround missing wptr overflow flag */
+   return cur_rptr >= checkpoint_wptr ||
+  (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
 }
 
 /**
-- 
2.17.1
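For illustration, a self-contained toy of the wrap handling in the hunk
above — the guard condition and all constants here are assumptions, since
the hunk only shows the `cur_rptr += ih->ptr_mask + 1` extension:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint64_t ptr_mask = 0xffff;   /* hypothetical 64K-entry ring */
	uint64_t checkpoint_wptr = 0x10002; /* checkpoint lies past one wrap */
	uint64_t cur_rptr = 0x0004;         /* raw rptr read after the wrap */

	/* Extend the raw rptr past the mask once it has wrapped, so the
	 * monotonic >= comparison keeps working across the wrap point. */
	if (cur_rptr < checkpoint_wptr)
		cur_rptr += ptr_mask + 1;

	assert(cur_rptr >= checkpoint_wptr);     /* checkpoint processed */
	assert((cur_rptr & ptr_mask) == 0x0004); /* hw index recovered */
	return 0;
}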



[PATCH 4/6] drm/amdgpu: address remove from fault filter

2021-04-20 Thread Philip Yang
Add an interface to remove an address from the fault filter ring by
resetting the timestamp of its fault ring entry to 0; future VM faults on
the address will then be processed for recovery.

Checking a fault address against the fault ring, adding an address to the
fault ring, and removing an address from the fault ring are all serialized
in the same interrupt deferred work, so there is no race condition.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 24 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  2 ++
 2 files changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c39ed9eb0987..338e45fa66cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -387,6 +387,30 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, 
uint64_t addr,
return false;
 }
 
+/**
+ * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
+ *
+ * @adev: amdgpu device structure
+ * @addr: address of the VM fault
+ * @pasid: PASID of the process causing the fault
+ *
+ * Remove the address from fault filter, then future vm fault on this address
+ * will pass to retry fault handler to recover.
+ */
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+uint16_t pasid)
+{
+   struct amdgpu_gmc *gmc = &adev->gmc;
+
+   uint64_t key = addr << 4 | pasid;
+   struct amdgpu_gmc_fault *fault;
+   uint32_t hash;
+
+   hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
+   fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
+   fault->timestamp = 0;
+}
+
 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 {
int r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 9d11c02a3938..498a7a0d5a9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -318,6 +318,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
 struct amdgpu_gmc *mc);
 bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
  uint16_t pasid, uint64_t timestamp);
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+uint16_t pasid);
 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
-- 
2.17.1
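As an aside, a userspace sketch of how the key above lands in a hash
bucket. The golden-ratio constant is an assumption mirroring
include/linux/hash.h, and the order of 8 (256 buckets) is made up — the
driver uses AMDGPU_GMC_FAULT_HASH_ORDER:

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_GOLDEN_RATIO_64 0x61c8864680b583ebULL

/* Assumed to mirror the kernel's hash_64(): multiply, keep the top bits. */
static uint32_t example_hash_64(uint64_t val, unsigned int bits)
{
	return (uint32_t)((val * EXAMPLE_GOLDEN_RATIO_64) >> (64 - bits));
}

int main(void)
{
	uint64_t addr = 0x123456ULL;      /* hypothetical faulting address */
	uint16_t pasid = 0x8001;          /* hypothetical PASID */
	uint64_t key = addr << 4 | pasid; /* same keying as the patch */

	printf("bucket = %u\n", example_hash_64(key, 8));
	return 0;
}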



[PATCH 3/6] drm/amdkfd: handle stale retry fault

2021-04-20 Thread Philip Yang
Retry fault interrupts may still be pending in the IH ring after the GPU
page table is updated to recover a VM fault, because each page of the
range generates a retry fault interrupt. There is a race if the
application unmaps the range, removing and freeing it, before the retry
fault work restore_pages handles the retry fault interrupt: the range can
no longer be found, so the VM fault cannot be recovered and an incorrect
GPU VM fault is reported to the application.

Before unmapping to remove and free a range, drain the retry fault
interrupts from IH ring1 to ensure no retry fault arrives after the range
is removed.

The drain skips ranges that are on the deferred list to be removed, and
child ranges, which are split off by an unmap, are not added to svms, and
have no interval notifier.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 69 +++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 40ef5709d0a7..45dd055118eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1830,6 +1830,28 @@ svm_range_handle_list_op(struct svm_range_list *svms, 
struct svm_range *prange)
}
 }
 
+static void svm_range_drain_retry_fault(struct svm_range_list *svms)
+{
+   struct kfd_process_device *pdd;
+   struct amdgpu_device *adev;
+   struct kfd_process *p;
+   uint32_t i;
+
+   p = container_of(svms, struct kfd_process, svms);
+
+   for (i = 0; i < p->n_pdds; i++) {
+   pdd = p->pdds[i];
+   if (!pdd)
+   continue;
+
+   pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
+   adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+   amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
+   pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
+   }
+}
+
 static void svm_range_deferred_list_work(struct work_struct *work)
 {
struct svm_range_list *svms;
@@ -1847,6 +1869,10 @@ static void svm_range_deferred_list_work(struct 
work_struct *work)
pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
 prange->start, prange->last, prange->work_item.op);
 
+   /* Make sure no stale retry fault coming after range is freed */
+   if (prange->work_item.op == SVM_OP_UNMAP_RANGE)
+   svm_range_drain_retry_fault(prange->svms);
+
mm = prange->work_item.mm;
mmap_write_lock(mm);
	mutex_lock(&svms->lock);
@@ -2154,6 +2180,44 @@ svm_range_best_restore_location(struct svm_range *prange,
return -1;
 }
 
+/* svm_range_skip_recover - decide if prange can be recovered
+ * @prange: svm range structure
+ *
+ * GPU vm retry fault handle skip recover the range for cases:
+ * 1. prange is on deferred list to be removed after unmap, it is stale fault,
+ *deferred list work will drain the stale fault before free the prange.
+ * 2. prange is on deferred list to add interval notifier after split, or
+ * 3. prange is child range, it is split from parent prange, recover later
+ *after interval notifier is added.
+ *
+ * Return: true to skip recover, false to recover
+ */
+static bool svm_range_skip_recover(struct svm_range *prange)
+{
+   struct svm_range_list *svms = prange->svms;
+
+   spin_lock(&svms->deferred_list_lock);
+   if (list_empty(&prange->deferred_list) &&
+   list_empty(&prange->child_list)) {
+   spin_unlock(&svms->deferred_list_lock);
+   return false;
+   }
+   spin_unlock(&svms->deferred_list_lock);
+
+   if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+   pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n",
+svms, prange, prange->start, prange->last);
+   return true;
+   }
+   if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP ||
+   prange->work_item.op == SVM_OP_ADD_RANGE) {
+   pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n",
+svms, prange, prange->start, prange->last);
+   return true;
+   }
+   return false;
+}
+
 int
 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
uint64_t addr)
@@ -2189,7 +2253,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
mmap_read_lock(mm);
	mutex_lock(&svms->lock);
prange = svm_range_from_addr(svms, addr, NULL);
-
if (!prange) {
pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 svms, addr);
@@ -2198,6 +2261,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
}
 
	mutex_lock(&prange->migrate_mutex);
+
+   if (svm_range_skip_recover(prange))
+   goto out_unlock_range;
+
timestamp = ktime_to_us(ktime_get()) - 

[PATCH] drm/amdgpu: Add graphics cache rinse packet for sdma 5.0

2021-04-20 Thread Alex Deucher
Add emit mem sync callback for sdma_v5_0

In the amdgpu sync object test, three threads created jobs
to send GFX and SDMA IBs in sequence. After the first
GFX thread joined, sometimes the third thread reused the
same physical page to store the SDMA IB. There is a risk
that SDMA will read the stale GFX IB from that physical
page, so it's better to flush the cache before committing
the SDMA IB.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 28 ++
 1 file changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 920fc6d4a127..d294ef6a625a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -437,6 +437,33 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring 
*ring,
amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
 }
 
+/**
+ * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ *
+ * flush the IB by graphics cache rinse.
+ */
+static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+uint32_t gcr_cntl =
+   SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
+   SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+   SDMA_GCR_GLI_INV(1);
+
+   /* flush entire cache L0/L1/L2, this can be optimized by performance 
requirement */
+   amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
+   amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+   amdgpu_ring_write(ring, 
SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+   SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+   amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+   SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 
16));
+   amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+   SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
 /**
  * sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
  *
@@ -1643,6 +1670,7 @@ static const struct amdgpu_ring_funcs 
sdma_v5_0_ring_funcs = {
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, 
vm fence */
.emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
.emit_ib = sdma_v5_0_ring_emit_ib,
+   .emit_mem_sync = sdma_v5_0_ring_emit_mem_sync,
.emit_fence = sdma_v5_0_ring_emit_fence,
.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v5_0_ring_emit_vm_flush,
-- 
2.30.2



Re: [BUG] VAAPI encoder cause kernel panic if encoded video in 4K

2021-04-20 Thread Mikhail Gavrilov
On Wed, 14 Apr 2021 at 11:48, Christian König
 wrote:
>
> >> commit f63da9ae7584280582cbc834b20cc18bfb203b14
> >> Author: Philip Yang 
> >> Date:   Thu Apr 1 00:22:23 2021 -0400
> >>
> >>   drm/amdgpu: reserve fence slot to update page table
> >>
>
> That is expected behavior, the application is just buggy and causing a
> page fault on the GPU.
>
> The kernel should just not crash with a backtrace.
>

Any chance of seeing this commit backported to 5.12?
I plan to submit a bug report to the OBS devs and don't want my system to
hang again and again while I test their patches.

-- 
Best Regards,
Mike Gavrilov.


Re: [PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

2021-04-20 Thread Christian König




On 20.04.21 at 13:51, Roy Sun wrote:

[SNIP]
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 848e175e99ff..72727117c479 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -150,6 +150,7 @@ struct amdgpu_vm_bo_base {
/* protected by spinlock */
struct list_headvm_status;
  
+	struct list_head		bo_head;


Well I'm repeating myself. This is a no-go.

You already have the same information in the VM state machine, see 
function amdgpu_debugfs_vm_bo_info().


Christian.
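To make the alternative concrete, a toy, self-contained sketch of the idea
— deriving per-VM memory totals by walking the state lists the BOs already
sit on instead of maintaining a parallel bo_list. The structures are
invented for the example and are not the amdgpu ones:

#include <stddef.h>
#include <stdio.h>

struct toy_bo {
	size_t size;
	int in_vram;          /* 1 = VRAM, 0 = GTT */
	struct toy_bo *next;  /* linkage on its current state list */
};

/* Walk one existing state list and accumulate placement totals. */
static void toy_accumulate(struct toy_bo *head, size_t *vram, size_t *gtt)
{
	struct toy_bo *bo;

	for (bo = head; bo; bo = bo->next) {
		if (bo->in_vram)
			*vram += bo->size;
		else
			*gtt += bo->size;
	}
}

int main(void)
{
	struct toy_bo b2 = { 1 << 20, 0, NULL };   /* 1 MiB in GTT */
	struct toy_bo b1 = { 4 << 20, 1, &b2 };    /* 4 MiB in VRAM */
	struct toy_bo b3 = { 2 << 20, 1, NULL };   /* 2 MiB in VRAM */
	struct toy_bo *idle = &b1, *evicted = &b3; /* existing lists */
	size_t vram = 0, gtt = 0;

	toy_accumulate(idle, &vram, &gtt);
	toy_accumulate(evicted, &vram, &gtt);
	printf("vram %zu MiB, gtt %zu MiB\n", vram >> 20, gtt >> 20);
	return 0;
}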


/* protected by the BO being reserved */
boolmoved;
  };
@@ -274,6 +275,7 @@ struct amdgpu_vm {
struct list_headinvalidated;
spinlock_t  invalidated_lock;
  
+	struct list_head	bo_list;

/* BO mappings freed, but not yet updated in the PT */
struct list_headfreed;
  
@@ -458,6 +460,8 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,

struct amdgpu_vm *vm);
  void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
  
+void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,

+   uint64_t *gtt_mem);
  #if defined(CONFIG_DEBUG_FS)
  void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
  #endif




[PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

2021-04-20 Thread Roy Sun
Tracking devices, process info and fence info using
/proc/pid/fdinfo

Signed-off-by: David M Nieto 
Signed-off-by: Roy Sun 
---
 drivers/gpu/drm/amd/amdgpu/Makefile|  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 61 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 92 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h | 43 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 +
 10 files changed, 233 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
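For context, a minimal userspace sketch of consuming such data. It assumes
only the standard /proc/<pid>/fdinfo/<fd> layout; the render node path is
hypothetical, and the exact keys emitted for amdgpu are whatever the
driver's show_fdinfo() callback prints:

#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	char path[64], line[256];
	int fd = open("/dev/dri/renderD128", O_RDWR); /* hypothetical node */
	FILE *f;

	if (fd < 0)
		return 1;
	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
	f = fopen(path, "r");
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) /* dump per-fd key/value lines */
		fputs(line, stdout);
	fclose(f);
	return 0;
}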

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index ee85e8aba636..d216b7ecb5d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,6 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o
 
+amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
+
 amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
 
 # add asic specific block
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 125b25a5ce5b..3365feae15e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -107,6 +107,7 @@
 #include "amdgpu_gfxhub.h"
 #include "amdgpu_df.h"
 #include "amdgpu_smuio.h"
+#include "amdgpu_fdinfo.h"
 
 #define MAX_GPU_INSTANCE   16
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0350205c4897..01fe60fedcbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -651,3 +651,64 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
 }
+
+void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity 
*centity,
+   ktime_t *total, ktime_t *max)
+{
+   ktime_t now, t1;
+   uint32_t i;
+
+   now = ktime_get();
+   for (i = 0; i < amdgpu_sched_jobs; i++) {
+   struct dma_fence *fence;
+   struct drm_sched_fence *s_fence;
+
+   spin_lock(&ctx->ring_lock);
+   fence = dma_fence_get(centity->fences[i]);
+   spin_unlock(&ctx->ring_lock);
+   if (!fence)
+   continue;
+   s_fence = to_drm_sched_fence(fence);
+   if (!dma_fence_is_signaled(&s_fence->scheduled))
+   continue;
+   t1 = s_fence->scheduled.timestamp;
+   if (t1 >= now)
+   continue;
+   if (dma_fence_is_signaled(&s_fence->finished) &&
+   s_fence->finished.timestamp < now)
+   *total += ktime_sub(s_fence->finished.timestamp, t1);
+   else
+   *total += ktime_sub(now, t1);
+   t1 = ktime_sub(now, t1);
+   dma_fence_put(fence);
+   *max = max(t1, *max);
+   }
+}
+
+ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
+   uint32_t idx, uint64_t *elapsed)
+{
+   struct idr *idp;
+   struct amdgpu_ctx *ctx;
+   uint32_t id;
+   struct amdgpu_ctx_entity *centity;
+   ktime_t total = 0, max = 0;
+
+   if (idx >= AMDGPU_MAX_ENTITY_NUM)
+   return 0;
+   idp = &mgr->ctx_handles;
+   mutex_lock(&mgr->lock);
+   idr_for_each_entry(idp, ctx, id) {
+   if (!ctx->entities[hwip][idx])
+   continue;
+
+   centity = ctx->entities[hwip][idx];
+   amdgpu_ctx_fence_time(ctx, centity, &total, &max);
+   }
+
+   mutex_unlock(&mgr->lock);
+   if (elapsed)
+   *elapsed = max;
+
+   return total;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index f54e10314661..10dcf59a5c6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -87,5 +87,8 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
-
+ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
+   uint32_t idx, uint64_t *elapsed);
+void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity 
*centity,
+   ktime_t *total, ktime_t *max);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 

[PATCH 1/2] drm/scheduler: Change scheduled fence track

2021-04-20 Thread Roy Sun
Update the timestamp of the scheduled fence on HW
completion of the previous fence.

This allows more accurate tracking of fence
execution in HW.

Signed-off-by: David M Nieto 
Signed-off-by: Roy Sun 
---
 drivers/gpu/drm/scheduler/sched_main.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)
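A toy illustration of the accounting this enables — with back-to-back jobs
the HW really starts job N when job N-1 finishes, so propagating the
previous finish time into the next scheduled timestamp charges each job
only its own HW time. The numbers are invented:

#include <stdio.h>

int main(void)
{
	/* Invented timeline, in us: both jobs were scheduled at t = 0,
	 * but the HW executes them back to back. */
	long finish0 = 100;
	long sched1 = 0, finish1 = 250;

	/* Naive accounting: job 1 appears to run for 250 us. */
	printf("naive job1 time:     %ld us\n", finish1 - sched1);

	/* With the fix: job 1's scheduled timestamp is bumped to job 0's
	 * finish time, so it is charged only its real 150 us of HW time. */
	sched1 = finish0;
	printf("corrected job1 time: %ld us\n", finish1 - sched1);
	return 0;
}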

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 92d8de24d0a1..dc05a20a8ef2 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -515,7 +515,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler 
*sched)
 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
 
 /**
- * drm_sched_resubmit_jobs_ext - helper to relunch certain number of jobs from 
mirror ring list
+ * drm_sched_resubmit_jobs_ext - helper to relaunch certain number of jobs 
from pending list
  *
  * @sched: scheduler instance
  * @max: job numbers to relaunch
@@ -671,7 +671,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
 static struct drm_sched_job *
 drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 {
-   struct drm_sched_job *job;
+   struct drm_sched_job *job, *next;
 
/*
 * Don't destroy jobs while the timeout worker is running  OR thread
@@ -690,6 +690,13 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
/* remove job from pending_list */
list_del_init(>list);
+   /* account for the next fence in the queue */
+   next = list_first_entry_or_null(&sched->pending_list,
+   struct drm_sched_job, list);
+   if (next) {
+   next->s_fence->scheduled.timestamp =
+   job->s_fence->finished.timestamp;
+   }
} else {
job = NULL;
/* queue timeout for next job */
-- 
2.31.1



Re: [PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

2021-04-20 Thread Christian König

On 19.04.21 at 08:26, Roy Sun wrote:

Tracking devices, process info and fence info using
/proc/pid/fdinfo

Signed-off-by: David M Nieto 
Signed-off-by: Roy Sun 
---
  drivers/gpu/drm/amd/amdgpu/Makefile|  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 61 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h|  5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 95 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h | 43 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 +
  10 files changed, 239 insertions(+), 2 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index ee85e8aba636..d216b7ecb5d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,6 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o
  
+amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o

+
  amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
  
  # add asic specific block

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 125b25a5ce5b..3365feae15e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -107,6 +107,7 @@
  #include "amdgpu_gfxhub.h"
  #include "amdgpu_df.h"
  #include "amdgpu_smuio.h"
+#include "amdgpu_fdinfo.h"
  
  #define MAX_GPU_INSTANCE		16
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index 0350205c4897..01fe60fedcbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -651,3 +651,64 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
  }
+
+void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity 
*centity,
+   ktime_t *total, ktime_t *max)
+{
+   ktime_t now, t1;
+   uint32_t i;
+
+   now = ktime_get();
+   for (i = 0; i < amdgpu_sched_jobs; i++) {
+   struct dma_fence *fence;
+   struct drm_sched_fence *s_fence;
+
+   spin_lock(&ctx->ring_lock);
+   fence = dma_fence_get(centity->fences[i]);
+   spin_unlock(&ctx->ring_lock);
+   if (!fence)
+   continue;
+   s_fence = to_drm_sched_fence(fence);
+   if (!dma_fence_is_signaled(&s_fence->scheduled))
+   continue;
+   t1 = s_fence->scheduled.timestamp;
+   if (t1 >= now)
+   continue;
+   if (dma_fence_is_signaled(&s_fence->finished) &&
+   s_fence->finished.timestamp < now)
+   *total += ktime_sub(s_fence->finished.timestamp, t1);
+   else
+   *total += ktime_sub(now, t1);
+   t1 = ktime_sub(now, t1);
+   dma_fence_put(fence);
+   *max = max(t1, *max);
+   }
+}
+
+ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
+   uint32_t idx, uint64_t *elapsed)
+{
+   struct idr *idp;
+   struct amdgpu_ctx *ctx;
+   uint32_t id;
+   struct amdgpu_ctx_entity *centity;
+   ktime_t total = 0, max = 0;
+
+   if (idx >= AMDGPU_MAX_ENTITY_NUM)
+   return 0;
+   idp = &mgr->ctx_handles;
+   mutex_lock(&mgr->lock);
+   idr_for_each_entry(idp, ctx, id) {
+   if (!ctx->entities[hwip][idx])
+   continue;
+
+   centity = ctx->entities[hwip][idx];
+   amdgpu_ctx_fence_time(ctx, centity, &total, &max);
+   }
+
+   mutex_unlock(&mgr->lock);
+   if (elapsed)
+   *elapsed = max;
+
+   return total;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index f54e10314661..10dcf59a5c6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -87,5 +87,8 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
  void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
  long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
-
+ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
+   uint32_t idx, uint64_t *elapsed);
+void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity 
*centity,
+   ktime_t *total, ktime_t *max);
  #endif
diff --git 

RE: [PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

2021-04-20 Thread Deng, Emily
Hi Christian,
	Could you help review these patches again? Thanks.

Best wishes
Emily Deng
>-Original Message-
>From: amd-gfx  On Behalf Of Sun, Roy
>Sent: Tuesday, April 20, 2021 4:54 PM
>To: Sun, Roy ; amd-gfx@lists.freedesktop.org
>Cc: Nieto, David M 
>Subject: RE: [PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface
>
>[AMD Official Use Only - Internal Distribution Only]
>
>Ping.
>Could you help review this patch again?
>
>BR
>Roy
>
>-Original Message-
>From: Roy Sun 
>Sent: Monday, April 19, 2021 2:26 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Sun, Roy ; Nieto, David M 
>Subject: [PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface
>
>Tracking devices, process info and fence info using /proc/pid/fdinfo
>
>Signed-off-by: David M Nieto 
>Signed-off-by: Roy Sun 
>---
> drivers/gpu/drm/amd/amdgpu/Makefile|  2 +
> drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 61 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h|  5 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  5 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 95
>++  drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h |
>43 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 +
> 10 files changed, 239 insertions(+), 2 deletions(-)  create mode 100644
>drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>b/drivers/gpu/drm/amd/amdgpu/Makefile
>index ee85e8aba636..d216b7ecb5d1 100644
>--- a/drivers/gpu/drm/amd/amdgpu/Makefile
>+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>@@ -58,6 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>   amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o
>amdgpu_rap.o \
>   amdgpu_fw_attestation.o amdgpu_securedisplay.o
>
>+amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>+
> amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
>
> # add asic specific block
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>index 125b25a5ce5b..3365feae15e1 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>@@ -107,6 +107,7 @@
> #include "amdgpu_gfxhub.h"
> #include "amdgpu_df.h"
> #include "amdgpu_smuio.h"
>+#include "amdgpu_fdinfo.h"
>
> #define MAX_GPU_INSTANCE  16
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>index 0350205c4897..01fe60fedcbe 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>@@ -651,3 +651,64 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr
>*mgr)
>   idr_destroy(&mgr->ctx_handles);
>   mutex_destroy(&mgr->lock);
> }
>+
>+void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct
>amdgpu_ctx_entity *centity,
>+  ktime_t *total, ktime_t *max)
>+{
>+  ktime_t now, t1;
>+  uint32_t i;
>+
>+  now = ktime_get();
>+  for (i = 0; i < amdgpu_sched_jobs; i++) {
>+  struct dma_fence *fence;
>+  struct drm_sched_fence *s_fence;
>+
>+  spin_lock(&ctx->ring_lock);
>+  fence = dma_fence_get(centity->fences[i]);
>+  spin_unlock(&ctx->ring_lock);
>+  if (!fence)
>+  continue;
>+  s_fence = to_drm_sched_fence(fence);
>+  if (!dma_fence_is_signaled(&s_fence->scheduled))
>+  continue;
>+  t1 = s_fence->scheduled.timestamp;
>+  if (t1 >= now)
>+  continue;
>+  if (dma_fence_is_signaled(&s_fence->finished) &&
>+  s_fence->finished.timestamp < now)
>+  *total += ktime_sub(s_fence->finished.timestamp, t1);
>+  else
>+  *total += ktime_sub(now, t1);
>+  t1 = ktime_sub(now, t1);
>+  dma_fence_put(fence);
>+  *max = max(t1, *max);
>+  }
>+}
>+
>+ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr,
>uint32_t hwip,
>+  uint32_t idx, uint64_t *elapsed)
>+{
>+  struct idr *idp;
>+  struct amdgpu_ctx *ctx;
>+  uint32_t id;
>+  struct amdgpu_ctx_entity *centity;
>+  ktime_t total = 0, max = 0;
>+
>+  if (idx >= AMDGPU_MAX_ENTITY_NUM)
>+  return 0;
>+  idp = &mgr->ctx_handles;
>+  mutex_lock(&mgr->lock);
>+  idr_for_each_entry(idp, ctx, id) {
>+  if (!ctx->entities[hwip][idx])
>+  continue;
>+
>+  centity = ctx->entities[hwip][idx];
>+  amdgpu_ctx_fence_time(ctx, centity, &total, &max);
>+  }
>+
>+  mutex_unlock(&mgr->lock);
>+  if (elapsed)
>+  *elapsed = max;
>+
>+  return total;
>+}
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>index 

RE: [PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

2021-04-20 Thread Sun, Roy
[AMD Official Use Only - Internal Distribution Only]

Ping.
Could you help review this patch again?

BR
Roy

-Original Message-
From: Roy Sun  
Sent: Monday, April 19, 2021 2:26 PM
To: amd-gfx@lists.freedesktop.org
Cc: Sun, Roy ; Nieto, David M 
Subject: [PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

Tracking devices, process info and fence info using /proc/pid/fdinfo

Signed-off-by: David M Nieto 
Signed-off-by: Roy Sun 
---
 drivers/gpu/drm/amd/amdgpu/Makefile|  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 61 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 95 ++  
drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h | 43 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 +
 10 files changed, 239 insertions(+), 2 deletions(-)  create mode 100644 
drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index ee85e8aba636..d216b7ecb5d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,6 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o
 
+amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
+
 amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
 
 # add asic specific block
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 125b25a5ce5b..3365feae15e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -107,6 +107,7 @@
 #include "amdgpu_gfxhub.h"
 #include "amdgpu_df.h"
 #include "amdgpu_smuio.h"
+#include "amdgpu_fdinfo.h"
 
 #define MAX_GPU_INSTANCE   16
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0350205c4897..01fe60fedcbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -651,3 +651,64 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
 }
+
+void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity 
*centity,
+   ktime_t *total, ktime_t *max)
+{
+   ktime_t now, t1;
+   uint32_t i;
+
+   now = ktime_get();
+   for (i = 0; i < amdgpu_sched_jobs; i++) {
+   struct dma_fence *fence;
+   struct drm_sched_fence *s_fence;
+
+   spin_lock(&ctx->ring_lock);
+   fence = dma_fence_get(centity->fences[i]);
+   spin_unlock(&ctx->ring_lock);
+   if (!fence)
+   continue;
+   s_fence = to_drm_sched_fence(fence);
+   if (!dma_fence_is_signaled(&s_fence->scheduled))
+   continue;
+   t1 = s_fence->scheduled.timestamp;
+   if (t1 >= now)
+   continue;
+   if (dma_fence_is_signaled(&s_fence->finished) &&
+   s_fence->finished.timestamp < now)
+   *total += ktime_sub(s_fence->finished.timestamp, t1);
+   else
+   *total += ktime_sub(now, t1);
+   t1 = ktime_sub(now, t1);
+   dma_fence_put(fence);
+   *max = max(t1, *max);
+   }
+}
+
+ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
+   uint32_t idx, uint64_t *elapsed)
+{
+   struct idr *idp;
+   struct amdgpu_ctx *ctx;
+   uint32_t id;
+   struct amdgpu_ctx_entity *centity;
+   ktime_t total = 0, max = 0;
+
+   if (idx >= AMDGPU_MAX_ENTITY_NUM)
+   return 0;
+   idp = &mgr->ctx_handles;
+   mutex_lock(&mgr->lock);
+   idr_for_each_entry(idp, ctx, id) {
+   if (!ctx->entities[hwip][idx])
+   continue;
+
+   centity = ctx->entities[hwip][idx];
+   amdgpu_ctx_fence_time(ctx, centity, &total, &max);
+   }
+
+   mutex_unlock(&mgr->lock);
+   if (elapsed)
+   *elapsed = max;
+
+   return total;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index f54e10314661..10dcf59a5c6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -87,5 +87,8 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
-
+ktime_t 

Re: [PATCH v3 5/7] drm/vmwgfx: Inline ttm_bo_mmap() into vmwgfx driver

2021-04-20 Thread Daniel Vetter
On Tue, Apr 20, 2021 at 09:51:27AM +0200, Thomas Zimmermann wrote:
> Hi
> 
> > On 16.04.21 at 15:51, Christian König wrote:
> > > On 16.04.21 at 15:46, Christian König wrote:
> > > > On 16.04.21 at 15:31, Thomas Zimmermann wrote:
> > > > The vmwgfx driver is the only remaining user of ttm_bo_mmap(). Inline
> > > > the code. The internal helper ttm_bo_vm_lookup() is now also part of
> > > > vmwgfx as vmw_bo_vm_lookup().
> > > > 
> > > > v2:
> > > > * replace pr_err() with drm_err() (Zack)
> > > > 
> > > > Signed-off-by: Thomas Zimmermann 
> > > > Reviewed-by: Zack Rusin 
> > > > ---
> > > >   drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c | 56 ++--
> > > >   1 file changed, 53 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
> > > > b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
> > > > index cb9975889e2f..c8b6543b4e39 100644
> > > > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
> > > > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
> > > > @@ -27,6 +27,32 @@
> > > >     #include "vmwgfx_drv.h"
> > > >   +static struct ttm_buffer_object *vmw_bo_vm_lookup(struct
> > > > ttm_device *bdev,
> > > > +  unsigned long offset,
> > > > +  unsigned long pages)
> > > > +{
> > > > +    struct vmw_private *dev_priv = container_of(bdev, struct
> > > > vmw_private, bdev);
> > > > +    struct drm_device *drm = &dev_priv->drm;
> > > > +    struct drm_vma_offset_node *node;
> > > > +    struct ttm_buffer_object *bo = NULL;
> > > > +
> > > > +    drm_vma_offset_lock_lookup(bdev->vma_manager);
> > > > +
> > > > +    node = drm_vma_offset_lookup_locked(bdev->vma_manager,
> > > > offset, pages);
> > > > +    if (likely(node)) {
> > > > +    bo = container_of(node, struct ttm_buffer_object,
> > > > +  base.vma_node);
> > > > +    bo = ttm_bo_get_unless_zero(bo);
> > > > +    }
> > > > +
> > > > +    drm_vma_offset_unlock_lookup(bdev->vma_manager);
> > > > +
> > > > +    if (!bo)
> > > > +    drm_err(drm, "Could not find buffer object to map\n");
> > > > +
> > > > +    return bo;
> > > > +}
> > > > +
> > > >   int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
> > > >   {
> > > >   static const struct vm_operations_struct vmw_vm_ops = {
> > > > @@ -41,10 +67,28 @@ int vmw_mmap(struct file *filp, struct
> > > > vm_area_struct *vma)
> > > >   };
> > > >   struct drm_file *file_priv = filp->private_data;
> > > >   struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
> > > > -    int ret = ttm_bo_mmap(filp, vma, &dev_priv->bdev);
> > > > +    struct ttm_device *bdev = &dev_priv->bdev;
> > > > +    struct ttm_buffer_object *bo;
> > > > +    int ret;
> > > > +
> > > > +    if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET_START))
> > > > +    return -EINVAL;
> > > > +
> > > > +    bo = vmw_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma));
> > > > +    if (unlikely(!bo))
> > > > +    return -EINVAL;
> > > >   -    if (ret)
> > > > -    return ret;
> > > > +    if (unlikely(!bo->bdev->funcs->verify_access)) {
> > > > +    ret = -EPERM;
> > > > +    goto out_unref;
> > > > +    }
> > > > +    ret = bo->bdev->funcs->verify_access(bo, filp);
> > > 
> > > Is there any reason we can't call vmw_verify_access() directly here?
> > > 
> > > Would allow us to completely nuke the verify_access callback as well
> > > as far as I can see.
> > 
> > Forget what I said, couldn't see the next patch in my mailbox at time of
> > writing.
> > 
> > Whole series is Reviewed-by: Christian König 
> 
> Thanks a lot. If I'm not mistaken, the patches at [1] need to go in first.
> So it could take a bit until this lands.
> 
> Otherwise, this series could go through the same tree as [1] if nouveau and
> vmwgfx devs don't mind.

I would land it all through drm-misc-next. Maybe check with Alex on irc
for an ack for merging that way, but I don't think this will cause issues
against the amdgpu tree. Lots of ttm cleanup has landed this way already
past few months. Otherwise you could create a small topic branch with
these patches here and send that to Alex, and he can sort out the
interaction with Felix' series.
-Daniel


> 
> Best regards
> Thomas
> 
> [1] https://patchwork.freedesktop.org/series/88822/
> 
> > 
> > Thanks for the nice cleanup,
> > Christian.
> > 
> > > 
> > > Regards,
> > > Christian.
> > > 
> > > > +    if (unlikely(ret != 0))
> > > > +    goto out_unref;
> > > > +
> > > > +    ret = ttm_bo_mmap_obj(vma, bo);
> > > > +    if (unlikely(ret != 0))
> > > > +    goto out_unref;
> > > >     vma->vm_ops = &vmw_vm_ops;
> > > >   @@ -52,7 +96,13 @@ int vmw_mmap(struct file *filp, struct
> > > > vm_area_struct *vma)
> > > >   if (!is_cow_mapping(vma->vm_flags))
> > > >   vma->vm_flags = (vma->vm_flags & ~VM_MIXEDMAP) | VM_PFNMAP;
> > > > +    ttm_bo_put(bo); /* release extra ref taken by ttm_bo_mmap_obj() */
> > > > +
> > > >   return 0;
> 

Re: [PATCH] drm/amdgpu: Add mem sync flag for IB allocated by SA

2021-04-20 Thread Huang Rui
On Tue, Apr 20, 2021 at 04:34:18PM +0800, Christian König wrote:
> On 20.04.21 at 10:27, Jinzhou Su wrote:
> > The buffer of an SA bo may be used in many cases, so it is better
> > to flush the cache of an indirect buffer allocated by the SA before
> > committing the IB.
> >
> > Signed-off-by: Jinzhou Su 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 ++
> >   1 file changed, 2 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > index 148a3b481b12..a2fe2dac32c1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > @@ -76,6 +76,8 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct 
> > amdgpu_vm *vm,
> > }
> >   
> > ib->ptr = amdgpu_sa_bo_cpu_addr(ib->sa_bo);
> > +   /* flush the cache before commit the IB */
> 
> Either drop the comment or change it a bit.
> 
> E.g. like "/* Invalidate the cache to make the GPU see the newest SA
> content */"
> 
> With that done the patch is Reviewed-by: Christian König 
> 

Yes, with this fix, the patch looks good to me as well.

Reviewed-by: Huang Rui 

> 
> Regards,
> Christian.
> 
> > +   ib->flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
> >   
> > if (!vm)
> > ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
> 


Re: [PATCH] drm/amdgpu: Add mem sync flag for IB allocated by SA

2021-04-20 Thread Christian König

On 20.04.21 at 10:27, Jinzhou Su wrote:

The buffer of an SA bo may be used in many cases, so it is better
to flush the cache of an indirect buffer allocated by the SA before
committing the IB.

Signed-off-by: Jinzhou Su 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 148a3b481b12..a2fe2dac32c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -76,6 +76,8 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
}
  
  		ib->ptr = amdgpu_sa_bo_cpu_addr(ib->sa_bo);

+   /* flush the cache before commit the IB */


Either drop the comment or change it a bit.

E.g. like "/* Invalidate the cache to make the GPU see the newest SA
content */"


With that done the patch is Reviewed-by: Christian König 



Regards,
Christian.


+   ib->flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
  
  		if (!vm)

ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);




[PATCH] drm/amdgpu: Add mem sync flag for IB allocated by SA

2021-04-20 Thread Jinzhou Su
The buffer of an SA bo may be used in many cases, so it is better
to flush the cache of an indirect buffer allocated by the SA before
committing the IB.

Signed-off-by: Jinzhou Su 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 148a3b481b12..a2fe2dac32c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -76,6 +76,8 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
}
 
ib->ptr = amdgpu_sa_bo_cpu_addr(ib->sa_bo);
+   /* flush the cache before commit the IB */
+   ib->flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
 
if (!vm)
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
-- 
2.27.0



Re: [PATCH v3 5/7] drm/vmwgfx: Inline ttm_bo_mmap() into vmwgfx driver

2021-04-20 Thread Thomas Zimmermann

Hi

On 16.04.21 at 15:51, Christian König wrote:

On 16.04.21 at 15:46, Christian König wrote:

On 16.04.21 at 15:31, Thomas Zimmermann wrote:

The vmwgfx driver is the only remaining user of ttm_bo_mmap(). Inline
the code. The internal helper ttm_bo_vm_lookup() is now also part of
vmwgfx as vmw_bo_vm_lookup().

v2:
* replace pr_err() with drm_err() (Zack)

Signed-off-by: Thomas Zimmermann 
Reviewed-by: Zack Rusin 
---
  drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c | 56 ++--
  1 file changed, 53 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c

index cb9975889e2f..c8b6543b4e39 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -27,6 +27,32 @@
    #include "vmwgfx_drv.h"
  +static struct ttm_buffer_object *vmw_bo_vm_lookup(struct 
ttm_device *bdev,

+  unsigned long offset,
+  unsigned long pages)
+{
+    struct vmw_private *dev_priv = container_of(bdev, struct 
vmw_private, bdev);

+    struct drm_device *drm = &dev_priv->drm;
+    struct drm_vma_offset_node *node;
+    struct ttm_buffer_object *bo = NULL;
+
+    drm_vma_offset_lock_lookup(bdev->vma_manager);
+
+    node = drm_vma_offset_lookup_locked(bdev->vma_manager, offset, 
pages);

+    if (likely(node)) {
+    bo = container_of(node, struct ttm_buffer_object,
+  base.vma_node);
+    bo = ttm_bo_get_unless_zero(bo);
+    }
+
+    drm_vma_offset_unlock_lookup(bdev->vma_manager);
+
+    if (!bo)
+    drm_err(drm, "Could not find buffer object to map\n");
+
+    return bo;
+}
+
  int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
  {
  static const struct vm_operations_struct vmw_vm_ops = {
@@ -41,10 +67,28 @@ int vmw_mmap(struct file *filp, struct 
vm_area_struct *vma)

  };
  struct drm_file *file_priv = filp->private_data;
  struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
-    int ret = ttm_bo_mmap(filp, vma, &dev_priv->bdev);
+    struct ttm_device *bdev = &dev_priv->bdev;
+    struct ttm_buffer_object *bo;
+    int ret;
+
+    if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET_START))
+    return -EINVAL;
+
+    bo = vmw_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma));
+    if (unlikely(!bo))
+    return -EINVAL;
  -    if (ret)
-    return ret;
+    if (unlikely(!bo->bdev->funcs->verify_access)) {
+    ret = -EPERM;
+    goto out_unref;
+    }
+    ret = bo->bdev->funcs->verify_access(bo, filp);


Is there any reason we can't call vmw_verify_access() directly here?

Would allow us to completely nuke the verify_access callback as well 
as far as I can see.


Forget what I said, couldn't see the next patch in my mailbox at time of 
writing.


Whole series is Reviewed-by: Christian König 


Thanks a lot. If I'm not mistaken, the patches at [1] need to go in 
first. So it could take a bit until this lands.


Otherwise, this series could go through the same tree as [1] if nouveau 
and vmwgfx devs don't mind.


Best regards
Thomas

[1] https://patchwork.freedesktop.org/series/88822/



Thanks for the nice cleanup,
Christian.



Regards,
Christian.


+    if (unlikely(ret != 0))
+    goto out_unref;
+
+    ret = ttm_bo_mmap_obj(vma, bo);
+    if (unlikely(ret != 0))
+    goto out_unref;
    vma->vm_ops = &vmw_vm_ops;
  @@ -52,7 +96,13 @@ int vmw_mmap(struct file *filp, struct 
vm_area_struct *vma)

  if (!is_cow_mapping(vma->vm_flags))
  vma->vm_flags = (vma->vm_flags & ~VM_MIXEDMAP) | VM_PFNMAP;
  +    ttm_bo_put(bo); /* release extra ref taken by ttm_bo_mmap_obj() */
+
  return 0;
+
+out_unref:
+    ttm_bo_put(bo);
+    return ret;
  }
    /* struct vmw_validation_mem callback */






--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Managing Director: Felix Imendörffer





Re: [PATCH] drm/amdgpu: Add mem sync flag for SDMA IB test

2021-04-20 Thread Christian König

I suggest doing this in amdgpu_ib_get() instead.

As soon as the size is specified we get some memory from the SA
allocator and should also set the flag, to be on the safe side.


Thanks,
Christian.

On 20.04.21 at 08:55, Su, Jinzhou (Joe) wrote:

[AMD Official Use Only - Internal Distribution Only]

Hello Christian,

Thanks for your information. We added this due to an sdma0 timeout during
S3 stress testing.
Will update this for other cases in v2.

Regards,
Joe

-Original Message-
From: Christian König 
Sent: Tuesday, April 20, 2021 2:39 PM
To: Su, Jinzhou (Joe) ; amd-gfx@lists.freedesktop.org
Cc: Huang, Ray 
Subject: Re: [PATCH] drm/amdgpu: Add mem sync flag for SDMA IB test

On 20.04.21 at 04:23, Jinzhou Su wrote:

The buffer for the SDMA IB test is allocated from an SA bo which may be used
for other purposes. Better to flush the cache before committing the IB.

Good point, but shouldn't we do this for a lot of other cases as well?

I think the only place where we should not set the flag is in the CS IOCTL.

Regards,
Christian.


Signed-off-by: Jinzhou Su 
---
   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 1 +
   1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index b1ad9e52b234..da67f440b102 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1000,6 +1000,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring 
*ring, long timeout)
ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
ib.length_dw = 8;
   
+	ib.flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err1;




RE: [PATCH] drm/amdgpu: Add mem sync flag for SDMA IB test

2021-04-20 Thread Su, Jinzhou (Joe)
[AMD Official Use Only - Internal Distribution Only]

Hello Christian,

Thanks for your information. We added this due to an sdma0 timeout during
S3 stress testing.
Will update this for other cases in v2.

Regards,
Joe

-Original Message-
From: Christian König  
Sent: Tuesday, April 20, 2021 2:39 PM
To: Su, Jinzhou (Joe) ; amd-gfx@lists.freedesktop.org
Cc: Huang, Ray 
Subject: Re: [PATCH] drm/amdgpu: Add mem sync flag for SDMA IB test

Am 20.04.21 um 04:23 schrieb Jinzhou Su:
> The buffer for the SDMA IB test is allocated from an SA bo which may be
> used for other purposes. Better to flush the cache before committing the IB.

Good point, but shouldn't we do this for a lot of other cases as well?

I think the only place where we should not set the flag is in the CS IOCTL.

Regards,
Christian.

>
> Signed-off-by: Jinzhou Su 
> ---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 1 +
>   1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index b1ad9e52b234..da67f440b102 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -1000,6 +1000,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring 
> *ring, long timeout)
>   ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
>   ib.length_dw = 8;
>   
> + ib.flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
>   r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
>   if (r)
>   goto err1;


Re: [PATCH] drm/amdgpu: Add mem sync flag for SDMA IB test

2021-04-20 Thread Christian König

On 20.04.21 at 04:23, Jinzhou Su wrote:

The buffer for the SDMA IB test is allocated from an SA bo
which may be used for other purposes. Better to flush
the cache before committing the IB.


Good point, but shouldn't we do this for a lot of other cases as well?

I think the only place where we should not set the flag is in the CS IOCTL.

Regards,
Christian.



Signed-off-by: Jinzhou Su 
---
  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index b1ad9e52b234..da67f440b102 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1000,6 +1000,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring 
*ring, long timeout)
ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
ib.length_dw = 8;
  
+	ib.flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err1;

